mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-07-23 07:12:09 +00:00
swap: add per-partition lock for swapfile
swap_lock is heavily contended when I test swap to 3 fast SSD (even slightly slower than swap to 2 such SSD). The main contention comes from swap_info_get(). This patch tries to fix the gap with adding a new per-partition lock. Global data like nr_swapfiles, total_swap_pages, least_priority and swap_list are still protected by swap_lock. nr_swap_pages is an atomic now, it can be changed without swap_lock. In theory, it's possible get_swap_page() finds no swap pages but actually there are free swap pages. But sounds not a big problem. Accessing partition specific data (like scan_swap_map and so on) is only protected by swap_info_struct.lock. Changing swap_info_struct.flags need hold swap_lock and swap_info_struct.lock, because scan_scan_map() will check it. read the flags is ok with either the locks hold. If both swap_lock and swap_info_struct.lock must be hold, we always hold the former first to avoid deadlock. swap_entry_free() can change swap_list. To delete that code, we add a new highest_priority_index. Whenever get_swap_page() is called, we check it. If it's valid, we use it. It's a pity get_swap_page() still holds swap_lock(). But in practice, swap_lock() isn't heavily contended in my test with this patch (or I can say there are other much more heavier bottlenecks like TLB flush). And BTW, looks get_swap_page() doesn't really need the lock. We never free swap_info[] and we check SWAP_WRITEOK flag. The only risk without the lock is we could swapout to some low priority swap, but we can quickly recover after several rounds of swap, so sounds not a big deal to me. But I'd prefer to fix this if it's a real problem. "swap: make each swap partition have one address_space" improved the swapout speed from 1.7G/s to 2G/s. This patch further improves the speed to 2.3G/s, so around 15% improvement. It's a multi-process test, so TLB flush isn't the biggest bottleneck before the patches. [arnd@arndb.de: fix it for nommu] [hughd@google.com: add missing unlock] [minchan@kernel.org: get rid of lockdep whinge on sys_swapon] Signed-off-by: Shaohua Li <shli@fusionio.com> Cc: Hugh Dickins <hughd@google.com> Cc: Rik van Riel <riel@redhat.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Seth Jennings <sjenning@linux.vnet.ibm.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Cc: Dan Magenheimer <dan.magenheimer@oracle.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
33806f06da
commit
ec8acf20af
8 changed files with 145 additions and 60 deletions
|
@ -202,6 +202,18 @@ struct swap_info_struct {
|
|||
unsigned long *frontswap_map; /* frontswap in-use, one bit per page */
|
||||
atomic_t frontswap_pages; /* frontswap pages in-use counter */
|
||||
#endif
|
||||
spinlock_t lock; /*
|
||||
* protect map scan related fields like
|
||||
* swap_map, lowest_bit, highest_bit,
|
||||
* inuse_pages, cluster_next,
|
||||
* cluster_nr, lowest_alloc and
|
||||
* highest_alloc. other fields are only
|
||||
* changed at swapon/swapoff, so are
|
||||
* protected by swap_lock. changing
|
||||
* flags need hold this lock and
|
||||
* swap_lock. If both locks need hold,
|
||||
* hold swap_lock first.
|
||||
*/
|
||||
};
|
||||
|
||||
struct swap_list_t {
|
||||
|
@ -209,9 +221,6 @@ struct swap_list_t {
|
|||
int next; /* swapfile to be used next */
|
||||
};
|
||||
|
||||
/* Swap 50% full? Release swapcache more aggressively.. */
|
||||
#define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
|
||||
|
||||
/* linux/mm/page_alloc.c */
|
||||
extern unsigned long totalram_pages;
|
||||
extern unsigned long totalreserve_pages;
|
||||
|
@ -347,8 +356,20 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
|
|||
struct vm_area_struct *vma, unsigned long addr);
|
||||
|
||||
/* linux/mm/swapfile.c */
|
||||
extern long nr_swap_pages;
|
||||
extern atomic_long_t nr_swap_pages;
|
||||
extern long total_swap_pages;
|
||||
|
||||
/* Swap 50% full? Release swapcache more aggressively.. */
|
||||
static inline bool vm_swap_full(void)
|
||||
{
|
||||
return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages;
|
||||
}
|
||||
|
||||
static inline long get_nr_swap_pages(void)
|
||||
{
|
||||
return atomic_long_read(&nr_swap_pages);
|
||||
}
|
||||
|
||||
extern void si_swapinfo(struct sysinfo *);
|
||||
extern swp_entry_t get_swap_page(void);
|
||||
extern swp_entry_t get_swap_page_of_type(int);
|
||||
|
@ -381,9 +402,10 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
|
|||
|
||||
#else /* CONFIG_SWAP */
|
||||
|
||||
#define nr_swap_pages 0L
|
||||
#define get_nr_swap_pages() 0L
|
||||
#define total_swap_pages 0L
|
||||
#define total_swapcache_pages() 0UL
|
||||
#define vm_swap_full() 0
|
||||
|
||||
#define si_swapinfo(val) \
|
||||
do { (val)->freeswap = (val)->totalswap = 0; } while (0)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue