mm: refactor swap-in logic out of shmem_getpage_gfp

The swap-in logic can be reused independently of the rest of the logic in
shmem_getpage_gfp(), so let's refactor it out into an independent function.

Link: http://lkml.kernel.org/r/20190114153129.4852-1-vpillai@digitalocean.com
Signed-off-by: Vineeth Remanan Pillai <vpillai@digitalocean.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kelley Nielsen <kelleynnn@gmail.com>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Vineeth Remanan Pillai 2019-03-05 15:46:58 -08:00 committed by Linus Torvalds
parent a9e7c39fa9
commit c5bf121e43

View file

@ -123,6 +123,10 @@ static unsigned long shmem_default_max_inodes(void)
static bool shmem_should_replace_page(struct page *page, gfp_t gfp); static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
static int shmem_replace_page(struct page **pagep, gfp_t gfp, static int shmem_replace_page(struct page **pagep, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index); struct shmem_inode_info *info, pgoff_t index);
/* Forward declaration; shmem_swapin_page() is defined later in this file. */
static int shmem_swapin_page(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp,
gfp_t gfp, struct vm_area_struct *vma,
vm_fault_t *fault_type);
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp, struct page **pagep, enum sgp_type sgp,
gfp_t gfp, struct vm_area_struct *vma, gfp_t gfp, struct vm_area_struct *vma,
@ -1575,6 +1579,116 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
return error; return error;
} }
/*
 * shmem_swapin_page - bring a swapped-out shmem page back into the page cache.
 *
 * @inode:      inode whose mapping holds the swap entry
 * @index:      page offset within @inode's mapping
 * @pagep:      in: the swap value entry found in the mapping (must be
 *              non-NULL and satisfy xa_is_value()); out: the swapped-in
 *              page on success, NULL on failure
 * @sgp:        lookup type; only SGP_WRITE is treated specially here
 *              (the page is marked accessed)
 * @gfp:        allocation flags passed to the swap-in, replace, memcg
 *              charge and page-cache insertion helpers
 * @vma:        faulting vma, or NULL; when NULL the memcg charge is made
 *              against current->mm instead of vma->vm_mm
 * @fault_type: if non-NULL, VM_FAULT_MAJOR is OR-ed in when the page was
 *              not found in the swap cache and has to be read in
 *
 * Returns 0 and the page in *pagep on success. The success path neither
 * unlocks the page nor drops the reference taken while looking it up.
 * On failure, returns the error code and leaves NULL in *pagep:
 *   -ENOMEM  shmem_swapin() returned no page
 *   -EIO     the page is not up to date after swap-in
 *   -EEXIST  the swap entry at @index no longer matches what the caller
 *            saw (shmem_getpage_gfp() retries its lookup on this value)
 * or whatever error the replace/charge/add-to-page-cache helpers return.
 */
static int shmem_swapin_page(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp,
gfp_t gfp, struct vm_area_struct *vma,
vm_fault_t *fault_type)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
struct mem_cgroup *memcg;
struct page *page;
swp_entry_t swap;
int error;
/* The caller must hand in a value (swap) entry, not a real page. */
VM_BUG_ON(!*pagep || !xa_is_value(*pagep));
swap = radix_to_swp_entry(*pagep);
/* Clear the output now so every failure path returns NULL in *pagep. */
*pagep = NULL;
/* Look it up and read it in.. */
page = lookup_swap_cache(swap, NULL, 0);
if (!page) {
/* Or update major stats only when swapin succeeds?? */
if (fault_type) {
*fault_type |= VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
count_memcg_event_mm(charge_mm, PGMAJFAULT);
}
/* Here we actually start the io */
page = shmem_swapin(swap, gfp, info, index);
if (!page) {
error = -ENOMEM;
goto failed;
}
}
/* We have to do this with page locked to prevent races */
lock_page(page);
/*
 * Re-validate under the page lock: the page must still be in the swap
 * cache for this exact entry, and the mapping must still hold that
 * entry at @index. Otherwise report -EEXIST; jumping to "unlock"
 * skips the second shmem_confirm_swap() check done at "failed".
 */
if (!PageSwapCache(page) || page_private(page) != swap.val ||
!shmem_confirm_swap(mapping, index, swap)) {
error = -EEXIST;
goto unlock;
}
if (!PageUptodate(page)) {
error = -EIO;
goto failed;
}
wait_on_page_writeback(page);
/* shmem_replace_page() takes &page, so it may substitute a new page. */
if (shmem_should_replace_page(page, gfp)) {
error = shmem_replace_page(&page, gfp, info, index);
if (error)
goto failed;
}
/* Charge first; the charge is committed only once the page is inserted. */
error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
false);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
swp_to_radix_entry(swap), gfp);
/*
 * We already confirmed swap under page lock, and make
 * no memory allocation here, so usually no possibility
 * of error; but free_swap_and_cache() only trylocks a
 * page, so it is just possible that the entry has been
 * truncated or holepunched since swap was confirmed.
 * shmem_undo_range() will have done some of the
 * unaccounting, now delete_from_swap_cache() will do
 * the rest.
 */
if (error) {
mem_cgroup_cancel_charge(page, memcg, false);
delete_from_swap_cache(page);
}
}
if (error)
goto failed;
mem_cgroup_commit_charge(page, memcg, true, false);
/* One fewer swapped page for this inode; update counts under info->lock. */
spin_lock_irq(&info->lock);
info->swapped--;
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
if (sgp == SGP_WRITE)
mark_page_accessed(page);
/*
 * The page is now in the page cache: remove it from the swap cache,
 * mark it dirty, and release the swap entry.
 */
delete_from_swap_cache(page);
set_page_dirty(page);
swap_free(swap);
*pagep = page;
return 0;
failed:
/*
 * If the swap entry is no longer what the mapping holds at @index,
 * override the error with -EEXIST.
 */
if (!shmem_confirm_swap(mapping, index, swap))
error = -EEXIST;
unlock:
/* page is NULL only when shmem_swapin() failed before lock_page(). */
if (page) {
unlock_page(page);
put_page(page);
}
return error;
}
/* /*
* shmem_getpage_gfp - find page in cache, or get from swap, or allocate * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
* *
@ -1596,7 +1710,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct mm_struct *charge_mm; struct mm_struct *charge_mm;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
struct page *page; struct page *page;
swp_entry_t swap;
enum sgp_type sgp_huge = sgp; enum sgp_type sgp_huge = sgp;
pgoff_t hindex = index; pgoff_t hindex = index;
int error; int error;
@ -1608,17 +1721,23 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
if (sgp == SGP_NOHUGE || sgp == SGP_HUGE) if (sgp == SGP_NOHUGE || sgp == SGP_HUGE)
sgp = SGP_CACHE; sgp = SGP_CACHE;
repeat: repeat:
swap.val = 0;
page = find_lock_entry(mapping, index);
if (xa_is_value(page)) {
swap = radix_to_swp_entry(page);
page = NULL;
}
if (sgp <= SGP_CACHE && if (sgp <= SGP_CACHE &&
((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
error = -EINVAL; return -EINVAL;
goto unlock; }
sbinfo = SHMEM_SB(inode->i_sb);
charge_mm = vma ? vma->vm_mm : current->mm;
page = find_lock_entry(mapping, index);
if (xa_is_value(page)) {
error = shmem_swapin_page(inode, index, &page,
sgp, gfp, vma, fault_type);
if (error == -EEXIST)
goto repeat;
*pagep = page;
return error;
} }
if (page && sgp == SGP_WRITE) if (page && sgp == SGP_WRITE)
@ -1632,7 +1751,7 @@ repeat:
put_page(page); put_page(page);
page = NULL; page = NULL;
} }
if (page || (sgp == SGP_READ && !swap.val)) { if (page || sgp == SGP_READ) {
*pagep = page; *pagep = page;
return 0; return 0;
} }
@ -1641,215 +1760,138 @@ repeat:
* Fast cache lookup did not find it: * Fast cache lookup did not find it:
* bring it back from swap or allocate. * bring it back from swap or allocate.
*/ */
sbinfo = SHMEM_SB(inode->i_sb);
charge_mm = vma ? vma->vm_mm : current->mm;
if (swap.val) { if (vma && userfaultfd_missing(vma)) {
/* Look it up and read it in.. */ *fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
page = lookup_swap_cache(swap, NULL, 0); return 0;
if (!page) { }
/* Or update major stats only when swapin succeeds?? */
if (fault_type) {
*fault_type |= VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
count_memcg_event_mm(charge_mm, PGMAJFAULT);
}
/* Here we actually start the io */
page = shmem_swapin(swap, gfp, info, index);
if (!page) {
error = -ENOMEM;
goto failed;
}
}
/* We have to do this with page locked to prevent races */ /* shmem_symlink() */
lock_page(page); if (mapping->a_ops != &shmem_aops)
if (!PageSwapCache(page) || page_private(page) != swap.val || goto alloc_nohuge;
!shmem_confirm_swap(mapping, index, swap)) { if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
error = -EEXIST; /* try again */ goto alloc_nohuge;
goto unlock; if (shmem_huge == SHMEM_HUGE_FORCE)
} goto alloc_huge;
if (!PageUptodate(page)) { switch (sbinfo->huge) {
error = -EIO; loff_t i_size;
goto failed; pgoff_t off;
} case SHMEM_HUGE_NEVER:
wait_on_page_writeback(page); goto alloc_nohuge;
case SHMEM_HUGE_WITHIN_SIZE:
if (shmem_should_replace_page(page, gfp)) { off = round_up(index, HPAGE_PMD_NR);
error = shmem_replace_page(&page, gfp, info, index); i_size = round_up(i_size_read(inode), PAGE_SIZE);
if (error) if (i_size >= HPAGE_PMD_SIZE &&
goto failed; i_size >> PAGE_SHIFT >= off)
}
error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
false);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
swp_to_radix_entry(swap), gfp);
/*
* We already confirmed swap under page lock, and make
* no memory allocation here, so usually no possibility
* of error; but free_swap_and_cache() only trylocks a
* page, so it is just possible that the entry has been
* truncated or holepunched since swap was confirmed.
* shmem_undo_range() will have done some of the
* unaccounting, now delete_from_swap_cache() will do
* the rest.
* Reset swap.val? No, leave it so "failed" goes back to
* "repeat": reading a hole and writing should succeed.
*/
if (error) {
mem_cgroup_cancel_charge(page, memcg, false);
delete_from_swap_cache(page);
}
}
if (error)
goto failed;
mem_cgroup_commit_charge(page, memcg, true, false);
spin_lock_irq(&info->lock);
info->swapped--;
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
if (sgp == SGP_WRITE)
mark_page_accessed(page);
delete_from_swap_cache(page);
set_page_dirty(page);
swap_free(swap);
} else {
if (vma && userfaultfd_missing(vma)) {
*fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
return 0;
}
/* shmem_symlink() */
if (mapping->a_ops != &shmem_aops)
goto alloc_nohuge;
if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
goto alloc_nohuge;
if (shmem_huge == SHMEM_HUGE_FORCE)
goto alloc_huge; goto alloc_huge;
switch (sbinfo->huge) { /* fallthrough */
loff_t i_size; case SHMEM_HUGE_ADVISE:
pgoff_t off; if (sgp_huge == SGP_HUGE)
case SHMEM_HUGE_NEVER: goto alloc_huge;
goto alloc_nohuge; /* TODO: implement fadvise() hints */
case SHMEM_HUGE_WITHIN_SIZE: goto alloc_nohuge;
off = round_up(index, HPAGE_PMD_NR); }
i_size = round_up(i_size_read(inode), PAGE_SIZE);
if (i_size >= HPAGE_PMD_SIZE &&
i_size >> PAGE_SHIFT >= off)
goto alloc_huge;
/* fallthrough */
case SHMEM_HUGE_ADVISE:
if (sgp_huge == SGP_HUGE)
goto alloc_huge;
/* TODO: implement fadvise() hints */
goto alloc_nohuge;
}
alloc_huge: alloc_huge:
page = shmem_alloc_and_acct_page(gfp, inode, index, true); page = shmem_alloc_and_acct_page(gfp, inode, index, true);
if (IS_ERR(page)) { if (IS_ERR(page)) {
alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, inode, alloc_nohuge:
index, false); page = shmem_alloc_and_acct_page(gfp, inode,
} index, false);
if (IS_ERR(page)) { }
int retry = 5; if (IS_ERR(page)) {
error = PTR_ERR(page); int retry = 5;
page = NULL;
if (error != -ENOSPC)
goto failed;
/*
* Try to reclaim some spece by splitting a huge page
* beyond i_size on the filesystem.
*/
while (retry--) {
int ret;
ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
if (ret == SHRINK_STOP)
break;
if (ret)
goto alloc_nohuge;
}
goto failed;
}
if (PageTransHuge(page))
hindex = round_down(index, HPAGE_PMD_NR);
else
hindex = index;
if (sgp == SGP_WRITE)
__SetPageReferenced(page);
error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
PageTransHuge(page));
if (error)
goto unacct;
error = shmem_add_to_page_cache(page, mapping, hindex,
NULL, gfp & GFP_RECLAIM_MASK);
if (error) {
mem_cgroup_cancel_charge(page, memcg,
PageTransHuge(page));
goto unacct;
}
mem_cgroup_commit_charge(page, memcg, false,
PageTransHuge(page));
lru_cache_add_anon(page);
spin_lock_irq(&info->lock);
info->alloced += 1 << compound_order(page);
inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
alloced = true;
if (PageTransHuge(page) &&
DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
hindex + HPAGE_PMD_NR - 1) {
/*
* Part of the huge page is beyond i_size: subject
* to shrink under memory pressure.
*/
spin_lock(&sbinfo->shrinklist_lock);
/*
* _careful to defend against unlocked access to
* ->shrink_list in shmem_unused_huge_shrink()
*/
if (list_empty_careful(&info->shrinklist)) {
list_add_tail(&info->shrinklist,
&sbinfo->shrinklist);
sbinfo->shrinklist_len++;
}
spin_unlock(&sbinfo->shrinklist_lock);
}
error = PTR_ERR(page);
page = NULL;
if (error != -ENOSPC)
goto unlock;
/* /*
* Let SGP_FALLOC use the SGP_WRITE optimization on a new page. * Try to reclaim some space by splitting a huge page
* beyond i_size on the filesystem.
*/ */
if (sgp == SGP_FALLOC) while (retry--) {
sgp = SGP_WRITE; int ret;
ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
if (ret == SHRINK_STOP)
break;
if (ret)
goto alloc_nohuge;
}
goto unlock;
}
if (PageTransHuge(page))
hindex = round_down(index, HPAGE_PMD_NR);
else
hindex = index;
if (sgp == SGP_WRITE)
__SetPageReferenced(page);
error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
PageTransHuge(page));
if (error)
goto unacct;
error = shmem_add_to_page_cache(page, mapping, hindex,
NULL, gfp & GFP_RECLAIM_MASK);
if (error) {
mem_cgroup_cancel_charge(page, memcg,
PageTransHuge(page));
goto unacct;
}
mem_cgroup_commit_charge(page, memcg, false,
PageTransHuge(page));
lru_cache_add_anon(page);
spin_lock_irq(&info->lock);
info->alloced += 1 << compound_order(page);
inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
alloced = true;
if (PageTransHuge(page) &&
DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
hindex + HPAGE_PMD_NR - 1) {
/*
* Part of the huge page is beyond i_size: subject
* to shrink under memory pressure.
*/
spin_lock(&sbinfo->shrinklist_lock);
/*
* _careful to defend against unlocked access to
* ->shrink_list in shmem_unused_huge_shrink()
*/
if (list_empty_careful(&info->shrinklist)) {
list_add_tail(&info->shrinklist,
&sbinfo->shrinklist);
sbinfo->shrinklist_len++;
}
spin_unlock(&sbinfo->shrinklist_lock);
}
/*
* Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
*/
if (sgp == SGP_FALLOC)
sgp = SGP_WRITE;
clear: clear:
/* /*
* Let SGP_WRITE caller clear ends if write does not fill page; * Let SGP_WRITE caller clear ends if write does not fill page;
* but SGP_FALLOC on a page fallocated earlier must initialize * but SGP_FALLOC on a page fallocated earlier must initialize
* it now, lest undo on failure cancel our earlier guarantee. * it now, lest undo on failure cancel our earlier guarantee.
*/ */
if (sgp != SGP_WRITE && !PageUptodate(page)) { if (sgp != SGP_WRITE && !PageUptodate(page)) {
struct page *head = compound_head(page); struct page *head = compound_head(page);
int i; int i;
for (i = 0; i < (1 << compound_order(head)); i++) { for (i = 0; i < (1 << compound_order(head)); i++) {
clear_highpage(head + i); clear_highpage(head + i);
flush_dcache_page(head + i); flush_dcache_page(head + i);
}
SetPageUptodate(head);
} }
SetPageUptodate(head);
} }
/* Perhaps the file has been truncated since we checked */ /* Perhaps the file has been truncated since we checked */
@ -1879,9 +1921,6 @@ unacct:
put_page(page); put_page(page);
goto alloc_nohuge; goto alloc_nohuge;
} }
failed:
if (swap.val && !shmem_confirm_swap(mapping, index, swap))
error = -EEXIST;
unlock: unlock:
if (page) { if (page) {
unlock_page(page); unlock_page(page);