shmem: add huge pages support
Here's the basic implementation of huge pages support for shmem/tmpfs.
It's all pretty straightforward:

 - shmem_getpage() allocates a huge page if it can, and tries to insert
   it into the radix tree with shmem_add_to_page_cache();

 - shmem_add_to_page_cache() puts the page onto the radix tree if
   there's space for it;

 - shmem_undo_range() removes huge pages if they lie fully within the
   range. A partial truncate of a huge page zeroes out that part of the
   THP. This has a visible effect on fallocate(FALLOC_FL_PUNCH_HOLE)
   behaviour: as we don't really create a hole in this case,
   lseek(SEEK_HOLE) may return inconsistent results depending on which
   pages happened to be allocated (see the demo program after the
   sign-offs);

 - no need to change shmem_fault(): core mm will map a compound page as
   huge if the VMA is suitable.

Link: http://lkml.kernel.org/r/1466021202-61880-30-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
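To illustrate the lseek(SEEK_HOLE) caveat above, here is a minimal
userspace sketch. It is not part of the commit: the file path, sizes
and offsets are arbitrary assumptions, and /tmp must be a tmpfs mount
with huge pages enabled for the effect to be observable.

/* Hypothetical demo: punch a hole in a tmpfs file, then ask where the
 * first hole is.  If the punched range was backed by a huge page, it
 * is only zeroed out, so SEEK_HOLE may skip past it to EOF. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char path[] = "/tmp/thp-hole-XXXXXX";
	char buf[4096];
	off_t hole;
	int fd, i;

	fd = mkstemp(path);
	if (fd < 0) {
		perror("mkstemp");
		return 1;
	}
	unlink(path);

	/* Write 4MB so part of the file can be backed by a 2MB THP. */
	memset(buf, 'x', sizeof(buf));
	for (i = 0; i < 1024; i++)
		write(fd, buf, sizeof(buf));

	/* Punch a 1MB hole inside what may be a single huge page. */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			1 << 20, 1 << 20))
		perror("fallocate");

	/* With small pages this prints 1048576; with a THP backing the
	 * range, the "hole" was merely zeroed and may not be reported. */
	hole = lseek(fd, 0, SEEK_HOLE);
	printf("first hole at %lld\n", (long long)hole);

	close(fd);
	return 0;
}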
parent c01d5b3007
commit 800d8c63b2
9 changed files with 331 additions and 70 deletions
include/linux/huge_mm.h
@@ -156,6 +156,8 @@ void put_huge_zero_page(void);
 
 #define transparent_hugepage_enabled(__vma)	0
 
+static inline void prep_transhuge_page(struct page *page) {}
+
 #define transparent_hugepage_flags 0UL
 static inline int
 split_huge_page_to_list(struct page *page, struct list_head *list)
include/linux/shmem_fs.h
@@ -71,6 +71,9 @@ static inline struct page *shmem_read_mapping_page(
 					mapping_gfp_mask(mapping));
 }
 
+extern bool shmem_charge(struct inode *inode, long pages);
+extern void shmem_uncharge(struct inode *inode, long pages);
+
 #ifdef CONFIG_TMPFS
 
 extern int shmem_add_seals(struct file *file, unsigned int seals);
mm/filemap.c
@@ -219,8 +219,13 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 	/* hugetlb pages do not participate in page cache accounting. */
 	if (!PageHuge(page))
 		__mod_zone_page_state(page_zone(page), NR_FILE_PAGES, -nr);
-	if (PageSwapBacked(page))
+	if (PageSwapBacked(page)) {
 		__mod_zone_page_state(page_zone(page), NR_SHMEM, -nr);
+		if (PageTransHuge(page))
+			__dec_zone_page_state(page, NR_SHMEM_THPS);
+	} else {
+		VM_BUG_ON_PAGE(PageTransHuge(page) && !PageHuge(page), page);
+	}
 
 	/*
 	 * At this point page must be either written or cleaned by truncate.
mm/huge_memory.c
@@ -3316,6 +3316,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 		if (head[i].index >= end) {
 			__ClearPageDirty(head + i);
 			__delete_from_page_cache(head + i, NULL);
+			if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
+				shmem_uncharge(head->mapping->host, 1);
 			put_page(head + i);
 		}
 	}
mm/memory.c
@@ -1142,7 +1142,7 @@ again:
 				 * unmap shared but keep private pages.
 				 */
 				if (details->check_mapping &&
-				    details->check_mapping != page->mapping)
+				    details->check_mapping != page_rmapping(page))
 					continue;
 			}
 			ptent = ptep_get_and_clear_full(mm, addr, pte,
mm/mempolicy.c
@@ -531,7 +531,7 @@ retry:
 		nid = page_to_nid(page);
 		if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
 			continue;
-		if (PageTransCompound(page) && PageAnon(page)) {
+		if (PageTransCompound(page)) {
 			get_page(page);
 			pte_unmap_unlock(pte, ptl);
 			lock_page(page);
mm/page-writeback.c
@@ -2563,6 +2563,7 @@ int set_page_dirty(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 
+	page = compound_head(page);
 	if (likely(mapping)) {
 		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
 		/*
mm/shmem.c
@@ -173,10 +173,13 @@ static inline int shmem_reacct_size(unsigned long flags,
  * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
  * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
  */
-static inline int shmem_acct_block(unsigned long flags)
+static inline int shmem_acct_block(unsigned long flags, long pages)
 {
-	return (flags & VM_NORESERVE) ?
-		security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_SIZE)) : 0;
+	if (!(flags & VM_NORESERVE))
+		return 0;
+
+	return security_vm_enough_memory_mm(current->mm,
+			pages * VM_ACCT(PAGE_SIZE));
 }
 
 static inline void shmem_unacct_blocks(unsigned long flags, long pages)
@@ -249,6 +252,51 @@ static void shmem_recalc_inode(struct inode *inode)
 	}
 }
 
+bool shmem_charge(struct inode *inode, long pages)
+{
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+
+	if (shmem_acct_block(info->flags, pages))
+		return false;
+	spin_lock(&info->lock);
+	info->alloced += pages;
+	inode->i_blocks += pages * BLOCKS_PER_PAGE;
+	shmem_recalc_inode(inode);
+	spin_unlock(&info->lock);
+	inode->i_mapping->nrpages += pages;
+
+	if (!sbinfo->max_blocks)
+		return true;
+	if (percpu_counter_compare(&sbinfo->used_blocks,
+				sbinfo->max_blocks - pages) > 0) {
+		inode->i_mapping->nrpages -= pages;
+		spin_lock(&info->lock);
+		info->alloced -= pages;
+		shmem_recalc_inode(inode);
+		spin_unlock(&info->lock);
+
+		return false;
+	}
+	percpu_counter_add(&sbinfo->used_blocks, pages);
+	return true;
+}
+
+void shmem_uncharge(struct inode *inode, long pages)
+{
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+
+	spin_lock(&info->lock);
+	info->alloced -= pages;
+	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
+	shmem_recalc_inode(inode);
+	spin_unlock(&info->lock);
+
+	if (sbinfo->max_blocks)
+		percpu_counter_sub(&sbinfo->used_blocks, pages);
+}
+
 /*
  * Replace item expected in radix tree by a new item, while holding tree lock.
  */
@@ -376,30 +424,57 @@ static int shmem_add_to_page_cache(struct page *page,
 				   struct address_space *mapping,
 				   pgoff_t index, void *expected)
 {
-	int error;
+	int error, nr = hpage_nr_pages(page);
 
+	VM_BUG_ON_PAGE(PageTail(page), page);
+	VM_BUG_ON_PAGE(index != round_down(index, nr), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
+	VM_BUG_ON(expected && PageTransHuge(page));
 
-	get_page(page);
+	page_ref_add(page, nr);
 	page->mapping = mapping;
 	page->index = index;
 
 	spin_lock_irq(&mapping->tree_lock);
-	if (!expected)
+	if (PageTransHuge(page)) {
+		void __rcu **results;
+		pgoff_t idx;
+		int i;
+
+		error = 0;
+		if (radix_tree_gang_lookup_slot(&mapping->page_tree,
+					&results, &idx, index, 1) &&
+				idx < index + HPAGE_PMD_NR) {
+			error = -EEXIST;
+		}
+
+		if (!error) {
+			for (i = 0; i < HPAGE_PMD_NR; i++) {
+				error = radix_tree_insert(&mapping->page_tree,
+						index + i, page + i);
+				VM_BUG_ON(error);
+			}
+			count_vm_event(THP_FILE_ALLOC);
+		}
+	} else if (!expected) {
 		error = radix_tree_insert(&mapping->page_tree, index, page);
-	else
+	} else {
 		error = shmem_radix_tree_replace(mapping, index, expected,
 								 page);
+	}
+
 	if (!error) {
-		mapping->nrpages++;
-		__inc_zone_page_state(page, NR_FILE_PAGES);
-		__inc_zone_page_state(page, NR_SHMEM);
+		mapping->nrpages += nr;
+		if (PageTransHuge(page))
+			__inc_zone_page_state(page, NR_SHMEM_THPS);
+		__mod_zone_page_state(page_zone(page), NR_FILE_PAGES, nr);
+		__mod_zone_page_state(page_zone(page), NR_SHMEM, nr);
 		spin_unlock_irq(&mapping->tree_lock);
 	} else {
 		page->mapping = NULL;
 		spin_unlock_irq(&mapping->tree_lock);
-		put_page(page);
+		page_ref_sub(page, nr);
 	}
 	return error;
 }
@@ -412,6 +487,8 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
 	struct address_space *mapping = page->mapping;
 	int error;
 
+	VM_BUG_ON_PAGE(PageCompound(page), page);
+
 	spin_lock_irq(&mapping->tree_lock);
 	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
 	page->mapping = NULL;
@@ -591,10 +668,33 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 					continue;
 			}
 
+			VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page);
+
 			if (!trylock_page(page))
 				continue;
+
+			if (PageTransTail(page)) {
+				/* Middle of THP: zero out the page */
+				clear_highpage(page);
+				unlock_page(page);
+				continue;
+			} else if (PageTransHuge(page)) {
+				if (index == round_down(end, HPAGE_PMD_NR)) {
+					/*
+					 * Range ends in the middle of THP:
+					 * zero out the page
+					 */
+					clear_highpage(page);
+					unlock_page(page);
+					continue;
+				}
+				index += HPAGE_PMD_NR - 1;
+				i += HPAGE_PMD_NR - 1;
+			}
+
 			if (!unfalloc || !PageUptodate(page)) {
-				if (page->mapping == mapping) {
+				VM_BUG_ON_PAGE(PageTail(page), page);
+				if (page_mapping(page) == mapping) {
 					VM_BUG_ON_PAGE(PageWriteback(page), page);
 					truncate_inode_page(mapping, page);
 				}
@@ -670,8 +770,36 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			}
 
 			lock_page(page);
+
+			if (PageTransTail(page)) {
+				/* Middle of THP: zero out the page */
+				clear_highpage(page);
+				unlock_page(page);
+				/*
+				 * Partial thp truncate due 'start' in middle
+				 * of THP: don't need to look on these pages
+				 * again on !pvec.nr restart.
+				 */
+				if (index != round_down(end, HPAGE_PMD_NR))
+					start++;
+				continue;
+			} else if (PageTransHuge(page)) {
+				if (index == round_down(end, HPAGE_PMD_NR)) {
+					/*
+					 * Range ends in the middle of THP:
+					 * zero out the page
+					 */
+					clear_highpage(page);
+					unlock_page(page);
+					continue;
+				}
+				index += HPAGE_PMD_NR - 1;
+				i += HPAGE_PMD_NR - 1;
+			}
+
 			if (!unfalloc || !PageUptodate(page)) {
-				if (page->mapping == mapping) {
+				VM_BUG_ON_PAGE(PageTail(page), page);
+				if (page_mapping(page) == mapping) {
 					VM_BUG_ON_PAGE(PageWriteback(page), page);
 					truncate_inode_page(mapping, page);
 				} else {
@@ -929,6 +1057,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	swp_entry_t swap;
 	pgoff_t index;
 
+	VM_BUG_ON_PAGE(PageCompound(page), page);
 	BUG_ON(!PageLocked(page));
 	mapping = page->mapping;
 	index = page->index;
@@ -1065,24 +1194,63 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 #define vm_policy vm_private_data
 #endif
 
+static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
+		struct shmem_inode_info *info, pgoff_t index)
+{
+	/* Create a pseudo vma that just contains the policy */
+	vma->vm_start = 0;
+	/* Bias interleave by inode number to distribute better across nodes */
+	vma->vm_pgoff = index + info->vfs_inode.i_ino;
+	vma->vm_ops = NULL;
+	vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
+}
+
+static void shmem_pseudo_vma_destroy(struct vm_area_struct *vma)
+{
+	/* Drop reference taken by mpol_shared_policy_lookup() */
+	mpol_cond_put(vma->vm_policy);
+}
+
 static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
 			struct shmem_inode_info *info, pgoff_t index)
 {
 	struct vm_area_struct pvma;
 	struct page *page;
 
-	/* Create a pseudo vma that just contains the policy */
-	pvma.vm_start = 0;
-	/* Bias interleave by inode number to distribute better across nodes */
-	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
-	pvma.vm_ops = NULL;
-	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
-
+	shmem_pseudo_vma_init(&pvma, info, index);
 	page = swapin_readahead(swap, gfp, &pvma, 0);
+	shmem_pseudo_vma_destroy(&pvma);
+
+	return page;
+}
+
+static struct page *shmem_alloc_hugepage(gfp_t gfp,
+		struct shmem_inode_info *info, pgoff_t index)
+{
+	struct vm_area_struct pvma;
+	struct inode *inode = &info->vfs_inode;
+	struct address_space *mapping = inode->i_mapping;
+	pgoff_t idx, hindex = round_down(index, HPAGE_PMD_NR);
+	void __rcu **results;
+	struct page *page;
+
+	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+		return NULL;
+
+	rcu_read_lock();
+	if (radix_tree_gang_lookup_slot(&mapping->page_tree, &results, &idx,
+				hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
+		rcu_read_unlock();
+		return NULL;
+	}
+	rcu_read_unlock();
 
-	/* Drop reference taken by mpol_shared_policy_lookup() */
-	mpol_cond_put(pvma.vm_policy);
-
+	shmem_pseudo_vma_init(&pvma, info, hindex);
+	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+	shmem_pseudo_vma_destroy(&pvma);
+	if (page)
+		prep_transhuge_page(page);
 	return page;
 }
@@ -1092,23 +1260,51 @@ static struct page *shmem_alloc_page(gfp_t gfp,
 	struct vm_area_struct pvma;
 	struct page *page;
 
-	/* Create a pseudo vma that just contains the policy */
-	pvma.vm_start = 0;
-	/* Bias interleave by inode number to distribute better across nodes */
-	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
-	pvma.vm_ops = NULL;
-	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
-
-	page = alloc_pages_vma(gfp, 0, &pvma, 0, numa_node_id(), false);
+	shmem_pseudo_vma_init(&pvma, info, index);
+	page = alloc_page_vma(gfp, &pvma, 0);
+	shmem_pseudo_vma_destroy(&pvma);
+
+	return page;
+}
+
+static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
+		struct shmem_inode_info *info, struct shmem_sb_info *sbinfo,
+		pgoff_t index, bool huge)
+{
+	struct page *page;
+	int nr;
+	int err = -ENOSPC;
+
+	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+		huge = false;
+	nr = huge ? HPAGE_PMD_NR : 1;
+
+	if (shmem_acct_block(info->flags, nr))
+		goto failed;
+	if (sbinfo->max_blocks) {
+		if (percpu_counter_compare(&sbinfo->used_blocks,
+					sbinfo->max_blocks - nr) > 0)
+			goto unacct;
+		percpu_counter_add(&sbinfo->used_blocks, nr);
+	}
+
+	if (huge)
+		page = shmem_alloc_hugepage(gfp, info, index);
+	else
+		page = shmem_alloc_page(gfp, info, index);
 	if (page) {
 		__SetPageLocked(page);
 		__SetPageSwapBacked(page);
+		return page;
 	}
 
-	/* Drop reference taken by mpol_shared_policy_lookup() */
-	mpol_cond_put(pvma.vm_policy);
-
-	return page;
+	err = -ENOMEM;
+	if (sbinfo->max_blocks)
+		percpu_counter_add(&sbinfo->used_blocks, -nr);
+unacct:
+	shmem_unacct_blocks(info->flags, nr);
+failed:
+	return ERR_PTR(err);
 }
@@ -1213,6 +1409,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct mem_cgroup *memcg;
 	struct page *page;
 	swp_entry_t swap;
+	pgoff_t hindex = index;
 	int error;
 	int once = 0;
 	int alloced = 0;
@@ -1334,47 +1531,74 @@ repeat:
 		swap_free(swap);
 
 	} else {
-		if (shmem_acct_block(info->flags)) {
-			error = -ENOSPC;
-			goto failed;
-		}
-		if (sbinfo->max_blocks) {
-			if (percpu_counter_compare(&sbinfo->used_blocks,
-						sbinfo->max_blocks) >= 0) {
-				error = -ENOSPC;
-				goto unacct;
-			}
-			percpu_counter_inc(&sbinfo->used_blocks);
+		/* shmem_symlink() */
+		if (mapping->a_ops != &shmem_aops)
+			goto alloc_nohuge;
+		if (shmem_huge == SHMEM_HUGE_DENY)
+			goto alloc_nohuge;
+		if (shmem_huge == SHMEM_HUGE_FORCE)
+			goto alloc_huge;
+		switch (sbinfo->huge) {
+			loff_t i_size;
+			pgoff_t off;
+		case SHMEM_HUGE_NEVER:
+			goto alloc_nohuge;
+		case SHMEM_HUGE_WITHIN_SIZE:
+			off = round_up(index, HPAGE_PMD_NR);
+			i_size = round_up(i_size_read(inode), PAGE_SIZE);
+			if (i_size >= HPAGE_PMD_SIZE &&
+					i_size >> PAGE_SHIFT >= off)
+				goto alloc_huge;
+			/* fallthrough */
+		case SHMEM_HUGE_ADVISE:
+			/* TODO: wire up fadvise()/madvise() */
+			goto alloc_nohuge;
 		}
 
-		page = shmem_alloc_page(gfp, info, index);
-		if (!page) {
-			error = -ENOMEM;
-			goto decused;
+alloc_huge:
+		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
+				index, true);
+		if (IS_ERR(page)) {
+alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
+					index, false);
 		}
+		if (IS_ERR(page)) {
+			error = PTR_ERR(page);
+			page = NULL;
+			goto failed;
+		}
+
+		if (PageTransHuge(page))
+			hindex = round_down(index, HPAGE_PMD_NR);
+		else
+			hindex = index;
+
 		if (sgp == SGP_WRITE)
 			__SetPageReferenced(page);
 
 		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
-				false);
+				PageTransHuge(page));
 		if (error)
-			goto decused;
-		error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
+			goto unacct;
+		error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK,
+				compound_order(page));
 		if (!error) {
-			error = shmem_add_to_page_cache(page, mapping, index,
+			error = shmem_add_to_page_cache(page, mapping, hindex,
 							NULL);
 			radix_tree_preload_end();
 		}
 		if (error) {
-			mem_cgroup_cancel_charge(page, memcg, false);
-			goto decused;
+			mem_cgroup_cancel_charge(page, memcg,
+					PageTransHuge(page));
+			goto unacct;
 		}
-		mem_cgroup_commit_charge(page, memcg, false, false);
+		mem_cgroup_commit_charge(page, memcg, false,
+				PageTransHuge(page));
 		lru_cache_add_anon(page);
 
 		spin_lock(&info->lock);
-		info->alloced++;
-		inode->i_blocks += BLOCKS_PER_PAGE;
+		info->alloced += 1 << compound_order(page);
+		inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
 		shmem_recalc_inode(inode);
 		spin_unlock(&info->lock);
 		alloced = true;
@@ -1390,10 +1614,15 @@ clear:
 		 * but SGP_FALLOC on a page fallocated earlier must initialize
 		 * it now, lest undo on failure cancel our earlier guarantee.
 		 */
-		if (sgp != SGP_WRITE) {
-			clear_highpage(page);
-			flush_dcache_page(page);
-			SetPageUptodate(page);
+		if (sgp != SGP_WRITE && !PageUptodate(page)) {
+			struct page *head = compound_head(page);
+			int i;
+
+			for (i = 0; i < (1 << compound_order(head)); i++) {
+				clear_highpage(head + i);
+				flush_dcache_page(head + i);
+			}
+			SetPageUptodate(head);
 		}
 	}
@@ -1410,17 +1639,23 @@ clear:
 		error = -EINVAL;
 		goto unlock;
 	}
-	*pagep = page;
+	*pagep = page + index - hindex;
 	return 0;
 
 	/*
 	 * Error recovery.
 	 */
-decused:
-	if (sbinfo->max_blocks)
-		percpu_counter_add(&sbinfo->used_blocks, -1);
 unacct:
-	shmem_unacct_blocks(info->flags, 1);
+	if (sbinfo->max_blocks)
+		percpu_counter_sub(&sbinfo->used_blocks,
+				1 << compound_order(page));
+	shmem_unacct_blocks(info->flags, 1 << compound_order(page));
+
+	if (PageTransHuge(page)) {
+		unlock_page(page);
+		put_page(page);
+		goto alloc_nohuge;
+	}
 failed:
 	if (swap.val && !shmem_confirm_swap(mapping, index, swap))
 		error = -EEXIST;
@@ -1758,12 +1993,23 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 		i_size_write(inode, pos + copied);
 
 	if (!PageUptodate(page)) {
+		struct page *head = compound_head(page);
+		if (PageTransCompound(page)) {
+			int i;
+
+			for (i = 0; i < HPAGE_PMD_NR; i++) {
+				if (head + i == page)
+					continue;
+				clear_highpage(head + i);
+				flush_dcache_page(head + i);
+			}
+		}
 		if (copied < PAGE_SIZE) {
 			unsigned from = pos & (PAGE_SIZE - 1);
 			zero_user_segments(page, 0, from,
 					   from + copied, PAGE_SIZE);
 		}
-		SetPageUptodate(page);
+		SetPageUptodate(head);
 	}
 	set_page_dirty(page);
 	unlock_page(page);
mm/swap.c
@@ -292,6 +292,7 @@ static bool need_activate_page_drain(int cpu)
 
 void activate_page(struct page *page)
 {
+	page = compound_head(page);
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
@@ -316,6 +317,7 @@ void activate_page(struct page *page)
 {
 	struct zone *zone = page_zone(page);
 
+	page = compound_head(page);
 	spin_lock_irq(&zone->lru_lock);
 	__activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
 	spin_unlock_irq(&zone->lru_lock);