mm: munlock: manual pte walk in fast path instead of follow_page_mask()
Currently, munlock_vma_pages_range() calls follow_page_mask() to obtain each
individual struct page. This entails repeated full page table translations and
the page table lock being taken for each page separately.

This patch avoids the costly follow_page_mask() where possible, by iterating
over ptes within a single pmd under a single page table lock. The first pte is
obtained by get_locked_pte() for the non-THP page acquired by the initial
follow_page_mask(). The rest of the on-stack pagevec for munlock is filled up
using the pte walk, as long as pte_present() and vm_normal_page() are
sufficient to obtain the struct page.

After this patch, a 14% speedup was measured for munlocking a 56GB large
memory area with THP disabled.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Jörn Engel <joern@logfs.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit 7a8010cd36
parent 5b40998ae3
2 changed files with 85 additions and 37 deletions
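Before the diff itself, a minimal userspace sketch of how a munlock() timing like the 14% figure quoted above could be reproduced. It is not part of the patch: the 1 GiB region size (instead of the 56GB area from the message), the MADV_NOHUGEPAGE hint used to keep THP out of the picture, and the CLOCK_MONOTONIC timing are all illustrative assumptions.

/* munlock_bench.c -- hypothetical timing sketch, not part of this commit */
#include <stdio.h>
#include <time.h>
#include <sys/mman.h>

#define REGION_SIZE (1UL << 30)	/* 1 GiB; the commit message used a 56GB area */

int main(void)
{
	struct timespec t0, t1;
	char *buf = mmap(NULL, REGION_SIZE, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * Ask for small pages so the non-THP munlock path is exercised;
	 * ignoring failure is harmless if THP is not configured.
	 */
	madvise(buf, REGION_SIZE, MADV_NOHUGEPAGE);

	/* mlock() populates the region and mlocks every page */
	if (mlock(buf, REGION_SIZE)) {
		perror("mlock (check RLIMIT_MEMLOCK)");
		return 1;
	}

	clock_gettime(CLOCK_MONOTONIC, &t0);
	munlock(buf, REGION_SIZE);	/* ends up in munlock_vma_pages_range() */
	clock_gettime(CLOCK_MONOTONIC, &t1);

	printf("munlock of %lu MiB took %.3f ms\n", REGION_SIZE >> 20,
	       (t1.tv_sec - t0.tv_sec) * 1e3 + (t1.tv_nsec - t0.tv_nsec) / 1e6);

	munmap(buf, REGION_SIZE);
	return 0;
}

Build with cc -O2 -o munlock_bench munlock_bench.c and run with a large enough RLIMIT_MEMLOCK (for example after ulimit -l unlimited); comparing kernels with and without this commit would be one way to observe the fast-path gain for locked, non-THP pages.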
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -643,12 +643,12 @@ static inline enum zone_type page_zonenum(const struct page *page)
 #endif
 
 /*
- * The identification function is only used by the buddy allocator for
- * determining if two pages could be buddies. We are not really
- * identifying a zone since we could be using a the section number
- * id if we have not node id available in page flags.
- * We guarantee only that it will return the same value for two
- * combinable pages in a zone.
+ * The identification function is mainly used by the buddy allocator for
+ * determining if two pages could be buddies. We are not really identifying
+ * the zone since we could be using the section number id if we do not have
+ * node id available in page flags.
+ * We only guarantee that it will return the same value for two combinable
+ * pages in a zone.
  */
 static inline int page_zone_id(struct page *page)
 {
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -280,8 +280,7 @@ static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
  * The second phase finishes the munlock only for pages where isolation
  * succeeded.
  *
- * Note that pvec is modified during the process. Before returning
- * pagevec_reinit() is called on it.
+ * Note that the pagevec may be modified during the process.
  */
 static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 {
@@ -356,8 +355,60 @@ skip_munlock:
 	 */
 	if (pagevec_count(&pvec_putback))
		__putback_lru_fast(&pvec_putback, pgrescued);
+}
 
-	pagevec_reinit(pvec);
+/*
+ * Fill up pagevec for __munlock_pagevec using pte walk
+ *
+ * The function expects that the struct page corresponding to @start address is
+ * a non-TPH page already pinned and in the @pvec, and that it belongs to @zone.
+ *
+ * The rest of @pvec is filled by subsequent pages within the same pmd and same
+ * zone, as long as the pte's are present and vm_normal_page() succeeds. These
+ * pages also get pinned.
+ *
+ * Returns the address of the next page that should be scanned. This equals
+ * @start + PAGE_SIZE when no page could be added by the pte walk.
+ */
+static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
+		struct vm_area_struct *vma, int zoneid, unsigned long start,
+		unsigned long end)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	/*
+	 * Initialize pte walk starting at the already pinned page where we
+	 * are sure that there is a pte.
+	 */
+	pte = get_locked_pte(vma->vm_mm, start, &ptl);
+	end = min(end, pmd_addr_end(start, end));
+
+	/* The page next to the pinned page is the first we will try to get */
+	start += PAGE_SIZE;
+	while (start < end) {
+		struct page *page = NULL;
+		pte++;
+		if (pte_present(*pte))
+			page = vm_normal_page(vma, start, *pte);
+		/*
+		 * Break if page could not be obtained or the page's node+zone does not
+		 * match
+		 */
+		if (!page || page_zone_id(page) != zoneid)
+			break;
+
+		get_page(page);
+		/*
+		 * Increase the address that will be returned *before* the
+		 * eventual break due to pvec becoming full by adding the page
+		 */
+		start += PAGE_SIZE;
+		if (pagevec_add(pvec, page) == 0)
+			break;
+	}
+	pte_unmap_unlock(pte, ptl);
+	return start;
 }
 
 /*
@@ -381,17 +432,16 @@ skip_munlock:
 void munlock_vma_pages_range(struct vm_area_struct *vma,
			     unsigned long start, unsigned long end)
 {
-	struct pagevec pvec;
-	struct zone *zone = NULL;
-
-	pagevec_init(&pvec, 0);
 	vma->vm_flags &= ~VM_LOCKED;
 
 	while (start < end) {
-		struct page *page;
+		struct page *page = NULL;
 		unsigned int page_mask, page_increm;
-		struct zone *pagezone;
+		struct pagevec pvec;
+		struct zone *zone;
+		int zoneid;
 
+		pagevec_init(&pvec, 0);
 		/*
 		 * Although FOLL_DUMP is intended for get_dump_page(),
 		 * it just so happens that its special treatment of the
@@ -400,22 +450,10 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 		 * has sneaked into the range, we won't oops here: great).
 		 */
 		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
				&page_mask);
+
 		if (page && !IS_ERR(page)) {
-			pagezone = page_zone(page);
-			/* The whole pagevec must be in the same zone */
-			if (pagezone != zone) {
-				if (pagevec_count(&pvec))
-					__munlock_pagevec(&pvec, zone);
-				zone = pagezone;
-			}
 			if (PageTransHuge(page)) {
-				/*
-				 * THP pages are not handled by pagevec due
-				 * to their possible split (see below).
-				 */
-				if (pagevec_count(&pvec))
-					__munlock_pagevec(&pvec, zone);
 				lock_page(page);
 				/*
 				 * Any THP page found by follow_page_mask() may
@@ -428,21 +466,31 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 				put_page(page); /* follow_page_mask() */
 			} else {
 				/*
-				 * Non-huge pages are handled in batches
-				 * via pagevec. The pin from
-				 * follow_page_mask() prevents them from
-				 * collapsing by THP.
+				 * Non-huge pages are handled in batches via
+				 * pagevec. The pin from follow_page_mask()
+				 * prevents them from collapsing by THP.
 				 */
-				if (pagevec_add(&pvec, page) == 0)
-					__munlock_pagevec(&pvec, zone);
+				pagevec_add(&pvec, page);
+				zone = page_zone(page);
+				zoneid = page_zone_id(page);
+
+				/*
+				 * Try to fill the rest of pagevec using fast
+				 * pte walk. This will also update start to
+				 * the next page to process. Then munlock the
+				 * pagevec.
+				 */
+				start = __munlock_pagevec_fill(&pvec, vma,
+						zoneid, start, end);
+				__munlock_pagevec(&pvec, zone);
+				goto next;
 			}
 		}
 		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
 		start += page_increm * PAGE_SIZE;
+next:
 		cond_resched();
 	}
-	if (pagevec_count(&pvec))
-		__munlock_pagevec(&pvec, zone);
 }
 
 /*