mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-03-26 00:54:39 +00:00
In sparse_init(), if CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER=y, system will allocate one continuous memory chunk for mem maps on one node and populate the relevant page tables to map memory section one by one. If fail to populate for a certain mem section, print warning and its ->section_mem_map will be cleared to cancel the marking of being present. Like this, the number of mem sections marked as present could become less during sparse_init() execution. Here just defer the ms->section_mem_map clearing if failed to populate its page tables until the last for_each_present_section_nr() loop. This is in preparation for later optimizing the mem map allocation. [akpm@linux-foundation.org: remove now-unused local `ms', per Oscar] Link: http://lkml.kernel.org/r/20180228032657.32385-3-bhe@redhat.com Signed-off-by: Baoquan He <bhe@redhat.com> Acked-by: Dave Hansen <dave.hansen@intel.com> Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com> Reviewed-by: Oscar Salvador <osalvador@suse.de> Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Pankaj Gupta <pagupta@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
312 lines
7.9 KiB
C
312 lines
7.9 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Virtual Memory Map support
|
|
*
|
|
* (C) 2007 sgi. Christoph Lameter.
|
|
*
|
|
* Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
|
|
* virt_to_page, page_address() to be implemented as a base offset
|
|
* calculation without memory access.
|
|
*
|
|
* However, virtual mappings need a page table and TLBs. Many Linux
|
|
* architectures already map their physical space using 1-1 mappings
|
|
* via TLBs. For those arches the virtual memory map is essentially
|
|
* for free if we use the same page size as the 1-1 mappings. In that
|
|
* case the overhead consists of a few additional pages that are
|
|
* allocated to create a view of memory for vmemmap.
|
|
*
|
|
* The architecture is expected to provide a vmemmap_populate() function
|
|
* to instantiate the mapping.
|
|
*/
|
|
#include <linux/mm.h>
|
|
#include <linux/mmzone.h>
|
|
#include <linux/bootmem.h>
|
|
#include <linux/memremap.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/sched.h>
|
|
#include <asm/dma.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/pgtable.h>
|
|
|
|
/*
|
|
* Allocate a block of memory to be used to back the virtual memory map
|
|
* or to back the page tables that are used to create the mapping.
|
|
* Uses the main allocators if they are available, else bootmem.
|
|
*/
|
|
|
|
static void * __ref __earlyonly_bootmem_alloc(int node,
|
|
unsigned long size,
|
|
unsigned long align,
|
|
unsigned long goal)
|
|
{
|
|
return memblock_virt_alloc_try_nid_raw(size, align, goal,
|
|
BOOTMEM_ALLOC_ACCESSIBLE, node);
|
|
}
|
|
|
|
static void *vmemmap_buf;
|
|
static void *vmemmap_buf_end;
|
|
|
|
void * __meminit vmemmap_alloc_block(unsigned long size, int node)
|
|
{
|
|
/* If the main allocator is up use that, fallback to bootmem. */
|
|
if (slab_is_available()) {
|
|
gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
|
|
int order = get_order(size);
|
|
static bool warned;
|
|
struct page *page;
|
|
|
|
page = alloc_pages_node(node, gfp_mask, order);
|
|
if (page)
|
|
return page_address(page);
|
|
|
|
if (!warned) {
|
|
warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
|
|
"vmemmap alloc failure: order:%u", order);
|
|
warned = true;
|
|
}
|
|
return NULL;
|
|
} else
|
|
return __earlyonly_bootmem_alloc(node, size, size,
|
|
__pa(MAX_DMA_ADDRESS));
|
|
}
|
|
|
|
/* need to make sure size is all the same during early stage */
|
|
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
|
|
{
|
|
void *ptr;
|
|
|
|
if (!vmemmap_buf)
|
|
return vmemmap_alloc_block(size, node);
|
|
|
|
/* take the from buf */
|
|
ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
|
|
if (ptr + size > vmemmap_buf_end)
|
|
return vmemmap_alloc_block(size, node);
|
|
|
|
vmemmap_buf = ptr + size;
|
|
|
|
return ptr;
|
|
}
|
|
|
|
static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
|
|
{
|
|
return altmap->base_pfn + altmap->reserve + altmap->alloc
|
|
+ altmap->align;
|
|
}
|
|
|
|
static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
|
|
{
|
|
unsigned long allocated = altmap->alloc + altmap->align;
|
|
|
|
if (altmap->free > allocated)
|
|
return altmap->free - allocated;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* altmap_alloc_block_buf - allocate pages from the device page map
|
|
* @altmap: device page map
|
|
* @size: size (in bytes) of the allocation
|
|
*
|
|
* Allocations are aligned to the size of the request.
|
|
*/
|
|
void * __meminit altmap_alloc_block_buf(unsigned long size,
|
|
struct vmem_altmap *altmap)
|
|
{
|
|
unsigned long pfn, nr_pfns, nr_align;
|
|
|
|
if (size & ~PAGE_MASK) {
|
|
pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
|
|
__func__, size);
|
|
return NULL;
|
|
}
|
|
|
|
pfn = vmem_altmap_next_pfn(altmap);
|
|
nr_pfns = size >> PAGE_SHIFT;
|
|
nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
|
|
nr_align = ALIGN(pfn, nr_align) - pfn;
|
|
if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
|
|
return NULL;
|
|
|
|
altmap->alloc += nr_pfns;
|
|
altmap->align += nr_align;
|
|
pfn += nr_align;
|
|
|
|
pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
|
|
__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
|
|
return __va(__pfn_to_phys(pfn));
|
|
}
|
|
|
|
void __meminit vmemmap_verify(pte_t *pte, int node,
|
|
unsigned long start, unsigned long end)
|
|
{
|
|
unsigned long pfn = pte_pfn(*pte);
|
|
int actual_node = early_pfn_to_nid(pfn);
|
|
|
|
if (node_distance(actual_node, node) > LOCAL_DISTANCE)
|
|
pr_warn("[%lx-%lx] potential offnode page_structs\n",
|
|
start, end - 1);
|
|
}
|
|
|
|
pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
|
|
{
|
|
pte_t *pte = pte_offset_kernel(pmd, addr);
|
|
if (pte_none(*pte)) {
|
|
pte_t entry;
|
|
void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
|
|
if (!p)
|
|
return NULL;
|
|
entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
|
|
set_pte_at(&init_mm, addr, pte, entry);
|
|
}
|
|
return pte;
|
|
}
|
|
|
|
static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
|
|
{
|
|
void *p = vmemmap_alloc_block(size, node);
|
|
|
|
if (!p)
|
|
return NULL;
|
|
memset(p, 0, size);
|
|
|
|
return p;
|
|
}
|
|
|
|
pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
|
|
{
|
|
pmd_t *pmd = pmd_offset(pud, addr);
|
|
if (pmd_none(*pmd)) {
|
|
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
|
|
if (!p)
|
|
return NULL;
|
|
pmd_populate_kernel(&init_mm, pmd, p);
|
|
}
|
|
return pmd;
|
|
}
|
|
|
|
pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
|
|
{
|
|
pud_t *pud = pud_offset(p4d, addr);
|
|
if (pud_none(*pud)) {
|
|
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
|
|
if (!p)
|
|
return NULL;
|
|
pud_populate(&init_mm, pud, p);
|
|
}
|
|
return pud;
|
|
}
|
|
|
|
p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
|
|
{
|
|
p4d_t *p4d = p4d_offset(pgd, addr);
|
|
if (p4d_none(*p4d)) {
|
|
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
|
|
if (!p)
|
|
return NULL;
|
|
p4d_populate(&init_mm, p4d, p);
|
|
}
|
|
return p4d;
|
|
}
|
|
|
|
pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
|
|
{
|
|
pgd_t *pgd = pgd_offset_k(addr);
|
|
if (pgd_none(*pgd)) {
|
|
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
|
|
if (!p)
|
|
return NULL;
|
|
pgd_populate(&init_mm, pgd, p);
|
|
}
|
|
return pgd;
|
|
}
|
|
|
|
int __meminit vmemmap_populate_basepages(unsigned long start,
|
|
unsigned long end, int node)
|
|
{
|
|
unsigned long addr = start;
|
|
pgd_t *pgd;
|
|
p4d_t *p4d;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
|
|
for (; addr < end; addr += PAGE_SIZE) {
|
|
pgd = vmemmap_pgd_populate(addr, node);
|
|
if (!pgd)
|
|
return -ENOMEM;
|
|
p4d = vmemmap_p4d_populate(pgd, addr, node);
|
|
if (!p4d)
|
|
return -ENOMEM;
|
|
pud = vmemmap_pud_populate(p4d, addr, node);
|
|
if (!pud)
|
|
return -ENOMEM;
|
|
pmd = vmemmap_pmd_populate(pud, addr, node);
|
|
if (!pmd)
|
|
return -ENOMEM;
|
|
pte = vmemmap_pte_populate(pmd, addr, node);
|
|
if (!pte)
|
|
return -ENOMEM;
|
|
vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid,
|
|
struct vmem_altmap *altmap)
|
|
{
|
|
unsigned long start;
|
|
unsigned long end;
|
|
struct page *map;
|
|
|
|
map = pfn_to_page(pnum * PAGES_PER_SECTION);
|
|
start = (unsigned long)map;
|
|
end = (unsigned long)(map + PAGES_PER_SECTION);
|
|
|
|
if (vmemmap_populate(start, end, nid, altmap))
|
|
return NULL;
|
|
|
|
return map;
|
|
}
|
|
|
|
void __init sparse_mem_maps_populate_node(struct page **map_map,
|
|
unsigned long pnum_begin,
|
|
unsigned long pnum_end,
|
|
unsigned long map_count, int nodeid)
|
|
{
|
|
unsigned long pnum;
|
|
unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
|
|
void *vmemmap_buf_start;
|
|
|
|
size = ALIGN(size, PMD_SIZE);
|
|
vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
|
|
PMD_SIZE, __pa(MAX_DMA_ADDRESS));
|
|
|
|
if (vmemmap_buf_start) {
|
|
vmemmap_buf = vmemmap_buf_start;
|
|
vmemmap_buf_end = vmemmap_buf_start + size * map_count;
|
|
}
|
|
|
|
for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
|
|
if (!present_section_nr(pnum))
|
|
continue;
|
|
|
|
map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL);
|
|
if (map_map[pnum])
|
|
continue;
|
|
pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
|
|
__func__);
|
|
}
|
|
|
|
if (vmemmap_buf_start) {
|
|
/* need to free left buf */
|
|
memblock_free_early(__pa(vmemmap_buf),
|
|
vmemmap_buf_end - vmemmap_buf);
|
|
vmemmap_buf = NULL;
|
|
vmemmap_buf_end = NULL;
|
|
}
|
|
}
|