mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-04-17 20:04:02 +00:00
With PFN_MODE_PMEM namespace, the memmap area is allocated from the device
area. Some architectures map the memmap area with large page size. On
architectures like ppc64, 16MB page for memmap mapping can map 262144 pfns.
This maps a namespace size of 16G.
When populating memmap region with 16MB page from the device area,
make sure the allocated space is not used to map resources outside this
namespace. Such usage of device area will prevent a namespace destroy.
Add resource end pfn in altmap and use that to check if the memmap area
allocation can map pfn outside the namespace. On ppc64 in such case we fallback
to allocation from memory.
This fixes the kernel crash reported below:
[ 132.034989] WARNING: CPU: 13 PID: 13719 at mm/memremap.c:133 devm_memremap_pages_release+0x2d8/0x2e0
[ 133.464754] BUG: Unable to handle kernel data access at 0xc00c00010b204000
[ 133.464760] Faulting instruction address: 0xc00000000007580c
[ 133.464766] Oops: Kernel access of bad area, sig: 11 [#1]
[ 133.464771] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
.....
[ 133.464901] NIP [c00000000007580c] vmemmap_free+0x2ac/0x3d0
[ 133.464906] LR [c0000000000757f8] vmemmap_free+0x298/0x3d0
[ 133.464910] Call Trace:
[ 133.464914] [c000007cbfd0f7b0] [c0000000000757f8] vmemmap_free+0x298/0x3d0 (unreliable)
[ 133.464921] [c000007cbfd0f8d0] [c000000000370a44] section_deactivate+0x1a4/0x240
[ 133.464928] [c000007cbfd0f980] [c000000000386270] __remove_pages+0x3a0/0x590
[ 133.464935] [c000007cbfd0fa50] [c000000000074158] arch_remove_memory+0x88/0x160
[ 133.464942] [c000007cbfd0fae0] [c0000000003be8c0] devm_memremap_pages_release+0x150/0x2e0
[ 133.464949] [c000007cbfd0fb70] [c000000000738ea0] devm_action_release+0x30/0x50
[ 133.464955] [c000007cbfd0fb90] [c00000000073a5a4] release_nodes+0x344/0x400
[ 133.464961] [c000007cbfd0fc40] [c00000000073378c] device_release_driver_internal+0x15c/0x250
[ 133.464968] [c000007cbfd0fc80] [c00000000072fd14] unbind_store+0x104/0x110
[ 133.464973] [c000007cbfd0fcd0] [c00000000072ee24] drv_attr_store+0x44/0x70
[ 133.464981] [c000007cbfd0fcf0] [c0000000004a32bc] sysfs_kf_write+0x6c/0xa0
[ 133.464987] [c000007cbfd0fd10] [c0000000004a1dfc] kernfs_fop_write+0x17c/0x250
[ 133.464993] [c000007cbfd0fd60] [c0000000003c348c] __vfs_write+0x3c/0x70
[ 133.464999] [c000007cbfd0fd80] [c0000000003c75d0] vfs_write+0xd0/0x250
djbw: Aneesh notes that this crash can likely be triggered in any kernel that
supports 'papr_scm', so flagging that commit for -stable consideration.
Fixes: b5beae5e22 ("powerpc/pseries: Add driver for PAPR SCM regions")
Cc: <stable@vger.kernel.org>
Reported-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reviewed-by: Pankaj Gupta <pagupta@redhat.com>
Tested-by: Santosh Sivaraj <santosh@fossix.org>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Link: https://lore.kernel.org/r/20190910062826.10041-1-aneesh.kumar@linux.ibm.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
175 lines
5.3 KiB
C
175 lines
5.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_MEMREMAP_H_
|
|
#define _LINUX_MEMREMAP_H_
|
|
#include <linux/ioport.h>
|
|
#include <linux/percpu-refcount.h>
|
|
|
|
struct resource;
|
|
struct device;
|
|
|
|
/**
 * struct vmem_altmap - pre-allocated storage for vmemmap_populate
 * @base_pfn: base of the entire dev_pagemap mapping
 * @end_pfn: end pfn of the resource backing the mapping; lets memmap
 *	allocation detect pages that would map pfns beyond the resource
 * @reserve: pages mapped, but reserved for driver use (relative to @base_pfn)
 * @free: free pages set aside in the mapping for memmap storage
 * @align: pages reserved to meet allocation alignments
 * @alloc: track pages consumed, private to vmemmap_populate()
 */
struct vmem_altmap {
	const unsigned long base_pfn;
	const unsigned long end_pfn;
	const unsigned long reserve;
	unsigned long free;
	unsigned long align;
	unsigned long alloc;
};
|
|
|
|
/*
 * Specialize ZONE_DEVICE memory into multiple types, each having a different
 * usage.
 *
 * MEMORY_DEVICE_PRIVATE:
 * Device memory that is not directly addressable by the CPU: CPU can neither
 * read nor write private memory. In this case, we do still have struct pages
 * backing the device memory. Doing so simplifies the implementation, but it is
 * important to remember that there are certain points at which the struct page
 * must be treated as an opaque object, rather than a "normal" struct page.
 *
 * A more complete discussion of unaddressable memory may be found in
 * include/linux/hmm.h and Documentation/vm/hmm.rst.
 *
 * MEMORY_DEVICE_FS_DAX:
 * Host memory that has similar access semantics as System RAM i.e. DMA
 * coherent and supports page pinning. In support of coordinating page
 * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a
 * wakeup event whenever a page is unpinned and becomes idle. This
 * wakeup is used to coordinate physical address space management (ex:
 * fs truncate/hole punch) vs pinned pages (ex: device dma).
 *
 * MEMORY_DEVICE_DEVDAX:
 * Host memory that has similar access semantics as System RAM i.e. DMA
 * coherent and supports page pinning. In contrast to
 * MEMORY_DEVICE_FS_DAX, this memory is accessed via a device-dax
 * character device.
 *
 * MEMORY_DEVICE_PCI_P2PDMA:
 * Device memory residing in a PCI BAR intended for use with Peer-to-Peer
 * transactions.
 */
enum memory_type {
	/* 0 is reserved to catch uninitialized type fields */
	MEMORY_DEVICE_PRIVATE = 1,
	MEMORY_DEVICE_FS_DAX,
	MEMORY_DEVICE_DEVDAX,
	MEMORY_DEVICE_PCI_P2PDMA,
};
|
|
|
|
struct dev_pagemap_ops {
|
|
/*
|
|
* Called once the page refcount reaches 1. (ZONE_DEVICE pages never
|
|
* reach 0 refcount unless there is a refcount bug. This allows the
|
|
* device driver to implement its own memory management.)
|
|
*/
|
|
void (*page_free)(struct page *page);
|
|
|
|
/*
|
|
* Transition the refcount in struct dev_pagemap to the dead state.
|
|
*/
|
|
void (*kill)(struct dev_pagemap *pgmap);
|
|
|
|
/*
|
|
* Wait for refcount in struct dev_pagemap to be idle and reap it.
|
|
*/
|
|
void (*cleanup)(struct dev_pagemap *pgmap);
|
|
|
|
/*
|
|
* Used for private (un-addressable) device memory only. Must migrate
|
|
* the page back to a CPU accessible page.
|
|
*/
|
|
vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf);
|
|
};
|
|
|
|
/*
 * Bit in dev_pagemap.flags: pgmap_altmap() hands out the embedded altmap only
 * when this bit is set, and returns NULL otherwise.
 */
#define PGMAP_ALTMAP_VALID (1 << 0)
|
|
|
|
/**
|
|
* struct dev_pagemap - metadata for ZONE_DEVICE mappings
|
|
* @altmap: pre-allocated/reserved memory for vmemmap allocations
|
|
* @res: physical address range covered by @ref
|
|
* @ref: reference count that pins the devm_memremap_pages() mapping
|
|
* @internal_ref: internal reference if @ref is not provided by the caller
|
|
* @done: completion for @internal_ref
|
|
* @dev: host device of the mapping for debug
|
|
* @data: private data pointer for page_free()
|
|
* @type: memory type: see MEMORY_* in memory_hotplug.h
|
|
* @flags: PGMAP_* flags to specify defailed behavior
|
|
* @ops: method table
|
|
*/
|
|
struct dev_pagemap {
|
|
struct vmem_altmap altmap;
|
|
struct resource res;
|
|
struct percpu_ref *ref;
|
|
struct percpu_ref internal_ref;
|
|
struct completion done;
|
|
struct device *dev;
|
|
enum memory_type type;
|
|
unsigned int flags;
|
|
u64 pci_p2pdma_bus_offset;
|
|
const struct dev_pagemap_ops *ops;
|
|
};
|
|
|
|
static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
|
|
{
|
|
if (pgmap->flags & PGMAP_ALTMAP_VALID)
|
|
return &pgmap->altmap;
|
|
return NULL;
|
|
}
|
|
|
|
#ifdef CONFIG_ZONE_DEVICE
|
|
/*
 * Out-of-line implementations, available only when CONFIG_ZONE_DEVICE is
 * enabled; the #else branch provides static inline stubs instead.
 */
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
		struct dev_pagemap *pgmap);

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
|
|
#else
|
|
static inline void *devm_memremap_pages(struct device *dev,
|
|
struct dev_pagemap *pgmap)
|
|
{
|
|
/*
|
|
* Fail attempts to call devm_memremap_pages() without
|
|
* ZONE_DEVICE support enabled, this requires callers to fall
|
|
* back to plain devm_memremap() based on config
|
|
*/
|
|
WARN_ON_ONCE(1);
|
|
return ERR_PTR(-ENXIO);
|
|
}
|
|
|
|
/* !CONFIG_ZONE_DEVICE stub: intentionally does nothing. */
static inline void devm_memunmap_pages(struct device *dev,
		struct dev_pagemap *pgmap)
{
}
|
|
|
|
static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
|
|
struct dev_pagemap *pgmap)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
/* !CONFIG_ZONE_DEVICE stub: always returns 0, ignoring @altmap. */
static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
	return 0;
}
|
|
|
|
/* !CONFIG_ZONE_DEVICE stub: intentionally does nothing. */
static inline void vmem_altmap_free(struct vmem_altmap *altmap,
		unsigned long nr_pfns)
{
}
|
|
#endif /* CONFIG_ZONE_DEVICE */
|
|
|
|
static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
|
|
{
|
|
if (pgmap)
|
|
percpu_ref_put(pgmap->ref);
|
|
}
|
|
#endif /* _LINUX_MEMREMAP_H_ */
|