libnvdimm for 4.18

* DAX broke a fundamental assumption of truncate of file mapped pages.
   The truncate path assumed that it is safe to disconnect a pinned page
   from a file and let the filesystem reclaim the physical block. With DAX
   the page is equivalent to the filesystem block. Introduce
   dax_layout_busy_page() to enable filesystems to wait for pinned DAX
   pages to be released. Without this wait a filesystem could allocate
   blocks under active device-DMA to a new file.
 
 * DAX arranges for the block layer to be bypassed and uses
   dax_direct_access() + copy_to_iter() to satisfy read(2) calls.
   However, the memcpy_mcsafe() facility is available through the pmem
   block driver. In order to safely handle media errors, via the DAX
   block-layer bypass, introduce copy_to_iter_mcsafe().
 
 * Fix cache management policy relative to the ACPI NFIT Platform
   Capabilities Structure to properly elide cache flushes when they are not
   necessary. The table indicates whether CPU caches are power-fail
   protected. Clarify that a deep flush is always performed on
   REQ_{FUA,PREFLUSH} requests.
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJbGxI7AAoJEB7SkWpmfYgCDjsP/2Lcibu9Kf4tKIzuInsle6iE
 6qP29qlkpHVTpDKbhvIxTYTYL9sMU0DNUrpPCJR/EYdeyztLWDFC5EAT1wF240vf
 maV37s/uP331jSC/2VJnKWzBs2ztQxmKLEIQCxh6aT0qs9cbaOvJgB/WlVu+qtsl
 aGJFLmb6vdQacp31noU5plKrMgMA1pADyF5qx9I9K2HwowHE7T368ZEFS/3S//c3
 LXmpx/Nfq52sGu/qbRbu6B1CTJhIGhmarObyQnvBYoKntK1Ov4e8DS95wD3EhNDe
 FuRkOCUKhjl6cFy7QVWh1ct1bFm84ny+b4/AtbpOmv9l/+0mveJ7e+5mu8HQTifT
 wYiEe2xzXJ+OG/xntv8SvlZKMpjP3BqI0jYsTutsjT4oHrciiXdXM186cyS+BiGp
 KtFmWyncQJgfiTq6+Hj5XpP9BapNS+OYdYgUagw9ZwzdzptuGFYUMSVOBrYrn6c/
 fwqtxjubykJoW0P3pkIoT91arFSea7nxOKnGwft06imQ7TwR4ARsI308feQ9itJq
 2P2e7/20nYMsw2aRaUDDA70Yu+Lagn1m8WL87IybUGeUDLb1BAkjphAlWa6COJ+u
 PhvAD2tvyM9m0c7O5Mytvz7iWKG6SVgatoAyOPkaeplQK8khZ+wEpuK58sO6C1w8
 4GBvt9ri9i/Ww/A+ppWs
 =4bfw
 -----END PGP SIGNATURE-----

Merge tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "This adds a user for the new 'bytes-remaining' updates to
  memcpy_mcsafe() that you already received through Ingo via the
  x86-dax- for-linus pull.

  Not included here, but still targeting this cycle, is support for
  handling memory media errors (poison) consumed via userspace dax
  mappings.

  Summary:

   - DAX broke a fundamental assumption of truncate of file mapped
     pages. The truncate path assumed that it is safe to disconnect a
     pinned page from a file and let the filesystem reclaim the physical
     block. With DAX the page is equivalent to the filesystem block.
     Introduce dax_layout_busy_page() to enable filesystems to wait for
     pinned DAX pages to be released. Without this wait a filesystem
     could allocate blocks under active device-DMA to a new file.

   - DAX arranges for the block layer to be bypassed and uses
     dax_direct_access() + copy_to_iter() to satisfy read(2) calls.
     However, the memcpy_mcsafe() facility is available through the pmem
     block driver. In order to safely handle media errors, via the DAX
     block-layer bypass, introduce copy_to_iter_mcsafe().

   - Fix cache management policy relative to the ACPI NFIT Platform
     Capabilities Structure to properly elide cache flushes when they
     are not necessary. The table indicates whether CPU caches are
     power-fail protected. Clarify that a deep flush is always performed
     on REQ_{FUA,PREFLUSH} requests"

* tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits)
  dax: Use dax_write_cache* helpers
  libnvdimm, pmem: Do not flush power-fail protected CPU caches
  libnvdimm, pmem: Unconditionally deep flush on *sync
  libnvdimm, pmem: Complete REQ_FLUSH => REQ_PREFLUSH
  acpi, nfit: Remove ecc_unit_size
  dax: dax_insert_mapping_entry always succeeds
  libnvdimm, e820: Register all pmem resources
  libnvdimm: Debug probe times
  linvdimm, pmem: Preserve read-only setting for pmem devices
  x86, nfit_test: Add unit test for memcpy_mcsafe()
  pmem: Switch to copy_to_iter_mcsafe()
  dax: Report bytes remaining in dax_iomap_actor()
  dax: Introduce a ->copy_to_iter dax operation
  uio, lib: Fix CONFIG_ARCH_HAS_UACCESS_MCSAFE compilation
  xfs, dax: introduce xfs_break_dax_layouts()
  xfs: prepare xfs_break_layouts() for another layout type
  xfs: prepare xfs_break_layouts() to be called with XFS_MMAPLOCK_EXCL
  mm, fs, dax: handle layout changes to pinned dax mappings
  mm: fix __gup_device_huge vs unmap
  mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS
  ...
This commit is contained in:
Linus Torvalds 2018-06-08 17:21:52 -07:00
commit 7d3bf613e9
40 changed files with 925 additions and 380 deletions

View file

@ -830,27 +830,65 @@ static inline bool is_zone_device_page(const struct page *page)
}
#endif
#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
void put_zone_device_private_or_public_page(struct page *page);
DECLARE_STATIC_KEY_FALSE(device_private_key);
#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key)
static inline bool is_device_private_page(const struct page *page);
static inline bool is_device_public_page(const struct page *page);
#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
static inline void put_zone_device_private_or_public_page(struct page *page)
#ifdef CONFIG_DEV_PAGEMAP_OPS
void dev_pagemap_get_ops(void);
void dev_pagemap_put_ops(void);
void __put_devmap_managed_page(struct page *page);
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
static inline bool put_devmap_managed_page(struct page *page)
{
if (!static_branch_unlikely(&devmap_managed_key))
return false;
if (!is_zone_device_page(page))
return false;
switch (page->pgmap->type) {
case MEMORY_DEVICE_PRIVATE:
case MEMORY_DEVICE_PUBLIC:
case MEMORY_DEVICE_FS_DAX:
__put_devmap_managed_page(page);
return true;
default:
break;
}
return false;
}
static inline bool is_device_private_page(const struct page *page)
{
return is_zone_device_page(page) &&
page->pgmap->type == MEMORY_DEVICE_PRIVATE;
}
static inline bool is_device_public_page(const struct page *page)
{
return is_zone_device_page(page) &&
page->pgmap->type == MEMORY_DEVICE_PUBLIC;
}
#else /* CONFIG_DEV_PAGEMAP_OPS */
static inline void dev_pagemap_get_ops(void)
{
}
#define IS_HMM_ENABLED 0
static inline void dev_pagemap_put_ops(void)
{
}
static inline bool put_devmap_managed_page(struct page *page)
{
return false;
}
static inline bool is_device_private_page(const struct page *page)
{
return false;
}
static inline bool is_device_public_page(const struct page *page)
{
return false;
}
#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
#endif /* CONFIG_DEV_PAGEMAP_OPS */
static inline void get_page(struct page *page)
{
@ -868,16 +906,13 @@ static inline void put_page(struct page *page)
page = compound_head(page);
/*
* For private device pages we need to catch refcount transition from
* 2 to 1, when refcount reach one it means the private device page is
* free and we need to inform the device driver through callback. See
* For devmap managed pages we need to catch refcount transition from
* 2 to 1, when refcount reach one it means the page is free and we
* need to inform the device driver through callback. See
* include/linux/memremap.h and HMM for details.
*/
if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) ||
unlikely(is_device_public_page(page)))) {
put_zone_device_private_or_public_page(page);
if (put_devmap_managed_page(page))
return;
}
if (put_page_testzero(page))
__put_page(page);