Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull nvdimm fixes from Dan Williams:
 "A small crop of lockdep, sleeping while atomic, and other fixes /
  band-aids in advance of the full-blown reworks targeting the next
  merge window.

  The largest change here is "libnvdimm: fix blk free space accounting",
  which deletes a pile of buggy code that better testing would have
  caught before merging. The next change that is borderline too big for
  a late rc is switching the device-dax locking from rcu to srcu; I
  couldn't think of a smaller way to make that fix.

  The __copy_user_nocache fix will have a full replacement in 4.12 to
  move those pmem special case considerations into the pmem driver.

  The "libnvdimm: band aid btt vs clear poison locking" commit admits
  that our error clearing support for btt went in broken, so we just
  disable it in 4.11 and -stable. A replacement / full fix is in the
  pipeline for 4.12.

  Some of these would have been caught earlier had DEBUG_ATOMIC_SLEEP
  been enabled on my development station. I wonder if we should have:

      config DEBUG_ATOMIC_SLEEP
          default PROVE_LOCKING

  ...since I mistakenly thought I got both with PROVE_LOCKING=y.

  These have received a build success notification from the 0day robot,
  and some have appeared in a -next release with no reported issues"

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  x86, pmem: fix broken __copy_user_nocache cache-bypass assumptions
  device-dax: switch to srcu, fix rcu_read_lock() vs pte allocation
  libnvdimm: band aid btt vs clear poison locking
  libnvdimm: fix reconfig_mutex, mmap_sem, and jbd2_handle lockdep splat
  libnvdimm: fix blk free space accounting
  acpi, nfit, libnvdimm: fix interleave set cookie calculation (64-bit comparison)
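The DEBUG_ATOMIC_SLEEP remark above refers to exactly the class of bug band-aided in the btt/clear-poison patch below: calling a sleeping primitive from atomic context. A rough, hypothetical kernel-style sketch (names invented here, not code from this series) of what that option flags at runtime:

/*
 * Hypothetical illustration only. With CONFIG_DEBUG_ATOMIC_SLEEP=y the
 * mutex_lock() below triggers a "BUG: sleeping function called from
 * invalid context" splat, because a mutex may sleep while a spinlock
 * holder is in atomic context and must not.
 */
#include <linux/spinlock.h>
#include <linux/mutex.h>

static DEFINE_SPINLOCK(demo_spinlock);
static DEFINE_MUTEX(demo_mutex);

static void demo_sleeping_while_atomic(void)
{
        spin_lock(&demo_spinlock);      /* enter atomic context */
        mutex_lock(&demo_mutex);        /* may sleep: flagged by DEBUG_ATOMIC_SLEEP */
        mutex_unlock(&demo_mutex);
        spin_unlock(&demo_spinlock);
}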
commit d5ff0814fd
7 changed files with 70 additions and 85 deletions
arch/x86/include/asm/pmem.h
@@ -55,7 +55,8 @@ static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
  * @size: number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
- * instruction.
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
  */
 static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
@@ -69,15 +70,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
                clwb(p);
 }
 
-/*
- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
- */
-static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
-{
-       return iter_is_iovec(i) == false;
-}
-
 /**
  * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
  * @addr:      PMEM destination address
@@ -94,7 +86,35 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
        /* TODO: skip the write-back by always using non-temporal stores */
        len = copy_from_iter_nocache(addr, bytes, i);
 
-       if (__iter_needs_pmem_wb(i))
+       /*
+        * In the iovec case on x86_64 copy_from_iter_nocache() uses
+        * non-temporal stores for the bulk of the transfer, but we need
+        * to manually flush if the transfer is unaligned. A cached
+        * memory copy is used when destination or size is not naturally
+        * aligned. That is:
+        *   - Require 8-byte alignment when size is 8 bytes or larger.
+        *   - Require 4-byte alignment when size is 4 bytes.
+        *
+        * In the non-iovec case the entire destination needs to be
+        * flushed.
+        */
+       if (iter_is_iovec(i)) {
+               unsigned long flushed, dest = (unsigned long) addr;
+
+               if (bytes < 8) {
+                       if (!IS_ALIGNED(dest, 4) || (bytes != 4))
+                               arch_wb_cache_pmem(addr, 1);
+               } else {
+                       if (!IS_ALIGNED(dest, 8)) {
+                               dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+                               arch_wb_cache_pmem(addr, 1);
+                       }
+
+                       flushed = dest - (unsigned long) addr;
+                       if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
+                               arch_wb_cache_pmem(addr + bytes - 1, 1);
+               }
+       } else
                arch_wb_cache_pmem(addr, bytes);
 
        return len;
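To make the new flush policy easier to follow, here is a small standalone userspace sketch that mirrors the alignment checks added above and reports when a manual cache write-back would still be required. IS_ALIGNED/ALIGN are redefined locally and the 64-byte cache-line size is an assumption for illustration only:

/* Standalone illustration of the alignment checks in the hunk above.
 * Assumptions: 64-byte cache lines, local IS_ALIGNED/ALIGN macros. */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define CLFLUSH_SIZE 64UL
#define IS_ALIGNED(x, a)  (((x) & ((a) - 1)) == 0)
#define ALIGN(x, a)       (((x) + (a) - 1) & ~((a) - 1))

static void report(uintptr_t dest, size_t bytes)
{
        uintptr_t flushed, d = dest;

        if (bytes < 8) {
                if (!IS_ALIGNED(d, 4) || bytes != 4)
                        printf("flush head line at %#lx\n", (unsigned long)dest);
        } else {
                if (!IS_ALIGNED(d, 8)) {
                        d = ALIGN(d, CLFLUSH_SIZE);
                        printf("flush unaligned head at %#lx\n", (unsigned long)dest);
                }
                flushed = d - dest;
                if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
                        printf("flush unaligned tail at %#lx\n",
                               (unsigned long)(dest + bytes - 1));
        }
}

int main(void)
{
        report(0x1000, 4096);   /* fully aligned: no manual flush needed */
        report(0x1004, 20);     /* unaligned start: head line flushed */
        report(0x1000, 13);     /* ragged length: tail line flushed */
        return 0;
}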
drivers/acpi/nfit/core.c
@@ -1617,7 +1617,11 @@ static int cmp_map(const void *m0, const void *m1)
        const struct nfit_set_info_map *map0 = m0;
        const struct nfit_set_info_map *map1 = m1;
 
-       return map0->region_offset - map1->region_offset;
+       if (map0->region_offset < map1->region_offset)
+               return -1;
+       else if (map0->region_offset > map1->region_offset)
+               return 1;
+       return 0;
 }
 
 /* Retrieve the nth entry referencing this spa */
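The comparator change matters because region_offset is a 64-bit quantity: returning the raw difference truncates it to int, so offsets that differ by a multiple of 2^32 compare as equal (on common ABIs) and the interleave-set sort, and hence the cookie, comes out wrong. A minimal userspace demonstration of that failure mode, with made-up values:

/* Why "return a - b;" is a broken comparator for 64-bit keys: the
 * difference is narrowed to int, losing sign and magnitude. */
#include <stdio.h>
#include <stdint.h>

static int cmp_broken(uint64_t a, uint64_t b)
{
        return a - b;                   /* narrowed to int */
}

static int cmp_fixed(uint64_t a, uint64_t b)
{
        if (a < b)
                return -1;
        else if (a > b)
                return 1;
        return 0;
}

int main(void)
{
        uint64_t a = 0x100000000ULL;    /* 4 GiB offset */
        uint64_t b = 0;

        /* a > b, but the difference is a multiple of 2^32, so the
         * narrowing yields 0 and the two entries sort as equal. */
        printf("broken: %d, fixed: %d\n", cmp_broken(a, b), cmp_fixed(a, b));
        return 0;
}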
drivers/dax/Kconfig
@@ -2,6 +2,7 @@ menuconfig DEV_DAX
        tristate "DAX: direct access to differentiated memory"
        default m if NVDIMM_DAX
        depends on TRANSPARENT_HUGEPAGE
+       select SRCU
        help
          Support raw access to differentiated (persistence, bandwidth,
          latency...) memory via an mmap(2) capable character
drivers/dax/dax.c
@@ -25,6 +25,7 @@
 #include "dax.h"
 
 static dev_t dax_devt;
+DEFINE_STATIC_SRCU(dax_srcu);
 static struct class *dax_class;
 static DEFINE_IDA(dax_minor_ida);
 static int nr_dax = CONFIG_NR_DEV_DAX;
@@ -60,7 +61,7 @@ struct dax_region {
  * @region - parent region
  * @dev - device backing the character device
  * @cdev - core chardev data
- * @alive - !alive + rcu grace period == no new mappings can be established
+ * @alive - !alive + srcu grace period == no new mappings can be established
  * @id - child id in the region
  * @num_resources - number of physical address extents in this device
  * @res - array of physical address ranges
@@ -569,7 +570,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 static int dax_dev_huge_fault(struct vm_fault *vmf,
                enum page_entry_size pe_size)
 {
-       int rc;
+       int rc, id;
        struct file *filp = vmf->vma->vm_file;
        struct dax_dev *dax_dev = filp->private_data;
 
@@ -578,7 +579,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
                        ? "write" : "read",
                        vmf->vma->vm_start, vmf->vma->vm_end);
 
-       rcu_read_lock();
+       id = srcu_read_lock(&dax_srcu);
        switch (pe_size) {
        case PE_SIZE_PTE:
                rc = __dax_dev_pte_fault(dax_dev, vmf);
@@ -592,7 +593,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
        default:
                return VM_FAULT_FALLBACK;
        }
-       rcu_read_unlock();
+       srcu_read_unlock(&dax_srcu, id);
 
        return rc;
 }
@@ -713,11 +714,11 @@ static void unregister_dax_dev(void *dev)
         * Note, rcu is not protecting the liveness of dax_dev, rcu is
         * ensuring that any fault handlers that might have seen
         * dax_dev->alive == true, have completed.  Any fault handlers
-        * that start after synchronize_rcu() has started will abort
+        * that start after synchronize_srcu() has started will abort
         * upon seeing dax_dev->alive == false.
         */
        dax_dev->alive = false;
-       synchronize_rcu();
+       synchronize_srcu(&dax_srcu);
        unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
        cdev_del(cdev);
        device_unregister(dev);
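Taken together, the dax.c hunks implement one pattern: fault handlers run inside an SRCU read-side section (which, unlike rcu_read_lock(), tolerates sleeping allocations) and bail out once the device is marked dead, while teardown clears the flag and waits out an SRCU grace period before unmapping. A condensed sketch of that shape, using hypothetical names rather than the driver's actual symbols:

/* Condensed illustration of the alive-flag + SRCU pattern above;
 * identifiers are hypothetical, not the actual dax.c symbols. */
#include <linux/srcu.h>
#include <linux/types.h>
#include <linux/errno.h>

DEFINE_STATIC_SRCU(demo_srcu);
static bool demo_alive = true;

static int demo_fault_handler(void)
{
        int id, rc = -ENXIO;

        id = srcu_read_lock(&demo_srcu);        /* sleepable read-side section */
        if (demo_alive)
                rc = 0;                         /* ...service the fault, may allocate/sleep... */
        srcu_read_unlock(&demo_srcu, id);
        return rc;
}

static void demo_teardown(void)
{
        demo_alive = false;
        /* Wait for every reader that may have observed alive == true. */
        synchronize_srcu(&demo_srcu);
        /* Now it is safe to unmap and unregister the device. */
}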
drivers/nvdimm/bus.c
@@ -934,8 +934,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
        rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL);
        if (rc < 0)
                goto out_unlock;
+       nvdimm_bus_unlock(&nvdimm_bus->dev);
+
        if (copy_to_user(p, buf, buf_len))
                rc = -EFAULT;
+
+       vfree(buf);
+       return rc;
+
  out_unlock:
        nvdimm_bus_unlock(&nvdimm_bus->dev);
  out:
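The __nd_ioctl() rework follows the general rule behind this lockdep splat: copy_to_user() may fault and take mmap_sem, so it must not run while the nvdimm bus lock is held in an ordering that also nests inside mmap_sem (or a jbd2 handle). A schematic sketch of the resulting shape, with hypothetical lock and buffer names:

/* Schematic illustration (hypothetical names): release the subsystem
 * lock before copy_to_user(), which may fault and acquire mmap_sem,
 * to avoid the lock-ordering inversion lockdep reports. */
#include <linux/mutex.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/types.h>

static DEFINE_MUTEX(demo_bus_lock);

static long demo_ioctl_copy(void __user *p, void *buf, size_t len)
{
        long rc = 0;

        mutex_lock(&demo_bus_lock);
        /* ...run the command while the bus state is stable... */
        mutex_unlock(&demo_bus_lock);   /* drop the lock first */

        if (copy_to_user(p, buf, len))  /* may fault -> takes mmap_sem */
                rc = -EFAULT;

        vfree(buf);                     /* buf assumed vmalloc'd by the caller path */
        return rc;
}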
drivers/nvdimm/claim.c
@@ -243,7 +243,15 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
        }
 
        if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
-               if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)) {
+               /*
+                * FIXME: nsio_rw_bytes() may be called from atomic
+                * context in the btt case and nvdimm_clear_poison()
+                * takes a sleeping lock. Until the locking can be
+                * reworked this capability requires that the namespace
+                * is not claimed by btt.
+                */
+               if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
+                               && (!ndns->claim || !is_nd_btt(ndns->claim))) {
                        long cleared;
 
                        cleared = nvdimm_clear_poison(&ndns->dev, offset, size);
drivers/nvdimm/dimm_devs.c
@@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(nvdimm_create);
 
 int alias_dpa_busy(struct device *dev, void *data)
 {
-       resource_size_t map_end, blk_start, new, busy;
+       resource_size_t map_end, blk_start, new;
        struct blk_alloc_info *info = data;
        struct nd_mapping *nd_mapping;
        struct nd_region *nd_region;
@@ -436,29 +436,19 @@ int alias_dpa_busy(struct device *dev, void *data)
  retry:
        /*
         * Find the free dpa from the end of the last pmem allocation to
-        * the end of the interleave-set mapping that is not already
-        * covered by a blk allocation.
+        * the end of the interleave-set mapping.
         */
-       busy = 0;
        for_each_dpa_resource(ndd, res) {
+               if (strncmp(res->name, "pmem", 4) != 0)
+                       continue;
                if ((res->start >= blk_start && res->start < map_end)
                                || (res->end >= blk_start
                                        && res->end <= map_end)) {
-                       if (strncmp(res->name, "pmem", 4) == 0) {
-                               new = max(blk_start, min(map_end + 1,
-                                                       res->end + 1));
-                               if (new != blk_start) {
-                                       blk_start = new;
-                                       goto retry;
-                               }
-                       } else
-                               busy += min(map_end, res->end)
-                                       - max(nd_mapping->start, res->start) + 1;
-               } else if (nd_mapping->start > res->start
-                               && map_end < res->end) {
-                       /* total eclipse of the PMEM region mapping */
-                       busy += nd_mapping->size;
-                       break;
+                       new = max(blk_start, min(map_end + 1, res->end + 1));
+                       if (new != blk_start) {
+                               blk_start = new;
+                               goto retry;
+                       }
                }
        }
 
@@ -470,52 +460,11 @@ int alias_dpa_busy(struct device *dev, void *data)
                return 1;
        }
 
-       info->available -= blk_start - nd_mapping->start + busy;
+       info->available -= blk_start - nd_mapping->start;
 
        return 0;
 }
 
-static int blk_dpa_busy(struct device *dev, void *data)
-{
-       struct blk_alloc_info *info = data;
-       struct nd_mapping *nd_mapping;
-       struct nd_region *nd_region;
-       resource_size_t map_end;
-       int i;
-
-       if (!is_nd_pmem(dev))
-               return 0;
-
-       nd_region = to_nd_region(dev);
-       for (i = 0; i < nd_region->ndr_mappings; i++) {
-               nd_mapping = &nd_region->mapping[i];
-               if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
-                       break;
-       }
-
-       if (i >= nd_region->ndr_mappings)
-               return 0;
-
-       map_end = nd_mapping->start + nd_mapping->size - 1;
-       if (info->res->start >= nd_mapping->start
-                       && info->res->start < map_end) {
-               if (info->res->end <= map_end) {
-                       info->busy = 0;
-                       return 1;
-               } else {
-                       info->busy -= info->res->end - map_end;
-                       return 0;
-               }
-       } else if (info->res->end >= nd_mapping->start
-                       && info->res->end <= map_end) {
-               info->busy -= nd_mapping->start - info->res->start;
-               return 0;
-       } else {
-               info->busy -= nd_mapping->size;
-               return 0;
-       }
-}
-
 /**
  * nd_blk_available_dpa - account the unused dpa of BLK region
  * @nd_mapping: container of dpa-resource-root + labels
@@ -545,11 +494,7 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
        for_each_dpa_resource(ndd, res) {
                if (strncmp(res->name, "blk", 3) != 0)
                        continue;
-
-               info.res = res;
-               info.busy = resource_size(res);
-               device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
-               info.available -= info.busy;
+               info.available -= resource_size(res);
        }
 
        return info.available;