mirror of
https://github.com/Fishwaldo/linux-bl808.git
synced 2025-03-27 09:25:43 +00:00
We have a race condition between move_pages() and freeing hugepages, where
move_pages() calls follow_page(FOLL_GET) for hugepages internally and
tries to get its refcount without preventing concurrent freeing. This
race crashes the kernel, so this patch fixes it by moving FOLL_GET code
for hugepages into follow_huge_pmd() with taking the page table lock.
This patch intentionally removes page==NULL check after pte_page.
This is justified because pte_page() never returns NULL for any
architectures or configurations.
This patch changes the behavior of follow_huge_pmd() for tail pages and
then tail pages can be pinned/returned. So the caller must be changed to
properly handle the returned tail pages.
We could have a choice to add the similar locking to
follow_huge_(addr|pud) for consistency, but it's not necessary because
currently these functions don't support FOLL_GET flag, so let's leave it
for future development.
Here is the reproducer:
$ cat movepages.c
#include <stdio.h>
#include <stdlib.h>
#include <numaif.h>
#define ADDR_INPUT 0x700000000000UL
#define HPS 0x200000
#define PS 0x1000
int main(int argc, char *argv[]) {
int i;
int nr_hp = strtol(argv[1], NULL, 0);
int nr_p = nr_hp * HPS / PS;
int ret;
void **addrs;
int *status;
int *nodes;
pid_t pid;
pid = strtol(argv[2], NULL, 0);
addrs = malloc(sizeof(char *) * nr_p + 1);
status = malloc(sizeof(char *) * nr_p + 1);
nodes = malloc(sizeof(char *) * nr_p + 1);
while (1) {
for (i = 0; i < nr_p; i++) {
addrs[i] = (void *)ADDR_INPUT + i * PS;
nodes[i] = 1;
status[i] = 0;
}
ret = numa_move_pages(pid, nr_p, addrs, nodes, status,
MPOL_MF_MOVE_ALL);
if (ret == -1)
err("move_pages");
for (i = 0; i < nr_p; i++) {
addrs[i] = (void *)ADDR_INPUT + i * PS;
nodes[i] = 0;
status[i] = 0;
}
ret = numa_move_pages(pid, nr_p, addrs, nodes, status,
MPOL_MF_MOVE_ALL);
if (ret == -1)
err("move_pages");
}
return 0;
}
$ cat hugepage.c
#include <stdio.h>
#include <sys/mman.h>
#include <string.h>
#define ADDR_INPUT 0x700000000000UL
#define HPS 0x200000
int main(int argc, char *argv[]) {
int nr_hp = strtol(argv[1], NULL, 0);
char *p;
while (1) {
p = mmap((void *)ADDR_INPUT, nr_hp * HPS, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
if (p != (void *)ADDR_INPUT) {
perror("mmap");
break;
}
memset(p, 0, nr_hp * HPS);
munmap(p, nr_hp * HPS);
}
}
$ sysctl vm.nr_hugepages=40
$ ./hugepage 10 &
$ ./movepages 10 $(pgrep -f hugepage)
Fixes:
|
||
---|---|---|
.. | ||
backing-dev.c | ||
balloon_compaction.c | ||
bootmem.c | ||
cleancache.c | ||
cma.c | ||
compaction.c | ||
debug-pagealloc.c | ||
debug.c | ||
dmapool.c | ||
early_ioremap.c | ||
fadvise.c | ||
failslab.c | ||
filemap.c | ||
filemap_xip.c | ||
frontswap.c | ||
gup.c | ||
highmem.c | ||
huge_memory.c | ||
hugetlb.c | ||
hugetlb_cgroup.c | ||
hwpoison-inject.c | ||
init-mm.c | ||
internal.h | ||
interval_tree.c | ||
iov_iter.c | ||
Kconfig | ||
Kconfig.debug | ||
kmemcheck.c | ||
kmemleak-test.c | ||
kmemleak.c | ||
ksm.c | ||
list_lru.c | ||
maccess.c | ||
madvise.c | ||
Makefile | ||
memblock.c | ||
memcontrol.c | ||
memory-failure.c | ||
memory.c | ||
memory_hotplug.c | ||
mempolicy.c | ||
mempool.c | ||
migrate.c | ||
mincore.c | ||
mlock.c | ||
mm_init.c | ||
mmap.c | ||
mmu_context.c | ||
mmu_notifier.c | ||
mmzone.c | ||
mprotect.c | ||
mremap.c | ||
msync.c | ||
nobootmem.c | ||
nommu.c | ||
oom_kill.c | ||
page-writeback.c | ||
page_alloc.c | ||
page_counter.c | ||
page_ext.c | ||
page_io.c | ||
page_isolation.c | ||
page_owner.c | ||
pagewalk.c | ||
percpu-km.c | ||
percpu-vm.c | ||
percpu.c | ||
pgtable-generic.c | ||
process_vm_access.c | ||
quicklist.c | ||
readahead.c | ||
rmap.c | ||
shmem.c | ||
slab.c | ||
slab.h | ||
slab_common.c | ||
slob.c | ||
slub.c | ||
sparse-vmemmap.c | ||
sparse.c | ||
swap.c | ||
swap_cgroup.c | ||
swap_state.c | ||
swapfile.c | ||
truncate.c | ||
util.c | ||
vmacache.c | ||
vmalloc.c | ||
vmpressure.c | ||
vmscan.c | ||
vmstat.c | ||
workingset.c | ||
zbud.c | ||
zpool.c | ||
zsmalloc.c | ||
zswap.c |