mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-06-28 09:31:14 +00:00
Merge branch 'akpm' (patches from Andrew)
Merge misc updates and fixes from Andrew Morton:
 - late-breaking ocfs2 updates
 - random bunch of fixes

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: disable DEFERRED_STRUCT_PAGE_INIT on !NO_BOOTMEM
  mm/memcontrol.c: move comments for get_mctgt_type() to proper position
  mm/memcontrol.c: fix the margin computation in mem_cgroup_margin()
  mm/cma: silence warnings due to max() usage
  mm: thp: avoid false positive VM_BUG_ON_PAGE in page_move_anon_rmap()
  oom_reaper: close race with exiting task
  mm: use early_pfn_to_nid in register_page_bootmem_info_node
  mm: use early_pfn_to_nid in page_ext_init
  MAINTAINERS: Kdump maintainers update
  MAINTAINERS: add kexec_core.c and kexec_file.c
  mm: oom: do not reap task if there are live threads in threadgroup
  direct-io: fix direct write stale data exposure from concurrent buffered read
  ocfs2: bump up o2cb network protocol version
  ocfs2: o2hb: fix hb hung time
  ocfs2: o2hb: don't negotiate if last hb fail
  ocfs2: o2hb: add some user/debug log
  ocfs2: o2hb: add NEGOTIATE_APPROVE message
  ocfs2: o2hb: add NEGO_TIMEOUT message
  ocfs2: o2hb: add negotiate timer
This commit is contained in:
commit
af7d93729c
12 changed files with 246 additions and 51 deletions
|
@ -6421,8 +6421,9 @@ F: Documentation/kbuild/kconfig-language.txt
|
||||||
F: scripts/kconfig/
|
F: scripts/kconfig/
|
||||||
|
|
||||||
KDUMP
|
KDUMP
|
||||||
M: Vivek Goyal <vgoyal@redhat.com>
|
M: Dave Young <dyoung@redhat.com>
|
||||||
M: Haren Myneni <hbabu@us.ibm.com>
|
M: Baoquan He <bhe@redhat.com>
|
||||||
|
R: Vivek Goyal <vgoyal@redhat.com>
|
||||||
L: kexec@lists.infradead.org
|
L: kexec@lists.infradead.org
|
||||||
W: http://lse.sourceforge.net/kdump/
|
W: http://lse.sourceforge.net/kdump/
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
@ -6568,7 +6569,7 @@ L: kexec@lists.infradead.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: include/linux/kexec.h
|
F: include/linux/kexec.h
|
||||||
F: include/uapi/linux/kexec.h
|
F: include/uapi/linux/kexec.h
|
||||||
F: kernel/kexec.c
|
F: kernel/kexec*
|
||||||
|
|
||||||
KEYS/KEYRINGS:
|
KEYS/KEYRINGS:
|
||||||
M: David Howells <dhowells@redhat.com>
|
M: David Howells <dhowells@redhat.com>
|
||||||
|
|
|
@ -628,11 +628,11 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
|
||||||
map_bh->b_size = fs_count << i_blkbits;
|
map_bh->b_size = fs_count << i_blkbits;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For writes inside i_size on a DIO_SKIP_HOLES filesystem we
|
* For writes that could fill holes inside i_size on a
|
||||||
* forbid block creations: only overwrites are permitted.
|
* DIO_SKIP_HOLES filesystem we forbid block creations: only
|
||||||
* We will return early to the caller once we see an
|
* overwrites are permitted. We will return early to the caller
|
||||||
* unmapped buffer head returned, and the caller will fall
|
* once we see an unmapped buffer head returned, and the caller
|
||||||
* back to buffered I/O.
|
* will fall back to buffered I/O.
|
||||||
*
|
*
|
||||||
* Otherwise the decision is left to the get_blocks method,
|
* Otherwise the decision is left to the get_blocks method,
|
||||||
* which may decide to handle it or also return an unmapped
|
* which may decide to handle it or also return an unmapped
|
||||||
|
@ -640,8 +640,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
|
||||||
*/
|
*/
|
||||||
create = dio->rw & WRITE;
|
create = dio->rw & WRITE;
|
||||||
if (dio->flags & DIO_SKIP_HOLES) {
|
if (dio->flags & DIO_SKIP_HOLES) {
|
||||||
if (sdio->block_in_file < (i_size_read(dio->inode) >>
|
if (fs_startblk <= ((i_size_read(dio->inode) - 1) >>
|
||||||
sdio->blkbits))
|
i_blkbits))
|
||||||
create = 0;
|
create = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -272,10 +272,21 @@ struct o2hb_region {
|
||||||
struct delayed_work hr_write_timeout_work;
|
struct delayed_work hr_write_timeout_work;
|
||||||
unsigned long hr_last_timeout_start;
|
unsigned long hr_last_timeout_start;
|
||||||
|
|
||||||
|
/* negotiate timer, used to negotiate extending hb timeout. */
|
||||||
|
struct delayed_work hr_nego_timeout_work;
|
||||||
|
unsigned long hr_nego_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||||
|
|
||||||
/* Used during o2hb_check_slot to hold a copy of the block
|
/* Used during o2hb_check_slot to hold a copy of the block
|
||||||
* being checked because we temporarily have to zero out the
|
* being checked because we temporarily have to zero out the
|
||||||
* crc field. */
|
* crc field. */
|
||||||
struct o2hb_disk_heartbeat_block *hr_tmp_block;
|
struct o2hb_disk_heartbeat_block *hr_tmp_block;
|
||||||
|
|
||||||
|
/* Message key for negotiate timeout message. */
|
||||||
|
unsigned int hr_key;
|
||||||
|
struct list_head hr_handler_list;
|
||||||
|
|
||||||
|
/* last hb status, 0 for success, other value for error. */
|
||||||
|
int hr_last_hb_status;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct o2hb_bio_wait_ctxt {
|
struct o2hb_bio_wait_ctxt {
|
||||||
|
@ -284,6 +295,17 @@ struct o2hb_bio_wait_ctxt {
|
||||||
int wc_error;
|
int wc_error;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define O2HB_NEGO_TIMEOUT_MS (O2HB_MAX_WRITE_TIMEOUT_MS/2)
|
||||||
|
|
||||||
|
enum {
|
||||||
|
O2HB_NEGO_TIMEOUT_MSG = 1,
|
||||||
|
O2HB_NEGO_APPROVE_MSG = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct o2hb_nego_msg {
|
||||||
|
u8 node_num;
|
||||||
|
};
|
||||||
|
|
||||||
static void o2hb_write_timeout(struct work_struct *work)
|
static void o2hb_write_timeout(struct work_struct *work)
|
||||||
{
|
{
|
||||||
int failed, quorum;
|
int failed, quorum;
|
||||||
|
@ -319,7 +341,7 @@ static void o2hb_write_timeout(struct work_struct *work)
|
||||||
o2quo_disk_timeout();
|
o2quo_disk_timeout();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void o2hb_arm_write_timeout(struct o2hb_region *reg)
|
static void o2hb_arm_timeout(struct o2hb_region *reg)
|
||||||
{
|
{
|
||||||
/* Arm writeout only after thread reaches steady state */
|
/* Arm writeout only after thread reaches steady state */
|
||||||
if (atomic_read(&reg->hr_steady_iterations) != 0)
|
if (atomic_read(&reg->hr_steady_iterations) != 0)
|
||||||
|
@ -334,14 +356,132 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg)
|
||||||
spin_unlock(&o2hb_live_lock);
|
spin_unlock(&o2hb_live_lock);
|
||||||
}
|
}
|
||||||
cancel_delayed_work(&reg->hr_write_timeout_work);
|
cancel_delayed_work(&reg->hr_write_timeout_work);
|
||||||
reg->hr_last_timeout_start = jiffies;
|
|
||||||
schedule_delayed_work(&reg->hr_write_timeout_work,
|
schedule_delayed_work(&reg->hr_write_timeout_work,
|
||||||
msecs_to_jiffies(O2HB_MAX_WRITE_TIMEOUT_MS));
|
msecs_to_jiffies(O2HB_MAX_WRITE_TIMEOUT_MS));
|
||||||
|
|
||||||
|
cancel_delayed_work(&reg->hr_nego_timeout_work);
|
||||||
|
/* negotiate timeout must be less than write timeout. */
|
||||||
|
schedule_delayed_work(&reg->hr_nego_timeout_work,
|
||||||
|
msecs_to_jiffies(O2HB_NEGO_TIMEOUT_MS));
|
||||||
|
memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
|
static void o2hb_disarm_timeout(struct o2hb_region *reg)
|
||||||
{
|
{
|
||||||
cancel_delayed_work_sync(&reg->hr_write_timeout_work);
|
cancel_delayed_work_sync(&reg->hr_write_timeout_work);
|
||||||
|
cancel_delayed_work_sync(&reg->hr_nego_timeout_work);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int o2hb_send_nego_msg(int key, int type, u8 target)
|
||||||
|
{
|
||||||
|
struct o2hb_nego_msg msg;
|
||||||
|
int status, ret;
|
||||||
|
|
||||||
|
msg.node_num = o2nm_this_node();
|
||||||
|
again:
|
||||||
|
ret = o2net_send_message(type, key, &msg, sizeof(msg),
|
||||||
|
target, &status);
|
||||||
|
|
||||||
|
if (ret == -EAGAIN || ret == -ENOMEM) {
|
||||||
|
msleep(100);
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void o2hb_nego_timeout(struct work_struct *work)
|
||||||
|
{
|
||||||
|
unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||||
|
int master_node, i, ret;
|
||||||
|
struct o2hb_region *reg;
|
||||||
|
|
||||||
|
reg = container_of(work, struct o2hb_region, hr_nego_timeout_work.work);
|
||||||
|
/* don't negotiate timeout if last hb failed since it is very
|
||||||
|
* possible io failed. Should let write timeout fence self.
|
||||||
|
*/
|
||||||
|
if (reg->hr_last_hb_status)
|
||||||
|
return;
|
||||||
|
|
||||||
|
o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
|
||||||
|
/* lowest node as master node to make negotiate decision. */
|
||||||
|
master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0);
|
||||||
|
|
||||||
|
if (master_node == o2nm_this_node()) {
|
||||||
|
if (!test_bit(master_node, reg->hr_nego_node_bitmap)) {
|
||||||
|
printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s).\n",
|
||||||
|
o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000,
|
||||||
|
config_item_name(&reg->hr_item), reg->hr_dev_name);
|
||||||
|
set_bit(master_node, reg->hr_nego_node_bitmap);
|
||||||
|
}
|
||||||
|
if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap,
|
||||||
|
sizeof(reg->hr_nego_node_bitmap))) {
|
||||||
|
/* check negotiate bitmap every second to do timeout
|
||||||
|
* approve decision.
|
||||||
|
*/
|
||||||
|
schedule_delayed_work(&reg->hr_nego_timeout_work,
|
||||||
|
msecs_to_jiffies(1000));
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%s) is down.\n",
|
||||||
|
config_item_name(&reg->hr_item), reg->hr_dev_name);
|
||||||
|
/* approve negotiate timeout request. */
|
||||||
|
o2hb_arm_timeout(reg);
|
||||||
|
|
||||||
|
i = -1;
|
||||||
|
while ((i = find_next_bit(live_node_bitmap,
|
||||||
|
O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
|
||||||
|
if (i == master_node)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
mlog(ML_HEARTBEAT, "send NEGO_APPROVE msg to node %d\n", i);
|
||||||
|
ret = o2hb_send_nego_msg(reg->hr_key,
|
||||||
|
O2HB_NEGO_APPROVE_MSG, i);
|
||||||
|
if (ret)
|
||||||
|
mlog(ML_ERROR, "send NEGO_APPROVE msg to node %d fail %d\n",
|
||||||
|
i, ret);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* negotiate timeout with master node. */
|
||||||
|
printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s), negotiate timeout with node %d.\n",
|
||||||
|
o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, config_item_name(&reg->hr_item),
|
||||||
|
reg->hr_dev_name, master_node);
|
||||||
|
ret = o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG,
|
||||||
|
master_node);
|
||||||
|
if (ret)
|
||||||
|
mlog(ML_ERROR, "send NEGO_TIMEOUT msg to node %d fail %d\n",
|
||||||
|
master_node, ret);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||||
|
void **ret_data)
|
||||||
|
{
|
||||||
|
struct o2hb_region *reg = data;
|
||||||
|
struct o2hb_nego_msg *nego_msg;
|
||||||
|
|
||||||
|
nego_msg = (struct o2hb_nego_msg *)msg->buf;
|
||||||
|
printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%s).\n",
|
||||||
|
nego_msg->node_num, config_item_name(&reg->hr_item), reg->hr_dev_name);
|
||||||
|
if (nego_msg->node_num < O2NM_MAX_NODES)
|
||||||
|
set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap);
|
||||||
|
else
|
||||||
|
mlog(ML_ERROR, "got nego timeout message from bad node.\n");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||||
|
void **ret_data)
|
||||||
|
{
|
||||||
|
struct o2hb_region *reg = data;
|
||||||
|
|
||||||
|
printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%s).\n",
|
||||||
|
config_item_name(&reg->hr_item), reg->hr_dev_name);
|
||||||
|
o2hb_arm_timeout(reg);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
|
static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
|
||||||
|
@ -1032,7 +1172,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
|
||||||
/* Skip disarming the timeout if own slot has stale/bad data */
|
/* Skip disarming the timeout if own slot has stale/bad data */
|
||||||
if (own_slot_ok) {
|
if (own_slot_ok) {
|
||||||
o2hb_set_quorum_device(reg);
|
o2hb_set_quorum_device(reg);
|
||||||
o2hb_arm_write_timeout(reg);
|
o2hb_arm_timeout(reg);
|
||||||
|
reg->hr_last_timeout_start = jiffies;
|
||||||
}
|
}
|
||||||
|
|
||||||
bail:
|
bail:
|
||||||
|
@ -1096,6 +1237,7 @@ static int o2hb_thread(void *data)
|
||||||
before_hb = ktime_get_real();
|
before_hb = ktime_get_real();
|
||||||
|
|
||||||
ret = o2hb_do_disk_heartbeat(reg);
|
ret = o2hb_do_disk_heartbeat(reg);
|
||||||
|
reg->hr_last_hb_status = ret;
|
||||||
|
|
||||||
after_hb = ktime_get_real();
|
after_hb = ktime_get_real();
|
||||||
|
|
||||||
|
@ -1114,7 +1256,7 @@ static int o2hb_thread(void *data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
o2hb_disarm_write_timeout(reg);
|
o2hb_disarm_timeout(reg);
|
||||||
|
|
||||||
/* unclean stop is only used in very bad situation */
|
/* unclean stop is only used in very bad situation */
|
||||||
for(i = 0; !reg->hr_unclean_stop && i < reg->hr_blocks; i++)
|
for(i = 0; !reg->hr_unclean_stop && i < reg->hr_blocks; i++)
|
||||||
|
@ -1451,6 +1593,7 @@ static void o2hb_region_release(struct config_item *item)
|
||||||
list_del(&reg->hr_all_item);
|
list_del(&reg->hr_all_item);
|
||||||
spin_unlock(&o2hb_live_lock);
|
spin_unlock(&o2hb_live_lock);
|
||||||
|
|
||||||
|
o2net_unregister_handler_list(&reg->hr_handler_list);
|
||||||
kfree(reg);
|
kfree(reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1762,6 +1905,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
|
||||||
}
|
}
|
||||||
|
|
||||||
INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
|
INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
|
||||||
|
INIT_DELAYED_WORK(&reg->hr_nego_timeout_work, o2hb_nego_timeout);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A node is considered live after it has beat LIVE_THRESHOLD
|
* A node is considered live after it has beat LIVE_THRESHOLD
|
||||||
|
@ -1995,13 +2139,37 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
|
||||||
|
|
||||||
config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
|
config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
|
||||||
|
|
||||||
|
/* this is the same way to generate msg key as dlm, for local heartbeat,
|
||||||
|
* name is also the same, so make initial crc value different to avoid
|
||||||
|
* message key conflict.
|
||||||
|
*/
|
||||||
|
reg->hr_key = crc32_le(reg->hr_region_num + O2NM_MAX_REGIONS,
|
||||||
|
name, strlen(name));
|
||||||
|
INIT_LIST_HEAD(&reg->hr_handler_list);
|
||||||
|
ret = o2net_register_handler(O2HB_NEGO_TIMEOUT_MSG, reg->hr_key,
|
||||||
|
sizeof(struct o2hb_nego_msg),
|
||||||
|
o2hb_nego_timeout_handler,
|
||||||
|
reg, NULL, &reg->hr_handler_list);
|
||||||
|
if (ret)
|
||||||
|
goto free;
|
||||||
|
|
||||||
|
ret = o2net_register_handler(O2HB_NEGO_APPROVE_MSG, reg->hr_key,
|
||||||
|
sizeof(struct o2hb_nego_msg),
|
||||||
|
o2hb_nego_approve_handler,
|
||||||
|
reg, NULL, &reg->hr_handler_list);
|
||||||
|
if (ret)
|
||||||
|
goto unregister_handler;
|
||||||
|
|
||||||
ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
|
ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
config_item_put(&reg->hr_item);
|
config_item_put(&reg->hr_item);
|
||||||
goto free;
|
goto unregister_handler;
|
||||||
}
|
}
|
||||||
|
|
||||||
return &reg->hr_item;
|
return &reg->hr_item;
|
||||||
|
|
||||||
|
unregister_handler:
|
||||||
|
o2net_unregister_handler_list(&reg->hr_handler_list);
|
||||||
free:
|
free:
|
||||||
kfree(reg);
|
kfree(reg);
|
||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
|
|
|
@ -44,6 +44,9 @@
|
||||||
* version here in tcp_internal.h should not need to be bumped for
|
* version here in tcp_internal.h should not need to be bumped for
|
||||||
* filesystem locking changes.
|
* filesystem locking changes.
|
||||||
*
|
*
|
||||||
|
* New in version 12
|
||||||
|
* - Negotiate hb timeout when storage is down.
|
||||||
|
*
|
||||||
* New in version 11
|
* New in version 11
|
||||||
* - Negotiation of filesystem locking in the dlm join.
|
* - Negotiation of filesystem locking in the dlm join.
|
||||||
*
|
*
|
||||||
|
@ -75,7 +78,7 @@
|
||||||
* - full 64 bit i_size in the metadata lock lvbs
|
* - full 64 bit i_size in the metadata lock lvbs
|
||||||
* - introduction of "rw" lock and pushing meta/data locking down
|
* - introduction of "rw" lock and pushing meta/data locking down
|
||||||
*/
|
*/
|
||||||
#define O2NET_PROTOCOL_VERSION 11ULL
|
#define O2NET_PROTOCOL_VERSION 12ULL
|
||||||
struct o2net_handshake {
|
struct o2net_handshake {
|
||||||
__be64 protocol_version;
|
__be64 protocol_version;
|
||||||
__be64 connector_id;
|
__be64 connector_id;
|
||||||
|
|
|
@ -607,6 +607,7 @@ asmlinkage __visible void __init start_kernel(void)
|
||||||
initrd_start = 0;
|
initrd_start = 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
page_ext_init();
|
||||||
debug_objects_mem_init();
|
debug_objects_mem_init();
|
||||||
kmemleak_init();
|
kmemleak_init();
|
||||||
setup_per_cpu_pageset();
|
setup_per_cpu_pageset();
|
||||||
|
@ -1003,8 +1004,6 @@ static noinline void __init kernel_init_freeable(void)
|
||||||
sched_init_smp();
|
sched_init_smp();
|
||||||
|
|
||||||
page_alloc_init_late();
|
page_alloc_init_late();
|
||||||
/* Initialize page ext after all struct pages are initializaed */
|
|
||||||
page_ext_init();
|
|
||||||
|
|
||||||
do_basic_setup();
|
do_basic_setup();
|
||||||
|
|
||||||
|
|
|
@ -648,7 +648,7 @@ config DEFERRED_STRUCT_PAGE_INIT
|
||||||
bool "Defer initialisation of struct pages to kthreads"
|
bool "Defer initialisation of struct pages to kthreads"
|
||||||
default n
|
default n
|
||||||
depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
|
depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
|
||||||
depends on MEMORY_HOTPLUG
|
depends on NO_BOOTMEM && MEMORY_HOTPLUG
|
||||||
depends on !FLATMEM
|
depends on !FLATMEM
|
||||||
help
|
help
|
||||||
Ordinarily all struct pages are initialised during early boot in a
|
Ordinarily all struct pages are initialised during early boot in a
|
||||||
|
|
7
mm/cma.c
7
mm/cma.c
|
@ -183,7 +183,8 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
/* ensure minimal alignment required by mm core */
|
/* ensure minimal alignment required by mm core */
|
||||||
alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
|
alignment = PAGE_SIZE <<
|
||||||
|
max_t(unsigned long, MAX_ORDER - 1, pageblock_order);
|
||||||
|
|
||||||
/* alignment should be aligned with order_per_bit */
|
/* alignment should be aligned with order_per_bit */
|
||||||
if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit))
|
if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit))
|
||||||
|
@ -266,8 +267,8 @@ int __init cma_declare_contiguous(phys_addr_t base,
|
||||||
* migratetype page by page allocator's buddy algorithm. In the case,
|
* migratetype page by page allocator's buddy algorithm. In the case,
|
||||||
* you couldn't get a contiguous memory, which is not what we want.
|
* you couldn't get a contiguous memory, which is not what we want.
|
||||||
*/
|
*/
|
||||||
alignment = max(alignment,
|
alignment = max(alignment, (phys_addr_t)PAGE_SIZE <<
|
||||||
(phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
|
max_t(unsigned long, MAX_ORDER - 1, pageblock_order));
|
||||||
base = ALIGN(base, alignment);
|
base = ALIGN(base, alignment);
|
||||||
size = ALIGN(size, alignment);
|
size = ALIGN(size, alignment);
|
||||||
limit &= ~(alignment - 1);
|
limit &= ~(alignment - 1);
|
||||||
|
|
|
@ -1108,6 +1108,8 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
|
||||||
limit = READ_ONCE(memcg->memsw.limit);
|
limit = READ_ONCE(memcg->memsw.limit);
|
||||||
if (count <= limit)
|
if (count <= limit)
|
||||||
margin = min(margin, limit - count);
|
margin = min(margin, limit - count);
|
||||||
|
else
|
||||||
|
margin = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return margin;
|
return margin;
|
||||||
|
@ -4307,24 +4309,6 @@ static int mem_cgroup_do_precharge(unsigned long count)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* get_mctgt_type - get target type of moving charge
|
|
||||||
* @vma: the vma the pte to be checked belongs
|
|
||||||
* @addr: the address corresponding to the pte to be checked
|
|
||||||
* @ptent: the pte to be checked
|
|
||||||
* @target: the pointer the target page or swap ent will be stored(can be NULL)
|
|
||||||
*
|
|
||||||
* Returns
|
|
||||||
* 0(MC_TARGET_NONE): if the pte is not a target for move charge.
|
|
||||||
* 1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
|
|
||||||
* move charge. if @target is not NULL, the page is stored in target->page
|
|
||||||
* with extra refcnt got(Callers should handle it).
|
|
||||||
* 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
|
|
||||||
* target for charge migration. if @target is not NULL, the entry is stored
|
|
||||||
* in target->ent.
|
|
||||||
*
|
|
||||||
* Called with pte lock held.
|
|
||||||
*/
|
|
||||||
union mc_target {
|
union mc_target {
|
||||||
struct page *page;
|
struct page *page;
|
||||||
swp_entry_t ent;
|
swp_entry_t ent;
|
||||||
|
@ -4513,6 +4497,25 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get_mctgt_type - get target type of moving charge
|
||||||
|
* @vma: the vma the pte to be checked belongs
|
||||||
|
* @addr: the address corresponding to the pte to be checked
|
||||||
|
* @ptent: the pte to be checked
|
||||||
|
* @target: the pointer the target page or swap ent will be stored(can be NULL)
|
||||||
|
*
|
||||||
|
* Returns
|
||||||
|
* 0(MC_TARGET_NONE): if the pte is not a target for move charge.
|
||||||
|
* 1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
|
||||||
|
* move charge. if @target is not NULL, the page is stored in target->page
|
||||||
|
* with extra refcnt got(Callers should handle it).
|
||||||
|
* 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
|
||||||
|
* target for charge migration. if @target is not NULL, the entry is stored
|
||||||
|
* in target->ent.
|
||||||
|
*
|
||||||
|
* Called with pte lock held.
|
||||||
|
*/
|
||||||
|
|
||||||
static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
|
static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pte_t ptent, union mc_target *target)
|
unsigned long addr, pte_t ptent, union mc_target *target)
|
||||||
{
|
{
|
||||||
|
|
|
@ -300,7 +300,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
|
||||||
* multiple nodes we check that this pfn does not already
|
* multiple nodes we check that this pfn does not already
|
||||||
* reside in some other nodes.
|
* reside in some other nodes.
|
||||||
*/
|
*/
|
||||||
if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node))
|
if (pfn_valid(pfn) && (early_pfn_to_nid(pfn) == node))
|
||||||
register_page_bootmem_info_section(pfn);
|
register_page_bootmem_info_section(pfn);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -443,12 +443,28 @@ static bool __oom_reap_task(struct task_struct *tsk)
|
||||||
{
|
{
|
||||||
struct mmu_gather tlb;
|
struct mmu_gather tlb;
|
||||||
struct vm_area_struct *vma;
|
struct vm_area_struct *vma;
|
||||||
struct mm_struct *mm;
|
struct mm_struct *mm = NULL;
|
||||||
struct task_struct *p;
|
struct task_struct *p;
|
||||||
struct zap_details details = {.check_swap_entries = true,
|
struct zap_details details = {.check_swap_entries = true,
|
||||||
.ignore_dirty = true};
|
.ignore_dirty = true};
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We have to make sure to not race with the victim exit path
|
||||||
|
* and cause premature new oom victim selection:
|
||||||
|
* __oom_reap_task exit_mm
|
||||||
|
* atomic_inc_not_zero
|
||||||
|
* mmput
|
||||||
|
* atomic_dec_and_test
|
||||||
|
* exit_oom_victim
|
||||||
|
* [...]
|
||||||
|
* out_of_memory
|
||||||
|
* select_bad_process
|
||||||
|
* # no TIF_MEMDIE task selects new victim
|
||||||
|
* unmap_page_range # frees some memory
|
||||||
|
*/
|
||||||
|
mutex_lock(&oom_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make sure we find the associated mm_struct even when the particular
|
* Make sure we find the associated mm_struct even when the particular
|
||||||
* thread has already terminated and cleared its mm.
|
* thread has already terminated and cleared its mm.
|
||||||
|
@ -457,19 +473,19 @@ static bool __oom_reap_task(struct task_struct *tsk)
|
||||||
*/
|
*/
|
||||||
p = find_lock_task_mm(tsk);
|
p = find_lock_task_mm(tsk);
|
||||||
if (!p)
|
if (!p)
|
||||||
return true;
|
goto unlock_oom;
|
||||||
|
|
||||||
mm = p->mm;
|
mm = p->mm;
|
||||||
if (!atomic_inc_not_zero(&mm->mm_users)) {
|
if (!atomic_inc_not_zero(&mm->mm_users)) {
|
||||||
task_unlock(p);
|
task_unlock(p);
|
||||||
return true;
|
goto unlock_oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
task_unlock(p);
|
task_unlock(p);
|
||||||
|
|
||||||
if (!down_read_trylock(&mm->mmap_sem)) {
|
if (!down_read_trylock(&mm->mmap_sem)) {
|
||||||
ret = false;
|
ret = false;
|
||||||
goto out;
|
goto unlock_oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
tlb_gather_mmu(&tlb, mm, 0, -1);
|
tlb_gather_mmu(&tlb, mm, 0, -1);
|
||||||
|
@ -511,12 +527,14 @@ static bool __oom_reap_task(struct task_struct *tsk)
|
||||||
* to release its memory.
|
* to release its memory.
|
||||||
*/
|
*/
|
||||||
set_bit(MMF_OOM_REAPED, &mm->flags);
|
set_bit(MMF_OOM_REAPED, &mm->flags);
|
||||||
out:
|
unlock_oom:
|
||||||
|
mutex_unlock(&oom_lock);
|
||||||
/*
|
/*
|
||||||
* Drop our reference but make sure the mmput slow path is called from a
|
* Drop our reference but make sure the mmput slow path is called from a
|
||||||
* different context because we shouldn't risk we get stuck there and
|
* different context because we shouldn't risk we get stuck there and
|
||||||
* put the oom_reaper out of the way.
|
* put the oom_reaper out of the way.
|
||||||
*/
|
*/
|
||||||
|
if (mm)
|
||||||
mmput_async(mm);
|
mmput_async(mm);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -611,8 +629,6 @@ void try_oom_reaper(struct task_struct *tsk)
|
||||||
|
|
||||||
if (!process_shares_mm(p, mm))
|
if (!process_shares_mm(p, mm))
|
||||||
continue;
|
continue;
|
||||||
if (same_thread_group(p, tsk))
|
|
||||||
continue;
|
|
||||||
if (fatal_signal_pending(p))
|
if (fatal_signal_pending(p))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
|
@ -390,8 +390,10 @@ void __init page_ext_init(void)
|
||||||
* We know some arch can have a nodes layout such as
|
* We know some arch can have a nodes layout such as
|
||||||
* -------------pfn-------------->
|
* -------------pfn-------------->
|
||||||
* N0 | N1 | N2 | N0 | N1 | N2|....
|
* N0 | N1 | N2 | N0 | N1 | N2|....
|
||||||
|
*
|
||||||
|
* Take into account DEFERRED_STRUCT_PAGE_INIT.
|
||||||
*/
|
*/
|
||||||
if (pfn_to_nid(pfn) != nid)
|
if (early_pfn_to_nid(pfn) != nid)
|
||||||
continue;
|
continue;
|
||||||
if (init_section_page_ext(pfn, nid))
|
if (init_section_page_ext(pfn, nid))
|
||||||
goto oom;
|
goto oom;
|
||||||
|
|
|
@ -1098,6 +1098,8 @@ void page_move_anon_rmap(struct page *page,
|
||||||
|
|
||||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||||
VM_BUG_ON_VMA(!anon_vma, vma);
|
VM_BUG_ON_VMA(!anon_vma, vma);
|
||||||
|
if (IS_ENABLED(CONFIG_DEBUG_VM) && PageTransHuge(page))
|
||||||
|
address &= HPAGE_PMD_MASK;
|
||||||
VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
|
VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
|
||||||
|
|
||||||
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
|
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue