Minor merge needed, due to function move.

Main excitement here is Peter Zijlstra's lockless rbtree optimization to
 speed module address lookup.  He found some abusers of the module lock
 doing that too.
 
 A little bit of parameter work here too; including Dan Streetman's breaking
 up the big param mutex so writing a parameter can load another module (yeah,
 really).  Unfortunately that broke the usual suspects, !CONFIG_MODULES and
 !CONFIG_SYSFS, so those fixes were appended too.
 
 Cheers,
 Rusty.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJVkgKHAAoJENkgDmzRrbjxQpwQAJVmBN6jF3SnwbQXv9vRixjH
 58V33sb1G1RW+kXxQ3/e8jLX/4VaN479CufruXQp+IJWXsN/CH0lbC3k8m7u50d7
 b1Zeqd/Yrh79rkc11b0X1698uGCSMlzz+V54Z0QOTEEX+nSu2ZZvccFS4UaHkn3z
 rqDo00lb7rxQz8U25qro2OZrG6D3ub2q20TkWUB8EO4AOHkPn8KWP2r429Axrr0K
 wlDWDTTt8/IsvPbuPf3T15RAhq1avkMXWn9nDXDjyWbpLfTn8NFnWmtesgY7Jl4t
 GjbXC5WYekX3w2ZDB9KaT/DAMQ1a7RbMXNSz4RX4VbzDl+yYeSLmIh2G9fZb1PbB
 PsIxrOgy4BquOWsJPm+zeFPSC3q9Cfu219L4AmxSjiZxC3dlosg5rIB892Mjoyv4
 qxmg6oiqtc4Jxv+Gl9lRFVOqyHZrTC5IJ+xgfv1EyP6kKMUKLlDZtxZAuQxpUyxR
 HZLq220RYnYSvkWauikq4M8fqFM8bdt6hLJnv7bVqllseROk9stCvjSiE3A9szH5
 OgtOfYV5GhOeb8pCZqJKlGDw+RoJ21jtNCgOr6DgkNKV9CX/kL/Puwv8gnA0B0eh
 dxCeB7f/gcLl7Cg3Z3gVVcGlgak6JWrLf5ITAJhBZ8Lv+AtL2DKmwEWS/iIMRmek
 tLdh/a9GiCitqS0bT7GE
 =tWPQ
 -----END PGP SIGNATURE-----

Merge tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux

Pull module updates from Rusty Russell:
 "Main excitement here is Peter Zijlstra's lockless rbtree optimization
  to speed module address lookup.  He found some abusers of the module
  lock doing that too.

  A little bit of parameter work here too; including Dan Streetman's
  breaking up the big param mutex so writing a parameter can load
  another module (yeah, really).  Unfortunately that broke the usual
  suspects, !CONFIG_MODULES and !CONFIG_SYSFS, so those fixes were
  appended too"

* tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (26 commits)
  modules: only use mod->param_lock if CONFIG_MODULES
  param: fix module param locks when !CONFIG_SYSFS.
  rcu: merge fix for Convert ACCESS_ONCE() to READ_ONCE() and WRITE_ONCE()
  module: add per-module param_lock
  module: make perm const
  params: suppress unused variable error, warn once just in case code changes.
  modules: clarify CONFIG_MODULE_COMPRESS help, suggest 'N'.
  kernel/module.c: avoid ifdefs for sig_enforce declaration
  kernel/workqueue.c: remove ifdefs over wq_power_efficient
  kernel/params.c: export param_ops_bool_enable_only
  kernel/params.c: generalize bool_enable_only
  kernel/module.c: use generic module param operaters for sig_enforce
  kernel/params: constify struct kernel_param_ops uses
  sysfs: tightened sysfs permission checks
  module: Rework module_addr_{min,max}
  module: Use __module_address() for module_address_lookup()
  module: Make the mod_tree stuff conditional on PERF_EVENTS || TRACING
  module: Optimize __module_address() using a latched RB-tree
  rbtree: Implement generic latch_tree
  seqlock: Introduce raw_read_seqcount_latch()
  ...
This commit is contained in:
Linus Torvalds 2015-07-01 10:49:25 -07:00
commit 02201e3f1b
48 changed files with 887 additions and 359 deletions

View file

@ -101,48 +101,201 @@
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
#ifdef CONFIG_MODULES_TREE_LOOKUP
/*
* Use a latched RB-tree for __module_address(); this allows us to use
* RCU-sched lookups of the address from any context.
*
* Because modules have two address ranges: init and core, we need two
* latch_tree_nodes entries. Therefore we need the back-pointer from
* mod_tree_node.
*
* Because init ranges are short lived we mark them unlikely and have placed
* them outside the critical cacheline in struct module.
*
* This is conditional on PERF_EVENTS || TRACING because those can really hit
* __module_address() hard by doing a lot of stack unwinding; potentially from
* NMI context.
*/
static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
{
struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
struct module *mod = mtn->mod;
if (unlikely(mtn == &mod->mtn_init))
return (unsigned long)mod->module_init;
return (unsigned long)mod->module_core;
}
static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
{
struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
struct module *mod = mtn->mod;
if (unlikely(mtn == &mod->mtn_init))
return (unsigned long)mod->init_size;
return (unsigned long)mod->core_size;
}
static __always_inline bool
mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
{
return __mod_tree_val(a) < __mod_tree_val(b);
}
static __always_inline int
mod_tree_comp(void *key, struct latch_tree_node *n)
{
unsigned long val = (unsigned long)key;
unsigned long start, end;
start = __mod_tree_val(n);
if (val < start)
return -1;
end = start + __mod_tree_size(n);
if (val >= end)
return 1;
return 0;
}
static const struct latch_tree_ops mod_tree_ops = {
.less = mod_tree_less,
.comp = mod_tree_comp,
};
static struct mod_tree_root {
struct latch_tree_root root;
unsigned long addr_min;
unsigned long addr_max;
} mod_tree __cacheline_aligned = {
.addr_min = -1UL,
};
#define module_addr_min mod_tree.addr_min
#define module_addr_max mod_tree.addr_max
static noinline void __mod_tree_insert(struct mod_tree_node *node)
{
latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
}
static void __mod_tree_remove(struct mod_tree_node *node)
{
latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops);
}
/*
* These modifications: insert, remove_init and remove; are serialized by the
* module_mutex.
*/
static void mod_tree_insert(struct module *mod)
{
mod->mtn_core.mod = mod;
mod->mtn_init.mod = mod;
__mod_tree_insert(&mod->mtn_core);
if (mod->init_size)
__mod_tree_insert(&mod->mtn_init);
}
static void mod_tree_remove_init(struct module *mod)
{
if (mod->init_size)
__mod_tree_remove(&mod->mtn_init);
}
static void mod_tree_remove(struct module *mod)
{
__mod_tree_remove(&mod->mtn_core);
mod_tree_remove_init(mod);
}
static struct module *mod_find(unsigned long addr)
{
struct latch_tree_node *ltn;
ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
if (!ltn)
return NULL;
return container_of(ltn, struct mod_tree_node, node)->mod;
}
#else /* MODULES_TREE_LOOKUP */
static unsigned long module_addr_min = -1UL, module_addr_max = 0;
static void mod_tree_insert(struct module *mod) { }
static void mod_tree_remove_init(struct module *mod) { }
static void mod_tree_remove(struct module *mod) { }
static struct module *mod_find(unsigned long addr)
{
struct module *mod;
list_for_each_entry_rcu(mod, &modules, list) {
if (within_module(addr, mod))
return mod;
}
return NULL;
}
#endif /* MODULES_TREE_LOOKUP */
/*
* Bounds of module text, for speeding up __module_address.
* Protected by module_mutex.
*/
static void __mod_update_bounds(void *base, unsigned int size)
{
unsigned long min = (unsigned long)base;
unsigned long max = min + size;
if (min < module_addr_min)
module_addr_min = min;
if (max > module_addr_max)
module_addr_max = max;
}
static void mod_update_bounds(struct module *mod)
{
__mod_update_bounds(mod->module_core, mod->core_size);
if (mod->init_size)
__mod_update_bounds(mod->module_init, mod->init_size);
}
#ifdef CONFIG_KGDB_KDB
struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
#endif /* CONFIG_KGDB_KDB */
#ifdef CONFIG_MODULE_SIG
#ifdef CONFIG_MODULE_SIG_FORCE
static bool sig_enforce = true;
#else
static bool sig_enforce = false;
static int param_set_bool_enable_only(const char *val,
const struct kernel_param *kp)
static void module_assert_mutex(void)
{
int err;
bool test;
struct kernel_param dummy_kp = *kp;
dummy_kp.arg = &test;
err = param_set_bool(val, &dummy_kp);
if (err)
return err;
/* Don't let them unset it once it's set! */
if (!test && sig_enforce)
return -EROFS;
if (test)
sig_enforce = true;
return 0;
lockdep_assert_held(&module_mutex);
}
static const struct kernel_param_ops param_ops_bool_enable_only = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_bool_enable_only,
.get = param_get_bool,
};
#define param_check_bool_enable_only param_check_bool
static void module_assert_mutex_or_preempt(void)
{
#ifdef CONFIG_LOCKDEP
if (unlikely(!debug_locks))
return;
WARN_ON(!rcu_read_lock_sched_held() &&
!lockdep_is_held(&module_mutex));
#endif
}
static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
#ifndef CONFIG_MODULE_SIG_FORCE
module_param(sig_enforce, bool_enable_only, 0644);
#endif /* !CONFIG_MODULE_SIG_FORCE */
#endif /* CONFIG_MODULE_SIG */
/* Block module loading/unloading? */
int modules_disabled = 0;
@ -153,10 +306,6 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
static BLOCKING_NOTIFIER_HEAD(module_notify_list);
/* Bounds of module allocation, for speeding __module_address.
* Protected by module_mutex. */
static unsigned long module_addr_min = -1UL, module_addr_max = 0;
int register_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&module_notify_list, nb);
@ -318,6 +467,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
#endif
};
module_assert_mutex_or_preempt();
if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
return true;
@ -457,6 +608,8 @@ static struct module *find_module_all(const char *name, size_t len,
{
struct module *mod;
module_assert_mutex();
list_for_each_entry(mod, &modules, list) {
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
continue;
@ -1169,11 +1322,17 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
{
const unsigned long *crc;
/* Since this should be found in kernel (which can't be removed),
* no locking is necessary. */
/*
* Since this should be found in kernel (which can't be removed), no
* locking is necessary -- use preempt_disable() to placate lockdep.
*/
preempt_disable();
if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL,
&crc, true, false))
&crc, true, false)) {
preempt_enable();
BUG();
}
preempt_enable();
return check_version(sechdrs, versindex,
VMLINUX_SYMBOL_STR(module_layout), mod, crc,
NULL);
@ -1661,6 +1820,10 @@ static void mod_sysfs_fini(struct module *mod)
mod_kobject_put(mod);
}
static void init_param_lock(struct module *mod)
{
mutex_init(&mod->param_lock);
}
#else /* !CONFIG_SYSFS */
static int mod_sysfs_setup(struct module *mod,
@ -1683,6 +1846,9 @@ static void del_usage_links(struct module *mod)
{
}
static void init_param_lock(struct module *mod)
{
}
#endif /* CONFIG_SYSFS */
static void mod_sysfs_teardown(struct module *mod)
@ -1852,10 +2018,11 @@ static void free_module(struct module *mod)
mutex_lock(&module_mutex);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
mod_tree_remove(mod);
/* Remove this module from bug list, this uses list_del_rcu */
module_bug_cleanup(mod);
/* Wait for RCU synchronizing before releasing mod->list and buglist. */
synchronize_rcu();
/* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
synchronize_sched();
mutex_unlock(&module_mutex);
/* This may be NULL, but that's OK */
@ -2384,22 +2551,6 @@ void * __weak module_alloc(unsigned long size)
return vmalloc_exec(size);
}
static void *module_alloc_update_bounds(unsigned long size)
{
void *ret = module_alloc(size);
if (ret) {
mutex_lock(&module_mutex);
/* Update module bounds. */
if ((unsigned long)ret < module_addr_min)
module_addr_min = (unsigned long)ret;
if ((unsigned long)ret + size > module_addr_max)
module_addr_max = (unsigned long)ret + size;
mutex_unlock(&module_mutex);
}
return ret;
}
#ifdef CONFIG_DEBUG_KMEMLEAK
static void kmemleak_load_module(const struct module *mod,
const struct load_info *info)
@ -2805,7 +2956,7 @@ static int move_module(struct module *mod, struct load_info *info)
void *ptr;
/* Do the allocs. */
ptr = module_alloc_update_bounds(mod->core_size);
ptr = module_alloc(mod->core_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
@ -2819,7 +2970,7 @@ static int move_module(struct module *mod, struct load_info *info)
mod->module_core = ptr;
if (mod->init_size) {
ptr = module_alloc_update_bounds(mod->init_size);
ptr = module_alloc(mod->init_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. This block doesn't need to be
@ -3119,6 +3270,7 @@ static noinline int do_init_module(struct module *mod)
mod->symtab = mod->core_symtab;
mod->strtab = mod->core_strtab;
#endif
mod_tree_remove_init(mod);
unset_module_init_ro_nx(mod);
module_arch_freeing_init(mod);
mod->module_init = NULL;
@ -3127,11 +3279,11 @@ static noinline int do_init_module(struct module *mod)
mod->init_text_size = 0;
/*
* We want to free module_init, but be aware that kallsyms may be
* walking this with preempt disabled. In all the failure paths,
* we call synchronize_rcu/synchronize_sched, but we don't want
* to slow down the success path, so use actual RCU here.
* walking this with preempt disabled. In all the failure paths, we
* call synchronize_sched(), but we don't want to slow down the success
* path, so use actual RCU here.
*/
call_rcu(&freeinit->rcu, do_free_init);
call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);
wake_up_all(&module_wq);
@ -3188,7 +3340,9 @@ again:
err = -EEXIST;
goto out;
}
mod_update_bounds(mod);
list_add_rcu(&mod->list, &modules);
mod_tree_insert(mod);
err = 0;
out:
@ -3304,6 +3458,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
if (err)
goto unlink_mod;
init_param_lock(mod);
/* Now we've got everything in the final locations, we can
* find optional sections. */
err = find_module_sections(mod, info);
@ -3402,8 +3558,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
wake_up_all(&module_wq);
/* Wait for RCU synchronizing before releasing mod->list. */
synchronize_rcu();
/* Wait for RCU-sched synchronizing before releasing mod->list. */
synchronize_sched();
mutex_unlock(&module_mutex);
free_module:
/* Free lock-classes; relies on the preceding sync_rcu() */
@ -3527,19 +3683,15 @@ const char *module_address_lookup(unsigned long addr,
char **modname,
char *namebuf)
{
struct module *mod;
const char *ret = NULL;
struct module *mod;
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (within_module(addr, mod)) {
if (modname)
*modname = mod->name;
ret = get_ksymbol(mod, addr, size, offset);
break;
}
mod = __module_address(addr);
if (mod) {
if (modname)
*modname = mod->name;
ret = get_ksymbol(mod, addr, size, offset);
}
/* Make a copy in here where it's safe */
if (ret) {
@ -3547,6 +3699,7 @@ const char *module_address_lookup(unsigned long addr,
ret = namebuf;
}
preempt_enable();
return ret;
}
@ -3670,6 +3823,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
unsigned int i;
int ret;
module_assert_mutex();
list_for_each_entry(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
@ -3844,13 +3999,15 @@ struct module *__module_address(unsigned long addr)
if (addr < module_addr_min || addr > module_addr_max)
return NULL;
list_for_each_entry_rcu(mod, &modules, list) {
module_assert_mutex_or_preempt();
mod = mod_find(addr);
if (mod) {
BUG_ON(!within_module(addr, mod));
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (within_module(addr, mod))
return mod;
mod = NULL;
}
return NULL;
return mod;
}
EXPORT_SYMBOL_GPL(__module_address);