Merge branch 'master' of /home/davem/src/GIT/linux-2.6/

Conflicts:
	drivers/net/pcmcia/fmvj18x_cs.c
	drivers/net/pcmcia/nmclan_cs.c
	drivers/net/pcmcia/xirc2ps_cs.c
	drivers/net/wireless/ray_cs.c
Author: David S. Miller
Date:   2009-12-05 15:22:26 -08:00
Commit: 28b4d5cc17
325 files changed, 9119 insertions(+), 9761 deletions(-)

kernel/Kconfig.locks (new file, 202 lines)

@@ -0,0 +1,202 @@
#
# The ARCH_INLINE foo is necessary because select ignores "depends on"
#
config ARCH_INLINE_SPIN_TRYLOCK
bool
config ARCH_INLINE_SPIN_TRYLOCK_BH
bool
config ARCH_INLINE_SPIN_LOCK
bool
config ARCH_INLINE_SPIN_LOCK_BH
bool
config ARCH_INLINE_SPIN_LOCK_IRQ
bool
config ARCH_INLINE_SPIN_LOCK_IRQSAVE
bool
config ARCH_INLINE_SPIN_UNLOCK
bool
config ARCH_INLINE_SPIN_UNLOCK_BH
bool
config ARCH_INLINE_SPIN_UNLOCK_IRQ
bool
config ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
bool
config ARCH_INLINE_READ_TRYLOCK
bool
config ARCH_INLINE_READ_LOCK
bool
config ARCH_INLINE_READ_LOCK_BH
bool
config ARCH_INLINE_READ_LOCK_IRQ
bool
config ARCH_INLINE_READ_LOCK_IRQSAVE
bool
config ARCH_INLINE_READ_UNLOCK
bool
config ARCH_INLINE_READ_UNLOCK_BH
bool
config ARCH_INLINE_READ_UNLOCK_IRQ
bool
config ARCH_INLINE_READ_UNLOCK_IRQRESTORE
bool
config ARCH_INLINE_WRITE_TRYLOCK
bool
config ARCH_INLINE_WRITE_LOCK
bool
config ARCH_INLINE_WRITE_LOCK_BH
bool
config ARCH_INLINE_WRITE_LOCK_IRQ
bool
config ARCH_INLINE_WRITE_LOCK_IRQSAVE
bool
config ARCH_INLINE_WRITE_UNLOCK
bool
config ARCH_INLINE_WRITE_UNLOCK_BH
bool
config ARCH_INLINE_WRITE_UNLOCK_IRQ
bool
config ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
bool
#
# lock_* functions are inlined when:
# - DEBUG_SPINLOCK=n and GENERIC_LOCKBREAK=n and ARCH_INLINE_*LOCK=y
#
# trylock_* functions are inlined when:
# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
#
# unlock and unlock_irq functions are inlined when:
# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
# or
# - DEBUG_SPINLOCK=n and PREEMPT=n
#
# unlock_bh and unlock_irqrestore functions are inlined when:
# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
#
config INLINE_SPIN_TRYLOCK
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK
config INLINE_SPIN_TRYLOCK_BH
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK_BH
config INLINE_SPIN_LOCK
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK
config INLINE_SPIN_LOCK_BH
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_SPIN_LOCK_BH
config INLINE_SPIN_LOCK_IRQ
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_SPIN_LOCK_IRQ
config INLINE_SPIN_LOCK_IRQSAVE
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_SPIN_LOCK_IRQSAVE
config INLINE_SPIN_UNLOCK
def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK)
config INLINE_SPIN_UNLOCK_BH
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH
config INLINE_SPIN_UNLOCK_IRQ
def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK_BH)
config INLINE_SPIN_UNLOCK_IRQRESTORE
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
config INLINE_READ_TRYLOCK
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_TRYLOCK
config INLINE_READ_LOCK
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK
config INLINE_READ_LOCK_BH
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_READ_LOCK_BH
config INLINE_READ_LOCK_IRQ
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_READ_LOCK_IRQ
config INLINE_READ_LOCK_IRQSAVE
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_READ_LOCK_IRQSAVE
config INLINE_READ_UNLOCK
def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK)
config INLINE_READ_UNLOCK_BH
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_BH
config INLINE_READ_UNLOCK_IRQ
def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK_BH)
config INLINE_READ_UNLOCK_IRQRESTORE
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_IRQRESTORE
config INLINE_WRITE_TRYLOCK
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_TRYLOCK
config INLINE_WRITE_LOCK
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK
config INLINE_WRITE_LOCK_BH
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_WRITE_LOCK_BH
config INLINE_WRITE_LOCK_IRQ
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_WRITE_LOCK_IRQ
config INLINE_WRITE_LOCK_IRQSAVE
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_WRITE_LOCK_IRQSAVE
config INLINE_WRITE_UNLOCK
def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK)
config INLINE_WRITE_UNLOCK_BH
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_BH
config INLINE_WRITE_UNLOCK_IRQ
def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK_BH)
config INLINE_WRITE_UNLOCK_IRQRESTORE
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
config MUTEX_SPIN_ON_OWNER
def_bool SMP && !DEBUG_MUTEXES && !HAVE_DEFAULT_NO_SPIN_MUTEXES
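How these options take effect: each lock API has an always-available inline flavor in a header, plus an out-of-line copy that is compiled only when inlining is not selected. A minimal sketch of that split, assuming it mirrors the include/linux/spinlock_api_smp.h / kernel/spinlock.c arrangement of this era (not the literal source):

/* Header side: the candidate for inlining. */
static inline void __spin_lock(spinlock_t *lock)
{
	preempt_disable();
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
}

#ifdef CONFIG_INLINE_SPIN_LOCK
#define _spin_lock(lock) __spin_lock(lock)	/* callers get the inline */
#endif

/* kernel/spinlock.c side: emit the out-of-line version only when the
 * architecture did not select inlining (or debugging switched it off). */
#ifndef CONFIG_INLINE_SPIN_LOCK
void __lockfunc _spin_lock(spinlock_t *lock)
{
	__spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock);
#endif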

kernel/Makefile

@@ -82,6 +82,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_TREE_RCU) += rcutree.o
obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
obj-$(CONFIG_TINY_RCU) += rcutiny.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o

kernel/capability.c

@@ -29,7 +29,6 @@ EXPORT_SYMBOL(__cap_empty_set);
EXPORT_SYMBOL(__cap_full_set);
EXPORT_SYMBOL(__cap_init_eff_set);
#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
int file_caps_enabled = 1;
static int __init file_caps_disable(char *str)
@@ -38,7 +37,6 @@ static int __init file_caps_disable(char *str)
return 1;
}
__setup("no_file_caps", file_caps_disable);
#endif
/*
* More recent versions of libcap are available from:
@@ -169,8 +167,8 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
kernel_cap_t pE, pI, pP;
ret = cap_validate_magic(header, &tocopy);
if (ret != 0)
return ret;
if ((dataptr == NULL) || (ret != 0))
return ((dataptr == NULL) && (ret == -EINVAL)) ? 0 : ret;
if (get_user(pid, &header->pid))
return -EFAULT;
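The reworked test tolerates a NULL dataptr so userspace can probe the kernel's preferred capability version without tripping over -EINVAL. A hedged userspace sketch of that idiom (hypothetical standalone program, raw syscall):

#include <linux/capability.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	/* Deliberately bogus version: cap_validate_magic() rewrites
	 * hdr.version to the kernel's preferred magic and returns
	 * -EINVAL, which the NULL-dataptr case above maps to 0. */
	struct __user_cap_header_struct hdr = { .version = 0, .pid = 0 };

	syscall(SYS_capget, &hdr, NULL);
	printf("preferred capability version: 0x%x\n", hdr.version);
	return 0;
}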
@@ -238,7 +236,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
{
struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
unsigned i, tocopy;
unsigned i, tocopy, copybytes;
kernel_cap_t inheritable, permitted, effective;
struct cred *new;
int ret;
@@ -255,8 +253,11 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
if (pid != 0 && pid != task_pid_vnr(current))
return -EPERM;
if (copy_from_user(&kdata, data,
tocopy * sizeof(struct __user_cap_data_struct)))
copybytes = tocopy * sizeof(struct __user_cap_data_struct);
if (copybytes > sizeof(kdata))
return -EFAULT;
if (copy_from_user(&kdata, data, copybytes))
return -EFAULT;
for (i = 0; i < tocopy; i++) {

kernel/hung_task.c

@@ -144,7 +144,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
rcu_read_lock();
do_each_thread(g, t) {
if (!--max_count)
if (!max_count--)
goto unlock;
if (!--batch_count) {
batch_count = HUNG_TASK_BATCHING;
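The fix swaps pre-decrement for post-decrement, which defers the bail-out by one task. A tiny standalone demonstration of the difference (plain C, not kernel code):

#include <stdio.h>

int main(void)
{
	int pre = 1, post = 1;

	if (!--pre)	/* decrements first: tests 0, a loop would bail now */
		printf("pre-decrement bails immediately (pre=%d)\n", pre);
	if (!post--)	/* tests the old value 1: false, one more pass runs */
		printf("never reached\n");
	printf("post-decrement allows one more pass (post=%d)\n", post);
	return 0;
}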

kernel/irq/chip.c

@@ -166,11 +166,11 @@ int set_irq_data(unsigned int irq, void *data)
EXPORT_SYMBOL(set_irq_data);
/**
* set_irq_data - set irq type data for an irq
* set_irq_msi - set MSI descriptor data for an irq
* @irq: Interrupt number
* @entry: Pointer to MSI descriptor data
*
* Set the hardware irq controller data for an irq
* Set the MSI descriptor entry for an irq
*/
int set_irq_msi(unsigned int irq, struct msi_desc *entry)
{
@@ -590,7 +590,7 @@ out_unlock:
}
/**
* handle_percpu_IRQ - Per CPU local irq handler
* handle_percpu_irq - Per CPU local irq handler
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
*

kernel/irq/proc.c

@@ -136,7 +136,7 @@ out:
static int default_affinity_open(struct inode *inode, struct file *file)
{
return single_open(file, default_affinity_show, NULL);
return single_open(file, default_affinity_show, PDE(inode)->data);
}
static const struct file_operations default_affinity_proc_fops = {
@@ -148,18 +148,28 @@ static const struct file_operations default_affinity_proc_fops = {
};
#endif
static int irq_spurious_read(char *page, char **start, off_t off,
int count, int *eof, void *data)
static int irq_spurious_proc_show(struct seq_file *m, void *v)
{
struct irq_desc *desc = irq_to_desc((long) data);
return sprintf(page, "count %u\n"
"unhandled %u\n"
"last_unhandled %u ms\n",
desc->irq_count,
desc->irqs_unhandled,
jiffies_to_msecs(desc->last_unhandled));
struct irq_desc *desc = irq_to_desc((long) m->private);
seq_printf(m, "count %u\n" "unhandled %u\n" "last_unhandled %u ms\n",
desc->irq_count, desc->irqs_unhandled,
jiffies_to_msecs(desc->last_unhandled));
return 0;
}
static int irq_spurious_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, irq_spurious_proc_show, NULL);
}
static const struct file_operations irq_spurious_proc_fops = {
.open = irq_spurious_proc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
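This is the standard single_open()/seq_file conversion; a minimal sketch with hypothetical my_* names. Note the data cookie: single_open()'s third argument becomes m->private, so a show routine that reads m->private (as irq_spurious_proc_show does) relies on the opener passing PDE(inode)->data, the way default_affinity_open() now does:

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int my_proc_show(struct seq_file *m, void *v)
{
	/* m->private is whatever the opener handed to single_open(). */
	seq_printf(m, "irq %ld\n", (long)m->private);
	return 0;
}

static int my_proc_open(struct inode *inode, struct file *file)
{
	/* Thread the cookie from proc_create_data() through to ->show. */
	return single_open(file, my_proc_show, PDE(inode)->data);
}

static const struct file_operations my_proc_fops = {
	.open		= my_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};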
#define MAX_NAMELEN 128
static int name_unique(unsigned int irq, struct irqaction *new_action)
@@ -204,7 +214,6 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
void register_irq_proc(unsigned int irq, struct irq_desc *desc)
{
char name [MAX_NAMELEN];
struct proc_dir_entry *entry;
if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir)
return;
@@ -214,6 +223,8 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
/* create /proc/irq/1234 */
desc->dir = proc_mkdir(name, root_irq_dir);
if (!desc->dir)
return;
#ifdef CONFIG_SMP
/* create /proc/irq/<irq>/smp_affinity */
@@ -221,11 +232,8 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
&irq_affinity_proc_fops, (void *)(long)irq);
#endif
entry = create_proc_entry("spurious", 0444, desc->dir);
if (entry) {
entry->data = (void *)(long)irq;
entry->read_proc = irq_spurious_read;
}
proc_create_data("spurious", 0444, desc->dir,
&irq_spurious_proc_fops, (void *)(long)irq);
}
#undef MAX_NAMELEN

kernel/irq/spurious.c

@@ -104,7 +104,7 @@ static int misrouted_irq(int irq)
return ok;
}
static void poll_all_shared_irqs(void)
static void poll_spurious_irqs(unsigned long dummy)
{
struct irq_desc *desc;
int i;
@@ -125,23 +125,11 @@ static void poll_all_shared_irqs(void)
try_one_irq(i, desc);
local_irq_enable();
}
}
static void poll_spurious_irqs(unsigned long dummy)
{
poll_all_shared_irqs();
mod_timer(&poll_spurious_irq_timer,
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
#ifdef CONFIG_DEBUG_SHIRQ
void debug_poll_all_shared_irqs(void)
{
poll_all_shared_irqs();
}
#endif
/*
* If 99,900 of the previous 100,000 interrupts have not been handled
* then assume that the IRQ is stuck in some manner. Drop a diagnostic

kernel/kmod.c

@@ -80,16 +80,16 @@ int __request_module(bool wait, const char *fmt, ...)
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
static int kmod_loop_msg;
ret = security_kernel_module_request();
if (ret)
return ret;
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
va_end(args);
if (ret >= MODULE_NAME_LEN)
return -ENAMETOOLONG;
ret = security_kernel_module_request(module_name);
if (ret)
return ret;
/* If modprobe needs a service that is in a module, we get a recursive
* loop. Limit the number of running kmod threads to max_threads/2 or
* MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method

kernel/kprobes.c

@@ -1014,9 +1014,9 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
/* Pre-allocate memory for max kretprobe instances */
if (rp->maxactive <= 0) {
#ifdef CONFIG_PREEMPT
rp->maxactive = max(10, 2 * NR_CPUS);
rp->maxactive = max(10, 2 * num_possible_cpus());
#else
rp->maxactive = NR_CPUS;
rp->maxactive = num_possible_cpus();
#endif
}
spin_lock_init(&rp->lock);

kernel/module.c

@@ -1187,7 +1187,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
/* Count loaded sections and allocate structures */
for (i = 0; i < nsect; i++)
if (sechdrs[i].sh_flags & SHF_ALLOC)
if (sechdrs[i].sh_flags & SHF_ALLOC
&& sechdrs[i].sh_size)
nloaded++;
size[0] = ALIGN(sizeof(*sect_attrs)
+ nloaded * sizeof(sect_attrs->attrs[0]),
@@ -1207,6 +1208,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
for (i = 0; i < nsect; i++) {
if (! (sechdrs[i].sh_flags & SHF_ALLOC))
continue;
if (!sechdrs[i].sh_size)
continue;
sattr->address = sechdrs[i].sh_addr;
sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
GFP_KERNEL);

kernel/mutex.c

@@ -148,8 +148,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
preempt_disable();
mutex_acquire(&lock->dep_map, subclass, 0, ip);
#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && \
!defined(CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES)
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
* Optimistic spinning.
*

kernel/printk.c

@@ -33,6 +33,7 @@
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/kexec.h>
#include <linux/ratelimit.h>
#include <asm/uaccess.h>
@@ -1376,11 +1377,11 @@ late_initcall(disable_boot_consoles);
*/
DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
int printk_ratelimit(void)
int __printk_ratelimit(const char *func)
{
return __ratelimit(&printk_ratelimit_state);
return ___ratelimit(&printk_ratelimit_state, func);
}
EXPORT_SYMBOL(printk_ratelimit);
EXPORT_SYMBOL(__printk_ratelimit);
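Existing callers keep working because the old name survives as a wrapper that feeds in the caller's name; the macro below is an assumed sketch of that shim (modeled on include/linux/kernel.h of this era), followed by a typical call site:

/* Assumed shim: callers still write printk_ratelimit(), and the
 * suppression message can now identify the offending function. */
#define printk_ratelimit() __printk_ratelimit(__func__)

/* Typical use in a driver: */
if (printk_ratelimit())
	printk(KERN_WARNING "mydev: dropping malformed packet\n");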
/**
* printk_timed_ratelimit - caller-controlled printk ratelimiting

kernel/rcupdate.c

@@ -44,7 +44,6 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kernel_stat.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
@@ -53,8 +52,6 @@ struct lockdep_map rcu_lock_map =
EXPORT_SYMBOL_GPL(rcu_lock_map);
#endif
int rcu_scheduler_active __read_mostly;
/*
* Awaken the corresponding synchronize_rcu() instance now that a
* grace period has elapsed.
@@ -66,122 +63,3 @@ void wakeme_after_rcu(struct rcu_head *head)
rcu = container_of(head, struct rcu_synchronize, head);
complete(&rcu->completion);
}
#ifdef CONFIG_TREE_PREEMPT_RCU
/**
* synchronize_rcu - wait until a grace period has elapsed.
*
* Control will return to the caller some time after a full grace
* period has elapsed, in other words after all currently executing RCU
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*/
void synchronize_rcu(void)
{
struct rcu_synchronize rcu;
if (!rcu_scheduler_active)
return;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
/**
* synchronize_sched - wait until an rcu-sched grace period has elapsed.
*
* Control will return to the caller some time after a full rcu-sched
* grace period has elapsed, in other words after all currently executing
* rcu-sched read-side critical sections have completed. These read-side
* critical sections are delimited by rcu_read_lock_sched() and
* rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
* local_irq_disable(), and so on may be used in place of
* rcu_read_lock_sched().
*
* This means that all preempt_disable code sequences, including NMI and
* hardware-interrupt handlers, in progress on entry will have completed
* before this primitive returns. However, this does not guarantee that
* softirq handlers will have completed, since in some kernels, these
* handlers can run in process context, and can block.
*
* This primitive provides the guarantees made by the (now removed)
* synchronize_kernel() API. In contrast, synchronize_rcu() only
* guarantees that rcu_read_lock() sections will have completed.
* In "classic RCU", these two guarantees happen to be one and
* the same, but can differ in realtime RCU implementations.
*/
void synchronize_sched(void)
{
struct rcu_synchronize rcu;
if (rcu_blocking_is_gp())
return;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_sched(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_sched);
/**
* synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
*
* Control will return to the caller some time after a full rcu_bh grace
* period has elapsed, in other words after all currently executing rcu_bh
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
* and may be nested.
*/
void synchronize_rcu_bh(void)
{
struct rcu_synchronize rcu;
if (rcu_blocking_is_gp())
return;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_bh(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
unsigned long action, void *hcpu)
{
return rcu_cpu_notify(self, action, hcpu);
}
void __init rcu_init(void)
{
int i;
__rcu_init();
cpu_notifier(rcu_barrier_cpu_hotplug, 0);
/*
* We don't need protection against CPU-hotplug here because
* this is called early in boot, before either interrupts
* or the scheduler are operational.
*/
for_each_online_cpu(i)
rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i);
}
void rcu_scheduler_starting(void)
{
WARN_ON(num_online_cpus() != 1);
WARN_ON(nr_context_switches() > 0);
rcu_scheduler_active = 1;
}

kernel/rcutiny.c (new file, 282 lines)

@@ -0,0 +1,282 @@
/*
* Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright IBM Corporation, 2008
*
* Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU
*/
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/time.h>
#include <linux/cpu.h>
/* Global control variables for rcupdate callback mechanism. */
struct rcu_ctrlblk {
struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
struct rcu_head **curtail; /* ->next pointer of last CB. */
};
/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_ctrlblk = {
.donetail = &rcu_ctrlblk.rcucblist,
.curtail = &rcu_ctrlblk.rcucblist,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.donetail = &rcu_bh_ctrlblk.rcucblist,
.curtail = &rcu_bh_ctrlblk.rcucblist,
};
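The two tail pointers segment a single singly linked callback list; a comment-only sketch of the invariant the helpers below maintain:

/*
 * rcucblist --> D1 --> D2 --> W1 --> W2 --> NULL
 *
 *   donetail == &D2->next   (->next slot of the last "done" CB)
 *   curtail  == &W2->next   (->next slot of the last CB overall)
 *
 * D1 and D2 have had a grace period elapse and are ready to invoke;
 * W1 and W2 are still waiting.  __call_rcu() appends at *curtail,
 * and rcu_qsctr_help() promotes the waiters by setting
 * donetail = curtail when a quiescent state is observed.
 */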
#ifdef CONFIG_NO_HZ
static long rcu_dynticks_nesting = 1;
/*
* Enter dynticks-idle mode, which is an extended quiescent state
* if we have fully entered that mode (i.e., if the new value of
* dynticks_nesting is zero).
*/
void rcu_enter_nohz(void)
{
if (--rcu_dynticks_nesting == 0)
rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
}
/*
* Exit dynticks-idle mode, so that we are no longer in an extended
* quiescent state.
*/
void rcu_exit_nohz(void)
{
rcu_dynticks_nesting++;
}
#endif /* #ifdef CONFIG_NO_HZ */
/*
* Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc().
* Also disable irqs to avoid confusion due to interrupt handlers
* invoking call_rcu().
*/
static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
{
unsigned long flags;
local_irq_save(flags);
if (rcp->rcucblist != NULL &&
rcp->donetail != rcp->curtail) {
rcp->donetail = rcp->curtail;
local_irq_restore(flags);
return 1;
}
local_irq_restore(flags);
return 0;
}
/*
* Record an rcu quiescent state. And an rcu_bh quiescent state while we
* are at it, given that any rcu quiescent state is also an rcu_bh
* quiescent state. Use "+" instead of "||" to defeat short circuiting.
*/
void rcu_sched_qs(int cpu)
{
if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk))
raise_softirq(RCU_SOFTIRQ);
}
/*
* Record an rcu_bh quiescent state.
*/
void rcu_bh_qs(int cpu)
{
if (rcu_qsctr_help(&rcu_bh_ctrlblk))
raise_softirq(RCU_SOFTIRQ);
}
/*
* Check to see if the scheduling-clock interrupt came from an extended
* quiescent state, and, if so, tell RCU about it.
*/
void rcu_check_callbacks(int cpu, int user)
{
if (user ||
(idle_cpu(cpu) &&
!in_softirq() &&
hardirq_count() <= (1 << HARDIRQ_SHIFT)))
rcu_sched_qs(cpu);
else if (!in_softirq())
rcu_bh_qs(cpu);
}
/*
* Helper function for rcu_process_callbacks() that operates on the
* specified rcu_ctrlblk structure.
*/
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
{
struct rcu_head *next, *list;
unsigned long flags;
/* If no RCU callbacks ready to invoke, just return. */
if (&rcp->rcucblist == rcp->donetail)
return;
/* Move the ready-to-invoke callbacks to a local list. */
local_irq_save(flags);
list = rcp->rcucblist;
rcp->rcucblist = *rcp->donetail;
*rcp->donetail = NULL;
if (rcp->curtail == rcp->donetail)
rcp->curtail = &rcp->rcucblist;
rcp->donetail = &rcp->rcucblist;
local_irq_restore(flags);
/* Invoke the callbacks on the local list. */
while (list) {
next = list->next;
prefetch(next);
list->func(list);
list = next;
}
}
/*
* Invoke any callbacks whose grace period has completed.
*/
static void rcu_process_callbacks(struct softirq_action *unused)
{
__rcu_process_callbacks(&rcu_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);
}
/*
* Wait for a grace period to elapse. But it is illegal to invoke
* synchronize_sched() from within an RCU read-side critical section.
* Therefore, any legal call to synchronize_sched() is a quiescent
* state, and so on a UP system, synchronize_sched() need do nothing.
* Ditto for synchronize_rcu_bh(). (But Lai Jiangshan points out the
* benefits of doing might_sleep() to reduce latency.)
*
* Cool, huh? (Due to Josh Triplett.)
*
* But we want to make this a static inline later.
*/
void synchronize_sched(void)
{
cond_resched();
}
EXPORT_SYMBOL_GPL(synchronize_sched);
void synchronize_rcu_bh(void)
{
synchronize_sched();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
/*
* Helper function for call_rcu() and call_rcu_bh().
*/
static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_ctrlblk *rcp)
{
unsigned long flags;
head->func = func;
head->next = NULL;
local_irq_save(flags);
*rcp->curtail = head;
rcp->curtail = &head->next;
local_irq_restore(flags);
}
/*
* Post an RCU callback to be invoked after the end of an RCU grace
* period. But since we have but one CPU, that would be after any
* quiescent state.
*/
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_ctrlblk);
}
EXPORT_SYMBOL_GPL(call_rcu);
/*
* Post an RCU bottom-half callback to be invoked after any subsequent
* quiescent state.
*/
void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_bh_ctrlblk);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
void rcu_barrier(void)
{
struct rcu_synchronize rcu;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
void rcu_barrier_bh(void)
{
struct rcu_synchronize rcu;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_bh(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(rcu_barrier_bh);
void rcu_barrier_sched(void)
{
struct rcu_synchronize rcu;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_sched(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);
void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}

kernel/rcutorture.c

@@ -327,6 +327,11 @@ rcu_torture_cb(struct rcu_head *p)
cur_ops->deferred_free(rp);
}
static int rcu_no_completed(void)
{
return 0;
}
static void rcu_torture_deferred_free(struct rcu_torture *p)
{
call_rcu(&p->rtort_rcu, rcu_torture_cb);
@@ -388,6 +393,21 @@ static struct rcu_torture_ops rcu_sync_ops = {
.name = "rcu_sync"
};
static struct rcu_torture_ops rcu_expedited_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_torture_read_unlock,
.completed = rcu_no_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_expedited,
.cb_barrier = NULL,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_expedited"
};
/*
* Definitions for rcu_bh torture testing.
*/
@@ -547,6 +567,25 @@ static struct rcu_torture_ops srcu_ops = {
.name = "srcu"
};
static void srcu_torture_synchronize_expedited(void)
{
synchronize_srcu_expedited(&srcu_ctl);
}
static struct rcu_torture_ops srcu_expedited_ops = {
.init = srcu_torture_init,
.cleanup = srcu_torture_cleanup,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
.completed = srcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = srcu_torture_synchronize_expedited,
.cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu_expedited"
};
/*
* Definitions for sched torture testing.
*/
@@ -562,11 +601,6 @@ static void sched_torture_read_unlock(int idx)
preempt_enable();
}
static int sched_torture_completed(void)
{
return 0;
}
static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
{
call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
@@ -583,7 +617,7 @@ static struct rcu_torture_ops sched_ops = {
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
.completed = sched_torture_completed,
.completed = rcu_no_completed,
.deferred_free = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = rcu_barrier_sched,
@@ -592,13 +626,13 @@
.name = "sched"
};
static struct rcu_torture_ops sched_ops_sync = {
static struct rcu_torture_ops sched_sync_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
.completed = sched_torture_completed,
.completed = rcu_no_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = NULL,
@@ -612,7 +646,7 @@ static struct rcu_torture_ops sched_expedited_ops = {
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
.completed = sched_torture_completed,
.completed = rcu_no_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_sched_expedited,
.cb_barrier = NULL,
@@ -1097,9 +1131,10 @@ rcu_torture_init(void)
int cpu;
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
&sched_expedited_ops,
&srcu_ops, &sched_ops, &sched_ops_sync, };
{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
&rcu_bh_ops, &rcu_bh_sync_ops,
&srcu_ops, &srcu_expedited_ops,
&sched_ops, &sched_sync_ops, &sched_expedited_ops, };
mutex_lock(&fullstop_mutex);
@@ -1110,8 +1145,12 @@
break;
}
if (i == ARRAY_SIZE(torture_ops)) {
printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n",
printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n",
torture_type);
printk(KERN_ALERT "rcu-torture types:");
for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
printk(KERN_ALERT " %s", torture_ops[i]->name);
printk(KERN_ALERT "\n");
mutex_unlock(&fullstop_mutex);
return -EINVAL;
}

kernel/rcutree.c

@@ -46,18 +46,22 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
#include "rcutree.h"
/* Data structures. */
static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
#define RCU_STATE_INITIALIZER(name) { \
.level = { &name.node[0] }, \
.levelcnt = { \
NUM_RCU_LVL_0, /* root of hierarchy. */ \
NUM_RCU_LVL_1, \
NUM_RCU_LVL_2, \
NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \
NUM_RCU_LVL_3, \
NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
}, \
.signaled = RCU_GP_IDLE, \
.gpnum = -300, \
@@ -77,6 +81,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
static int rcu_scheduler_active __read_mostly;
/*
* Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
@@ -98,7 +104,7 @@ void rcu_sched_qs(int cpu)
struct rcu_data *rdp;
rdp = &per_cpu(rcu_sched_data, cpu);
rdp->passed_quiesc_completed = rdp->completed;
rdp->passed_quiesc_completed = rdp->gpnum - 1;
barrier();
rdp->passed_quiesc = 1;
rcu_preempt_note_context_switch(cpu);
@@ -109,7 +115,7 @@ void rcu_bh_qs(int cpu)
struct rcu_data *rdp;
rdp = &per_cpu(rcu_bh_data, cpu);
rdp->passed_quiesc_completed = rdp->completed;
rdp->passed_quiesc_completed = rdp->gpnum - 1;
barrier();
rdp->passed_quiesc = 1;
}
@@ -335,27 +341,8 @@ void rcu_irq_exit(void)
set_need_resched();
}
/*
* Record the specified "completed" value, which is later used to validate
* dynticks counter manipulations. Specify "rsp->completed - 1" to
* unconditionally invalidate any future dynticks manipulations (which is
* useful at the beginning of a grace period).
*/
static void dyntick_record_completed(struct rcu_state *rsp, long comp)
{
rsp->dynticks_completed = comp;
}
#ifdef CONFIG_SMP
/*
* Recall the previously recorded value of the completion for dynticks.
*/
static long dyntick_recall_completed(struct rcu_state *rsp)
{
return rsp->dynticks_completed;
}
/*
* Snapshot the specified CPU's dynticks counter so that we can later
* credit them with an implicit quiescent state. Return 1 if this CPU
@@ -419,24 +406,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
#else /* #ifdef CONFIG_NO_HZ */
static void dyntick_record_completed(struct rcu_state *rsp, long comp)
{
}
#ifdef CONFIG_SMP
/*
* If there are no dynticks, then the only way that a CPU can passively
* be in a quiescent state is to be offline. Unlike dynticks idle, which
* is a point in time during the prior (already finished) grace period,
* an offline CPU is always in a quiescent state, and thus can be
* unconditionally applied. So just return the current value of completed.
*/
static long dyntick_recall_completed(struct rcu_state *rsp)
{
return rsp->completed;
}
static int dyntick_save_progress_counter(struct rcu_data *rdp)
{
return 0;
@@ -553,13 +524,33 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
/*
* Update CPU-local rcu_data state to record the newly noticed grace period.
* This is used both when we started the grace period and when we notice
* that someone else started the grace period.
* that someone else started the grace period. The caller must hold the
* ->lock of the leaf rcu_node structure corresponding to the current CPU,
* and must have irqs disabled.
*/
static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
{
if (rdp->gpnum != rnp->gpnum) {
rdp->qs_pending = 1;
rdp->passed_quiesc = 0;
rdp->gpnum = rnp->gpnum;
}
}
static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
{
rdp->qs_pending = 1;
rdp->passed_quiesc = 0;
rdp->gpnum = rsp->gpnum;
unsigned long flags;
struct rcu_node *rnp;
local_irq_save(flags);
rnp = rdp->mynode;
if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
!spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */
local_irq_restore(flags);
return;
}
__note_new_gpnum(rsp, rnp, rdp);
spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@@ -583,31 +574,59 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
}
/*
* Start a new RCU grace period if warranted, re-initializing the hierarchy
* in preparation for detecting the next grace period. The caller must hold
* the root node's ->lock, which is released before return. Hard irqs must
* be disabled.
* Advance this CPU's callbacks, but only if the current grace period
* has ended. This may be called only from the CPU to whom the rdp
* belongs. In addition, the corresponding leaf rcu_node structure's
* ->lock must be held by the caller, with irqs disabled.
*/
static void
rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
{
struct rcu_data *rdp = rsp->rda[smp_processor_id()];
struct rcu_node *rnp = rcu_get_root(rsp);
/* Did another grace period end? */
if (rdp->completed != rnp->completed) {
if (!cpu_needs_another_gp(rsp, rdp)) {
spin_unlock_irqrestore(&rnp->lock, flags);
/* Advance callbacks. No harm if list empty. */
rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
/* Remember that we saw this grace-period completion. */
rdp->completed = rnp->completed;
}
}
/*
* Advance this CPU's callbacks, but only if the current grace period
* has ended. This may be called only from the CPU to whom the rdp
* belongs.
*/
static void
rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
{
unsigned long flags;
struct rcu_node *rnp;
local_irq_save(flags);
rnp = rdp->mynode;
if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */
!spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */
local_irq_restore(flags);
return;
}
__rcu_process_gp_end(rsp, rnp, rdp);
spin_unlock_irqrestore(&rnp->lock, flags);
}
/* Advance to a new grace period and initialize state. */
rsp->gpnum++;
WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
record_gp_stall_check_time(rsp);
dyntick_record_completed(rsp, rsp->completed - 1);
note_new_gpnum(rsp, rdp);
/*
* Do per-CPU grace-period initialization for running CPU. The caller
* must hold the lock of the leaf rcu_node structure corresponding to
* this CPU.
*/
static void
rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
{
/* Prior grace period ended, so advance callbacks for current CPU. */
__rcu_process_gp_end(rsp, rnp, rdp);
/*
* Because this CPU just now started the new grace period, we know
@@ -623,12 +642,59 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
/* Set state so that this CPU will detect the next quiescent state. */
__note_new_gpnum(rsp, rnp, rdp);
}
/*
* Start a new RCU grace period if warranted, re-initializing the hierarchy
* in preparation for detecting the next grace period. The caller must hold
* the root node's ->lock, which is released before return. Hard irqs must
* be disabled.
*/
static void
rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
{
struct rcu_data *rdp = rsp->rda[smp_processor_id()];
struct rcu_node *rnp = rcu_get_root(rsp);
if (!cpu_needs_another_gp(rsp, rdp)) {
if (rnp->completed == rsp->completed) {
spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
spin_unlock(&rnp->lock); /* irqs remain disabled. */
/*
* Propagate new ->completed value to rcu_node structures
* so that other CPUs don't have to wait until the start
* of the next grace period to process their callbacks.
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->completed = rsp->completed;
spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
local_irq_restore(flags);
return;
}
/* Advance to a new grace period and initialize state. */
rsp->gpnum++;
WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
record_gp_stall_check_time(rsp);
/* Special-case the common single-level case. */
if (NUM_RCU_NODES == 1) {
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
rnp->completed = rsp->completed;
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
rcu_start_gp_per_cpu(rsp, rnp, rdp);
spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
@@ -661,6 +727,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
rnp->completed = rsp->completed;
if (rnp == rdp->mynode)
rcu_start_gp_per_cpu(rsp, rnp, rdp);
spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
@@ -672,58 +741,32 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
}
/*
* Advance this CPU's callbacks, but only if the current grace period
* has ended. This may be called only from the CPU to whom the rdp
* belongs.
* Report a full set of quiescent states to the specified rcu_state
* data structure. This involves cleaning up after the prior grace
* period and letting rcu_start_gp() start up the next grace period
* if one is needed. Note that the caller must hold rnp->lock, as
* required by rcu_start_gp(), which will release it.
*/
static void
rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
{
long completed_snap;
unsigned long flags;
local_irq_save(flags);
completed_snap = ACCESS_ONCE(rsp->completed); /* outside of lock. */
/* Did another grace period end? */
if (rdp->completed != completed_snap) {
/* Advance callbacks. No harm if list empty. */
rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
/* Remember that we saw this grace-period completion. */
rdp->completed = completed_snap;
}
local_irq_restore(flags);
}
/*
* Clean up after the prior grace period and let rcu_start_gp() start up
* the next grace period if one is needed. Note that the caller must
* hold rnp->lock, as required by rcu_start_gp(), which will release it.
*/
static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags)
static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
{
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
rsp->completed = rsp->gpnum;
rsp->signaled = RCU_GP_IDLE;
rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
}
/*
* Similar to cpu_quiet(), for which it is a helper function. Allows
* a group of CPUs to be quieted at one go, though all the CPUs in the
* group must be represented by the same leaf rcu_node structure.
* That structure's lock must be held upon entry, and it is released
* before return.
* Similar to rcu_report_qs_rdp(), for which it is a helper function.
* Allows quiescent states for a group of CPUs to be reported at one go
* to the specified rcu_node structure, though all the CPUs in the group
* must be represented by the same rcu_node structure (which need not be
* a leaf rcu_node structure, though it often will be). That structure's
* lock must be held upon entry, and it is released before return.
*/
static void
cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
unsigned long flags)
rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
struct rcu_node *rnp, unsigned long flags)
__releases(rnp->lock)
{
struct rcu_node *rnp_c;
@@ -759,21 +802,23 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
/*
* Get here if we are the last CPU to pass through a quiescent
* state for this grace period. Invoke cpu_quiet_msk_finish()
* state for this grace period. Invoke rcu_report_qs_rsp()
* to clean up and start the next grace period if one is needed.
*/
cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */
rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
}
/*
* Record a quiescent state for the specified CPU, which must either be
* the current CPU. The lastcomp argument is used to make sure we are
* still in the grace period of interest. We don't want to end the current
* grace period based on quiescent states detected in an earlier grace
* period!
* Record a quiescent state for the specified CPU to that CPU's rcu_data
* structure. This must be either called from the specified CPU, or
* called when the specified CPU is known to be offline (and when it is
* also known that no other CPU is concurrently trying to help the offline
* CPU). The lastcomp argument is used to make sure we are still in the
* grace period of interest. We don't want to end the current grace period
* based on quiescent states detected in an earlier grace period!
*/
static void
cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
{
unsigned long flags;
unsigned long mask;
@@ -781,15 +826,15 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
rnp = rdp->mynode;
spin_lock_irqsave(&rnp->lock, flags);
if (lastcomp != ACCESS_ONCE(rsp->completed)) {
if (lastcomp != rnp->completed) {
/*
* Someone beat us to it for this grace period, so leave.
* The race with GP start is resolved by the fact that we
* hold the leaf rcu_node lock, so that the per-CPU bits
* cannot yet be initialized -- so we would simply find our
* CPU's bit already cleared in cpu_quiet_msk() if this race
* occurred.
* CPU's bit already cleared in rcu_report_qs_rnp() if this
* race occurred.
*/
rdp->passed_quiesc = 0; /* try again later! */
spin_unlock_irqrestore(&rnp->lock, flags);
@@ -807,7 +852,7 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
*/
rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */
rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
}
}
@@ -838,8 +883,11 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
if (!rdp->passed_quiesc)
return;
/* Tell RCU we are done (but cpu_quiet() will be the judge of that). */
cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
/*
* Tell RCU we are done (but rcu_report_qs_rdp() will be the
* judge of that).
*/
rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -899,8 +947,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
{
unsigned long flags;
long lastcomp;
unsigned long mask;
int need_report = 0;
struct rcu_data *rdp = rsp->rda[cpu];
struct rcu_node *rnp;
@@ -914,30 +962,32 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->qsmaskinit &= ~mask;
if (rnp->qsmaskinit != 0) {
spin_unlock(&rnp->lock); /* irqs remain disabled. */
if (rnp != rdp->mynode)
spin_unlock(&rnp->lock); /* irqs remain disabled. */
break;
}
/*
* If there was a task blocking the current grace period,
* and if all CPUs have checked in, we need to propagate
* the quiescent state up the rcu_node hierarchy. But that
* is inconvenient at the moment due to deadlock issues if
* this should end the current grace period. So set the
* offlined CPU's bit in ->qsmask in order to force the
* next force_quiescent_state() invocation to clean up this
* mess in a deadlock-free manner.
*/
if (rcu_preempt_offline_tasks(rsp, rnp, rdp) && !rnp->qsmask)
rnp->qsmask |= mask;
if (rnp == rdp->mynode)
need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
else
spin_unlock(&rnp->lock); /* irqs remain disabled. */
mask = rnp->grpmask;
spin_unlock(&rnp->lock); /* irqs remain disabled. */
rnp = rnp->parent;
} while (rnp != NULL);
lastcomp = rsp->completed;
spin_unlock_irqrestore(&rsp->onofflock, flags);
/*
* We still hold the leaf rcu_node structure lock here, and
* irqs are still disabled. The reason for this subterfuge is
* because invoking rcu_report_unblock_qs_rnp() with ->onofflock
* held leads to deadlock.
*/
spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
rnp = rdp->mynode;
if (need_report & RCU_OFL_TASKS_NORM_GP)
rcu_report_unblock_qs_rnp(rnp, flags);
else
spin_unlock_irqrestore(&rnp->lock, flags);
if (need_report & RCU_OFL_TASKS_EXP_GP)
rcu_report_exp_rnp(rsp, rnp);
rcu_adopt_orphan_cbs(rsp);
}
@@ -1109,7 +1159,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
rcu_for_each_leaf_node(rsp, rnp) {
mask = 0;
spin_lock_irqsave(&rnp->lock, flags);
if (rsp->completed != lastcomp) {
if (rnp->completed != lastcomp) {
spin_unlock_irqrestore(&rnp->lock, flags);
return 1;
}
@@ -1123,10 +1173,10 @@
if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu]))
mask |= bit;
}
if (mask != 0 && rsp->completed == lastcomp) {
if (mask != 0 && rnp->completed == lastcomp) {
/* cpu_quiet_msk() releases rnp->lock. */
cpu_quiet_msk(mask, rsp, rnp, flags);
/* rcu_report_qs_rnp() releases rnp->lock. */
rcu_report_qs_rnp(mask, rsp, rnp, flags);
continue;
}
spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1144,6 +1194,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
long lastcomp;
struct rcu_node *rnp = rcu_get_root(rsp);
u8 signaled;
u8 forcenow;
if (!rcu_gp_in_progress(rsp))
return; /* No grace period in progress, nothing to force. */
@@ -1156,10 +1207,10 @@
goto unlock_ret; /* no emergency and done recently. */
rsp->n_force_qs++;
spin_lock(&rnp->lock);
lastcomp = rsp->completed;
lastcomp = rsp->gpnum - 1;
signaled = rsp->signaled;
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
if (lastcomp == rsp->gpnum) {
if (!rcu_gp_in_progress(rsp)) {
rsp->n_force_qs_ngp++;
spin_unlock(&rnp->lock);
goto unlock_ret; /* no GP in progress, time updated. */
@@ -1180,21 +1231,29 @@
if (rcu_process_dyntick(rsp, lastcomp,
dyntick_save_progress_counter))
goto unlock_ret;
/* fall into next case. */
case RCU_SAVE_COMPLETED:
/* Update state, record completion counter. */
forcenow = 0;
spin_lock(&rnp->lock);
if (lastcomp == rsp->completed &&
rsp->signaled == RCU_SAVE_DYNTICK) {
if (lastcomp + 1 == rsp->gpnum &&
lastcomp == rsp->completed &&
rsp->signaled == signaled) {
rsp->signaled = RCU_FORCE_QS;
dyntick_record_completed(rsp, lastcomp);
rsp->completed_fqs = lastcomp;
forcenow = signaled == RCU_SAVE_COMPLETED;
}
spin_unlock(&rnp->lock);
break;
if (!forcenow)
break;
/* fall into next case. */
case RCU_FORCE_QS:
/* Check dyntick-idle state, send IPI to laggarts. */
if (rcu_process_dyntick(rsp, dyntick_recall_completed(rsp),
if (rcu_process_dyntick(rsp, rsp->completed_fqs,
rcu_implicit_dynticks_qs))
goto unlock_ret;
@@ -1351,6 +1410,68 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
/**
* synchronize_sched - wait until an rcu-sched grace period has elapsed.
*
* Control will return to the caller some time after a full rcu-sched
* grace period has elapsed, in other words after all currently executing
* rcu-sched read-side critical sections have completed. These read-side
* critical sections are delimited by rcu_read_lock_sched() and
* rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
* local_irq_disable(), and so on may be used in place of
* rcu_read_lock_sched().
*
* This means that all preempt_disable code sequences, including NMI and
* hardware-interrupt handlers, in progress on entry will have completed
* before this primitive returns. However, this does not guarantee that
* softirq handlers will have completed, since in some kernels, these
* handlers can run in process context, and can block.
*
* This primitive provides the guarantees made by the (now removed)
* synchronize_kernel() API. In contrast, synchronize_rcu() only
* guarantees that rcu_read_lock() sections will have completed.
* In "classic RCU", these two guarantees happen to be one and
* the same, but can differ in realtime RCU implementations.
*/
void synchronize_sched(void)
{
struct rcu_synchronize rcu;
if (rcu_blocking_is_gp())
return;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_sched(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_sched);
/**
* synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
*
* Control will return to the caller some time after a full rcu_bh grace
* period has elapsed, in other words after all currently executing rcu_bh
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
* and may be nested.
*/
void synchronize_rcu_bh(void)
{
struct rcu_synchronize rcu;
if (rcu_blocking_is_gp())
return;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_bh(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
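Typical update-side use of these grace-period waits (hedged sketch; the foo structure, global_foo pointer, and foo_lock are hypothetical):

struct foo {
	int data;
};

static struct foo *global_foo;	/* hypothetical RCU-protected pointer */
static DEFINE_SPINLOCK(foo_lock);

void update_foo(struct foo *newp)
{
	struct foo *oldp;

	spin_lock(&foo_lock);
	oldp = global_foo;
	rcu_assign_pointer(global_foo, newp);	/* publish the replacement */
	spin_unlock(&foo_lock);

	synchronize_sched();	/* wait out all preempt-disabled readers */
	kfree(oldp);		/* no reader can still hold the old version */
}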
/*
* Check to see if there is any immediate RCU-related work to be done
* by the current CPU, for the specified type of RCU, returning 1 if so.
@@ -1360,6 +1481,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
*/
static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
{
struct rcu_node *rnp = rdp->mynode;
rdp->n_rcu_pending++;
/* Check for CPU stalls, if enabled. */
@@ -1384,13 +1507,13 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
}
/* Has another RCU grace period completed? */
if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */
if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
rdp->n_rp_gp_completed++;
return 1;
}
/* Has a new RCU grace period started? */
if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */
if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
rdp->n_rp_gp_started++;
return 1;
}
@@ -1433,6 +1556,21 @@ int rcu_needs_cpu(int cpu)
rcu_preempt_needs_cpu(cpu);
}
/*
* This function is invoked towards the end of the scheduler's initialization
* process. Before this is called, the idle task might contain
* RCU read-side critical sections (during which time, this idle
* task is booting the system). After this function is called, the
* idle tasks are prohibited from containing RCU read-side critical
* sections.
*/
void rcu_scheduler_starting(void)
{
WARN_ON(num_online_cpus() != 1);
WARN_ON(nr_context_switches() > 0);
rcu_scheduler_active = 1;
}
static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
static atomic_t rcu_barrier_cpu_count;
static DEFINE_MUTEX(rcu_barrier_mutex);
@@ -1544,21 +1682,16 @@ static void __cpuinit
rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
{
unsigned long flags;
long lastcomp;
unsigned long mask;
struct rcu_data *rdp = rsp->rda[cpu];
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
spin_lock_irqsave(&rnp->lock, flags);
lastcomp = rsp->completed;
rdp->completed = lastcomp;
rdp->gpnum = lastcomp;
rdp->passed_quiesc = 0; /* We could be racing with new GP, */
rdp->qs_pending = 1; /* so set up to respond to current GP. */
rdp->beenonline = 1; /* We have now been online. */
rdp->preemptable = preemptable;
rdp->passed_quiesc_completed = lastcomp - 1;
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
@@ -1580,6 +1713,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->qsmaskinit |= mask;
mask = rnp->grpmask;
if (rnp == rdp->mynode) {
rdp->gpnum = rnp->completed; /* if GP in progress... */
rdp->completed = rnp->completed;
rdp->passed_quiesc_completed = rnp->completed - 1;
}
spin_unlock(&rnp->lock); /* irqs already disabled. */
rnp = rnp->parent;
} while (rnp != NULL && !(rnp->qsmaskinit & mask));
@@ -1597,8 +1735,8 @@ static void __cpuinit rcu_online_cpu(int cpu)
/*
* Handle CPU online/offline notification events.
*/
int __cpuinit rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -1685,8 +1823,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
cpustride *= rsp->levelspread[i];
rnp = rsp->level[i];
for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
if (rnp != rcu_get_root(rsp))
spin_lock_init(&rnp->lock);
spin_lock_init(&rnp->lock);
lockdep_set_class(&rnp->lock, &rcu_node_class[i]);
rnp->gpnum = 0;
rnp->qsmask = 0;
rnp->qsmaskinit = 0;
@@ -1707,9 +1845,10 @@ static void __init rcu_init_one(struct rcu_state *rsp)
rnp->level = i;
INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
INIT_LIST_HEAD(&rnp->blocked_tasks[2]);
INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
}
}
spin_lock_init(&rcu_get_root(rsp)->lock);
}
/*
@@ -1735,16 +1874,30 @@ do { \
} \
} while (0)
void __init __rcu_init(void)
void __init rcu_init(void)
{
int i;
rcu_bootup_announce();
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
#if NUM_RCU_LVL_4 != 0
printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n");
#endif /* #if NUM_RCU_LVL_4 != 0 */
RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
__rcu_init_preempt();
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
/*
* We don't need protection against CPU-hotplug here because
* this is called early in boot, before either interrupts
* or the scheduler are operational.
*/
cpu_notifier(rcu_cpu_notify, 0);
for_each_online_cpu(i)
rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i);
}
#include "rcutree_plugin.h"

kernel/rcutree.h

@@ -34,10 +34,11 @@
* In practice, this has not been tested, so there is probably some
* bug somewhere.
*/
#define MAX_RCU_LVLS 3
#define MAX_RCU_LVLS 4
#define RCU_FANOUT (CONFIG_RCU_FANOUT)
#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT)
#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT)
#define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT)
#if NR_CPUS <= RCU_FANOUT
# define NUM_RCU_LVLS 1
@@ -45,23 +46,33 @@
# define NUM_RCU_LVL_1 (NR_CPUS)
# define NUM_RCU_LVL_2 0
# define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_SQ
# define NUM_RCU_LVLS 2
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
# define NUM_RCU_LVL_2 (NR_CPUS)
# define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_CUBE
# define NUM_RCU_LVLS 3
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
# define NUM_RCU_LVL_3 NR_CPUS
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_FOURTH
# define NUM_RCU_LVLS 4
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
# define NUM_RCU_LVL_4 NR_CPUS
#else
# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
#endif /* #if (NR_CPUS) <= RCU_FANOUT */
#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
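A worked instance of this sizing arithmetic (assuming CONFIG_RCU_FANOUT=64 and NR_CPUS=4096, which lands in the two-level case):

/*
 * RCU_FANOUT     = 64
 * RCU_FANOUT_SQ  = 4096         NR_CPUS <= RCU_FANOUT_SQ => 2 levels
 * NUM_RCU_LVL_0  = 1                              (root rcu_node)
 * NUM_RCU_LVL_1  = DIV_ROUND_UP(4096, 64) = 64    (leaf rcu_nodes)
 * NUM_RCU_LVL_2  = 4096                           (per-CPU rcu_data slots)
 *
 * RCU_SUM        = 1 + 64 + 4096 = 4161
 * NUM_RCU_NODES  = RCU_SUM - NR_CPUS = 65         (1 root + 64 leaves)
 */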
/*
@@ -84,14 +95,21 @@ struct rcu_node {
long gpnum; /* Current grace period for this node. */
/* This will either be equal to or one */
/* behind the root rcu_node's gpnum. */
long completed; /* Last grace period completed for this node. */
/* This will either be equal to or one */
/* behind the root rcu_node's gpnum. */
unsigned long qsmask; /* CPUs or groups that need to switch in */
/* order for current grace period to proceed.*/
/* In leaf rcu_node, each bit corresponds to */
/* an rcu_data structure, otherwise, each */
/* bit corresponds to a child rcu_node */
/* structure. */
unsigned long expmask; /* Groups that have ->blocked_tasks[] */
/* elements that need to drain to allow the */
/* current expedited grace period to */
/* complete (only for TREE_PREEMPT_RCU). */
unsigned long qsmaskinit;
/* Per-GP initialization for qsmask. */
/* Per-GP initial value for qsmask & expmask. */
unsigned long grpmask; /* Mask to apply to parent qsmask. */
/* Only one bit will be set in this mask. */
int grplo; /* lowest-numbered CPU or group here. */
@ -99,7 +117,7 @@ struct rcu_node {
u8 grpnum; /* CPU/group number for next level up. */
u8 level; /* root is at level 0. */
struct rcu_node *parent;
struct list_head blocked_tasks[2];
struct list_head blocked_tasks[4];
/* Tasks blocked in RCU read-side critsect. */
/* Grace period number (->gpnum) x blocked */
/* by tasks on the (x & 0x1) element of the */
@ -114,6 +132,21 @@ struct rcu_node {
for ((rnp) = &(rsp)->node[0]; \
(rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
/*
* Do a breadth-first scan of the non-leaf rcu_node structures for the
* specified rcu_state structure. Note that if there is a singleton
* rcu_node tree with but one rcu_node structure, this loop is a no-op.
*/
#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
for ((rnp) = &(rsp)->node[0]; \
(rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
/*
* Scan the leaves of the rcu_node hierarchy for the specified rcu_state
* structure. Note that if there is a singleton rcu_node tree with but
* one rcu_node structure, this loop -will- visit the rcu_node structure.
* It is still a leaf node, even if it is also the root node.
*/
#define rcu_for_each_leaf_node(rsp, rnp) \
for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
(rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
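/*
 * Illustrative use of the two traversal helpers (a sketch in the spirit
 * of the expedited-grace-period code later in this commit; locking is
 * elided for brevity): touch every non-leaf node, then every leaf node.
 */
static void __maybe_unused clear_all_expmasks(struct rcu_state *rsp)
{
	struct rcu_node *rnp;

	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp)
		rnp->expmask = 0;	/* interior nodes, root first */
	rcu_for_each_leaf_node(rsp, rnp)
		rnp->expmask = 0;	/* leaves, including a lone root */
}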
@ -204,11 +237,12 @@ struct rcu_data {
#define RCU_GP_IDLE 0 /* No grace period in progress. */
#define RCU_GP_INIT 1 /* Grace period being initialized. */
#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
#define RCU_SAVE_COMPLETED 3 /* Need to save rsp->completed. */
#define RCU_FORCE_QS 4 /* Need to force quiescent state. */
#ifdef CONFIG_NO_HZ
#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
#else /* #ifdef CONFIG_NO_HZ */
#define RCU_SIGNAL_INIT RCU_FORCE_QS
#define RCU_SIGNAL_INIT RCU_SAVE_COMPLETED
#endif /* #else #ifdef CONFIG_NO_HZ */
#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
@ -246,7 +280,7 @@ struct rcu_state {
long gpnum; /* Current gp number. */
long completed; /* # of last completed gp. */
/* End of fields guarded by root rcu_node's lock. */
spinlock_t onofflock; /* exclude on/offline and */
/* starting new GP. Also */
@ -260,6 +294,8 @@ struct rcu_state {
long orphan_qlen; /* Number of orphaned cbs. */
spinlock_t fqslock; /* Only one task forcing */
/* quiescent states. */
long completed_fqs; /* Value of completed @ snap. */
/* Protected by fqslock. */
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
unsigned long n_force_qs; /* Number of calls to */
@ -274,11 +310,15 @@ struct rcu_state {
unsigned long jiffies_stall; /* Time at which to check */
/* for CPU stalls. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
#ifdef CONFIG_NO_HZ
long dynticks_completed; /* Value of completed @ snap. */
#endif /* #ifdef CONFIG_NO_HZ */
};
/* Return values for rcu_preempt_offline_tasks(). */
#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
/* GP were moved to root. */
#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
/* GP were moved to root. */
#ifdef RCU_TREE_NONCORE
/*
@ -298,10 +338,14 @@ DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
#else /* #ifdef RCU_TREE_NONCORE */
/* Forward declarations for rcutree_plugin.h */
static inline void rcu_bootup_announce(void);
static void rcu_bootup_announce(void);
long rcu_batches_completed(void);
static void rcu_preempt_note_context_switch(int cpu);
static int rcu_preempted_readers(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
unsigned long flags);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
static void rcu_print_task_stall(struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
@ -315,6 +359,9 @@ static void rcu_preempt_offline_cpu(int cpu);
static void rcu_preempt_check_callbacks(int cpu);
static void rcu_preempt_process_callbacks(void);
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
static int rcu_preempt_pending(int cpu);
static int rcu_preempt_needs_cpu(int cpu);
static void __cpuinit rcu_preempt_init_percpu_data(int cpu);

View file

@ -24,16 +24,19 @@
* Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
#include <linux/delay.h>
#ifdef CONFIG_TREE_PREEMPT_RCU
struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
static int rcu_preempted_readers_exp(struct rcu_node *rnp);
/*
* Tell them what RCU they are running.
*/
static inline void rcu_bootup_announce(void)
static void __init rcu_bootup_announce(void)
{
printk(KERN_INFO
"Experimental preemptable hierarchical RCU implementation.\n");
@ -67,7 +70,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
static void rcu_preempt_qs(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
rdp->passed_quiesc_completed = rdp->completed;
rdp->passed_quiesc_completed = rdp->gpnum - 1;
barrier();
rdp->passed_quiesc = 1;
}
@ -157,14 +160,58 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
*/
static int rcu_preempted_readers(struct rcu_node *rnp)
{
return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
int phase = rnp->gpnum & 0x1;
return !list_empty(&rnp->blocked_tasks[phase]) ||
!list_empty(&rnp->blocked_tasks[phase + 2]);
}
/*
* Record a quiescent state for all tasks that were previously queued
* on the specified rcu_node structure and that were blocking the current
* RCU grace period. The caller must hold the specified rnp->lock with
* irqs disabled, and this lock is released upon return, but irqs remain
* disabled.
*/
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
__releases(rnp->lock)
{
unsigned long mask;
struct rcu_node *rnp_p;
if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
spin_unlock_irqrestore(&rnp->lock, flags);
return; /* Still need more quiescent states! */
}
rnp_p = rnp->parent;
if (rnp_p == NULL) {
/*
* Either there is only one rcu_node in the tree,
* or tasks were kicked up to root rcu_node due to
* CPUs going offline.
*/
rcu_report_qs_rsp(&rcu_preempt_state, flags);
return;
}
/* Report up the rest of the hierarchy. */
mask = rnp->grpmask;
spin_unlock(&rnp->lock); /* irqs remain disabled. */
spin_lock(&rnp_p->lock); /* irqs already disabled. */
rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
}
/*
* Handle special cases during rcu_read_unlock(), such as needing to
* notify RCU core processing or task having blocked during the RCU
* read-side critical section.
*/
static void rcu_read_unlock_special(struct task_struct *t)
{
int empty;
int empty_exp;
unsigned long flags;
unsigned long mask;
struct rcu_node *rnp;
int special;
@ -207,36 +254,30 @@ static void rcu_read_unlock_special(struct task_struct *t)
spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
empty = !rcu_preempted_readers(rnp);
empty_exp = !rcu_preempted_readers_exp(rnp);
smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
list_del_init(&t->rcu_node_entry);
t->rcu_blocked_node = NULL;
/*
* If this was the last task on the current list, and if
* we aren't waiting on any CPUs, report the quiescent state.
* Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
*/
if (empty)
spin_unlock_irqrestore(&rnp->lock, flags);
else
rcu_report_unblock_qs_rnp(rnp, flags);
/*
* If this was the last task on the expedited lists,
* then we need to report up the rcu_node hierarchy.
*/
if (!empty_exp && !rcu_preempted_readers_exp(rnp))
rcu_report_exp_rnp(&rcu_preempt_state, rnp);
} else {
local_irq_restore(flags);
}
local_irq_restore(flags);
}
/*
@ -303,6 +344,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
* rcu_node. The reason for not just moving them to the immediate
* parent is to remove the need for rcu_read_unlock_special() to
* make more than two attempts to acquire the target rcu_node's lock.
*
* Returns a bitmask of RCU_OFL_TASKS_NORM_GP and/or RCU_OFL_TASKS_EXP_GP
* if tasks were blocking the current normal and/or expedited grace
* period on the specified rcu_node structure, or zero if none were.
@ -316,7 +359,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
int i;
struct list_head *lp;
struct list_head *lp_root;
int retval = rcu_preempted_readers(rnp);
int retval = 0;
struct rcu_node *rnp_root = rcu_get_root(rsp);
struct task_struct *tp;
@ -326,7 +369,9 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
}
WARN_ON_ONCE(rnp != rdp->mynode &&
(!list_empty(&rnp->blocked_tasks[0]) ||
!list_empty(&rnp->blocked_tasks[1])));
!list_empty(&rnp->blocked_tasks[1]) ||
!list_empty(&rnp->blocked_tasks[2]) ||
!list_empty(&rnp->blocked_tasks[3])));
/*
* Move tasks up to root rcu_node. Rely on the fact that the
@ -334,7 +379,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
* rcu_nodes in terms of gp_num value. This fact allows us to
* move the blocked_tasks[] array directly, element by element.
*/
for (i = 0; i < 2; i++) {
if (rcu_preempted_readers(rnp))
retval |= RCU_OFL_TASKS_NORM_GP;
if (rcu_preempted_readers_exp(rnp))
retval |= RCU_OFL_TASKS_EXP_GP;
for (i = 0; i < 4; i++) {
lp = &rnp->blocked_tasks[i];
lp_root = &rnp_root->blocked_tasks[i];
while (!list_empty(lp)) {
@ -346,7 +395,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
spin_unlock(&rnp_root->lock); /* irqs remain disabled */
}
}
return retval;
}
@ -398,14 +446,183 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
}
EXPORT_SYMBOL_GPL(call_rcu);
/**
* synchronize_rcu - wait until a grace period has elapsed.
*
* Control will return to the caller some time after a full grace
* period has elapsed, in other words after all currently executing RCU
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*/
void synchronize_rcu(void)
{
struct rcu_synchronize rcu;
if (!rcu_scheduler_active)
return;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
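/*
 * A minimal sketch of the canonical update pattern this primitive
 * enables (hypothetical "struct foo", not part of this patch):
 * unpublish the element, wait out pre-existing readers, then free.
 */
struct foo {
	struct list_head list;
	int data;
};

static void foo_remove(struct foo *p)	/* caller holds the update-side lock */
{
	list_del_rcu(&p->list);	/* readers can no longer find p... */
	synchronize_rcu();	/* ...and pre-existing readers have finished */
	kfree(p);		/* so it is now safe to reclaim */
}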
static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
static long sync_rcu_preempt_exp_count;
static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
/*
* Return non-zero if there are any tasks in RCU read-side critical
* sections blocking the current preemptible-RCU expedited grace period.
* If there is no preemptible-RCU expedited grace period currently in
* progress, returns zero unconditionally.
*/
static int rcu_preempted_readers_exp(struct rcu_node *rnp)
{
return !list_empty(&rnp->blocked_tasks[2]) ||
!list_empty(&rnp->blocked_tasks[3]);
}
/*
* Return non-zero if there is no RCU expedited grace period in progress
* for the specified rcu_node structure, in other words, if all CPUs and
* tasks covered by the specified rcu_node structure have done their bit
* for the current expedited grace period. Works only for preemptible
* RCU -- other RCU implementations use other means.
*
* Caller must hold sync_rcu_preempt_exp_mutex.
*/
static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{
return !rcu_preempted_readers_exp(rnp) &&
ACCESS_ONCE(rnp->expmask) == 0;
}
/*
* Report the exit from RCU read-side critical section for the last task
* that queued itself during or before the current expedited preemptible-RCU
* grace period. This event is reported either to the rcu_node structure on
* which the task was queued or to one of that rcu_node structure's ancestors,
* recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!)
*
* Caller must hold sync_rcu_preempt_exp_mutex.
*/
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
{
unsigned long flags;
unsigned long mask;
spin_lock_irqsave(&rnp->lock, flags);
for (;;) {
if (!sync_rcu_preempt_exp_done(rnp))
break;
if (rnp->parent == NULL) {
wake_up(&sync_rcu_preempt_exp_wq);
break;
}
mask = rnp->grpmask;
spin_unlock(&rnp->lock); /* irqs remain disabled */
rnp = rnp->parent;
spin_lock(&rnp->lock); /* irqs already disabled */
rnp->expmask &= ~mask;
}
spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
* Snapshot the tasks blocking the newly started preemptible-RCU expedited
* grace period for the specified rcu_node structure. If there are no such
* tasks, report it up the rcu_node hierarchy.
*
* Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
*/
static void
sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
{
int must_wait;
spin_lock(&rnp->lock); /* irqs already disabled */
list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
must_wait = rcu_preempted_readers_exp(rnp);
spin_unlock(&rnp->lock); /* irqs remain disabled */
if (!must_wait)
rcu_report_exp_rnp(rsp, rnp);
}
/*
* Wait for an rcu-preempt grace period, but expedite it. The basic idea
* is to invoke synchronize_sched_expedited() to push all the tasks to
* the ->blocked_tasks[] lists, move all entries from the first set of
* ->blocked_tasks[] lists to the second set, and finally wait for this
* second set to drain.
*/
void synchronize_rcu_expedited(void)
{
unsigned long flags;
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_preempt_state;
long snap;
int trycount = 0;
smp_mb(); /* Caller's modifications seen first by other CPUs. */
snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
smp_mb(); /* Above access cannot bleed into critical section. */
/*
* Acquire lock, falling back to synchronize_rcu() if too many
* lock-acquisition failures. Of course, if someone does the
* expedited grace period for us, just leave.
*/
while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
if (trycount++ < 10)
udelay(trycount * num_online_cpus());
else {
synchronize_rcu();
return;
}
if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
goto mb_ret; /* Others did our work for us. */
}
if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
goto unlock_mb_ret; /* Others did our work for us. */
/* force all RCU readers onto blocked_tasks[]. */
synchronize_sched_expedited();
spin_lock_irqsave(&rsp->onofflock, flags);
/* Initialize ->expmask for all non-leaf rcu_node structures. */
rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->expmask = rnp->qsmaskinit;
spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
/* Snapshot current state of ->blocked_tasks[] lists. */
rcu_for_each_leaf_node(rsp, rnp)
sync_rcu_preempt_exp_init(rsp, rnp);
if (NUM_RCU_NODES > 1)
sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
spin_unlock_irqrestore(&rsp->onofflock, flags);
/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
rnp = rcu_get_root(rsp);
wait_event(sync_rcu_preempt_exp_wq,
sync_rcu_preempt_exp_done(rnp));
/* Clean up and exit. */
smp_mb(); /* ensure expedited GP seen before counter increment. */
ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
unlock_mb_ret:
mutex_unlock(&sync_rcu_preempt_exp_mutex);
mb_ret:
smp_mb(); /* ensure subsequent action seen after grace period. */
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
@ -481,7 +698,7 @@ void exit_rcu(void)
/*
* Tell them what RCU they are running.
*/
static inline void rcu_bootup_announce(void)
static void __init rcu_bootup_announce(void)
{
printk(KERN_INFO "Hierarchical RCU implementation.\n");
}
@ -512,6 +729,16 @@ static int rcu_preempted_readers(struct rcu_node *rnp)
return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
/* Because preemptible RCU does not exist, no quieting of tasks. */
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
{
spin_unlock_irqrestore(&rnp->lock, flags);
}
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
/*
@ -594,6 +821,20 @@ void synchronize_rcu_expedited(void)
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
#ifdef CONFIG_HOTPLUG_CPU
/*
* Because preemptable RCU does not exist, there is never any need to
* report on tasks preempted in RCU read-side critical sections during
* expedited RCU grace periods.
*/
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
{
return;
}
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
/*
* Because preemptable RCU does not exist, it never has any work to do.
*/

View file

@ -155,12 +155,15 @@ static const struct file_operations rcudata_csv_fops = {
static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
{
long gpnum;
int level = 0;
int phase;
struct rcu_node *rnp;
gpnum = rsp->gpnum;
seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x "
"nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n",
rsp->completed, rsp->gpnum, rsp->signaled,
rsp->completed, gpnum, rsp->signaled,
(long)(rsp->jiffies_force_qs - jiffies),
(int)(jiffies & 0xffff),
rsp->n_force_qs, rsp->n_force_qs_ngp,
@ -171,8 +174,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
seq_puts(m, "\n");
level = rnp->level;
}
seq_printf(m, "%lx/%lx %d:%d ^%d ",
phase = gpnum & 0x1;
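/*
 * Note on the idiom below: "T."[expr] indexes a two-character string
 * constant -- list_empty() returns 1 for an empty list, selecting '.',
 * and 0 otherwise, selecting 'T' ('E' likewise for the expedited lists).
 */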
seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ",
rnp->qsmask, rnp->qsmaskinit,
"T."[list_empty(&rnp->blocked_tasks[phase])],
"E."[list_empty(&rnp->blocked_tasks[phase + 2])],
"T."[list_empty(&rnp->blocked_tasks[!phase])],
"E."[list_empty(&rnp->blocked_tasks[!phase + 2])],
rnp->grplo, rnp->grphi, rnp->grpnum);
}
seq_puts(m, "\n");

View file

@ -5481,7 +5481,7 @@ need_resched_nonpreemptible:
}
EXPORT_SYMBOL(schedule);
#ifdef CONFIG_SMP
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
* Look out! "owner" is an entirely speculative pointer
* access and not reliable.
@ -10901,6 +10901,7 @@ void synchronize_sched_expedited(void)
spin_unlock_irqrestore(&rq->lock, flags);
}
rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
synchronize_sched_expedited_count++;
mutex_unlock(&rcu_sched_expedited_mutex);
put_online_cpus();
if (need_full_sync)

View file

@ -22,6 +22,7 @@
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/signalfd.h>
#include <linux/ratelimit.h>
#include <linux/tracehook.h>
#include <linux/capability.h>
#include <linux/freezer.h>
@ -41,6 +42,8 @@
static struct kmem_cache *sigqueue_cachep;
int print_fatal_signals __read_mostly;
static void __user *sig_handler(struct task_struct *t, int sig)
{
return t->sighand->action[sig - 1].sa.sa_handler;
@ -159,7 +162,7 @@ int next_signal(struct sigpending *pending, sigset_t *mask)
{
unsigned long i, *s, *m, x;
int sig = 0;
s = pending->signal.sig;
m = mask->sig;
switch (_NSIG_WORDS) {
@ -184,17 +187,31 @@ int next_signal(struct sigpending *pending, sigset_t *mask)
sig = ffz(~x) + 1;
break;
}
return sig;
}
static inline void print_dropped_signal(int sig)
{
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
if (!print_fatal_signals)
return;
if (!__ratelimit(&ratelimit_state))
return;
printk(KERN_INFO "%s/%d: reached RLIMIT_SIGPENDING, dropped signal %d\n",
current->comm, current->pid, sig);
}
/*
* allocate a new signal queue record
* - this may be called without locks if and only if t == current, otherwise an
* appropriate lock must be held to stop the target task from exiting
*/
static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
int override_rlimit)
static struct sigqueue *
__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
{
struct sigqueue *q = NULL;
struct user_struct *user;
@ -207,10 +224,15 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
*/
user = get_uid(__task_cred(t)->user);
atomic_inc(&user->sigpending);
if (override_rlimit ||
atomic_read(&user->sigpending) <=
t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) {
q = kmem_cache_alloc(sigqueue_cachep, flags);
} else {
print_dropped_signal(sig);
}
if (unlikely(q == NULL)) {
atomic_dec(&user->sigpending);
free_uid(user);
@ -869,7 +891,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
else
override_rlimit = 0;
q = __sigqueue_alloc(t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
q = __sigqueue_alloc(sig, t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
override_rlimit);
if (q) {
list_add_tail(&q->list, &pending->list);
@ -925,8 +947,6 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
return __send_signal(sig, info, t, group, from_ancestor_ns);
}
int print_fatal_signals;
static void print_fatal_signal(struct pt_regs *regs, int signr)
{
printk("%s/%d: potentially unexpected fatal signal %d.\n",
@ -1293,19 +1313,19 @@ EXPORT_SYMBOL(kill_pid);
* These functions support sending signals using preallocated sigqueue
* structures. This is needed "because realtime applications cannot
* afford to lose notifications of asynchronous events, like timer
* expirations or I/O completions". In the case of Posix Timers
* we allocate the sigqueue structure from the timer_create. If this
* allocation fails we are able to report the failure to the application
* with an EAGAIN error.
*/
struct sigqueue *sigqueue_alloc(void)
{
struct sigqueue *q;
struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0)))
if (q)
q->flags |= SIGQUEUE_PREALLOC;
return(q);
return q;
}
void sigqueue_free(struct sigqueue *q)

View file

@ -265,9 +265,7 @@ static DEFINE_PER_CPU(struct call_single_data, csd_data);
* @info: An arbitrary pointer to pass to the function.
* @wait: If true, wait until function has completed on other CPUs.
*
* Returns 0 on success, else a negative status code. Note that @wait
* will be implicitly turned on in case of allocation failures, since
* we fall back to on-stack allocation.
* Returns 0 on success, else a negative status code.
*/
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int wait)
@ -321,6 +319,51 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
}
EXPORT_SYMBOL(smp_call_function_single);
/*
* smp_call_function_any - Run a function on any of the given cpus
* @mask: The mask of cpus it can run on.
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
* @wait: If true, wait until function has completed.
*
* Returns 0 on success, else a negative status code (if no cpus were online).
* Note that @wait will be implicitly turned on in case of allocation failures,
* since we fall back to on-stack allocation.
*
* Selection preference:
* 1) current cpu if in @mask
* 2) any cpu of current node if in @mask
* 3) any other online cpu in @mask
*/
int smp_call_function_any(const struct cpumask *mask,
void (*func)(void *info), void *info, int wait)
{
unsigned int cpu;
const struct cpumask *nodemask;
int ret;
/* Try for same CPU (cheapest) */
cpu = get_cpu();
if (cpumask_test_cpu(cpu, mask))
goto call;
/* Try for same node. */
nodemask = cpumask_of_node(cpu);
for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
cpu = cpumask_next_and(cpu, nodemask, mask)) {
if (cpu_online(cpu))
goto call;
}
/* Any online will do: smp_call_function_single handles nr_cpu_ids. */
cpu = cpumask_any_and(mask, cpu_online_mask);
call:
ret = smp_call_function_single(cpu, func, info, wait);
put_cpu();
return ret;
}
EXPORT_SYMBOL_GPL(smp_call_function_any);
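/*
 * Sketch of a typical caller (hypothetical helper, not part of this
 * patch): run work on whichever CPU in @mask is cheapest to reach,
 * preferring the current CPU, then its node, then any online CPU.
 */
static void read_sample(void *info)
{
	*(unsigned long *)info = smp_processor_id();	/* stand-in work */
}

static int sample_from_mask(const struct cpumask *mask)
{
	unsigned long val;
	int ret;

	ret = smp_call_function_any(mask, read_sample, &val, 1);
	if (!ret)
		pr_info("sampled on cpu %lu\n", val);
	return ret;
}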
/**
* __smp_call_function_single(): Run a function on another CPU
* @cpu: The CPU to run on.
@ -355,9 +398,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
* @wait: If true, wait (atomically) until function has completed
* on other CPUs.
*
* If @wait is true, then returns once @func has returned. Note that @wait
* will be implicitly turned on in case of allocation failures, since
* we fall back to on-stack allocation.
* If @wait is true, then returns once @func has returned.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler. Preemption
@ -443,8 +484,7 @@ EXPORT_SYMBOL(smp_call_function_many);
* Returns 0.
*
* If @wait is true, then returns once @func has returned; otherwise
* it returns just before the target cpu calls @func. In case of allocation
* failure, @wait will be implicitly turned on.
* it returns just before the target cpu calls @func.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.

View file

@ -302,9 +302,9 @@ void irq_exit(void)
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
rcu_irq_exit();
#ifdef CONFIG_NO_HZ
/* Make sure that timer wheel updates are propagated */
if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
tick_nohz_stop_sched_tick(0);
#endif

View file

@ -21,145 +21,28 @@
#include <linux/debug_locks.h>
#include <linux/module.h>
#ifndef _spin_trylock
int __lockfunc _spin_trylock(spinlock_t *lock)
{
return __spin_trylock(lock);
}
EXPORT_SYMBOL(_spin_trylock);
#endif
#ifndef _read_trylock
int __lockfunc _read_trylock(rwlock_t *lock)
{
return __read_trylock(lock);
}
EXPORT_SYMBOL(_read_trylock);
#endif
#ifndef _write_trylock
int __lockfunc _write_trylock(rwlock_t *lock)
{
return __write_trylock(lock);
}
EXPORT_SYMBOL(_write_trylock);
#endif
/*
* If lockdep is enabled then we use the non-preemption spin-ops
* even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
* not re-enabled during lock-acquire (which the preempt-spin-ops do):
*/
#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
#ifndef _read_lock
void __lockfunc _read_lock(rwlock_t *lock)
{
__read_lock(lock);
}
EXPORT_SYMBOL(_read_lock);
#endif
#ifndef _spin_lock_irqsave
unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
{
return __spin_lock_irqsave(lock);
}
EXPORT_SYMBOL(_spin_lock_irqsave);
#endif
#ifndef _spin_lock_irq
void __lockfunc _spin_lock_irq(spinlock_t *lock)
{
__spin_lock_irq(lock);
}
EXPORT_SYMBOL(_spin_lock_irq);
#endif
#ifndef _spin_lock_bh
void __lockfunc _spin_lock_bh(spinlock_t *lock)
{
__spin_lock_bh(lock);
}
EXPORT_SYMBOL(_spin_lock_bh);
#endif
#ifndef _read_lock_irqsave
unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
{
return __read_lock_irqsave(lock);
}
EXPORT_SYMBOL(_read_lock_irqsave);
#endif
#ifndef _read_lock_irq
void __lockfunc _read_lock_irq(rwlock_t *lock)
{
__read_lock_irq(lock);
}
EXPORT_SYMBOL(_read_lock_irq);
#endif
#ifndef _read_lock_bh
void __lockfunc _read_lock_bh(rwlock_t *lock)
{
__read_lock_bh(lock);
}
EXPORT_SYMBOL(_read_lock_bh);
#endif
#ifndef _write_lock_irqsave
unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
{
return __write_lock_irqsave(lock);
}
EXPORT_SYMBOL(_write_lock_irqsave);
#endif
#ifndef _write_lock_irq
void __lockfunc _write_lock_irq(rwlock_t *lock)
{
__write_lock_irq(lock);
}
EXPORT_SYMBOL(_write_lock_irq);
#endif
#ifndef _write_lock_bh
void __lockfunc _write_lock_bh(rwlock_t *lock)
{
__write_lock_bh(lock);
}
EXPORT_SYMBOL(_write_lock_bh);
#endif
#ifndef _spin_lock
void __lockfunc _spin_lock(spinlock_t *lock)
{
__spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock);
#endif
#ifndef _write_lock
void __lockfunc _write_lock(rwlock_t *lock)
{
__write_lock(lock);
}
EXPORT_SYMBOL(_write_lock);
#endif
#else /* CONFIG_PREEMPT: */
/*
* The __lock_function inlines are taken from
* include/linux/spinlock_api_smp.h
*/
#else
/*
* We build the __lock_function inlines here. They are too large for
* inlining all over the place, but here there is only one user per
* function, which embeds them into the calling _lock_function below.
*
* This could be a long-held lock. We both prepare to spin for a long
* time (making _this_ CPU preemptable if possible), and we also signal
* towards that other CPU that it should break the lock ASAP.
*
* (We do this in a function because inlining it would be excessive.)
*/
#define BUILD_LOCK_OPS(op, locktype) \
void __lockfunc _##op##_lock(locktype##_t *lock) \
void __lockfunc __##op##_lock(locktype##_t *lock) \
{ \
for (;;) { \
preempt_disable(); \
@ -175,9 +58,7 @@ void __lockfunc _##op##_lock(locktype##_t *lock) \
(lock)->break_lock = 0; \
} \
\
EXPORT_SYMBOL(_##op##_lock); \
\
unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \
unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock) \
{ \
unsigned long flags; \
\
@ -198,16 +79,12 @@ unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \
return flags; \
} \
\
EXPORT_SYMBOL(_##op##_lock_irqsave); \
\
void __lockfunc _##op##_lock_irq(locktype##_t *lock) \
void __lockfunc __##op##_lock_irq(locktype##_t *lock) \
{ \
_##op##_lock_irqsave(lock); \
} \
\
EXPORT_SYMBOL(_##op##_lock_irq); \
\
void __lockfunc _##op##_lock_bh(locktype##_t *lock) \
void __lockfunc __##op##_lock_bh(locktype##_t *lock) \
{ \
unsigned long flags; \
\
@ -220,23 +97,21 @@ void __lockfunc _##op##_lock_bh(locktype##_t *lock) \
local_bh_disable(); \
local_irq_restore(flags); \
} \
\
EXPORT_SYMBOL(_##op##_lock_bh)
/*
* Build preemption-friendly versions of the following
* lock-spinning functions:
*
* _[spin|read|write]_lock()
* _[spin|read|write]_lock_irq()
* _[spin|read|write]_lock_irqsave()
* _[spin|read|write]_lock_bh()
* __[spin|read|write]_lock()
* __[spin|read|write]_lock_irq()
* __[spin|read|write]_lock_irqsave()
* __[spin|read|write]_lock_bh()
*/
BUILD_LOCK_OPS(spin, spinlock);
BUILD_LOCK_OPS(read, rwlock);
BUILD_LOCK_OPS(write, rwlock);
#endif /* CONFIG_PREEMPT */
#endif
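/*
 * Roughly what BUILD_LOCK_OPS(spin, spinlock) pastes together for the
 * plain lock case, as a sketch only: the loop body is elided by the
 * hunk above, so the break_lock handshake here is reconstructed and
 * may differ in minor details from the real expansion.
 */
void __lockfunc __spin_lock(spinlock_t *lock)
{
	for (;;) {
		preempt_disable();
		if (likely(_raw_spin_trylock(lock)))
			break;			/* got it without spinning */
		preempt_enable();		/* give preemption a chance */

		if (!(lock)->break_lock)
			(lock)->break_lock = 1;	/* ask the holder to hurry */
		while (!spin_can_lock(lock) && (lock)->break_lock)
			cpu_relax();		/* spin outside the lock op */
	}
	(lock)->break_lock = 0;
}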
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@ -248,7 +123,8 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
}
EXPORT_SYMBOL(_spin_lock_nested);
unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock,
int subclass)
{
unsigned long flags;
@ -272,7 +148,127 @@ EXPORT_SYMBOL(_spin_lock_nest_lock);
#endif
#ifndef _spin_unlock
#ifndef CONFIG_INLINE_SPIN_TRYLOCK
int __lockfunc _spin_trylock(spinlock_t *lock)
{
return __spin_trylock(lock);
}
EXPORT_SYMBOL(_spin_trylock);
#endif
#ifndef CONFIG_INLINE_READ_TRYLOCK
int __lockfunc _read_trylock(rwlock_t *lock)
{
return __read_trylock(lock);
}
EXPORT_SYMBOL(_read_trylock);
#endif
#ifndef CONFIG_INLINE_WRITE_TRYLOCK
int __lockfunc _write_trylock(rwlock_t *lock)
{
return __write_trylock(lock);
}
EXPORT_SYMBOL(_write_trylock);
#endif
#ifndef CONFIG_INLINE_READ_LOCK
void __lockfunc _read_lock(rwlock_t *lock)
{
__read_lock(lock);
}
EXPORT_SYMBOL(_read_lock);
#endif
#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
{
return __spin_lock_irqsave(lock);
}
EXPORT_SYMBOL(_spin_lock_irqsave);
#endif
#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
void __lockfunc _spin_lock_irq(spinlock_t *lock)
{
__spin_lock_irq(lock);
}
EXPORT_SYMBOL(_spin_lock_irq);
#endif
#ifndef CONFIG_INLINE_SPIN_LOCK_BH
void __lockfunc _spin_lock_bh(spinlock_t *lock)
{
__spin_lock_bh(lock);
}
EXPORT_SYMBOL(_spin_lock_bh);
#endif
#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
{
return __read_lock_irqsave(lock);
}
EXPORT_SYMBOL(_read_lock_irqsave);
#endif
#ifndef CONFIG_INLINE_READ_LOCK_IRQ
void __lockfunc _read_lock_irq(rwlock_t *lock)
{
__read_lock_irq(lock);
}
EXPORT_SYMBOL(_read_lock_irq);
#endif
#ifndef CONFIG_INLINE_READ_LOCK_BH
void __lockfunc _read_lock_bh(rwlock_t *lock)
{
__read_lock_bh(lock);
}
EXPORT_SYMBOL(_read_lock_bh);
#endif
#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
{
return __write_lock_irqsave(lock);
}
EXPORT_SYMBOL(_write_lock_irqsave);
#endif
#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
void __lockfunc _write_lock_irq(rwlock_t *lock)
{
__write_lock_irq(lock);
}
EXPORT_SYMBOL(_write_lock_irq);
#endif
#ifndef CONFIG_INLINE_WRITE_LOCK_BH
void __lockfunc _write_lock_bh(rwlock_t *lock)
{
__write_lock_bh(lock);
}
EXPORT_SYMBOL(_write_lock_bh);
#endif
#ifndef CONFIG_INLINE_SPIN_LOCK
void __lockfunc _spin_lock(spinlock_t *lock)
{
__spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock);
#endif
#ifndef CONFIG_INLINE_WRITE_LOCK
void __lockfunc _write_lock(rwlock_t *lock)
{
__write_lock(lock);
}
EXPORT_SYMBOL(_write_lock);
#endif
#ifndef CONFIG_INLINE_SPIN_UNLOCK
void __lockfunc _spin_unlock(spinlock_t *lock)
{
__spin_unlock(lock);
@ -280,7 +276,7 @@ void __lockfunc _spin_unlock(spinlock_t *lock)
EXPORT_SYMBOL(_spin_unlock);
#endif
#ifndef _write_unlock
#ifndef CONFIG_INLINE_WRITE_UNLOCK
void __lockfunc _write_unlock(rwlock_t *lock)
{
__write_unlock(lock);
@ -288,7 +284,7 @@ void __lockfunc _write_unlock(rwlock_t *lock)
EXPORT_SYMBOL(_write_unlock);
#endif
#ifndef _read_unlock
#ifndef CONFIG_INLINE_READ_UNLOCK
void __lockfunc _read_unlock(rwlock_t *lock)
{
__read_unlock(lock);
@ -296,7 +292,7 @@ void __lockfunc _read_unlock(rwlock_t *lock)
EXPORT_SYMBOL(_read_unlock);
#endif
#ifndef _spin_unlock_irqrestore
#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
__spin_unlock_irqrestore(lock, flags);
@ -304,7 +300,7 @@ void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
EXPORT_SYMBOL(_spin_unlock_irqrestore);
#endif
#ifndef _spin_unlock_irq
#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
void __lockfunc _spin_unlock_irq(spinlock_t *lock)
{
__spin_unlock_irq(lock);
@ -312,7 +308,7 @@ void __lockfunc _spin_unlock_irq(spinlock_t *lock)
EXPORT_SYMBOL(_spin_unlock_irq);
#endif
#ifndef _spin_unlock_bh
#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
void __lockfunc _spin_unlock_bh(spinlock_t *lock)
{
__spin_unlock_bh(lock);
@ -320,7 +316,7 @@ void __lockfunc _spin_unlock_bh(spinlock_t *lock)
EXPORT_SYMBOL(_spin_unlock_bh);
#endif
#ifndef _read_unlock_irqrestore
#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
__read_unlock_irqrestore(lock, flags);
@ -328,7 +324,7 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
EXPORT_SYMBOL(_read_unlock_irqrestore);
#endif
#ifndef _read_unlock_irq
#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
void __lockfunc _read_unlock_irq(rwlock_t *lock)
{
__read_unlock_irq(lock);
@ -336,7 +332,7 @@ void __lockfunc _read_unlock_irq(rwlock_t *lock)
EXPORT_SYMBOL(_read_unlock_irq);
#endif
#ifndef _read_unlock_bh
#ifndef CONFIG_INLINE_READ_UNLOCK_BH
void __lockfunc _read_unlock_bh(rwlock_t *lock)
{
__read_unlock_bh(lock);
@ -344,7 +340,7 @@ void __lockfunc _read_unlock_bh(rwlock_t *lock)
EXPORT_SYMBOL(_read_unlock_bh);
#endif
#ifndef _write_unlock_irqrestore
#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
__write_unlock_irqrestore(lock, flags);
@ -352,7 +348,7 @@ void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
EXPORT_SYMBOL(_write_unlock_irqrestore);
#endif
#ifndef _write_unlock_irq
#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
void __lockfunc _write_unlock_irq(rwlock_t *lock)
{
__write_unlock_irq(lock);
@ -360,7 +356,7 @@ void __lockfunc _write_unlock_irq(rwlock_t *lock)
EXPORT_SYMBOL(_write_unlock_irq);
#endif
#ifndef _write_unlock_bh
#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
void __lockfunc _write_unlock_bh(rwlock_t *lock)
{
__write_unlock_bh(lock);
@ -368,7 +364,7 @@ void __lockfunc _write_unlock_bh(rwlock_t *lock)
EXPORT_SYMBOL(_write_unlock_bh);
#endif
#ifndef _spin_trylock_bh
#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
int __lockfunc _spin_trylock_bh(spinlock_t *lock)
{
return __spin_trylock_bh(lock);

View file

@ -49,6 +49,7 @@ int init_srcu_struct(struct srcu_struct *sp)
sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
return (sp->per_cpu_ref ? 0 : -ENOMEM);
}
EXPORT_SYMBOL_GPL(init_srcu_struct);
/*
* srcu_readers_active_idx -- returns approximate number of readers
@ -97,6 +98,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
free_percpu(sp->per_cpu_ref);
sp->per_cpu_ref = NULL;
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
/**
* srcu_read_lock - register a new reader for an SRCU-protected structure.
@ -118,6 +120,7 @@ int srcu_read_lock(struct srcu_struct *sp)
preempt_enable();
return idx;
}
EXPORT_SYMBOL_GPL(srcu_read_lock);
/**
* srcu_read_unlock - unregister an old reader from an SRCU-protected structure.
@ -136,22 +139,12 @@ void srcu_read_unlock(struct srcu_struct *sp, int idx)
per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
preempt_enable();
}
EXPORT_SYMBOL_GPL(srcu_read_unlock);
/*
* Helper function for synchronize_srcu() and synchronize_srcu_expedited().
*/
void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
{
int idx;
@ -173,7 +166,7 @@ void synchronize_srcu(struct srcu_struct *sp)
return;
}
synchronize_sched(); /* Force memory barrier on all CPUs. */
sync_func(); /* Force memory barrier on all CPUs. */
/*
* The preceding synchronize_sched() ensures that any CPU that
@ -190,7 +183,7 @@ void synchronize_srcu(struct srcu_struct *sp)
idx = sp->completed & 0x1;
sp->completed++;
synchronize_sched(); /* Force memory barrier on all CPUs. */
sync_func(); /* Force memory barrier on all CPUs. */
/*
* At this point, because of the preceding synchronize_sched(),
@ -203,7 +196,7 @@ void synchronize_srcu(struct srcu_struct *sp)
while (srcu_readers_active_idx(sp, idx))
schedule_timeout_interruptible(1);
synchronize_sched(); /* Force memory barrier on all CPUs. */
sync_func(); /* Force memory barrier on all CPUs. */
/*
* The preceding synchronize_sched() forces all srcu_read_unlock()
@ -236,6 +229,47 @@ void synchronize_srcu(struct srcu_struct *sp)
mutex_unlock(&sp->mutex);
}
/**
* synchronize_srcu - wait for prior SRCU read-side critical-section completion
* @sp: srcu_struct with which to synchronize.
*
* Flip the completed counter, and wait for the old count to drain to zero.
* As with classic RCU, the updater must use some separate means of
* synchronizing concurrent updates. Can block; must be called from
* process context.
*
* Note that it is illegal to call synchronize_srcu() from the corresponding
* SRCU read-side critical section; doing so will result in deadlock.
* However, it is perfectly legal to call synchronize_srcu() on one
* srcu_struct from some other srcu_struct's read-side critical section.
*/
void synchronize_srcu(struct srcu_struct *sp)
{
__synchronize_srcu(sp, synchronize_sched);
}
EXPORT_SYMBOL_GPL(synchronize_srcu);
/**
* synchronize_srcu_expedited - like synchronize_srcu, but less patient
* @sp: srcu_struct with which to synchronize.
*
* Flip the completed counter, and wait for the old count to drain to zero.
* As with classic RCU, the updater must use some separate means of
* synchronizing concurrent updates. Can block; must be called from
* process context.
*
* Note that it is illegal to call synchronize_srcu_expedited()
* from the corresponding SRCU read-side critical section; doing so
* will result in deadlock. However, it is perfectly legal to call
* synchronize_srcu_expedited() on one srcu_struct from some other
* srcu_struct's read-side critical section.
*/
void synchronize_srcu_expedited(struct srcu_struct *sp)
{
__synchronize_srcu(sp, synchronize_sched_expedited);
}
EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
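/*
 * Sketch of the resulting API surface (hypothetical my_srcu users, not
 * part of this patch): readers are unchanged, and updaters now choose
 * between the patient and the expedited wait.
 */
static struct srcu_struct my_srcu;	/* init_srcu_struct() at boot */

static void my_reader(void)
{
	int idx;

	idx = srcu_read_lock(&my_srcu);
	/* ... dereference my_srcu-protected data; may block ... */
	srcu_read_unlock(&my_srcu, idx);
}

static void my_updater(int urgent)
{
	/* ... unpublish the old data ... */
	if (urgent)
		synchronize_srcu_expedited(&my_srcu);	/* faster, costlier */
	else
		synchronize_srcu(&my_srcu);		/* patient wait */
	/* ... reclaim the old data ... */
}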
/**
* srcu_batches_completed - return batches completed.
* @sp: srcu_struct on which to report batch completion.
@ -248,10 +282,4 @@ long srcu_batches_completed(struct srcu_struct *sp)
{
return sp->completed;
}
EXPORT_SYMBOL_GPL(srcu_batches_completed);

View file

@ -36,6 +36,7 @@
#include <linux/sysrq.h>
#include <linux/highuid.h>
#include <linux/writeback.h>
#include <linux/ratelimit.h>
#include <linux/hugetlb.h>
#include <linux/initrd.h>
#include <linux/key.h>
@ -158,6 +159,8 @@ extern int no_unaligned_warning;
extern int unaligned_dump_stack;
#endif
extern struct ratelimit_state printk_ratelimit_state;
#ifdef CONFIG_RT_MUTEXES
extern int max_lock_depth;
#endif

View file

@ -60,6 +60,13 @@ static int last_ftrace_enabled;
/* Quick disabling of function tracer. */
int function_trace_stop;
/* List for set_ftrace_pid's pids. */
LIST_HEAD(ftrace_pids);
struct ftrace_pid {
struct list_head list;
struct pid *pid;
};
/*
* ftrace_disabled is set when an anomaly is discovered.
* ftrace_disabled is much stronger than ftrace_enabled.
@ -78,6 +85,10 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
#endif
static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
{
struct ftrace_ops *op = ftrace_list;
@ -155,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
else
func = ftrace_list_func;
if (ftrace_pid_trace) {
if (!list_empty(&ftrace_pids)) {
set_ftrace_pid_function(func);
func = ftrace_pid_func;
}
@ -203,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
if (ftrace_list->next == &ftrace_list_end) {
ftrace_func_t func = ftrace_list->func;
if (ftrace_pid_trace) {
if (!list_empty(&ftrace_pids)) {
set_ftrace_pid_function(func);
func = ftrace_pid_func;
}
@ -231,7 +242,7 @@ static void ftrace_update_pid_func(void)
func = __ftrace_trace_function;
#endif
if (ftrace_pid_trace) {
if (!list_empty(&ftrace_pids)) {
set_ftrace_pid_function(func);
func = ftrace_pid_func;
} else {
@ -821,8 +832,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
}
#endif /* CONFIG_FUNCTION_PROFILER */
/* set when tracing only a pid */
struct pid *ftrace_pid_trace;
static struct pid * const ftrace_swapper_pid = &init_struct_pid;
#ifdef CONFIG_DYNAMIC_FTRACE
@ -1261,12 +1270,34 @@ static int ftrace_update_code(struct module *mod)
ftrace_new_addrs = p->newlist;
p->flags = 0L;
/* convert record (i.e, patch mcount-call with NOP) */
if (ftrace_code_disable(mod, p)) {
p->flags |= FTRACE_FL_CONVERTED;
ftrace_update_cnt++;
} else
/*
* Do the initial record conversion from mcount jump
* to the NOP instructions.
*/
if (!ftrace_code_disable(mod, p)) {
ftrace_free_rec(p);
continue;
}
p->flags |= FTRACE_FL_CONVERTED;
ftrace_update_cnt++;
/*
* If the tracing is enabled, go ahead and enable the record.
*
* The reason not to enable the record immediately is the
* inherent check of ftrace_make_nop/ftrace_make_call for
* correct previous instructions. Making first the NOP
* conversion puts the module to the correct state, thus
* passing the ftrace_make_call check.
*/
if (ftrace_start_up) {
int failed = __ftrace_replace_code(p, 1);
if (failed) {
ftrace_bug(failed, p->ip);
ftrace_free_rec(p);
}
}
}
stop = ftrace_now(raw_smp_processor_id());
@ -1656,60 +1687,6 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
return ret;
}
enum {
MATCH_FULL,
MATCH_FRONT_ONLY,
MATCH_MIDDLE_ONLY,
MATCH_END_ONLY,
};
/*
* (static function - no need for kernel doc)
*
* Pass in a buffer containing a glob and this function will
* set search to point to the search part of the buffer and
* return the type of search it is (see enum above).
* This does modify buff.
*
* Returns enum type.
* search returns the pointer to use for comparison.
* not returns 1 if buff started with a '!'
* 0 otherwise.
*/
static int
ftrace_setup_glob(char *buff, int len, char **search, int *not)
{
int type = MATCH_FULL;
int i;
if (buff[0] == '!') {
*not = 1;
buff++;
len--;
} else
*not = 0;
*search = buff;
for (i = 0; i < len; i++) {
if (buff[i] == '*') {
if (!i) {
*search = buff + 1;
type = MATCH_END_ONLY;
} else {
if (type == MATCH_END_ONLY)
type = MATCH_MIDDLE_ONLY;
else
type = MATCH_FRONT_ONLY;
buff[i] = 0;
break;
}
}
}
return type;
}
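/*
 * For example, the parse above yields:
 *	"func"   -> MATCH_FULL,        search = "func", not = 0
 *	"func*"  -> MATCH_FRONT_ONLY,  search = "func"
 *	"*func"  -> MATCH_END_ONLY,    search = "func"
 *	"*func*" -> MATCH_MIDDLE_ONLY, search = "func"
 *	"!func*" -> as "func*", with not = 1
 * (the same contract that filter_parse_regex() now provides).
 */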
static int ftrace_match(char *str, char *regex, int len, int type)
{
int matched = 0;
@ -1758,7 +1735,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
int not;
flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
type = ftrace_setup_glob(buff, len, &search, &not);
type = filter_parse_regex(buff, len, &search, &not);
search_len = strlen(search);
@ -1826,7 +1803,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
}
if (strlen(buff)) {
type = ftrace_setup_glob(buff, strlen(buff), &search, &not);
type = filter_parse_regex(buff, strlen(buff), &search, &not);
search_len = strlen(search);
}
@ -1991,7 +1968,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
int count = 0;
char *search;
type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
type = filter_parse_regex(glob, strlen(glob), &search, &not);
len = strlen(search);
/* we do not support '!' for function probes */
@ -2068,7 +2045,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
else if (glob) {
int not;
type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
type = filter_parse_regex(glob, strlen(glob), &search, &not);
len = strlen(search);
/* we do not support '!' for function probes */
@ -2312,6 +2289,32 @@ static int __init set_ftrace_filter(char *str)
}
__setup("ftrace_filter=", set_ftrace_filter);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
static int __init set_graph_function(char *str)
{
strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
return 1;
}
__setup("ftrace_graph_filter=", set_graph_function);
static void __init set_ftrace_early_graph(char *buf)
{
int ret;
char *func;
while (buf) {
func = strsep(&buf, ",");
/* we allow only one expression at a time */
ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
func);
if (ret)
printk(KERN_DEBUG "ftrace: function %s not "
"traceable\n", func);
}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
static void __init set_ftrace_early_filter(char *buf, int enable)
{
char *func;
@ -2328,6 +2331,10 @@ static void __init set_ftrace_early_filters(void)
set_ftrace_early_filter(ftrace_filter_buf, 1);
if (ftrace_notrace_buf[0])
set_ftrace_early_filter(ftrace_notrace_buf, 0);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (ftrace_graph_buf[0])
set_ftrace_early_graph(ftrace_graph_buf);
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
}
static int
@ -2513,7 +2520,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
return -ENODEV;
/* decode regex */
type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not);
type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
if (not)
return -EINVAL;
@ -2624,7 +2631,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
return 0;
}
static int ftrace_convert_nops(struct module *mod,
static int ftrace_process_locs(struct module *mod,
unsigned long *start,
unsigned long *end)
{
@ -2684,7 +2691,7 @@ static void ftrace_init_module(struct module *mod,
{
if (ftrace_disabled || start == end)
return;
ftrace_convert_nops(mod, start, end);
ftrace_process_locs(mod, start, end);
}
static int ftrace_module_notify(struct notifier_block *self,
@ -2745,7 +2752,7 @@ void __init ftrace_init(void)
last_ftrace_enabled = ftrace_enabled = 1;
ret = ftrace_convert_nops(NULL,
ret = ftrace_process_locs(NULL,
__start_mcount_loc,
__stop_mcount_loc);
@ -2778,23 +2785,6 @@ static inline void ftrace_startup_enable(int command) { }
# define ftrace_shutdown_sysctl() do { } while (0)
#endif /* CONFIG_DYNAMIC_FTRACE */
static ssize_t
ftrace_pid_read(struct file *file, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
int r;
if (ftrace_pid_trace == ftrace_swapper_pid)
r = sprintf(buf, "swapper tasks\n");
else if (ftrace_pid_trace)
r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
else
r = sprintf(buf, "no pid\n");
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
static void clear_ftrace_swapper(void)
{
struct task_struct *p;
@ -2845,14 +2835,12 @@ static void set_ftrace_pid(struct pid *pid)
rcu_read_unlock();
}
static void clear_ftrace_pid_task(struct pid **pid)
static void clear_ftrace_pid_task(struct pid *pid)
{
if (*pid == ftrace_swapper_pid)
if (pid == ftrace_swapper_pid)
clear_ftrace_swapper();
else
clear_ftrace_pid(*pid);
*pid = NULL;
clear_ftrace_pid(pid);
}
static void set_ftrace_pid_task(struct pid *pid)
@ -2863,12 +2851,141 @@ static void set_ftrace_pid_task(struct pid *pid)
set_ftrace_pid(pid);
}
static int ftrace_pid_add(int p)
{
struct pid *pid;
struct ftrace_pid *fpid;
int ret = -EINVAL;
mutex_lock(&ftrace_lock);
if (!p)
pid = ftrace_swapper_pid;
else
pid = find_get_pid(p);
if (!pid)
goto out;
ret = 0;
list_for_each_entry(fpid, &ftrace_pids, list)
if (fpid->pid == pid)
goto out_put;
ret = -ENOMEM;
fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
if (!fpid)
goto out_put;
list_add(&fpid->list, &ftrace_pids);
fpid->pid = pid;
set_ftrace_pid_task(pid);
ftrace_update_pid_func();
ftrace_startup_enable(0);
mutex_unlock(&ftrace_lock);
return 0;
out_put:
if (pid != ftrace_swapper_pid)
put_pid(pid);
out:
mutex_unlock(&ftrace_lock);
return ret;
}
static void ftrace_pid_reset(void)
{
struct ftrace_pid *fpid, *safe;
mutex_lock(&ftrace_lock);
list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
struct pid *pid = fpid->pid;
clear_ftrace_pid_task(pid);
list_del(&fpid->list);
kfree(fpid);
}
ftrace_update_pid_func();
ftrace_startup_enable(0);
mutex_unlock(&ftrace_lock);
}
static void *fpid_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&ftrace_lock);
if (list_empty(&ftrace_pids) && (!*pos))
return (void *) 1;
return seq_list_start(&ftrace_pids, *pos);
}
static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
{
if (v == (void *)1)
return NULL;
return seq_list_next(v, &ftrace_pids, pos);
}
static void fpid_stop(struct seq_file *m, void *p)
{
mutex_unlock(&ftrace_lock);
}
static int fpid_show(struct seq_file *m, void *v)
{
const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list);
if (v == (void *)1) {
seq_printf(m, "no pid\n");
return 0;
}
if (fpid->pid == ftrace_swapper_pid)
seq_printf(m, "swapper tasks\n");
else
seq_printf(m, "%u\n", pid_vnr(fpid->pid));
return 0;
}
static const struct seq_operations ftrace_pid_sops = {
.start = fpid_start,
.next = fpid_next,
.stop = fpid_stop,
.show = fpid_show,
};
static int
ftrace_pid_open(struct inode *inode, struct file *file)
{
int ret = 0;
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
ftrace_pid_reset();
if (file->f_mode & FMODE_READ)
ret = seq_open(file, &ftrace_pid_sops);
return ret;
}
static ssize_t
ftrace_pid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct pid *pid;
char buf[64];
char buf[64], *tmp;
long val;
int ret;
@ -2880,57 +2997,38 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
buf[cnt] = 0;
/*
* Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
* to clean the filter quietly.
*/
tmp = strstrip(buf);
if (strlen(tmp) == 0)
return 1;
ret = strict_strtol(tmp, 10, &val);
if (ret < 0)
return ret;
ret = ftrace_pid_add(val);
return ret ? ret : cnt;
}
static int
ftrace_pid_release(struct inode *inode, struct file *file)
{
if (file->f_mode & FMODE_READ)
seq_release(inode, file);
return 0;
}
static const struct file_operations ftrace_pid_fops = {
.read = ftrace_pid_read,
.write = ftrace_pid_write,
.open = ftrace_pid_open,
.write = ftrace_pid_write,
.read = seq_read,
.llseek = seq_lseek,
.release = ftrace_pid_release,
};
static __init int ftrace_init_debugfs(void)
@ -3293,4 +3391,3 @@ void ftrace_graph_stop(void)
ftrace_stop();
}
#endif

View file

@ -1787,9 +1787,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
static struct ring_buffer_event *
rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length, unsigned long tail,
struct buffer_page *commit_page,
struct buffer_page *tail_page, u64 *ts)
{
struct buffer_page *commit_page = cpu_buffer->commit_page;
struct ring_buffer *buffer = cpu_buffer->buffer;
struct buffer_page *next_page;
int ret;
@ -1892,13 +1892,10 @@ static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
unsigned type, unsigned long length, u64 *ts)
{
struct buffer_page *tail_page, *commit_page;
struct buffer_page *tail_page;
struct ring_buffer_event *event;
unsigned long tail, write;
commit_page = cpu_buffer->commit_page;
/* we just need to protect against interrupts */
barrier();
tail_page = cpu_buffer->tail_page;
write = local_add_return(length, &tail_page->write);
@ -1909,7 +1906,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
/* See if we shot pass the end of this buffer page */
if (write > BUF_PAGE_SIZE)
return rb_move_tail(cpu_buffer, length, tail,
commit_page, tail_page, ts);
tail_page, ts);
/* We reserved something on the buffer */

View file

@ -35,6 +35,28 @@ static int disable_reader;
module_param(disable_reader, uint, 0644);
MODULE_PARM_DESC(disable_reader, "only run producer");
static int write_iteration = 50;
module_param(write_iteration, uint, 0644);
MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
static int producer_nice = 19;
static int consumer_nice = 19;
static int producer_fifo = -1;
static int consumer_fifo = -1;
module_param(producer_nice, uint, 0644);
MODULE_PARM_DESC(producer_nice, "nice prio for producer");
module_param(consumer_nice, uint, 0644);
MODULE_PARM_DESC(consumer_nice, "nice prio for consumer");
module_param(producer_fifo, uint, 0644);
MODULE_PARM_DESC(producer_fifo, "fifo prio for producer");
module_param(consumer_fifo, uint, 0644);
MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer");
static int read_events;
static int kill_test;
@ -208,15 +230,18 @@ static void ring_buffer_producer(void)
do {
struct ring_buffer_event *event;
int *entry;
int i;
event = ring_buffer_lock_reserve(buffer, 10);
if (!event) {
missed++;
} else {
hit++;
entry = ring_buffer_event_data(event);
*entry = smp_processor_id();
ring_buffer_unlock_commit(buffer, event);
for (i = 0; i < write_iteration; i++) {
event = ring_buffer_lock_reserve(buffer, 10);
if (!event) {
missed++;
} else {
hit++;
entry = ring_buffer_event_data(event);
*entry = smp_processor_id();
ring_buffer_unlock_commit(buffer, event);
}
}
do_gettimeofday(&end_tv);
@ -263,6 +288,27 @@ static void ring_buffer_producer(void)
if (kill_test)
trace_printk("ERROR!\n");
if (!disable_reader) {
if (consumer_fifo < 0)
trace_printk("Running Consumer at nice: %d\n",
consumer_nice);
else
trace_printk("Running Consumer at SCHED_FIFO %d\n",
consumer_fifo);
}
if (producer_fifo < 0)
trace_printk("Running Producer at nice: %d\n",
producer_nice);
else
trace_printk("Running Producer at SCHED_FIFO %d\n",
producer_fifo);
/* Let the user know that the test is running at low priority */
if (producer_fifo < 0 && consumer_fifo < 0 &&
producer_nice == 19 && consumer_nice == 19)
trace_printk("WARNING!!! This test is running at lowest priority.\n");
trace_printk("Time: %lld (usecs)\n", time);
trace_printk("Overruns: %lld\n", overruns);
if (disable_reader)
@ -392,6 +438,27 @@ static int __init ring_buffer_benchmark_init(void)
if (IS_ERR(producer))
goto out_kill;
/*
* Run them as low-prio background tasks by default:
*/
if (!disable_reader) {
if (consumer_fifo >= 0) {
struct sched_param param = {
.sched_priority = consumer_fifo
};
sched_setscheduler(consumer, SCHED_FIFO, &param);
} else
set_user_nice(consumer, consumer_nice);
}
if (producer_fifo >= 0) {
struct sched_param param = {
			.sched_priority = producer_fifo
};
sched_setscheduler(producer, SCHED_FIFO, &param);
} else
set_user_nice(producer, producer_nice);
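	/*
	 * The pattern above in isolation: a kernel thread is either lifted
	 * to SCHED_FIFO at the requested priority, or left in SCHED_NORMAL
	 * with its nice level adjusted. A minimal sketch; 'worker' and the
	 * helper name are hypothetical.
	 */
	#include <linux/sched.h>

	static void example_set_prio(struct task_struct *worker, int fifo, int nice)
	{
		if (fifo >= 0) {
			struct sched_param param = {
				.sched_priority = fifo
			};
			sched_setscheduler(worker, SCHED_FIFO, &param);
		} else
			set_user_nice(worker, nice);
	}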
return 0;
out_kill:


@ -129,7 +129,7 @@ static int tracing_set_tracer(const char *buf);
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;
static int __init set_ftrace(char *str)
static int __init set_cmdline_ftrace(char *str)
{
strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
default_bootup_tracer = bootup_tracer_buf;
@ -137,7 +137,7 @@ static int __init set_ftrace(char *str)
ring_buffer_expanded = 1;
return 1;
}
__setup("ftrace=", set_ftrace);
__setup("ftrace=", set_cmdline_ftrace);
static int __init set_ftrace_dump_on_oops(char *str)
{
@ -1361,10 +1361,11 @@ int trace_array_vprintk(struct trace_array *tr,
pause_graph_tracing();
raw_local_irq_save(irq_flags);
__raw_spin_lock(&trace_buf_lock);
len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
len = min(len, TRACE_BUF_SIZE-1);
trace_buf[len] = 0;
if (args == NULL) {
strncpy(trace_buf, fmt, TRACE_BUF_SIZE);
len = strlen(trace_buf);
} else
len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
size = sizeof(*entry) + len + 1;
buffer = tr->buffer;
@ -1373,10 +1374,10 @@ int trace_array_vprintk(struct trace_array *tr,
if (!event)
goto out_unlock;
entry = ring_buffer_event_data(event);
entry->ip = ip;
entry->ip = ip;
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = 0;
entry->buf[len] = '\0';
if (!filter_check_discard(call, entry, buffer, event))
ring_buffer_unlock_commit(buffer, event);
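/*
 * The args == NULL branch above turns trace_vprintk() into a plain
 * string copy, so a caller holding an already-formatted buffer can skip
 * building a va_list; tracing_mark_write() below uses exactly this.
 * A hypothetical caller:
 */
static void example_mark(const char *msg)
{
	/* ip = 0: no meaningful instruction pointer for injected text */
	trace_vprintk(0, msg, NULL);
}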
@ -3319,22 +3320,11 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
return cnt;
}
static int mark_printk(const char *fmt, ...)
{
int ret;
va_list args;
va_start(args, fmt);
ret = trace_vprintk(0, fmt, args);
va_end(args);
return ret;
}
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
char *buf;
char *end;
if (tracing_disabled)
return -EINVAL;
@ -3342,7 +3332,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (cnt > TRACE_BUF_SIZE)
cnt = TRACE_BUF_SIZE;
buf = kmalloc(cnt + 1, GFP_KERNEL);
buf = kmalloc(cnt + 2, GFP_KERNEL);
if (buf == NULL)
return -ENOMEM;
@ -3350,14 +3340,13 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
kfree(buf);
return -EFAULT;
}
if (buf[cnt-1] != '\n') {
buf[cnt] = '\n';
buf[cnt+1] = '\0';
} else
buf[cnt] = '\0';
/* Cut from the first nil or newline. */
buf[cnt] = '\0';
end = strchr(buf, '\n');
if (end)
*end = '\0';
cnt = mark_printk("%s\n", buf);
cnt = trace_vprintk(0, buf, NULL);
kfree(buf);
*fpos += cnt;
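/*
 * Userspace view of the change above: anything after the first newline
 * in a write to trace_marker is now cut off instead of being logged.
 * Standard debugfs mount point assumed.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, "phase 1 done\n", 13);	/* logged as "phase 1 done" */
	close(fd);
	return 0;
}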
@ -3730,7 +3719,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return ENOMEM;
return -ENOMEM;
trace_seq_init(s);


@ -483,10 +483,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
return 0;
}
#else
static inline int ftrace_trace_addr(unsigned long addr)
{
return 1;
}
static inline int ftrace_graph_addr(unsigned long addr)
{
return 1;
@ -500,12 +496,12 @@ print_graph_function(struct trace_iterator *iter)
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
extern struct pid *ftrace_pid_trace;
extern struct list_head ftrace_pids;
#ifdef CONFIG_FUNCTION_TRACER
static inline int ftrace_trace_task(struct task_struct *task)
{
if (!ftrace_pid_trace)
if (list_empty(&ftrace_pids))
return 1;
return test_tsk_trace_trace(task);
@ -699,22 +695,40 @@ struct event_subsystem {
};
struct filter_pred;
struct regex;
typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
int val1, int val2);
struct filter_pred {
filter_pred_fn_t fn;
u64 val;
char str_val[MAX_FILTER_STR_VAL];
int str_len;
char *field_name;
int offset;
int not;
int op;
	int pop_n;
};
typedef int (*regex_match_func)(char *str, struct regex *r, int len);
enum regex_type {
MATCH_FULL,
MATCH_FRONT_ONLY,
MATCH_MIDDLE_ONLY,
MATCH_END_ONLY,
};
struct regex {
char pattern[MAX_FILTER_STR_VAL];
int len;
int field_len;
regex_match_func match;
};
struct filter_pred {
filter_pred_fn_t fn;
u64 val;
struct regex regex;
char *field_name;
int offset;
int not;
int op;
int pop_n;
};
extern enum regex_type
filter_parse_regex(char *buff, int len, char **search, int *not);
extern void print_event_filter(struct ftrace_event_call *call,
struct trace_seq *s);
extern int apply_event_filter(struct ftrace_event_call *call,


@ -20,6 +20,8 @@
#include <linux/ktime.h>
#include <linux/trace_clock.h>
#include "trace.h"
/*
* trace_clock_local(): the simplest and least coherent tracing clock.
*
@ -28,17 +30,17 @@
*/
u64 notrace trace_clock_local(void)
{
unsigned long flags;
u64 clock;
int resched;
/*
* sched_clock() is an architecture implemented, fast, scalable,
* lockless clock. It is not guaranteed to be coherent across
* CPUs, nor across CPU idle events.
*/
raw_local_irq_save(flags);
resched = ftrace_preempt_disable();
clock = sched_clock();
raw_local_irq_restore(flags);
ftrace_preempt_enable(resched);
return clock;
}
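/*
 * Rough shape of the helpers the new version leans on (the real
 * definitions live in kernel/trace/trace.h; this is only a hedged
 * sketch): preemption is toggled with the notrace variants so the
 * clock never recurses into the tracer, and a reschedule observed
 * before disabling is honored on the enable side.
 */
static inline int example_preempt_disable(void)	/* cf. ftrace_preempt_disable() */
{
	int resched = need_resched();

	preempt_disable_notrace();
	return resched;
}

static inline void example_preempt_enable(int resched)	/* cf. ftrace_preempt_enable() */
{
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}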


@ -878,9 +878,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
"'%s/filter' entry\n", name);
}
entry = trace_create_file("enable", 0644, system->entry,
(void *)system->name,
&ftrace_system_enable_fops);
trace_create_file("enable", 0644, system->entry,
(void *)system->name,
&ftrace_system_enable_fops);
return system->entry;
}
@ -892,7 +892,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
const struct file_operations *filter,
const struct file_operations *format)
{
struct dentry *entry;
int ret;
/*
@ -910,12 +909,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
}
if (call->regfunc)
entry = trace_create_file("enable", 0644, call->dir, call,
enable);
trace_create_file("enable", 0644, call->dir, call,
enable);
if (call->id && call->profile_enable)
entry = trace_create_file("id", 0444, call->dir, call,
id);
trace_create_file("id", 0444, call->dir, call,
id);
if (call->define_fields) {
ret = call->define_fields(call);
@ -924,16 +923,16 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
" events/%s\n", call->name);
return ret;
}
entry = trace_create_file("filter", 0644, call->dir, call,
filter);
trace_create_file("filter", 0644, call->dir, call,
filter);
}
/* A trace may not want to export its format */
if (!call->show_format)
return 0;
entry = trace_create_file("format", 0444, call->dir, call,
format);
trace_create_file("format", 0444, call->dir, call,
format);
return 0;
}
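/*
 * Why the return values can now be dropped: trace_create_file() already
 * reports failures itself, roughly along these lines (a sketch of the
 * helper in kernel/trace/trace.c, hedged from its documented behavior):
 */
struct dentry *example_trace_create_file(const char *name, mode_t mode,
					 struct dentry *parent, void *data,
					 const struct file_operations *fops)
{
	struct dentry *ret = debugfs_create_file(name, mode, parent, data, fops);

	if (!ret)
		pr_warning("Could not create debugfs '%s' entry\n", name);
	return ret;
}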


@ -18,8 +18,6 @@
* Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
*/
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
@ -197,9 +195,9 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
char *addr = (char *)(event + pred->offset);
int cmp, match;
cmp = strncmp(addr, pred->str_val, pred->str_len);
cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
match = (!cmp) ^ pred->not;
match = cmp ^ pred->not;
return match;
}
@ -211,9 +209,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
char **addr = (char **)(event + pred->offset);
int cmp, match;
cmp = strncmp(*addr, pred->str_val, pred->str_len);
cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
match = (!cmp) ^ pred->not;
match = cmp ^ pred->not;
return match;
}
@ -237,9 +235,9 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
char *addr = (char *)(event + str_loc);
int cmp, match;
cmp = strncmp(addr, pred->str_val, str_len);
cmp = pred->regex.match(addr, &pred->regex, str_len);
match = (!cmp) ^ pred->not;
match = cmp ^ pred->not;
return match;
}
@ -250,6 +248,124 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
return 0;
}
/* Basic regex callbacks */
static int regex_match_full(char *str, struct regex *r, int len)
{
if (strncmp(str, r->pattern, len) == 0)
return 1;
return 0;
}
static int regex_match_front(char *str, struct regex *r, int len)
{
if (strncmp(str, r->pattern, len) == 0)
return 1;
return 0;
}
static int regex_match_middle(char *str, struct regex *r, int len)
{
if (strstr(str, r->pattern))
return 1;
return 0;
}
static int regex_match_end(char *str, struct regex *r, int len)
{
char *ptr = strstr(str, r->pattern);
if (ptr && (ptr[r->len] == 0))
return 1;
return 0;
}
/**
* filter_parse_regex - parse a basic regex
* @buff: the raw regex
* @len: length of the regex
* @search: will point to the beginning of the string to compare
* @not: tell whether the match will have to be inverted
*
* This passes in a buffer containing a regex and this function will
* set search to point to the search part of the buffer and
* return the type of search it is (see enum above).
* This does modify buff.
*
* Returns enum type.
* search returns the pointer to use for comparison.
* not returns 1 if buff started with a '!'
* 0 otherwise.
*/
enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
{
int type = MATCH_FULL;
int i;
if (buff[0] == '!') {
*not = 1;
buff++;
len--;
} else
*not = 0;
*search = buff;
for (i = 0; i < len; i++) {
if (buff[i] == '*') {
if (!i) {
*search = buff + 1;
type = MATCH_END_ONLY;
} else {
if (type == MATCH_END_ONLY)
type = MATCH_MIDDLE_ONLY;
else
type = MATCH_FRONT_ONLY;
buff[i] = 0;
break;
}
}
}
return type;
}
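/*
 * Worked examples of the parse above. The buffer is modified in place
 * (a trailing '*' is overwritten with nul) and 'search' ends up at the
 * bare text to compare:
 *
 *   "foo"   -> MATCH_FULL,        search = "foo"
 *   "foo*"  -> MATCH_FRONT_ONLY,  search = "foo"
 *   "*foo"  -> MATCH_END_ONLY,    search = "foo"
 *   "*foo*" -> MATCH_MIDDLE_ONLY, search = "foo"
 *   "!foo"  -> MATCH_FULL,        search = "foo", *not = 1
 */
static void example_parse_regex(void)
{
	char pattern[] = "*foo*";	/* also try the forms listed above */
	char *search;
	int not;
	enum regex_type type;

	type = filter_parse_regex(pattern, strlen(pattern), &search, &not);
	/* here: type == MATCH_MIDDLE_ONLY, search == "foo", not == 0 */
}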
static int filter_build_regex(struct filter_pred *pred)
{
struct regex *r = &pred->regex;
char *search, *dup;
enum regex_type type;
int not;
type = filter_parse_regex(r->pattern, r->len, &search, &not);
dup = kstrdup(search, GFP_KERNEL);
if (!dup)
return -ENOMEM;
strcpy(r->pattern, dup);
kfree(dup);
r->len = strlen(r->pattern);
switch (type) {
case MATCH_FULL:
r->match = regex_match_full;
break;
case MATCH_FRONT_ONLY:
r->match = regex_match_front;
break;
case MATCH_MIDDLE_ONLY:
r->match = regex_match_middle;
break;
case MATCH_END_ONLY:
r->match = regex_match_end;
break;
}
pred->not ^= not;
return 0;
}
/* return 1 if event matches, 0 otherwise (discard) */
int filter_match_preds(struct ftrace_event_call *call, void *rec)
{
@ -396,7 +512,7 @@ static void filter_clear_pred(struct filter_pred *pred)
{
kfree(pred->field_name);
pred->field_name = NULL;
pred->str_len = 0;
pred->regex.len = 0;
}
static int filter_set_pred(struct filter_pred *dest,
@ -660,21 +776,24 @@ static int filter_add_pred(struct filter_parse_state *ps,
}
if (is_string_field(field)) {
pred->str_len = field->size;
ret = filter_build_regex(pred);
if (ret)
return ret;
if (field->filter_type == FILTER_STATIC_STRING)
if (field->filter_type == FILTER_STATIC_STRING) {
fn = filter_pred_string;
else if (field->filter_type == FILTER_DYN_STRING)
fn = filter_pred_strloc;
pred->regex.field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING)
fn = filter_pred_strloc;
else {
fn = filter_pred_pchar;
pred->str_len = strlen(pred->str_val);
pred->regex.field_len = strlen(pred->regex.pattern);
}
} else {
if (field->is_signed)
ret = strict_strtoll(pred->str_val, 0, &val);
ret = strict_strtoll(pred->regex.pattern, 0, &val);
else
ret = strict_strtoull(pred->str_val, 0, &val);
ret = strict_strtoull(pred->regex.pattern, 0, &val);
if (ret) {
parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
return -EINVAL;
@ -1045,8 +1164,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
return NULL;
}
strcpy(pred->str_val, operand2);
pred->str_len = strlen(operand2);
strcpy(pred->regex.pattern, operand2);
pred->regex.len = strlen(pred->regex.pattern);
pred->op = op;


@ -48,11 +48,11 @@
struct ____ftrace_##name { \
tstruct \
}; \
static void __used ____ftrace_check_##name(void) \
static void __always_unused ____ftrace_check_##name(void) \
{ \
struct ____ftrace_##name *__entry = NULL; \
\
/* force cmpile-time check on F_printk() */ \
/* force compile-time check on F_printk() */ \
printk(print); \
}


@ -14,6 +14,69 @@ static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];
static struct syscall_metadata **syscalls_metadata;
static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
struct syscall_metadata *start;
struct syscall_metadata *stop;
char str[KSYM_SYMBOL_LEN];
start = (struct syscall_metadata *)__start_syscalls_metadata;
stop = (struct syscall_metadata *)__stop_syscalls_metadata;
kallsyms_lookup(syscall, NULL, NULL, NULL, str);
for ( ; start < stop; start++) {
/*
* Only compare after the "sys" prefix. Archs that use
* syscall wrappers may have syscalls symbols aliases prefixed
* with "SyS" instead of "sys", leading to an unwanted
* mismatch.
*/
if (start->name && !strcmp(start->name + 3, str + 3))
return start;
}
return NULL;
}
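/*
 * Illustration of the offset-3 compare above: an arch using syscall
 * wrappers may expose "SyS_read" via kallsyms where the metadata says
 * "sys_read"; skipping the three-character prefix makes the two agree.
 */
static int example_names_match(const char *meta_name, const char *ksym_name)
{
	/* "sys_read" + 3 == "_read", "SyS_read" + 3 == "_read" */
	return !strcmp(meta_name + 3, ksym_name + 3);
}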
static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
return NULL;
return syscalls_metadata[nr];
}
int syscall_name_to_nr(char *name)
{
int i;
if (!syscalls_metadata)
return -1;
for (i = 0; i < NR_syscalls; i++) {
if (syscalls_metadata[i]) {
if (!strcmp(syscalls_metadata[i]->name, name))
return i;
}
}
return -1;
}
void set_syscall_enter_id(int num, int id)
{
syscalls_metadata[num]->enter_id = id;
}
void set_syscall_exit_id(int num, int id)
{
syscalls_metadata[num]->exit_id = id;
}
enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
@ -375,6 +438,29 @@ struct trace_event event_syscall_exit = {
.trace = print_syscall_exit,
};
int __init init_ftrace_syscalls(void)
{
struct syscall_metadata *meta;
unsigned long addr;
int i;
syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
NR_syscalls, GFP_KERNEL);
if (!syscalls_metadata) {
WARN_ON(1);
return -ENOMEM;
}
for (i = 0; i < NR_syscalls; i++) {
addr = arch_syscall_addr(i);
meta = find_syscall_meta(addr);
syscalls_metadata[i] = meta;
}
return 0;
}
core_initcall(init_ftrace_syscalls);
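/*
 * The arch hook assumed by the init loop above. On an architecture with
 * a flat syscall table it can be a plain array lookup; this is only an
 * illustrative shape, each arch supplies its own definition.
 */
extern const unsigned long sys_call_table[];

unsigned long __init arch_syscall_addr(int nr)
{
	return (unsigned long)sys_call_table[nr];
}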
#ifdef CONFIG_EVENT_PROFILE
static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);