mirror of
https://github.com/Fishwaldo/linux-bl808.git
synced 2025-04-02 12:23:49 +00:00
x86: Add performance variants of cpumask operators
* Increase performance for systems with a large NR_CPUS count by limiting the range of the cpumask operators that loop over the bits in a cpumask_t variable. This removes a large amount of wasted cpu cycles. * Add performance variants of the cpumask operators: int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS int first_cpu_nr(mask) Number lowest set bit, or nr_cpu_ids int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids * Modify the following to use performance variants: #define num_online_cpus() cpus_weight_nr(cpu_online_map) #define num_possible_cpus() cpus_weight_nr(cpu_possible_map) #define num_present_cpus() cpus_weight_nr(cpu_present_map) #define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) #define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) #define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) * Comment added to include/linux/cpumask.h: Note: The alternate operations with the suffix "_nr" are used to limit the range of the loop to nr_cpu_ids instead of NR_CPUS when NR_CPUS > 64 for performance reasons. If NR_CPUS is <= 64 then most assembler bitmask operators execute faster with a constant range, so the operator will continue to use NR_CPUS. Another consideration is that nr_cpu_ids is initialized to NR_CPUS and isn't lowered until the possible cpus are discovered (including any disabled cpus). So early uses will span the entire range of NR_CPUS. (The net effect is that for systems with 64 or fewer CPUs there are no functional changes.) For inclusion into sched-devel/latest tree. 
Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Cc: Paul Jackson <pj@sgi.com> Cc: Christoph Lameter <clameter@sgi.com> Reviewed-by: Paul Jackson <pj@sgi.com> Reviewed-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Mike Travis <travis@sgi.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
143aa5c53b
commit
41df0d61c2
2 changed files with 71 additions and 30 deletions
|
@ -17,6 +17,20 @@
|
||||||
* For details of cpus_onto(), see bitmap_onto in lib/bitmap.c.
|
* For details of cpus_onto(), see bitmap_onto in lib/bitmap.c.
|
||||||
* For details of cpus_fold(), see bitmap_fold in lib/bitmap.c.
|
* For details of cpus_fold(), see bitmap_fold in lib/bitmap.c.
|
||||||
*
|
*
|
||||||
|
* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
|
||||||
|
* Note: The alternate operations with the suffix "_nr" are used
|
||||||
|
* to limit the range of the loop to nr_cpu_ids instead of
|
||||||
|
* NR_CPUS when NR_CPUS > 64 for performance reasons.
|
||||||
|
* If NR_CPUS is <= 64 then most assembler bitmask
|
||||||
|
* operators execute faster with a constant range, so
|
||||||
|
* the operator will continue to use NR_CPUS.
|
||||||
|
*
|
||||||
|
* Another consideration is that nr_cpu_ids is initialized
|
||||||
|
* to NR_CPUS and isn't lowered until the possible cpus are
|
||||||
|
* discovered (including any disabled cpus). So early uses
|
||||||
|
* will span the entire range of NR_CPUS.
|
||||||
|
* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
|
||||||
|
*
|
||||||
* The available cpumask operations are:
|
* The available cpumask operations are:
|
||||||
*
|
*
|
||||||
* void cpu_set(cpu, mask) turn on bit 'cpu' in mask
|
* void cpu_set(cpu, mask) turn on bit 'cpu' in mask
|
||||||
|
@ -38,12 +52,14 @@
|
||||||
* int cpus_empty(mask) Is mask empty (no bits set)?
|
* int cpus_empty(mask) Is mask empty (no bits set)?
|
||||||
* int cpus_full(mask) Is mask full (all bits set)?
|
* int cpus_full(mask) Is mask full (all bits set)?
|
||||||
* int cpus_weight(mask) Hamming weight - number of set bits
|
* int cpus_weight(mask) Hamming weight - number of set bits
|
||||||
|
* int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS
|
||||||
*
|
*
|
||||||
* void cpus_shift_right(dst, src, n) Shift right
|
* void cpus_shift_right(dst, src, n) Shift right
|
||||||
* void cpus_shift_left(dst, src, n) Shift left
|
* void cpus_shift_left(dst, src, n) Shift left
|
||||||
*
|
*
|
||||||
* int first_cpu(mask) Number lowest set bit, or NR_CPUS
|
* int first_cpu(mask) Number lowest set bit, or NR_CPUS
|
||||||
* int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS
|
* int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS
|
||||||
|
* int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids
|
||||||
*
|
*
|
||||||
* cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
|
* cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
|
||||||
* CPU_MASK_ALL Initializer - all bits set
|
* CPU_MASK_ALL Initializer - all bits set
|
||||||
|
@ -59,7 +75,8 @@
|
||||||
* void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap
|
* void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap
|
||||||
* void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz
|
* void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz
|
||||||
*
|
*
|
||||||
* for_each_cpu_mask(cpu, mask) for-loop cpu over mask
|
* for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS
|
||||||
|
* for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids
|
||||||
*
|
*
|
||||||
* int num_online_cpus() Number of online CPUs
|
* int num_online_cpus() Number of online CPUs
|
||||||
* int num_possible_cpus() Number of all possible CPUs
|
* int num_possible_cpus() Number of all possible CPUs
|
||||||
|
@ -216,15 +233,6 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
|
||||||
bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
|
bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
int __first_cpu(const cpumask_t *srcp);
|
|
||||||
#define first_cpu(src) __first_cpu(&(src))
|
|
||||||
int __next_cpu(int n, const cpumask_t *srcp);
|
|
||||||
#define next_cpu(n, src) __next_cpu((n), &(src))
|
|
||||||
#else
|
|
||||||
#define first_cpu(src) ({ (void)(src); 0; })
|
|
||||||
#define next_cpu(n, src) ({ (void)(src); 1; })
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
|
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
|
||||||
extern cpumask_t *cpumask_of_cpu_map;
|
extern cpumask_t *cpumask_of_cpu_map;
|
||||||
|
@ -343,15 +351,48 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
|
||||||
bitmap_fold(dstp->bits, origp->bits, sz, nbits);
|
bitmap_fold(dstp->bits, origp->bits, sz, nbits);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if NR_CPUS > 1
|
#if NR_CPUS == 1
|
||||||
|
|
||||||
|
#define nr_cpu_ids 1
|
||||||
|
#define first_cpu(src) ({ (void)(src); 0; })
|
||||||
|
#define next_cpu(n, src) ({ (void)(src); 1; })
|
||||||
|
#define any_online_cpu(mask) 0
|
||||||
|
#define for_each_cpu_mask(cpu, mask) \
|
||||||
|
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
|
||||||
|
|
||||||
|
#else /* NR_CPUS > 1 */
|
||||||
|
|
||||||
|
extern int nr_cpu_ids;
|
||||||
|
int __first_cpu(const cpumask_t *srcp);
|
||||||
|
int __next_cpu(int n, const cpumask_t *srcp);
|
||||||
|
int __any_online_cpu(const cpumask_t *mask);
|
||||||
|
|
||||||
|
#define first_cpu(src) __first_cpu(&(src))
|
||||||
|
#define next_cpu(n, src) __next_cpu((n), &(src))
|
||||||
|
#define any_online_cpu(mask) __any_online_cpu(&(mask))
|
||||||
#define for_each_cpu_mask(cpu, mask) \
|
#define for_each_cpu_mask(cpu, mask) \
|
||||||
for ((cpu) = first_cpu(mask); \
|
for ((cpu) = first_cpu(mask); \
|
||||||
(cpu) < NR_CPUS; \
|
(cpu) < NR_CPUS; \
|
||||||
(cpu) = next_cpu((cpu), (mask)))
|
(cpu) = next_cpu((cpu), (mask)))
|
||||||
#else /* NR_CPUS == 1 */
|
#endif
|
||||||
#define for_each_cpu_mask(cpu, mask) \
|
|
||||||
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
|
#if NR_CPUS <= 64
|
||||||
#endif /* NR_CPUS */
|
|
||||||
|
#define next_cpu_nr(n, src) next_cpu(n, src)
|
||||||
|
#define cpus_weight_nr(cpumask) cpus_weight(cpumask)
|
||||||
|
#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask)
|
||||||
|
|
||||||
|
#else /* NR_CPUS > 64 */
|
||||||
|
|
||||||
|
int __next_cpu_nr(int n, const cpumask_t *srcp);
|
||||||
|
#define next_cpu_nr(n, src) __next_cpu_nr((n), &(src))
|
||||||
|
#define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids)
|
||||||
|
#define for_each_cpu_mask_nr(cpu, mask) \
|
||||||
|
for ((cpu) = first_cpu(mask); \
|
||||||
|
(cpu) < nr_cpu_ids; \
|
||||||
|
(cpu) = next_cpu_nr((cpu), (mask)))
|
||||||
|
|
||||||
|
#endif /* NR_CPUS > 64 */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The following particular system cpumasks and operations manage
|
* The following particular system cpumasks and operations manage
|
||||||
|
@ -414,9 +455,9 @@ extern cpumask_t cpu_online_map;
|
||||||
extern cpumask_t cpu_present_map;
|
extern cpumask_t cpu_present_map;
|
||||||
|
|
||||||
#if NR_CPUS > 1
|
#if NR_CPUS > 1
|
||||||
#define num_online_cpus() cpus_weight(cpu_online_map)
|
#define num_online_cpus() cpus_weight_nr(cpu_online_map)
|
||||||
#define num_possible_cpus() cpus_weight(cpu_possible_map)
|
#define num_possible_cpus() cpus_weight_nr(cpu_possible_map)
|
||||||
#define num_present_cpus() cpus_weight(cpu_present_map)
|
#define num_present_cpus() cpus_weight_nr(cpu_present_map)
|
||||||
#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
|
#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
|
||||||
#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
|
#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
|
||||||
#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map)
|
#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map)
|
||||||
|
@ -431,17 +472,8 @@ extern cpumask_t cpu_present_map;
|
||||||
|
|
||||||
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
|
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map)
|
||||||
extern int nr_cpu_ids;
|
#define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map)
|
||||||
#define any_online_cpu(mask) __any_online_cpu(&(mask))
|
#define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map)
|
||||||
int __any_online_cpu(const cpumask_t *mask);
|
|
||||||
#else
|
|
||||||
#define nr_cpu_ids 1
|
|
||||||
#define any_online_cpu(mask) 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map)
|
|
||||||
#define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map)
|
|
||||||
#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
|
|
||||||
|
|
||||||
#endif /* __LINUX_CPUMASK_H */
|
#endif /* __LINUX_CPUMASK_H */
|
||||||
|
|
|
@ -15,6 +15,15 @@ int __next_cpu(int n, const cpumask_t *srcp)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__next_cpu);
|
EXPORT_SYMBOL(__next_cpu);
|
||||||
|
|
||||||
|
#if NR_CPUS > 64
|
||||||
|
int __next_cpu_nr(int n, const cpumask_t *srcp)
|
||||||
|
{
|
||||||
|
return min_t(int, nr_cpu_ids,
|
||||||
|
find_next_bit(srcp->bits, nr_cpu_ids, n+1));
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(__next_cpu_nr);
|
||||||
|
#endif
|
||||||
|
|
||||||
int __any_online_cpu(const cpumask_t *mask)
|
int __any_online_cpu(const cpumask_t *mask)
|
||||||
{
|
{
|
||||||
int cpu;
|
int cpu;
|
||||||
|
|
Loading…
Add table
Reference in a new issue