mirror of
https://github.com/Fishwaldo/linux-bl808.git
synced 2025-06-17 20:25:19 +00:00
x86: Add optimized popcnt variants
Add support for the hardware version of the Hamming weight function, popcnt, present in CPUs which advertise it under CPUID, Function 0x0000_0001_ECX[23]. On CPUs which don't support it, we fall back to the default lib/hweight.c software versions. A synthetic benchmark comparing popcnt with __sw_hweight64 showed almost a 3x speedup on an F10h machine. Signed-off-by: Borislav Petkov <borislav.petkov@amd.com> LKML-Reference: <20100318112015.GC11152@aftab> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
parent
1527bc8b92
commit
d61931d89b
8 changed files with 108 additions and 18 deletions
|
@ -9,7 +9,7 @@
|
|||
* The Hamming Weight of a number is the total number of bits set in it.
|
||||
*/
|
||||
|
||||
unsigned int __arch_hweight32(unsigned int w)
|
||||
unsigned int __sw_hweight32(unsigned int w)
|
||||
{
|
||||
#ifdef ARCH_HAS_FAST_MULTIPLIER
|
||||
w -= (w >> 1) & 0x55555555;
|
||||
|
@ -24,30 +24,30 @@ unsigned int __arch_hweight32(unsigned int w)
|
|||
return (res + (res >> 16)) & 0x000000FF;
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(__arch_hweight32);
|
||||
EXPORT_SYMBOL(__sw_hweight32);
|
||||
|
||||
unsigned int __arch_hweight16(unsigned int w)
|
||||
unsigned int __sw_hweight16(unsigned int w)
|
||||
{
|
||||
unsigned int res = w - ((w >> 1) & 0x5555);
|
||||
res = (res & 0x3333) + ((res >> 2) & 0x3333);
|
||||
res = (res + (res >> 4)) & 0x0F0F;
|
||||
return (res + (res >> 8)) & 0x00FF;
|
||||
}
|
||||
EXPORT_SYMBOL(__arch_hweight16);
|
||||
EXPORT_SYMBOL(__sw_hweight16);
|
||||
|
||||
unsigned int __arch_hweight8(unsigned int w)
|
||||
unsigned int __sw_hweight8(unsigned int w)
|
||||
{
|
||||
unsigned int res = w - ((w >> 1) & 0x55);
|
||||
res = (res & 0x33) + ((res >> 2) & 0x33);
|
||||
return (res + (res >> 4)) & 0x0F;
|
||||
}
|
||||
EXPORT_SYMBOL(__arch_hweight8);
|
||||
EXPORT_SYMBOL(__sw_hweight8);
|
||||
|
||||
unsigned long __arch_hweight64(__u64 w)
|
||||
unsigned long __sw_hweight64(__u64 w)
|
||||
{
|
||||
#if BITS_PER_LONG == 32
|
||||
return __arch_hweight32((unsigned int)(w >> 32)) +
|
||||
__arch_hweight32((unsigned int)w);
|
||||
return __sw_hweight32((unsigned int)(w >> 32)) +
|
||||
__sw_hweight32((unsigned int)w);
|
||||
#elif BITS_PER_LONG == 64
|
||||
#ifdef ARCH_HAS_FAST_MULTIPLIER
|
||||
w -= (w >> 1) & 0x5555555555555555ul;
|
||||
|
@ -64,4 +64,4 @@ unsigned long __arch_hweight64(__u64 w)
|
|||
#endif
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(__arch_hweight64);
|
||||
EXPORT_SYMBOL(__sw_hweight64);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue