lib/string: optimized memcpy
Patch series "lib/string: optimized mem* functions", v2.

Rewrite the generic mem{cpy,move,set} so that memory is accessed with the widest size possible, but without doing unaligned accesses.

This was originally posted as C string functions for RISC-V[1], but as it contained no RISC-V-specific code, it was proposed for the generic lib/string.c implementation.

Tested on RISC-V and on x86_64 by undefining __HAVE_ARCH_MEM{CPY,SET,MOVE} and HAVE_EFFICIENT_UNALIGNED_ACCESS.

This is the performance of memcpy() and memset() on a RISC-V machine, measured on a 32 Mbyte buffer:

memcpy:
  original aligned:    75 Mb/s
  original unaligned:  75 Mb/s
  new aligned:        114 Mb/s
  new unaligned:      107 Mb/s

memset:
  original aligned:   140 Mb/s
  original unaligned: 140 Mb/s
  new aligned:        241 Mb/s
  new unaligned:      241 Mb/s

The size increase is negligible:

$ scripts/bloat-o-meter vmlinux.orig vmlinux
add/remove: 0/0 grow/shrink: 4/1 up/down: 427/-6 (421)
Function    old   new   delta
memcpy       29   351    +322
memset       29   117     +88
strlcat      68    78     +10
strlcpy      50    57      +7
memmove      56    50      -6
Total: Before=8556964, After=8557385, chg +0.00%

These functions will be used for RISC-V initially.

[1] https://lore.kernel.org/linux-riscv/20210617152754.17960-1-mcroce@linux.microsoft.com/

This patch (of 3):

Rewrite the generic memcpy() to copy a word at a time, without generating unaligned accesses.

The procedure is made of three steps: first, copy data one byte at a time until the destination buffer is aligned to a long boundary. Then copy the data one long at a time, shifting the current and the next long to compose a long at every cycle. Finally, copy the remainder one byte at a time.

This is the improvement on RISC-V:

  original aligned:    75 Mb/s
  original unaligned:  75 Mb/s
  new aligned:        114 Mb/s
  new unaligned:      107 Mb/s

and this is the binary size increase according to bloat-o-meter:

Function    old   new   delta
memcpy       36   324    +288

Link: https://lkml.kernel.org/r/20210702123153.14093-1-mcroce@linux.microsoft.com
Link: https://lkml.kernel.org/r/20210702123153.14093-2-mcroce@linux.microsoft.com
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Cc: Nick Kossifidis <mick@ics.forth.gr>
Cc: Guo Ren <guoren@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David Laight <David.Laight@aculab.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Emil Renner Berthing <kernel@esmil.dk>
Cc: Drew Fustini <drew@beagleboard.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
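For readers who want to reproduce this style of measurement, a rough userspace harness is sketched below. It is illustrative only: it times whatever memcpy() the C library provides, not the kernel routine, and the iteration count, clock source, and the deliberate one-byte source misalignment are assumptions of the sketch, not part of the patch.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define BUF_SIZE (32UL << 20)  /* 32 Mbyte, as in the commit message */
#define ITERS 16               /* arbitrary; raise for stabler numbers */

int main(void)
{
	unsigned char *src = malloc(BUF_SIZE + 1);
	unsigned char *dst = malloc(BUF_SIZE + 1);
	struct timespec t0, t1;
	double secs;

	if (!src || !dst)
		return 1;
	memset(src, 0x5a, BUF_SIZE + 1);

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (int i = 0; i < ITERS; i++)
		memcpy(dst, src + 1, BUF_SIZE); /* "+ 1" forces an unaligned source */
	clock_gettime(CLOCK_MONOTONIC, &t1);

	secs = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
	printf("%.1f Mbyte/s\n", ITERS * (double)(BUF_SIZE >> 20) / secs);
	return 0;
}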
This commit is contained in:
parent 917c37e86b
commit c33f0843e9

1 changed file with 77 additions and 3 deletions:
--- a/lib/string.c
+++ b/lib/string.c
@@ -33,6 +33,23 @@
 #include <asm/word-at-a-time.h>
 #include <asm/page.h>
 
+#define BYTES_LONG	sizeof(long)
+#define WORD_MASK	(BYTES_LONG - 1)
+#define MIN_THRESHOLD	(BYTES_LONG * 2)
+
+/* convenience union to avoid cast between different pointer types */
+union types {
+	u8 *as_u8;
+	unsigned long *as_ulong;
+	uintptr_t as_uptr;
+};
+
+union const_types {
+	const u8 *as_u8;
+	const unsigned long *as_ulong;
+	uintptr_t as_uptr;
+};
+
 #ifndef __HAVE_ARCH_STRNCASECMP
 /**
  * strncasecmp - Case insensitive, length-limited string comparison
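The unions above let one object walk the same buffer byte-wise or long-wise without casting between pointer types. A minimal standalone sketch of the idea (assuming the kernel's u8 maps to uint8_t; not part of the patch):

#include <stdint.h>
#include <stdio.h>

union types {
	uint8_t *as_u8;
	unsigned long *as_ulong;
	uintptr_t as_uptr;
};

int main(void)
{
	unsigned long buf[2] = { 0, 0 };
	union types d = { .as_u8 = (uint8_t *)buf };

	*d.as_u8++ = 0xff;           /* byte store, advances one byte */
	d.as_u8 += sizeof(long) - 1; /* step up to the next long boundary */
	*d.as_ulong++ = ~0UL;        /* word store, advances one long */

	printf("buf = { %#lx, %#lx }\n", buf[0], buf[1]);
	return 0;
}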
@@ -869,6 +886,13 @@ EXPORT_SYMBOL(memset64);
 #endif
 
 #ifndef __HAVE_ARCH_MEMCPY
+
+#ifdef __BIG_ENDIAN
+#define MERGE_UL(h, l, d) ((h) << ((d) * 8) | (l) >> ((BYTES_LONG - (d)) * 8))
+#else
+#define MERGE_UL(h, l, d) ((h) >> ((d) * 8) | (l) << ((BYTES_LONG - (d)) * 8))
+#endif
+
 /**
  * memcpy - Copy one area of memory to another
  * @dest: Where to copy to
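To see what MERGE_UL computes, here is a worked little-endian example (a standalone sketch with made-up byte values, not part of the patch): with 64-bit longs and distance = 1, shifting the high word right by one byte drops the stale low byte, and shifting the next word left by seven bytes pulls its first byte in at the top.

#include <stdio.h>

#define BYTES_LONG sizeof(long)
/* little-endian variant from the patch */
#define MERGE_UL(h, l, d) ((h) >> ((d) * 8) | (l) << ((BYTES_LONG - (d)) * 8))

int main(void)
{
	/* Source bytes 0x10..0x1f in memory; we want the 8 bytes starting
	 * at offset 1 (0x11..0x18), but may only load aligned words.
	 */
	unsigned long last = 0x1716151413121110UL; /* bytes at offsets 0..7  */
	unsigned long next = 0x1f1e1d1c1b1a1918UL; /* bytes at offsets 8..15 */

	/* distance = 1: drop stale byte 0x10, pull in byte 0x18 */
	printf("%#lx\n", MERGE_UL(last, next, 1)); /* 0x1817161514131211 */
	return 0;
}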
@@ -880,14 +904,64 @@
  */
 void *memcpy(void *dest, const void *src, size_t count)
 {
-	char *tmp = dest;
-	const char *s = src;
+	union const_types s = { .as_u8 = src };
+	union types d = { .as_u8 = dest };
+	int distance = 0;
 
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+		if (count < MIN_THRESHOLD)
+			goto copy_remainder;
+
+		/* Copy a byte at time until destination is aligned. */
+		for (; d.as_uptr & WORD_MASK; count--)
+			*d.as_u8++ = *s.as_u8++;
+
+		distance = s.as_uptr & WORD_MASK;
+	}
+
+	if (distance) {
+		unsigned long last, next;
+
+		/*
+		 * s is distance bytes ahead of d, and d just reached
+		 * the alignment boundary. Move s backward to word align it
+		 * and shift data to compensate for distance, in order to do
+		 * word-by-word copy.
+		 */
+		s.as_u8 -= distance;
+
+		next = s.as_ulong[0];
+		for (; count >= BYTES_LONG; count -= BYTES_LONG) {
+			last = next;
+			next = s.as_ulong[1];
+
+			d.as_ulong[0] = MERGE_UL(last, next, distance);
+
+			d.as_ulong++;
+			s.as_ulong++;
+		}
+
+		/* Restore s with the original offset. */
+		s.as_u8 += distance;
+	} else {
+		/*
+		 * If the source and dest lower bits are the same, do a simple
+		 * 32/64 bit wide copy.
+		 */
+		for (; count >= BYTES_LONG; count -= BYTES_LONG)
+			*d.as_ulong++ = *s.as_ulong++;
+	}
+
+copy_remainder:
 	while (count--)
-		*tmp++ = *s++;
+		*d.as_u8++ = *s.as_u8++;
 
 	return dest;
 }
 EXPORT_SYMBOL(memcpy);
+
+#undef MERGE_UL
+
 #endif
 
 #ifndef __HAVE_ARCH_MEMMOVE
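A natural sanity check for a copy routine like this is to sweep every source/destination misalignment pair against a byte-at-a-time reference. The sketch below does that in userspace against the platform memcpy(); to test the patch's implementation instead, one would compile its function into the harness under another name (buffer sizes and offset ranges here are arbitrary assumptions).

#include <assert.h>
#include <stddef.h>
#include <string.h>

/* Byte-at-a-time reference copy. */
static void ref_copy(unsigned char *dest, const unsigned char *src, size_t n)
{
	while (n--)
		*dest++ = *src++;
}

int main(void)
{
	unsigned char src[64], got[64], want[64];

	for (size_t i = 0; i < sizeof(src); i++)
		src[i] = (unsigned char)i;

	/* All src/dst misalignments within a word, several lengths. */
	for (size_t soff = 0; soff < 8; soff++)
		for (size_t doff = 0; doff < 8; doff++)
			for (size_t len = 0; len + soff <= 40; len++) {
				memset(got, 0, sizeof(got));
				memset(want, 0, sizeof(want));
				memcpy(got + doff, src + soff, len);
				ref_copy(want + doff, src + soff, len);
				assert(memcmp(got, want, sizeof(got)) == 0);
			}
	return 0;
}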