Message ID | 20230202181149.2181553-7-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | Improve generic string routines | expand |
On Thu, Feb 2, 2023 at 12:12 PM Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > > New algorithm now calls strchrnul. > > Checked on x86_64-linux-gnu, i686-linux-gnu, powerpc-linux-gnu, > and powerpc64-linux-gnu by removing the arch-specific assembly > implementation and disabling multi-arch (it covers both LE and BE > for 64 and 32 bits). > > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > --- > string/strchr.c | 164 ++-------------------------------------- > sysdeps/s390/strchr-c.c | 11 +-- > 2 files changed, 14 insertions(+), 161 deletions(-) > > diff --git a/string/strchr.c b/string/strchr.c > index 1572b8b42e..90ae0b69fc 100644 > --- a/string/strchr.c > +++ b/string/strchr.c > @@ -1,10 +1,5 @@ > /* Copyright (C) 1991-2023 Free Software Foundation, Inc. > This file is part of the GNU C Library. > - Based on strlen implementation by Torbjorn Granlund (tege@sics.se), > - with help from Dan Sahlin (dan@sics.se) and > - bug fix and commentary by Jim Blandy (jimb@ai.mit.edu); > - adaptation to strchr suggested by Dick Karpinski (dick@cca.ucsf.edu), > - and implemented by Roland McGrath (roland@ai.mit.edu). > > The GNU C Library is free software; you can redistribute it and/or > modify it under the terms of the GNU Lesser General Public > @@ -21,165 +16,22 @@ > <https://www.gnu.org/licenses/>. */ > > #include <string.h> > -#include <stdlib.h> > > #undef strchr > +#undef index > > -#ifndef STRCHR > -# define STRCHR strchr > +#ifdef STRCHR > +# define strchr STRCHR > #endif > > /* Find the first occurrence of C in S. */ > char * > -STRCHR (const char *s, int c_in) > +strchr (const char *s, int c_in) > { > - const unsigned char *char_ptr; > - const unsigned long int *longword_ptr; > - unsigned long int longword, magic_bits, charmask; > - unsigned char c; > - > - c = (unsigned char) c_in; > - > - /* Handle the first few characters by reading one character at a time. > - Do this until CHAR_PTR is aligned on a longword boundary. */ > - for (char_ptr = (const unsigned char *) s; > - ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0; > - ++char_ptr) > - if (*char_ptr == c) > - return (void *) char_ptr; > - else if (*char_ptr == '\0') > - return NULL; > - > - /* All these elucidatory comments refer to 4-byte longwords, > - but the theory applies equally well to 8-byte longwords. */ > - > - longword_ptr = (unsigned long int *) char_ptr; > - > - /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits > - the "holes." Note that there is a hole just to the left of > - each byte, with an extra at the end: > - > - bits: 01111110 11111110 11111110 11111111 > - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD > - > - The 1-bits make sure that carries propagate to the next 0-bit. > - The 0-bits provide holes for carries to fall into. */ > - magic_bits = -1; > - magic_bits = magic_bits / 0xff * 0xfe << 1 >> 1 | 1; > - > - /* Set up a longword, each of whose bytes is C. */ > - charmask = c | (c << 8); > - charmask |= charmask << 16; > - if (sizeof (longword) > 4) > - /* Do the shift in two steps to avoid a warning if long has 32 bits. */ > - charmask |= (charmask << 16) << 16; > - if (sizeof (longword) > 8) > - abort (); > - > - /* Instead of the traditional loop which tests each character, > - we will test a longword at a time. The tricky part is testing > - if *any of the four* bytes in the longword in question are zero. */ > - for (;;) > - { > - /* We tentatively exit the loop if adding MAGIC_BITS to > - LONGWORD fails to change any of the hole bits of LONGWORD. > - > - 1) Is this safe? Will it catch all the zero bytes? > - Suppose there is a byte with all zeros. Any carry bits > - propagating from its left will fall into the hole at its > - least significant bit and stop. Since there will be no > - carry from its most significant bit, the LSB of the > - byte to the left will be unchanged, and the zero will be > - detected. > - > - 2) Is this worthwhile? Will it ignore everything except > - zero bytes? Suppose every byte of LONGWORD has a bit set > - somewhere. There will be a carry into bit 8. If bit 8 > - is set, this will carry into bit 16. If bit 8 is clear, > - one of bits 9-15 must be set, so there will be a carry > - into bit 16. Similarly, there will be a carry into bit > - 24. If one of bits 24-30 is set, there will be a carry > - into bit 31, so all of the hole bits will be changed. > - > - The one misfire occurs when bits 24-30 are clear and bit > - 31 is set; in this case, the hole at bit 31 is not > - changed. If we had access to the processor carry flag, > - we could close this loophole by putting the fourth hole > - at bit 32! > - > - So it ignores everything except 128's, when they're aligned > - properly. > - > - 3) But wait! Aren't we looking for C as well as zero? > - Good point. So what we do is XOR LONGWORD with a longword, > - each of whose bytes is C. This turns each byte that is C > - into a zero. */ > - > - longword = *longword_ptr++; > - > - /* Add MAGIC_BITS to LONGWORD. */ > - if ((((longword + magic_bits) > - > - /* Set those bits that were unchanged by the addition. */ > - ^ ~longword) > - > - /* Look at only the hole bits. If any of the hole bits > - are unchanged, most likely one of the bytes was a > - zero. */ > - & ~magic_bits) != 0 > - > - /* That caught zeroes. Now test for C. */ > - || ((((longword ^ charmask) + magic_bits) ^ ~(longword ^ charmask)) > - & ~magic_bits) != 0) > - { > - /* Which of the bytes was C or zero? > - If none of them were, it was a misfire; continue the search. */ > - > - const unsigned char *cp = (const unsigned char *) (longword_ptr - 1); > - > - if (*cp == c) > - return (char *) cp; > - else if (*cp == '\0') > - return NULL; > - if (*++cp == c) > - return (char *) cp; > - else if (*cp == '\0') > - return NULL; > - if (*++cp == c) > - return (char *) cp; > - else if (*cp == '\0') > - return NULL; > - if (*++cp == c) > - return (char *) cp; > - else if (*cp == '\0') > - return NULL; > - if (sizeof (longword) > 4) > - { > - if (*++cp == c) > - return (char *) cp; > - else if (*cp == '\0') > - return NULL; > - if (*++cp == c) > - return (char *) cp; > - else if (*cp == '\0') > - return NULL; > - if (*++cp == c) > - return (char *) cp; > - else if (*cp == '\0') > - return NULL; > - if (*++cp == c) > - return (char *) cp; > - else if (*cp == '\0') > - return NULL; > - } > - } > - } > - > - return NULL; > + char *r = __strchrnul (s, c_in); > + return (*(unsigned char *)r == (unsigned char)c_in) ? r : NULL; > } > - > -#ifdef weak_alias > -# undef index > +#ifndef STRCHR > weak_alias (strchr, index) > -#endif > libc_hidden_builtin_def (strchr) > +#endif > diff --git a/sysdeps/s390/strchr-c.c b/sysdeps/s390/strchr-c.c > index c00f2cceea..90822ae0f4 100644 > --- a/sysdeps/s390/strchr-c.c > +++ b/sysdeps/s390/strchr-c.c > @@ -21,13 +21,14 @@ > #if HAVE_STRCHR_C > # if HAVE_STRCHR_IFUNC > # define STRCHR STRCHR_C > -# undef weak_alias > +# endif > + > +# include <string/strchr.c> > + > +# if HAVE_STRCHR_IFUNC > # if defined SHARED && IS_IN (libc) > -# undef libc_hidden_builtin_def > -# define libc_hidden_builtin_def(name) \ > - __hidden_ver1 (__strchr_c, __GI_strchr, __strchr_c); > +__hidden_ver1 (__strchr_c, __GI_strchr, __strchr_c); > # endif > # endif > > -# include <string/strchr.c> > #endif > -- > 2.34.1 > LGTM. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
On 2/2/23 08:11, Adhemerval Zanella wrote: > New algorithm now calls strchrnul. > > Checked on x86_64-linux-gnu, i686-linux-gnu, powerpc-linux-gnu, > and powerpc64-linux-gnu by removing the arch-specific assembly > implementation and disabling multi-arch (it covers both LE and BE > for 64 and 32 bits). > > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> strchrnul needs libc_hidden_builtin_proto. On riscv64: 8: 00000097 auipc ra,0x0 8: R_RISCV_CALL_PLT __strchrnul 8: R_RISCV_RELAX *ABS* c: 000080e7 jalr ra # 8 <__GI_strchr+0x8> r~
On 03/02/23 23:58, Richard Henderson wrote: > On 2/2/23 08:11, Adhemerval Zanella wrote: >> New algorithm now calls strchrnul. >> >> Checked on x86_64-linux-gnu, i686-linux-gnu, powerpc-linux-gnu, >> and powerpc64-linux-gnu by removing the arch-specific assembly >> implementation and disabling multi-arch (it covers both LE and BE >> for 64 and 32 bits). >> >> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > > strchrnul needs libc_hidden_builtin_proto. On riscv64: > > 8: 00000097 auipc ra,0x0 > 8: R_RISCV_CALL_PLT __strchrnul > 8: R_RISCV_RELAX *ABS* > c: 000080e7 jalr ra # 8 <__GI_strchr+0x8> It is similar to x86_64 as well: x86_64-linux-gnu$ objdump -dwr posix/execvpe.os [...] 234: 4c 89 ff mov %r15,%rdi 237: e8 00 00 00 00 call 23c <__execvpe_common+0x12c> 238: R_X86_64_PLT32 __strchrnul-0x4 [...] But the static linker ends up generating a local call as expected: $ riscv64-glibc-linux-gnu-objdump -dr libc.so [...] 000000000008726e <strchr>: 8726e: 7179 addi sp,sp,-48 87270: ec26 sd s1,24(sp) 87272: 000e9497 auipc s1,0xe9 87276: 4264b483 ld s1,1062(s1) # 170698 <__stack_chk_guard@GLIBC_2.27> 8727a: 609c ld a5,0(s1) 8727c: e43e sd a5,8(sp) 8727e: 4781 li a5,0 87280: f022 sd s0,32(sp) 87282: f406 sd ra,40(sp) 87284: 842e mv s0,a1 87286: 02a000ef jal ra,872b0 <strchrnul> [...] The intra PLT should trigger a regression with make check local-plt test, and I did a make check for all architecture to make sure I am not missing anything. I will send a patch to fix this internal inconsistencies.
diff --git a/string/strchr.c b/string/strchr.c index 1572b8b42e..90ae0b69fc 100644 --- a/string/strchr.c +++ b/string/strchr.c @@ -1,10 +1,5 @@ /* Copyright (C) 1991-2023 Free Software Foundation, Inc. This file is part of the GNU C Library. - Based on strlen implementation by Torbjorn Granlund (tege@sics.se), - with help from Dan Sahlin (dan@sics.se) and - bug fix and commentary by Jim Blandy (jimb@ai.mit.edu); - adaptation to strchr suggested by Dick Karpinski (dick@cca.ucsf.edu), - and implemented by Roland McGrath (roland@ai.mit.edu). The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -21,165 +16,22 @@ <https://www.gnu.org/licenses/>. */ #include <string.h> -#include <stdlib.h> #undef strchr +#undef index -#ifndef STRCHR -# define STRCHR strchr +#ifdef STRCHR +# define strchr STRCHR #endif /* Find the first occurrence of C in S. */ char * -STRCHR (const char *s, int c_in) +strchr (const char *s, int c_in) { - const unsigned char *char_ptr; - const unsigned long int *longword_ptr; - unsigned long int longword, magic_bits, charmask; - unsigned char c; - - c = (unsigned char) c_in; - - /* Handle the first few characters by reading one character at a time. - Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = (const unsigned char *) s; - ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0; - ++char_ptr) - if (*char_ptr == c) - return (void *) char_ptr; - else if (*char_ptr == '\0') - return NULL; - - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to 8-byte longwords. */ - - longword_ptr = (unsigned long int *) char_ptr; - - /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits - the "holes." Note that there is a hole just to the left of - each byte, with an extra at the end: - - bits: 01111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD - - The 1-bits make sure that carries propagate to the next 0-bit. - The 0-bits provide holes for carries to fall into. */ - magic_bits = -1; - magic_bits = magic_bits / 0xff * 0xfe << 1 >> 1 | 1; - - /* Set up a longword, each of whose bytes is C. */ - charmask = c | (c << 8); - charmask |= charmask << 16; - if (sizeof (longword) > 4) - /* Do the shift in two steps to avoid a warning if long has 32 bits. */ - charmask |= (charmask << 16) << 16; - if (sizeof (longword) > 8) - abort (); - - /* Instead of the traditional loop which tests each character, - we will test a longword at a time. The tricky part is testing - if *any of the four* bytes in the longword in question are zero. */ - for (;;) - { - /* We tentatively exit the loop if adding MAGIC_BITS to - LONGWORD fails to change any of the hole bits of LONGWORD. - - 1) Is this safe? Will it catch all the zero bytes? - Suppose there is a byte with all zeros. Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes? Suppose every byte of LONGWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24. If one of bits 24-30 is set, there will be a carry - into bit 31, so all of the hole bits will be changed. - - The one misfire occurs when bits 24-30 are clear and bit - 31 is set; in this case, the hole at bit 31 is not - changed. If we had access to the processor carry flag, - we could close this loophole by putting the fourth hole - at bit 32! - - So it ignores everything except 128's, when they're aligned - properly. - - 3) But wait! Aren't we looking for C as well as zero? - Good point. So what we do is XOR LONGWORD with a longword, - each of whose bytes is C. This turns each byte that is C - into a zero. */ - - longword = *longword_ptr++; - - /* Add MAGIC_BITS to LONGWORD. */ - if ((((longword + magic_bits) - - /* Set those bits that were unchanged by the addition. */ - ^ ~longword) - - /* Look at only the hole bits. If any of the hole bits - are unchanged, most likely one of the bytes was a - zero. */ - & ~magic_bits) != 0 - - /* That caught zeroes. Now test for C. */ - || ((((longword ^ charmask) + magic_bits) ^ ~(longword ^ charmask)) - & ~magic_bits) != 0) - { - /* Which of the bytes was C or zero? - If none of them were, it was a misfire; continue the search. */ - - const unsigned char *cp = (const unsigned char *) (longword_ptr - 1); - - if (*cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (sizeof (longword) > 4) - { - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - } - } - } - - return NULL; + char *r = __strchrnul (s, c_in); + return (*(unsigned char *)r == (unsigned char)c_in) ? r : NULL; } - -#ifdef weak_alias -# undef index +#ifndef STRCHR weak_alias (strchr, index) -#endif libc_hidden_builtin_def (strchr) +#endif diff --git a/sysdeps/s390/strchr-c.c b/sysdeps/s390/strchr-c.c index c00f2cceea..90822ae0f4 100644 --- a/sysdeps/s390/strchr-c.c +++ b/sysdeps/s390/strchr-c.c @@ -21,13 +21,14 @@ #if HAVE_STRCHR_C # if HAVE_STRCHR_IFUNC # define STRCHR STRCHR_C -# undef weak_alias +# endif + +# include <string/strchr.c> + +# if HAVE_STRCHR_IFUNC # if defined SHARED && IS_IN (libc) -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strchr_c, __GI_strchr, __strchr_c); +__hidden_ver1 (__strchr_c, __GI_strchr, __strchr_c); # endif # endif -# include <string/strchr.c> #endif
New algorithm now calls strchrnul. Checked on x86_64-linux-gnu, i686-linux-gnu, powerpc-linux-gnu, and powerpc64-linux-gnu by removing the arch-specific assembly implementation and disabling multi-arch (it covers both LE and BE for 64 and 32 bits). Reviewed-by: Richard Henderson <richard.henderson@linaro.org> --- string/strchr.c | 164 ++-------------------------------------- sysdeps/s390/strchr-c.c | 11 +-- 2 files changed, 14 insertions(+), 161 deletions(-)