Message ID | 20220323215734.3927131-9-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v1,01/23] benchtests: Use json-lib in bench-strchr.c | expand |
On Wed, Mar 23, 2022 at 3:00 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > The generic implementation is faster. > > geometric_mean(N=20) of all benchmarks New / Original: .678 > > All string/memory tests pass. > --- > Geometric Mean N=20 runs; All functions page aligned > len, align1, align2, pos, New Time / Old Time > 0, 0, 0, 512, 0.054 > 1, 0, 0, 512, 0.055 > 1, 1, 0, 512, 0.051 > 1, 0, 1, 512, 0.054 > 1, 1, 1, 512, 0.054 > 2, 0, 0, 512, 0.861 > 2, 2, 0, 512, 0.861 > 2, 0, 2, 512, 0.861 > 2, 2, 2, 512, 0.864 > 3, 0, 0, 512, 0.854 > 3, 3, 0, 512, 0.848 > 3, 0, 3, 512, 0.845 > 3, 3, 3, 512, 0.85 > 4, 0, 0, 512, 0.851 > 4, 4, 0, 512, 0.85 > 4, 0, 4, 512, 0.852 > 4, 4, 4, 512, 0.849 > 5, 0, 0, 512, 0.938 > 5, 5, 0, 512, 0.94 > 5, 0, 5, 512, 0.864 > 5, 5, 5, 512, 0.86 > 6, 0, 0, 512, 0.858 > 6, 6, 0, 512, 0.869 > 6, 0, 6, 512, 0.847 > 6, 6, 6, 512, 0.868 > 7, 0, 0, 512, 0.867 > 7, 7, 0, 512, 0.861 > 7, 0, 7, 512, 0.864 > 7, 7, 7, 512, 0.863 > 8, 0, 0, 512, 0.884 > 8, 0, 8, 512, 0.884 > 9, 0, 0, 512, 0.886 > 9, 1, 0, 512, 0.894 > 9, 0, 9, 512, 0.889 > 9, 1, 9, 512, 0.886 > 10, 0, 0, 512, 0.859 > 10, 2, 0, 512, 0.859 > 10, 0, 10, 512, 0.862 > 10, 2, 10, 512, 0.861 > 11, 0, 0, 512, 0.846 > 11, 3, 0, 512, 0.865 > 11, 0, 11, 512, 0.859 > 11, 3, 11, 512, 0.862 > 12, 0, 0, 512, 0.858 > 12, 4, 0, 512, 0.857 > 12, 0, 12, 512, 0.964 > 12, 4, 12, 512, 0.876 > 13, 0, 0, 512, 0.827 > 13, 5, 0, 512, 0.805 > 13, 0, 13, 512, 0.821 > 13, 5, 13, 512, 0.825 > 14, 0, 0, 512, 0.786 > 14, 6, 0, 512, 0.786 > 14, 0, 14, 512, 0.803 > 14, 6, 14, 512, 0.783 > 15, 0, 0, 512, 0.778 > 15, 7, 0, 512, 0.792 > 15, 0, 15, 512, 0.796 > 15, 7, 15, 512, 0.799 > 16, 0, 0, 512, 0.803 > 16, 0, 16, 512, 0.815 > 17, 0, 0, 512, 0.812 > 17, 1, 0, 512, 0.826 > 17, 0, 17, 512, 0.803 > 17, 1, 17, 512, 0.856 > 18, 0, 0, 512, 0.801 > 18, 2, 0, 512, 0.886 > 18, 0, 18, 512, 0.805 > 18, 2, 18, 512, 0.807 > 19, 0, 0, 512, 0.814 > 19, 3, 0, 512, 0.804 > 19, 0, 19, 512, 0.813 > 19, 3, 19, 512, 0.814 > 20, 0, 0, 
512, 0.885 > 20, 4, 0, 512, 0.799 > 20, 0, 20, 512, 0.826 > 20, 4, 20, 512, 0.808 > 21, 0, 0, 512, 0.816 > 21, 5, 0, 512, 0.824 > 21, 0, 21, 512, 0.819 > 21, 5, 21, 512, 0.826 > 22, 0, 0, 512, 0.814 > 22, 6, 0, 512, 0.824 > 22, 0, 22, 512, 0.81 > 22, 6, 22, 512, 0.806 > 23, 0, 0, 512, 0.825 > 23, 7, 0, 512, 0.829 > 23, 0, 23, 512, 0.809 > 23, 7, 23, 512, 0.823 > 24, 0, 0, 512, 0.829 > 24, 0, 24, 512, 0.823 > 25, 0, 0, 512, 0.864 > 25, 1, 0, 512, 0.895 > 25, 0, 25, 512, 0.88 > 25, 1, 25, 512, 0.848 > 26, 0, 0, 512, 0.903 > 26, 2, 0, 512, 0.888 > 26, 0, 26, 512, 0.894 > 26, 2, 26, 512, 0.89 > 27, 0, 0, 512, 0.914 > 27, 3, 0, 512, 0.917 > 27, 0, 27, 512, 0.902 > 27, 3, 27, 512, 0.887 > 28, 0, 0, 512, 0.887 > 28, 4, 0, 512, 0.877 > 28, 0, 28, 512, 0.893 > 28, 4, 28, 512, 0.866 > 29, 0, 0, 512, 0.885 > 29, 5, 0, 512, 0.907 > 29, 0, 29, 512, 0.894 > 29, 5, 29, 512, 0.906 > 30, 0, 0, 512, 0.88 > 30, 6, 0, 512, 0.898 > 30, 0, 30, 512, 0.9 > 30, 6, 30, 512, 0.895 > 31, 0, 0, 512, 0.893 > 31, 7, 0, 512, 0.874 > 31, 0, 31, 512, 0.894 > 31, 7, 31, 512, 0.899 > 4, 0, 0, 32, 0.618 > 4, 1, 0, 32, 0.627 > 4, 0, 1, 32, 0.625 > 4, 1, 1, 32, 0.613 > 4, 0, 0, 64, 0.913 > 4, 2, 0, 64, 0.801 > 4, 0, 2, 64, 0.759 > 4, 2, 2, 64, 0.761 > 4, 0, 0, 128, 0.822 > 4, 3, 0, 128, 0.863 > 4, 0, 3, 128, 0.867 > 4, 3, 3, 128, 0.917 > 4, 0, 0, 256, 0.816 > 4, 4, 0, 256, 0.812 > 4, 0, 4, 256, 0.803 > 4, 4, 4, 256, 0.811 > 4, 5, 0, 512, 0.848 > 4, 0, 5, 512, 0.843 > 4, 5, 5, 512, 0.857 > 4, 0, 0, 1024, 0.886 > 4, 6, 0, 1024, 0.887 > 4, 0, 6, 1024, 0.881 > 4, 6, 6, 1024, 0.873 > 4, 0, 0, 2048, 0.892 > 4, 7, 0, 2048, 0.894 > 4, 0, 7, 2048, 0.89 > 4, 7, 7, 2048, 0.874 > 10, 1, 0, 64, 0.946 > 10, 1, 1, 64, 0.81 > 10, 2, 0, 64, 0.804 > 10, 2, 2, 64, 0.82 > 10, 3, 0, 64, 0.772 > 10, 3, 3, 64, 0.772 > 10, 4, 0, 64, 0.748 > 10, 4, 4, 64, 0.751 > 10, 5, 0, 64, 0.76 > 10, 5, 5, 64, 0.76 > 10, 6, 0, 64, 0.726 > 10, 6, 6, 64, 0.718 > 10, 7, 0, 64, 0.724 > 10, 7, 7, 64, 0.72 > 6, 0, 0, 0, 0.415 > 6, 0, 0, 1, 0.423 
> 6, 0, 1, 1, 0.412 > 6, 0, 0, 2, 0.433 > 6, 0, 2, 2, 0.434 > 6, 0, 0, 3, 0.427 > 6, 0, 3, 3, 0.428 > 6, 0, 0, 4, 0.465 > 6, 0, 4, 4, 0.466 > 6, 0, 0, 5, 0.463 > 6, 0, 5, 5, 0.468 > 6, 0, 0, 6, 0.435 > 6, 0, 6, 6, 0.444 > 6, 0, 0, 7, 0.41 > 6, 0, 7, 7, 0.42 > 6, 0, 0, 8, 0.474 > 6, 0, 8, 8, 0.501 > 6, 0, 0, 9, 0.471 > 6, 0, 9, 9, 0.489 > 6, 0, 0, 10, 0.462 > 6, 0, 10, 10, 0.46 > 6, 0, 0, 11, 0.459 > 6, 0, 11, 11, 0.458 > 6, 0, 0, 12, 0.516 > 6, 0, 12, 12, 0.51 > 6, 0, 0, 13, 0.494 > 6, 0, 13, 13, 0.524 > 6, 0, 0, 14, 0.486 > 6, 0, 14, 14, 0.5 > 6, 0, 0, 15, 0.48 > 6, 0, 15, 15, 0.501 > 6, 0, 0, 16, 0.54 > 6, 0, 16, 16, 0.538 > 6, 0, 0, 17, 0.503 > 6, 0, 17, 17, 0.541 > 6, 0, 0, 18, 0.537 > 6, 0, 18, 18, 0.549 > 6, 0, 0, 19, 0.527 > 6, 0, 19, 19, 0.537 > 6, 0, 0, 20, 0.539 > 6, 0, 20, 20, 0.554 > 6, 0, 0, 21, 0.558 > 6, 0, 21, 21, 0.541 > 6, 0, 0, 22, 0.546 > 6, 0, 22, 22, 0.561 > 6, 0, 0, 23, 0.54 > 6, 0, 23, 23, 0.536 > 6, 0, 0, 24, 0.565 > 6, 0, 24, 24, 0.584 > 6, 0, 0, 25, 0.563 > 6, 0, 25, 25, 0.58 > 6, 0, 0, 26, 0.555 > 6, 0, 26, 26, 0.584 > 6, 0, 0, 27, 0.569 > 6, 0, 27, 27, 0.587 > 6, 0, 0, 28, 0.612 > 6, 0, 28, 28, 0.623 > 6, 0, 0, 29, 0.604 > 6, 0, 29, 29, 0.621 > 6, 0, 0, 30, 0.59 > 6, 0, 30, 30, 0.609 > 6, 0, 0, 31, 0.577 > 6, 0, 31, 31, 0.588 > 6, 0, 0, 32, 0.621 > 6, 0, 32, 32, 0.608 > 6, 0, 0, 33, 0.601 > 6, 0, 33, 33, 0.623 > 6, 0, 0, 34, 0.614 > 6, 0, 34, 34, 0.615 > 6, 0, 0, 35, 0.598 > 6, 0, 35, 35, 0.608 > 6, 0, 0, 36, 0.626 > 6, 0, 36, 36, 0.634 > 6, 0, 0, 37, 0.62 > 6, 0, 37, 37, 0.634 > 6, 0, 0, 38, 0.612 > 6, 0, 38, 38, 0.637 > 6, 0, 0, 39, 0.627 > 6, 0, 39, 39, 0.612 > 6, 0, 0, 40, 0.661 > 6, 0, 40, 40, 0.674 > 6, 0, 0, 41, 0.633 > 6, 0, 41, 41, 0.643 > 6, 0, 0, 42, 0.634 > 6, 0, 42, 42, 0.636 > 6, 0, 0, 43, 0.619 > 6, 0, 43, 43, 0.625 > 6, 0, 0, 44, 0.654 > 6, 0, 44, 44, 0.654 > 6, 0, 0, 45, 0.647 > 6, 0, 45, 45, 0.649 > 6, 0, 0, 46, 0.651 > 6, 0, 46, 46, 0.651 > 6, 0, 0, 47, 0.646 > 6, 0, 47, 47, 0.648 > 6, 0, 0, 48, 0.662 > 6, 0, 48, 48, 
0.664 > 6, 0, 0, 49, 0.68 > 6, 0, 49, 49, 0.667 > 6, 0, 0, 50, 0.654 > 6, 0, 50, 50, 0.659 > 6, 0, 0, 51, 0.638 > 6, 0, 51, 51, 0.639 > 6, 0, 0, 52, 0.665 > 6, 0, 52, 52, 0.669 > 6, 0, 0, 53, 0.658 > 6, 0, 53, 53, 0.656 > 6, 0, 0, 54, 0.669 > 6, 0, 54, 54, 0.67 > 6, 0, 0, 55, 0.668 > 6, 0, 55, 55, 0.664 > 6, 0, 0, 56, 0.701 > 6, 0, 56, 56, 0.695 > 6, 0, 0, 57, 0.687 > 6, 0, 57, 57, 0.696 > 6, 0, 0, 58, 0.693 > 6, 0, 58, 58, 0.704 > 6, 0, 0, 59, 0.695 > 6, 0, 59, 59, 0.708 > 6, 0, 0, 60, 0.708 > 6, 0, 60, 60, 0.728 > 6, 0, 0, 61, 0.708 > 6, 0, 61, 61, 0.71 > 6, 0, 0, 62, 0.715 > 6, 0, 62, 62, 0.705 > 6, 0, 0, 63, 0.677 > 6, 0, 63, 63, 0.702 > > .../{strcspn-sse2.S => strcspn-sse2.c} | 8 +- > sysdeps/x86_64/strcspn.S | 119 ------------------ > 2 files changed, 4 insertions(+), 123 deletions(-) > rename sysdeps/x86_64/multiarch/{strcspn-sse2.S => strcspn-sse2.c} (85%) > delete mode 100644 sysdeps/x86_64/strcspn.S > > diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.S b/sysdeps/x86_64/multiarch/strcspn-sse2.c > similarity index 85% > rename from sysdeps/x86_64/multiarch/strcspn-sse2.S > rename to sysdeps/x86_64/multiarch/strcspn-sse2.c > index f97e856e1f..3a04bb39fc 100644 > --- a/sysdeps/x86_64/multiarch/strcspn-sse2.S > +++ b/sysdeps/x86_64/multiarch/strcspn-sse2.c > @@ -1,4 +1,4 @@ > -/* strcspn optimized with SSE2. > +/* strcspn. > Copyright (C) 2017-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. 
> > @@ -19,10 +19,10 @@ > #if IS_IN (libc) > > # include <sysdep.h> > -# define strcspn __strcspn_sse2 > +# define STRCSPN __strcspn_sse2 > > # undef libc_hidden_builtin_def > -# define libc_hidden_builtin_def(strcspn) > +# define libc_hidden_builtin_def(STRCSPN) > #endif > > -#include <sysdeps/x86_64/strcspn.S> > +#include <string/strcspn.c> > diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S > deleted file mode 100644 > index f3cd86c606..0000000000 > --- a/sysdeps/x86_64/strcspn.S > +++ /dev/null > @@ -1,119 +0,0 @@ > -/* strcspn (str, ss) -- Return the length of the initial segment of STR > - which contains no characters from SS. > - For AMD x86-64. > - Copyright (C) 1994-2022 Free Software Foundation, Inc. > - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <https://www.gnu.org/licenses/>. */ > - > -#include <sysdep.h> > -#include "asm-syntax.h" > - > - .text > -ENTRY (strcspn) > - > - movq %rdi, %rdx /* Save SRC. */ > - > - /* First we create a table with flags for all possible characters. > - For the ASCII (7bit/8bit) or ISO-8859-X character sets which are > - supported by the C string functions we have 256 characters. > - Before inserting marks for the stop characters we clear the whole > - table. */ > - movq %rdi, %r8 /* Save value. */ > - subq $256, %rsp /* Make space for 256 bytes. 
*/ > - cfi_adjust_cfa_offset(256) > - movl $32, %ecx /* 32*8 bytes = 256 bytes. */ > - movq %rsp, %rdi > - xorl %eax, %eax /* We store 0s. */ > - cld > - rep > - stosq > - > - movq %rsi, %rax /* Setup skipset. */ > - > -/* For understanding the following code remember that %rcx == 0 now. > - Although all the following instruction only modify %cl we always > - have a correct zero-extended 64-bit value in %rcx. */ > - > - .p2align 4 > -L(2): movb (%rax), %cl /* get byte from skipset */ > - testb %cl, %cl /* is NUL char? */ > - jz L(1) /* yes => start compare loop */ > - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ > - > - movb 1(%rax), %cl /* get byte from skipset */ > - testb $0xff, %cl /* is NUL char? */ > - jz L(1) /* yes => start compare loop */ > - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ > - > - movb 2(%rax), %cl /* get byte from skipset */ > - testb $0xff, %cl /* is NUL char? */ > - jz L(1) /* yes => start compare loop */ > - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ > - > - movb 3(%rax), %cl /* get byte from skipset */ > - addq $4, %rax /* increment skipset pointer */ > - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ > - testb $0xff, %cl /* is NUL char? */ > - jnz L(2) /* no => process next dword from skipset */ > - > -L(1): leaq -4(%rdx), %rax /* prepare loop */ > - > - /* We use a neat trick for the following loop. Normally we would > - have to test for two termination conditions > - 1. a character in the skipset was found > - and > - 2. the end of the string was found > - But as a sign that the character is in the skipset we store its > - value in the table. But the value of NUL is NUL so the loop > - terminates for NUL in every case. */ > - > - .p2align 4 > -L(3): addq $4, %rax /* adjust pointer for full loop round */ > - > - movb (%rax), %cl /* get byte from string */ > - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? 
*/ > - je L(4) /* yes => return */ > - > - movb 1(%rax), %cl /* get byte from string */ > - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ > - je L(5) /* yes => return */ > - > - movb 2(%rax), %cl /* get byte from string */ > - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ > - jz L(6) /* yes => return */ > - > - movb 3(%rax), %cl /* get byte from string */ > - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ > - jne L(3) /* no => start loop again */ > - > - incq %rax /* adjust pointer */ > -L(6): incq %rax > -L(5): incq %rax > - > -L(4): addq $256, %rsp /* remove skipset */ > - cfi_adjust_cfa_offset(-256) > -#ifdef USE_AS_STRPBRK > - xorl %edx,%edx > - orb %cl, %cl /* was last character NUL? */ > - cmovzq %rdx, %rax /* Yes: return NULL */ > -#else > - subq %rdx, %rax /* we have to return the number of valid > - characters, so compute distance to first > - non-valid character */ > -#endif > - ret > -END (strcspn) > -libc_hidden_builtin_def (strcspn) > -- > 2.25.1 > LGTM. Reviewed-by: H.J. Lu <hjl.tools@gmail.com> Thanks.
On Thu, Mar 24, 2022 at 11:59 AM H.J. Lu via Libc-alpha <libc-alpha@sourceware.org> wrote: > > On Wed, Mar 23, 2022 at 3:00 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > The generic implementation is faster. > > > > geometric_mean(N=20) of all benchmarks New / Original: .678 > > > > All string/memory tests pass. > > --- > > Geometric Mean N=20 runs; All functions page aligned > > len, align1, align2, pos, New Time / Old Time > > 0, 0, 0, 512, 0.054 > > 1, 0, 0, 512, 0.055 > > 1, 1, 0, 512, 0.051 > > 1, 0, 1, 512, 0.054 > > 1, 1, 1, 512, 0.054 > > 2, 0, 0, 512, 0.861 > > 2, 2, 0, 512, 0.861 > > 2, 0, 2, 512, 0.861 > > 2, 2, 2, 512, 0.864 > > 3, 0, 0, 512, 0.854 > > 3, 3, 0, 512, 0.848 > > 3, 0, 3, 512, 0.845 > > 3, 3, 3, 512, 0.85 > > 4, 0, 0, 512, 0.851 > > 4, 4, 0, 512, 0.85 > > 4, 0, 4, 512, 0.852 > > 4, 4, 4, 512, 0.849 > > 5, 0, 0, 512, 0.938 > > 5, 5, 0, 512, 0.94 > > 5, 0, 5, 512, 0.864 > > 5, 5, 5, 512, 0.86 > > 6, 0, 0, 512, 0.858 > > 6, 6, 0, 512, 0.869 > > 6, 0, 6, 512, 0.847 > > 6, 6, 6, 512, 0.868 > > 7, 0, 0, 512, 0.867 > > 7, 7, 0, 512, 0.861 > > 7, 0, 7, 512, 0.864 > > 7, 7, 7, 512, 0.863 > > 8, 0, 0, 512, 0.884 > > 8, 0, 8, 512, 0.884 > > 9, 0, 0, 512, 0.886 > > 9, 1, 0, 512, 0.894 > > 9, 0, 9, 512, 0.889 > > 9, 1, 9, 512, 0.886 > > 10, 0, 0, 512, 0.859 > > 10, 2, 0, 512, 0.859 > > 10, 0, 10, 512, 0.862 > > 10, 2, 10, 512, 0.861 > > 11, 0, 0, 512, 0.846 > > 11, 3, 0, 512, 0.865 > > 11, 0, 11, 512, 0.859 > > 11, 3, 11, 512, 0.862 > > 12, 0, 0, 512, 0.858 > > 12, 4, 0, 512, 0.857 > > 12, 0, 12, 512, 0.964 > > 12, 4, 12, 512, 0.876 > > 13, 0, 0, 512, 0.827 > > 13, 5, 0, 512, 0.805 > > 13, 0, 13, 512, 0.821 > > 13, 5, 13, 512, 0.825 > > 14, 0, 0, 512, 0.786 > > 14, 6, 0, 512, 0.786 > > 14, 0, 14, 512, 0.803 > > 14, 6, 14, 512, 0.783 > > 15, 0, 0, 512, 0.778 > > 15, 7, 0, 512, 0.792 > > 15, 0, 15, 512, 0.796 > > 15, 7, 15, 512, 0.799 > > 16, 0, 0, 512, 0.803 > > 16, 0, 16, 512, 0.815 > > 17, 0, 0, 512, 0.812 > > 17, 1, 0, 512, 0.826 > > 17, 
0, 17, 512, 0.803 > > 17, 1, 17, 512, 0.856 > > 18, 0, 0, 512, 0.801 > > 18, 2, 0, 512, 0.886 > > 18, 0, 18, 512, 0.805 > > 18, 2, 18, 512, 0.807 > > 19, 0, 0, 512, 0.814 > > 19, 3, 0, 512, 0.804 > > 19, 0, 19, 512, 0.813 > > 19, 3, 19, 512, 0.814 > > 20, 0, 0, 512, 0.885 > > 20, 4, 0, 512, 0.799 > > 20, 0, 20, 512, 0.826 > > 20, 4, 20, 512, 0.808 > > 21, 0, 0, 512, 0.816 > > 21, 5, 0, 512, 0.824 > > 21, 0, 21, 512, 0.819 > > 21, 5, 21, 512, 0.826 > > 22, 0, 0, 512, 0.814 > > 22, 6, 0, 512, 0.824 > > 22, 0, 22, 512, 0.81 > > 22, 6, 22, 512, 0.806 > > 23, 0, 0, 512, 0.825 > > 23, 7, 0, 512, 0.829 > > 23, 0, 23, 512, 0.809 > > 23, 7, 23, 512, 0.823 > > 24, 0, 0, 512, 0.829 > > 24, 0, 24, 512, 0.823 > > 25, 0, 0, 512, 0.864 > > 25, 1, 0, 512, 0.895 > > 25, 0, 25, 512, 0.88 > > 25, 1, 25, 512, 0.848 > > 26, 0, 0, 512, 0.903 > > 26, 2, 0, 512, 0.888 > > 26, 0, 26, 512, 0.894 > > 26, 2, 26, 512, 0.89 > > 27, 0, 0, 512, 0.914 > > 27, 3, 0, 512, 0.917 > > 27, 0, 27, 512, 0.902 > > 27, 3, 27, 512, 0.887 > > 28, 0, 0, 512, 0.887 > > 28, 4, 0, 512, 0.877 > > 28, 0, 28, 512, 0.893 > > 28, 4, 28, 512, 0.866 > > 29, 0, 0, 512, 0.885 > > 29, 5, 0, 512, 0.907 > > 29, 0, 29, 512, 0.894 > > 29, 5, 29, 512, 0.906 > > 30, 0, 0, 512, 0.88 > > 30, 6, 0, 512, 0.898 > > 30, 0, 30, 512, 0.9 > > 30, 6, 30, 512, 0.895 > > 31, 0, 0, 512, 0.893 > > 31, 7, 0, 512, 0.874 > > 31, 0, 31, 512, 0.894 > > 31, 7, 31, 512, 0.899 > > 4, 0, 0, 32, 0.618 > > 4, 1, 0, 32, 0.627 > > 4, 0, 1, 32, 0.625 > > 4, 1, 1, 32, 0.613 > > 4, 0, 0, 64, 0.913 > > 4, 2, 0, 64, 0.801 > > 4, 0, 2, 64, 0.759 > > 4, 2, 2, 64, 0.761 > > 4, 0, 0, 128, 0.822 > > 4, 3, 0, 128, 0.863 > > 4, 0, 3, 128, 0.867 > > 4, 3, 3, 128, 0.917 > > 4, 0, 0, 256, 0.816 > > 4, 4, 0, 256, 0.812 > > 4, 0, 4, 256, 0.803 > > 4, 4, 4, 256, 0.811 > > 4, 5, 0, 512, 0.848 > > 4, 0, 5, 512, 0.843 > > 4, 5, 5, 512, 0.857 > > 4, 0, 0, 1024, 0.886 > > 4, 6, 0, 1024, 0.887 > > 4, 0, 6, 1024, 0.881 > > 4, 6, 6, 1024, 0.873 > > 4, 0, 0, 2048, 0.892 > > 4, 7, 
0, 2048, 0.894 > > 4, 0, 7, 2048, 0.89 > > 4, 7, 7, 2048, 0.874 > > 10, 1, 0, 64, 0.946 > > 10, 1, 1, 64, 0.81 > > 10, 2, 0, 64, 0.804 > > 10, 2, 2, 64, 0.82 > > 10, 3, 0, 64, 0.772 > > 10, 3, 3, 64, 0.772 > > 10, 4, 0, 64, 0.748 > > 10, 4, 4, 64, 0.751 > > 10, 5, 0, 64, 0.76 > > 10, 5, 5, 64, 0.76 > > 10, 6, 0, 64, 0.726 > > 10, 6, 6, 64, 0.718 > > 10, 7, 0, 64, 0.724 > > 10, 7, 7, 64, 0.72 > > 6, 0, 0, 0, 0.415 > > 6, 0, 0, 1, 0.423 > > 6, 0, 1, 1, 0.412 > > 6, 0, 0, 2, 0.433 > > 6, 0, 2, 2, 0.434 > > 6, 0, 0, 3, 0.427 > > 6, 0, 3, 3, 0.428 > > 6, 0, 0, 4, 0.465 > > 6, 0, 4, 4, 0.466 > > 6, 0, 0, 5, 0.463 > > 6, 0, 5, 5, 0.468 > > 6, 0, 0, 6, 0.435 > > 6, 0, 6, 6, 0.444 > > 6, 0, 0, 7, 0.41 > > 6, 0, 7, 7, 0.42 > > 6, 0, 0, 8, 0.474 > > 6, 0, 8, 8, 0.501 > > 6, 0, 0, 9, 0.471 > > 6, 0, 9, 9, 0.489 > > 6, 0, 0, 10, 0.462 > > 6, 0, 10, 10, 0.46 > > 6, 0, 0, 11, 0.459 > > 6, 0, 11, 11, 0.458 > > 6, 0, 0, 12, 0.516 > > 6, 0, 12, 12, 0.51 > > 6, 0, 0, 13, 0.494 > > 6, 0, 13, 13, 0.524 > > 6, 0, 0, 14, 0.486 > > 6, 0, 14, 14, 0.5 > > 6, 0, 0, 15, 0.48 > > 6, 0, 15, 15, 0.501 > > 6, 0, 0, 16, 0.54 > > 6, 0, 16, 16, 0.538 > > 6, 0, 0, 17, 0.503 > > 6, 0, 17, 17, 0.541 > > 6, 0, 0, 18, 0.537 > > 6, 0, 18, 18, 0.549 > > 6, 0, 0, 19, 0.527 > > 6, 0, 19, 19, 0.537 > > 6, 0, 0, 20, 0.539 > > 6, 0, 20, 20, 0.554 > > 6, 0, 0, 21, 0.558 > > 6, 0, 21, 21, 0.541 > > 6, 0, 0, 22, 0.546 > > 6, 0, 22, 22, 0.561 > > 6, 0, 0, 23, 0.54 > > 6, 0, 23, 23, 0.536 > > 6, 0, 0, 24, 0.565 > > 6, 0, 24, 24, 0.584 > > 6, 0, 0, 25, 0.563 > > 6, 0, 25, 25, 0.58 > > 6, 0, 0, 26, 0.555 > > 6, 0, 26, 26, 0.584 > > 6, 0, 0, 27, 0.569 > > 6, 0, 27, 27, 0.587 > > 6, 0, 0, 28, 0.612 > > 6, 0, 28, 28, 0.623 > > 6, 0, 0, 29, 0.604 > > 6, 0, 29, 29, 0.621 > > 6, 0, 0, 30, 0.59 > > 6, 0, 30, 30, 0.609 > > 6, 0, 0, 31, 0.577 > > 6, 0, 31, 31, 0.588 > > 6, 0, 0, 32, 0.621 > > 6, 0, 32, 32, 0.608 > > 6, 0, 0, 33, 0.601 > > 6, 0, 33, 33, 0.623 > > 6, 0, 0, 34, 0.614 > > 6, 0, 34, 34, 0.615 > > 6, 0, 0, 35, 0.598 
> > 6, 0, 35, 35, 0.608 > > 6, 0, 0, 36, 0.626 > > 6, 0, 36, 36, 0.634 > > 6, 0, 0, 37, 0.62 > > 6, 0, 37, 37, 0.634 > > 6, 0, 0, 38, 0.612 > > 6, 0, 38, 38, 0.637 > > 6, 0, 0, 39, 0.627 > > 6, 0, 39, 39, 0.612 > > 6, 0, 0, 40, 0.661 > > 6, 0, 40, 40, 0.674 > > 6, 0, 0, 41, 0.633 > > 6, 0, 41, 41, 0.643 > > 6, 0, 0, 42, 0.634 > > 6, 0, 42, 42, 0.636 > > 6, 0, 0, 43, 0.619 > > 6, 0, 43, 43, 0.625 > > 6, 0, 0, 44, 0.654 > > 6, 0, 44, 44, 0.654 > > 6, 0, 0, 45, 0.647 > > 6, 0, 45, 45, 0.649 > > 6, 0, 0, 46, 0.651 > > 6, 0, 46, 46, 0.651 > > 6, 0, 0, 47, 0.646 > > 6, 0, 47, 47, 0.648 > > 6, 0, 0, 48, 0.662 > > 6, 0, 48, 48, 0.664 > > 6, 0, 0, 49, 0.68 > > 6, 0, 49, 49, 0.667 > > 6, 0, 0, 50, 0.654 > > 6, 0, 50, 50, 0.659 > > 6, 0, 0, 51, 0.638 > > 6, 0, 51, 51, 0.639 > > 6, 0, 0, 52, 0.665 > > 6, 0, 52, 52, 0.669 > > 6, 0, 0, 53, 0.658 > > 6, 0, 53, 53, 0.656 > > 6, 0, 0, 54, 0.669 > > 6, 0, 54, 54, 0.67 > > 6, 0, 0, 55, 0.668 > > 6, 0, 55, 55, 0.664 > > 6, 0, 0, 56, 0.701 > > 6, 0, 56, 56, 0.695 > > 6, 0, 0, 57, 0.687 > > 6, 0, 57, 57, 0.696 > > 6, 0, 0, 58, 0.693 > > 6, 0, 58, 58, 0.704 > > 6, 0, 0, 59, 0.695 > > 6, 0, 59, 59, 0.708 > > 6, 0, 0, 60, 0.708 > > 6, 0, 60, 60, 0.728 > > 6, 0, 0, 61, 0.708 > > 6, 0, 61, 61, 0.71 > > 6, 0, 0, 62, 0.715 > > 6, 0, 62, 62, 0.705 > > 6, 0, 0, 63, 0.677 > > 6, 0, 63, 63, 0.702 > > > > .../{strcspn-sse2.S => strcspn-sse2.c} | 8 +- > > sysdeps/x86_64/strcspn.S | 119 ------------------ > > 2 files changed, 4 insertions(+), 123 deletions(-) > > rename sysdeps/x86_64/multiarch/{strcspn-sse2.S => strcspn-sse2.c} (85%) > > delete mode 100644 sysdeps/x86_64/strcspn.S > > > > diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.S b/sysdeps/x86_64/multiarch/strcspn-sse2.c > > similarity index 85% > > rename from sysdeps/x86_64/multiarch/strcspn-sse2.S > > rename to sysdeps/x86_64/multiarch/strcspn-sse2.c > > index f97e856e1f..3a04bb39fc 100644 > > --- a/sysdeps/x86_64/multiarch/strcspn-sse2.S > > +++ 
b/sysdeps/x86_64/multiarch/strcspn-sse2.c > > @@ -1,4 +1,4 @@ > > -/* strcspn optimized with SSE2. > > +/* strcspn. > > Copyright (C) 2017-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > @@ -19,10 +19,10 @@ > > #if IS_IN (libc) > > > > # include <sysdep.h> > > -# define strcspn __strcspn_sse2 > > +# define STRCSPN __strcspn_sse2 > > > > # undef libc_hidden_builtin_def > > -# define libc_hidden_builtin_def(strcspn) > > +# define libc_hidden_builtin_def(STRCSPN) > > #endif > > > > -#include <sysdeps/x86_64/strcspn.S> > > +#include <string/strcspn.c> > > diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S > > deleted file mode 100644 > > index f3cd86c606..0000000000 > > --- a/sysdeps/x86_64/strcspn.S > > +++ /dev/null > > @@ -1,119 +0,0 @@ > > -/* strcspn (str, ss) -- Return the length of the initial segment of STR > > - which contains no characters from SS. > > - For AMD x86-64. > > - Copyright (C) 1994-2022 Free Software Foundation, Inc. > > - This file is part of the GNU C Library. > > - > > - The GNU C Library is free software; you can redistribute it and/or > > - modify it under the terms of the GNU Lesser General Public > > - License as published by the Free Software Foundation; either > > - version 2.1 of the License, or (at your option) any later version. > > - > > - The GNU C Library is distributed in the hope that it will be useful, > > - but WITHOUT ANY WARRANTY; without even the implied warranty of > > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - Lesser General Public License for more details. > > - > > - You should have received a copy of the GNU Lesser General Public > > - License along with the GNU C Library; if not, see > > - <https://www.gnu.org/licenses/>. */ > > - > > -#include <sysdep.h> > > -#include "asm-syntax.h" > > - > > - .text > > -ENTRY (strcspn) > > - > > - movq %rdi, %rdx /* Save SRC. 
*/ > > - > > - /* First we create a table with flags for all possible characters. > > - For the ASCII (7bit/8bit) or ISO-8859-X character sets which are > > - supported by the C string functions we have 256 characters. > > - Before inserting marks for the stop characters we clear the whole > > - table. */ > > - movq %rdi, %r8 /* Save value. */ > > - subq $256, %rsp /* Make space for 256 bytes. */ > > - cfi_adjust_cfa_offset(256) > > - movl $32, %ecx /* 32*8 bytes = 256 bytes. */ > > - movq %rsp, %rdi > > - xorl %eax, %eax /* We store 0s. */ > > - cld > > - rep > > - stosq > > - > > - movq %rsi, %rax /* Setup skipset. */ > > - > > -/* For understanding the following code remember that %rcx == 0 now. > > - Although all the following instruction only modify %cl we always > > - have a correct zero-extended 64-bit value in %rcx. */ > > - > > - .p2align 4 > > -L(2): movb (%rax), %cl /* get byte from skipset */ > > - testb %cl, %cl /* is NUL char? */ > > - jz L(1) /* yes => start compare loop */ > > - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ > > - > > - movb 1(%rax), %cl /* get byte from skipset */ > > - testb $0xff, %cl /* is NUL char? */ > > - jz L(1) /* yes => start compare loop */ > > - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ > > - > > - movb 2(%rax), %cl /* get byte from skipset */ > > - testb $0xff, %cl /* is NUL char? */ > > - jz L(1) /* yes => start compare loop */ > > - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ > > - > > - movb 3(%rax), %cl /* get byte from skipset */ > > - addq $4, %rax /* increment skipset pointer */ > > - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ > > - testb $0xff, %cl /* is NUL char? */ > > - jnz L(2) /* no => process next dword from skipset */ > > - > > -L(1): leaq -4(%rdx), %rax /* prepare loop */ > > - > > - /* We use a neat trick for the following loop. Normally we would > > - have to test for two termination conditions > > - 1. 
a character in the skipset was found > > - and > > - 2. the end of the string was found > > - But as a sign that the character is in the skipset we store its > > - value in the table. But the value of NUL is NUL so the loop > > - terminates for NUL in every case. */ > > - > > - .p2align 4 > > -L(3): addq $4, %rax /* adjust pointer for full loop round */ > > - > > - movb (%rax), %cl /* get byte from string */ > > - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ > > - je L(4) /* yes => return */ > > - > > - movb 1(%rax), %cl /* get byte from string */ > > - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ > > - je L(5) /* yes => return */ > > - > > - movb 2(%rax), %cl /* get byte from string */ > > - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ > > - jz L(6) /* yes => return */ > > - > > - movb 3(%rax), %cl /* get byte from string */ > > - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ > > - jne L(3) /* no => start loop again */ > > - > > - incq %rax /* adjust pointer */ > > -L(6): incq %rax > > -L(5): incq %rax > > - > > -L(4): addq $256, %rsp /* remove skipset */ > > - cfi_adjust_cfa_offset(-256) > > -#ifdef USE_AS_STRPBRK > > - xorl %edx,%edx > > - orb %cl, %cl /* was last character NUL? */ > > - cmovzq %rdx, %rax /* Yes: return NULL */ > > -#else > > - subq %rdx, %rax /* we have to return the number of valid > > - characters, so compute distance to first > > - non-valid character */ > > -#endif > > - ret > > -END (strcspn) > > -libc_hidden_builtin_def (strcspn) > > -- > > 2.25.1 > > > > LGTM. > > Reviewed-by: H.J. Lu <hjl.tools@gmail.com> > > Thanks. > > -- > H.J. I would like to backport this patch to release branches. Any comments or objections? --Sunil
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.S b/sysdeps/x86_64/multiarch/strcspn-sse2.c similarity index 85% rename from sysdeps/x86_64/multiarch/strcspn-sse2.S rename to sysdeps/x86_64/multiarch/strcspn-sse2.c index f97e856e1f..3a04bb39fc 100644 --- a/sysdeps/x86_64/multiarch/strcspn-sse2.S +++ b/sysdeps/x86_64/multiarch/strcspn-sse2.c @@ -1,4 +1,4 @@ -/* strcspn optimized with SSE2. +/* strcspn. Copyright (C) 2017-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -19,10 +19,10 @@ #if IS_IN (libc) # include <sysdep.h> -# define strcspn __strcspn_sse2 +# define STRCSPN __strcspn_sse2 # undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(strcspn) +# define libc_hidden_builtin_def(STRCSPN) #endif -#include <sysdeps/x86_64/strcspn.S> +#include <string/strcspn.c> diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S deleted file mode 100644 index f3cd86c606..0000000000 --- a/sysdeps/x86_64/strcspn.S +++ /dev/null @@ -1,119 +0,0 @@ -/* strcspn (str, ss) -- Return the length of the initial segment of STR - which contains no characters from SS. - For AMD x86-64. - Copyright (C) 1994-2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include "asm-syntax.h" - - .text -ENTRY (strcspn) - - movq %rdi, %rdx /* Save SRC. */ - - /* First we create a table with flags for all possible characters. - For the ASCII (7bit/8bit) or ISO-8859-X character sets which are - supported by the C string functions we have 256 characters. - Before inserting marks for the stop characters we clear the whole - table. */ - movq %rdi, %r8 /* Save value. */ - subq $256, %rsp /* Make space for 256 bytes. */ - cfi_adjust_cfa_offset(256) - movl $32, %ecx /* 32*8 bytes = 256 bytes. */ - movq %rsp, %rdi - xorl %eax, %eax /* We store 0s. */ - cld - rep - stosq - - movq %rsi, %rax /* Setup skipset. */ - -/* For understanding the following code remember that %rcx == 0 now. - Although all the following instruction only modify %cl we always - have a correct zero-extended 64-bit value in %rcx. */ - - .p2align 4 -L(2): movb (%rax), %cl /* get byte from skipset */ - testb %cl, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ - - movb 1(%rax), %cl /* get byte from skipset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ - - movb 2(%rax), %cl /* get byte from skipset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ - - movb 3(%rax), %cl /* get byte from skipset */ - addq $4, %rax /* increment skipset pointer */ - movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ - testb $0xff, %cl /* is NUL char? */ - jnz L(2) /* no => process next dword from skipset */ - -L(1): leaq -4(%rdx), %rax /* prepare loop */ - - /* We use a neat trick for the following loop. Normally we would - have to test for two termination conditions - 1. a character in the skipset was found - and - 2. 
the end of the string was found - But as a sign that the character is in the skipset we store its - value in the table. But the value of NUL is NUL so the loop - terminates for NUL in every case. */ - - .p2align 4 -L(3): addq $4, %rax /* adjust pointer for full loop round */ - - movb (%rax), %cl /* get byte from string */ - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ - je L(4) /* yes => return */ - - movb 1(%rax), %cl /* get byte from string */ - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ - je L(5) /* yes => return */ - - movb 2(%rax), %cl /* get byte from string */ - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ - jz L(6) /* yes => return */ - - movb 3(%rax), %cl /* get byte from string */ - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ - jne L(3) /* no => start loop again */ - - incq %rax /* adjust pointer */ -L(6): incq %rax -L(5): incq %rax - -L(4): addq $256, %rsp /* remove skipset */ - cfi_adjust_cfa_offset(-256) -#ifdef USE_AS_STRPBRK - xorl %edx,%edx - orb %cl, %cl /* was last character NUL? */ - cmovzq %rdx, %rax /* Yes: return NULL */ -#else - subq %rdx, %rax /* we have to return the number of valid - characters, so compute distance to first - non-valid character */ -#endif - ret -END (strcspn) -libc_hidden_builtin_def (strcspn)