Message ID | 20220509171747.4153703-6-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v5,1/6] elf: Refactor dl_new_hash so it can be tested / benchmarked | expand |
On 09/05/2022 14:17, Noah Goldstein via Libc-alpha wrote: > Unroll slightly and enforce good instruction scheduling. This improves > performance on out-of-order machines. Note the unrolling allows > for pipelined multiplies which helps a bit, but most of the gain > is from enforcing better instruction scheduling for more ILP. > Unrolling further started to induce slowdowns for sizes [0, 4] > but can help the loop so if larger sizes are the target further > unrolling can be beneficial. > > Results for _dl_new_hash > Benchmarked on Tigerlake: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz > > Time as Geometric Mean of N=25 runs > Geometric of all benchmark New / Old: 0.791 > type, length, New Time, Old Time, New Time / Old Time > fixed, 0, 0.641, 0.658, 0.974 > fixed, 1, 1.888, 1.883, 1.003 > fixed, 2, 2.712, 2.833, 0.957 > fixed, 3, 3.314, 3.739, 0.886 > fixed, 4, 4.316, 4.866, 0.887 > fixed, 5, 5.16, 5.966, 0.865 > fixed, 6, 5.986, 7.241, 0.827 > fixed, 7, 7.264, 8.435, 0.861 > fixed, 8, 8.052, 9.846, 0.818 > fixed, 9, 9.369, 11.316, 0.828 > fixed, 10, 10.256, 12.925, 0.794 > fixed, 11, 12.191, 14.546, 0.838 > fixed, 12, 12.667, 15.92, 0.796 > fixed, 13, 14.442, 17.465, 0.827 > fixed, 14, 14.808, 18.981, 0.78 > fixed, 15, 16.244, 20.565, 0.79 > fixed, 16, 17.166, 22.044, 0.779 > fixed, 32, 35.447, 50.558, 0.701 > fixed, 64, 86.479, 134.529, 0.643 > fixed, 128, 155.453, 287.527, 0.541 > fixed, 256, 302.57, 593.64, 0.51 > random, 2, 11.168, 10.61, 1.053 > random, 4, 13.308, 13.53, 0.984 > random, 8, 16.579, 19.437, 0.853 > random, 16, 21.292, 24.776, 0.859 > random, 32, 30.56, 35.906, 0.851 > random, 64, 49.249, 68.577, 0.718 > random, 128, 81.845, 140.664, 0.582 > random, 256, 152.517, 292.204, 0.522 > > Co-authored-by: Alexander Monakov <amonakov@ispras.ru> Buildbot failed to build it [1]: make[2]: Entering directory '/glibc/elf' gcc -m32 dl-lookup.c -c -std=gnu11 -fgnu89-inline -g -O2 -Wall -Wwrite-strings -Wundef -Werror -fmerge-all-constants -frounding-math -fno-stack-protector -fno-common -Wstrict-prototypes -Wold-style-definition -fmath-errno -fPIC -fno-stack-protector -DSTACK_PROTECTOR_LEVEL=0 -Wa,-mtune=i686 -mno-sse -mno-mmx -mfpmath=387 -fexceptions -fasynchronous-unwind-tables -ftls-model=initial-exec -I../include -I/build/elf -I/build -I../sysdeps/unix/sysv/linux/i386/i686 -I../sysdeps/i386/i686/nptl -I../sysdeps/unix/sysv/linux/i386 -I../sysdeps/unix/sysv/linux/x86/include -I../sysdeps/unix/sysv/linux/x86 -I../sysdeps/x86/nptl -I../sysdeps/i386/nptl -I../sysdeps/unix/sysv/linux/include -I../sysdeps/unix/sysv/linux -I../sysdeps/nptl -I../sysdeps/pthread -I../sysdeps/gnu -I../sysdeps/unix/inet -I../sysdeps/unix/sysv -I../sysdeps/unix/i386 -I../sysdeps/unix -I../sysdeps/posix -I../sysdeps/i386/i686/fpu/multiarch -I../sysdeps/i386/i686/fpu -I../sysdeps/i386/i686/multiarch -I../sysdeps/i386/i686 -I../sysdeps/i386/fpu -I../sysdeps/x86/fpu -I../sysdeps/i386 -I../sysdeps/x86/include -I../sysdeps/x86 -I../sysdeps/wordsize-32 -I../sysdeps/ieee754/float128 -I../sysdeps/ieee754/ldbl-96/include -I../sysdeps/ieee754/ldbl-96 -I../sysdeps/ieee754/dbl-64 -I../sysdeps/ieee754/flt-32 -I../sysdeps/ieee754 -I../sysdeps/generic -I.. -I../libio -I. -D_LIBC_REENTRANT -include /build/libc-modules.h -DMODULE_NAME=rtld -include ../include/libc-symbols.h -DPIC -DSHARED -DTOP_NAMESPACE=glibc -o /build/elf/dl-lookup.os -MD -MP -MF /build/elf/dl-lookup.os.dt -MT /build/elf/dl-lookup.os In file included from dl-lookup.c:27: ./dl-new-hash.h: In function '_dl_new_hash': ./dl-new-hash.h:30:28: error: pointer targets in initialization of 'const unsigned char *' from 'const char *' differ in signedness [-Werror=pointer-sign] 30 | const unsigned char *s = signed_s; | ^~~~~~~~ cc1: all warnings being treated as errors make[2]: *** [../o-iterator.mk:9: /build/elf/dl-lookup.os] Error 1 gcc -m32 dl-lookup.c -c -std=gnu11 -fgnu89-inline -g -O2 -Wall -Wwrite-strings -Wundef -Werror -fmerge-all-constants -frounding-math -fno-stack-protector -fno-common -Wstrict-prototypes -Wold-style-definition -fmath-errno -fpie -fno-stack-protector -DSTACK_PROTECTOR_LEVEL=0 -Wa,-mtune=i686 -fexceptions -fasynchronous-unwind-tables -ftls-model=initial-exec -I../include -I/build/elf -I/build -I../sysdeps/unix/sysv/linux/i386/i686 -I../sysdeps/i386/i686/nptl -I../sysdeps/unix/sysv/linux/i386 -I../sysdeps/unix/sysv/linux/x86/include -I../sysdeps/unix/sysv/linux/x86 -I../sysdeps/x86/nptl -I../sysdeps/i386/nptl -I../sysdeps/unix/sysv/linux/include -I../sysdeps/unix/sysv/linux -I../sysdeps/nptl -I../sysdeps/pthread -I../sysdeps/gnu -I../sysdeps/unix/inet -I../sysdeps/unix/sysv -I../sysdeps/unix/i386 -I../sysdeps/unix -I../sysdeps/posix -I../sysdeps/i386/i686/fpu/multiarch -I../sysdeps/i386/i686/fpu -I../sysdeps/i386/i686/multiarch -I../sysdeps/i386/i686 -I../sysdeps/i386/fpu -I../sysdeps/x86/fpu -I../sysdeps/i386 -I../sysdeps/x86/include -I../sysdeps/x86 -I../sysdeps/wordsize-32 -I../sysdeps/ieee754/float128 -I../sysdeps/ieee754/ldbl-96/include -I../sysdeps/ieee754/ldbl-96 -I../sysdeps/ieee754/dbl-64 -I../sysdeps/ieee754/flt-32 -I../sysdeps/ieee754 -I../sysdeps/generic -I.. -I../libio -I. -D_LIBC_REENTRANT -include /build/libc-modules.h -DMODULE_NAME=libc -include ../include/libc-symbols.h -DPIC -DTOP_NAMESPACE=glibc -o /build/elf/dl-lookup.o -MD -MP -MF /build/elf/dl-lookup.o.dt -MT /build/elf/dl-lookup.o In file included from dl-lookup.c:27: ./dl-new-hash.h: In function '_dl_new_hash': ./dl-new-hash.h:30:28: error: pointer targets in initialization of 'const unsigned char *' from 'const char *' differ in signedness [-Werror=pointer-sign] 30 | const unsigned char *s = signed_s; | ^~~~~~~~ [1] https://www.delorie.com/trybots/32bit/9123/make.tail.txt
diff --git a/elf/dl-new-hash.h b/elf/dl-new-hash.h index 40d88c81f9..70891d374c 100644 --- a/elf/dl-new-hash.h +++ b/elf/dl-new-hash.h @@ -20,15 +20,55 @@ #define _DL_NEW_HASH_H 1 #include <stdint.h> +/* For __glibc_unlikely. */ +#include <sys/cdefs.h> static inline uint32_t __attribute__ ((unused)) -_dl_new_hash (const char *s) +_dl_new_hash (const char *signed_s) { - uint32_t h = 5381; - for (unsigned char c = *s; c != '\0'; c = *++s) - h = h * 33 + c; - return h; + const unsigned char *s = signed_s; + unsigned int h = 5381; + unsigned int c0, c1; + for (;;) + { + c0 = (unsigned int) *s; + /* Unlikely length zero string so evens will be slightly less + common. */ + if (__glibc_unlikely (c0 == 0)) + { + return h; + } + + c1 = (unsigned int) *(s + 1); + if (c1 == 0) + { + c0 += h; + /* Ideal instruction scheduling is: + c0 += h; + h *= 32; + h += c0; + The asm statement ensures the compiler can't mess that up. */ + asm("" : "+r"(h) : "r"(c0)); + h = h * 32 + c0; + return h; + } + + /* Ideal instruction scheduling is: + c1 += c0; + h *= 33 * 33; + c0 *= 32; + c1 += c0; + h += c1; + The asm statements ensures the compiler can't mess that up. */ + c1 += c0; + asm("" : "+r"(c1), "+r"(c0)); + h *= 33 * 33; + c1 += c0 * 32; + asm("" : "+r"(c1)); + h += c1; + s += 2; + } }