Message ID | CAOVZoAOYJ7zNWRZkgEj2Pq=v=GHs2j4XkuFw8077M3vZnxBy0w@mail.gmail.com |
---|---|
State | New |
Headers | show |
On Thu, May 25, 2017 at 2:25 PM, Erich Elsen <eriche@google.com> wrote: > Ok, I'll get started then. > > Are there any general comments about the attached conversion for > memcpy? Just so I don't repeat the same wrong thing many times. You missed: /* Define multiple versions only for the definition in lib and for DSO. In static binaries we need memcpy before the initialization happened. */ #if defined SHARED && IS_IN (libc) +typedef void * (*memcpy_fn)(void *, const void *, size_t); + +extern void * __memcpy_erms(void *dest, const void *src, size_t n); +extern void * __memcpy_sse2_unaligned(void *dest, const void *src, size_t n); +extern void * __memcpy_sse2_unaligned_erms(void *dest, const void *src, size_t n); +extern void * __memcpy_ssse3(void *dest, const void *src, size_t n); +extern void * __memcpy_ssse3_back(void *dest, const void *src, size_t n); +extern void * __memcpy_avx_unaligned(void *dest, const void *src, size_t n); +extern void * __memcpy_avx_unaligned_erms(void *dest, const void *src, size_t n); +extern void * __memcpy_avx512_unaligned(void *dest, const void *src, size_t n); +extern void * __memcpy_avx512_unaligned_erms(void *dest, const void *src, size_t n); Please use something similar to multiarch/strstr.c: /* Redefine strstr so that the compiler won't complain about the type mismatch with the IFUNC selector in strong_alias, below. */ #undef strstr #define strstr __redirect_strstr #include <string.h> #undef strstr ... extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; +/* Defined in cacheinfo.c */ +extern long int __x86_shared_cache_size attribute_hidden; +extern long int __x86_shared_cache_size_half attribute_hidden; +extern long int __x86_data_cache_size attribute_hidden; +extern long int __x86_data_cache_size_half attribute_hidden; +extern long int __x86_shared_non_temporal_threshold attribute_hidden; Remove them. static void * select_memcpy_impl(void) { + const struct cpu_features* cpu_features_struct_p = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_ERMS)) { + return __memcpy_erms; + } + + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX512F_Usable)) { + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_No_VZEROUPPER)) + return __memcpy_avx512_unaligned_erms; + return __memcpy_avx512_unaligned; + } + + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX_Fast_Unaligned_Load)) { + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) { + return __memcpy_avx_unaligned_erms; + + } + return __memcpy_avx_unaligned; + } + else { + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Unaligned_Copy)) { + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) { + return __memcpy_sse2_unaligned_erms; + + } + return __memcpy_sse2_unaligned; + } + else { + if (!CPU_FEATURES_CPU_P(cpu_features_struct_p, SSSE3)) { + return __memcpy_sse2_unaligned; + + } + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Copy_Backward)) { + return __memcpy_ssse3_back; + + } + return __memcpy_ssse3; + } + } +} Please 1. Fix formatting. 2. Remove unnecessary {}. 3. Don't use "else". +void *__new_memcpy(void *dest, const void *src, size_t n) + __attribute__ ((ifunc ("select_memcpy_impl"))); Use "typeof" here.
On 25/05/2017 18:38, H.J. Lu wrote: > On Thu, May 25, 2017 at 2:25 PM, Erich Elsen <eriche@google.com> wrote: >> Ok, I'll get started then. >> >> Are there any general comments about the attached conversion for >> memcpy? Just so I don't repeat the same wrong thing many times. > > You missed: > > /* Define multiple versions only for the definition in lib and for > DSO. In static binaries we need memcpy before the initialization > happened. */ > #if defined SHARED && IS_IN (libc) > > +typedef void * (*memcpy_fn)(void *, const void *, size_t); > + > +extern void * __memcpy_erms(void *dest, const void *src, size_t n); > +extern void * __memcpy_sse2_unaligned(void *dest, const void *src, size_t n); > +extern void * __memcpy_sse2_unaligned_erms(void *dest, const void > *src, size_t n); > +extern void * __memcpy_ssse3(void *dest, const void *src, size_t n); > +extern void * __memcpy_ssse3_back(void *dest, const void *src, size_t n); > +extern void * __memcpy_avx_unaligned(void *dest, const void *src, size_t n); > +extern void * __memcpy_avx_unaligned_erms(void *dest, const void > *src, size_t n); > +extern void * __memcpy_avx512_unaligned(void *dest, const void *src, size_t n); > +extern void * __memcpy_avx512_unaligned_erms(void *dest, const void > *src, size_t n); > > Please use something similar to multiarch/strstr.c: > > /* Redefine strstr so that the compiler won't complain about the type > mismatch with the IFUNC selector in strong_alias, below. */ > #undef strstr > #define strstr __redirect_strstr > #include <string.h> > #undef strstr > ... > extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; > > +/* Defined in cacheinfo.c */ > +extern long int __x86_shared_cache_size attribute_hidden; > +extern long int __x86_shared_cache_size_half attribute_hidden; > +extern long int __x86_data_cache_size attribute_hidden; > +extern long int __x86_data_cache_size_half attribute_hidden; > +extern long int __x86_shared_non_temporal_threshold attribute_hidden; It seems it will be used not only for memcpy, so I would suggest to add on a common header on multiarch. > > Remove them. > static void * select_memcpy_impl(void) { > + const struct cpu_features* cpu_features_struct_p = __get_cpu_features (); > + > + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_ERMS)) { > + return __memcpy_erms; > + } > + > + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX512F_Usable)) { > + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_No_VZEROUPPER)) > + return __memcpy_avx512_unaligned_erms; > + return __memcpy_avx512_unaligned; > + } > + > + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX_Fast_Unaligned_Load)) { > + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) { > + return __memcpy_avx_unaligned_erms; > + > + } > + return __memcpy_avx_unaligned; > + } > + else { > + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Unaligned_Copy)) { > + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) { > + return __memcpy_sse2_unaligned_erms; > + > + } > + return __memcpy_sse2_unaligned; > + } > + else { > + if (!CPU_FEATURES_CPU_P(cpu_features_struct_p, SSSE3)) { > + return __memcpy_sse2_unaligned; > + > + } > + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Copy_Backward)) { > + return __memcpy_ssse3_back; > + > + } > + return __memcpy_ssse3; > + } > + } > +} > > Please > > 1. Fix formatting. > 2. Remove unnecessary {}. > 3. Don't use "else". > > +void *__new_memcpy(void *dest, const void *src, size_t n) > + __attribute__ ((ifunc ("select_memcpy_impl"))); > > Use "typeof" here. We have the libc_ifunc{_redirect} to handle the __attribute__ ((ifunc)) support from compiler. I think you can use: # include <string.h> // extern __typeof (memcpy) __memcpy_<each supported one> attribute_hidden; static void *memcpy_selector (void) { // fill me. } libc_ifunc_hidden (memcpy, memcpy, memcpy_selector); libc_hidden_def (memcpy)
From a2957f5a0b21f9588e8756228b11b86f886b0f4c Mon Sep 17 00:00:00 2001 From: Erich Elsen <eriche@google.com> Date: Tue, 23 May 2017 12:29:24 -0700 Subject: [PATCH] add memcpy.c --- sysdeps/x86_64/multiarch/memcpy.c | 70 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 sysdeps/x86_64/multiarch/memcpy.c diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c new file mode 100644 index 0000000000..b0ff8c71fd --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcpy.c @@ -0,0 +1,70 @@ +#include "cpu-features.h" +#include "init-arch.h" +#include "shlib-compat.h" +#include <stdlib.h> + +typedef void * (*memcpy_fn)(void *, const void *, size_t); + +extern void * __memcpy_erms(void *dest, const void *src, size_t n); +extern void * __memcpy_sse2_unaligned(void *dest, const void *src, size_t n); +extern void * __memcpy_sse2_unaligned_erms(void *dest, const void *src, size_t n); +extern void * __memcpy_ssse3(void *dest, const void *src, size_t n); +extern void * __memcpy_ssse3_back(void *dest, const void *src, size_t n); +extern void * __memcpy_avx_unaligned(void *dest, const void *src, size_t n); +extern void * __memcpy_avx_unaligned_erms(void *dest, const void *src, size_t n); +extern void * __memcpy_avx512_unaligned(void *dest, const void *src, size_t n); +extern void * __memcpy_avx512_unaligned_erms(void *dest, const void *src, size_t n); + +/* Defined in cacheinfo.c */ +extern long int __x86_shared_cache_size attribute_hidden; +extern long int __x86_shared_cache_size_half attribute_hidden; +extern long int __x86_data_cache_size attribute_hidden; +extern long int __x86_data_cache_size_half attribute_hidden; +extern long int __x86_shared_non_temporal_threshold attribute_hidden; + +static void * select_memcpy_impl(void) { + const struct cpu_features* cpu_features_struct_p = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_ERMS)) { + return __memcpy_erms; + } + + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX512F_Usable)) { + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_No_VZEROUPPER)) + return __memcpy_avx512_unaligned_erms; + return __memcpy_avx512_unaligned; + } + + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX_Fast_Unaligned_Load)) { + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) { + return __memcpy_avx_unaligned_erms; + + } + return __memcpy_avx_unaligned; + } + else { + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Unaligned_Copy)) { + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) { + return __memcpy_sse2_unaligned_erms; + + } + return __memcpy_sse2_unaligned; + } + else { + if (!CPU_FEATURES_CPU_P(cpu_features_struct_p, SSSE3)) { + return __memcpy_sse2_unaligned; + + } + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Copy_Backward)) { + return __memcpy_ssse3_back; + + } + return __memcpy_ssse3; + } + } +} + +void *__new_memcpy(void *dest, const void *src, size_t n) + __attribute__ ((ifunc ("select_memcpy_impl"))); + +versioned_symbol(libc, __new_memcpy, memcpy, GLIBC_2_14); -- 2.13.0.219.gdb65acc882-goog