@@ -16,17 +16,6 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-
-#define VEC_SIZE 16
-#define VEC(i) xmm##i
-#define PREFETCHNT prefetchnta
-#define VMOVNT movntdq
-/* Use movups and movaps for smaller code sizes. */
-#define VMOVU movups
-#define VMOVA movaps
-#define MOV_SIZE 3
-#define SECTION(p) p
#ifdef USE_MULTIARCH
# if !IS_IN (libc)
@@ -42,12 +31,18 @@
#if !defined USE_MULTIARCH || !IS_IN (libc)
# define MEMPCPY_SYMBOL(p,s) __mempcpy
#endif
-#ifndef MEMMOVE_SYMBOL
-# define MEMMOVE_CHK_SYMBOL(p,s) p
-# define MEMMOVE_SYMBOL(p,s) memmove
-#endif
-#include "multiarch/memmove-vec-unaligned-erms.S"
+#define MEMMOVE_CHK_SYMBOL(p,s) p
+#define MEMMOVE_SYMBOL(p,s) memmove
+
+
+#define DEFAULT_IMPL_V1 "multiarch/memmove-sse2-unaligned-erms.S"
+#define DEFAULT_IMPL_V3 "multiarch/memmove-avx-unaligned-erms.S"
+#define DEFAULT_IMPL_V4 "multiarch/memmove-evex-unaligned-erms.S"
+
+#include "isa-default-impl.h"
+
+weak_alias (__mempcpy, mempcpy)
#ifndef USE_MULTIARCH
libc_hidden_builtin_def (memmove)
@@ -59,13 +54,10 @@ libc_hidden_def (__mempcpy)
weak_alias (__mempcpy, mempcpy)
libc_hidden_builtin_def (mempcpy)
+
# if defined SHARED && IS_IN (libc)
# undef memcpy
# include <shlib-compat.h>
versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
-
-# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
-compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
-# endif
# endif
#endif
@@ -101,84 +101,96 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/memmove_chk.c. */
IFUNC_IMPL (i, name, __memmove_chk,
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- CPU_FEATURE_USABLE (AVX512F),
- __memmove_chk_avx512_no_vzeroupper)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __memmove_chk_avx512_unaligned)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __memmove_chk_avx512_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- CPU_FEATURE_USABLE (AVX),
- __memmove_chk_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- CPU_FEATURE_USABLE (AVX),
- __memmove_chk_avx_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __memmove_chk_avx_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __memmove_chk_avx_unaligned_erms_rtm)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __memmove_chk_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __memmove_chk_evex_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- CPU_FEATURE_USABLE (SSSE3),
- __memmove_chk_ssse3)
IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
- __memmove_chk_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
- __memmove_chk_sse2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
- __memmove_chk_erms))
+ __memmove_chk_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memmove_chk_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_chk_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_chk_avx512_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_chk_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_chk_evex_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX),
+ __memmove_chk_avx_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX),
+ __memmove_chk_avx_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memmove_chk,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memmove_chk_avx_unaligned_rtm)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memmove_chk,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memmove_chk_avx_unaligned_erms_rtm)
+ /* By V3 we assume fast unaligned copy. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (SSSE3),
+ __memmove_chk_ssse3)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2 (SSSE3 is too
+ specialized for aligned copies to be a better general-purpose
+ memmove). */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __memmove_chk, 1,
+ __memmove_chk_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __memmove_chk, 1,
+ __memmove_chk_sse2_unaligned_erms))
#endif
/* Support sysdeps/x86_64/multiarch/memmove.c. */
IFUNC_IMPL (i, name, memmove,
- IFUNC_IMPL_ADD (array, i, memmove,
- CPU_FEATURE_USABLE (AVX),
- __memmove_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, memmove,
- CPU_FEATURE_USABLE (AVX),
- __memmove_avx_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memmove,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __memmove_avx_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, memmove,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __memmove_avx_unaligned_erms_rtm)
- IFUNC_IMPL_ADD (array, i, memmove,
- CPU_FEATURE_USABLE (AVX512VL),
- __memmove_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, memmove,
- CPU_FEATURE_USABLE (AVX512VL),
- __memmove_evex_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memmove,
- CPU_FEATURE_USABLE (AVX512F),
- __memmove_avx512_no_vzeroupper)
- IFUNC_IMPL_ADD (array, i, memmove,
- CPU_FEATURE_USABLE (AVX512VL),
- __memmove_avx512_unaligned)
- IFUNC_IMPL_ADD (array, i, memmove,
- CPU_FEATURE_USABLE (AVX512VL),
- __memmove_avx512_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memmove, CPU_FEATURE_USABLE (SSSE3),
- __memmove_ssse3)
- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_erms)
IFUNC_IMPL_ADD (array, i, memmove, 1,
- __memmove_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, memmove, 1,
- __memmove_sse2_unaligned_erms))
+ __memmove_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memmove_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_avx512_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_evex_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX),
+ __memmove_avx_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX),
+ __memmove_avx_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memmove,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memmove_avx_unaligned_rtm)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memmove,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memmove_avx_unaligned_erms_rtm)
+ /* By V3 we assume fast unaligned copy. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memmove,
+ CPU_FEATURE_USABLE (SSSE3),
+ __memmove_ssse3)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2 (SSSE3 is too
+ specialized for aligned copies to be a better general-purpose
+ memmove). */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memmove, 1,
+ __memmove_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memmove, 1,
+ __memmove_sse2_unaligned_erms))
/* Support sysdeps/x86_64/multiarch/memrchr.c. */
IFUNC_IMPL (i, name, memrchr,
@@ -832,165 +844,190 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/memcpy_chk.c. */
IFUNC_IMPL (i, name, __memcpy_chk,
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- CPU_FEATURE_USABLE (AVX512F),
- __memcpy_chk_avx512_no_vzeroupper)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __memcpy_chk_avx512_unaligned)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __memcpy_chk_avx512_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- CPU_FEATURE_USABLE (AVX),
- __memcpy_chk_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- CPU_FEATURE_USABLE (AVX),
- __memcpy_chk_avx_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __memcpy_chk_avx_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __memcpy_chk_avx_unaligned_erms_rtm)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __memcpy_chk_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __memcpy_chk_evex_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- CPU_FEATURE_USABLE (SSSE3),
- __memcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
- __memcpy_chk_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
- __memcpy_chk_sse2_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
- __memcpy_chk_erms))
+ __memcpy_chk_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memcpy_chk_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_chk_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_chk_avx512_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_chk_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_chk_evex_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX),
+ __memcpy_chk_avx_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX),
+ __memcpy_chk_avx_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memcpy_chk,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memcpy_chk_avx_unaligned_rtm)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memcpy_chk,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memcpy_chk_avx_unaligned_erms_rtm)
+ /* By V3 we assume fast unaligned copy. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (SSSE3),
+ __memcpy_chk_ssse3)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2 (SSSE3 is too
+ specialized for aligned copies to be a better general-purpose
+ memmove). */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __memcpy_chk, 1,
+ __memcpy_chk_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __memcpy_chk, 1,
+ __memcpy_chk_sse2_unaligned_erms))
#endif
/* Support sysdeps/x86_64/multiarch/memcpy.c. */
IFUNC_IMPL (i, name, memcpy,
- IFUNC_IMPL_ADD (array, i, memcpy,
- CPU_FEATURE_USABLE (AVX),
- __memcpy_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, memcpy,
- CPU_FEATURE_USABLE (AVX),
- __memcpy_avx_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memcpy,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __memcpy_avx_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, memcpy,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __memcpy_avx_unaligned_erms_rtm)
- IFUNC_IMPL_ADD (array, i, memcpy,
- CPU_FEATURE_USABLE (AVX512VL),
- __memcpy_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, memcpy,
- CPU_FEATURE_USABLE (AVX512VL),
- __memcpy_evex_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3),
- __memcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, memcpy,
- CPU_FEATURE_USABLE (AVX512F),
- __memcpy_avx512_no_vzeroupper)
- IFUNC_IMPL_ADD (array, i, memcpy,
- CPU_FEATURE_USABLE (AVX512VL),
- __memcpy_avx512_unaligned)
- IFUNC_IMPL_ADD (array, i, memcpy,
- CPU_FEATURE_USABLE (AVX512VL),
- __memcpy_avx512_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, memcpy, 1,
- __memcpy_sse2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_erms))
+ __memcpy_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memcpy,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memcpy_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_avx512_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_evex_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memcpy,
+ CPU_FEATURE_USABLE (AVX),
+ __memcpy_avx_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memcpy,
+ CPU_FEATURE_USABLE (AVX),
+ __memcpy_avx_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memcpy,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memcpy_avx_unaligned_rtm)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memcpy,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memcpy_avx_unaligned_erms_rtm)
+ /* By V3 we assume fast unaligned copy. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memcpy,
+ CPU_FEATURE_USABLE (SSSE3),
+ __memcpy_ssse3)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2 (SSSE3 is too
+ specialized for aligned copies to be a better general-purpose
+ memmove). */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memcpy, 1,
+ __memcpy_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memcpy, 1,
+ __memcpy_sse2_unaligned_erms))
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/mempcpy_chk.c. */
IFUNC_IMPL (i, name, __mempcpy_chk,
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- CPU_FEATURE_USABLE (AVX512F),
- __mempcpy_chk_avx512_no_vzeroupper)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __mempcpy_chk_avx512_unaligned)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __mempcpy_chk_avx512_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- CPU_FEATURE_USABLE (AVX),
- __mempcpy_chk_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- CPU_FEATURE_USABLE (AVX),
- __mempcpy_chk_avx_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __mempcpy_chk_avx_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __mempcpy_chk_avx_unaligned_erms_rtm)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __mempcpy_chk_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- CPU_FEATURE_USABLE (AVX512VL),
- __mempcpy_chk_evex_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- CPU_FEATURE_USABLE (SSSE3),
- __mempcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
- __mempcpy_chk_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
- __mempcpy_chk_sse2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
- __mempcpy_chk_erms))
+ __mempcpy_chk_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX512F),
+ __mempcpy_chk_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_chk_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_chk_avx512_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_chk_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_chk_evex_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX),
+ __mempcpy_chk_avx_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX),
+ __mempcpy_chk_avx_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __mempcpy_chk,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __mempcpy_chk_avx_unaligned_rtm)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __mempcpy_chk,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __mempcpy_chk_avx_unaligned_erms_rtm)
+ /* By V3 we assume fast unaligned copy. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (SSSE3),
+ __mempcpy_chk_ssse3)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2 (SSSE3 is too
+ specialized for aligned copies to be a better general-purpose
+ memmove). */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __mempcpy_chk, 1,
+ __mempcpy_chk_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __mempcpy_chk, 1,
+ __mempcpy_chk_sse2_unaligned_erms))
#endif
/* Support sysdeps/x86_64/multiarch/mempcpy.c. */
IFUNC_IMPL (i, name, mempcpy,
- IFUNC_IMPL_ADD (array, i, mempcpy,
- CPU_FEATURE_USABLE (AVX512F),
- __mempcpy_avx512_no_vzeroupper)
- IFUNC_IMPL_ADD (array, i, mempcpy,
- CPU_FEATURE_USABLE (AVX512VL),
- __mempcpy_avx512_unaligned)
- IFUNC_IMPL_ADD (array, i, mempcpy,
- CPU_FEATURE_USABLE (AVX512VL),
- __mempcpy_avx512_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, mempcpy,
- CPU_FEATURE_USABLE (AVX),
- __mempcpy_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, mempcpy,
- CPU_FEATURE_USABLE (AVX),
- __mempcpy_avx_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, mempcpy,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __mempcpy_avx_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, mempcpy,
- (CPU_FEATURE_USABLE (AVX)
- && CPU_FEATURE_USABLE (RTM)),
- __mempcpy_avx_unaligned_erms_rtm)
- IFUNC_IMPL_ADD (array, i, mempcpy,
- CPU_FEATURE_USABLE (AVX512VL),
- __mempcpy_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, mempcpy,
- CPU_FEATURE_USABLE (AVX512VL),
- __mempcpy_evex_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3),
- __mempcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, mempcpy, 1,
- __mempcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, mempcpy, 1,
- __mempcpy_sse2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_erms))
+ __mempcpy_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX512F),
+ __mempcpy_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_avx512_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_evex_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX),
+ __mempcpy_avx_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX),
+ __mempcpy_avx_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, mempcpy,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __mempcpy_avx_unaligned_rtm)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, mempcpy,
+ (CPU_FEATURE_USABLE (AVX)
+ && CPU_FEATURE_USABLE (RTM)),
+ __mempcpy_avx_unaligned_erms_rtm)
+ /* By V3 we assume fast unaligned copy. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, mempcpy,
+ CPU_FEATURE_USABLE (SSSE3),
+ __mempcpy_ssse3)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2 (SSSE3 is too
+ specialized for aligned copies to be a better general-purpose
+ memmove). */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, mempcpy, 1,
+ __mempcpy_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, mempcpy, 1,
+ __mempcpy_sse2_unaligned_erms))
/* Support sysdeps/x86_64/multiarch/strncmp.c. */
IFUNC_IMPL (i, name, strncmp,
@@ -20,11 +20,19 @@
#include <init-arch.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+ attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
+ attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
attribute_hidden;
@@ -32,30 +40,27 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_rtm)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms_rtm)
attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS)
|| CPU_FEATURES_ARCH_P (cpu_features, Prefer_FSRM))
return OPTIMIZE (erms);
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
{
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
return OPTIMIZE (avx512_unaligned_erms);
@@ -66,9 +71,10 @@ IFUNC_SELECTOR (void)
return OPTIMIZE (avx512_no_vzeroupper);
}
- if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load, ))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
{
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
return OPTIMIZE (evex_unaligned_erms);
@@ -84,7 +90,8 @@ IFUNC_SELECTOR (void)
return OPTIMIZE (avx_unaligned_rtm);
}
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
{
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
return OPTIMIZE (avx_unaligned_erms);
@@ -93,7 +100,11 @@ IFUNC_SELECTOR (void)
}
}
- if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSSE3)
+ /* Leave this as a runtime check. The SSSE3 implementation is
+ optimized almost exclusively for avoiding unaligned memory
+ access during the copy and, by and large, is no better than
+ the SSE2 implementation as a general-purpose memmove. */
&& !CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy))
{
return OPTIMIZE (ssse3);
@@ -1,12 +1,23 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (3)
+
# define VEC_SIZE 32
# define VEC(i) ymm##i
# define VMOVNT vmovntdq
# define VMOVU vmovdqu
# define VMOVA vmovdqa
# define MOV_SIZE 4
+
# define SECTION(p) p##.avx
-# define MEMMOVE_SYMBOL(p,s) p##_avx_##s
+
+# ifndef MEMMOVE_SYMBOL
+# define MEMMOVE_SYMBOL(p,s) p##_avx_##s
+# endif
# include "memmove-vec-unaligned-erms.S"
+
+# if MINIMUM_X86_ISA_LEVEL == 3
+# include "memmove-shlib-compat.h"
+# endif
#endif
@@ -17,8 +17,9 @@
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <isa-level.h>
-#if IS_IN (libc)
+#if ISA_SHOULD_BUILD (4)
# include "asm-syntax.h"
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
# define VEC_SIZE 64
# define XMM0 xmm16
# define XMM1 xmm17
@@ -26,8 +29,12 @@
# define VMOVA vmovdqa64
# define VZEROUPPER
# define MOV_SIZE 6
+
# define SECTION(p) p##.evex512
-# define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
+
+# ifndef MEMMOVE_SYMBOL
+# define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
+# endif
# include "memmove-vec-unaligned-erms.S"
#endif
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
# define VEC_SIZE 32
# define XMM0 xmm16
# define XMM1 xmm17
@@ -26,8 +29,17 @@
# define VMOVA vmovdqa64
# define VZEROUPPER
# define MOV_SIZE 6
+
# define SECTION(p) p##.evex
-# define MEMMOVE_SYMBOL(p,s) p##_evex_##s
+
+# ifndef MEMMOVE_SYMBOL
+# define MEMMOVE_SYMBOL(p,s) p##_evex_##s
+# endif
# include "memmove-vec-unaligned-erms.S"
+
+
+# if MINIMUM_X86_ISA_LEVEL == 4
+# include "memmove-shlib-compat.h"
+# endif
#endif
new file mode 100644
@@ -0,0 +1,26 @@
+/* Copyright (C) 2016-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if defined SHARED && IS_IN(libc)
+# include <shlib-compat.h>
+# if SHLIB_COMPAT(libc, GLIBC_2_2_5, GLIBC_2_14)
+/* Use __memmove_{isa_level}_unaligned to support overlapping
+ addresses. */
+compat_symbol (libc, MEMMOVE_SYMBOL (__memmove, unaligned), memcpy,
+ GLIBC_2_2_5);
+# endif
+#endif
@@ -16,18 +16,32 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
-# define MEMMOVE_SYMBOL(p,s) p##_sse2_##s
-#else
-weak_alias (__mempcpy, mempcpy)
-#endif
+#include <isa-level.h>
+
+/* Build when MINIMUM_X86_ISA_LEVEL <= 2: there is no dedicated V2
+ implementation, so this one must also be built for ISA V2 builds. */
+#if ISA_SHOULD_BUILD (2)
+
+# include <sysdep.h>
+
+# define VEC_SIZE 16
+# define VEC(i) xmm##i
+# define PREFETCHNT prefetchnta
+# define VMOVNT movntdq
+/* Use movups and movaps for smaller code sizes. */
+# define VMOVU movups
+# define VMOVA movaps
+# define MOV_SIZE 3
+
+# define SECTION(p) p
+
+# ifndef MEMMOVE_SYMBOL
+# define MEMMOVE_SYMBOL(p,s) p##_sse2_##s
+# endif
-#include <sysdeps/x86_64/memmove.S>
+# include "multiarch/memmove-vec-unaligned-erms.S"
-#if defined SHARED && IS_IN (libc)
-# include <shlib-compat.h>
-# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
-/* Use __memmove_sse2_unaligned to support overlapping addresses. */
-compat_symbol (libc, __memmove_sse2_unaligned, memcpy, GLIBC_2_2_5);
+# if MINIMUM_X86_ISA_LEVEL <= 2
+# include "memmove-shlib-compat.h"
# endif
#endif
@@ -18,7 +18,9 @@
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (2)
# include <sysdep.h>
# ifndef MEMMOVE
@@ -52,10 +54,10 @@ END(MEMMOVE_CHK)
# endif
ENTRY_P2ALIGN(MEMMOVE, 6)
-# ifdef __ILP32__
+# ifdef __ILP32__
/* Clear the upper 32 bits. */
movl %edx, %edx
-# endif
+# endif
movq %rdi, %rax
L(start):
cmpq $16, %rdx
@@ -263,10 +263,10 @@ END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
ENTRY_P2ALIGN (MEMMOVE_SYMBOL (__memmove, unaligned_erms), 6)
movq %rdi, %rax
L(start_erms):
-# ifdef __ILP32__
+#ifdef __ILP32__
/* Clear the upper 32 bits. */
movl %edx, %edx
-# endif
+#endif
cmp $VEC_SIZE, %RDX_LP
jb L(less_vec)
/* Load regardless. */
new file mode 100644
@@ -0,0 +1,18 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "../memmove.S"