@@ -25,6 +25,10 @@
# define __memmem memmem
#endif
+#ifndef MEMMEM
+# define MEMMEM __memmem
+#endif
+
#define RETURN_TYPE void *
#define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l))
#define FASTSEARCH(S,C,N) (void*) memchr ((void *)(S), (C), (N))
@@ -50,7 +54,7 @@
The limit also implies worst-case performance is linear.
Needles larger than 256 characters use the linear-time Two-Way algorithm. */
void *
-__memmem (const void *haystack, size_t hs_len,
+MEMMEM (const void *haystack, size_t hs_len,
const void *needle, size_t ne_len)
{
const unsigned char *hs = (const unsigned char *) haystack;
@@ -77,7 +81,7 @@ __memmem (const void *haystack, size_t hs_len,
/* Use Two-Way algorithm for very long needles. */
if (__builtin_expect (ne_len > 256, 0))
- return two_way_long_needle (hs, hs_len, ne, ne_len);
+ return TWO_WAY_LONG_NEEDLE_FUNC_NAME (hs, hs_len, ne, ne_len);
uint8_t shift[256];
size_t tmp, shift1;
@@ -91,6 +91,15 @@
# define RET0_IF_0(a) /* nothing */
#endif
+#ifndef TWO_WAY_LONG_NEEDLE_FUNC_NAME
+# define TWO_WAY_LONG_NEEDLE_FUNC_NAME two_way_long_needle
+#endif
+#ifndef TWO_WAY_LONG_NEEDLE_NON_STATIC
+# define TWO_WAY_LONG_NEEDLE_STATIC static
+#else
+# define TWO_WAY_LONG_NEEDLE_STATIC
+#endif
+
/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
Return the index of the first byte in the right half, and set
*PERIOD to the global period of the right half.
@@ -386,8 +395,8 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
Since this function is large and complex, block inlining to avoid
slowing down the common case of small needles. */
-__attribute__((noinline)) static RETURN_TYPE
-two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
+__attribute__((noinline)) TWO_WAY_LONG_NEEDLE_STATIC RETURN_TYPE
+TWO_WAY_LONG_NEEDLE_FUNC_NAME (const unsigned char *haystack, size_t haystack_len,
const unsigned char *needle, size_t needle_len)
{
size_t i; /* Index into current byte of NEEDLE. */
@@ -15,6 +15,10 @@ sysdep_routines += \
memcmpeq-avx2-rtm \
memcmpeq-evex \
memcmpeq-sse2 \
+ memmem-avx-base \
+ memmem-avx2 \
+ memmem-avx512 \
+ memmem-sse2 \
memmove-avx-unaligned-erms \
memmove-avx-unaligned-erms-rtm \
memmove-avx512-no-vzeroupper \
@@ -122,6 +126,10 @@ sysdep_routines += \
varshift \
# sysdep_routines
+CFLAGS-memmem-avx2.c += -mavx2 -mbmi -O3
+CFLAGS-memmem-avx512.c += -mavx512f -mavx512bw -mbmi -O3
+CFLAGS-memmem-sse2.c += -O3
+
CFLAGS-strcspn-sse4.c += -msse4
CFLAGS-strpbrk-sse4.c += -msse4
CFLAGS-strspn-sse4.c += -msse4
@@ -798,6 +798,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strstr_avx512)
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
+
+ /* Support sysdeps/x86_64/multiarch/memmem.c. */
+ IFUNC_IMPL (i, name, memmem,
+ IFUNC_IMPL_ADD (array, i, memmem,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI1)),
+ __memmem_avx2)
+ IFUNC_IMPL_ADD (array, i, memmem,
+ (CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI1)),
+ __memmem_avx512)
+ IFUNC_IMPL_ADD (array, i, memmem, 1, __memmem_generic)
+ IFUNC_IMPL_ADD (array, i, memmem, 1, __memmem_sse2))
/* Support sysdeps/x86_64/multiarch/wcschr.c. */
IFUNC_IMPL (i, name, wcschr,
new file mode 100644
@@ -0,0 +1,37 @@
+/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+const unsigned char ___rarebyte_table[256] attribute_hidden
+ = { 0, 1, 13, 56, 59, 60, 61, 62, 63, 232, 248, 2, 158, 4,
+ 5, 6, 7, 8, 9, 10, 14, 20, 26, 29, 37, 46, 52, 53,
+ 54, 55, 57, 58, 255, 172, 242, 193, 162, 174, 178, 182, 218, 219,
+ 212, 180, 249, 197, 221, 210, 253, 231, 230, 224, 225, 226, 227, 223,
+ 222, 220, 176, 213, 184, 229, 188, 164, 159, 209, 181, 203, 189, 216,
+ 196, 192, 185, 205, 161, 168, 215, 187, 211, 194, 195, 165, 206, 204,
+ 214, 198, 173, 179, 175, 183, 167, 202, 239, 201, 160, 241, 163, 246,
+ 233, 238, 240, 254, 237, 208, 234, 250, 169, 186, 236, 217, 245, 243,
+ 228, 170, 247, 244, 251, 235, 199, 200, 252, 207, 177, 191, 171, 190,
+ 166, 3, 140, 134, 124, 126, 86, 128, 95, 117, 114, 93, 81, 87,
+ 132, 96, 112, 97, 103, 82, 139, 89, 98, 88, 119, 74, 156, 115,
+ 104, 75, 120, 106, 76, 155, 90, 122, 107, 125, 152, 145, 136, 137,
+ 101, 116, 102, 108, 99, 141, 77, 78, 118, 79, 109, 100, 150, 73,
+ 94, 72, 121, 151, 113, 135, 110, 105, 83, 91, 11, 12, 64, 149,
+ 146, 111, 65, 69, 66, 15, 16, 17, 18, 19, 130, 92, 144, 123,
+ 21, 22, 23, 24, 131, 133, 127, 142, 25, 70, 129, 27, 28, 67,
+ 153, 84, 143, 138, 147, 157, 148, 68, 71, 30, 31, 32, 33, 34,
+ 35, 36, 154, 38, 39, 40, 41, 42, 80, 43, 44, 45, 47, 48,
+ 85, 49, 50, 51 };
new file mode 100644
@@ -0,0 +1,255 @@
+/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <immintrin.h>
+#include <inttypes.h>
+#include <string.h>
+#include <libc-pointer-arith.h>
+#include "str-two-way.h"
+
+#ifndef FUNC_NAME
+# define FUNC_NAME __memmem_avx2
+#endif
+#ifndef VEC
+# define VEC __m256i
+#endif
+#ifndef MASK
+# define MASK uint32_t
+#endif
+#ifndef LOAD
+# define LOAD(x) _mm256_load_si256 (x)
+#endif
+#ifndef LOADU
+# define LOADU(x) _mm256_loadu_si256 (x)
+#endif
+#ifndef CMPEQ8_MASK
+# define CMPEQ8_MASK(x, y) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (x, y))
+#endif
+#ifndef SETONE8
+# define SETONE8(x) _mm256_set1_epi8 (x)
+#endif
+#ifndef TZCNT
+# define TZCNT(x) __builtin_ctz (x)
+#endif
+#ifndef BLSR
+# define BLSR(x) ((x) & ((x) -1))
+#endif
+#ifndef MEMMEM_GENERIC
+# define MEMMEM_GENERIC __memmem_generic
+#endif
+#ifndef TWO_WAY_LONG_NEEDLE_THRESHOLD
+# define TWO_WAY_LONG_NEEDLE_THRESHOLD VEC_SIZE
+#endif
+#ifndef VEC_SIZE
+# define VEC_SIZE 32
+#endif
+#define ONES ((MASK) -1)
+
+#ifndef MEMCMPEQ
+# define MEMCMPEQ __memcmpeq
+#endif
+#ifndef MEMCPY
+# define MEMCPY memcpy
+#endif
+#ifndef MEMCHR
+# define MEMCHR memchr
+#endif
+#ifndef PAGE_SIZE
+# define PAGE_SIZE 4096
+#endif
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+#define NOT_CROSSING_PAGE(p, obj_size) \
+ (PTR_DIFF (PTR_ALIGN_UP (p, PAGE_SIZE), p) >= obj_size)
+#if TWO_WAY_LONG_NEEDLE_THRESHOLD > VEC_SIZE
+# define LONG_NEEDLE 1
+# define MIN_VEC(ne_len) MIN (ne_len, VEC_SIZE)
+#else
+# define LONG_NEEDLE 0
+# define MIN_VEC(ne_len) (ne_len)
+#endif
+
+_Static_assert (VEC_SIZE == sizeof (VEC), "VEC_SIZE != sizeof (VEC).");
+_Static_assert (
+ TWO_WAY_LONG_NEEDLE_THRESHOLD <= VEC_SIZE * 2,
+ "FIND_MATCH() assumes TWO_WAY_LONG_NEEDLE_THRESHOLD <= VEC_SIZE * 2.");
+
+#if LONG_NEEDLE
+# define FIND_MATCH() \
+ if (NOT_CROSSING_PAGE (hp, VEC_SIZE * 2)) \
+ { \
+ /* Do a vector compare if we are not crossing a page. */ \
+ hv = LOADU ((const VEC *) hp); \
+ cmpm = (MASK) CMPEQ8_MASK (hv, nv) << matchsh; \
+ /* Compare only the relevant bits of the needle vector. */ \
+ if (cmpm == matchm) \
+ { \
+ if (ne_len <= VEC_SIZE) \
+ return (void *) hp; \
+ /* Compare the rest of the needle. */ \
+ hv = LOADU ((const VEC *) hp + 1); \
+ cmpm = (MASK) CMPEQ8_MASK (hv, nv_e) << matchsh_e; \
+ if (cmpm == matchm_e) \
+ return (void *) hp; \
+ } \
+ } \
+ else \
+ { \
+ if (!MEMCMPEQ (hp, ne, ne_len)) \
+ return (void *) hp; \
+ }
+#else
+# define FIND_MATCH() \
+ if (NOT_CROSSING_PAGE (hp, VEC_SIZE)) \
+ { \
+ hv = LOADU ((const VEC *) hp); \
+ cmpm = (MASK) CMPEQ8_MASK (hv, nv) << matchsh; \
+ if (cmpm == matchm) \
+ return (void *) hp; \
+ } \
+ else \
+ { \
+ if (!MEMCMPEQ (hp, ne, ne_len)) \
+ return (void *) hp; \
+ }
+#endif
+
+extern void *MEMMEM_GENERIC (const void *, size_t, const void *,
+ size_t) attribute_hidden;
+
+/* Lower is rarer. The table is based on the *.c and *.h files in glibc. */
+extern const unsigned char ___rarebyte_table[256] attribute_hidden;
+
+static inline void *__attribute__ ((always_inline))
+find_rarest_byte (const unsigned char *rare, size_t n)
+{
+ const unsigned char *p = (const unsigned char *) rare;
+ int c_rare = ___rarebyte_table[*rare];
+ int c;
+ for (; n--; ++p)
+ {
+ c = ___rarebyte_table[*p];
+ if (c < c_rare)
+ {
+ rare = p;
+ c_rare = c;
+ }
+ }
+ return (void *) rare;
+}
+
+void *
+FUNC_NAME (const void *hs, size_t hs_len, const void *ne, size_t ne_len)
+{
+ if (ne_len == 1)
+ return (void *) MEMCHR (hs, *(unsigned char *) ne, hs_len);
+ if (__glibc_unlikely (ne_len == 0))
+ return (void *) hs;
+ if (__glibc_unlikely (hs_len < ne_len))
+ return NULL;
+ /* Linear-time worst-case performance is guaranteed by the generic
+ * implementation using the Two-Way algorithm. */
+ if (__glibc_unlikely (ne_len > TWO_WAY_LONG_NEEDLE_THRESHOLD))
+ return MEMMEM_GENERIC (hs, hs_len, ne, ne_len);
+ VEC hv0, hv1, hv, nv;
+ MASK i, hm0, hm1, m, cmpm;
+ const unsigned int matchsh = ne_len < VEC_SIZE ? VEC_SIZE - ne_len : 0;
+ const MASK matchm = ONES << matchsh;
+#if LONG_NEEDLE
+ VEC nv_e;
+ const unsigned int matchsh_e
+ = ne_len < VEC_SIZE * 2 ? VEC_SIZE * 2 - ne_len : 0;
+ const MASK matchm_e = ONES << matchsh_e;
+#endif
+ const unsigned char *h = (const unsigned char *) hs;
+ const unsigned char *const end = h + hs_len - ne_len;
+ const unsigned char *hp;
+ size_t rare = PTR_DIFF (
+ find_rarest_byte ((const unsigned char *) ne, MIN_VEC (ne_len)), ne);
+ /* RARE will always be the first byte to find.
+ If RARE is at the end of the needle, use the byte before it. */
+ if (rare == MIN_VEC (ne_len) - 1)
+ --rare;
+ const VEC nv0 = SETONE8 (*((char *) ne + rare));
+ const VEC nv1 = SETONE8 (*((char *) ne + rare + 1));
+ unsigned int off_e = (PTR_DIFF (end, h) < VEC_SIZE)
+ ? VEC_SIZE - (unsigned int) (end - h) - 1
+ : 0;
+ /* Start from the position of RARE. */
+ h += rare;
+ /* Load the needle vector. */
+ if (NOT_CROSSING_PAGE (ne, VEC_SIZE)
+ || (LONG_NEEDLE ? ne_len >= VEC_SIZE : 0))
+ nv = LOADU ((const VEC *) ne);
+ else
+ MEMCPY (&nv, ne, MIN_VEC (ne_len));
+#if LONG_NEEDLE
+ if (ne_len >= VEC_SIZE)
+ {
+ if (NOT_CROSSING_PAGE (ne, VEC_SIZE * 2))
+ nv_e = LOADU ((const VEC *) ne + 1);
+ else
+ MEMCPY (&nv_e, (const unsigned char *) ne + VEC_SIZE,
+ MIN (VEC_SIZE, ne_len - VEC_SIZE));
+ }
+#endif
+ const unsigned int off_s = PTR_DIFF (h, PTR_ALIGN_DOWN (h, VEC_SIZE));
+ /* Align down to VEC_SIZE. */
+ h -= off_s;
+ hv0 = LOAD ((const VEC *) h);
+ hm0 = (MASK) CMPEQ8_MASK (hv0, nv0);
+ hm1 = (MASK) CMPEQ8_MASK (hv0, nv1) >> 1;
+ /* Clear the irrelevant bits from aligning down (OFF_S) and ones that are out
+ * of bounds (OFF_E). */
+ m = ((hm0 & hm1) >> off_s) & (ONES >> off_e);
+ while (m)
+ {
+ i = TZCNT (m);
+ m = BLSR (m);
+ hp = h + off_s + i - rare;
+ FIND_MATCH ();
+ }
+ h += VEC_SIZE - 1;
+ for (; h - rare + VEC_SIZE <= end; h += VEC_SIZE)
+ {
+ hv0 = LOADU ((const VEC *) h);
+ hv1 = LOAD ((const VEC *) (h + 1));
+ hm1 = (MASK) CMPEQ8_MASK (hv1, nv1);
+ hm0 = (MASK) CMPEQ8_MASK (hv0, nv0);
+ m = hm0 & hm1;
+ while (m)
+ {
+ match:
+ i = TZCNT (m);
+ m = BLSR (m);
+ hp = h + i - rare;
+ FIND_MATCH ();
+ }
+ }
+ if (h - rare <= end)
+ {
+ off_e = VEC_SIZE - (unsigned int) (end - (h - rare)) - 1;
+ hv0 = LOADU ((const VEC *) h);
+ hv1 = LOAD ((const VEC *) (h + 1));
+ hm1 = (MASK) CMPEQ8_MASK (hv1, nv1);
+ hm0 = (MASK) CMPEQ8_MASK (hv0, nv0);
+ /* Clear the irrelevant bits that are out of bounds. */
+ m = hm0 & hm1 & (ONES >> off_e);
+ if (m)
+ goto match;
+ }
+ return NULL;
+}
new file mode 100644
@@ -0,0 +1,6 @@
+#include "str-two-way.h"
+#define MEMMEM_GENERIC TWO_WAY_LONG_NEEDLE_FUNC_NAME
+#define TWO_WAY_LONG_NEEDLE_THRESHOLD ((VEC_SIZE) *2)
+#define VEC_SIZE 32
+#define FUNC_NAME __memmem_avx2
+#include "memmem-avx-base.h"
new file mode 100644
@@ -0,0 +1,13 @@
+#include "str-two-way.h"
+#define MEMMEM_GENERIC TWO_WAY_LONG_NEEDLE_FUNC_NAME
+#define TWO_WAY_LONG_NEEDLE_THRESHOLD ((VEC_SIZE) *2)
+#define VEC_SIZE 64
+#define VEC __m512i
+#define MASK uint64_t
+#define LOAD(x) _mm512_load_si512 (x)
+#define LOADU(x) _mm512_loadu_si512 (x)
+#define CMPEQ8_MASK(x, y) _mm512_cmpeq_epi8_mask (x, y)
+#define SETONE8(x) _mm512_set1_epi8 (x)
+#define TZCNT(x) _tzcnt_u64 (x)
+#define FUNC_NAME __memmem_avx512
+#include "memmem-avx-base.h"
new file mode 100644
@@ -0,0 +1,16 @@
+#include <x86intrin.h>
+
+#define VEC __m128i
+#define VEC_SIZE 16
+#define MASK uint16_t
+#define LOAD(x) _mm_load_si128 (x)
+#define LOADU(x) _mm_loadu_si128 (x)
+#define CMPEQ8_MASK(x, y) _mm_movemask_epi8 (_mm_cmpeq_epi8 (x, y))
+#define SETONE8(x) _mm_set1_epi8 (x)
+#define TZCNT(x) \
+ ((x) ? _bit_scan_forward (x) : (MASK) sizeof (MASK) * CHAR_BIT)
+
+#define FUNC_NAME __memmem_sse2
+#define TWO_WAY_LONG_NEEDLE_THRESHOLD VEC_SIZE
+
+#include "memmem-avx-base.h"
new file mode 100644
@@ -0,0 +1,73 @@
+/* Multiple versions of memmem.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2012-2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Redefine memmem so that the compiler won't complain about the type
+ mismatch with the IFUNC selector in strong_alias, below. */
+#undef memmem
+#define memmem __redirect_memmem
+#include <string.h>
+#undef memmem
+
+#define MEMMEM __memmem_generic
+#ifdef SHARED
+# undef libc_hidden_weak
+# define libc_hidden_weak(name) \
+ __hidden_ver1 (__memmem_generic, __GI_memmem, __memmem_generic);
+#endif
+
+#include "str-two-way.h"
+#define TWO_WAY_LONG_NEEDLE_NON_STATIC
+#include "string/memmem.c"
+
+extern __typeof (__redirect_memmem) __memmem_avx2 attribute_hidden;
+extern __typeof (__redirect_memmem) __memmem_avx512 attribute_hidden;
+extern __typeof (__redirect_memmem) __memmem_generic attribute_hidden;
+extern __typeof (__redirect_memmem) __memmem_sse2 attribute_hidden;
+
+#define SYMBOL_NAME memmem
+
+#include "init-arch.h"
+
+/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
+ ifunc symbol properly. */
+extern __typeof (__redirect_memmem) __libc_memmem;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features *cpu_features = __get_cpu_features ();
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI1))
+ return __memmem_avx512;
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI1))
+ return __memmem_avx2;
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+ return __memmem_sse2;
+
+ return __memmem_generic;
+}
+
+libc_ifunc_redirected (__redirect_memmem, __libc_memmem, IFUNC_SELECTOR ());
+#undef memmem
+strong_alias (__libc_memmem, __memmem)