new file mode 100644
@@ -0,0 +1,35 @@
#include <stdint.h>

/* Byte-replicated constants for word-at-a-time scanning:
   ones      = 0x0101...01  (each byte 0x01)
   add       = 0x7f7f...7f  (each byte 0x7f)
   high_bits = 0x8080...80  (each byte 0x80)  */
static const unsigned long int ones = ~0UL / 0xff;
static const unsigned long int add = ~0UL / 0xff * 0x7f;
static const unsigned long int high_bits = ~0UL / 0xff * 0x80;
+
/* Use vector arithmetic tricks.  The idea is an expression that, applied
   to each byte of S in parallel, evaluates to zero for a nonzero byte and
   to a nonzero value for a zero byte.  Per byte B the expression is
   (((B & 127) + 127) ^ 128) & 128 & ~B, which is 128 exactly when B is
   zero.  Evaluating it on all bytes at once, the result is nonzero iff
   S contains a zero byte, and the first nonzero byte of the result marks
   the first zero byte of S.  */
+
+static __always_inline
+unsigned long int
+contains_zero (unsigned long int s)
+{
+ return (((s & add) + add) ^ high_bits) & high_bits & ~s;
+}
+
/* Size in bytes of the word type used for vectorized scanning.  */
#define LSIZE sizeof (unsigned long int)
/* Nonzero when reading N bytes starting at address X may cross a page
   boundary.  Arguments are parenthesized so expression arguments parse
   correctly (e.g. a pointer expression X, or N written as A - B).
   NOTE(review): assumes 4096-byte pages — confirm for the target.  */
#define CROSS_PAGE(x, n) (((uintptr_t) (x)) % 4096 > 4096 - (n))
+
+static __always_inline
+size_t
+first_nonzero_byte (unsigned long int u)
+{
+#ifdef FAST_FFS
+ return ffsl (u) / 8 - 1;
+#else
+ u = u ^ (u - 1);
+ u = u & ones;
+ u = u * ones;
+ return (u >> (8 * LSIZE - 8)) - 1;
+#endif
+}
new file mode 100644
@@ -0,0 +1,145 @@
+/* Skeleton of generic string functions.
+ Copyright (C) 1991-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+#include <libc-internal.h>
+#include <stdint.h>
+
/* BOUND (x) should evaluate to nonzero when the scan must stop before
   examining address X.  The default never stops, which suits functions
   that scan to a terminator.  NOTE(review): presumably length-limited
   includers override this to compare X against the `end' argument of
   string_skeleton — confirm against the callers.  */
#ifndef BOUND
# define BOUND(x) 0
#endif
+
+
/* Examine the LSIZE bytes at S with the includer-provided EXPRESSION
   predicate.  On a hit store the address of the first matching byte in
   *RESULT and return 1; otherwise return 0 and leave *RESULT alone.  */
static __always_inline
int
found_in_long_bytes (char *s, unsigned long int cmask, char **result)
{
  const unsigned long int *word = (const unsigned long int *) s;
  unsigned long int hits = EXPRESSION (*word, cmask);

  if (!hits)
    return 0;

  *result = s + first_nonzero_byte (hits);
  return 1;
}
+
+static __always_inline
+char *
+string_skeleton (const char *s_in, int c_in, char *end)
+{
+ unsigned long int mask;
+ const unsigned long int *lptr;
+ char *s = (char *) s_in;
+ unsigned char c = (unsigned char) c_in;
+ char *r;
+ unsigned long int cmask = c * ones;
+
+#if _STRING_ARCH_unaligned
+ /* We fetch 32 bytes while not crossing page boundary.
+ Most strings in practice are of that size and we avoid a loop.
+ This looks as best in practice, alternative below uses aligned load
+ but is slower when string starts just few
+ bytes before 32 byte boundary. A tradeoff is that we rarely could
+ fetch extra cache line without needing it but this optimization
+ does pay for that. */
+ if (!CROSS_PAGE(s, 32))
+ {
+ if (found_in_long_bytes (s + 0 * LSIZE, cmask, &r))
+ return r;
+ if (found_in_long_bytes (s + 1 * LSIZE, cmask, &r))
+ return r;
+ if (found_in_long_bytes (s + 2 * LSIZE, cmask, &r))
+ return r;
+ if (found_in_long_bytes (s + 3 * LSIZE, cmask, &r))
+ return r;
+ if (sizeof (unsigned long int) == 4)
+ {
+ if (found_in_long_bytes (s + 5 * LSIZE, cmask, &r))
+ return r;
+ if (found_in_long_bytes (s + 6 * LSIZE, cmask, &r))
+ return r;
+ if (found_in_long_bytes (s + 7 * LSIZE, cmask, &r))
+ return r;
+ if (found_in_long_bytes (s + 8 * LSIZE, cmask, &r))
+ return r;
+ }
+
+ if (BOUND (s + 32))
+ return NULL;
+ }
+ else
+ {
+#endif
+ /* We need use aligned loads. For first load we read some bytes before
+ start that we discard by shifting them down. */
+
+ char *s_aligned = PTR_ALIGN_DOWN (s, LSIZE);
+ lptr = (const unsigned long int *) s_aligned;
+ mask = (EXPRESSION (*lptr, cmask)) >> (8 * (s - s_aligned));
+
+ if (mask)
+ return s + first_nonzero_byte (mask);
+
+ if (BOUND (s_aligned + 1 * LSIZE))
+ return NULL;
+ if (found_in_long_bytes (s_aligned + 1 * LSIZE, cmask, &r))
+ return r;
+ if (BOUND (s_aligned + 2 * LSIZE))
+ return NULL;
+ if (found_in_long_bytes (s_aligned + 2 * LSIZE, cmask, &r))
+ return r;
+ if (BOUND (s_aligned + 3 * LSIZE))
+ return NULL;
+ if (found_in_long_bytes (s_aligned + 3 * LSIZE, cmask, &r))
+ return r;
+ if (BOUND (s_aligned + 4 * LSIZE))
+ return NULL;
+#if _STRING_ARCH_unaligned
+ }
+#endif
+ /* Now we read enough bytes to start a loop. */
+
+ char *s_loop = PTR_ALIGN_DOWN (s, 4 * LSIZE);
+ while (!BOUND (s_loop + 4 * LSIZE))
+ {
+ s_loop += 4 * LSIZE;
+ lptr = (const unsigned long int *) (s_loop + 0 * LSIZE);
+ mask = EXPRESSION (*lptr, cmask);
+ lptr = (const unsigned long int *) (s_loop + 1 * LSIZE);
+ mask |= EXPRESSION (*lptr, cmask);
+ lptr = (const unsigned long int *) (s_loop + 2 * LSIZE);
+ mask |= EXPRESSION (*lptr, cmask);
+ lptr = (const unsigned long int *) (s_loop + 3 * LSIZE);
+ mask |= EXPRESSION (*lptr, cmask);
+
+ if (mask)
+ {
+ if (found_in_long_bytes (s_loop + 0 * LSIZE, cmask, &r))
+ return r;
+ if (found_in_long_bytes (s_loop + 1 * LSIZE, cmask, &r))
+ return r;
+ if (found_in_long_bytes (s_loop + 2 * LSIZE, cmask, &r))
+ return r;
+
+ return s_loop + 3 * LSIZE + first_nonzero_byte (mask);
+ }
+ }
+ return NULL;
+}