@@ -21,10 +21,6 @@
#include "bench-string.h"
-#define STRCASESTR simple_strcasestr
-#define NO_ALIAS
-#define __strncasecmp strncasecmp
-#include "../string/strcasestr.c"
static char *
@@ -53,7 +49,6 @@ stupid_strcasestr (const char *s1, const char *s2)
typedef char *(*proto_t) (const char *, const char *);
IMPL (stupid_strcasestr, 0)
-IMPL (simple_strcasestr, 0)
IMPL (strcasestr, 1)
@@ -35,6 +35,45 @@
#undef memmem
+#include <string_vector.h>
+
+struct cmask
+{
+ unsigned long int n0, n1;
+ size_t needle_size;
+};
+#define CUSTOM_CMASK
+#define CMASK_PARAM struct cmask cmask
+
+#define LOOKAHEAD 1
+#define EXPRESSION_NOCARRY(x, cmask) \
+ ( bytes_equal_nocarry (BYTE (0), cmask.n0) \
+ & bytes_equal_nocarry (BYTE (1), cmask.n1))
+
+/* We calculate mask, not look for first byte and carry could corrupt
+ following bytes. Only possible optimization would be use
+ contains_zero (x) as it must end there. */
+#define EXPRESSION(x, cmask) EXPRESSION_NOCARRY(x, cmask)
+
+static int
+check (char *s, char *n, unsigned long *rent, struct cmask cmask)
+{
+ /* First two characters were already checked by vector loop. */
+ size_t i = 2;
+
+ while (i < cmask.needle_size && s[i] == n[i])
+ i++;
+
+ if (i == cmask.needle_size)
+ return 1;
+
+ rent += i + 10;
+ return 0;
+}
+
+#define BOUND(p) ((uintptr_t) p >= (uintptr_t) end)
+#include <string_vector_search.h>
+
/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK
if NEEDLE_LEN is 0, otherwise NULL if NEEDLE is not found in
HAYSTACK. */
@@ -46,6 +85,7 @@ __memmem (const void *haystack_start, size_t haystack_len,
not an array of 'char' values. See ISO C 99 section 6.2.6.1. */
const unsigned char *haystack = (const unsigned char *) haystack_start;
const unsigned char *needle = (const unsigned char *) needle_start;
+ char *ret;
if (needle_len == 0)
/* The first occurrence of the empty string is deemed to occur at
@@ -57,6 +97,31 @@ __memmem (const void *haystack_start, size_t haystack_len,
if (__glibc_unlikely (haystack_len < needle_len))
return NULL;
+ unsigned char *n = (unsigned char *) needle;
+ if (__BYTE_ORDER == __LITTLE_ENDIAN && needle_len >= 2)
+ {
+ struct cmask cmask;
+ cmask.n0 = n[0] * ones;
+ cmask.n1 = n[1] * ones;
+ cmask.needle_size = needle_len;
+ char *end_search = (char *) (((uintptr_t) haystack) + haystack_len
+ - needle_len + 1);
+ if ((uintptr_t) end_search < (uintptr_t) haystack)
+ end_search = (char *) UINTPTR_MAX;
+ if (vector_search ((char *) haystack, (char *) needle,
+ cmask, end_search, &ret))
+ return ((uintptr_t) ret < (uintptr_t) end_search) ? ret : NULL;
+ else
+ {
+ haystack_len -= (char *) ret - (char *) haystack;
+ haystack = (const unsigned char *) ret;
+
+ if (ret == NULL || haystack_len < needle_len)
+ return NULL;
+ }
+ }
+
+
/* Use optimizations in memchr when possible, to reduce the search
size of haystack using a linear algorithm with a smaller
coefficient. However, avoid memchr for long needles, since we
@@ -57,6 +57,61 @@
#define STRCASESTR __strcasestr
#endif
+/* A initial fast vectorized loop looking for first digraph. */
+
+#include <string_vector.h>
+
+struct cmask
+{
+ vector_int l0, u0, l1, u1;
+};
+#define CUSTOM_CMASK
+#define CMASK_PARAM struct cmask cmask
+
+ /* We use trick that for when character is in ascii and character
+ 0 <= c <= 127 could be caselessly equal only one of characters
+ tolower (c), toupper (c) or a character x in range 128 <= x <= 255
+ As these are exactly characters have highest bit set to 1 we adjust
+ a expression from strstr and filter lower bits by anding with high_bits.
+ */
+
+#define LOOKAHEAD 1
+#define BYTE_EXPRESSION(x, l, u) \
+ ( bytes_equal_nocarry (x, l) \
+ | bytes_equal_nocarry (x, u) \
+ | x)
+
+#define EXPRESSION_NOCARRY(x, cmask) (( \
+ ( BYTE_EXPRESSION (BYTE(0), cmask.l0, cmask.u0) \
+ & BYTE_EXPRESSION (BYTE(1), cmask.l1, cmask.u1) \
+ ) | contains_zero_nocarry (BYTE (1))) & high_bits)
+
+/* We calculate mask, not look for first byte and carry could corrupt
+ following bytes. Only possible optimization would be use
+ contains_zero (x) as it must end there. */
+#define EXPRESSION(x, cmask) EXPRESSION_NOCARRY (x, cmask)
+
+static int
+check (char *s, char *n, unsigned long *rent, struct cmask cmask)
+{
+ /* We used heuristic. Need to recheck. */
+ size_t i = 0;
+ if (!s[1])
+ return 1;
+
+ while (n[i] && tolower (s[i]) == tolower (n[i]))
+ i++;
+
+ if (!n[i])
+ return 1;
+
+ rent += i + 10;
+ return 0;
+}
+
+#include <string_vector_search.h>
+
+#include "../locale/localeinfo.h"
/* Find the first occurrence of NEEDLE in HAYSTACK, using
case-insensitive comparison. This function gives unspecified
@@ -66,10 +121,33 @@ STRCASESTR (const char *haystack_start, const char *needle_start)
{
const char *haystack = haystack_start;
const char *needle = needle_start;
+ char *ret;
size_t needle_len; /* Length of NEEDLE. */
size_t haystack_len; /* Known minimum length of HAYSTACK. */
bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
+ __locale_t loc = _NL_CURRENT_LOCALE;
+ struct __locale_data *ctype = loc->__locales[LC_CTYPE];
+ int nonascii = ctype->values[_NL_ITEM_INDEX (_NL_CTYPE_NONASCII_CASE)].word;
+
+ unsigned char *n = (unsigned char *) needle;
+ if (__BYTE_ORDER == __LITTLE_ENDIAN && !nonascii && haystack[0] != '\0' &&
+ n[0] != '\0' && n[0] < 128 && n[1] != '\0' && n[1] < 128)
+ {
+ struct cmask cmask;
+ cmask.l0 = tolower (n[0]) * ones;
+ cmask.u0 = toupper (n[0]) * ones;
+ cmask.l1 = tolower (n[1]) * ones;
+ cmask.u1 = toupper (n[1]) * ones;
+
+ if (vector_search ((char *) haystack, (char *) needle, cmask,\
+ NULL, &ret))
+ return (ret[1] != '\0') ? ret : NULL;
+ else
+ haystack = ret;
+ }
+
+
/* Determine length of NEEDLE, and in the process, make sure
HAYSTACK is at least as long (no point processing all of a long
NEEDLE if HAYSTACK is too short). */
@@ -45,6 +45,48 @@
#define STRSTR strstr
#endif
+#include <string_vector.h>
+
+struct cmask
+{
+ vector_int n0, n1;
+};
+#define CUSTOM_CMASK
+#define CMASK_PARAM struct cmask cmask
+
+#define LOOKAHEAD 1
+#define EXPRESSION_NOCARRY(x, cmask) (\
+ ( bytes_equal_nocarry (BYTE (0), cmask.n0) \
+ & bytes_equal_nocarry (BYTE (1), cmask.n1)) \
+ | contains_zero_nocarry (BYTE (1)))
+
+/* We calculate mask, not look for first byte and carry could corrupt
+ following bytes. Only possible optimization would be use
+ contains_zero (x) as it must end there. */
+#define EXPRESSION(x, cmask) EXPRESSION_NOCARRY(x, cmask)
+
+static int
+check (char *s, char *n, unsigned long *rent, struct cmask cmask)
+{
+ /* First two characters were already checked by vector loop. */
+ size_t i = 2;
+ if (!s[1])
+ return 1;
+
+ while (n[i] && s[i] == n[i])
+ i++;
+
+ if (!n[i])
+ return 1;
+
+ rent += i + 10;
+ return 0;
+}
+
+#include <string_vector_search.h>
+
+
+
/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK
if NEEDLE is empty, otherwise NULL if NEEDLE is not found in
HAYSTACK. */
@@ -53,10 +95,28 @@ STRSTR (const char *haystack_start, const char *needle_start)
{
const char *haystack = haystack_start;
const char *needle = needle_start;
+ char *ret;
size_t needle_len; /* Length of NEEDLE. */
size_t haystack_len; /* Known minimum length of HAYSTACK. */
bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
+ unsigned char *n = (unsigned char *) needle;
+ if (__BYTE_ORDER == __LITTLE_ENDIAN
+ && haystack[0] != '\0' && n[0] != '\0' && n[1] != '\0')
+ {
+ struct cmask cmask;
+ cmask.n0 = n[0] * ones;
+ cmask.n1 = n[1] * ones;
+
+ if (vector_search ((char *) haystack, (char *) needle, cmask,\
+ NULL, &ret))
+ return ret[1] != '\0' ? ret : NULL;
+ else
+ haystack = ret;
+ }
+
+
+
/* Determine length of NEEDLE, and in the process, make sure
HAYSTACK is at least as long (no point processing all of a long
NEEDLE if HAYSTACK is too short). */
@@ -25,7 +25,6 @@
#define STRCASESTR simple_strcasestr
#define NO_ALIAS
#define __strncasecmp strncasecmp
-#include "strcasestr.c"
static char *
@@ -54,7 +53,6 @@ stupid_strcasestr (const char *s1, const char *s2)
typedef char *(*proto_t) (const char *, const char *);
IMPL (stupid_strcasestr, 0)
-IMPL (simple_strcasestr, 0)
IMPL (strcasestr, 1)
new file mode 100644
@@ -0,0 +1,85 @@
+/* Algorithm to select a faster string search implementation.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/*
+ For string search algorithms there are two ways how you could look
+ at performance.
+
+ From practical standpoint a naive algorithm is baseline. Most theoretical
+ algorithms (BW, KMP, ...) are slower because they need to do expensive random access. This makes prohibitive constant cost per character while naive
+ algorithm is linear on random inputs as most of time you don't match string.
+ Also potential gains are nonexistent, user supplies needle so you could be
+ happy when its at least 8 characters long and most haystacks are less than 64 bytes large.
+
+ On other hand we need to ensure a linear worst-case of our algorithm.
+
+ We do that with bit of accounting. We count number of comparisons and when
+ it exceeds a size of haystack times some constant we switch to two-way
+ algorithm with guaranteed linear time.
+
+ Main performance boost is gained by vectorizing naive algorithm checks.
+ We will check in parallel if first digraph matches. That should be quite rare, in english most frequent digraph is th with frequency around 1%.
+
+ Then after each digraph found we face decision if to keep looking for
+ digraphs or switch to two-way algorithm. These are covered as common
+ problem in online algorithm setting: buy-or-rent problem.
+ A precomputation in two-way algorithm with needle size n takes
+ around same time as 20 character comparisons so in worst case a two-way
+ algorithm would be twenty times slower than naive for haystacks of size n+1.
+ A solution would be pay higher rent rate until it accumulates to buy cost.
+ Then we would in worst case be twice slower than selecting optimal
+ implementation from start.
+
+ That would work except it needs strlen (needle) which is unnecessary
+ in practice. To show that haystack cannot match needle it suffices to know
+ that there is no leading triplet from needle in haystack. As unless there
+ was a planted needle in haystack and false positive is unlikely we likely
+ don't have to inspect more than three or four characters from needle. Also
+ correct accounting takes time so we approximate cost based on number of
+ comparisons and vector searchs.
+
+ */
+
+#include <string_vector_skeleton.h>
+
+static int
+vector_search (char *haystack, char *needle, CMASK_PARAM,
+ char *end, char **ret)
+{
+ char *s = haystack;
+ unsigned long rent = 0;
+ while (rent < 256 + (s - haystack))
+ {
+ s = string_skeleton (s, cmask, end);
+ if (s == NULL || BOUND (s))
+ {
+ *ret = s;
+ return 0;
+ }
+ if (check (s, needle, &rent, cmask))
+ {
+ *ret = s;
+ return 1;
+ }
+ else
+ s++;
+ }
+ /* Superlinear behaviour detected, switch to two-way algorithm. */
+ *ret = s;
+ return 0;
+}