@@ -20,11 +20,7 @@
#define TEST_NAME "strcasestr"
#include "bench-string.h"
-
-#define STRCASESTR simple_strcasestr
-#define NO_ALIAS
-#define __strncasecmp strncasecmp
-#include "../string/strcasestr.c"
+#include <ctype.h>
static char *
@@ -53,7 +49,6 @@ stupid_strcasestr (const char *s1, const char *s2)
typedef char *(*proto_t) (const char *, const char *);
IMPL (stupid_strcasestr, 0)
-IMPL (simple_strcasestr, 0)
IMPL (strcasestr, 1)
@@ -86,10 +81,9 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
static const char d[] = "1234567890abcxyz";
#define dl (sizeof (d) - 1)
char *ss2 = s2;
- for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0)
+ for (size_t l = 0; l < len2; l++)
{
- size_t t = l > dl ? dl : l;
- ss2 = mempcpy (ss2, d, t);
+ ss2[l] = ((unsigned)rand())%16 + 1;
}
s2[len2] = '\0';
@@ -528,7 +528,7 @@ _nl_C_LC_CTYPE_width attribute_hidden =
};
/* Number of fields with fixed meanings, starting at 0. */
-#define NR_FIXED 72
+#define NR_FIXED 73
/* Number of class fields, starting at CLASS_OFFSET. */
#define NR_CLASSES 12
/* Number of map fields, starting at MAP_OFFSET. */
@@ -669,6 +669,8 @@ const struct __locale_data _nl_C_LC_CTYPE attribute_hidden =
{ .word = 0 },
/* _NL_CTYPE_NONASCII_CASE */
{ .word = 0 },
+ /* _NL_CTYPE_NONBIJECTIVE_CASE */
+ { .word = 0 },
/* NR_CLASSES wctype_tables */
{ .string = (const char *) _nl_C_LC_CTYPE_class_upper.header },
{ .string = (const char *) _nl_C_LC_CTYPE_class_lower.header },
@@ -135,6 +135,7 @@ DEFINE_CATEGORY
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE, "ctype-translit-ignore", std, string)
DEFINE_ELEMENT (_NL_CTYPE_MAP_TO_NONASCII, "map-to-nonascii", std, word)
DEFINE_ELEMENT (_NL_CTYPE_NONASCII_CASE, "nonascii-case", std, word)
+ DEFINE_ELEMENT (_NL_CTYPE_NONBIJECTIVE_CASE, "nonbijective-case", std, word)
), _nl_postload_ctype)
@@ -335,6 +335,7 @@ enum
_NL_CTYPE_TRANSLIT_IGNORE,
_NL_CTYPE_MAP_TO_NONASCII,
_NL_CTYPE_NONASCII_CASE,
+ _NL_CTYPE_NONBIJECTIVE_CASE,
_NL_CTYPE_EXTRA_MAP_1,
_NL_CTYPE_EXTRA_MAP_2,
_NL_CTYPE_EXTRA_MAP_3,
@@ -227,6 +227,8 @@ struct locale_ctype_t
uint32_t to_nonascii;
uint32_t nonascii_case;
+ uint32_t nonbijective_case;
+
/* The arrays for the binary representation. */
char_class_t *ctype_b;
@@ -691,6 +693,13 @@ character <SP> not defined in character map")));
|| ctype->map256_collection[1][cnt] != cnt)
ctype->nonascii_case = 1;
+
+ for (cnt = 0; cnt < 256; ++cnt)
+ if (ctype->map256_collection[0][ctype->map256_collection[1][cnt]] != cnt &&
+ ctype->map256_collection[1][ctype->map256_collection[0][cnt]] != cnt)
+ ctype->nonbijective_case = 1;
+
+
/* Now that the tests are done make sure the name array contains all
characters which are handled in the WIDTH section of the
character set definition file. */
@@ -1063,6 +1072,9 @@ ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
+ CTYPE_UINT32 (_NL_CTYPE_NONBIJECTIVE_CASE, ctype->nonbijective_case);
+
+
case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
add_locale_uint32 (&file, ctype->mbdigits_act / 10);
break;
@@ -35,6 +35,45 @@
#undef memmem
+#include <string_vector.h>
+
+struct cmask
+{
+ unsigned long int n0, n1;
+ size_t needle_size;
+};
+#define CUSTOM_CMASK
+#define CMASK_PARAM struct cmask cmask
+
+#define LOOKAHEAD 1
+#define EXPRESSION_NOCARRY(x, cmask) \
+ ( bytes_equal_nocarry (BYTE (0), cmask.n0) \
+ & bytes_equal_nocarry (BYTE (1), cmask.n1))
+
+/* We calculate mask, not look for first byte and carry could corrupt
+ following bytes. Only possible optimization would be use
+ contains_zero (x) as it must end there. */
+#define EXPRESSION(x, cmask) EXPRESSION_NOCARRY(x, cmask)
+
+static int
+check (char *s, char *n, unsigned long *rent, struct cmask cmask)
+{
+ /* First two characters were already checked by vector loop. */
+ size_t i = 2;
+
+ while (i < cmask.needle_size && s[i] == n[i])
+ i++;
+
+ if (i == cmask.needle_size)
+ return 1;
+
+ rent += i + 10;
+ return 0;
+}
+
+#define CHECK_N
+#include <string_vector_search.h>
+
/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK
if NEEDLE_LEN is 0, otherwise NULL if NEEDLE is not found in
HAYSTACK. */
@@ -46,17 +85,43 @@ __memmem (const void *haystack_start, size_t haystack_len,
not an array of 'char' values. See ISO C 99 section 6.2.6.1. */
const unsigned char *haystack = (const unsigned char *) haystack_start;
const unsigned char *needle = (const unsigned char *) needle_start;
+ char *ret;
+ unsigned char *n = (unsigned char *) needle;
if (needle_len == 0)
/* The first occurrence of the empty string is deemed to occur at
the beginning of the string. */
return (void *) haystack;
+ if (needle_len == 1)
+ return memchr (haystack, n[0], haystack_len);
+
/* Sanity check, otherwise the loop might search through the whole
memory. */
if (__glibc_unlikely (haystack_len < needle_len))
return NULL;
+ if (__BYTE_ORDER == __LITTLE_ENDIAN)
+ {
+ struct cmask cmask;
+ cmask.n0 = broadcast (n[0]);
+ cmask.n1 = broadcast (n[1]);
+ cmask.needle_size = needle_len;
+ size_t search_size = haystack_len - needle_len + 1;
+ if (vector_search ((char *) haystack, (char *) needle,
+ cmask, search_size, &ret))
+ return (void *) ret;
+ else
+ {
+ haystack_len -= (char *) ret - (char *) haystack;
+ haystack = (const unsigned char *) ret;
+
+ if (ret == NULL || haystack_len < needle_len)
+ return NULL;
+ }
+ }
+
+
/* Use optimizations in memchr when possible, to reduce the search
size of haystack using a linear algorithm with a smaller
coefficient. However, avoid memchr for long needles, since we
@@ -57,6 +57,60 @@
#define STRCASESTR __strcasestr
#endif
+/* A initial fast vectorized loop looking for first digraph. */
+
+#include <string_vector.h>
+
+struct cmask
+{
+ vector_int l0, u0, l1, u1;
+};
+#define CUSTOM_CMASK
+#define CMASK_PARAM struct cmask cmask
+
+ /* We use trick that for when character is in ascii and character
+ 0 <= c <= 127 could be caselessly equal only one of characters
+ tolower (c), toupper (c) or a character x in range 128 <= x <= 255
+ As these are exactly characters have highest bit set to 1 we adjust
+ a expression from strstr and filter lower bits by anding with high_bits.
+ */
+
+#define LOOKAHEAD 1
+#define BYTE_EXPRESSION(x, l, u) \
+ ( bytes_equal_nocarry (x, l) \
+ | bytes_equal_nocarry (x, u))
+
+#define EXPRESSION_NOCARRY(x, cmask) (( \
+ ( BYTE_EXPRESSION (BYTE(0), cmask.l0, cmask.u0) \
+ & BYTE_EXPRESSION (BYTE(1), cmask.l1, cmask.u1) \
+ ) | contains_zero_nocarry (BYTE (1))))
+
+/* We calculate mask, not look for first byte and carry could corrupt
+ following bytes. Only possible optimization would be use
+ contains_zero (x) as it must end there. */
+#define EXPRESSION(x, cmask) EXPRESSION_NOCARRY (x, cmask)
+
+static int
+check (char *s, char *n, unsigned long *rent, struct cmask cmask)
+{
+ size_t i = 2;
+
+ if (!s[1])
+ return 1;
+
+ while (n[i] && tolower (s[i]) == tolower (n[i]))
+ i++;
+
+ if (!n[i])
+ return 1;
+
+ rent += i + 10;
+ return 0;
+}
+
+#include <string_vector_search.h>
+
+#include "../locale/localeinfo.h"
/* Find the first occurrence of NEEDLE in HAYSTACK, using
case-insensitive comparison. This function gives unspecified
@@ -66,9 +120,46 @@ STRCASESTR (const char *haystack_start, const char *needle_start)
{
const char *haystack = haystack_start;
const char *needle = needle_start;
+ char *ret;
size_t needle_len; /* Length of NEEDLE. */
size_t haystack_len; /* Known minimum length of HAYSTACK. */
bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
+ unsigned char *n = (unsigned char *) needle;
+
+ __locale_t loc = _NL_CURRENT_LOCALE;
+ struct __locale_data *ctype = loc->__locales[LC_CTYPE];
+ int bijective = !ctype->values[_NL_ITEM_INDEX (_NL_CTYPE_NONBIJECTIVE_CASE)].word;
+
+ if (!n[0])
+ return (char *) haystack;
+ if (!haystack[0])
+ return NULL;
+
+ /* TODO jump to special case of strpbrk with size 2. */
+ if (!n[1] && bijective)
+ {
+ char ac[3];
+ ac[0]=tolower(n[0]);
+ ac[1]=toupper(n[0]);
+ ac[2]=0;
+ return strpbrk (haystack, ac);
+ }
+
+ if (__BYTE_ORDER == __LITTLE_ENDIAN && bijective)
+ {
+ struct cmask cmask;
+ cmask.l0 = broadcast (tolower(n[0]));
+ cmask.u0 = broadcast (toupper(n[0]));
+ cmask.l1 = broadcast (tolower(n[1]));
+ cmask.u1 = broadcast (toupper(n[1]));
+
+ if (vector_search ((char *) haystack, (char *) needle, cmask,\
+ 0, &ret))
+ return (ret[1] != '\0') ? ret : NULL;
+ else
+ haystack = ret;
+ }
+
/* Determine length of NEEDLE, and in the process, make sure
HAYSTACK is at least as long (no point processing all of a long
@@ -45,6 +45,48 @@
#define STRSTR strstr
#endif
+#include <string_vector.h>
+
+struct cmask
+{
+ vector_int n0, n1;
+};
+#define CUSTOM_CMASK
+#define CMASK_PARAM struct cmask cmask
+
+#define LOOKAHEAD 1
+#define EXPRESSION_NOCARRY(x, cmask) (\
+ ( bytes_equal_nocarry (BYTE (0), cmask.n0) \
+ & bytes_equal_nocarry (BYTE (1), cmask.n1)) \
+ | contains_zero_nocarry (BYTE (1)))
+
+/* We calculate mask, not look for first byte and carry could corrupt
+ following bytes. Only possible optimization would be use
+ contains_zero (x) as it must end there. */
+#define EXPRESSION(x, cmask) EXPRESSION_NOCARRY(x, cmask)
+
+static int
+check (char *s, char *n, unsigned long *rent, struct cmask cmask)
+{
+ /* First two characters were already checked by vector loop. */
+ size_t i = 2;
+ if (!s[1])
+ return 1;
+
+ while (n[i] && s[i] == n[i])
+ i++;
+
+ if (!n[i])
+ return 1;
+
+ rent += i + 10;
+ return 0;
+}
+
+#include <string_vector_search.h>
+
+
+
/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK
if NEEDLE is empty, otherwise NULL if NEEDLE is not found in
HAYSTACK. */
@@ -53,9 +95,34 @@ STRSTR (const char *haystack_start, const char *needle_start)
{
const char *haystack = haystack_start;
const char *needle = needle_start;
+ char *ret;
size_t needle_len; /* Length of NEEDLE. */
size_t haystack_len; /* Known minimum length of HAYSTACK. */
bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
+ unsigned char *n = (unsigned char *) needle;
+
+ if (!n[0])
+ return (char *) haystack;
+ if (!n[1])
+ return strchr (haystack, n[0]);
+
+ if (!haystack[0])
+ return NULL;
+
+ if (__BYTE_ORDER == __LITTLE_ENDIAN)
+ {
+ struct cmask cmask;
+ cmask.n0 = broadcast (n[0]);
+ cmask.n1 = broadcast (n[1]);
+
+ if (vector_search ((char *) haystack, (char *) needle, cmask,\
+ 0, &ret))
+ return ret[1] != '\0' ? ret : NULL;
+ else
+ haystack = ret;
+ }
+
+
/* Determine length of NEEDLE, and in the process, make sure
HAYSTACK is at least as long (no point processing all of a long
@@ -21,12 +21,7 @@
#define TEST_NAME "strcasestr"
#include "test-string.h"
-
-#define STRCASESTR simple_strcasestr
-#define NO_ALIAS
-#define __strncasecmp strncasecmp
-#include "strcasestr.c"
-
+#include <ctype.h>
static char *
stupid_strcasestr (const char *s1, const char *s2)
@@ -54,7 +49,6 @@ stupid_strcasestr (const char *s1, const char *s2)
typedef char *(*proto_t) (const char *, const char *);
IMPL (stupid_strcasestr, 0)
-IMPL (simple_strcasestr, 0)
IMPL (strcasestr, 1)
new file mode 100644
@@ -0,0 +1,89 @@
+/* Algorithm to select a faster string search implementation.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/*
+ For string search algorithms there are two ways how you could look
+ at performance.
+
+ From practical standpoint a naive algorithm is baseline. Most theoretical
+ algorithms (BW, KMP, ...) are slower because they need to do expensive
+ random access. This makes prohibitive constant cost per character while naive
+ algorithm is linear on random inputs as most of time you don't match string.
+ Also potential gains are nonexistent, user supplies needle so you could be
+ happy when its at least 8 characters long and most haystacks are less than 64
+ bytes large.
+
+ On other hand we need to ensure a linear worst-case of our algorithm.
+
+ We do that with bit of accounting. We count number of comparisons and when
+ it exceeds a size of haystack times some constant we switch to two-way
+ algorithm with guaranteed linear time.
+
+ Main performance boost is gained by vectorizing naive algorithm checks.
+ We will check in parallel if first digraph matches. That should be quite rare,
+ in english most frequent digraph is th with frequency around 1%.
+
+ Then after each digraph found we face decision if to keep looking for
+ digraphs or switch to two-way algorithm. These are covered as common
+ problem in online algorithm setting: buy-or-rent problem.
+ A precomputation in two-way algorithm with needle size n takes
+ around same time as 20 character comparisons so in worst case a two-way
+ algorithm would be twenty times slower than naive for haystacks of size n+1.
+ A solution would be pay higher rent rate until it accumulates to buy cost.
+ Then we would in worst case be twice slower than selecting optimal
+ implementation from start.
+
+ That would work except it needs strlen (needle) which is unnecessary
+ in practice. To show that haystack cannot match needle it suffices to know
+ that there is no leading triplet from needle in haystack. As unless there
+ was a planted needle in haystack and false positive is unlikely we likely
+ don't have to inspect more than three or four characters from needle. Also
+ correct accounting takes time so we approximate cost based on number of
+ comparisons and vector searchs.
+
+ */
+
+#include <string_vector_skeleton.h>
+
+static int
+vector_search (char *haystack, char *needle, CMASK_PARAM,
+ size_t check_n, char **ret)
+{
+ char *s = haystack;
+ char *end = s + check_n;
+ unsigned long rent = 0;
+ while (rent < 256 + (s - haystack))
+ {
+ s = string_skeleton (s, cmask, end - s);
+ if (s == NULL)
+ {
+ *ret = s;
+ return 1;
+ }
+ if (check (s, needle, &rent, cmask))
+ {
+ *ret = s;
+ return 1;
+ }
+ else
+ s++;
+ }
+ /* Superlinear behaviour detected, switch to two-way algorithm. */
+ *ret = s;
+ return 0;
+}