@@ -57,23 +57,62 @@ STRSTR (const char *haystack_start, const char *needle_start)
size_t haystack_len; /* Known minimum length of HAYSTACK. */
bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
- /* Determine length of NEEDLE, and in the process, make sure
- HAYSTACK is at least as long (no point processing all of a long
- NEEDLE if HAYSTACK is too short). */
- while (*haystack && *needle)
- ok &= *haystack++ == *needle++;
- if (*needle)
- return NULL;
- if (ok)
- return (char *) haystack_start;
-
- /* Reduce the size of haystack using strchr, since it has a smaller
- linear coefficient than the Two-Way algorithm. */
- needle_len = needle - needle_start;
- haystack = strchr (haystack_start + 1, *needle_start);
- if (!haystack || __builtin_expect (needle_len == 1, 0))
+ if (!needle[0])
return (char *) haystack;
- needle -= needle_len;
+
+ if (!needle[1])
+ return strchr (haystack, needle[0]);
+
+ /*
+ This optimizes strstr hot path.
+
+ Main performance problem is that haystacks and needle are short.
+ You spend 99% of time on sub64byte haystacks.
+ That makes preprocessing very expensive.
+ Without this around 90% of time would be spend at calculating critical
+ factorization that would never be used.
+
+ This loop should be fast due to optimized strchr implementation.
+ It would likely traverse haystack as character triplet occurences are
+ rare for practical strings.
+ */
+
+ while ((haystack = strchr (haystack, needle[0])))
+ {
+ if (haystack[1] == needle[1] && (haystack[2] == needle[2] || needle[2] == '\000'))
+ {
+
+ if (needle[2] == '\000')
+ return (char *) haystack;
+
+ /* Determine length of NEEDLE, and in the process, make sure
+ HAYSTACK is at least as long (no point processing all of a long
+ NEEDLE if HAYSTACK is too short). */
+
+ needle += 2;
+ haystack += 2;
+
+ while (*haystack && *needle)
+ ok &= *haystack++ == *needle++;
+ if (*needle)
+ return NULL;
+
+ needle_len = needle - needle_start;
+
+ if (ok)
+ return (char *) haystack - needle_len;
+
+ goto cold_path;
+ }
+ else
+ haystack++;
+ }
+
+ return NULL;
+
+ cold_path:;
+
+ needle = needle_start;
haystack_len = (haystack > haystack_start + needle_len ? 1
: needle_len + haystack_start - haystack);