@@ -47,12 +47,24 @@ contains_zero (unsigned long int s)
return ((s + add) ^ ~s) & high_bits;
}
+
+/* Here idea is still use the result of expression
+ contains_zero (*p) | contains_zero (*p ^ cmask)
+ but we can optimize it. By moving or to compute
+ ((s + add) | ((s ^ cmask) + add) a highest bit is set only for characters
+ 0, c, 128, c + 128
+ As we ensured that c has highest byte zero a ^ ~*p eliminates both 128 and
+ 128 + c instead doing xor twice. */
+
+
static always_inline
int
found_in_long_bytes(char *s, unsigned long int cmask, char **result)
{
const unsigned long int *lptr = (const unsigned long int *) s;
- unsigned long int mask = contains_zero (*lptr) | contains_zero (*lptr ^ cmask);
+ unsigned long int mask = (((*lptr + add) | ((*lptr ^ cmask) + add))
+ ^ ~*lptr) & high_bits;
+
if (mask)
{
*result = s + ffsl (mask) / 8 - 1;
@@ -76,9 +88,30 @@ STRCHRNUL (const char *s_in, int c_in)
const unsigned long int *lptr;
char *s = (char *) s_in;
unsigned char c = (unsigned char) c_in;
- char *r;
+ char *r, *s_aligned;
unsigned long int cmask = c * ones;
+ if (__libc_unlikely (c > 127))
+ {
+ s_aligned = PTR_ALIGN_DOWN (s, LSIZE);
+ lptr = (const unsigned long int *) s_aligned;
+ mask = (contains_zero (*lptr)
+ | contains_zero (*lptr ^ cmask))
+ >> (8 * (s_aligned - s));
+
+ if (mask)
+ return s + ffsl (mask) / 8 - 1;
+ while (1)
+ {
+ s_aligned += LSIZE;
+ lptr = (const unsigned long int *) s_aligned;
+ mask = (contains_zero (*lptr)
+ | contains_zero (*lptr ^ cmask));
+ if (mask)
+ return s_aligned + ffsl (mask) / 8 - 1;
+ }
+ }
+
#if _STRING_ARCH_unaligned
/* We fetch 32 bytes while not crossing page boundary.
Most strings in practice are of that size and we avoid a loop.
@@ -115,7 +148,7 @@ STRCHRNUL (const char *s_in, int c_in)
/* We need use aligned loads. For first load we read some bytes before
start that we discard by shifting them down. */
- char *s_aligned = PTR_ALIGN_DOWN (s, LSIZE);
+ s_aligned = PTR_ALIGN_DOWN (s, LSIZE);
lptr = (const unsigned long int *) s_aligned;
mask = (contains_zero (*lptr)
| contains_zero (*lptr ^ cmask))