diff mbox

[2/4] Improve generic strspn performance

Message ID 1459178389-14133-3-git-send-email-adhemerval.zanella@linaro.org
State New
Headers show

Commit Message

Adhemerval Zanella Netto March 28, 2016, 3:19 p.m. UTC
As for strcspn, this patch improves strspn performance using a much
faster algorithm.  It first constructs a 256-entry table based on
the accept string and then uses it as a lookup table for the
input string.  As for strcspn optimization, it is generally at least
10 times faster than the existing implementation on bench-strspn
on a few AArch64 implementations.

Also the string/bits/string2.h inlines make no longer sense, as current
implementation will already implement most of the optimizations.

Tested on x86_64, i686, and aarch64.

	* string/strspn.c (strspn): Rewrite function.
	* string/bits/string2.h (strspn): Use __builtin_strcspn.
---
 ChangeLog             |  5 +++++
 string/bits/string2.h | 41 ++++++-------------------------------
 string/strspn.c       | 56 +++++++++++++++++++++++++++++++++++++--------------
 3 files changed, 52 insertions(+), 50 deletions(-)

Comments

Tulio Magno Quites Machado Filho March 29, 2016, 8:32 p.m. UTC | #1
Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:

> As for strcspn, this patch improves strspn performance using a much
> faster algorithm.  It first constructs a 256-entry table based on
> the accept string and then uses it as a lookup table for the
> input string.  As for strcspn optimization, it is generally at least
> 10 times faster than the existing implementation on bench-strspn
> on a few AArch64 implementations.
>
> Also the string/bits/string2.h inlines make no longer sense, as current
> implementation will already implement most of the optimizations.
>
> Tested on x86_64, i686, and aarch64.

And on powerpc64 and powerpc64le.

Git reported some trailing whitespaces in this patch.

> diff --git a/string/strspn.c b/string/strspn.c
> index f0635c1..0547f41 100644
> --- a/string/strspn.c
> +++ b/string/strspn.c
> @@ -25,23 +25,49 @@
>  /* Return the length of the maximum initial segment
>     of S which contains only characters in ACCEPT.  */
>  size_t
> -STRSPN (const char *s, const char *accept)
> +STRSPN (const char *str, const char *accept)
>  {
> -  const char *p;
> -  const char *a;
> -  size_t count = 0;
> -
> -  for (p = s; *p != '\0'; ++p)
> -    {
> -      for (a = accept; *a != '\0'; ++a)
> -	if (*p == *a)
> -	  break;
> -      if (*a == '\0')
> -	return count;
> -      else
> -	++count;
> +  if (accept[0] == '\0')
> +    return 0;
> +  if (accept[1] == '\0')
> +    { 

Here.

> +      const char *a = str;
> +      for (; *str == *accept; str++);
> +      return str - a;
>      }
>
> -  return count;
> +  /* Use multiple small memsets to enable inlining on most targets.  */
> +  unsigned char table[256];
> +  unsigned char *p = memset (table, 0, 64);
> +  memset (p + 64, 0, 64);
> +  memset (p + 128, 0, 64);
> +  memset (p + 192, 0, 64);
> +
> +  unsigned char *s = (unsigned char*) accept;
> +  /* Different from strcspn it does not add the NULL on the table
> +     so can avoid check if str[i] is NULL, since table['\0'] will
> +     be 0 and thus stopping the loop check.  */
> +  do
> +    p[*s++] = 1;
> +  while (*s);
> +
> +  s = (unsigned char*) str;
> +  if (!p[s[0]]) return 0;
> +  if (!p[s[1]]) return 1;
> +  if (!p[s[2]]) return 2;
> +  if (!p[s[3]]) return 3;
> +          

Here.

> +  s = (unsigned char *) ((size_t)(s) & ~3);
> +  unsigned int c0, c1, c2, c3; 

And here.
diff mbox

Patch

diff --git a/string/bits/string2.h b/string/bits/string2.h
index 1b87686..a1684eb 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -952,43 +952,14 @@  __strcspn_c3 (const char *__s, int __reject1, int __reject2,
 
 /* Return the length of the initial segment of S which
    consists entirely of characters in ACCEPT.  */
-#if !defined _HAVE_STRING_ARCH_strspn || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strspn
-#  if __GNUC_PREREQ (3, 2)
-#   define strspn(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__builtin_constant_p (s) && __string2_1bptr_p (s))		      \
-	 ? __builtin_strspn (s, accept)					      \
-	 : ((__a0 = ((const char *) (accept))[0], __a0 == '\0')		      \
-	    ? ((void) (s), (size_t) 0)					      \
-	    : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')	      \
-	       ? __strspn_c1 (s, __a0)					      \
-	       : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-		  ? __strspn_c2 (s, __a0, __a1)				      \
-		  : (((const char *) (accept))[3] == '\0'		      \
-		     ? __strspn_c3 (s, __a0, __a1, __a2)		      \
-		     : __builtin_strspn (s, accept))))))		      \
-      : __builtin_strspn (s, accept)); })
-#  else
-#   define strspn(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0')		      \
-	 ? ((void) (s), (size_t) 0)					      \
-	 : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')		      \
-	    ? __strspn_c1 (s, __a0)					      \
-	    : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-	       ? __strspn_c2 (s, __a0, __a1)				      \
-	       : (((const char *) (accept))[3] == '\0'			      \
-		  ? __strspn_c3 (s, __a0, __a1, __a2)			      \
-		  : strspn (s, accept)))))				      \
-      : strspn (s, accept)); })
-#  endif
+#ifndef _HAVE_STRING_ARCH_strspn
+# if __GNUC_PREREQ (3, 2)
+#  define strspn(s, accept) __builtin_strspn (s, accept)
 # endif
 
+/* The inline functions are not used from GLIBC 2.24 and forward, however
+   they are required to provide the symbols through string-inlines.c
+   (if inlining is not possible for compatibility reasons).  */
 __STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept);
 __STRING_INLINE size_t
 __strspn_c1 (const char *__s, int __accept)
diff --git a/string/strspn.c b/string/strspn.c
index f0635c1..0547f41 100644
--- a/string/strspn.c
+++ b/string/strspn.c
@@ -25,23 +25,49 @@ 
 /* Return the length of the maximum initial segment
    of S which contains only characters in ACCEPT.  */
 size_t
-STRSPN (const char *s, const char *accept)
+STRSPN (const char *str, const char *accept)
 {
-  const char *p;
-  const char *a;
-  size_t count = 0;
-
-  for (p = s; *p != '\0'; ++p)
-    {
-      for (a = accept; *a != '\0'; ++a)
-	if (*p == *a)
-	  break;
-      if (*a == '\0')
-	return count;
-      else
-	++count;
+  if (accept[0] == '\0')
+    return 0;
+  if (accept[1] == '\0')
+    { 
+      const char *a = str;
+      for (; *str == *accept; str++);
+      return str - a;
     }
 
-  return count;
+  /* Use multiple small memsets to enable inlining on most targets.  */
+  unsigned char table[256];
+  unsigned char *p = memset (table, 0, 64);
+  memset (p + 64, 0, 64);
+  memset (p + 128, 0, 64);
+  memset (p + 192, 0, 64);
+
+  unsigned char *s = (unsigned char*) accept;
+  /* Different from strcspn it does not add the NULL on the table
+     so can avoid check if str[i] is NULL, since table['\0'] will
+     be 0 and thus stopping the loop check.  */
+  do
+    p[*s++] = 1;
+  while (*s);
+
+  s = (unsigned char*) str;
+  if (!p[s[0]]) return 0;
+  if (!p[s[1]]) return 1;
+  if (!p[s[2]]) return 2;
+  if (!p[s[3]]) return 3;
+          
+  s = (unsigned char *) ((size_t)(s) & ~3);
+  unsigned int c0, c1, c2, c3; 
+  do {
+      s += 4;
+      c0 = p[s[0]];
+      c1 = p[s[1]];
+      c2 = p[s[2]];
+      c3 = p[s[3]];
+  } while ((c0 && c1 && c2 && c3) == 1);
+
+  size_t count = s - (unsigned char *) str;
+  return (c0 && c1) == 0 ? count - !c0 + 1 : count - !c2 + 3;
 }
 libc_hidden_builtin_def (strspn)