diff mbox

[1/4] Improve generic strcspn performance

Message ID 1459432863-20749-2-git-send-email-adhemerval.zanella@linaro.org
State New
Headers show

Commit Message

Adhemerval Zanella Netto March 31, 2016, 2:01 p.m. UTC
From: Wilco Dijkstra <wdijkstr@arm.com>

Improve strcspn performance using a much faster algorithm.  It is kept simple
so it works well on most targets.  It is generally at least 10 times faster
than the existing implementation on bench-strcspn on a few AArch64
implementations, and for some tests 100 times as fast (repeatedly calling
strchr on a small string is extremely slow...).

In fact the string/bits/string2.h inlines make no longer sense, as GCC
already uses strlen if reject is an empty string, strchrnul is 5 times as
fast as __strcspn_c1, while __strcspn_c2 and __strcspn_c3 are slower than
the strcspn main loop for large strings (though reject length 2-4 could be
special cased in the future to gain even more performance).

Tested on x86_64, i686, and aarch64.

	* string/Version (libc): Add GLIBC_2.24.
	* string/strcspn.c (strcspn): Rewrite function.
	* string/bits/string2.h (strcspn): Use __builtin_strcspn.
	(__strcspn_c1): Remove inline function.
	(__strcspn_c2): Likewise.
	(__strcspn_c3): Likewise.
	* string/string-inline.c
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c1): Add
	compatibility symbol.
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c2):
	Likewise.
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c3):
	Likewise.
---
 ChangeLog                     | 17 ++++++++++
 string/Versions               |  2 ++
 string/bits/string2.h         | 73 ++-----------------------------------------
 string/strcspn.c              | 44 +++++++++++++++++++++-----
 string/string-inlines.c       | 40 ++++++++++++++++++++++++
 sysdeps/i386/string-inlines.c | 19 +----------
 6 files changed, 99 insertions(+), 96 deletions(-)

Comments

Wilco Dijkstra March 31, 2016, 2:49 p.m. UTC | #1
Adhemerval Zanella wrote:

 size_t
-STRCSPN (const char *s, const char *reject)
+STRCSPN (const char *str, const char *reject)
 {
-  size_t count = 0;
+  if (reject[0] == '\0' || reject[1] == '\0')
+    return __strchrnul (str, reject [0]) - str;

I agree using __strchrnul is fine if reject[0] == '\0' as that should be very rare.
However it needs __glibc_unlikely again to avoid slowing down the common case.
It's odd that GCC doesn't predict that the first character(s) of a string are non-zero...

Other than that, it looks good.

Wilco
Adhemerval Zanella Netto March 31, 2016, 6:28 p.m. UTC | #2
On 31-03-2016 11:49, Wilco Dijkstra wrote:
> Adhemerval Zanella wrote:
> 
>  size_t
> -STRCSPN (const char *s, const char *reject)
> +STRCSPN (const char *str, const char *reject)
>  {
> -  size_t count = 0;
> +  if (reject[0] == '\0' || reject[1] == '\0')
> +    return __strchrnul (str, reject [0]) - str;
> 
> I agree using __strchrnul is fine if reject[0] == '\0' as that should be very rare.
> However it needs __glibc_unlikely again to avoid slowing down the common case.
> It's odd that GCC doesn't predict that the first character(s) of a string are non-zero...
> 
> Other than that, it looks good.
> 
> Wilco
> 

Right, I will change that.
diff mbox

Patch

diff --git a/string/Versions b/string/Versions
index 59bf35a..475c1fd 100644
--- a/string/Versions
+++ b/string/Versions
@@ -80,4 +80,6 @@  libc {
   GLIBC_2.6 {
     strerror_l;
   }
+  GLIBC_2.24 {
+  }
 }
diff --git a/string/bits/string2.h b/string/bits/string2.h
index 8200ef1..a8df0db 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -905,77 +905,10 @@  __stpcpy_small (char *__dest,
 
 /* Return the length of the initial segment of S which
    consists entirely of characters not in REJECT.  */
-#if !defined _HAVE_STRING_ARCH_strcspn || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strcspn
-#  if __GNUC_PREREQ (3, 2)
-#   define strcspn(s, reject) \
-  __extension__								      \
-  ({ char __r0, __r1, __r2;						      \
-     (__builtin_constant_p (reject) && __string2_1bptr_p (reject)	      \
-      ? ((__builtin_constant_p (s) && __string2_1bptr_p (s))		      \
-	 ? __builtin_strcspn (s, reject)				      \
-	 : ((__r0 = ((const char *) (reject))[0], __r0 == '\0')		      \
-	    ? strlen (s)						      \
-	    : ((__r1 = ((const char *) (reject))[1], __r1 == '\0')	      \
-	       ? __strcspn_c1 (s, __r0)					      \
-	       : ((__r2 = ((const char *) (reject))[2], __r2 == '\0')	      \
-		  ? __strcspn_c2 (s, __r0, __r1)			      \
-		  : (((const char *) (reject))[3] == '\0'		      \
-		     ? __strcspn_c3 (s, __r0, __r1, __r2)		      \
-		     : __builtin_strcspn (s, reject))))))		      \
-      : __builtin_strcspn (s, reject)); })
-#  else
-#   define strcspn(s, reject) \
-  __extension__								      \
-  ({ char __r0, __r1, __r2;						      \
-     (__builtin_constant_p (reject) && __string2_1bptr_p (reject)	      \
-      ? ((__r0 = ((const char *) (reject))[0], __r0 == '\0')		      \
-	 ? strlen (s)							      \
-	 : ((__r1 = ((const char *) (reject))[1], __r1 == '\0')		      \
-	    ? __strcspn_c1 (s, __r0)					      \
-	    : ((__r2 = ((const char *) (reject))[2], __r2 == '\0')	      \
-	       ? __strcspn_c2 (s, __r0, __r1)				      \
-	       : (((const char *) (reject))[3] == '\0'			      \
-		  ? __strcspn_c3 (s, __r0, __r1, __r2)			      \
-		  : strcspn (s, reject)))))				      \
-      : strcspn (s, reject)); })
-#  endif
+#ifndef _HAVE_STRING_ARCH_strcspn
+# if __GNUC_PREREQ (3, 2)
+#  define strcspn(s, reject) __builtin_strcspn (s, reject)
 # endif
-
-__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
-__STRING_INLINE size_t
-__strcspn_c1 (const char *__s, int __reject)
-{
-  size_t __result = 0;
-  while (__s[__result] != '\0' && __s[__result] != __reject)
-    ++__result;
-  return __result;
-}
-
-__STRING_INLINE size_t __strcspn_c2 (const char *__s, int __reject1,
-				     int __reject2);
-__STRING_INLINE size_t
-__strcspn_c2 (const char *__s, int __reject1, int __reject2)
-{
-  size_t __result = 0;
-  while (__s[__result] != '\0' && __s[__result] != __reject1
-	 && __s[__result] != __reject2)
-    ++__result;
-  return __result;
-}
-
-__STRING_INLINE size_t __strcspn_c3 (const char *__s, int __reject1,
-				     int __reject2, int __reject3);
-__STRING_INLINE size_t
-__strcspn_c3 (const char *__s, int __reject1, int __reject2,
-	      int __reject3)
-{
-  size_t __result = 0;
-  while (__s[__result] != '\0' && __s[__result] != __reject1
-	 && __s[__result] != __reject2 && __s[__result] != __reject3)
-    ++__result;
-  return __result;
-}
 #endif
 
 
diff --git a/string/strcspn.c b/string/strcspn.c
index 8888919..89ba4ca 100644
--- a/string/strcspn.c
+++ b/string/strcspn.c
@@ -26,16 +26,44 @@ 
 /* Return the length of the maximum initial segment of S
    which contains no characters from REJECT.  */
 size_t
-STRCSPN (const char *s, const char *reject)
+STRCSPN (const char *str, const char *reject)
 {
-  size_t count = 0;
+  if (reject[0] == '\0' || reject[1] == '\0')
+    return __strchrnul (str, reject [0]) - str;
 
-  while (*s != '\0')
-    if (strchr (reject, *s++) == NULL)
-      ++count;
-    else
-      return count;
+  /* Use multiple small memsets to enable inlining on most targets.  */
+  unsigned char table[256];
+  unsigned char *p = memset (table, 0, 64);
+  memset (p + 64, 0, 64);
+  memset (p + 128, 0, 64);
+  memset (p + 192, 0, 64);
 
-  return count;
+  unsigned char *s = (unsigned char*) reject;
+  unsigned char tmp;
+  do
+    p[tmp = *s++] = 1;
+  while (tmp);
+
+  s = (unsigned char*) str;
+  if (p[s[0]]) return 0;
+  if (p[s[1]]) return 1;
+  if (p[s[2]]) return 2;
+  if (p[s[3]]) return 3;
+
+  s = (unsigned char *) ((size_t)s & ~3);
+
+  unsigned int c0, c1, c2, c3;
+  do
+    {
+      s += 4;
+      c0 = p[s[0]];
+      c1 = p[s[1]];
+      c2 = p[s[2]];
+      c3 = p[s[3]];
+    }
+  while ((c0 | c1 | c2 | c3) == 0);
+
+  size_t count = s - (unsigned char *) str;
+  return (c0 | c1) != 0 ? count - c0 + 1 : count - c2 + 3;
 }
 libc_hidden_builtin_def (strcspn)
diff --git a/string/string-inlines.c b/string/string-inlines.c
index 16db3ea..83bdd6c 100644
--- a/string/string-inlines.c
+++ b/string/string-inlines.c
@@ -32,3 +32,43 @@ 
 #undef __NO_INLINE__
 #include <bits/string.h>
 #include <bits/string2.h>
+
+#include "shlib-compat.h"
+
+#if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24)
+/* The inline functions are not used from GLIBC 2.24 and forward, however
+   they are required to provide the symbols through string-inlines.c
+   (if inlining is not possible for compatibility reasons).  */
+size_t
+__old_strcspn_c1 (const char *__s, int __reject)
+{
+  size_t __result = 0;
+  while (__s[__result] != '\0' && __s[__result] != __reject)
+    ++__result;
+  return __result;
+}
+compat_symbol (libc, __old_strcspn_c1, __strcspn_c1, GLIBC_2_1_1);
+
+size_t
+__old_strcspn_c2 (const char *__s, int __reject1, int __reject2)
+{
+  size_t __result = 0;
+  while (__s[__result] != '\0' && __s[__result] != __reject1
+	 && __s[__result] != __reject2)
+    ++__result;
+  return __result;
+}
+compat_symbol (libc, __old_strcspn_c2, __strcspn_c2, GLIBC_2_1_1);
+
+size_t
+__old_strcspn_c3 (const char *__s, int __reject1, int __reject2,
+	      int __reject3)
+{
+  size_t __result = 0;
+  while (__s[__result] != '\0' && __s[__result] != __reject1
+	 && __s[__result] != __reject2 && __s[__result] != __reject3)
+    ++__result;
+  return __result;
+}
+compat_symbol (libc, __old_strcspn_c3, __strcspn_c3, GLIBC_2_1_1);
+#endif
diff --git a/sysdeps/i386/string-inlines.c b/sysdeps/i386/string-inlines.c
index c7de270..64d80e8 100644
--- a/sysdeps/i386/string-inlines.c
+++ b/sysdeps/i386/string-inlines.c
@@ -15,27 +15,10 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-/*  <bits/string.h> and <bits/string2.h> declare some extern inline
-    functions.  These functions are declared additionally here if
-    inlining is not possible.  */
-
-#undef __USE_STRING_INLINES
-#define __USE_STRING_INLINES
-#define _FORCE_INLINES
-#define __STRING_INLINE /* empty */
-#define __NO_INLINE__
-
 /* This is to avoid PLT entries for the x86 version.  */
 #define __memcpy_g __memcpy_g_internal
 #define __strchr_g __strchr_g_internal
-
-#include <string.h>
-#undef index
-#undef rindex
-
-#undef __NO_INLINE__
-#include <bits/string.h>
-#include <bits/string2.h>
+#include <string/string-inlines.c>
 
 void *
 (__memcpy_c) (void *d, const void *s, size_t n)