[v2,10/16] Improve generic strcmp

Message ID	20161221230605.28638-11-rth@twiddle.net
State	New
Headers	show Return-Path: <libc-alpha-return-76205-incoming=patchwork.ozlabs.org@sourceware.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; q=dns; s=default; b=A6ILnIBC8sL7Pu1i2B+pbWy4oqWOHrH V8qHuO+JESttypvQNuzFG4/JXQqo5DBiBFvm+TvbFJrAib42EKXA6NclLe9pYmmX dP6SOxWfpfMgV/StsSF2Ivild/kI3lhplVzW4ovS31ivMH8vkcMx44u26UtLFL5x y/3CaHliSltw= Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk Sender: libc-alpha-owner@sourceware.org From: Richard Henderson <rth@twiddle.net> To: libc-alpha@sourceware.org Subject: [PATCH v2 10/16] Improve generic strcmp Date: Wed, 21 Dec 2016 15:05:59 -0800 Message-Id: <20161221230605.28638-11-rth@twiddle.net> In-Reply-To: <20161221230605.28638-1-rth@twiddle.net> References: <20161221230605.28638-1-rth@twiddle.net>

Message ID

20161221230605.28638-11-rth@twiddle.net

State

New

Headers

DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id
	:list-unsubscribe:list-subscribe:list-archive:list-post
	:list-help:sender:from:to:subject:date:message-id:in-reply-to
	:references; q=dns; s=default; b=A6ILnIBC8sL7Pu1i2B+pbWy4oqWOHrH
	V8qHuO+JESttypvQNuzFG4/JXQqo5DBiBFvm+TvbFJrAib42EKXA6NclLe9pYmmX
	dP6SOxWfpfMgV/StsSF2Ivild/kI3lhplVzW4ovS31ivMH8vkcMx44u26UtLFL5x
	y/3CaHliSltw=
Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm
Precedence: bulk
Sender: libc-alpha-owner@sourceware.org
From: Richard Henderson <rth@twiddle.net>
To: libc-alpha@sourceware.org
Subject: [PATCH v2 10/16] Improve generic strcmp
Date: Wed, 21 Dec 2016 15:05:59 -0800
Message-Id: <20161221230605.28638-11-rth@twiddle.net>
In-Reply-To: <20161221230605.28638-1-rth@twiddle.net>
References: <20161221230605.28638-1-rth@twiddle.net>

Commit Message

Richard Henderson Dec. 21, 2016, 11:05 p.m. UTC

* string/strcmp.c: Rewrite using memcopy.h, string-fzb.h,
	string-fzi.h, string-extbyte.h.
---
 string/strcmp.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 91 insertions(+), 11 deletions(-)

diff --git a/string/strcmp.c b/string/strcmp.c
index 4b16f99..cb522ff 100644
--- a/string/strcmp.c
+++ b/string/strcmp.c
@@ -16,32 +16,112 @@ 
    <http://www.gnu.org/licenses/>.  */
 
 #include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
+#include <string-extbyte.h>
+#include <memcopy.h>
 
 #undef strcmp
 
-#ifndef STRCMP
-# define STRCMP strcmp
+#ifdef STRCMP
+# define strcmp STRCMP
 #endif
 
 /* Compare S1 and S2, returning less than, equal to or
    greater than zero if S1 is lexicographically less than,
    equal to or greater than S2.  */
 int
-STRCMP (const char *p1, const char *p2)
+strcmp (const char *p1, const char *p2)
 {
-  const unsigned char *s1 = (const unsigned char *) p1;
-  const unsigned char *s2 = (const unsigned char *) p2;
+  const op_t *x1, *x2;
+  op_t w1, w2;
   unsigned char c1, c2;
+  uintptr_t i, n, ofs;
+  int diff;
+
+  /* Compare bytewise until P1 reaches an op_t-aligned address.  */
+  n = -(uintptr_t)p1 % sizeof(op_t);
+  for (i = 0; i < n; ++i)
+    {
+      c1 = *p1++;
+      c2 = *p2++;
+      diff = c1 - c2;
+      if (c1 == '\0' || diff)
+	return diff;
+    }
 
-  do
+  /* P1 is now aligned to sizeof (op_t).  P2 may or may not be.  */
+  x1 = (const op_t *)p1;
+  w1 = *x1++;
+  ofs = (uintptr_t)p2 % sizeof(op_t);
+  if (ofs == 0)
     {
-      c1 = (unsigned char) *s1++;
-      c2 = (unsigned char) *s2++;
-      if (c1 == '\0')
-	return c1 - c2;
+      x2 = (const op_t *)p2;
+      w2 = *x2++;
+      /* Aligned loop.  If a difference is found, exit to compare the
+         bytes.  Else if a zero is found we have equal strings.  */
+      while (w1 == w2)
+	{
+	  if (has_zero (w1))
+	    return 0;
+          w1 = *x1++;
+          w2 = *x2++;
+	}
     }
-  while (c1 == c2);
+  else
+    {
+      op_t w2a, w2b;
+      uintptr_t sh_1, sh_2;
+
+      x2 = (const op_t *)(p2 - ofs);
+      w2a = *x2++;
+      sh_1 = ofs * CHAR_BIT;
+      sh_2 = sizeof(op_t) * CHAR_BIT - sh_1;
+
+      /* Align the first partial of P2, with 0xff for the rest of the
+         bytes so that we can also apply the has_zero test to see if we
+         have already reached EOS.  If we have, then we can simply fall
+         through to the final comparison.  */
+      w2 = MERGE (w2a, sh_1, (op_t)-1, sh_2);
+      if (!has_zero (w2))
+	{
+	  /* Unaligned loop.  The invariant is that W2B, which is "ahead"
+             of W1, does not contain end-of-string.  Therefore it is safe
+             (and necessary) to read another word from each while we do
+             not have a difference.  */
+	  while (1)
+	    {
+	      w2b = *x2++;
+	      w2 = MERGE (w2a, sh_1, w2b, sh_2);
+	      if (w1 != w2)
+		goto final_cmp;
+	      if (has_zero (w2b))
+		break;
+	      w1 = *x1++;
+	      w2a = w2b;
+	    }
 
+	  /* Zero found in the second partial of P2.  If we had EOS
+	     in the aligned word, we have equality.  */
+	  if (has_zero (w1))
+	    return 0;
+
+          /* Load the final word of P1 and align the final partial of P2.  */
+	  w1 = *x1++;
+          w2 = MERGE (w2b, sh_1, 0, sh_2);
+	}
+    }
+
+ final_cmp:
+  /* We have two aligned words of data.  */
+  i = index_first_zero_ne (w1, w2);
+  c1 = extractbyte (w1, i);
+  c2 = extractbyte (w2, i);
   return c1 - c2;
 }
+
+#ifndef STRCMP
 libc_hidden_builtin_def (strcmp)
+#endif

[v2,10/16] Improve generic strcmp

Commit Message

Patch