[01/11] Improve generic strlen

Message ID	20161217065729.28561-2-rth@twiddle.net
State	New
Headers	show Return-Path: <libc-alpha-return-76039-incoming=patchwork.ozlabs.org@sourceware.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; q=dns; s=default; b=knh0upIFvz1jUOfGD++USTc++LKG48l 4BhAkgZGatz2xA0TBwPfF9dgasC2ctqwiHRtks3k1PU80jz12QBPvj8+SYkOgNuM QRls9Pt8d6C0uqC2s51ZqbvOrQyBKYS/sbf10ufn4yX+r08pqRKn/bVHAgdKs+2M rKb2cwkc88nQ= Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk Sender: libc-alpha-owner@sourceware.org From: Richard Henderson <rth@twiddle.net> To: libc-alpha@sourceware.org Subject: [PATCH 01/11] Improve generic strlen Date: Fri, 16 Dec 2016 22:57:19 -0800 Message-Id: <20161217065729.28561-2-rth@twiddle.net> In-Reply-To: <20161217065729.28561-1-rth@twiddle.net> References: <20161217065729.28561-1-rth@twiddle.net>

diff --git a/string/strlen.c b/string/strlen.c index 4943ce2..f2a53a6 100644 --- a/string/strlen.c +++ b/string/strlen.c @@ -20,6 +20,9 @@ #include <string.h> #include <stdlib.h> +#include <stdint.h> +#include <haszero.h> +#include <whichzero.h> #undef strlen @@ -32,78 +35,29 @@ size_t STRLEN (const char *str) { - const char *char_ptr; + const char *char_ptr = str; const unsigned long int *longword_ptr; - unsigned long int longword, himagic, lomagic; + unsigned long int longword; + uintptr_t i, align; /* Handle the first few characters by reading one character at a time. Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = str; ((unsigned long int) char_ptr - & (sizeof (longword) - 1)) != 0; - ++char_ptr) + align = -(uintptr_t)char_ptr % sizeof(longword); + for (i = 0; i < align; ++i, ++char_ptr) if (*char_ptr == '\0') - return char_ptr - str; + goto found; - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to 8-byte longwords. */ - - longword_ptr = (unsigned long int *) char_ptr; - - /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits - the "holes." Note that there is a hole just to the left of - each byte, with an extra at the end: - - bits: 01111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD - - The 1-bits make sure that carries propagate to the next 0-bit. - The 0-bits provide holes for carries to fall into. */ - himagic = 0x80808080L; - lomagic = 0x01010101L; - if (sizeof (longword) > 4) - { - /* 64-bit version of the magic. */ - /* Do the shift in two steps to avoid a warning if long has 32 bits. */ - himagic = ((himagic << 16) << 16) | himagic; - lomagic = ((lomagic << 16) << 16) | lomagic; - } - if (sizeof (longword) > 8) - abort (); - - /* Instead of the traditional loop which tests each character, - we will test a longword at a time. The tricky part is testing - if *any of the four* bytes in the longword in question are zero. */ - for (;;) + longword_ptr = (const unsigned long int *) char_ptr; + do { longword = *longword_ptr++; + } + while (!haszero(longword)); - if (((longword - lomagic) & ~longword & himagic) != 0) - { - /* Which of the bytes was the zero? If none of them were, it was - a misfire; continue the search. */ - - const char *cp = (const char *) (longword_ptr - 1); + char_ptr = (const char *) (longword_ptr - 1); + char_ptr += whichzero(longword); - if (cp[0] == 0) - return cp - str; - if (cp[1] == 0) - return cp - str + 1; - if (cp[2] == 0) - return cp - str + 2; - if (cp[3] == 0) - return cp - str + 3; - if (sizeof (longword) > 4) - { - if (cp[4] == 0) - return cp - str + 4; - if (cp[5] == 0) - return cp - str + 5; - if (cp[6] == 0) - return cp - str + 6; - if (cp[7] == 0) - return cp - str + 7; - } - } - } + found: + return char_ptr - str; } libc_hidden_builtin_def (strlen) diff --git a/sysdeps/generic/extractbyte.h b/sysdeps/generic/extractbyte.h new file mode 100644 index 0000000..ace2cda --- /dev/null +++ b/sysdeps/generic/extractbyte.h @@ -0,0 +1,34 @@ +/* extractbyte.h -- function memory order byte extract. Generic C version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef EXTRACTBYTE_H +#define EXTRACTBYTE_H 1 + +#include <limits.h> +#include <endian.h> + +static inline unsigned char +extractbyte(unsigned long int x, unsigned idx) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + return x >> (idx * CHAR_BIT); + else + return x >> (sizeof(x) - 1 - idx) * CHAR_BIT; +} + +#endif /* EXTRACTBYTE_H */ diff --git a/sysdeps/generic/haszero.h b/sysdeps/generic/haszero.h new file mode 100644 index 0000000..084741c --- /dev/null +++ b/sysdeps/generic/haszero.h @@ -0,0 +1,42 @@ +/* haszero.h -- function for zero byte detection. Generic C version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef HASZERO_H +#define HASZERO_H 1 + +/* See https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */ + +static inline unsigned long int +haszero(unsigned long int x) +{ + unsigned long int ones = -1UL / 0xff; + unsigned long int signs = ones * 0x80; + return (x - ones) & ~x & signs; +} + +/* Likewise, but for two words simultaneously. */ + +static inline unsigned long int +haszero2(unsigned long int x1, unsigned long int x2) +{ + unsigned long int ones = -1UL / 0xff; + unsigned long int signs = ones * 0x80; + return (((x1 - ones) & ~x1) | ((x2 - ones) & ~x2)) & signs; +} + +#endif /* haszero.h */ diff --git a/sysdeps/generic/whichzero.h b/sysdeps/generic/whichzero.h new file mode 100644 index 0000000..346b09a --- /dev/null +++ b/sysdeps/generic/whichzero.h @@ -0,0 +1,67 @@ +/* whichzero.h -- functions for zero byte searching. Generic C version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef WHICHZERO_H +#define WHICHZERO_H 1 + +#include <endian.h> + +/* A subroutine for the whichzero and whichzero2 functions. Given a + test word C, return the index of the first byte in memory order + that contains 0x80 (all other bits will be zero). */ + +static inline unsigned int +whichzero_ffs(unsigned long int c) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + return __builtin_ctzl (c) / 8u; + else + return __builtin_clzl (c) / 8u; +} + +/* Given a long that is known to contain a zero byte, return the + index of the first such within the long in host memory order. */ + +static inline unsigned int +whichzero(unsigned long int x) +{ + /* For each byte, find not-zero by + (0) And 0x7f so that we cannot carry between bytes, + (1) Add 0x7f so that we carry into 0x80, + (2) Or in the original byte which might have contained 0x80. + Then invert and mask such that 0x80 is set iff the byte was zero. */ + unsigned long int m = (-1UL / 0xff) * 0x7f; + unsigned long int c = ~(((x & m) + m) | x | m); + + return whichzero_ffs(c); +} + +/* Similarly, but perform the test for two longs simultaneously. */ + +static inline unsigned int +whichzero2(unsigned long int x1, unsigned long int x2) +{ + unsigned long int m = (-1UL / 0xff) * 0x7f; + unsigned long int c1 = ((x1 & m) + m) | x1; + unsigned long int c2 = ((x2 & m) + m) | x2; + unsigned long int c = ~((c1 & c2) | m); + + return whichzero_ffs(c); +} + +#endif /* whichzero.h */

[01/11] Improve generic strlen

Commit Message

Comments

Patch