From patchwork Mon Nov 24 16:56:32 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella X-Patchwork-Id: 414043 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 6D355140188 for ; Tue, 25 Nov 2014 03:56:53 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:message-id:date:from:mime-version:to:subject :content-type:content-transfer-encoding; q=dns; s=default; b=byN ZKEYS0U9higVe4r+kEW/PuE8LOpJCHOToc9tiSJxVh7LUUQLCOwxEPR+HOaiHDS4 tJYEfKK0m/s/Iwq53QvplBGEg1mfr4fgN46fKvXXWW+epwxWt+fkkh/0NznAymb3 yaSjgjOnFt74MrP507WOWGhIDfGCAXglMrDfIsvs= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:message-id:date:from:mime-version:to:subject :content-type:content-transfer-encoding; s=default; bh=CaovN/8HK cKG3akHj3/F1r9UxuE=; b=v2bNu4i9PkZpyqc5ZneSskMu9KR4KKLEWQ+J8qqib 1upLXGDlEF0s20WM0/g5tlCo+tEanMU6tqwozLDg+yLVRdFTU/vIH3J8deZw5+gs sbXRDbh45edRTR08ZkQpO+1U8gKo0v8H3vOmCRb9J+AlwSYF3Rs5SmsyLD4tzSxK Ww= Received: (qmail 22332 invoked by alias); 24 Nov 2014 16:56:46 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 22320 invoked by uid 89); 24 Nov 2014 16:56:45 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.7 required=5.0 tests=AWL, BAYES_50, 
T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 X-HELO: e24smtp04.br.ibm.com Message-ID: <54736340.6030204@linux.vnet.ibm.com> Date: Mon, 24 Nov 2014 14:56:32 -0200 From: Adhemerval Zanella User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.5.0 MIME-Version: 1.0 To: "GNU C. Library" Subject: [PATCH 1/3] powerpc: Add powerpc64 strspn optimization X-TM-AS-MML: disable X-Content-Scanned: Fidelis XPS MAILER x-cbid: 14112416-0029-0000-0000-0000021DD2B7 This patch makes the POWER7 optimized strspn generic by using default doubleword stores to zero the hash, instead of VSX instructions. Performance on POWER7/POWER8 machines does not change, and it is faster on older machines (POWER6). Checked on powerpc64. --- * sysdeps/powerpc/powerpc64/multiarch/Makefile [sysdep_routines]: Remove strspn objects. * sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Remove strspn implementation. * sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S: Remove file. * sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strspn.c: Likewise. * sysdeps/powerpc/powerpc64/power7/strspn.S: Remove file. * sysdeps/powerpc/powerpc64/strspn.S: New file. 
--- diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index abc9d2e..d6de5a5 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -15,7 +15,7 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ wordcopy-power7 wordcopy-power6 wordcopy-ppc64 \ strcpy-power7 strcpy-ppc64 stpcpy-power7 stpcpy-ppc64 \ strrchr-power7 strrchr-ppc64 strncat-power7 strncat-ppc64 \ - strspn-power7 strspn-ppc64 strcspn-power7 strcspn-ppc64 \ + strcspn-power7 strcspn-ppc64 \ strpbrk-power7 strpbrk-ppc64 strncpy-power7 strncpy-ppc64 \ stpncpy-power7 stpncpy-ppc64 strcmp-power7 strcmp-ppc64 \ strcat-power7 strcat-ppc64 memmove-power7 memmove-ppc64 \ diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index 06d5be9..4a9e523 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -272,14 +272,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_ppc)) - /* Support sysdeps/powerpc/powerpc64/multiarch/strspn.c. */ - IFUNC_IMPL (i, name, strspn, - IFUNC_IMPL_ADD (array, i, strspn, - hwcap & PPC_FEATURE_HAS_VSX, - __strspn_power7) - IFUNC_IMPL_ADD (array, i, strspn, 1, - __strspn_ppc)) - /* Support sysdeps/powerpc/powerpc64/multiarch/strcspn.c. */ IFUNC_IMPL (i, name, strcspn, IFUNC_IMPL_ADD (array, i, strcspn, diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S deleted file mode 100644 index 889dfee..0000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S +++ /dev/null @@ -1,40 +0,0 @@ -/* Optimized strspn implementation for POWER7. - Copyright (C) 2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include - -#undef EALIGN -#define EALIGN(name, alignt, words) \ - .section ".text"; \ - ENTRY_2(__strspn_power7) \ - .align ALIGNARG(alignt); \ - EALIGN_W_##words; \ - BODY_LABEL(__strspn_power7): \ - cfi_startproc; \ - LOCALENTRY(__strspn_power7) - -#undef END -#define END(name) \ - cfi_endproc; \ - TRACEBACK(__strspn_power7) \ - END_2(__strspn_power7) - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) - -#include diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c deleted file mode 100644 index b35af3e..0000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (C) 2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include - -#define STRSPN __strspn_ppc -#undef weak_alias -#define weak_alias(name, aliasname) \ - extern __typeof (__strspn_ppc) aliasname \ - __attribute__ ((weak, alias ("__strspn_ppc"))); -#if IS_IN (libc) && defined(SHARED) -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1(__strspn_ppc, __GI_strspn, __strspn_ppc); -#endif - -extern __typeof (strspn) __strspn_ppc attribute_hidden; - -#include diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn.c b/sysdeps/powerpc/powerpc64/multiarch/strspn.c deleted file mode 100644 index dd9ec18..0000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strspn.c +++ /dev/null @@ -1,31 +0,0 @@ -/* Multiple versions of strspn. PowerPC64 version. - Copyright (C) 2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#if IS_IN (libc) -# include -# include -# include "init-arch.h" - -extern __typeof (strspn) __strspn_ppc attribute_hidden; -extern __typeof (strspn) __strspn_power7 attribute_hidden; - -libc_ifunc (strspn, - (hwcap & PPC_FEATURE_HAS_VSX) - ? 
__strspn_power7 - : __strspn_ppc); -#endif diff --git a/sysdeps/powerpc/powerpc64/power7/strspn.S b/sysdeps/powerpc/powerpc64/power7/strspn.S deleted file mode 100644 index d587a67..0000000 --- a/sysdeps/powerpc/powerpc64/power7/strspn.S +++ /dev/null @@ -1,165 +0,0 @@ -/* Optimized strspn implementation for PowerPC64/POWER7. - - Copyright (C) 2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -/* size_t [r3] strspn (const char *string [r3], - const char *needleAccept [r4] */ - -/* Performance gains are grabbed through following techniques: - - > hashing of needle. - > hashing avoids scanning of duplicate entries in needle - across the string. - > initializing the hash table with Vector instructions - by quadword access. - > unrolling when scanning for character in string - across hash table. */ - -/* Algorithm is as below: - 1. A empty hash table/dictionary is created comprising of - 256 ascii character set - 2. When hash entry is found in needle , the hash index - is initialized to 1 - 3. The string is scanned until end and for every character, - its corresponding hash index is compared. - 4. initial length of string (count) until first hit of - accept needle to be found is set to 0 - 4. If hash index is set to 1 for the index of string, - count is returned. - 5. 
Otherwise count is incremented and scanning continues - until end of string. */ - -#include - -#undef strspn - - .machine power7 -EALIGN(strspn, 4, 0) - CALL_MCOUNT 2 - - lbz r10, 0(r4) /* load r10 with needle (r4) */ - addi r9, r1, -256 /* r9 is a hash of 256 bytes */ - - li r5, 16 /* set r5 = 16 as offset */ - li r6, 32 /* set r6 = 32 as offset */ - li r8, 48 /* set r8 = 48 as offset */ - -/*Iniatliaze hash table with Zeroes in double indexed quadword accesses */ - xxlxor v0, v0, v0 /* prepare for initializing hash */ - - stxvd2x v0, r0, r9 /* initialize 1st quadword */ - stxvd2x v0, r9, r5 - stxvd2x v0, r9, r6 - stxvd2x v0, r9, r8 /* initialize 4th quadword */ - - addi r11, r9, 64 /* r11 is index to hash */ - - stxvd2x v0, r0, r11 /* initialize 5th quadword */ - stxvd2x v0, r11, r5 - stxvd2x v0, r11, r6 - stxvd2x v0, r11, r8 /* initialize 8th quadword */ - - addi r11, r9, 128 /* r11 is index to hash */ - - stxvd2x v0, r0, r11 /* initialize 9th quadword */ - stxvd2x v0, r11, r5 - stxvd2x v0, r11, r6 - stxvd2x v0, r11, r8 /* initialize 12th quadword */ - - addi r11, r9, 192 /* r11 is index to hash */ - - stxvd2x v0, r0, r11 /* initialize 13th quadword */ - stxvd2x v0, r11, r5 - stxvd2x v0, r11, r6 - stxvd2x v0, r11, r8 /* initialize 16th quadword */ - - li r8, 1 /* r8=1, marker into hash if found in - needle */ - - cmpdi cr7, r10, 0 /* accept needle is NULL */ - beq cr7, L(skipHashing) /* if needle is NULL, skip hashing */ - - .p2align 4 /* align section to 16 byte boundary */ -L(hashing): - stbx r8, r9, r10 /* update hash with marker for the pivot of - the needle */ - lbzu r10, 1(r4) /* load needle into r10 and update to next */ - cmpdi cr7, r10, 0 /* if needle is has reached NULL, continue */ - bne cr7, L(hashing) /* loop to hash the needle */ - -L(skipHashing): - li r10, 0 /* load counter = 0 */ - b L(beginScan) - - .p2align 4 /* align section to 16 byte boundary */ -L(scanUnroll): - lbzx r8, r9, r8 /* load r8 with hash value at index */ - cmpwi cr7, r8, 0 /* 
if we hit marker in hash, we have found - accept needle */ - beq cr7, L(ret1stIndex) /* we have hit accept needle, return the - count */ - - lbz r8, 1(r3) /* load string[1] into r8 */ - addi r10, r10, 4 /* increment counter */ - lbzx r8, r9, r8 /* load r8 with hash value at index */ - cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found - accept needle */ - beq cr7, L(ret2ndIndex) /* we have hit accept needle, return the - count */ - - lbz r8, 2(r3) /* load string[2] into r8 */ - lbzx r8, r9, r8 /* load r8 with hash value at index */ - cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found - accept needle */ - beq cr7, L(ret3rdIndex) /* we have hit accept needle, return the - count */ - - lbz r8, 3(r3) /* load string[3] into r8 */ - lbzx r8, r9, r8 /* load r8 with hash value at index */ - addi r3, r3, 4 /* unroll factor , increment string by 4 */ - cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found - accept needle */ - beq cr7,L(ret4thIndex) /* we have hit accept needle, return the - count */ - -L(beginScan): - lbz r8, 0(r3) /* load string[0] into r8 */ - addi r6, r10, 1 /* place holder for counter + 1 */ - addi r5, r10, 2 /* place holder for counter + 2 */ - addi r4, r10, 3 /* place holder for counter + 3 */ - cmpdi cr7, r8, 0 /* if we hit marker in hash, we have found - accept needle */ - bne cr7, L(scanUnroll) /* continue scanning */ - -L(ret1stIndex): - mr r3, r10 /* update r3 for return */ - blr /* return */ - -L(ret2ndIndex): - mr r3, r6 /* update r3 for return */ - blr /* return */ - -L(ret3rdIndex): - mr r3, r5 /* update r3 for return */ - blr /* return */ - -L(ret4thIndex): - mr r3, r4 /* update r3 for return */ - blr /* done */ -END(strspn) -libc_hidden_builtin_def (strspn) diff --git a/sysdeps/powerpc/powerpc64/strspn.S b/sysdeps/powerpc/powerpc64/strspn.S new file mode 100644 index 0000000..7a038e6 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/strspn.S @@ -0,0 +1,146 @@ +/* Optimized strspn implementation for PowerPC64. 
+ + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* size_t [r3] strspn (const char *string [r3], + const char *needleAccept [r4]) */ + +/* Performance gains are grabbed through following techniques: + + > hashing of needle. + > hashing avoids scanning of duplicate entries in needle + across the string. + > initializing the hash table with unrolled doubleword + stores. + > unrolling when scanning for character in string + across hash table. */ + +/* Algorithm is as below: + 1. An empty hash table/dictionary is created comprising of + 256 ascii character set + 2. When hash entry is found in needle , the hash index + is initialized to 1 + 3. The string is scanned until end and for every character, + its corresponding hash index is compared. + 4. The count (length of the initial segment of string made up + of needle characters) is initially set to 0 + 5. If hash index is not set to 1 for the character of string, + count is returned. + 6. Otherwise count is incremented and scanning continues + until end of string. */ + +#include <sysdep.h> + +EALIGN(strspn, 4, 0) + CALL_MCOUNT 3 + + /* PPC64 ELF ABI stack is aligned to 16 bytes. 
*/ + addi r9,r1,-256 + /* Clear the table with 0 values */ + li r6, 0 + li r8, 4 /* 4 iterations of 64 bytes each = 256 bytes */ + mtctr r8 + mr r10, r9 /* r10 walks the table, r9 keeps its base */ + .align 4 +L(zerohash): + std r6, 0(r10) + std r6, 8(r10) + std r6, 16(r10) + std r6, 24(r10) + std r6, 32(r10) + std r6, 40(r10) + std r6, 48(r10) + std r6, 56(r10) + addi r10, r10, 64 + bdnz L(zerohash) + + lbz r10,0(r4) + li r8, 1 /* r8=1, marker into hash if found in + needle */ + cmpdi cr7, r10, 0 /* accept needle is empty */ + beq cr7, L(skipHashing) /* if needle is empty, skip hashing */ + + .align 4 /* align section to 16 byte boundary */ +L(hashing): + stbx r8, r9, r10 /* update hash with marker for the pivot of + the needle */ + lbzu r10, 1(r4) /* load needle into r10 and update to next */ + cmpdi cr7, r10, 0 /* if needle has reached NUL, continue */ + bne cr7, L(hashing) /* loop to hash the needle */ + +L(skipHashing): + li r10, 0 /* load counter = 0 */ + b L(beginScan) + + .align 4 /* align section to 16 byte boundary */ +L(scanUnroll): + lbzx r8, r9, r8 /* load r8 with hash value at index */ + cmpwi cr7, r8, 0 /* if hash entry is 0, the character is not + in accept needle */ + beq cr7, L(ret1stIndex) /* character not in accept needle, return the + count */ + + lbz r8, 1(r3) /* load string[1] into r8 */ + addi r10, r10, 4 /* increment counter */ + lbzx r8, r9, r8 /* load r8 with hash value at index */ + cmpwi cr7, r8, 0 /* if hash entry is 0, the character is not + in accept needle */ + beq cr7, L(ret2ndIndex) /* character not in accept needle, return the + count */ + + lbz r8, 2(r3) /* load string[2] into r8 */ + lbzx r8, r9, r8 /* load r8 with hash value at index */ + cmpwi cr7, r8, 0 /* if hash entry is 0, the character is not + in accept needle */ + beq cr7, L(ret3rdIndex) /* character not in accept needle, return the + count */ + + lbz r8, 3(r3) /* load string[3] into r8 */ + lbzx r8, r9, r8 /* load r8 with hash value at index */ + addi r3, r3, 4 /* unroll 
factor , increment string by 4 */ + cmpwi cr7, r8, 0 /* if hash entry is 0, the character is not + in accept needle */ 
+ beq cr7,L(ret4thIndex) /* character not in accept needle, return the + count */ + +L(beginScan): + lbz r8, 0(r3) /* load string[0] into r8 */ + addi r6, r10, 1 /* place holder for counter + 1 */ + addi r5, r10, 2 /* place holder for counter + 2 */ + addi r4, r10, 3 /* place holder for counter + 3 */ + cmpdi cr7, r8, 0 /* if string[0] is NUL, we have reached the + end of string */ + bne cr7, L(scanUnroll) /* continue scanning */ + +L(ret1stIndex): + mr r3, r10 /* update r3 for return */ + blr /* return */ + +L(ret2ndIndex): + mr r3, r6 /* update r3 for return */ + blr /* return */ + +L(ret3rdIndex): + mr r3, r5 /* update r3 for return */ + blr /* return */ + +L(ret4thIndex): + mr r3, r4 /* update r3 for return */ + blr /* done */ +END(strspn) +libc_hidden_builtin_def (strspn)