Message ID | 20161217065729.28561-11-rth@twiddle.net |
---|---|
State | New |
Headers | show |
On 17/12/2016 04:57, Richard Henderson wrote: > +static inline unsigned long int > +haszero(unsigned long int x) > +{ > +#if __GNUC_PREREQ(4, 5) > + /* It's more useful to expose a control transfer to the compiler > + than to expose a proper boolean result. */ > + if (sizeof(x) == 8) > + asm goto ("uxor,*sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz); > + else > + asm goto ("uxor,sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz); > + return 1; > + nbz: > + return 0; > +#else Since current GLIBC requires GCC 4.7 as the minimum compiler, I think we can get rid of snippets for old compilers. Same for the other override functions. > + unsigned long int ret; > + if (sizeof(x) == 8) > + asm ("uxor,*sbz %%r0,%1,%%r0\n\tcopy %%r0,%0" > + : "=r"(ret) : "r"(x), "0"(1)); > + else > + asm ("uxor,sbz %%r0,%1,%%r0\n\tcopy %%r0,%0" > + : "=r"(ret) : "r"(x), "0"(1)); > + return ret; > +#endif > +} > + > +/* Likewise, but for two words simultaneously. */ > + > +static inline unsigned long int > +haszero2(unsigned long int x1, unsigned long int x2) > +{ > +#if __GNUC_PREREQ(4, 5) > + /* It's more useful to expose a control transfer to the compiler > + than to expose a proper boolean result. 
*/ > + if (sizeof(x1) == 8) > + asm goto ("uxor,*sbz %%r0,%0,%%r0\n\t" > + "uxor,*nbz %%r0,%1,%%r0\n\t" > + "b,n %l2" : : "r"(x1), "r"(x2) : : sbz); > + else > + asm goto ("uxor,sbz %%r0,%0,%%r0\n\t" > + "uxor,nbz %%r0,%1,%%r0\n\t" > + "b,n %l2" : : "r"(x1), "r"(x2) : : sbz); > + return 0; > + sbz: > + return 1; > +#else > + unsigned long int ret; > + if (sizeof(x1) == 8) > + asm ("uxor,*sbz %%r0,%1,%%r0\n\t" > + "uxor,*nbz %%r0,%2,%%r0\n\t" > + "ldi 1,%0" > + : "=r"(ret) : "r"(x1), "r"(x2), "0"(0)); > + else > + asm ("uxor,sbz %%r0,%1,%%r0\n\t" > + "uxor,nbz %%r0,%2,%%r0\n\t" > + "ldi 1,%0" > + : "=r"(ret) : "r"(x1), "r"(x2), "0"(0)); > + return ret; > +#endif > +} > + > +#endif /* haszero.h */ > diff --git a/sysdeps/hppa/whichzero.h b/sysdeps/hppa/whichzero.h > new file mode 100644 > index 0000000..ef18cc7 > --- /dev/null > +++ b/sysdeps/hppa/whichzero.h > @@ -0,0 +1,70 @@ > +/* whichzero.h -- functions for zero byte searching. HPPA version. > + Copyright (C) 2016 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef HPPA_WHICHZERO_H > +#define HPPA_WHICHZERO_H 1 > + > +/* Given a long that is known to contain a zero byte, return the > + index of the first such within the long in host memory order. 
*/ > + > +static inline unsigned int > +whichzero(unsigned long int x) > +{ > + unsigned int ret; > + > + _Static_assert (sizeof(x) == 4, "64-bit not supported"); > + > + /* Since we have no clz insn, direct tests of the bytes is faster > + than loading up the constants to do the masking. */ > + asm ("extrw,u,<> %1,23,8,%%r0\n\t" > + "ldi 2,%0\n\t" > + "extrw,u,<> %1,15,8,%%r0\n\t" > + "ldi 1,%0\n\t" > + "extrw,u,<> %1,7,8,%%r0\n\t" > + "ldi 0,%0" > + : "=r"(ret) : "r"(x), "0"(3)); > + > + return ret; > +} > + > +/* Similarly, but perform the test for two longs simultaneously. */ > + > +static inline unsigned int > +whichzero2(unsigned long int x1, unsigned long int x2) > +{ > + unsigned int ret; > + > + _Static_assert (sizeof(x1) == 4, "64-bit not supported"); > + > + /* Since we have no clz insn, direct tests of the bytes is faster > + than loading up the constants to do the masking. */ > + asm ("extrw,u,= %1,23,8,%%r0\n\t" > + "extrw,u,<> %2,23,8,%%r0\n\t" > + "ldi 2,%0\n\t" > + "extrw,u,= %1,15,8,%%r0\n\t" > + "extrw,u,<> %2,15,8,%%r0\n\t" > + "ldi 1,%0\n\t" > + "extrw,u,= %1,7,8,%%r0\n\t" > + "extrw,u,<> %2,7,8,%%r0\n\t" > + "ldi 0,%0" > + : "=r"(ret) : "r"(x1), "r"(x2), "0"(3)); > + > + return ret; > +} > + > +#endif /* whichzero.h */ I am far from a hppa expert, but can't we code the same snippet in C? How bad would it be compared to this optimized asm?
On 12/19/2016 06:54 AM, Adhemerval Zanella wrote: >> +static inline unsigned long int >> +haszero(unsigned long int x) >> +{ >> +#if __GNUC_PREREQ(4, 5) >> + /* It's more useful to expose a control transfer to the compiler >> + than to expose a proper boolean result. */ >> + if (sizeof(x) == 8) >> + asm goto ("uxor,*sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz); >> + else >> + asm goto ("uxor,sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz); >> + return 1; >> + nbz: >> + return 0; >> +#else > > Since current GLIBC requires GCC 4.7 as the minimum compiler, I think we > can get rid of snippets for old compilers. Same for the other > override functions. Ah good. I'd meant to go back and look for the minimum required gcc. >> + /* Since we have no clz insn, direct tests of the bytes is faster >> + than loading up the constants to do the masking. */ >> + asm ("extrw,u,= %1,23,8,%%r0\n\t" >> + "extrw,u,<> %2,23,8,%%r0\n\t" >> + "ldi 2,%0\n\t" >> + "extrw,u,= %1,15,8,%%r0\n\t" >> + "extrw,u,<> %2,15,8,%%r0\n\t" >> + "ldi 1,%0\n\t" >> + "extrw,u,= %1,7,8,%%r0\n\t" >> + "extrw,u,<> %2,7,8,%%r0\n\t" >> + "ldi 0,%0" >> + : "=r"(ret) : "r"(x1), "r"(x2), "0"(3)); >> + >> + return ret; >> +} >> + >> +#endif /* whichzero.h */ > > I am far from a hppa expert, but can't we code the same snippet in C? How > bad would it be compared to this optimized asm? The compiler is not great at this. It only attempts nullification on comparisons (not directly as a result of an operation like extract), and it never attempts double nullification as above. So for whichzero gcc will use 10 insns instead of my 7; for whichzero2 gcc will use 19 insns instead of my 10. r~
diff --git a/sysdeps/hppa/haszero.h b/sysdeps/hppa/haszero.h new file mode 100644 index 0000000..99cc6fc --- /dev/null +++ b/sysdeps/hppa/haszero.h @@ -0,0 +1,82 @@ +/* haszero.h -- function for zero byte detection. HPPA version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef HASZERO_H +#define HASZERO_H 1 + +static inline unsigned long int +haszero(unsigned long int x) +{ +#if __GNUC_PREREQ(4, 5) + /* It's more useful to expose a control transfer to the compiler + than to expose a proper boolean result. */ + if (sizeof(x) == 8) + asm goto ("uxor,*sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz); + else + asm goto ("uxor,sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz); + return 1; + nbz: + return 0; +#else + unsigned long int ret; + if (sizeof(x) == 8) + asm ("uxor,*sbz %%r0,%1,%%r0\n\tcopy %%r0,%0" + : "=r"(ret) : "r"(x), "0"(1)); + else + asm ("uxor,sbz %%r0,%1,%%r0\n\tcopy %%r0,%0" + : "=r"(ret) : "r"(x), "0"(1)); + return ret; +#endif +} + +/* Likewise, but for two words simultaneously. */ + +static inline unsigned long int +haszero2(unsigned long int x1, unsigned long int x2) +{ +#if __GNUC_PREREQ(4, 5) + /* It's more useful to expose a control transfer to the compiler + than to expose a proper boolean result. 
*/ + if (sizeof(x1) == 8) + asm goto ("uxor,*sbz %%r0,%0,%%r0\n\t" + "uxor,*nbz %%r0,%1,%%r0\n\t" + "b,n %l2" : : "r"(x1), "r"(x2) : : sbz); + else + asm goto ("uxor,sbz %%r0,%0,%%r0\n\t" + "uxor,nbz %%r0,%1,%%r0\n\t" + "b,n %l2" : : "r"(x1), "r"(x2) : : sbz); + return 0; + sbz: + return 1; +#else + unsigned long int ret; + if (sizeof(x1) == 8) + asm ("uxor,*sbz %%r0,%1,%%r0\n\t" + "uxor,*nbz %%r0,%2,%%r0\n\t" + "ldi 1,%0" + : "=r"(ret) : "r"(x1), "r"(x2), "0"(0)); + else + asm ("uxor,sbz %%r0,%1,%%r0\n\t" + "uxor,nbz %%r0,%2,%%r0\n\t" + "ldi 1,%0" + : "=r"(ret) : "r"(x1), "r"(x2), "0"(0)); + return ret; +#endif +} + +#endif /* haszero.h */ diff --git a/sysdeps/hppa/whichzero.h b/sysdeps/hppa/whichzero.h new file mode 100644 index 0000000..ef18cc7 --- /dev/null +++ b/sysdeps/hppa/whichzero.h @@ -0,0 +1,70 @@ +/* whichzero.h -- functions for zero byte searching. HPPA version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef HPPA_WHICHZERO_H +#define HPPA_WHICHZERO_H 1 + +/* Given a long that is known to contain a zero byte, return the + index of the first such within the long in host memory order. 
*/ + +static inline unsigned int +whichzero(unsigned long int x) +{ + unsigned int ret; + + _Static_assert (sizeof(x) == 4, "64-bit not supported"); + + /* Since we have no clz insn, direct tests of the bytes is faster + than loading up the constants to do the masking. */ + asm ("extrw,u,<> %1,23,8,%%r0\n\t" + "ldi 2,%0\n\t" + "extrw,u,<> %1,15,8,%%r0\n\t" + "ldi 1,%0\n\t" + "extrw,u,<> %1,7,8,%%r0\n\t" + "ldi 0,%0" + : "=r"(ret) : "r"(x), "0"(3)); + + return ret; +} + +/* Similarly, but perform the test for two longs simultaneously. */ + +static inline unsigned int +whichzero2(unsigned long int x1, unsigned long int x2) +{ + unsigned int ret; + + _Static_assert (sizeof(x1) == 4, "64-bit not supported"); + + /* Since we have no clz insn, direct tests of the bytes is faster + than loading up the constants to do the masking. */ + asm ("extrw,u,= %1,23,8,%%r0\n\t" + "extrw,u,<> %2,23,8,%%r0\n\t" + "ldi 2,%0\n\t" + "extrw,u,= %1,15,8,%%r0\n\t" + "extrw,u,<> %2,15,8,%%r0\n\t" + "ldi 1,%0\n\t" + "extrw,u,= %1,7,8,%%r0\n\t" + "extrw,u,<> %2,7,8,%%r0\n\t" + "ldi 0,%0" + : "=r"(ret) : "r"(x1), "r"(x2), "0"(3)); + + return ret; +} + +#endif /* whichzero.h */