Message ID | 000101d07776$0c9c4ac0$25d4e040$@com |
---|---|
State | New |
Headers | show |
On 15/04/15 13:16, Wilco Dijkstra wrote: > Inline __ieee754_sqrt and __ieee754_sqrtf. Also add external definitions. > > OK for commit? > ping (looks good to me and same should be done for fabs) > 2015-04-15 Wilco Dijkstra <wdijkstr@arm.com> > > * sysdeps/aarch64/fpu/math_private.h (__ieee754_sqrt): > New function. (__ieee754_sqrtf): New function. > * sysdeps/aarch64/fpu/e_sqrt.c (__ieee754_sqrt): > New function. > * sysdeps/aarch64/fpu/e_sqrtf.c (__ieee754_sqrtf): > New function. > > --- > sysdeps/aarch64/fpu/e_sqrt.c | 28 ++++++++++++++++++++++++++++ > sysdeps/aarch64/fpu/e_sqrtf.c | 28 ++++++++++++++++++++++++++++ > sysdeps/aarch64/fpu/math_private.h | 16 ++++++++++++++++ > 3 files changed, 72 insertions(+) > create mode 100644 sysdeps/aarch64/fpu/e_sqrt.c > create mode 100644 sysdeps/aarch64/fpu/e_sqrtf.c > > diff --git a/sysdeps/aarch64/fpu/e_sqrt.c b/sysdeps/aarch64/fpu/e_sqrt.c > new file mode 100644 > index 0000000..4f11ca2 > --- /dev/null > +++ b/sysdeps/aarch64/fpu/e_sqrt.c > @@ -0,0 +1,28 @@ > +/* Square root of floating point number. > + Copyright (C) 2015 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#include <math_private.h> > + > +double > +__ieee754_sqrt (double d) > +{ > + double res; > + asm ("fsqrt %d0, %d1" : "=w" (res) : "w" (d)); > + return res; > +} > +strong_alias (__ieee754_sqrt, __sqrt_finite) > diff --git a/sysdeps/aarch64/fpu/e_sqrtf.c b/sysdeps/aarch64/fpu/e_sqrtf.c > new file mode 100644 > index 0000000..a2e99e1 > --- /dev/null > +++ b/sysdeps/aarch64/fpu/e_sqrtf.c > @@ -0,0 +1,28 @@ > +/* Single-precision floating point square root. > + Copyright (C) 2015 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#include <math_private.h> > + > +float > +__ieee754_sqrtf (float s) > +{ > + float res; > + asm ("fsqrt %s0, %s1" : "=w" (res) : "w" (s)); > + return res; > +} > +strong_alias (__ieee754_sqrtf, __sqrtf_finite) > diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h > index 52a6ad9..b3c2509 100644 > --- a/sysdeps/aarch64/fpu/math_private.h > +++ b/sysdeps/aarch64/fpu/math_private.h > @@ -22,6 +22,22 @@ > #include <fenv.h> > #include <fpu_control.h> > > +extern __always_inline double > +__ieee754_sqrt (double d) > +{ > + double res; > + asm __volatile__ ("fsqrt %d0, %d1" : "=w" (res) : "w" (d)); > + return res; > +} > + > +extern __always_inline float > +__ieee754_sqrtf (float s) > +{ > + float res; > + asm __volatile__ ("fsqrt %s0, %s1" : "=w" (res) : "w" (s)); > + return res; > +} > + > static __always_inline void > libc_feholdexcept_aarch64 (fenv_t *envp) > { >
> Szabolcs Nagy wrote: > > (looks good to me and same should be done for fabs) Yes - and that is yet another example of unnecessary target code... Given GLIBC can now rely on a modern GCC for its build, can we avoid this and actually use the builtins rather than having each target override the generic implementation with an inline assembler version? double __fabs (double x) { - u_int32_t high; - GET_HIGH_WORD (high, x); - SET_HIGH_WORD (x, high & 0x7fffffff); - return x; + return __builtin_fabs (x); } GCC will always inline __builtin_fabs, with -O0, -Os and -fno-builtin. Wilco
On Mon, 18 May 2015, Wilco Dijkstra wrote: > Given GLIBC can now rely on a modern GCC for its build, can we avoid > this and actually use the builtins rather than having each target > override the generic implementation with an inline assembler version? > > double > __fabs (double x) > { > - u_int32_t high; > - GET_HIGH_WORD (high, x); > - SET_HIGH_WORD (x, high & 0x7fffffff); > - return x; > + return __builtin_fabs (x); > } > > GCC will always inline __builtin_fabs, with -O0, -Os and -fno-builtin. I believe doing this is safe (in that a proper inline expansion will be generated, by bit-fiddling if needed) *except for the ldbl-128ibm version of fabsl* (where fabs isn't simply a matter of clearing one bit, and GCC doesn't know how to do it correctly inline for soft-float / e500 - see GCC bug 29253 and the corresponding -fno-builtin-fabsl in sysdeps/powerpc/nofpu/Makefile). However, there might be cases where GCC generates calls to an absolute-value instruction that fails to clear the sign bit of a NaN, or predates IEEE 754-2008 and raises "invalid" for a signaling NaN. At least the former would be a clear GCC bug (on MIPS, for example, GCC makes sure to avoid abs instructions unless -mabs=2008 or -ffinite-math-only, for that reason), but it's something to watch out for.
> Joseph Myers wrote: > On Mon, 18 May 2015, Wilco Dijkstra wrote: > > > Given GLIBC can now rely on a modern GCC for its build, can we avoid > > this and actually use the builtins rather than having each target > > override the generic implementation with an inline assembler version? > > > > double > > __fabs (double x) > > { > > - u_int32_t high; > > - GET_HIGH_WORD (high, x); > > - SET_HIGH_WORD (x, high & 0x7fffffff); > > - return x; > > + return __builtin_fabs (x); > > } > > > > GCC will always inline __builtin_fabs, with -O0, -Os and -fno-builtin. > > I believe doing this is safe (in that a proper inline expansion will be > generated, by bit-fiddling if needed) *except for the ldbl-128ibm version > of fabsl* (where fabs isn't simply a matter of clearing one bit, and GCC > doesn't know how to do it correctly inline for soft-float / e500 - see GCC > bug 29253 and the corresponding -fno-builtin-fabsl in > sysdeps/powerpc/nofpu/Makefile). OK, so I'll only do float and double. It seems 128-bit is usually emulated so the current implementation is already optimal. > However, there might be cases where GCC generates calls to an > absolute-value instruction that fails to clear the sign bit of a NaN, or > predates IEEE 754-2008 and raises "invalid" for a signaling NaN. At least > the former would be a clear GCC bug (on MIPS, for example, GCC makes sure > to avoid abs instructions unless -mabs=2008 or -ffinite-math-only, for > that reason), but it's something to watch out for. If GCC implements fabs incorrectly then any code that uses fabs (including libm which calls fabs in many places) would fail today. So the above patch cannot introduce any new failures. I'll send out my patch at later date. Wilco
On Tue, 19 May 2015, Wilco Dijkstra wrote: > > I believe doing this is safe (in that a proper inline expansion will be > > generated, by bit-fiddling if needed) *except for the ldbl-128ibm version > > of fabsl* (where fabs isn't simply a matter of clearing one bit, and GCC > > doesn't know how to do it correctly inline for soft-float / e500 - see GCC > > bug 29253 and the corresponding -fno-builtin-fabsl in > > sysdeps/powerpc/nofpu/Makefile). > > OK, so I'll only do float and double. It seems 128-bit is usually emulated > so the current implementation is already optimal. I think S/390 uses hardware 128-bit. > > However, there might be cases where GCC generates calls to an > > absolute-value instruction that fails to clear the sign bit of a NaN, or > > predates IEEE 754-2008 and raises "invalid" for a signaling NaN. At least > > the former would be a clear GCC bug (on MIPS, for example, GCC makes sure > > to avoid abs instructions unless -mabs=2008 or -ffinite-math-only, for > > that reason), but it's something to watch out for. > > If GCC implements fabs incorrectly then any code that uses fabs (including > libm which calls fabs in many places) would fail today. So the above patch > cannot introduce any new failures. I'll send out my patch at later date. I don't think any libm code depends on the handling of signs of NaNs from GCC __builtin_fabs, or on what GCC __builtin_fabs does with sNaNs (it does depend on handling of signed zeroes, hence the -fno-builtin-fabsl for powerpc-nofpu). The libm testsuite does verify the signs of NaNs from libm fabs (so if GCC gets it wrong then that would be visible as libm test failures for fabs), but not sNaN handling (Thomas proposed sNaN tests in <https://sourceware.org/ml/libc-ports/2013-04/msg00008.html>, but those would need a major rework to go in now).
On 15 April 2015 at 13:16, Wilco Dijkstra <wdijkstr@arm.com> wrote: > Inline __ieee754_sqrt and __ieee754_sqrtf. Also add external definitions. > > OK for commit? > > 2015-04-15 Wilco Dijkstra <wdijkstr@arm.com> > > * sysdeps/aarch64/fpu/math_private.h (__ieee754_sqrt): > New function. (__ieee754_sqrtf): New function. > * sysdeps/aarch64/fpu/e_sqrt.c (__ieee754_sqrt): > New function. > * sysdeps/aarch64/fpu/e_sqrtf.c (__ieee754_sqrtf): > New function. OK /Marcus
diff --git a/sysdeps/aarch64/fpu/e_sqrt.c b/sysdeps/aarch64/fpu/e_sqrt.c new file mode 100644 index 0000000..4f11ca2 --- /dev/null +++ b/sysdeps/aarch64/fpu/e_sqrt.c @@ -0,0 +1,28 @@ +/* Square root of floating point number. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math_private.h> + +double +__ieee754_sqrt (double d) +{ + double res; + asm ("fsqrt %d0, %d1" : "=w" (res) : "w" (d)); + return res; +} +strong_alias (__ieee754_sqrt, __sqrt_finite) diff --git a/sysdeps/aarch64/fpu/e_sqrtf.c b/sysdeps/aarch64/fpu/e_sqrtf.c new file mode 100644 index 0000000..a2e99e1 --- /dev/null +++ b/sysdeps/aarch64/fpu/e_sqrtf.c @@ -0,0 +1,28 @@ +/* Single-precision floating point square root. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math_private.h> + +float +__ieee754_sqrtf (float s) +{ + float res; + asm ("fsqrt %s0, %s1" : "=w" (res) : "w" (s)); + return res; +} +strong_alias (__ieee754_sqrtf, __sqrtf_finite) diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h index 52a6ad9..b3c2509 100644 --- a/sysdeps/aarch64/fpu/math_private.h +++ b/sysdeps/aarch64/fpu/math_private.h @@ -22,6 +22,22 @@ #include <fenv.h> #include <fpu_control.h> +extern __always_inline double +__ieee754_sqrt (double d) +{ + double res; + asm __volatile__ ("fsqrt %d0, %d1" : "=w" (res) : "w" (d)); + return res; +} + +extern __always_inline float +__ieee754_sqrtf (float s) +{ + float res; + asm __volatile__ ("fsqrt %s0, %s1" : "=w" (res) : "w" (s)); + return res; +} + static __always_inline void libc_feholdexcept_aarch64 (fenv_t *envp) {