Message ID | 20250314002648.1013721-1-skpgkp2@gmail.com |
---|---|
State | New |
Headers | show |
Series | x86_64: Add asinh with FMA | expand |
On Thu, Mar 13, 2025 at 5:27 PM Sunil K Pandey <skpgkp2@gmail.com> wrote: > > FMA and non-FMA performance are similar on SPR. > > Before After Improvement > reciprocal-throughput 12.226 12.4064 -1% > latency 49.4701 48.4424 2% Since there is no real advantage for FMA, it isn't necessary. > --- > benchtests/asinh-inputs | 1 + > sysdeps/ieee754/dbl-64/s_asinh.c | 5 ++++ > sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++ > sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c | 7 +++++ > sysdeps/x86_64/fpu/multiarch/s_asinh.c | 31 ++++++++++++++++++++++ > 5 files changed, 46 insertions(+) > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh.c > > diff --git a/benchtests/asinh-inputs b/benchtests/asinh-inputs > index 8dfa37c49f..946e7b4d22 100644 > --- a/benchtests/asinh-inputs > +++ b/benchtests/asinh-inputs > @@ -1,6 +1,7 @@ > ## args: double > ## ret: double > ## includes: math.h > +## name: workload-random > 0x1.408fb643484cep-11 > 0x1.f0763423f1d52p-23 > -0x1.681573418e494p4 > diff --git a/sysdeps/ieee754/dbl-64/s_asinh.c b/sysdeps/ieee754/dbl-64/s_asinh.c > index 7fd281d791..04b686a28e 100644 > --- a/sysdeps/ieee754/dbl-64/s_asinh.c > +++ b/sysdeps/ieee754/dbl-64/s_asinh.c > @@ -32,6 +32,11 @@ static const double > ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */ > huge = 1.00000000000000000000e+300; > > +#ifndef SECTION > +# define SECTION > +#endif > + > +SECTION > double > __asinh (double x) > { > diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile > index 3403422443..1ab46bb446 100644 > --- a/sysdeps/x86_64/fpu/multiarch/Makefile > +++ b/sysdeps/x86_64/fpu/multiarch/Makefile > @@ -7,6 +7,7 @@ CFLAGS-e_log-fma.c = -mfma -mavx2 > CFLAGS-e_log2-fma.c = -mfma -mavx2 > CFLAGS-e_pow-fma.c = -mfma -mavx2 > CFLAGS-e_sinh-fma.c = -mfma -mavx2 > +CFLAGS-s_asinh-fma.c = -mfma -mavx2 > CFLAGS-s_atan-fma.c = -mfma -mavx2 > CFLAGS-s_expm1-fma.c = -mfma -mavx2 > 
CFLAGS-s_log1p-fma.c = -mfma -mavx2 > @@ -73,6 +74,7 @@ libm-sysdep_routines += \ > e_pow-fma \ > e_powf-fma \ > e_sinh-fma \ > + s_asinh-fma \ > s_atan-avx \ > s_atan-fma \ > s_ceil-sse4_1 \ > diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c > new file mode 100644 > index 0000000000..293a255005 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c > @@ -0,0 +1,7 @@ > +#define __asinh __asinh_fma > +#define __ieee754_log __ieee754_log_fma > +#define __log1p __log1p_fma > + > +#define SECTION __attribute__ ((section (".text.fma"))) > + > +#include <sysdeps/ieee754/dbl-64/s_asinh.c> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh.c b/sysdeps/x86_64/fpu/multiarch/s_asinh.c > new file mode 100644 > index 0000000000..509d74c96b > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh.c > @@ -0,0 +1,31 @@ > +/* Multiple versions of asinh. > + Copyright (C) 2025 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#include <sysdeps/x86/isa-level.h> > +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL > + > +extern double __redirect_asinh (double); > + > +# define SYMBOL_NAME asinh > +# include "ifunc-fma.h" > + > +libc_ifunc_redirected (__redirect_asinh, __asinh, IFUNC_SELECTOR ()); > + > +# define __asinh __asinh_sse2 > +#endif > +#include <sysdeps/ieee754/dbl-64/s_asinh.c> > -- > 2.48.1 >
On Thu, Mar 13, 2025 at 6:04 PM H.J. Lu <hjl.tools@gmail.com> wrote: > On Thu, Mar 13, 2025 at 5:27 PM Sunil K Pandey <skpgkp2@gmail.com> wrote: > > > > FMA and non-FMA performance are similar on SPR. > > > > Before After Improvement > > reciprocal-throughput 12.226 12.4064 -1% > > latency 49.4701 48.4424 2% > > Since there is no real advantage for FMA, it isn't necessary. > > This request comes from https://issues.redhat.com/browse/RHEL-1063 We don't see any real advantage because, in upstream glibc, the underlying function calls (__ieee754_log and __log1p) are already FMA-enabled and invoked via ifunc. An FMA version could be beneficial if the underlying function calls are replaced by direct calls. > > --- > > benchtests/asinh-inputs | 1 + > > sysdeps/ieee754/dbl-64/s_asinh.c | 5 ++++ > > sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++ > > sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c | 7 +++++ > > sysdeps/x86_64/fpu/multiarch/s_asinh.c | 31 ++++++++++++++++++++++ > > 5 files changed, 46 insertions(+) > > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c > > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh.c > > > > diff --git a/benchtests/asinh-inputs b/benchtests/asinh-inputs > > index 8dfa37c49f..946e7b4d22 100644 > > --- a/benchtests/asinh-inputs > > +++ b/benchtests/asinh-inputs > > @@ -1,6 +1,7 @@ > > ## args: double > > ## ret: double > > ## includes: math.h > > +## name: workload-random > > 0x1.408fb643484cep-11 > > 0x1.f0763423f1d52p-23 > > -0x1.681573418e494p4 > > diff --git a/sysdeps/ieee754/dbl-64/s_asinh.c > b/sysdeps/ieee754/dbl-64/s_asinh.c > > index 7fd281d791..04b686a28e 100644 > > --- a/sysdeps/ieee754/dbl-64/s_asinh.c > > +++ b/sysdeps/ieee754/dbl-64/s_asinh.c > > @@ -32,6 +32,11 @@ static const double > > ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */ > > huge = 1.00000000000000000000e+300; > > > > +#ifndef SECTION > > +# define SECTION > > +#endif > > + > > +SECTION > > double > > __asinh (double x) > > { > > diff --git 
a/sysdeps/x86_64/fpu/multiarch/Makefile > b/sysdeps/x86_64/fpu/multiarch/Makefile > > index 3403422443..1ab46bb446 100644 > > --- a/sysdeps/x86_64/fpu/multiarch/Makefile > > +++ b/sysdeps/x86_64/fpu/multiarch/Makefile > > @@ -7,6 +7,7 @@ CFLAGS-e_log-fma.c = -mfma -mavx2 > > CFLAGS-e_log2-fma.c = -mfma -mavx2 > > CFLAGS-e_pow-fma.c = -mfma -mavx2 > > CFLAGS-e_sinh-fma.c = -mfma -mavx2 > > +CFLAGS-s_asinh-fma.c = -mfma -mavx2 > > CFLAGS-s_atan-fma.c = -mfma -mavx2 > > CFLAGS-s_expm1-fma.c = -mfma -mavx2 > > CFLAGS-s_log1p-fma.c = -mfma -mavx2 > > @@ -73,6 +74,7 @@ libm-sysdep_routines += \ > > e_pow-fma \ > > e_powf-fma \ > > e_sinh-fma \ > > + s_asinh-fma \ > > s_atan-avx \ > > s_atan-fma \ > > s_ceil-sse4_1 \ > > diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c > b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c > > new file mode 100644 > > index 0000000000..293a255005 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c > > @@ -0,0 +1,7 @@ > > +#define __asinh __asinh_fma > > +#define __ieee754_log __ieee754_log_fma > > +#define __log1p __log1p_fma > > + > > +#define SECTION __attribute__ ((section (".text.fma"))) > > + > > +#include <sysdeps/ieee754/dbl-64/s_asinh.c> > > diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh.c > b/sysdeps/x86_64/fpu/multiarch/s_asinh.c > > new file mode 100644 > > index 0000000000..509d74c96b > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh.c > > @@ -0,0 +1,31 @@ > > +/* Multiple versions of asinh. > > + Copyright (C) 2025 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. 
> > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#include <sysdeps/x86/isa-level.h> > > +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL > > + > > +extern double __redirect_asinh (double); > > + > > +# define SYMBOL_NAME asinh > > +# include "ifunc-fma.h" > > + > > +libc_ifunc_redirected (__redirect_asinh, __asinh, IFUNC_SELECTOR ()); > > + > > +# define __asinh __asinh_sse2 > > +#endif > > +#include <sysdeps/ieee754/dbl-64/s_asinh.c> > > -- > > 2.48.1 > > > > > -- > H.J. >
On Thu, Mar 13, 2025 at 8:50 PM Sunil Pandey <skpgkp2@gmail.com> wrote: > > > > On Thu, Mar 13, 2025 at 6:04 PM H.J. Lu <hjl.tools@gmail.com> wrote: >> >> On Thu, Mar 13, 2025 at 5:27 PM Sunil K Pandey <skpgkp2@gmail.com> wrote: >> > >> > FMA and non-FMA performance are similar on SPR. >> > >> > Before After Improvement >> > reciprocal-throughput 12.226 12.4064 -1% >> > latency 49.4701 48.4424 2% >> >> Since there is no real advantage for FMA, it isn't necessary. >> > > This request comes from https://issues.redhat.com/browse/RHEL-1063 > We don't see any real advantage because in upstream glibc underlying > function call > > __ieee754_log > __log1p > > are already FMA enabled and invoked via ifunc. > > FMA version could be beneficial if the underlying function is replaced > by direct call. Please do that to see if it improves performance. Thanks. > > >> >> > --- >> > benchtests/asinh-inputs | 1 + >> > sysdeps/ieee754/dbl-64/s_asinh.c | 5 ++++ >> > sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++ >> > sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c | 7 +++++ >> > sysdeps/x86_64/fpu/multiarch/s_asinh.c | 31 ++++++++++++++++++++++ >> > 5 files changed, 46 insertions(+) >> > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c >> > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh.c >> > >> > diff --git a/benchtests/asinh-inputs b/benchtests/asinh-inputs >> > index 8dfa37c49f..946e7b4d22 100644 >> > --- a/benchtests/asinh-inputs >> > +++ b/benchtests/asinh-inputs >> > @@ -1,6 +1,7 @@ >> > ## args: double >> > ## ret: double >> > ## includes: math.h >> > +## name: workload-random >> > 0x1.408fb643484cep-11 >> > 0x1.f0763423f1d52p-23 >> > -0x1.681573418e494p4 >> > diff --git a/sysdeps/ieee754/dbl-64/s_asinh.c b/sysdeps/ieee754/dbl-64/s_asinh.c >> > index 7fd281d791..04b686a28e 100644 >> > --- a/sysdeps/ieee754/dbl-64/s_asinh.c >> > +++ b/sysdeps/ieee754/dbl-64/s_asinh.c >> > @@ -32,6 +32,11 @@ static const double >> > ln2 = 6.93147180559945286227e-01, /* 
0x3FE62E42, 0xFEFA39EF */ >> > huge = 1.00000000000000000000e+300; >> > >> > +#ifndef SECTION >> > +# define SECTION >> > +#endif >> > + >> > +SECTION >> > double >> > __asinh (double x) >> > { >> > diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile >> > index 3403422443..1ab46bb446 100644 >> > --- a/sysdeps/x86_64/fpu/multiarch/Makefile >> > +++ b/sysdeps/x86_64/fpu/multiarch/Makefile >> > @@ -7,6 +7,7 @@ CFLAGS-e_log-fma.c = -mfma -mavx2 >> > CFLAGS-e_log2-fma.c = -mfma -mavx2 >> > CFLAGS-e_pow-fma.c = -mfma -mavx2 >> > CFLAGS-e_sinh-fma.c = -mfma -mavx2 >> > +CFLAGS-s_asinh-fma.c = -mfma -mavx2 >> > CFLAGS-s_atan-fma.c = -mfma -mavx2 >> > CFLAGS-s_expm1-fma.c = -mfma -mavx2 >> > CFLAGS-s_log1p-fma.c = -mfma -mavx2 >> > @@ -73,6 +74,7 @@ libm-sysdep_routines += \ >> > e_pow-fma \ >> > e_powf-fma \ >> > e_sinh-fma \ >> > + s_asinh-fma \ >> > s_atan-avx \ >> > s_atan-fma \ >> > s_ceil-sse4_1 \ >> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c >> > new file mode 100644 >> > index 0000000000..293a255005 >> > --- /dev/null >> > +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c >> > @@ -0,0 +1,7 @@ >> > +#define __asinh __asinh_fma >> > +#define __ieee754_log __ieee754_log_fma >> > +#define __log1p __log1p_fma >> > + >> > +#define SECTION __attribute__ ((section (".text.fma"))) >> > + >> > +#include <sysdeps/ieee754/dbl-64/s_asinh.c> >> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh.c b/sysdeps/x86_64/fpu/multiarch/s_asinh.c >> > new file mode 100644 >> > index 0000000000..509d74c96b >> > --- /dev/null >> > +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh.c >> > @@ -0,0 +1,31 @@ >> > +/* Multiple versions of asinh. >> > + Copyright (C) 2025 Free Software Foundation, Inc. >> > + This file is part of the GNU C Library. 
>> > + >> > + The GNU C Library is free software; you can redistribute it and/or >> > + modify it under the terms of the GNU Lesser General Public >> > + License as published by the Free Software Foundation; either >> > + version 2.1 of the License, or (at your option) any later version. >> > + >> > + The GNU C Library is distributed in the hope that it will be useful, >> > + but WITHOUT ANY WARRANTY; without even the implied warranty of >> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> > + Lesser General Public License for more details. >> > + >> > + You should have received a copy of the GNU Lesser General Public >> > + License along with the GNU C Library; if not, see >> > + <https://www.gnu.org/licenses/>. */ >> > + >> > +#include <sysdeps/x86/isa-level.h> >> > +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL >> > + >> > +extern double __redirect_asinh (double); >> > + >> > +# define SYMBOL_NAME asinh >> > +# include "ifunc-fma.h" >> > + >> > +libc_ifunc_redirected (__redirect_asinh, __asinh, IFUNC_SELECTOR ()); >> > + >> > +# define __asinh __asinh_sse2 >> > +#endif >> > +#include <sysdeps/ieee754/dbl-64/s_asinh.c> >> > -- >> > 2.48.1 >> > >> >> >> -- >> H.J. > > >
diff --git a/benchtests/asinh-inputs b/benchtests/asinh-inputs index 8dfa37c49f..946e7b4d22 100644 --- a/benchtests/asinh-inputs +++ b/benchtests/asinh-inputs @@ -1,6 +1,7 @@ ## args: double ## ret: double ## includes: math.h +## name: workload-random 0x1.408fb643484cep-11 0x1.f0763423f1d52p-23 -0x1.681573418e494p4 diff --git a/sysdeps/ieee754/dbl-64/s_asinh.c b/sysdeps/ieee754/dbl-64/s_asinh.c index 7fd281d791..04b686a28e 100644 --- a/sysdeps/ieee754/dbl-64/s_asinh.c +++ b/sysdeps/ieee754/dbl-64/s_asinh.c @@ -32,6 +32,11 @@ static const double ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */ huge = 1.00000000000000000000e+300; +#ifndef SECTION +# define SECTION +#endif + +SECTION double __asinh (double x) { diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 3403422443..1ab46bb446 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -7,6 +7,7 @@ CFLAGS-e_log-fma.c = -mfma -mavx2 CFLAGS-e_log2-fma.c = -mfma -mavx2 CFLAGS-e_pow-fma.c = -mfma -mavx2 CFLAGS-e_sinh-fma.c = -mfma -mavx2 +CFLAGS-s_asinh-fma.c = -mfma -mavx2 CFLAGS-s_atan-fma.c = -mfma -mavx2 CFLAGS-s_expm1-fma.c = -mfma -mavx2 CFLAGS-s_log1p-fma.c = -mfma -mavx2 @@ -73,6 +74,7 @@ libm-sysdep_routines += \ e_pow-fma \ e_powf-fma \ e_sinh-fma \ + s_asinh-fma \ s_atan-avx \ s_atan-fma \ s_ceil-sse4_1 \ diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c new file mode 100644 index 0000000000..293a255005 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c @@ -0,0 +1,7 @@ +#define __asinh __asinh_fma +#define __ieee754_log __ieee754_log_fma +#define __log1p __log1p_fma + +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/s_asinh.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh.c b/sysdeps/x86_64/fpu/multiarch/s_asinh.c new file mode 100644 index 0000000000..509d74c96b --- /dev/null +++ 
b/sysdeps/x86_64/fpu/multiarch/s_asinh.c @@ -0,0 +1,31 @@ +/* Multiple versions of asinh. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL + +extern double __redirect_asinh (double); + +# define SYMBOL_NAME asinh +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_asinh, __asinh, IFUNC_SELECTOR ()); + +# define __asinh __asinh_sse2 +#endif +#include <sysdeps/ieee754/dbl-64/s_asinh.c>