diff mbox series

[17/17] x86_64: Add exp2m1f with FMA

Message ID 20241025182614.2022697-18-adhemerval.zanella@linaro.org
State New
Headers show
Series Add more CORE-MATH on libm | expand

Commit Message

Adhemerval Zanella Oct. 25, 2024, 6:21 p.m. UTC
The CORE-MATH exp2m1f implementation showed slightly worse latency
when using the x86_64 baseline ABI.  This patch adds an ifunc variant
with similar performance for x86_64-v3.
---
 sysdeps/ieee754/flt-32/s_exp2m1f.c           |  2 ++
 sysdeps/x86_64/fpu/multiarch/Makefile        |  2 ++
 sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c |  4 +++
 sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c     | 33 ++++++++++++++++++++
 4 files changed, 41 insertions(+)
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c

Comments

Joseph Myers Oct. 25, 2024, 7:10 p.m. UTC | #1
On Fri, 25 Oct 2024, Adhemerval Zanella wrote:

> The CORE-MATH exp10m1f implementation showed slight worse latency
> when using x86_64 baseline ABI.  This patch adds a ifunc variant
> with similar performance for x86_64-v3.

This commit message should refer to exp2m1f, not exp10m1f.
Noah Goldstein Oct. 26, 2024, 6:34 p.m. UTC | #2
On Fri, Oct 25, 2024 at 1:31 PM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
> The CORE-MATH exp10m1f implementation showed slight worse latency
> when using x86_64 baseline ABI.  This patch adds a ifunc variant
> with similar performance for x86_64-v3.
> ---
>  sysdeps/ieee754/flt-32/s_exp2m1f.c           |  2 ++
>  sysdeps/x86_64/fpu/multiarch/Makefile        |  2 ++
>  sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c |  4 +++
>  sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c     | 33 ++++++++++++++++++++
>  4 files changed, 41 insertions(+)
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c
>  create mode 100644 sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c
>
> diff --git a/sysdeps/ieee754/flt-32/s_exp2m1f.c b/sysdeps/ieee754/flt-32/s_exp2m1f.c
> index f899152d2a..3e79030888 100644
> --- a/sysdeps/ieee754/flt-32/s_exp2m1f.c
> +++ b/sysdeps/ieee754/flt-32/s_exp2m1f.c
> @@ -189,4 +189,6 @@ __exp2m1f (float x)
>        return (s - 1.0) + w * c0;
>      }
>  }
> +#ifndef __exp2m1f
>  libm_alias_float (__exp2m1, exp2m1)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
> index dcff4df2f1..e823d2fcc6 100644
> --- a/sysdeps/x86_64/fpu/multiarch/Makefile
> +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
> @@ -12,6 +12,7 @@ CFLAGS-s_sin-fma.c = -mfma -mavx2
>  CFLAGS-s_tan-fma.c = -mfma -mavx2
>  CFLAGS-s_sincos-fma.c = -mfma -mavx2
>  CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
> +CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
>
>  CFLAGS-e_exp2f-fma.c = -mfma -mavx2
>  CFLAGS-e_expf-fma.c = -mfma -mavx2
> @@ -74,6 +75,7 @@ libm-sysdep_routines += \
>    s_cosf-fma \
>    s_cosf-sse2 \
>    s_exp10m1f-fma \
> +  s_exp2m1f-fma \
>    s_expm1-fma \
>    s_floor-sse4_1 \
>    s_floorf-sse4_1 \
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c
> new file mode 100644
> index 0000000000..bfa00eae4e
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c
> @@ -0,0 +1,4 @@
> +#define __exp2m1f __exp2m1f_fma
> +#define SECTION __attribute__ ((section (".text.fma")))
> +
> +#include <sysdeps/ieee754/flt-32/s_exp2m1f.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c
> new file mode 100644
> index 0000000000..8e2d7ec384
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c
> @@ -0,0 +1,33 @@
> +/* Multiple versions of exp2m1.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdeps/x86/isa-level.h>
> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
> +# include <libm-alias-float.h>
> +
> +extern float __redirect_exp2m1f (float);
> +
> +# define SYMBOL_NAME exp2m1f
> +# include "ifunc-fma.h"
> +
> +libc_ifunc_redirected (__redirect_exp2m1f, __exp2m1f, IFUNC_SELECTOR ());
> +libm_alias_float (__exp2m1, exp2m1)
> +
> +# define __exp2m1f __exp2m1f_sse2
> +#endif
> +#include <sysdeps/ieee754/flt-32/s_exp2m1f.c>
> --
> 2.43.0
>

Code LGTM but please update commit message to reference correct function.

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
diff mbox series

Patch

diff --git a/sysdeps/ieee754/flt-32/s_exp2m1f.c b/sysdeps/ieee754/flt-32/s_exp2m1f.c
index f899152d2a..3e79030888 100644
--- a/sysdeps/ieee754/flt-32/s_exp2m1f.c
+++ b/sysdeps/ieee754/flt-32/s_exp2m1f.c
@@ -189,4 +189,6 @@  __exp2m1f (float x)
       return (s - 1.0) + w * c0;
     }
 }
+#ifndef __exp2m1f
 libm_alias_float (__exp2m1, exp2m1)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index dcff4df2f1..e823d2fcc6 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -12,6 +12,7 @@  CFLAGS-s_sin-fma.c = -mfma -mavx2
 CFLAGS-s_tan-fma.c = -mfma -mavx2
 CFLAGS-s_sincos-fma.c = -mfma -mavx2
 CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
+CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
 
 CFLAGS-e_exp2f-fma.c = -mfma -mavx2
 CFLAGS-e_expf-fma.c = -mfma -mavx2
@@ -74,6 +75,7 @@  libm-sysdep_routines += \
   s_cosf-fma \
   s_cosf-sse2 \
   s_exp10m1f-fma \
+  s_exp2m1f-fma \
   s_expm1-fma \
   s_floor-sse4_1 \
   s_floorf-sse4_1 \
diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c
new file mode 100644
index 0000000000..bfa00eae4e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f-fma.c
@@ -0,0 +1,4 @@ 
+#define __exp2m1f __exp2m1f_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/flt-32/s_exp2m1f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c
new file mode 100644
index 0000000000..8e2d7ec384
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_exp2m1f.c
@@ -0,0 +1,33 @@ 
+/* Multiple versions of exp2m1f.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+
+extern float __redirect_exp2m1f (float);
+
+# define SYMBOL_NAME exp2m1f
+# include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_exp2m1f, __exp2m1f, IFUNC_SELECTOR ());
+libm_alias_float (__exp2m1, exp2m1)
+
+# define __exp2m1f __exp2m1f_sse2
+#endif
+#include <sysdeps/ieee754/flt-32/s_exp2m1f.c>