@@ -241,4 +241,15 @@
#define __DECL_SIMD_exp2f32x
#define __DECL_SIMD_exp2f64x
#define __DECL_SIMD_exp2f128x
+
+#define __DECL_SIMD_expm1 /* Empty expansion, like the exp2 stubs above; one stub per expm1 type variant follows.  */
+#define __DECL_SIMD_expm1f
+#define __DECL_SIMD_expm1l
+#define __DECL_SIMD_expm1f16
+#define __DECL_SIMD_expm1f32
+#define __DECL_SIMD_expm1f64
+#define __DECL_SIMD_expm1f128
+#define __DECL_SIMD_expm1f32x
+#define __DECL_SIMD_expm1f64x
+#define __DECL_SIMD_expm1f128x
#endif
@@ -116,7 +116,7 @@ __MATHCALL_VEC (exp10,, (_Mdouble_ __x));
#if defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99
/* Return exp(X) - 1. */
-__MATHCALL (expm1,, (_Mdouble_ __x));
+__MATHCALL_VEC (expm1,, (_Mdouble_ __x));
/* Return log(1 + X). */
__MATHCALL (log1p,, (_Mdouble_ __x));
@@ -58,6 +58,7 @@ GLIBC_2.35 _ZGVbN2v_erf F
GLIBC_2.35 _ZGVbN2v_erfc F
GLIBC_2.35 _ZGVbN2v_exp10 F
GLIBC_2.35 _ZGVbN2v_exp2 F
+GLIBC_2.35 _ZGVbN2v_expm1 F
GLIBC_2.35 _ZGVbN2vv_atan2 F
GLIBC_2.35 _ZGVbN4v_acosf F
GLIBC_2.35 _ZGVbN4v_acoshf F
@@ -71,6 +72,7 @@ GLIBC_2.35 _ZGVbN4v_erfcf F
GLIBC_2.35 _ZGVbN4v_erff F
GLIBC_2.35 _ZGVbN4v_exp10f F
GLIBC_2.35 _ZGVbN4v_exp2f F
+GLIBC_2.35 _ZGVbN4v_expm1f F
GLIBC_2.35 _ZGVbN4vv_atan2f F
GLIBC_2.35 _ZGVcN4v_acos F
GLIBC_2.35 _ZGVcN4v_acosh F
@@ -84,6 +86,7 @@ GLIBC_2.35 _ZGVcN4v_erf F
GLIBC_2.35 _ZGVcN4v_erfc F
GLIBC_2.35 _ZGVcN4v_exp10 F
GLIBC_2.35 _ZGVcN4v_exp2 F
+GLIBC_2.35 _ZGVcN4v_expm1 F
GLIBC_2.35 _ZGVcN4vv_atan2 F
GLIBC_2.35 _ZGVcN8v_acosf F
GLIBC_2.35 _ZGVcN8v_acoshf F
@@ -97,6 +100,7 @@ GLIBC_2.35 _ZGVcN8v_erfcf F
GLIBC_2.35 _ZGVcN8v_erff F
GLIBC_2.35 _ZGVcN8v_exp10f F
GLIBC_2.35 _ZGVcN8v_exp2f F
+GLIBC_2.35 _ZGVcN8v_expm1f F
GLIBC_2.35 _ZGVcN8vv_atan2f F
GLIBC_2.35 _ZGVdN4v_acos F
GLIBC_2.35 _ZGVdN4v_acosh F
@@ -110,6 +114,7 @@ GLIBC_2.35 _ZGVdN4v_erf F
GLIBC_2.35 _ZGVdN4v_erfc F
GLIBC_2.35 _ZGVdN4v_exp10 F
GLIBC_2.35 _ZGVdN4v_exp2 F
+GLIBC_2.35 _ZGVdN4v_expm1 F
GLIBC_2.35 _ZGVdN4vv_atan2 F
GLIBC_2.35 _ZGVdN8v_acosf F
GLIBC_2.35 _ZGVdN8v_acoshf F
@@ -123,6 +128,7 @@ GLIBC_2.35 _ZGVdN8v_erfcf F
GLIBC_2.35 _ZGVdN8v_erff F
GLIBC_2.35 _ZGVdN8v_exp10f F
GLIBC_2.35 _ZGVdN8v_exp2f F
+GLIBC_2.35 _ZGVdN8v_expm1f F
GLIBC_2.35 _ZGVdN8vv_atan2f F
GLIBC_2.35 _ZGVeN16v_acosf F
GLIBC_2.35 _ZGVeN16v_acoshf F
@@ -136,6 +142,7 @@ GLIBC_2.35 _ZGVeN16v_erfcf F
GLIBC_2.35 _ZGVeN16v_erff F
GLIBC_2.35 _ZGVeN16v_exp10f F
GLIBC_2.35 _ZGVeN16v_exp2f F
+GLIBC_2.35 _ZGVeN16v_expm1f F
GLIBC_2.35 _ZGVeN16vv_atan2f F
GLIBC_2.35 _ZGVeN8v_acos F
GLIBC_2.35 _ZGVeN8v_acosh F
@@ -149,4 +156,5 @@ GLIBC_2.35 _ZGVeN8v_erf F
GLIBC_2.35 _ZGVeN8v_erfc F
GLIBC_2.35 _ZGVeN8v_exp10 F
GLIBC_2.35 _ZGVeN8v_exp2 F
+GLIBC_2.35 _ZGVeN8v_expm1 F
GLIBC_2.35 _ZGVeN8vv_atan2 F
@@ -110,6 +110,10 @@
# define __DECL_SIMD_exp2 __DECL_SIMD_x86_64
# undef __DECL_SIMD_exp2f
# define __DECL_SIMD_exp2f __DECL_SIMD_x86_64
+# undef __DECL_SIMD_expm1 /* Drop the earlier definition so the double variant can be redefined.  */
+# define __DECL_SIMD_expm1 __DECL_SIMD_x86_64
+# undef __DECL_SIMD_expm1f /* Same replacement for the float variant.  */
+# define __DECL_SIMD_expm1f __DECL_SIMD_x86_64
# endif
#endif
@@ -37,6 +37,7 @@ libmvec-funcs = \
exp \
exp10 \
exp2 \
+ expm1 \
log \
pow \
sin \
@@ -26,6 +26,7 @@ libmvec {
_ZGVbN2v_erfc; _ZGVcN4v_erfc; _ZGVdN4v_erfc; _ZGVeN8v_erfc;
_ZGVbN2v_exp10; _ZGVcN4v_exp10; _ZGVdN4v_exp10; _ZGVeN8v_exp10;
_ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2;
+ _ZGVbN2v_expm1; _ZGVcN4v_expm1; _ZGVdN4v_expm1; _ZGVeN8v_expm1;
_ZGVbN2vv_atan2; _ZGVcN4vv_atan2; _ZGVdN4vv_atan2; _ZGVeN8vv_atan2;
_ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf;
_ZGVbN4v_acoshf; _ZGVcN8v_acoshf; _ZGVdN8v_acoshf; _ZGVeN16v_acoshf;
@@ -39,6 +40,7 @@ libmvec {
_ZGVbN4v_erff; _ZGVcN8v_erff; _ZGVdN8v_erff; _ZGVeN16v_erff;
_ZGVbN4v_exp10f; _ZGVcN8v_exp10f; _ZGVdN8v_exp10f; _ZGVeN16v_exp10f;
_ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f;
+ _ZGVbN4v_expm1f; _ZGVcN8v_expm1f; _ZGVdN8v_expm1f; _ZGVeN16v_expm1f;
_ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f;
}
}
@@ -1532,6 +1532,26 @@ float: 1
float128: 3
ldouble: 4
+Function: "expm1_vlen16":
+float: 1
+
+Function: "expm1_vlen2":
+double: 1
+
+Function: "expm1_vlen4":
+double: 1
+float: 1
+
+Function: "expm1_vlen4_avx2":
+double: 1
+
+Function: "expm1_vlen8":
+double: 1
+float: 1
+
+Function: "expm1_vlen8_avx2":
+float: 1
+
Function: "gamma":
double: 4
float: 7
new file mode 100644
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized expm1, vector length is 2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2v_expm1 _ZGVbN2v_expm1_sse2 /* Every _ZGVbN2v_expm1 token in the file included below is renamed to the _sse2 symbol.  */
+#include "../svml_d_expm12_core.S"
new file mode 100644
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized expm1, vector length is 2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2v_expm1 /* Symbol handled by this file; presumably read by the header included next (not visible here).  */
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* NOTE(review): these macros are defined outside this view; presumably this binds SYMBOL_NAME to the variant chosen by IFUNC_SELECTOR.  */
+
+#ifdef SHARED /* The extra declaration below is emitted only when SHARED is defined.  */
+__hidden_ver1 (_ZGVbN2v_expm1, __GI__ZGVbN2v_expm1, __redirect__ZGVbN2v_expm1)
+ __attribute__ ((visibility ("hidden")));
+#endif
new file mode 100644
@@ -0,0 +1,1990 @@
+/* Function expm1 vectorized with SSE4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * N = (int)(x*2^k/log(2.0)), R = x - N*log(2)/2^k
+ * exp(x) = 2^(N/2^k) * poly(R) is computed in high-low parts
+ * expm1(x) = exp(x)-1 is then obtained via multi-precision computation
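+ * In this file k = 7 (128-entry table of Th/Tl pairs); lanes with |x| above
+ * a threshold, or NaN, are recomputed by a scalar fallback routine.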
+ */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(_ZGVbN2v_expm1_sse4) /* expm1 of two packed doubles: input vector in %xmm0, result vector in %xmm0 */
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp /* %rbp keeps the incoming %rsp so the epilogue can undo the alignment below */
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-64, %rsp /* align the frame to 64 bytes */
+ subq $320, %rsp /* scratch frame: register save slots plus the spilled input and result vectors */
+ movaps %xmm0, %xmm5 /* %xmm5 = x, kept for the sign handling and for the fallback path */
+ movups 2112+__svml_dexpm1_data_internal(%rip), %xmm0 /* constant at table offset 2112 (bit pattern of 2^7/ln(2)) */
+ movaps %xmm5, %xmm4 /* %xmm4 starts as x and becomes the reduced argument R */
+ mulpd %xmm5, %xmm0 /* x * 2^7/ln(2) */
+ movups .FLT_14(%rip), %xmm6 /* .FLT_14 is defined outside this view; presumably a rounding shifter constant */
+ lea __svml_dexpm1_data_internal(%rip), %r8 /* %r8 = base address of the lookup table */
+ addpd %xmm6, %xmm0
+ subpd %xmm6, %xmm0 /* add then subtract the same constant; presumably leaves N = x*2^7/ln(2) rounded to an integer */
+
+/* argument reduction: R = x - N*c_hi - N*c_lo, with ln(2)/2^7 split into c_hi (offset 2176) and c_lo (offset 2240) */
+ movups 2176+__svml_dexpm1_data_internal(%rip), %xmm7
+ mulpd %xmm0, %xmm7 /* N * c_hi */
+ movups 2240+__svml_dexpm1_data_internal(%rip), %xmm3
+ mulpd %xmm0, %xmm3 /* N * c_lo */
+ subpd %xmm7, %xmm4 /* R = x - N*c_hi */
+ movups 2304+__svml_dexpm1_data_internal(%rip), %xmm2
+ addpd %xmm0, %xmm2 /* N + constant at 2304; the integer bits of the sum are extracted with masks below */
+ subpd %xmm3, %xmm4 /* R -= N*c_lo */
+
+/* table lookup: per-lane byte offsets into the table; the |x| range check and sign extraction are interleaved */
+ movdqu 2368+__svml_dexpm1_data_internal(%rip), %xmm0 /* mask 2032 = 127*16: byte offset of a 16-byte table entry */
+ movups 2624+__svml_dexpm1_data_internal(%rip), %xmm3 /* 0x7fffffffffffffff: every bit except the sign */
+ pand %xmm2, %xmm0 /* table byte offset for each lane */
+ movaps %xmm3, %xmm1
+ andnps %xmm5, %xmm3 /* %xmm3 = sign bit of x only */
+ movd %xmm0, %eax /* lane 0 table offset */
+ andps %xmm5, %xmm1 /* %xmm1 = |x| */
+ pshufd $2, %xmm0, %xmm6 /* bring the low dword of lane 1 to position 0 */
+ movups %xmm15, 144(%rsp) /* %xmm15 is used as scratch below; saved here and restored at .LBL_1_2 */
+ cmpnlepd 2688+__svml_dexpm1_data_internal(%rip), %xmm1 /* lane mask: not (|x| <= threshold at offset 2688); also set for NaN */
+ movd %xmm6, %ecx /* lane 1 table offset */
+ .cfi_escape 0x10, 0x20, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 /* CFI expression for the %xmm15 save slot */
+
+/* polynomial in R (coefficients at offsets 2048, 2064, 2080, 2096), interleaved with the table loads */
+ movups 2048+__svml_dexpm1_data_internal(%rip), %xmm15
+ movslq %eax, %rax
+ movslq %ecx, %rcx
+ mulpd %xmm4, %xmm15 /* c[2048] * R */
+ movmskpd %xmm1, %edx /* %edx bit i set = lane i needs the scalar fallback */
+ movups (%r8,%rax), %xmm1 /* 16-byte table entry for lane 0 */
+ movups (%r8,%rcx), %xmm7 /* 16-byte table entry for lane 1 */
+ movaps %xmm1, %xmm0
+ unpcklpd %xmm7, %xmm0 /* %xmm0 = first double of each entry (Th) */
+ unpckhpd %xmm7, %xmm1 /* %xmm1 = second double of each entry (Tl) */
+ movaps %xmm4, %xmm7
+ mulpd %xmm4, %xmm7 /* R^2 */
+ addpd 2064+__svml_dexpm1_data_internal(%rip), %xmm15 /* c[2048]*R + c[2064] */
+ movups 2080+__svml_dexpm1_data_internal(%rip), %xmm6
+ mulpd %xmm4, %xmm6 /* c[2080] * R */
+ mulpd %xmm7, %xmm15 /* (c[2048]*R + c[2064]) * R^2 */
+ addpd 2096+__svml_dexpm1_data_internal(%rip), %xmm6 /* c[2080]*R + c[2096] */
+ pand 2432+__svml_dexpm1_data_internal(%rip), %xmm2 /* mask 0x3ff800: keep the bits of the biased N that lie above the table index */
+ psllq $41, %xmm2 /* move them into the exponent field (bit 52 upward); presumably the scale 2^M with M = N >> 7 */
+
+/* Th1 = (Th-1) + Tl, with Th and Tl both scaled by 2^M */
+ mulpd %xmm2, %xmm1 /* Tl * 2^M */
+ addpd %xmm15, %xmm6 /* poly = (c[2048]*R + c[2064])*R^2 + c[2080]*R + c[2096] */
+
+/* T-1 */
+ movups 2560+__svml_dexpm1_data_internal(%rip), %xmm15 /* -1.0 (0xbff0000000000000) */
+ orps %xmm2, %xmm0 /* OR the exponent into Th; the table stores Th with a zero exponent field */
+ addpd %xmm15, %xmm0 /* Th*2^M - 1 */
+ mulpd %xmm6, %xmm7 /* R^2 * poly */
+ addpd %xmm1, %xmm0 /* Th1 = (Th*2^M - 1) + Tl*2^M */
+ addpd %xmm7, %xmm4 /* %xmm4 = R + R^2*poly */
+
+/* T = Th+Tl, recovered as Th1 - (-1.0) */
+ movaps %xmm0, %xmm2
+ subpd %xmm15, %xmm2 /* T = Th1 + 1.0 */
+ mulpd %xmm2, %xmm4 /* T * (R + R^2*poly) */
+ addpd %xmm4, %xmm0 /* result = Th1 + T*(R + R^2*poly) */
+ orps %xmm3, %xmm0 /* OR in the sign bit of x (presumably so that a -0.0 input yields -0.0) */
+ testl %edx, %edx
+ jne .LBL_1_3 /* at least one lane needs the scalar fallback */
+
+.LBL_1_2:
+ movups 144(%rsp), %xmm15 /* common epilogue: restore %xmm15, then drop the aligned frame */
+ cfi_restore(32)
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16) /* re-establish the in-frame CFI state for the code placed after the ret */
+ cfi_offset(6, -16)
+ .cfi_escape 0x10, 0x20, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+
+.LBL_1_3:
+ movups %xmm5, 192(%rsp) /* spill the input vector so single lanes can be addressed */
+ movups %xmm0, 256(%rsp) /* spill the fast-path result; the fallback overwrites individual lanes */
+ xorl %eax, %eax
+ movups %xmm8, 96(%rsp) /* save %xmm8-%xmm14: the fallback routine uses them as scratch (see .LBL_2_12) */
+ movups %xmm9, 80(%rsp)
+ movups %xmm10, 64(%rsp)
+ movups %xmm11, 48(%rsp)
+ movups %xmm12, 32(%rsp)
+ movups %xmm13, 16(%rsp)
+ movups %xmm14, (%rsp)
+ movq %rsi, 120(%rsp) /* %rsi and %rdi are overwritten with the fallback arguments at .LBL_1_10 */
+ movq %rdi, 112(%rsp)
+ movq %r12, 136(%rsp) /* %r12 becomes the lane counter */
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x38, 0xff, 0xff, 0xff, 0x22 /* CFI expressions for the save slots written above */
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x30, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x19, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x20, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1a, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x10, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1b, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x00, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1f, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xfe, 0xff, 0xff, 0x22
+ movl %eax, %r12d /* lane index = 0 */
+ movq %r13, 128(%rsp) /* %r13 becomes the fallback lane mask */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r13d /* %r13d = mask produced by movmskpd above */
+
+.LBL_1_7:
+ btl %r12d, %r13d /* is the current lane flagged? */
+ jc .LBL_1_10
+
+.LBL_1_8:
+ incl %r12d
+ cmpl $2, %r12d /* two lanes in total */
+ jl .LBL_1_7
+ movups 96(%rsp), %xmm8 /* all lanes visited: restore everything saved at .LBL_1_3 */
+ cfi_restore(25)
+ movups 80(%rsp), %xmm9
+ cfi_restore(26)
+ movups 64(%rsp), %xmm10
+ cfi_restore(27)
+ movups 48(%rsp), %xmm11
+ cfi_restore(28)
+ movups 32(%rsp), %xmm12
+ cfi_restore(29)
+ movups 16(%rsp), %xmm13
+ cfi_restore(30)
+ movups (%rsp), %xmm14
+ cfi_restore(31)
+ movq 120(%rsp), %rsi
+ cfi_restore(4)
+ movq 112(%rsp), %rdi
+ cfi_restore(5)
+ movq 136(%rsp), %r12
+ cfi_restore(12)
+ movq 128(%rsp), %r13
+ cfi_restore(13)
+ movups 256(%rsp), %xmm0 /* reload the result vector, including lanes rewritten by the fallback */
+ jmp .LBL_1_2
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x38, 0xff, 0xff, 0xff, 0x22 /* CFI state for .LBL_1_10, which runs with the registers still saved */
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x30, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x19, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x20, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1a, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x10, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1b, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x00, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1f, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xfe, 0xff, 0xff, 0x22
+
+.LBL_1_10:
+ lea 192(%rsp,%r12,8), %rdi /* %rdi = address of this lane in the spilled input */
+ lea 256(%rsp,%r12,8), %rsi /* %rsi = address of this lane in the spilled result */
+ call __svml_dexpm1_cout_rare_internal /* the status returned in %eax is not used here */
+ jmp .LBL_1_8
+
+END(_ZGVbN2v_expm1_sse4)
+
+ .align 16,0x90
+
+__svml_dexpm1_cout_rare_internal: /* scalar fallback for one lane: %rdi -> input double, %rsi -> result slot, status code returned in %eax */
+
+ cfi_startproc
+
+ xorl %eax, %eax /* default status 0 */
+ movsd (%rdi), %xmm6 /* %xmm6 = x */
+ pxor %xmm0, %xmm0 /* %xmm0 = 0.0 */
+ movzwl 6(%rdi), %edx /* top 16 bits of x: sign, exponent, upper 4 mantissa bits */
+ comisd %xmm6, %xmm0
+ ja .LBL_2_18 /* 0 > x: the result is -1.0 */
+ andl $32752, %edx /* 0x7ff0: exponent field */
+ shrl $4, %edx /* biased exponent */
+ movsd %xmm6, -8(%rsp) /* copy of x below %rsp; this routine makes no calls and uses that area as scratch */
+ cmpl $2047, %edx
+ je .LBL_2_19 /* Inf or NaN */
+ cmpl $970, %edx
+ jle .LBL_2_16 /* biased exponent <= 970, i.e. |x| < 2^-52 */
+ movsd 1080+_imldExpHATab(%rip), %xmm0 /* _imldExpHATab is defined outside this view; its entries are referred to by byte offset below */
+ comisd %xmm6, %xmm0
+ jb .LBL_2_15 /* tab[1080] < x: presumably the overflow bound */
+ comisd 1096+_imldExpHATab(%rip), %xmm6
+ jb .LBL_2_14 /* x < tab[1096]: presumably the underflow bound */
+ movsd 1024+_imldExpHATab(%rip), %xmm0
+ movaps %xmm6, %xmm5 /* %xmm5 = x, becomes the reduced argument r */
+ mulsd %xmm6, %xmm0 /* x * tab[1024] */
+ lea _imldExpHATab(%rip), %r10 /* %r10 = table base for the indexed loads below */
+ movsd %xmm0, -24(%rsp)
+ movsd -24(%rsp), %xmm1
+ movq 1136+_imldExpHATab(%rip), %rdx
+ movq %rdx, -8(%rsp) /* -8(%rsp) now holds tab[1136]; its exponent bits are patched later to build the scale factor */
+ addsd 1032+_imldExpHATab(%rip), %xmm1 /* add tab[1032], presumably a rounding shifter */
+ movsd %xmm1, -16(%rsp)
+ movsd -16(%rsp), %xmm2
+ movl -16(%rsp), %r8d /* low 32 bits of the sum, used as the integer n */
+ movl %r8d, %ecx
+ andl $63, %r8d /* j = n & 63 selects a pair of table doubles */
+ subsd 1032+_imldExpHATab(%rip), %xmm2 /* subtract tab[1032] again: n as a double */
+ movsd %xmm2, -24(%rsp)
+ lea 1(%r8,%r8), %r9d /* 2j+1: index of the second double of pair j */
+ movsd -24(%rsp), %xmm3
+ lea (%r8,%r8), %edi /* 2j: index of the first double (overwrites %rdi, which is no longer needed) */
+ mulsd 1104+_imldExpHATab(%rip), %xmm3 /* n * tab[1104] */
+ movsd -24(%rsp), %xmm4
+ subsd %xmm3, %xmm5 /* r = x - n*tab[1104] */
+ mulsd 1112+_imldExpHATab(%rip), %xmm4 /* n * tab[1112] */
+ movsd 1072+_imldExpHATab(%rip), %xmm2 /* Horner evaluation in r starts from tab[1072] */
+ subsd %xmm4, %xmm5 /* r -= n*tab[1112] */
+ mulsd %xmm5, %xmm2
+ shrl $6, %ecx /* n >> 6 */
+ addsd 1064+_imldExpHATab(%rip), %xmm2
+ comisd 1088+_imldExpHATab(%rip), %xmm6 /* compare x with tab[1088]; the flags are consumed by the jb before .LBL_2_9 (SSE arithmetic and lea leave them intact) */
+ mulsd %xmm5, %xmm2
+ movsd (%r10,%rdi,8), %xmm0 /* %xmm0 = first double of pair j */
+ lea 1023(%rcx), %edx /* (n >> 6) + 1023: biased exponent for the scale factor */
+ addsd 1056+_imldExpHATab(%rip), %xmm2
+ mulsd %xmm5, %xmm2
+ addsd 1048+_imldExpHATab(%rip), %xmm2
+ mulsd %xmm5, %xmm2
+ addsd 1040+_imldExpHATab(%rip), %xmm2
+ mulsd %xmm5, %xmm2
+ mulsd %xmm5, %xmm2 /* second multiply by r: the polynomial part is now r^2 * (...) */
+ addsd %xmm5, %xmm2 /* p = r + r^2 * (...) */
+ addsd (%r10,%r9,8), %xmm2 /* p += second double of pair j */
+ mulsd %xmm0, %xmm2 /* p *= first double of pair j */
+ jb .LBL_2_10 /* x < tab[1088]: take the path that returns status 4 */
+ andl $2047, %edx
+ addsd %xmm0, %xmm2 /* p += first double of pair j */
+ cmpl $2046, %edx
+ ja .LBL_2_9 /* exponent field would be 2047: apply the scale in two steps instead */
+ movq 1136+_imldExpHATab(%rip), %rcx
+ shrq $48, %rcx /* top 16 bits of tab[1136] */
+ shll $4, %edx
+ andl $-32753, %ecx /* clear the 11 exponent bits (0x7ff0) of that 16-bit value */
+ orl %edx, %ecx /* insert the new biased exponent */
+ movw %cx, -2(%rsp) /* patch the top 16 bits of the scale stored at -8(%rsp) */
+ movsd -8(%rsp), %xmm0
+ mulsd %xmm0, %xmm2 /* result = p * scale */
+ movsd %xmm2, (%rsi)
+ ret /* status 0 */
+
+.LBL_2_9:
+ decl %edx /* use an exponent one lower so the scale stays finite */
+ andl $2047, %edx
+ movzwl -2(%rsp), %ecx /* top 16 bits of the tab[1136] copy at -8(%rsp) */
+ shll $4, %edx
+ andl $-32753, %ecx
+ orl %edx, %ecx
+ movw %cx, -2(%rsp)
+ movsd -8(%rsp), %xmm0
+ mulsd %xmm0, %xmm2
+ mulsd 1152+_imldExpHATab(%rip), %xmm2 /* second scaling step by tab[1152]; presumably compensates for the decrement above */
+ movsd %xmm2, (%rsi)
+ ret /* status 0 */
+
+.LBL_2_10:
+ addl $1083, %ecx /* (n >> 6) + 1083: exponent raised by 60 relative to the 1023 bias used above */
+ andl $2047, %ecx
+ movl %ecx, %eax /* %eax is a temporary here; the status is set at .LBL_2_13 */
+ movzwl -2(%rsp), %edx
+ shll $4, %eax
+ andl $-32753, %edx
+ orl %eax, %edx
+ movw %dx, -2(%rsp) /* scale factor with the raised exponent */
+ movsd -8(%rsp), %xmm1
+ mulsd %xmm1, %xmm2 /* low term: scale * (first double * (p_poly + second double)) */
+ mulsd %xmm0, %xmm1 /* high term: scale * first double of pair j */
+ movaps %xmm1, %xmm0
+ addsd %xmm2, %xmm0 /* s = high + low */
+ cmpl $50, %ecx
+ ja .LBL_2_12
+ mulsd 1160+_imldExpHATab(%rip), %xmm0 /* rescale by tab[1160]; presumably undoes the raised exponent */
+ movsd %xmm0, (%rsi)
+ jmp .LBL_2_13
+
+.LBL_2_12:
+ movsd %xmm0, -72(%rsp) /* -72: s */
+ movsd -72(%rsp), %xmm0
+ subsd %xmm0, %xmm1
+ movsd %xmm1, -64(%rsp)
+ movsd -64(%rsp), %xmm1
+ addsd %xmm2, %xmm1
+ movsd %xmm1, -64(%rsp) /* -64: e = (high - s) + low, the rounding error of s */
+ movsd -72(%rsp), %xmm2
+ mulsd 1168+_imldExpHATab(%rip), %xmm2
+ movsd %xmm2, -56(%rsp) /* -56: t = s * tab[1168] */
+ movsd -72(%rsp), %xmm4
+ movsd -56(%rsp), %xmm3
+ addsd %xmm3, %xmm4
+ movsd %xmm4, -48(%rsp) /* -48: s + t */
+ movsd -48(%rsp), %xmm6
+ movsd -56(%rsp), %xmm5
+ subsd %xmm5, %xmm6
+ movsd %xmm6, -40(%rsp) /* -40: hi = (s + t) - t, the leading part of s */
+ movsd -72(%rsp), %xmm8
+ movsd -40(%rsp), %xmm7
+ subsd %xmm7, %xmm8
+ movsd %xmm8, -32(%rsp) /* -32: s - hi */
+ movsd -64(%rsp), %xmm10
+ movsd -32(%rsp), %xmm9
+ addsd %xmm9, %xmm10
+ movsd %xmm10, -32(%rsp) /* -32: lo = e + (s - hi) */
+ movsd -40(%rsp), %xmm11
+ mulsd 1160+_imldExpHATab(%rip), %xmm11
+ movsd %xmm11, -40(%rsp) /* hi * tab[1160] */
+ movsd -32(%rsp), %xmm12
+ mulsd 1160+_imldExpHATab(%rip), %xmm12
+ movsd %xmm12, -32(%rsp) /* lo * tab[1160] */
+ movsd -40(%rsp), %xmm14
+ movsd -32(%rsp), %xmm13
+ addsd %xmm13, %xmm14
+ movsd %xmm14, (%rsi) /* result = rescaled hi + rescaled lo */
+
+.LBL_2_13:
+ movl $4, %eax /* status 4 */
+ ret
+
+.LBL_2_14:
+ movsd 1120+_imldExpHATab(%rip), %xmm0
+ movl $4, %eax /* status 4 */
+ mulsd %xmm0, %xmm0 /* result = tab[1120]^2; presumably forces an underflowed value */
+ movsd %xmm0, (%rsi)
+ ret
+
+.LBL_2_15:
+ movsd 1128+_imldExpHATab(%rip), %xmm0
+ movl $3, %eax /* status 3 */
+ mulsd %xmm0, %xmm0 /* result = tab[1128]^2; presumably forces an overflowed value */
+ movsd %xmm0, (%rsi)
+ ret
+
+.LBL_2_16:
+ movsd 1144+_imldExpHATab(%rip), %xmm0
+ addsd %xmm6, %xmm0 /* result = tab[1144] + x for |x| < 2^-52 */
+ movsd %xmm0, (%rsi)
+ ret /* status 0 */
+
+.LBL_2_18:
+ movq $0xbff0000000000000, %rax /* bit pattern of -1.0 */
+ movq %rax, (%rsi)
+ xorl %eax, %eax /* %rax was used for the constant; reset the status to 0 */
+ ret
+
+.LBL_2_19:
+ movb -1(%rsp), %dl /* top byte of the x copy stored at -8(%rsp) */
+ andb $-128, %dl
+ cmpb $-128, %dl
+ je .LBL_2_21 /* sign bit set */
+
+.LBL_2_20:
+ mulsd %xmm6, %xmm6 /* x*x: NaN stays NaN, +Inf stays +Inf */
+ movsd %xmm6, (%rsi)
+ ret
+
+.LBL_2_21:
+ testl $1048575, -4(%rsp) /* upper 20 mantissa bits non-zero: NaN */
+ jne .LBL_2_20
+ cmpl $0, -8(%rsp) /* lower 32 mantissa bits non-zero: NaN */
+ jne .LBL_2_20
+ movq 1136+_imldExpHATab(%rip), %rdx /* x is -Inf: result = tab[1136]. NOTE(review): looks unreachable, since x < 0 already exits through .LBL_2_18 */
+ movq %rdx, (%rsi)
+ ret
+
+ cfi_endproc
+
+ .type __svml_dexpm1_cout_rare_internal,@function
+ .size __svml_dexpm1_cout_rare_internal,.-__svml_dexpm1_cout_rare_internal
+
+ .section .rodata, "a"
+ .align 64
+
+__svml_dexpm1_data_internal:
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 2818572288
+ .long 5693
+ .long 1457015399
+ .long 1044362035
+ .long 1073741824
+ .long 11418
+ .long 4044949557
+ .long 3191375865
+ .long 3892314112
+ .long 17173
+ .long 794616807
+ .long 1041997793
+ .long 3489660928
+ .long 22960
+ .long 2715237930
+ .long 1044950059
+ .long 671088640
+ .long 28779
+ .long 3698181124
+ .long 1044242285
+ .long 402653184
+ .long 34629
+ .long 36755401
+ .long 1042114290
+ .long 3355443200
+ .long 40510
+ .long 438898435
+ .long 1044789148
+ .long 1879048192
+ .long 46424
+ .long 2230008375
+ .long 3192402871
+ .long 671088640
+ .long 52370
+ .long 3120814979
+ .long 1045140031
+ .long 805306368
+ .long 58348
+ .long 269498903
+ .long 1044815501
+ .long 2952790016
+ .long 64358
+ .long 3705630242
+ .long 3182612048
+ .long 3489660928
+ .long 70401
+ .long 2766913307
+ .long 1039293264
+ .long 3221225472
+ .long 76477
+ .long 4276399797
+ .long 1041960050
+ .long 2952790016
+ .long 82586
+ .long 80474087
+ .long 3191172386
+ .long 3355443200
+ .long 88728
+ .long 613423790
+ .long 1042592202
+ .long 1073741824
+ .long 94904
+ .long 689505308
+ .long 3192657268
+ .long 939524096
+ .long 101113
+ .long 930606615
+ .long 1042387389
+ .long 3892314112
+ .long 107355
+ .long 2850403528
+ .long 1045134939
+ .long 2281701376
+ .long 113632
+ .long 1663725767
+ .long 3192904985
+ .long 805306368
+ .long 119943
+ .long 2810207104
+ .long 1043762074
+ .long 536870912
+ .long 126288
+ .long 3854830848
+ .long 1044899528
+ .long 2281701376
+ .long 132667
+ .long 2397289153
+ .long 1041802037
+ .long 2415919104
+ .long 139081
+ .long 1649749971
+ .long 1043848649
+ .long 1879048192
+ .long 145530
+ .long 2039734354
+ .long 3191384540
+ .long 1342177280
+ .long 152014
+ .long 849302817
+ .long 3188938352
+ .long 1744830464
+ .long 158533
+ .long 383003846
+ .long 3191925785
+ .long 3758096384
+ .long 165087
+ .long 158134621
+ .long 1044338232
+ .long 4160749568
+ .long 171677
+ .long 4137603445
+ .long 3192324360
+ .long 3489660928
+ .long 178303
+ .long 4290499725
+ .long 1043028785
+ .long 2818572288
+ .long 184965
+ .long 1275031083
+ .long 3190931407
+ .long 2818572288
+ .long 191663
+ .long 1629266164
+ .long 1043587829
+ .long 134217728
+ .long 198398
+ .long 2842642093
+ .long 1044483512
+ .long 0
+ .long 205169
+ .long 1985360263
+ .long 3192756542
+ .long 2952790016
+ .long 211976
+ .long 4200916017
+ .long 1044586679
+ .long 1610612736
+ .long 218821
+ .long 3450763054
+ .long 3189463043
+ .long 939524096
+ .long 225703
+ .long 2870834528
+ .long 3190336198
+ .long 1879048192
+ .long 232622
+ .long 3553800616
+ .long 3192377660
+ .long 939524096
+ .long 239579
+ .long 1219436983
+ .long 3192443648
+ .long 3221225472
+ .long 246573
+ .long 606077177
+ .long 1044946247
+ .long 1342177280
+ .long 253606
+ .long 3998375791
+ .long 3192876638
+ .long 134217728
+ .long 260677
+ .long 586810495
+ .long 3192560639
+ .long 536870912
+ .long 267786
+ .long 2676240988
+ .long 1044345570
+ .long 3623878656
+ .long 274933
+ .long 1841759300
+ .long 1043663497
+ .long 1610612736
+ .long 282120
+ .long 1086643152
+ .long 1041785419
+ .long 4026531840
+ .long 289345
+ .long 1148024454
+ .long 3192330237
+ .long 3087007744
+ .long 296610
+ .long 2137125602
+ .long 3191993881
+ .long 4026531840
+ .long 303914
+ .long 3437605242
+ .long 1043004027
+ .long 3623878656
+ .long 311258
+ .long 3340100419
+ .long 3192278702
+ .long 2550136832
+ .long 318642
+ .long 3594204911
+ .long 1044372944
+ .long 2013265920
+ .long 326066
+ .long 2502738549
+ .long 3191221557
+ .long 2684354560
+ .long 333530
+ .long 235444137
+ .long 1044806450
+ .long 1476395008
+ .long 341035
+ .long 3792656324
+ .long 3191220999
+ .long 3355443200
+ .long 348580
+ .long 1982428721
+ .long 1044573328
+ .long 939524096
+ .long 356167
+ .long 1502688512
+ .long 3191123330
+ .long 3623878656
+ .long 363794
+ .long 383164906
+ .long 3192603072
+ .long 3758096384
+ .long 371463
+ .long 3040458367
+ .long 3192241502
+ .long 2281701376
+ .long 379174
+ .long 3087934862
+ .long 1044564533
+ .long 402653184
+ .long 386927
+ .long 3163234522
+ .long 3192035061
+ .long 3087007744
+ .long 394721
+ .long 2332520281
+ .long 1043819968
+ .long 2952790016
+ .long 402558
+ .long 1492679939
+ .long 1041050306
+ .long 939524096
+ .long 410438
+ .long 29656007
+ .long 3192494567
+ .long 2147483648
+ .long 418360
+ .long 612974287
+ .long 1044556049
+ .long 3623878656
+ .long 426325
+ .long 1740578119
+ .long 3192756916
+ .long 1744830464
+ .long 434334
+ .long 922176773
+ .long 3191344195
+ .long 2013265920
+ .long 442386
+ .long 143936179
+ .long 3192365354
+ .long 1073741824
+ .long 450482
+ .long 2288974058
+ .long 3192706862
+ .long 4160749568
+ .long 458621
+ .long 1022918171
+ .long 1043667272
+ .long 3892314112
+ .long 466805
+ .long 2074373662
+ .long 1043172334
+ .long 1207959552
+ .long 475034
+ .long 2007733066
+ .long 1042591790
+ .long 1476395008
+ .long 483307
+ .long 1946752598
+ .long 3191593347
+ .long 1342177280
+ .long 491625
+ .long 1328713708
+ .long 3187724640
+ .long 1879048192
+ .long 499988
+ .long 918464641
+ .long 1045387276
+ .long 0
+ .long 508397
+ .long 667194164
+ .long 1043532819
+ .long 939524096
+ .long 516851
+ .long 3740938196
+ .long 3191016217
+ .long 1476395008
+ .long 525351
+ .long 1917817036
+ .long 3192786735
+ .long 2550136832
+ .long 533897
+ .long 682424459
+ .long 1043647713
+ .long 1207959552
+ .long 542490
+ .long 857395348
+ .long 3191718789
+ .long 2550136832
+ .long 551129
+ .long 1678188781
+ .long 1045046423
+ .long 3623878656
+ .long 559815
+ .long 2523214013
+ .long 1043900009
+ .long 1073741824
+ .long 568549
+ .long 3671932459
+ .long 1044468998
+ .long 402653184
+ .long 577330
+ .long 1091392995
+ .long 3191122871
+ .long 2550136832
+ .long 586158
+ .long 1656324724
+ .long 1043421043
+ .long 134217728
+ .long 595035
+ .long 742731994
+ .long 1045204990
+ .long 2952790016
+ .long 603959
+ .long 2659845000
+ .long 1042921660
+ .long 3355443200
+ .long 612932
+ .long 2001576987
+ .long 1045316240
+ .long 2684354560
+ .long 621954
+ .long 976271096
+ .long 3187726552
+ .long 1879048192
+ .long 631025
+ .long 927342903
+ .long 1042890999
+ .long 2147483648
+ .long 640145
+ .long 2162418230
+ .long 1044717444
+ .long 402653184
+ .long 649315
+ .long 830622888
+ .long 1044263474
+ .long 2013265920
+ .long 658534
+ .long 630511316
+ .long 1045098283
+ .long 4026531840
+ .long 667803
+ .long 1698296944
+ .long 3192762006
+ .long 2952790016
+ .long 677123
+ .long 3831108133
+ .long 1044508970
+ .long 268435456
+ .long 686494
+ .long 3279515609
+ .long 1045005722
+ .long 1476395008
+ .long 695915
+ .long 98608862
+ .long 3192139794
+ .long 3221225472
+ .long 705387
+ .long 529675467
+ .long 3188065859
+ .long 2550136832
+ .long 714911
+ .long 3588780877
+ .long 1043705146
+ .long 671088640
+ .long 724487
+ .long 1493713581
+ .long 1043913574
+ .long 3087007744
+ .long 734114
+ .long 3182425146
+ .long 1041483134
+ .long 2415919104
+ .long 743794
+ .long 864959479
+ .long 3191919926
+ .long 4026531840
+ .long 753526
+ .long 928333188
+ .long 1044896498
+ .long 805306368
+ .long 763312
+ .long 813799033
+ .long 1042555081
+ .long 2415919104
+ .long 773150
+ .long 2300504125
+ .long 1041428596
+ .long 1476395008
+ .long 783042
+ .long 1142965944
+ .long 1045346544
+ .long 3758096384
+ .long 792987
+ .long 518977959
+ .long 3192116587
+ .long 1610612736
+ .long 802987
+ .long 1972387576
+ .long 3179791049
+ .long 805306368
+ .long 813041
+ .long 1264446592
+ .long 3191505643
+ .long 2550136832
+ .long 823149
+ .long 1467128350
+ .long 3192899778
+ .long 3758096384
+ .long 833312
+ .long 3075989921
+ .long 3192423292
+ .long 1476395008
+ .long 843531
+ .long 836600757
+ .long 3192197600
+ .long 1207959552
+ .long 853805
+ .long 3697834264
+ .long 1044397131
+ .long 134217728
+ .long 864135
+ .long 364651635
+ .long 1038816227
+ .long 3758096384
+ .long 874520
+ .long 3335598035
+ .long 3192398555
+ .long 402653184
+ .long 884963
+ .long 2219290723
+ .long 3191039942
+ .long 0
+ .long 895462
+ .long 730095629
+ .long 1045354900
+ .long 4026531840
+ .long 906017
+ .long 39537391
+ .long 1044909475
+ .long 805306368
+ .long 916631
+ .long 4123739734
+ .long 1045159130
+ .long 402653184
+ .long 927302
+ .long 3136734448
+ .long 3192410870
+ .long 3892314112
+ .long 938030
+ .long 1982905152
+ .long 3189583874
+ .long 4160749568
+ .long 948817
+ .long 442147929
+ .long 1045314148
+ .long 2684354560
+ .long 959663
+ .long 3425467293
+ .long 1044718726
+ .long 805306368
+ .long 970568
+ .long 2073198199
+ .long 3192097984
+ .long 4026531840
+ .long 981531
+ .long 2291008222
+ .long 3191466589
+ .long 939524096
+ .long 992555
+ .long 372190496
+ .long 3189934253
+ .long 1476395008
+ .long 1003638
+ .long 54164518
+ .long 1045131818
+ .long 2952790016
+ .long 1014781
+ .long 1672962650
+ .long 3192068623
+ .long 2147483648
+ .long 1025985
+ .long 2196310654
+ .long 1043982605
+ .long 671088640
+ .long 1037250
+ .long 2286661074
+ .long 1045199759
+ .long 1753710392
+ .long 1065423121
+ .long 1753710392
+ .long 1065423121
+ .long 3265904883
+ .long 1067799893
+ .long 3265904883
+ .long 1067799893
+ .long 1431655453
+ .long 1069897045
+ .long 1431655453
+ .long 1069897045
+ .long 4294966876
+ .long 1071644671
+ .long 4294966876
+ .long 1071644671
+ .long 1697350398
+ .long 1080497479
+ .long 1697350398
+ .long 1080497479
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4277665792
+ .long 1064709698
+ .long 4277665792
+ .long 1064709698
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 2882134964
+ .long 1027723129
+ .long 2882134964
+ .long 1027723129
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 2095104
+ .long 1123549184
+ .long 2095104
+ .long 1123549184
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 2032
+ .long 0
+ .long 2032
+ .long 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4192256
+ .long 0
+ .long 4192256
+ .long 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4160749568
+ .long 4294967295
+ .long 4160749568
+ .long 4294967295
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 0
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4294967295
+ .long 2147483647
+ .long 4294967295
+ .long 2147483647
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1287323203
+ .long 1082531232
+ .long 1287323203
+ .long 1082531232
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4277811695
+ .long 1064709698
+ .long 4277811695
+ .long 1064709698
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1047552
+ .long 1124597760
+ .long 1047552
+ .long 1124597760
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1016
+ .long 0
+ .long 1016
+ .long 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 2096128
+ .long 0
+ .long 2096128
+ .long 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .type __svml_dexpm1_data_internal,@object
+ .size __svml_dexpm1_data_internal,3008
+ .align 32
+/* _imldExpHATab: 64 entries of two doubles each (1024 bytes), then 19 scalar doubles at +1024..+1168; each double is a low-word/high-word .long pair.  NOTE(review): entries look like 2^(j/64) as leading part + small correction, and the reader of this copy is outside this chunk -- confirm both. */
+_imldExpHATab:
+ .long 0
+ .long 1072693248 /* entry 0, first double: 0x3FF0000000000000 = 1.0 */
+ .long 0
+ .long 0 /* entry 0, second double: 0.0 */
+ .long 1048019041
+ .long 1072704666 /* entry 1, first double: 0x3FF02C9A3E778061, about 1.0109 */
+ .long 2631457885
+ .long 3161546771 /* entry 1, second double: tiny negative value (high word 0xBC716013) */
+ .long 3541402996
+ .long 1072716208
+ .long 896005651
+ .long 1015861842
+ .long 410360776
+ .long 1072727877
+ .long 1642514529
+ .long 1012987726
+ .long 1828292879
+ .long 1072739672
+ .long 1568897901
+ .long 1016568486
+ .long 852742562
+ .long 1072751596
+ .long 1882168529
+ .long 1010744893
+ .long 3490863953
+ .long 1072763649
+ .long 707771662
+ .long 3163903570
+ .long 2930322912
+ .long 1072775834
+ .long 3117806614
+ .long 3163670819
+ .long 1014845819
+ .long 1072788152
+ .long 3936719688
+ .long 3162512149
+ .long 3949972341
+ .long 1072800603
+ .long 1058231231
+ .long 1015777676
+ .long 828946858
+ .long 1072813191
+ .long 1044000608
+ .long 1016786167
+ .long 2288159958
+ .long 1072825915
+ .long 1151779725
+ .long 1015705409
+ .long 1853186616
+ .long 1072838778
+ .long 3819481236
+ .long 1016499965
+ .long 1709341917
+ .long 1072851781
+ .long 2552227826
+ .long 1015039787
+ .long 4112506593
+ .long 1072864925
+ .long 1829350193
+ .long 1015216097
+ .long 2799960843
+ .long 1072878213
+ .long 1913391796
+ .long 1015756674
+ .long 171030293
+ .long 1072891646
+ .long 1303423926
+ .long 1015238005
+ .long 2992903935
+ .long 1072905224
+ .long 1574172746
+ .long 1016061241
+ .long 926591435
+ .long 1072918951
+ .long 3427487848
+ .long 3163704045
+ .long 887463927
+ .long 1072932827
+ .long 1049900754
+ .long 3161575912
+ .long 1276261410
+ .long 1072946854
+ .long 2804567149
+ .long 1015390024
+ .long 569847338
+ .long 1072961034
+ .long 1209502043
+ .long 3159926671
+ .long 1617004845
+ .long 1072975368
+ .long 1623370769
+ .long 1011049453
+ .long 3049340112
+ .long 1072989858
+ .long 3667985273
+ .long 1013894369
+ .long 3577096743
+ .long 1073004506
+ .long 3145379760
+ .long 1014403278
+ .long 1990012071
+ .long 1073019314
+ .long 7447438
+ .long 3163526196
+ .long 1453150082
+ .long 1073034283
+ .long 3171891295
+ .long 3162037958
+ .long 917841882
+ .long 1073049415
+ .long 419288974
+ .long 1016280325
+ .long 3712504873
+ .long 1073064711
+ .long 3793507337
+ .long 1016095713
+ .long 363667784
+ .long 1073080175
+ .long 728023093
+ .long 1016345318
+ .long 2956612997
+ .long 1073095806
+ .long 1005538728
+ .long 3163304901
+ .long 2186617381
+ .long 1073111608
+ .long 2018924632
+ .long 3163803357
+ .long 1719614413
+ .long 1073127582
+ .long 3210617384
+ .long 3163796463
+ .long 1013258799
+ .long 1073143730
+ .long 3094194670
+ .long 3160631279
+ .long 3907805044
+ .long 1073160053
+ .long 2119843535
+ .long 3161988964
+ .long 1447192521
+ .long 1073176555
+ .long 508946058
+ .long 3162904882
+ .long 1944781191
+ .long 1073193236
+ .long 3108873501
+ .long 3162190556
+ .long 919555682
+ .long 1073210099
+ .long 2882956373
+ .long 1013312481
+ .long 2571947539
+ .long 1073227145
+ .long 4047189812
+ .long 3163777462
+ .long 2604962541
+ .long 1073244377
+ .long 3631372142
+ .long 3163870288
+ .long 1110089947
+ .long 1073261797
+ .long 3253791412
+ .long 1015920431
+ .long 2568320822
+ .long 1073279406
+ .long 1509121860
+ .long 1014756995
+ .long 2966275557
+ .long 1073297207
+ .long 2339118633
+ .long 3160254904
+ .long 2682146384
+ .long 1073315202
+ .long 586480042
+ .long 3163702083
+ .long 2191782032
+ .long 1073333393
+ .long 730975783
+ .long 1014083580
+ .long 2069751141
+ .long 1073351782
+ .long 576856675
+ .long 3163014404
+ .long 2990417245
+ .long 1073370371
+ .long 3552361237
+ .long 3163667409
+ .long 1434058175
+ .long 1073389163
+ .long 1853053619
+ .long 1015310724
+ .long 2572866477
+ .long 1073408159
+ .long 2462790535
+ .long 1015814775
+ .long 3092190715
+ .long 1073427362
+ .long 1457303226
+ .long 3159737305
+ .long 4076559943
+ .long 1073446774
+ .long 950899508
+ .long 3160987380
+ .long 2420883922
+ .long 1073466398
+ .long 174054861
+ .long 1014300631
+ .long 3716502172
+ .long 1073486235
+ .long 816778419
+ .long 1014197934
+ .long 777507147
+ .long 1073506289
+ .long 3507050924
+ .long 1015341199
+ .long 3706687593
+ .long 1073526560
+ .long 1821514088
+ .long 1013410604
+ .long 1242007932
+ .long 1073547053
+ .long 1073740399
+ .long 3163532637
+ .long 3707479175
+ .long 1073567768
+ .long 2789017511
+ .long 1014276997
+ .long 64696965
+ .long 1073588710
+ .long 3586233004
+ .long 1015962192
+ .long 863738719
+ .long 1073609879
+ .long 129252895
+ .long 3162690849
+ .long 3884662774
+ .long 1073631278
+ .long 1614448851
+ .long 1014281732
+ .long 2728693978
+ .long 1073652911
+ .long 2413007344
+ .long 3163551506
+ .long 3999357479
+ .long 1073674779
+ .long 1101668360
+ .long 1015989180
+ .long 1533953344
+ .long 1073696886
+ .long 835814894
+ .long 1015702697
+ .long 2174652632
+ .long 1073719233 /* entry 63, first double: high word 0x3FFFA7C1, about 1.978 */
+ .long 1301400989
+ .long 1014466875
+ .long 1697350398
+ .long 1079448903 /* +1024: about 92.33 (64 / ln 2) */
+ .long 0
+ .long 1127743488 /* +1032: 0x4338000000000000 = 1.5 * 2^52 */
+ .long 0
+ .long 1071644672 /* +1040: 0.5 */
+ .long 1431652600
+ .long 1069897045 /* +1048: about 1/6 */
+ .long 1431670732
+ .long 1067799893 /* +1056: about 1/24 */
+ .long 984555731
+ .long 1065423122 /* +1064: about 1/120 */
+ .long 472530941
+ .long 1062650218 /* +1072: about 1/720 */
+ .long 4277811695
+ .long 1082535490 /* +1080: about 709.78 */
+ .long 3715808466
+ .long 3230016299 /* +1088: about -708.40 */
+ .long 3576508497
+ .long 3230091536 /* +1096: about -745.13 */
+ .long 4277796864
+ .long 1065758274 /* +1104: about 0.01083 (leading bits of ln 2 / 64) */
+ .long 3164486458
+ .long 1025308570 /* +1112: about 2.57e-14 (small companion of +1104) */
+ .long 1
+ .long 1048576 /* +1120: 0x0010000000000001, smallest normal double plus one ulp */
+ .long 4294967295
+ .long 2146435071 /* +1128: 0x7FEFFFFFFFFFFFFF, largest finite double */
+ .long 0
+ .long 0 /* +1136: 0.0 */
+ .long 0
+ .long 1072693248 /* +1144: 1.0 */
+ .long 0
+ .long 1073741824 /* +1152: 2.0 */
+ .long 0
+ .long 1009778688 /* +1160: 2^-60 */
+ .long 0
+ .long 1106771968 /* +1168: 1.5 * 2^32 */
+ .type _imldExpHATab,@object
+ .size _imldExpHATab,1176 /* 147 doubles: 128 in the entry pairs + 19 scalars */
+ .space 8, 0x00 /* 8 zero bytes of padding after the table */
+ .p2align 4
+
+.FLT_14: /* two copies of the double 0x4338000000000000 (1.5 * 2^52) */
+ .quad 0x4338000000000000, 0x4338000000000000
+ .type .FLT_14,@object
+ .size .FLT_14,16
+ .p2align 3
+
+.FLT_73: /* the double 0xbff0000000000000 (-1.0) */
+ .quad 0xbff0000000000000
+ .type .FLT_73,@object
+ .size .FLT_73,8
new file mode 100644
@@ -0,0 +1,20 @@
+/* SSE version of vectorized expm1, vector length is 4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVdN4v_expm1 _ZGVdN4v_expm1_sse_wrapper /* rename, so the file included below defines the _sse_wrapper symbol instead */
+#include "../svml_d_expm14_core.S" /* NOTE(review): presumably the baseline 4-lane implementation; its contents are not visible here */
new file mode 100644
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized expm1, vector length is 4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN4v_expm1 /* symbol name consumed by the macros used below */
+#include "ifunc-mathvec-avx2.h" /* NOTE(review): presumably provides REDIRECT_NAME and IFUNC_SELECTOR; not visible here */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* bind SYMBOL_NAME to whatever IFUNC_SELECTOR () picks */
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4v_expm1, __GI__ZGVdN4v_expm1, __redirect__ZGVdN4v_expm1)
+ __attribute__ ((visibility ("hidden"))); /* hidden-visibility __GI_ name, only when SHARED is defined */
+#endif
new file mode 100644
@@ -0,0 +1,1824 @@
+/* Function expm1 vectorized with AVX2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * N = (int)(x*2^k/log(2.0)), R = x - N*log(2)/2^k
+ * exp(x) = 2^(N/2^k) * poly(R) is computed in high-low parts
+ * expm1(x) = exp(x)-1 is then obtained via multi-precision computation
+ *
+ *
+ */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(_ZGVdN4v_expm1_avx2) /* expm1 on 4 doubles: x arrives in %ymm0, result leaves in %ymm0 */
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp /* %rbp remembers the incoming %rsp for the epilogue */
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-64, %rsp /* align the frame to 64 bytes */
+ subq $384, %rsp /* scratch: register spills plus the x[] and result[] buffers */
+ lea __svml_dexpm1_data_internal(%rip), %rax /* %rax = base of the constant table */
+ vmovapd %ymm0, %ymm3 /* %ymm3 = x, kept for the whole routine */
+ vmulpd 2176+__svml_dexpm1_data_internal(%rip), %ymm3, %ymm4 /* %ymm4 = x * table[2176] (about 128 / ln 2) */
+
+/* argument reduction */
+ vmovupd 2240+__svml_dexpm1_data_internal(%rip), %ymm2 /* table[2240]: about ln 2 / 128, leading part */
+ vmovupd 2688+__svml_dexpm1_data_internal(%rip), %ymm5 /* bit mask table[2688]; NOTE(review): value lies outside this chunk */
+ vmovups %ymm14, 288(%rsp) /* spill %ymm8..%ymm15; reloaded at .LBL_1_2 */
+ vmovups %ymm9, 192(%rsp)
+ vmovups %ymm8, 224(%rsp)
+ vmovups %ymm12, 160(%rsp)
+ vmovups %ymm11, 96(%rsp)
+ vmovups %ymm15, 320(%rsp)
+ vmovups %ymm10, 32(%rsp)
+ vmovups %ymm13, 256(%rsp)
+ vroundpd $0, %ymm4, %ymm0 /* %ymm0 = N, rounded to nearest */
+ vfnmadd213pd %ymm3, %ymm0, %ymm2 /* %ymm2 = x - N * table[2240] */
+ vfnmadd231pd 2304+__svml_dexpm1_data_internal(%rip), %ymm0, %ymm2 /* R = %ymm2 - N * table[2304] (small tail of ln 2 / 128) */
+ vaddpd 2368+__svml_dexpm1_data_internal(%rip), %ymm0, %ymm0 /* N + table[2368]; NOTE(review): presumably a shifter exposing N as integer bits */
+
+/* table lookup */
+ vandps 2432+__svml_dexpm1_data_internal(%rip), %ymm0, %ymm4 /* bits of N used below as byte offsets from %rax */
+ vandpd %ymm5, %ymm3, %ymm6 /* %ymm6 = x & mask */
+ vandnpd %ymm3, %ymm5, %ymm1 /* %ymm1 = x & ~mask, OR-ed into the result at the end */
+ vcmpnle_uqpd 2752+__svml_dexpm1_data_internal(%rip), %ymm6, %ymm7 /* lane true when !(%ymm6 <= table[2752]), unordered included */
+ .cfi_escape 0x10, 0xdb, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x60, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdc, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdd, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xde, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdf, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x20, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe0, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe1, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe2, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22
+ vandps 2496+__svml_dexpm1_data_internal(%rip), %ymm0, %ymm14 /* other bits of N, selected by table[2496] */
+ vmovupd 2112+__svml_dexpm1_data_internal(%rip), %ymm0 /* coefficient table[2112], about 1/6 */
+ vmovmskpd %ymm7, %edx /* %edx = 4-bit mask of lanes flagged by the compare */
+ vfmadd213pd 2144+__svml_dexpm1_data_internal(%rip), %ymm2, %ymm0 /* %ymm0 = R * table[2112] + table[2144] (about 1/2) */
+ vextractf128 $1, %ymm4, %xmm5 /* offsets of lanes 2 and 3 */
+ vmovd %xmm4, %ecx /* lane 0 offset */
+ vmovd %xmm5, %r9d /* lane 2 offset */
+ vpextrd $2, %xmm4, %r8d /* lane 1 offset */
+ vpextrd $2, %xmm5, %r10d /* lane 3 offset */
+ movslq %ecx, %rcx
+ movslq %r8d, %r8
+ movslq %r9d, %r9
+ movslq %r10d, %r10
+
+/* polynomial */
+ vmovupd 2048+__svml_dexpm1_data_internal(%rip), %ymm5 /* coefficient table[2048], about 1/120 */
+ vmovupd (%rax,%rcx), %xmm8 /* 16-byte table entry for lane 0 */
+ vpsllq $41, %ymm14, %ymm4 /* shift the selected N bits left by 41 */
+ vmovupd (%rax,%r8), %xmm9 /* entry for lane 1 */
+ vmovupd (%rax,%r9), %xmm11 /* entry for lane 2 */
+ vmovupd (%rax,%r10), %xmm12 /* entry for lane 3 */
+ vunpcklpd %xmm9, %xmm8, %xmm6 /* first doubles of entries 0 and 1 */
+ vunpcklpd %xmm12, %xmm11, %xmm7 /* first doubles of entries 2 and 3 */
+ vfmadd213pd 2080+__svml_dexpm1_data_internal(%rip), %ymm2, %ymm5 /* %ymm5 = R * table[2048] + table[2080] (about 1/24) */
+ vunpckhpd %xmm9, %xmm8, %xmm10 /* second doubles of entries 0 and 1 */
+ vunpckhpd %xmm12, %xmm11, %xmm13 /* second doubles of entries 2 and 3 */
+ vinsertf128 $1, %xmm7, %ymm6, %ymm15 /* first doubles for all four lanes */
+ vorpd %ymm4, %ymm15, %ymm14 /* Th = first double OR-ed with the shifted N bits */
+ vmulpd %ymm2, %ymm2, %ymm15 /* %ymm15 = R * R */
+ vfmadd213pd %ymm0, %ymm15, %ymm5 /* %ymm5 = R^2 * %ymm5 + %ymm0 */
+
+/* T-1 */
+ vmovupd 2624+__svml_dexpm1_data_internal(%rip), %ymm0 /* table[2624]; NOTE(review): presumably -1.0, given the comments here */
+ vfmadd213pd %ymm2, %ymm15, %ymm5 /* poly = R^2 * %ymm5 + R */
+ vaddpd %ymm0, %ymm14, %ymm2 /* %ymm2 = Th + table[2624] */
+ vinsertf128 $1, %xmm13, %ymm10, %ymm6 /* Tl: second doubles for all four lanes */
+
+/* Th1 = (Th-1) + Tl */
+ vfmadd213pd %ymm2, %ymm4, %ymm6 /* %ymm6 = %ymm4 * Tl + %ymm2 */
+
+/* T = Th+Tl */
+ vsubpd %ymm0, %ymm6, %ymm4 /* %ymm4 = Th1 - table[2624] */
+ vfmadd213pd %ymm6, %ymm5, %ymm4 /* %ymm4 = %ymm4 * poly + Th1 */
+ vorpd %ymm1, %ymm4, %ymm0 /* result = %ymm4 | (x & ~mask) */
+ testl %edx, %edx
+ jne .LBL_1_3 /* at least one lane was flagged: take the per-lane path */
+
+.LBL_1_2: /* common exit: reload the spilled registers and return %ymm0 */
+ vmovups 224(%rsp), %ymm8
+ cfi_restore(91)
+ vmovups 192(%rsp), %ymm9
+ cfi_restore(92)
+ vmovups 32(%rsp), %ymm10
+ cfi_restore(93)
+ vmovups 96(%rsp), %ymm11
+ cfi_restore(94)
+ vmovups 160(%rsp), %ymm12
+ cfi_restore(95)
+ vmovups 256(%rsp), %ymm13
+ cfi_restore(96)
+ vmovups 288(%rsp), %ymm14
+ cfi_restore(97)
+ vmovups 320(%rsp), %ymm15
+ cfi_restore(98)
+ movq %rbp, %rsp /* drop the aligned frame */
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ .cfi_escape 0x10, 0xdb, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x60, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdc, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdd, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xde, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdf, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x20, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe0, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe1, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe2, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22
+
+.LBL_1_3: /* per-lane path: recompute every flagged lane with the scalar routine */
+ vmovupd %ymm3, 64(%rsp) /* x[0..3] */
+ vmovupd %ymm0, 128(%rsp) /* result[0..3]; flagged lanes are overwritten below */
+ je .LBL_1_2 /* flags are still those of the testl above, and this label is reached only through jne */
+ xorl %eax, %eax
+ vzeroupper
+ movq %rsi, 8(%rsp) /* save %rsi, %rdi, %r12 and %r13 in the frame */
+ movq %rdi, (%rsp)
+ movq %r12, 24(%rsp)
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x98, 0xfe, 0xff, 0xff, 0x22
+ movl %eax, %r12d /* %r12d = lane index, starting at 0 */
+ movq %r13, 16(%rsp)
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x90, 0xfe, 0xff, 0xff, 0x22
+ movl %edx, %r13d /* %r13d = mask of flagged lanes */
+
+.LBL_1_7: /* loop head */
+ btl %r12d, %r13d /* CF = mask bit of the current lane */
+ jc .LBL_1_10
+
+.LBL_1_8: /* advance to the next lane */
+ incl %r12d
+ cmpl $4, %r12d /* four lanes in total */
+ jl .LBL_1_7
+ movq 8(%rsp), %rsi
+ cfi_restore(4)
+ movq (%rsp), %rdi
+ cfi_restore(5)
+ movq 24(%rsp), %r12
+ cfi_restore(12)
+ movq 16(%rsp), %r13
+ cfi_restore(13)
+ vmovupd 128(%rsp), %ymm0 /* reload the patched result vector */
+ jmp .LBL_1_2
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x98, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x90, 0xfe, 0xff, 0xff, 0x22
+
+.LBL_1_10: /* current lane is flagged */
+ lea 64(%rsp,%r12,8), %rdi /* %rdi = &x[lane] */
+ lea 128(%rsp,%r12,8), %rsi /* %rsi = &result[lane] */
+ call __svml_dexpm1_cout_rare_internal /* its %eax status is not examined here */
+ jmp .LBL_1_8
+
+END(_ZGVdN4v_expm1_avx2)
+
+ .align 16,0x90
+/* Scalar fallback for one lane: reads the double at (%rdi), stores the result at (%rsi), leaves a status of 0, 3 or 4 in %eax.  Temporaries live below %rsp. */
+__svml_dexpm1_cout_rare_internal:
+/* "tab[k]" below means the double at k+_imldExpHATab.  NOTE(review): this file's copy of that table is outside this chunk, so constant meanings are not stated here -- confirm against the table. */
+ cfi_startproc
+
+ xorl %eax, %eax /* default status 0 */
+ movsd (%rdi), %xmm6 /* %xmm6 = x */
+ pxor %xmm0, %xmm0 /* %xmm0 = 0.0 */
+ movzwl 6(%rdi), %edx /* bits 48..63 of x: sign, exponent, top mantissa bits */
+ comisd %xmm6, %xmm0
+ ja .LBL_2_18 /* 0.0 > x: negative input, result is -1.0 */
+ andl $32752, %edx /* 0x7ff0: keep the exponent field */
+ shrl $4, %edx /* %edx = biased exponent of x */
+ movsd %xmm6, -8(%rsp) /* copy of x, examined bytewise at .LBL_2_19 / .LBL_2_21 */
+ cmpl $2047, %edx
+ je .LBL_2_19 /* exponent all ones: Inf or NaN */
+ cmpl $970, %edx
+ jle .LBL_2_16 /* biased exponent <= 970, i.e. x < 2^-52 */
+ movsd 1080+_imldExpHATab(%rip), %xmm0
+ comisd %xmm6, %xmm0
+ jb .LBL_2_15 /* tab[1080] < x */
+ comisd 1096+_imldExpHATab(%rip), %xmm6
+ jb .LBL_2_14 /* x < tab[1096] */
+ movsd 1024+_imldExpHATab(%rip), %xmm0
+ movaps %xmm6, %xmm5 /* %xmm5 starts as x and becomes the reduced argument r */
+ mulsd %xmm6, %xmm0 /* x * tab[1024] */
+ lea _imldExpHATab(%rip), %r10 /* %r10 = table base for the indexed loads */
+ movsd %xmm0, -24(%rsp)
+ movsd -24(%rsp), %xmm1
+ movq 1136+_imldExpHATab(%rip), %rdx
+ movq %rdx, -8(%rsp) /* -8(%rsp) now holds tab[1136]; its exponent field is patched below to form the scale */
+ addsd 1032+_imldExpHATab(%rip), %xmm1 /* add tab[1032]; NOTE(review): looks like a round-to-integer shifter */
+ movsd %xmm1, -16(%rsp)
+ movsd -16(%rsp), %xmm2
+ movl -16(%rsp), %r8d /* low 32 bits of the shifted sum */
+ movl %r8d, %ecx
+ andl $63, %r8d /* j = low 6 bits: table index */
+ subsd 1032+_imldExpHATab(%rip), %xmm2 /* subtract tab[1032] again: n */
+ movsd %xmm2, -24(%rsp)
+ lea 1(%r8,%r8), %r9d /* 2*j + 1: second double of entry j */
+ movsd -24(%rsp), %xmm3
+ lea (%r8,%r8), %edi /* 2*j: first double of entry j */
+ mulsd 1104+_imldExpHATab(%rip), %xmm3
+ movsd -24(%rsp), %xmm4
+ subsd %xmm3, %xmm5 /* r = x - n * tab[1104] */
+ mulsd 1112+_imldExpHATab(%rip), %xmm4
+ movsd 1072+_imldExpHATab(%rip), %xmm2 /* Horner evaluation starts from tab[1072] */
+ subsd %xmm4, %xmm5 /* r -= n * tab[1112] */
+ mulsd %xmm5, %xmm2
+ shrl $6, %ecx /* integer bits above the 6-bit index */
+ addsd 1064+_imldExpHATab(%rip), %xmm2
+ comisd 1088+_imldExpHATab(%rip), %xmm6 /* sets the flags for the jb below; nothing in between alters them */
+ mulsd %xmm5, %xmm2
+ movsd (%r10,%rdi,8), %xmm0 /* %xmm0 = first double of entry j */
+ lea 1023(%rcx), %edx /* %edx = %ecx + 1023 */
+ addsd 1056+_imldExpHATab(%rip), %xmm2
+ mulsd %xmm5, %xmm2
+ addsd 1048+_imldExpHATab(%rip), %xmm2
+ mulsd %xmm5, %xmm2
+ addsd 1040+_imldExpHATab(%rip), %xmm2
+ mulsd %xmm5, %xmm2
+ mulsd %xmm5, %xmm2 /* p = r^2 * (tab[1040] + r*(tab[1048] + r*(tab[1056] + r*(tab[1064] + r*tab[1072])))) */
+ addsd %xmm5, %xmm2 /* p += r */
+ addsd (%r10,%r9,8), %xmm2 /* p += second double of entry j */
+ mulsd %xmm0, %xmm2 /* p *= first double of entry j */
+ jb .LBL_2_10 /* x < tab[1088] */
+ andl $2047, %edx
+ addsd %xmm0, %xmm2 /* p += first double of entry j */
+ cmpl $2046, %edx
+ ja .LBL_2_9 /* exponent value above 2046: scale in two steps */
+ movq 1136+_imldExpHATab(%rip), %rcx
+ shrq $48, %rcx /* top 16 bits of tab[1136] */
+ shll $4, %edx /* move the exponent into bits 4..14 */
+ andl $-32753, %ecx /* clear bits 4..14 (the complement of 0x7ff0) */
+ orl %edx, %ecx
+ movw %cx, -2(%rsp) /* becomes the top 16 bits of the double at -8(%rsp) */
+ movsd -8(%rsp), %xmm0 /* scale factor */
+ mulsd %xmm0, %xmm2
+ movsd %xmm2, (%rsi)
+ ret
+
+.LBL_2_9: /* build the scale with exponent - 1, then multiply by tab[1152] afterwards */
+ decl %edx
+ andl $2047, %edx
+ movzwl -2(%rsp), %ecx
+ shll $4, %edx
+ andl $-32753, %ecx
+ orl %edx, %ecx
+ movw %cx, -2(%rsp)
+ movsd -8(%rsp), %xmm0
+ mulsd %xmm0, %xmm2
+ mulsd 1152+_imldExpHATab(%rip), %xmm2
+ movsd %xmm2, (%rsi)
+ ret
+
+.LBL_2_10: /* x < tab[1088]: scale exponent is offset by 1083 instead of 1023; status becomes 4 */
+ addl $1083, %ecx
+ andl $2047, %ecx
+ movl %ecx, %eax
+ movzwl -2(%rsp), %edx
+ shll $4, %eax
+ andl $-32753, %edx
+ orl %eax, %edx
+ movw %dx, -2(%rsp)
+ movsd -8(%rsp), %xmm1 /* scale */
+ mulsd %xmm1, %xmm2 /* scaled p */
+ mulsd %xmm0, %xmm1 /* scaled first double of entry j */
+ movaps %xmm1, %xmm0
+ addsd %xmm2, %xmm0 /* s = scaled first double + scaled p */
+ cmpl $50, %ecx
+ ja .LBL_2_12
+ mulsd 1160+_imldExpHATab(%rip), %xmm0 /* single multiply by tab[1160] */
+ movsd %xmm0, (%rsi)
+ jmp .LBL_2_13
+
+.LBL_2_12: /* split s into two pieces and scale each by tab[1160] before the final add */
+ movsd %xmm0, -72(%rsp) /* s */
+ movsd -72(%rsp), %xmm0
+ subsd %xmm0, %xmm1
+ movsd %xmm1, -64(%rsp)
+ movsd -64(%rsp), %xmm1
+ addsd %xmm2, %xmm1
+ movsd %xmm1, -64(%rsp) /* e = (scaled first double - s) + scaled p */
+ movsd -72(%rsp), %xmm2
+ mulsd 1168+_imldExpHATab(%rip), %xmm2
+ movsd %xmm2, -56(%rsp) /* c = s * tab[1168] */
+ movsd -72(%rsp), %xmm4
+ movsd -56(%rsp), %xmm3
+ addsd %xmm3, %xmm4
+ movsd %xmm4, -48(%rsp) /* s + c */
+ movsd -48(%rsp), %xmm6
+ movsd -56(%rsp), %xmm5
+ subsd %xmm5, %xmm6
+ movsd %xmm6, -40(%rsp) /* hi = (s + c) - c */
+ movsd -72(%rsp), %xmm8
+ movsd -40(%rsp), %xmm7
+ subsd %xmm7, %xmm8
+ movsd %xmm8, -32(%rsp) /* s - hi */
+ movsd -64(%rsp), %xmm10
+ movsd -32(%rsp), %xmm9
+ addsd %xmm9, %xmm10
+ movsd %xmm10, -32(%rsp) /* lo = e + (s - hi) */
+ movsd -40(%rsp), %xmm11
+ mulsd 1160+_imldExpHATab(%rip), %xmm11
+ movsd %xmm11, -40(%rsp) /* hi * tab[1160] */
+ movsd -32(%rsp), %xmm12
+ mulsd 1160+_imldExpHATab(%rip), %xmm12
+ movsd %xmm12, -32(%rsp) /* lo * tab[1160] */
+ movsd -40(%rsp), %xmm14
+ movsd -32(%rsp), %xmm13
+ addsd %xmm13, %xmm14
+ movsd %xmm14, (%rsi) /* result = scaled hi + scaled lo */
+
+.LBL_2_13:
+ movl $4, %eax /* status 4 */
+ ret
+
+.LBL_2_14: /* x < tab[1096]: result = tab[1120]^2, status 4 */
+ movsd 1120+_imldExpHATab(%rip), %xmm0
+ movl $4, %eax
+ mulsd %xmm0, %xmm0
+ movsd %xmm0, (%rsi)
+ ret
+
+.LBL_2_15: /* tab[1080] < x: result = tab[1128]^2, status 3 */
+ movsd 1128+_imldExpHATab(%rip), %xmm0
+ movl $3, %eax
+ mulsd %xmm0, %xmm0
+ movsd %xmm0, (%rsi)
+ ret
+
+.LBL_2_16: /* x < 2^-52: result = tab[1144] + x */
+ movsd 1144+_imldExpHATab(%rip), %xmm0
+ addsd %xmm6, %xmm0
+ movsd %xmm0, (%rsi)
+ ret
+
+.LBL_2_18: /* negative x: result is exactly -1.0, status 0 */
+ movq $0xbff0000000000000, %rax
+ movq %rax, (%rsi)
+ xorl %eax, %eax
+ ret
+
+.LBL_2_19: /* exponent field is 2047 */
+ movb -1(%rsp), %dl /* top byte of the saved copy of x */
+ andb $-128, %dl
+ cmpb $-128, %dl
+ je .LBL_2_21 /* sign bit set */
+
+.LBL_2_20: /* result = x * x */
+ mulsd %xmm6, %xmm6
+ movsd %xmm6, (%rsi)
+ ret
+
+.LBL_2_21: /* sign bit set: take .LBL_2_20 unless all 52 mantissa bits are zero */
+ testl $1048575, -4(%rsp) /* upper 20 mantissa bits */
+ jne .LBL_2_20
+ cmpl $0, -8(%rsp) /* lower 32 mantissa bits */
+ jne .LBL_2_20
+ movq 1136+_imldExpHATab(%rip), %rdx
+ movq %rdx, (%rsi) /* result = tab[1136] */
+ ret
+
+ cfi_endproc
+
+ .type __svml_dexpm1_cout_rare_internal,@function
+ .size __svml_dexpm1_cout_rare_internal,.-__svml_dexpm1_cout_rare_internal
+
+ .section .rodata, "a"
+ .align 64
+
+__svml_dexpm1_data_internal:
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 2818572288
+ .long 5693
+ .long 1457015399
+ .long 1044362035
+ .long 1073741824
+ .long 11418
+ .long 4044949557
+ .long 3191375865
+ .long 3892314112
+ .long 17173
+ .long 794616807
+ .long 1041997793
+ .long 3489660928
+ .long 22960
+ .long 2715237930
+ .long 1044950059
+ .long 671088640
+ .long 28779
+ .long 3698181124
+ .long 1044242285
+ .long 402653184
+ .long 34629
+ .long 36755401
+ .long 1042114290
+ .long 3355443200
+ .long 40510
+ .long 438898435
+ .long 1044789148
+ .long 1879048192
+ .long 46424
+ .long 2230008375
+ .long 3192402871
+ .long 671088640
+ .long 52370
+ .long 3120814979
+ .long 1045140031
+ .long 805306368
+ .long 58348
+ .long 269498903
+ .long 1044815501
+ .long 2952790016
+ .long 64358
+ .long 3705630242
+ .long 3182612048
+ .long 3489660928
+ .long 70401
+ .long 2766913307
+ .long 1039293264
+ .long 3221225472
+ .long 76477
+ .long 4276399797
+ .long 1041960050
+ .long 2952790016
+ .long 82586
+ .long 80474087
+ .long 3191172386
+ .long 3355443200
+ .long 88728
+ .long 613423790
+ .long 1042592202
+ .long 1073741824
+ .long 94904
+ .long 689505308
+ .long 3192657268
+ .long 939524096
+ .long 101113
+ .long 930606615
+ .long 1042387389
+ .long 3892314112
+ .long 107355
+ .long 2850403528
+ .long 1045134939
+ .long 2281701376
+ .long 113632
+ .long 1663725767
+ .long 3192904985
+ .long 805306368
+ .long 119943
+ .long 2810207104
+ .long 1043762074
+ .long 536870912
+ .long 126288
+ .long 3854830848
+ .long 1044899528
+ .long 2281701376
+ .long 132667
+ .long 2397289153
+ .long 1041802037
+ .long 2415919104
+ .long 139081
+ .long 1649749971
+ .long 1043848649
+ .long 1879048192
+ .long 145530
+ .long 2039734354
+ .long 3191384540
+ .long 1342177280
+ .long 152014
+ .long 849302817
+ .long 3188938352
+ .long 1744830464
+ .long 158533
+ .long 383003846
+ .long 3191925785
+ .long 3758096384
+ .long 165087
+ .long 158134621
+ .long 1044338232
+ .long 4160749568
+ .long 171677
+ .long 4137603445
+ .long 3192324360
+ .long 3489660928
+ .long 178303
+ .long 4290499725
+ .long 1043028785
+ .long 2818572288
+ .long 184965
+ .long 1275031083
+ .long 3190931407
+ .long 2818572288
+ .long 191663
+ .long 1629266164
+ .long 1043587829
+ .long 134217728
+ .long 198398
+ .long 2842642093
+ .long 1044483512
+ .long 0
+ .long 205169
+ .long 1985360263
+ .long 3192756542
+ .long 2952790016
+ .long 211976
+ .long 4200916017
+ .long 1044586679
+ .long 1610612736
+ .long 218821
+ .long 3450763054
+ .long 3189463043
+ .long 939524096
+ .long 225703
+ .long 2870834528
+ .long 3190336198
+ .long 1879048192
+ .long 232622
+ .long 3553800616
+ .long 3192377660
+ .long 939524096
+ .long 239579
+ .long 1219436983
+ .long 3192443648
+ .long 3221225472
+ .long 246573
+ .long 606077177
+ .long 1044946247
+ .long 1342177280
+ .long 253606
+ .long 3998375791
+ .long 3192876638
+ .long 134217728
+ .long 260677
+ .long 586810495
+ .long 3192560639
+ .long 536870912
+ .long 267786
+ .long 2676240988
+ .long 1044345570
+ .long 3623878656
+ .long 274933
+ .long 1841759300
+ .long 1043663497
+ .long 1610612736
+ .long 282120
+ .long 1086643152
+ .long 1041785419
+ .long 4026531840
+ .long 289345
+ .long 1148024454
+ .long 3192330237
+ .long 3087007744
+ .long 296610
+ .long 2137125602
+ .long 3191993881
+ .long 4026531840
+ .long 303914
+ .long 3437605242
+ .long 1043004027
+ .long 3623878656
+ .long 311258
+ .long 3340100419
+ .long 3192278702
+ .long 2550136832
+ .long 318642
+ .long 3594204911
+ .long 1044372944
+ .long 2013265920
+ .long 326066
+ .long 2502738549
+ .long 3191221557
+ .long 2684354560
+ .long 333530
+ .long 235444137
+ .long 1044806450
+ .long 1476395008
+ .long 341035
+ .long 3792656324
+ .long 3191220999
+ .long 3355443200
+ .long 348580
+ .long 1982428721
+ .long 1044573328
+ .long 939524096
+ .long 356167
+ .long 1502688512
+ .long 3191123330
+ .long 3623878656
+ .long 363794
+ .long 383164906
+ .long 3192603072
+ .long 3758096384
+ .long 371463
+ .long 3040458367
+ .long 3192241502
+ .long 2281701376
+ .long 379174
+ .long 3087934862
+ .long 1044564533
+ .long 402653184
+ .long 386927
+ .long 3163234522
+ .long 3192035061
+ .long 3087007744
+ .long 394721
+ .long 2332520281
+ .long 1043819968
+ .long 2952790016
+ .long 402558
+ .long 1492679939
+ .long 1041050306
+ .long 939524096
+ .long 410438
+ .long 29656007
+ .long 3192494567
+ .long 2147483648
+ .long 418360
+ .long 612974287
+ .long 1044556049
+ .long 3623878656
+ .long 426325
+ .long 1740578119
+ .long 3192756916
+ .long 1744830464
+ .long 434334
+ .long 922176773
+ .long 3191344195
+ .long 2013265920
+ .long 442386
+ .long 143936179
+ .long 3192365354
+ .long 1073741824
+ .long 450482
+ .long 2288974058
+ .long 3192706862
+ .long 4160749568
+ .long 458621
+ .long 1022918171
+ .long 1043667272
+ .long 3892314112
+ .long 466805
+ .long 2074373662
+ .long 1043172334
+ .long 1207959552
+ .long 475034
+ .long 2007733066
+ .long 1042591790
+ .long 1476395008
+ .long 483307
+ .long 1946752598
+ .long 3191593347
+ .long 1342177280
+ .long 491625
+ .long 1328713708
+ .long 3187724640
+ .long 1879048192
+ .long 499988
+ .long 918464641
+ .long 1045387276
+ .long 0
+ .long 508397
+ .long 667194164
+ .long 1043532819
+ .long 939524096
+ .long 516851
+ .long 3740938196
+ .long 3191016217
+ .long 1476395008
+ .long 525351
+ .long 1917817036
+ .long 3192786735
+ .long 2550136832
+ .long 533897
+ .long 682424459
+ .long 1043647713
+ .long 1207959552
+ .long 542490
+ .long 857395348
+ .long 3191718789
+ .long 2550136832
+ .long 551129
+ .long 1678188781
+ .long 1045046423
+ .long 3623878656
+ .long 559815
+ .long 2523214013
+ .long 1043900009
+ .long 1073741824
+ .long 568549
+ .long 3671932459
+ .long 1044468998
+ .long 402653184
+ .long 577330
+ .long 1091392995
+ .long 3191122871
+ .long 2550136832
+ .long 586158
+ .long 1656324724
+ .long 1043421043
+ .long 134217728
+ .long 595035
+ .long 742731994
+ .long 1045204990
+ .long 2952790016
+ .long 603959
+ .long 2659845000
+ .long 1042921660
+ .long 3355443200
+ .long 612932
+ .long 2001576987
+ .long 1045316240
+ .long 2684354560
+ .long 621954
+ .long 976271096
+ .long 3187726552
+ .long 1879048192
+ .long 631025
+ .long 927342903
+ .long 1042890999
+ .long 2147483648
+ .long 640145
+ .long 2162418230
+ .long 1044717444
+ .long 402653184
+ .long 649315
+ .long 830622888
+ .long 1044263474
+ .long 2013265920
+ .long 658534
+ .long 630511316
+ .long 1045098283
+ .long 4026531840
+ .long 667803
+ .long 1698296944
+ .long 3192762006
+ .long 2952790016
+ .long 677123
+ .long 3831108133
+ .long 1044508970
+ .long 268435456
+ .long 686494
+ .long 3279515609
+ .long 1045005722
+ .long 1476395008
+ .long 695915
+ .long 98608862
+ .long 3192139794
+ .long 3221225472
+ .long 705387
+ .long 529675467
+ .long 3188065859
+ .long 2550136832
+ .long 714911
+ .long 3588780877
+ .long 1043705146
+ .long 671088640
+ .long 724487
+ .long 1493713581
+ .long 1043913574
+ .long 3087007744
+ .long 734114
+ .long 3182425146
+ .long 1041483134
+ .long 2415919104
+ .long 743794
+ .long 864959479
+ .long 3191919926
+ .long 4026531840
+ .long 753526
+ .long 928333188
+ .long 1044896498
+ .long 805306368
+ .long 763312
+ .long 813799033
+ .long 1042555081
+ .long 2415919104
+ .long 773150
+ .long 2300504125
+ .long 1041428596
+ .long 1476395008
+ .long 783042
+ .long 1142965944
+ .long 1045346544
+ .long 3758096384
+ .long 792987
+ .long 518977959
+ .long 3192116587
+ .long 1610612736
+ .long 802987
+ .long 1972387576
+ .long 3179791049
+ .long 805306368
+ .long 813041
+ .long 1264446592
+ .long 3191505643
+ .long 2550136832
+ .long 823149
+ .long 1467128350
+ .long 3192899778
+ .long 3758096384
+ .long 833312
+ .long 3075989921
+ .long 3192423292
+ .long 1476395008
+ .long 843531
+ .long 836600757
+ .long 3192197600
+ .long 1207959552
+ .long 853805
+ .long 3697834264
+ .long 1044397131
+ .long 134217728
+ .long 864135
+ .long 364651635
+ .long 1038816227
+ .long 3758096384
+ .long 874520
+ .long 3335598035
+ .long 3192398555
+ .long 402653184
+ .long 884963
+ .long 2219290723
+ .long 3191039942
+ .long 0
+ .long 895462
+ .long 730095629
+ .long 1045354900
+ .long 4026531840
+ .long 906017
+ .long 39537391
+ .long 1044909475
+ .long 805306368
+ .long 916631
+ .long 4123739734
+ .long 1045159130
+ .long 402653184
+ .long 927302
+ .long 3136734448
+ .long 3192410870
+ .long 3892314112
+ .long 938030
+ .long 1982905152
+ .long 3189583874
+ .long 4160749568
+ .long 948817
+ .long 442147929
+ .long 1045314148
+ .long 2684354560
+ .long 959663
+ .long 3425467293
+ .long 1044718726
+ .long 805306368
+ .long 970568
+ .long 2073198199
+ .long 3192097984
+ .long 4026531840
+ .long 981531
+ .long 2291008222
+ .long 3191466589
+ .long 939524096
+ .long 992555
+ .long 372190496
+ .long 3189934253
+ .long 1476395008
+ .long 1003638
+ .long 54164518
+ .long 1045131818
+ .long 2952790016
+ .long 1014781
+ .long 1672962650
+ .long 3192068623
+ .long 2147483648
+ .long 1025985
+ .long 2196310654
+ .long 1043982605
+ .long 671088640
+ .long 1037250
+ .long 2286661074
+ .long 1045199759
+ .long 1753710392
+ .long 1065423121
+ .long 1753710392
+ .long 1065423121
+ .long 1753710392
+ .long 1065423121
+ .long 1753710392
+ .long 1065423121
+ .long 3265904883
+ .long 1067799893
+ .long 3265904883
+ .long 1067799893
+ .long 3265904883
+ .long 1067799893
+ .long 3265904883
+ .long 1067799893
+ .long 1431655453
+ .long 1069897045
+ .long 1431655453
+ .long 1069897045
+ .long 1431655453
+ .long 1069897045
+ .long 1431655453
+ .long 1069897045
+ .long 4294966876
+ .long 1071644671
+ .long 4294966876
+ .long 1071644671
+ .long 4294966876
+ .long 1071644671
+ .long 4294966876
+ .long 1071644671
+ .long 1697350398
+ .long 1080497479
+ .long 1697350398
+ .long 1080497479
+ .long 1697350398
+ .long 1080497479
+ .long 1697350398
+ .long 1080497479
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4277665792
+ .long 1064709698
+ .long 4277665792
+ .long 1064709698
+ .long 4277665792
+ .long 1064709698
+ .long 4277665792
+ .long 1064709698
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 2882134964
+ .long 1027723129
+ .long 2882134964
+ .long 1027723129
+ .long 2882134964
+ .long 1027723129
+ .long 2882134964
+ .long 1027723129
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 2095104
+ .long 1123549184
+ .long 2095104
+ .long 1123549184
+ .long 2095104
+ .long 1123549184
+ .long 2095104
+ .long 1123549184
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 2032
+ .long 0
+ .long 2032
+ .long 0
+ .long 2032
+ .long 0
+ .long 2032
+ .long 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4192256
+ .long 0
+ .long 4192256
+ .long 0
+ .long 4192256
+ .long 0
+ .long 4192256
+ .long 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4160749568
+ .long 4294967295
+ .long 4160749568
+ .long 4294967295
+ .long 4160749568
+ .long 4294967295
+ .long 4160749568
+ .long 4294967295
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 0
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4294967295
+ .long 2147483647
+ .long 4294967295
+ .long 2147483647
+ .long 4294967295
+ .long 2147483647
+ .long 4294967295
+ .long 2147483647
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1287323203
+ .long 1082531232
+ .long 1287323203
+ .long 1082531232
+ .long 1287323203
+ .long 1082531232
+ .long 1287323203
+ .long 1082531232
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4277811695
+ .long 1064709698
+ .long 4277811695
+ .long 1064709698
+ .long 4277811695
+ .long 1064709698
+ .long 4277811695
+ .long 1064709698
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1047552
+ .long 1124597760
+ .long 1047552
+ .long 1124597760
+ .long 1047552
+ .long 1124597760
+ .long 1047552
+ .long 1124597760
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1016
+ .long 0
+ .long 1016
+ .long 0
+ .long 1016
+ .long 0
+ .long 1016
+ .long 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 2096128
+ .long 0
+ .long 2096128
+ .long 0
+ .long 2096128
+ .long 0
+ .long 2096128
+ .long 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .type __svml_dexpm1_data_internal,@object
+ .size __svml_dexpm1_data_internal,3072
+ .align 32
+
+_imldExpHATab: /* 294 32-bit words (1176 bytes): 64 entries of two doubles each, then 19 scalar doubles from byte offset 1024. Each double is written low word first. NOTE(review): the entries look like 2^(j/64) with a small correction term; the routine that reads this copy is not shown here - confirm. */
+ .long 0 /* entry 0: first double is 1.0, second double is 0.0 */
+ .long 1072693248
+ .long 0
+ .long 0
+ .long 1048019041
+ .long 1072704666
+ .long 2631457885
+ .long 3161546771
+ .long 3541402996
+ .long 1072716208
+ .long 896005651
+ .long 1015861842
+ .long 410360776
+ .long 1072727877
+ .long 1642514529
+ .long 1012987726
+ .long 1828292879
+ .long 1072739672
+ .long 1568897901
+ .long 1016568486
+ .long 852742562
+ .long 1072751596
+ .long 1882168529
+ .long 1010744893
+ .long 3490863953
+ .long 1072763649
+ .long 707771662
+ .long 3163903570
+ .long 2930322912
+ .long 1072775834
+ .long 3117806614
+ .long 3163670819
+ .long 1014845819
+ .long 1072788152
+ .long 3936719688
+ .long 3162512149
+ .long 3949972341
+ .long 1072800603
+ .long 1058231231
+ .long 1015777676
+ .long 828946858
+ .long 1072813191
+ .long 1044000608
+ .long 1016786167
+ .long 2288159958
+ .long 1072825915
+ .long 1151779725
+ .long 1015705409
+ .long 1853186616
+ .long 1072838778
+ .long 3819481236
+ .long 1016499965
+ .long 1709341917
+ .long 1072851781
+ .long 2552227826
+ .long 1015039787
+ .long 4112506593
+ .long 1072864925
+ .long 1829350193
+ .long 1015216097
+ .long 2799960843
+ .long 1072878213
+ .long 1913391796
+ .long 1015756674
+ .long 171030293
+ .long 1072891646
+ .long 1303423926
+ .long 1015238005
+ .long 2992903935
+ .long 1072905224
+ .long 1574172746
+ .long 1016061241
+ .long 926591435
+ .long 1072918951
+ .long 3427487848
+ .long 3163704045
+ .long 887463927
+ .long 1072932827
+ .long 1049900754
+ .long 3161575912
+ .long 1276261410
+ .long 1072946854
+ .long 2804567149
+ .long 1015390024
+ .long 569847338
+ .long 1072961034
+ .long 1209502043
+ .long 3159926671
+ .long 1617004845
+ .long 1072975368
+ .long 1623370769
+ .long 1011049453
+ .long 3049340112
+ .long 1072989858
+ .long 3667985273
+ .long 1013894369
+ .long 3577096743
+ .long 1073004506
+ .long 3145379760
+ .long 1014403278
+ .long 1990012071
+ .long 1073019314
+ .long 7447438
+ .long 3163526196
+ .long 1453150082
+ .long 1073034283
+ .long 3171891295
+ .long 3162037958
+ .long 917841882
+ .long 1073049415
+ .long 419288974
+ .long 1016280325
+ .long 3712504873
+ .long 1073064711
+ .long 3793507337
+ .long 1016095713
+ .long 363667784
+ .long 1073080175
+ .long 728023093
+ .long 1016345318
+ .long 2956612997
+ .long 1073095806
+ .long 1005538728
+ .long 3163304901
+ .long 2186617381
+ .long 1073111608
+ .long 2018924632
+ .long 3163803357
+ .long 1719614413
+ .long 1073127582
+ .long 3210617384
+ .long 3163796463
+ .long 1013258799
+ .long 1073143730
+ .long 3094194670
+ .long 3160631279
+ .long 3907805044
+ .long 1073160053
+ .long 2119843535
+ .long 3161988964
+ .long 1447192521
+ .long 1073176555
+ .long 508946058
+ .long 3162904882
+ .long 1944781191
+ .long 1073193236
+ .long 3108873501
+ .long 3162190556
+ .long 919555682
+ .long 1073210099
+ .long 2882956373
+ .long 1013312481
+ .long 2571947539
+ .long 1073227145
+ .long 4047189812
+ .long 3163777462
+ .long 2604962541
+ .long 1073244377
+ .long 3631372142
+ .long 3163870288
+ .long 1110089947
+ .long 1073261797
+ .long 3253791412
+ .long 1015920431
+ .long 2568320822
+ .long 1073279406
+ .long 1509121860
+ .long 1014756995
+ .long 2966275557
+ .long 1073297207
+ .long 2339118633
+ .long 3160254904
+ .long 2682146384
+ .long 1073315202
+ .long 586480042
+ .long 3163702083
+ .long 2191782032
+ .long 1073333393
+ .long 730975783
+ .long 1014083580
+ .long 2069751141
+ .long 1073351782
+ .long 576856675
+ .long 3163014404
+ .long 2990417245
+ .long 1073370371
+ .long 3552361237
+ .long 3163667409
+ .long 1434058175
+ .long 1073389163
+ .long 1853053619
+ .long 1015310724
+ .long 2572866477
+ .long 1073408159
+ .long 2462790535
+ .long 1015814775
+ .long 3092190715
+ .long 1073427362
+ .long 1457303226
+ .long 3159737305
+ .long 4076559943
+ .long 1073446774
+ .long 950899508
+ .long 3160987380
+ .long 2420883922
+ .long 1073466398
+ .long 174054861
+ .long 1014300631
+ .long 3716502172
+ .long 1073486235
+ .long 816778419
+ .long 1014197934
+ .long 777507147
+ .long 1073506289
+ .long 3507050924
+ .long 1015341199
+ .long 3706687593
+ .long 1073526560
+ .long 1821514088
+ .long 1013410604
+ .long 1242007932
+ .long 1073547053
+ .long 1073740399
+ .long 3163532637
+ .long 3707479175
+ .long 1073567768
+ .long 2789017511
+ .long 1014276997
+ .long 64696965
+ .long 1073588710
+ .long 3586233004
+ .long 1015962192
+ .long 863738719
+ .long 1073609879
+ .long 129252895
+ .long 3162690849
+ .long 3884662774
+ .long 1073631278
+ .long 1614448851
+ .long 1014281732
+ .long 2728693978
+ .long 1073652911
+ .long 2413007344
+ .long 3163551506
+ .long 3999357479
+ .long 1073674779
+ .long 1101668360
+ .long 1015989180
+ .long 1533953344
+ .long 1073696886
+ .long 835814894
+ .long 1015702697
+ .long 2174652632
+ .long 1073719233
+ .long 1301400989
+ .long 1014466875
+ .long 1697350398 /* byte offset 1024: end of the 64-entry table, scalar constants follow (one double per pair of words) */
+ .long 1079448903
+ .long 0
+ .long 1127743488
+ .long 0
+ .long 1071644672
+ .long 1431652600
+ .long 1069897045
+ .long 1431670732
+ .long 1067799893
+ .long 984555731
+ .long 1065423122
+ .long 472530941
+ .long 1062650218
+ .long 4277811695
+ .long 1082535490
+ .long 3715808466
+ .long 3230016299
+ .long 3576508497
+ .long 3230091536
+ .long 4277796864
+ .long 1065758274
+ .long 3164486458
+ .long 1025308570
+ .long 1
+ .long 1048576
+ .long 4294967295 /* byte offset 1128: 0x7fefffffffffffff, the largest finite double */
+ .long 2146435071
+ .long 0
+ .long 0
+ .long 0 /* byte offset 1144: 1.0 */
+ .long 1072693248
+ .long 0 /* byte offset 1152: 2.0 */
+ .long 1073741824
+ .long 0
+ .long 1009778688
+ .long 0
+ .long 1106771968
+ .type _imldExpHATab,@object
+ .size _imldExpHATab,1176
+ .align 8
+
+.FLT_72: /* 8-byte constant; its user is not visible in this part of the file */
+ .long 0x00000000,0xbff00000 /* low word, high word of the double -1.0 */
+ .type .FLT_72,@object
+ .size .FLT_72,8
new file mode 100644
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized expm1, vector length is 8.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVeN8v_expm1 _ZGVeN8v_expm1_avx2_wrapper /* every _ZGVeN8v_expm1 token in the file included below is renamed to _ZGVeN8v_expm1_avx2_wrapper */
+#include "../svml_d_expm18_core.S" /* NOTE(review): presumably defines _ZGVeN8v_expm1; its contents are not shown here - confirm */
new file mode 100644
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized expm1, vector length is 8.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN8v_expm1 /* symbol name handed to libc_ifunc_redirected below */
+#include "ifunc-mathvec-avx512-skx.h" /* NOTE(review): REDIRECT_NAME and IFUNC_SELECTOR are not defined in this file; presumably this header provides them - confirm */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED /* the hidden alias below is emitted only when SHARED is defined */
+__hidden_ver1 (_ZGVeN8v_expm1, __GI__ZGVeN8v_expm1, __redirect__ZGVeN8v_expm1)
+ __attribute__ ((visibility ("hidden")));
+#endif
new file mode 100644
@@ -0,0 +1,1037 @@
+/* Function expm1 vectorized with AVX-512.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ * After computing exp(x) in high-low parts, an accurate computation is performed to obtain exp(x)-1
+ * Typical exp() implementation, except that:
+ * - tables are small (16 elements), allowing for fast gathers
+ * - all arguments processed in the main path
+ * - final VSCALEF assists branch-free design (correct overflow/underflow and special case responses)
+ * - a VAND is used to ensure the reduced argument |R|<2, even for large inputs
+ * - RZ mode used to avoid overflow to +/-Inf for x*log2(e); helps with special case handling
+ *
+ *
+ */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(_ZGVeN8v_expm1_skx) /* In: %zmm0 = x. Out: %zmm0 = result. Lanes flagged by the compare below are redone one at a time by __svml_dexpm1_cout_rare_internal. */
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-64, %rsp /* align the frame to 64 bytes */
+ subq $192, %rsp /* scratch area: 0-63 saved registers, 64-127 copy of x, 128-191 copy of the result (see .LBL_1_3) */
+ vmovups 256+__svml_dexpm1_data_internal_avx512(%rip), %zmm6 /* multiplier applied to x in the first FMA */
+ vmovups 320+__svml_dexpm1_data_internal_avx512(%rip), %zmm4 /* shifter added in the first FMA and subtracted again to get Z0 */
+ vmovups 512+__svml_dexpm1_data_internal_avx512(%rip), %zmm11 /* first multiplier of Z0 in the reduction */
+ vmovups 576+__svml_dexpm1_data_internal_avx512(%rip), %zmm5 /* second multiplier of Z0 in the reduction */
+ vmovups 384+__svml_dexpm1_data_internal_avx512(%rip), %zmm3 /* bound that x is compared against */
+ vmovups 960+__svml_dexpm1_data_internal_avx512(%rip), %zmm13
+ vmovups 1024+__svml_dexpm1_data_internal_avx512(%rip), %zmm15
+
+/* polynomial */
+ vmovups 832+__svml_dexpm1_data_internal_avx512(%rip), %zmm12
+
+/* set Z0=max(Z0, -128.0) */
+ vmovups 640+__svml_dexpm1_data_internal_avx512(%rip), %zmm8
+ vmovups 1088+__svml_dexpm1_data_internal_avx512(%rip), %zmm14
+ vmovups __svml_dexpm1_data_internal_avx512(%rip), %zmm9 /* first 8 doubles of the table at offset 0 */
+ vmovaps %zmm0, %zmm2 /* zmm2 holds x from here on; zmm0 is reused */
+
+/* 2^(52-4)*1.5 + x * log2(e) */
+ vfmadd213pd {rn-sae}, %zmm4, %zmm2, %zmm6 /* zmm6 = x*zmm6 + zmm4 */
+ vmovups 128+__svml_dexpm1_data_internal_avx512(%rip), %zmm0 /* first 8 doubles of the table at offset 128 */
+ vcmppd $21, {sae}, %zmm3, %zmm2, %k0 /* predicate 21 (NLT_UQ): k0 marks lanes where x is not less than zmm3, NaN included */
+
+/* Z0 ~ x*log2(e), rounded to 4 fractional bits */
+ vsubpd {rn-sae}, %zmm4, %zmm6, %zmm7 /* zmm7 = Z0 */
+ vpermt2pd 64+__svml_dexpm1_data_internal_avx512(%rip), %zmm6, %zmm9 /* zmm9 = Th: lookup in the 16 doubles at offsets 0 and 64, indexed by zmm6 */
+ vpermt2pd 192+__svml_dexpm1_data_internal_avx512(%rip), %zmm6, %zmm0 /* zmm0 = Tlr: same lookup in the 16 doubles at offsets 128 and 192 */
+ vandpd 448+__svml_dexpm1_data_internal_avx512(%rip), %zmm2, %zmm1 /* zmm1 = x AND the mask at offset 448; OR-ed into the result at the end */
+
+/* R = x - Z0*log(2) */
+ vfnmadd213pd {rn-sae}, %zmm2, %zmm7, %zmm11 /* zmm11 = x - Z0*zmm11 */
+ vmaxpd {sae}, %zmm8, %zmm7, %zmm10 /* zmm10 = max(Z0, zmm8) */
+ vfnmadd231pd {rn-sae}, %zmm7, %zmm5, %zmm11 /* zmm11 -= Z0*zmm5 */
+ kmovw %k0, %edx /* edx = bit mask of lanes that need the scalar fallback */
+
+/* ensure |R|<2 even for special cases */
+ vandpd 704+__svml_dexpm1_data_internal_avx512(%rip), %zmm11, %zmm3 /* zmm3 = R */
+ vmovups 896+__svml_dexpm1_data_internal_avx512(%rip), %zmm11
+
+/* scale Th */
+ vscalefpd {rn-sae}, %zmm10, %zmm9, %zmm4 /* zmm4 = Th scaled by 2^floor(zmm10) */
+ vfmadd231pd {rn-sae}, %zmm3, %zmm13, %zmm15 /* zmm15 = R*[960] + [1024] */
+ vfmadd231pd {rn-sae}, %zmm3, %zmm12, %zmm11 /* zmm11 = R*[832] + [896] */
+ vmovups 1152+__svml_dexpm1_data_internal_avx512(%rip), %zmm12
+ vmulpd {rn-sae}, %zmm3, %zmm3, %zmm13 /* zmm13 = R*R */
+ vfmadd231pd {rn-sae}, %zmm3, %zmm14, %zmm12 /* zmm12 = R*[1088] + [1152] */
+ vfmadd213pd {rn-sae}, %zmm15, %zmm13, %zmm11 /* zmm11 = zmm11*R*R + zmm15 */
+ vfmadd213pd {rn-sae}, %zmm12, %zmm13, %zmm11 /* zmm11 = zmm11*R*R + zmm12 */
+
+/* Tlr + R+ R*Poly */
+ vfmadd213pd {rn-sae}, %zmm0, %zmm13, %zmm11 /* zmm11 = zmm11*R*R + Tlr */
+
+/* Th - 1 */
+ vmovups 1216+__svml_dexpm1_data_internal_avx512(%rip), %zmm0
+ vaddpd {rn-sae}, %zmm3, %zmm11, %zmm14 /* zmm14 = zmm11 + R */
+ vsubpd {rn-sae}, %zmm0, %zmm4, %zmm15 /* zmm15 = scaled Th minus the constant at offset 1216 */
+
+/* (Th-1)+Th*(Tlr + R+ R*Poly) */
+ vfmadd213pd {rn-sae}, %zmm15, %zmm14, %zmm4
+ vorpd %zmm1, %zmm4, %zmm0 /* merge in the bits saved in zmm1 */
+ testl %edx, %edx /* any lane flagged for the fallback? */
+ jne .LBL_1_3
+
+.LBL_1_2: /* common exit: restore %rsp/%rbp and return %zmm0 */
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+
+.LBL_1_3: /* fallback: spill x and the vector result, then patch flagged lanes one by one */
+ vmovups %zmm2, 64(%rsp)
+ vmovups %zmm0, 128(%rsp)
+ je .LBL_1_2 /* NOTE(review): this label is reached only through the jne above and the two stores do not change flags, so this branch looks never taken - confirm */
+ xorl %eax, %eax
+ vzeroupper
+ kmovw %k4, 24(%rsp) /* k4-k7, %rsi, %rdi, %r12 and %r13 are saved here and restored after the loop */
+ kmovw %k5, 16(%rsp)
+ kmovw %k6, 8(%rsp)
+ kmovw %k7, (%rsp)
+ movq %rsi, 40(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r12, 56(%rsp)
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x68, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x60, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x78, 0xff, 0xff, 0xff, 0x22
+ movl %eax, %r12d /* r12d = lane index, starting at 0 */
+ movq %r13, 48(%rsp)
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x70, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r13d /* r13d = fallback lane mask */
+ .cfi_escape 0x10, 0xfa, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x58, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfb, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfc, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfd, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+
+.LBL_1_7: /* loop head: is the bit for lane r12d set in the mask? */
+ btl %r12d, %r13d
+ jc .LBL_1_10
+
+.LBL_1_8: /* next lane; 8 lanes in total */
+ incl %r12d
+ cmpl $8, %r12d
+ jl .LBL_1_7
+ kmovw 24(%rsp), %k4
+ cfi_restore(122)
+ kmovw 16(%rsp), %k5
+ cfi_restore(123)
+ kmovw 8(%rsp), %k6
+ cfi_restore(124)
+ kmovw (%rsp), %k7
+ cfi_restore(125)
+ vmovups 128(%rsp), %zmm0 /* reload the result with the patched lanes */
+ movq 40(%rsp), %rsi
+ cfi_restore(4)
+ movq 32(%rsp), %rdi
+ cfi_restore(5)
+ movq 56(%rsp), %r12
+ cfi_restore(12)
+ movq 48(%rsp), %r13
+ cfi_restore(13)
+ jmp .LBL_1_2
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x68, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x60, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x78, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x70, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfa, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x58, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfb, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfc, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfd, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+
+.LBL_1_10: /* recompute one lane with the scalar routine */
+ lea 64(%rsp,%r12,8), %rdi /* rdi = address of the spilled x for this lane */
+ lea 128(%rsp,%r12,8), %rsi /* rsi = address of the result slot for this lane */
+ call __svml_dexpm1_cout_rare_internal
+ jmp .LBL_1_8
+
+END(_ZGVeN8v_expm1_skx)
+
+ .align 16,0x90
+
+__svml_dexpm1_cout_rare_internal: /* Scalar fallback for one lane. In: %rdi -> input double, %rsi -> output double. %eax is left at 0, 3 or 4 depending on the path taken. Uses scratch slots below %rsp and sets up no frame. */
+
+ cfi_startproc
+
+ xorl %eax, %eax
+ movsd (%rdi), %xmm6 /* xmm6 = x */
+ pxor %xmm0, %xmm0
+ movzwl 6(%rdi), %edx /* top 16 bits of x: sign and exponent field */
+ comisd %xmm6, %xmm0
+ ja .LBL_2_18 /* 0 > x: store -1.0 */
+ andl $32752, %edx /* 0x7ff0: keep the exponent bits */
+ shrl $4, %edx /* edx = biased exponent */
+ movsd %xmm6, -8(%rsp) /* keep a copy of x in a scratch slot */
+ cmpl $2047, %edx
+ je .LBL_2_19 /* exponent all ones: Inf or NaN */
+ cmpl $970, %edx
+ jle .LBL_2_16 /* biased exponent <= 970: result is x plus the constant at offset 1144 */
+ movsd 1080+_imldExpHATab(%rip), %xmm0
+ comisd %xmm6, %xmm0
+ jb .LBL_2_15 /* x above the bound at offset 1080 */
+ comisd 1096+_imldExpHATab(%rip), %xmm6
+ jb .LBL_2_14 /* x below the bound at offset 1096 */
+ movsd 1024+_imldExpHATab(%rip), %xmm0
+ movaps %xmm6, %xmm5 /* xmm5 starts as x and becomes the reduced argument */
+ mulsd %xmm6, %xmm0 /* xmm0 = x * constant at offset 1024 */
+ lea _imldExpHATab(%rip), %r10 /* r10 = table base */
+ movsd %xmm0, -24(%rsp)
+ movsd -24(%rsp), %xmm1
+ movq 1136+_imldExpHATab(%rip), %rdx
+ movq %rdx, -8(%rsp) /* the slot at -8(%rsp) now holds the constant from offset 1136; its exponent field is patched below */
+ addsd 1032+_imldExpHATab(%rip), %xmm1 /* add the constant at offset 1032 ... */
+ movsd %xmm1, -16(%rsp)
+ movsd -16(%rsp), %xmm2
+ movl -16(%rsp), %r8d /* ... and read the low 32 bits of the sum as an integer */
+ movl %r8d, %ecx
+ andl $63, %r8d /* r8d = table entry index (low 6 bits) */
+ subsd 1032+_imldExpHATab(%rip), %xmm2 /* subtract the same constant again */
+ movsd %xmm2, -24(%rsp)
+ lea 1(%r8,%r8), %r9d /* r9d = 2*index + 1: second double of the entry */
+ movsd -24(%rsp), %xmm3
+ lea (%r8,%r8), %edi /* edi = 2*index: first double of the entry */
+ mulsd 1104+_imldExpHATab(%rip), %xmm3
+ movsd -24(%rsp), %xmm4
+ subsd %xmm3, %xmm5 /* reduction, first step */
+ mulsd 1112+_imldExpHATab(%rip), %xmm4
+ movsd 1072+_imldExpHATab(%rip), %xmm2 /* polynomial in xmm5 with coefficients at offsets 1072, 1064, 1056, 1048, 1040 */
+ subsd %xmm4, %xmm5 /* reduction, second step */
+ mulsd %xmm5, %xmm2
+ shrl $6, %ecx /* ecx = remaining integer bits, used for the exponent */
+ addsd 1064+_imldExpHATab(%rip), %xmm2
+ comisd 1088+_imldExpHATab(%rip), %xmm6 /* flags are consumed by the jb before .LBL_2_9 */
+ mulsd %xmm5, %xmm2
+ movsd (%r10,%rdi,8), %xmm0 /* xmm0 = first double of the table entry */
+ lea 1023(%rcx), %edx /* edx = ecx + 1023 */
+ addsd 1056+_imldExpHATab(%rip), %xmm2
+ mulsd %xmm5, %xmm2
+ addsd 1048+_imldExpHATab(%rip), %xmm2
+ mulsd %xmm5, %xmm2
+ addsd 1040+_imldExpHATab(%rip), %xmm2
+ mulsd %xmm5, %xmm2
+ mulsd %xmm5, %xmm2
+ addsd %xmm5, %xmm2
+ addsd (%r10,%r9,8), %xmm2 /* add the second double of the table entry */
+ mulsd %xmm0, %xmm2
+ jb .LBL_2_10 /* x below the bound at offset 1088; the SSE arithmetic, moves and lea since the comisd leave the flags untouched */
+ andl $2047, %edx
+ addsd %xmm0, %xmm2
+ cmpl $2046, %edx
+ ja .LBL_2_9 /* exponent field would exceed 2046 */
+ movq 1136+_imldExpHATab(%rip), %rcx
+ shrq $48, %rcx /* top 16 bits of the constant at offset 1136 */
+ shll $4, %edx
+ andl $-32753, %ecx /* clear the exponent bits (mask is ~0x7ff0) */
+ orl %edx, %ecx /* insert edx as the exponent field */
+ movw %cx, -2(%rsp) /* overwrite the top 16 bits of the double at -8(%rsp) */
+ movsd -8(%rsp), %xmm0
+ mulsd %xmm0, %xmm2 /* scale by the value just built */
+ movsd %xmm2, (%rsi)
+ ret
+
+.LBL_2_9: /* as above with the exponent lowered by one, then one more multiply by the constant at offset 1152 */
+ decl %edx
+ andl $2047, %edx
+ movzwl -2(%rsp), %ecx
+ shll $4, %edx
+ andl $-32753, %ecx
+ orl %edx, %ecx
+ movw %cx, -2(%rsp)
+ movsd -8(%rsp), %xmm0
+ mulsd %xmm0, %xmm2
+ mulsd 1152+_imldExpHATab(%rip), %xmm2
+ movsd %xmm2, (%rsi)
+ ret
+
+.LBL_2_10: /* x below the bound at offset 1088: exponent is biased by 1083 instead of 1023 and the result is rescaled afterwards; returns 4 */
+ addl $1083, %ecx
+ andl $2047, %ecx
+ movl %ecx, %eax
+ movzwl -2(%rsp), %edx
+ shll $4, %eax
+ andl $-32753, %edx
+ orl %eax, %edx
+ movw %dx, -2(%rsp) /* patch the exponent field of the double at -8(%rsp) */
+ movsd -8(%rsp), %xmm1
+ mulsd %xmm1, %xmm2
+ mulsd %xmm0, %xmm1
+ movaps %xmm1, %xmm0
+ addsd %xmm2, %xmm0
+ cmpl $50, %ecx
+ ja .LBL_2_12
+ mulsd 1160+_imldExpHATab(%rip), %xmm0 /* ecx <= 50: single multiply by the constant at offset 1160 */
+ movsd %xmm0, (%rsi)
+ jmp .LBL_2_13
+
+.LBL_2_12: /* ecx > 50: the sum is split using the constant at offset 1168, each part is multiplied by the constant at offset 1160, and the parts are added; every intermediate goes through a stack slot */
+ movsd %xmm0, -72(%rsp)
+ movsd -72(%rsp), %xmm0
+ subsd %xmm0, %xmm1
+ movsd %xmm1, -64(%rsp)
+ movsd -64(%rsp), %xmm1
+ addsd %xmm2, %xmm1
+ movsd %xmm1, -64(%rsp)
+ movsd -72(%rsp), %xmm2
+ mulsd 1168+_imldExpHATab(%rip), %xmm2
+ movsd %xmm2, -56(%rsp)
+ movsd -72(%rsp), %xmm4
+ movsd -56(%rsp), %xmm3
+ addsd %xmm3, %xmm4
+ movsd %xmm4, -48(%rsp)
+ movsd -48(%rsp), %xmm6
+ movsd -56(%rsp), %xmm5
+ subsd %xmm5, %xmm6
+ movsd %xmm6, -40(%rsp)
+ movsd -72(%rsp), %xmm8
+ movsd -40(%rsp), %xmm7
+ subsd %xmm7, %xmm8
+ movsd %xmm8, -32(%rsp)
+ movsd -64(%rsp), %xmm10
+ movsd -32(%rsp), %xmm9
+ addsd %xmm9, %xmm10
+ movsd %xmm10, -32(%rsp)
+ movsd -40(%rsp), %xmm11
+ mulsd 1160+_imldExpHATab(%rip), %xmm11
+ movsd %xmm11, -40(%rsp)
+ movsd -32(%rsp), %xmm12
+ mulsd 1160+_imldExpHATab(%rip), %xmm12
+ movsd %xmm12, -32(%rsp)
+ movsd -40(%rsp), %xmm14
+ movsd -32(%rsp), %xmm13
+ addsd %xmm13, %xmm14
+ movsd %xmm14, (%rsi)
+
+.LBL_2_13: /* shared tail of the .LBL_2_10 paths */
+ movl $4, %eax
+ ret
+
+.LBL_2_14: /* x below the bound at offset 1096: store the square of the constant at offset 1120, return 4 */
+ movsd 1120+_imldExpHATab(%rip), %xmm0
+ movl $4, %eax
+ mulsd %xmm0, %xmm0
+ movsd %xmm0, (%rsi)
+ ret
+
+.LBL_2_15: /* x above the bound at offset 1080: store the square of the constant at offset 1128, return 3 */
+ movsd 1128+_imldExpHATab(%rip), %xmm0
+ movl $3, %eax
+ mulsd %xmm0, %xmm0
+ movsd %xmm0, (%rsi)
+ ret
+
+.LBL_2_16: /* biased exponent <= 970: store x plus the constant at offset 1144 */
+ movsd 1144+_imldExpHATab(%rip), %xmm0
+ addsd %xmm6, %xmm0
+ movsd %xmm0, (%rsi)
+ ret
+
+.LBL_2_18: /* 0 > x: store the bit pattern of -1.0 and return 0 */
+ movq $0xbff0000000000000, %rax
+ movq %rax, (%rsi)
+ xorl %eax, %eax
+ ret
+
+.LBL_2_19: /* Inf or NaN: test the sign bit in the top byte of the saved x */
+ movb -1(%rsp), %dl
+ andb $-128, %dl
+ cmpb $-128, %dl
+ je .LBL_2_21
+
+.LBL_2_20: /* store x*x */
+ mulsd %xmm6, %xmm6
+ movsd %xmm6, (%rsi)
+ ret
+
+.LBL_2_21: /* sign bit set: any nonzero mantissa bit goes back to .LBL_2_20, otherwise store the constant at offset 1136 */
+ testl $1048575, -4(%rsp) /* mantissa bits in the high word */
+ jne .LBL_2_20
+ cmpl $0, -8(%rsp) /* mantissa bits in the low word */
+ jne .LBL_2_20
+ movq 1136+_imldExpHATab(%rip), %rdx
+ movq %rdx, (%rsi)
+ ret
+
+ cfi_endproc
+
+ .type __svml_dexpm1_cout_rare_internal,@function
+ .size __svml_dexpm1_cout_rare_internal,.-__svml_dexpm1_cout_rare_internal
+
+ .section .rodata, "a"
+ .align 64
+
+__svml_dexpm1_data_internal_avx512: /* Constants for _ZGVeN8v_expm1_skx, 1344 bytes. Each double is written low word first. Two 16-double lookup tables come first, then 64-byte vectors holding one double repeated 8 times. */
+ .long 0 /* offset 0: 16-double table looked up into Th (halves loaded from offsets 0 and 64); first entry is 1.0 */
+ .long 1072693248
+ .long 1828292879
+ .long 1072739672
+ .long 1014845819
+ .long 1072788152
+ .long 1853186616
+ .long 1072838778
+ .long 171030293
+ .long 1072891646
+ .long 1276261410
+ .long 1072946854
+ .long 3577096743
+ .long 1073004506
+ .long 3712504873
+ .long 1073064711
+ .long 1719614413
+ .long 1073127582
+ .long 1944781191
+ .long 1073193236
+ .long 1110089947
+ .long 1073261797
+ .long 2191782032
+ .long 1073333393
+ .long 2572866477
+ .long 1073408159
+ .long 3716502172
+ .long 1073486235
+ .long 3707479175
+ .long 1073567768
+ .long 2728693978
+ .long 1073652911
+ .long 0 /* offset 128: 16-double table looked up into Tlr (halves loaded from offsets 128 and 192) */
+ .long 0
+ .long 1568897901
+ .long 1016568486
+ .long 3936719688
+ .long 3162512149
+ .long 3819481236
+ .long 1016499965
+ .long 1303423926
+ .long 1015238005
+ .long 2804567149
+ .long 1015390024
+ .long 3145379760
+ .long 1014403278
+ .long 3793507337
+ .long 1016095713
+ .long 3210617384
+ .long 3163796463
+ .long 3108873501
+ .long 3162190556
+ .long 3253791412
+ .long 1015920431
+ .long 730975783
+ .long 1014083580
+ .long 2462790535
+ .long 1015814775
+ .long 816778419
+ .long 1014197934
+ .long 2789017511
+ .long 1014276997
+ .long 2413007344
+ .long 3163551506
+ .long 1697350398 /* offset 256: multiplier applied to x in the first FMA (log2(e) per the routine's comment) */
+ .long 1073157447
+ .long 1697350398
+ .long 1073157447
+ .long 1697350398
+ .long 1073157447
+ .long 1697350398
+ .long 1073157447
+ .long 1697350398
+ .long 1073157447
+ .long 1697350398
+ .long 1073157447
+ .long 1697350398
+ .long 1073157447
+ .long 1697350398
+ .long 1073157447
+ .long 16368 /* offset 320: shifter added to x*log2(e) and subtracted again to obtain Z0 */
+ .long 1123549184
+ .long 16368
+ .long 1123549184
+ .long 16368
+ .long 1123549184
+ .long 16368
+ .long 1123549184
+ .long 16368
+ .long 1123549184
+ .long 16368
+ .long 1123549184
+ .long 16368
+ .long 1123549184
+ .long 16368
+ .long 1123549184
+ .long 1287323204 /* offset 384: bound for the vcmppd that selects lanes for the scalar fallback */
+ .long 1082531232
+ .long 1287323204
+ .long 1082531232
+ .long 1287323204
+ .long 1082531232
+ .long 1287323204
+ .long 1082531232
+ .long 1287323204
+ .long 1082531232
+ .long 1287323204
+ .long 1082531232
+ .long 1287323204
+ .long 1082531232
+ .long 1287323204
+ .long 1082531232
+ .long 0 /* offset 448: 0x8000000000000000, the sign-bit mask AND-ed with x */
+ .long 2147483648
+ .long 0
+ .long 2147483648
+ .long 0
+ .long 2147483648
+ .long 0
+ .long 2147483648
+ .long 0
+ .long 2147483648
+ .long 0
+ .long 2147483648
+ .long 0
+ .long 2147483648
+ .long 0
+ .long 2147483648
+ .long 4277811695 /* offset 512: first multiplier of Z0 in R = x - Z0*log(2) */
+ .long 1072049730
+ .long 4277811695
+ .long 1072049730
+ .long 4277811695
+ .long 1072049730
+ .long 4277811695
+ .long 1072049730
+ .long 4277811695
+ .long 1072049730
+ .long 4277811695
+ .long 1072049730
+ .long 4277811695
+ .long 1072049730
+ .long 4277811695
+ .long 1072049730
+ .long 993624127 /* offset 576: second multiplier of Z0 in the same reduction */
+ .long 1014676638
+ .long 993624127
+ .long 1014676638
+ .long 993624127
+ .long 1014676638
+ .long 993624127
+ .long 1014676638
+ .long 993624127
+ .long 1014676638
+ .long 993624127
+ .long 1014676638
+ .long 993624127
+ .long 1014676638
+ .long 993624127
+ .long 1014676638
+ .long 0 /* offset 640: -128.0, the lower clamp applied to Z0 */
+ .long 3227516928
+ .long 0
+ .long 3227516928
+ .long 0
+ .long 3227516928
+ .long 0
+ .long 3227516928
+ .long 0
+ .long 3227516928
+ .long 0
+ .long 3227516928
+ .long 0
+ .long 3227516928
+ .long 0
+ .long 3227516928
+ .long 4294967295 /* offset 704: 0xbfffffffffffffff, the mask AND-ed with R ("ensure |R|<2") */
+ .long 3221225471
+ .long 4294967295
+ .long 3221225471
+ .long 4294967295
+ .long 3221225471
+ .long 4294967295
+ .long 3221225471
+ .long 4294967295
+ .long 3221225471
+ .long 4294967295
+ .long 3221225471
+ .long 4294967295
+ .long 3221225471
+ .long 4294967295
+ .long 3221225471
+ .long 4106095538 /* offset 768: not loaded by the vector routine in this file */
+ .long 1056571896
+ .long 4106095538
+ .long 1056571896
+ .long 4106095538
+ .long 1056571896
+ .long 4106095538
+ .long 1056571896
+ .long 4106095538
+ .long 1056571896
+ .long 4106095538
+ .long 1056571896
+ .long 4106095538
+ .long 1056571896
+ .long 4106095538
+ .long 1056571896
+ .long 271596938 /* offset 832: polynomial coefficient (multiplies R, added to the one at offset 896) */
+ .long 1059717636
+ .long 271596938
+ .long 1059717636
+ .long 271596938
+ .long 1059717636
+ .long 271596938
+ .long 1059717636
+ .long 271596938
+ .long 1059717636
+ .long 271596938
+ .long 1059717636
+ .long 271596938
+ .long 1059717636
+ .long 271596938
+ .long 1059717636
+ .long 2383825455 /* offset 896: polynomial coefficient */
+ .long 1062650307
+ .long 2383825455
+ .long 1062650307
+ .long 2383825455
+ .long 1062650307
+ .long 2383825455
+ .long 1062650307
+ .long 2383825455
+ .long 1062650307
+ .long 2383825455
+ .long 1062650307
+ .long 2383825455
+ .long 1062650307
+ .long 2383825455
+ .long 1062650307
+ .long 277238292 /* offset 960: polynomial coefficient (multiplies R, added to the one at offset 1024) */
+ .long 1065423121
+ .long 277238292
+ .long 1065423121
+ .long 277238292
+ .long 1065423121
+ .long 277238292
+ .long 1065423121
+ .long 277238292
+ .long 1065423121
+ .long 277238292
+ .long 1065423121
+ .long 277238292
+ .long 1065423121
+ .long 277238292
+ .long 1065423121
+ .long 1420639494 /* offset 1024: polynomial coefficient */
+ .long 1067799893
+ .long 1420639494
+ .long 1067799893
+ .long 1420639494
+ .long 1067799893
+ .long 1420639494
+ .long 1067799893
+ .long 1420639494
+ .long 1067799893
+ .long 1420639494
+ .long 1067799893
+ .long 1420639494
+ .long 1067799893
+ .long 1420639494
+ .long 1067799893
+ .long 1431656022 /* offset 1088: polynomial coefficient (multiplies R, added to the one at offset 1152) */
+ .long 1069897045
+ .long 1431656022
+ .long 1069897045
+ .long 1431656022
+ .long 1069897045
+ .long 1431656022
+ .long 1069897045
+ .long 1431656022
+ .long 1069897045
+ .long 1431656022
+ .long 1069897045
+ .long 1431656022
+ .long 1069897045
+ .long 1431656022
+ .long 1069897045
+ .long 162 /* offset 1152: polynomial coefficient */
+ .long 1071644672
+ .long 162
+ .long 1071644672
+ .long 162
+ .long 1071644672
+ .long 162
+ .long 1071644672
+ .long 162
+ .long 1071644672
+ .long 162
+ .long 1071644672
+ .long 162
+ .long 1071644672
+ .long 162
+ .long 1071644672
+ .long 0 /* offset 1216: 1.0, subtracted from the scaled Th */
+ .long 1072693248
+ .long 0
+ .long 1072693248
+ .long 0
+ .long 1072693248
+ .long 0
+ .long 1072693248
+ .long 0
+ .long 1072693248
+ .long 0
+ .long 1072693248
+ .long 0
+ .long 1072693248
+ .long 0
+ .long 1072693248
+ .long 0 /* offset 1280: -1.0; not loaded by the vector routine in this file */
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .long 0
+ .long 3220176896
+ .type __svml_dexpm1_data_internal_avx512,@object
+ .size __svml_dexpm1_data_internal_avx512,1344
+ .align 32
+
+_imldExpHATab: /* NOTE(review): appears to hold 64 pairs of doubles (value, low-order correction) for 2^(j/64), each stored as low word then high word; the consumer is outside this chunk - confirm */
+ .long 0
+ .long 1072693248
+ .long 0
+ .long 0
+ .long 1048019041
+ .long 1072704666
+ .long 2631457885
+ .long 3161546771
+ .long 3541402996
+ .long 1072716208
+ .long 896005651
+ .long 1015861842
+ .long 410360776
+ .long 1072727877
+ .long 1642514529
+ .long 1012987726
+ .long 1828292879
+ .long 1072739672
+ .long 1568897901
+ .long 1016568486
+ .long 852742562
+ .long 1072751596
+ .long 1882168529
+ .long 1010744893
+ .long 3490863953
+ .long 1072763649
+ .long 707771662
+ .long 3163903570
+ .long 2930322912
+ .long 1072775834
+ .long 3117806614
+ .long 3163670819
+ .long 1014845819
+ .long 1072788152
+ .long 3936719688
+ .long 3162512149
+ .long 3949972341
+ .long 1072800603
+ .long 1058231231
+ .long 1015777676
+ .long 828946858
+ .long 1072813191
+ .long 1044000608
+ .long 1016786167
+ .long 2288159958
+ .long 1072825915
+ .long 1151779725
+ .long 1015705409
+ .long 1853186616
+ .long 1072838778
+ .long 3819481236
+ .long 1016499965
+ .long 1709341917
+ .long 1072851781
+ .long 2552227826
+ .long 1015039787
+ .long 4112506593
+ .long 1072864925
+ .long 1829350193
+ .long 1015216097
+ .long 2799960843
+ .long 1072878213
+ .long 1913391796
+ .long 1015756674
+ .long 171030293
+ .long 1072891646
+ .long 1303423926
+ .long 1015238005
+ .long 2992903935
+ .long 1072905224
+ .long 1574172746
+ .long 1016061241
+ .long 926591435
+ .long 1072918951
+ .long 3427487848
+ .long 3163704045
+ .long 887463927
+ .long 1072932827
+ .long 1049900754
+ .long 3161575912
+ .long 1276261410
+ .long 1072946854
+ .long 2804567149
+ .long 1015390024
+ .long 569847338
+ .long 1072961034
+ .long 1209502043
+ .long 3159926671
+ .long 1617004845
+ .long 1072975368
+ .long 1623370769
+ .long 1011049453
+ .long 3049340112
+ .long 1072989858
+ .long 3667985273
+ .long 1013894369
+ .long 3577096743
+ .long 1073004506
+ .long 3145379760
+ .long 1014403278
+ .long 1990012071
+ .long 1073019314
+ .long 7447438
+ .long 3163526196
+ .long 1453150082
+ .long 1073034283
+ .long 3171891295
+ .long 3162037958
+ .long 917841882
+ .long 1073049415
+ .long 419288974
+ .long 1016280325
+ .long 3712504873
+ .long 1073064711
+ .long 3793507337
+ .long 1016095713
+ .long 363667784
+ .long 1073080175
+ .long 728023093
+ .long 1016345318
+ .long 2956612997
+ .long 1073095806
+ .long 1005538728
+ .long 3163304901
+ .long 2186617381
+ .long 1073111608
+ .long 2018924632
+ .long 3163803357
+ .long 1719614413
+ .long 1073127582
+ .long 3210617384
+ .long 3163796463
+ .long 1013258799
+ .long 1073143730
+ .long 3094194670
+ .long 3160631279
+ .long 3907805044
+ .long 1073160053
+ .long 2119843535
+ .long 3161988964
+ .long 1447192521
+ .long 1073176555
+ .long 508946058
+ .long 3162904882
+ .long 1944781191
+ .long 1073193236
+ .long 3108873501
+ .long 3162190556
+ .long 919555682
+ .long 1073210099
+ .long 2882956373
+ .long 1013312481
+ .long 2571947539
+ .long 1073227145
+ .long 4047189812
+ .long 3163777462
+ .long 2604962541
+ .long 1073244377
+ .long 3631372142
+ .long 3163870288
+ .long 1110089947
+ .long 1073261797
+ .long 3253791412
+ .long 1015920431
+ .long 2568320822
+ .long 1073279406
+ .long 1509121860
+ .long 1014756995
+ .long 2966275557
+ .long 1073297207
+ .long 2339118633
+ .long 3160254904
+ .long 2682146384
+ .long 1073315202
+ .long 586480042
+ .long 3163702083
+ .long 2191782032
+ .long 1073333393
+ .long 730975783
+ .long 1014083580
+ .long 2069751141
+ .long 1073351782
+ .long 576856675
+ .long 3163014404
+ .long 2990417245
+ .long 1073370371
+ .long 3552361237
+ .long 3163667409
+ .long 1434058175
+ .long 1073389163
+ .long 1853053619
+ .long 1015310724
+ .long 2572866477
+ .long 1073408159
+ .long 2462790535
+ .long 1015814775
+ .long 3092190715
+ .long 1073427362
+ .long 1457303226
+ .long 3159737305
+ .long 4076559943
+ .long 1073446774
+ .long 950899508
+ .long 3160987380
+ .long 2420883922
+ .long 1073466398
+ .long 174054861
+ .long 1014300631
+ .long 3716502172
+ .long 1073486235
+ .long 816778419
+ .long 1014197934
+ .long 777507147
+ .long 1073506289
+ .long 3507050924
+ .long 1015341199
+ .long 3706687593
+ .long 1073526560
+ .long 1821514088
+ .long 1013410604
+ .long 1242007932
+ .long 1073547053
+ .long 1073740399
+ .long 3163532637
+ .long 3707479175
+ .long 1073567768
+ .long 2789017511
+ .long 1014276997
+ .long 64696965
+ .long 1073588710
+ .long 3586233004
+ .long 1015962192
+ .long 863738719
+ .long 1073609879
+ .long 129252895
+ .long 3162690849
+ .long 3884662774
+ .long 1073631278
+ .long 1614448851
+ .long 1014281732
+ .long 2728693978
+ .long 1073652911
+ .long 2413007344
+ .long 3163551506
+ .long 3999357479
+ .long 1073674779
+ .long 1101668360
+ .long 1015989180
+ .long 1533953344
+ .long 1073696886
+ .long 835814894
+ .long 1015702697
+ .long 2174652632
+ .long 1073719233
+ .long 1301400989
+ .long 1014466875
+ .long 1697350398 /* 19 further 8-byte constants start here (byte offset 1024); this first one is 0x40571547652b82fe ~ 64/ln(2) */
+ .long 1079448903
+ .long 0
+ .long 1127743488
+ .long 0
+ .long 1071644672
+ .long 1431652600
+ .long 1069897045
+ .long 1431670732
+ .long 1067799893
+ .long 984555731
+ .long 1065423122
+ .long 472530941
+ .long 1062650218
+ .long 4277811695
+ .long 1082535490
+ .long 3715808466
+ .long 3230016299
+ .long 3576508497
+ .long 3230091536
+ .long 4277796864
+ .long 1065758274
+ .long 3164486458
+ .long 1025308570
+ .long 1
+ .long 1048576
+ .long 4294967295
+ .long 2146435071
+ .long 0
+ .long 0
+ .long 0
+ .long 1072693248
+ .long 0
+ .long 1073741824
+ .long 0
+ .long 1009778688
+ .long 0
+ .long 1106771968
+ .type _imldExpHATab,@object
+ .size _imldExpHATab,1176 /* 294 .long words = 64*16 bytes of table + 152 bytes of constants */
+ .align 8
+
+.FLT_77: /* one 8-byte constant, low word first */
+ .long 0x00000000,0xbff00000 /* 0xbff0000000000000 = -1.0 as an IEEE-754 double */
+ .type .FLT_77,@object
+ .size .FLT_77,8
new file mode 100644
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized expm1f, vector length is 16.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVeN16v_expm1f _ZGVeN16v_expm1f_avx2_wrapper /* rename the symbol before pulling in the generic source below */
+#include "../svml_s_expm1f16_core.S" /* NOTE(review): presumably defines _ZGVeN16v_expm1f; file not visible here - confirm */
new file mode 100644
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized expm1f, vector length is 16.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN16v_expm1f /* public name of the 16-lane float expm1 entry point */
+#include "ifunc-mathvec-avx512-skx.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR, which are not defined in this file - confirm */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* define SYMBOL_NAME as an ifunc resolved through IFUNC_SELECTOR */
+
+#ifdef SHARED /* only when SHARED is defined: give the __GI_ alias hidden visibility */
+__hidden_ver1 (_ZGVeN16v_expm1f, __GI__ZGVeN16v_expm1f,
+ __redirect__ZGVeN16v_expm1f)
+ __attribute__ ((visibility ("hidden")));
+#endif
new file mode 100644
@@ -0,0 +1,569 @@
+/* Function expm1f vectorized with AVX-512.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ * After computing exp(x) in high-low parts, an accurate computation is performed to obtain exp(x)-1
+ * Typical exp() implementation, except that:
+ * - tables are small (32 elements), allowing for fast gathers
+ * - all arguments processed in the main path
+ * - final VSCALEF assists branch-free design (correct overflow/underflow and special case responses)
+ * - a VAND is used to ensure the reduced argument |R|<2, even for large inputs
+ * - RZ mode used to avoid overflow to +/-Inf for x*log2(e); helps with special case handling
+ *
+ *
+ */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(_ZGVeN16v_expm1f_skx) /* 16 packed floats in %zmm0 -> 16 packed results in %zmm0 */
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-64, %rsp /* round the stack pointer down to a 64-byte boundary */
+ subq $192, %rsp /* scratch used below: 0..63 saved registers, 64..127 input, 128..191 result */
+ vmovups 256+__svml_sexpm1_data_internal_avx512(%rip), %zmm5 /* 0x3fb8aa3b ~ log2(e) in every lane */
+ vmovups 320+__svml_sexpm1_data_internal_avx512(%rip), %zmm3 /* 0x48c00000 = 1.5*2^18 shifter */
+ vmovups 512+__svml_sexpm1_data_internal_avx512(%rip), %zmm8 /* 0x3f317218 ~ ln(2), high part */
+ vmovups 576+__svml_sexpm1_data_internal_avx512(%rip), %zmm4 /* 0xb102e308, small negative low part of ln(2) */
+ vmovups __svml_sexpm1_data_internal_avx512(%rip), %zmm6 /* first 16 entries of the table at offset 0 */
+
+/* polynomial */
+ vmovups 704+__svml_sexpm1_data_internal_avx512(%rip), %zmm9 /* 0x3e2aaab3 ~ 1/6 */
+ vmovups 768+__svml_sexpm1_data_internal_avx512(%rip), %zmm12 /* 0x3f0000f6 ~ 1/2 */
+ vmovups 128+__svml_sexpm1_data_internal_avx512(%rip), %zmm11 /* first 16 entries of the table at offset 128 */
+ vmovups 384+__svml_sexpm1_data_internal_avx512(%rip), %zmm2 /* 0x42ad496b ~ 86.64, special-case threshold */
+
+/* Th - 1 */
+ vmovups 832+__svml_sexpm1_data_internal_avx512(%rip), %zmm14 /* 1.0f in every lane */
+ vmovaps %zmm0, %zmm1 /* zmm1 = x, kept intact for the fallback path */
+
+/* 2^(23-5)*1.5 + x * log2(e) */
+ vfmadd213ps {rn-sae}, %zmm3, %zmm1, %zmm5
+ vcmpps $29, {sae}, %zmm2, %zmm1, %k0 /* predicate 29 (GE_OQ): k0 bit set where x >= threshold */
+
+/* Z0 ~ x*log2(e), rounded to 5 fractional bits */
+ vsubps {rn-sae}, %zmm3, %zmm5, %zmm7
+ vpermt2ps 64+__svml_sexpm1_data_internal_avx512(%rip), %zmm5, %zmm6 /* zmm6 = Th: 32-entry lookup indexed by the low bits of zmm5 */
+ vpermt2ps 192+__svml_sexpm1_data_internal_avx512(%rip), %zmm5, %zmm11 /* zmm11 = Tlr: same index, table at offset 128 */
+ vandps 448+__svml_sexpm1_data_internal_avx512(%rip), %zmm1, %zmm0 /* zmm0 = sign bit of x (mask 0x80000000) */
+
+/* R = x - Z0*log(2) */
+ vfnmadd213ps {rn-sae}, %zmm1, %zmm7, %zmm8 /* zmm8 = x - Z0*ln2_hi */
+
+/* scale Th */
+ vscalefps {rn-sae}, %zmm7, %zmm6, %zmm2 /* zmm2 = Th * 2^floor(Z0) */
+ vfnmadd231ps {rn-sae}, %zmm7, %zmm4, %zmm8 /* zmm8 -= Z0*ln2_lo */
+ kmovw %k0, %edx /* edx = bitmask of lanes that need the scalar fallback */
+
+/* ensure |R|<2 even for special cases */
+ vandps 640+__svml_sexpm1_data_internal_avx512(%rip), %zmm8, %zmm13 /* mask 0xbfffffff clears bit 30 */
+ vsubps {rn-sae}, %zmm14, %zmm2, %zmm8 /* zmm8 = Th - 1 */
+ vmulps {rn-sae}, %zmm13, %zmm13, %zmm10 /* zmm10 = R*R */
+ vfmadd231ps {rn-sae}, %zmm13, %zmm9, %zmm12 /* zmm12 = c3*R + c2 */
+
+/* Tlr + R+ R2*Poly */
+ vfmadd213ps {rn-sae}, %zmm11, %zmm10, %zmm12
+ vaddps {rn-sae}, %zmm13, %zmm12, %zmm15
+
+/* (Th-1)+Th*(Tlr + R+ R*Poly) */
+ vfmadd213ps {rn-sae}, %zmm8, %zmm15, %zmm2
+ vorps %zmm0, %zmm2, %zmm0 /* OR the sign bit of x into the result */
+ testl %edx, %edx
+ jne .LBL_1_3 /* at least one lane flagged: take the per-lane fallback */
+
+.LBL_1_2:
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+
+.LBL_1_3:
+ vmovups %zmm1, 64(%rsp) /* spill the inputs */
+ vmovups %zmm0, 128(%rsp) /* spill the vector-path results */
+ je .LBL_1_2 /* NOTE(review): looks unreachable - this label is entered only via the jne above and vmovups does not modify EFLAGS, so ZF is clear here */
+ xorl %eax, %eax
+ vzeroupper /* the result was spilled above and is reloaded from 128(%rsp) before returning */
+ kmovw %k4, 24(%rsp)
+ kmovw %k5, 16(%rsp)
+ kmovw %k6, 8(%rsp)
+ kmovw %k7, (%rsp)
+ movq %rsi, 40(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r12, 56(%rsp)
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x68, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x60, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x78, 0xff, 0xff, 0xff, 0x22
+ movl %eax, %r12d /* r12d = lane index, starting at 0 */
+ movq %r13, 48(%rsp)
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x70, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r13d /* r13d = fallback lane mask */
+ .cfi_escape 0x10, 0xfa, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x58, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfb, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfc, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfd, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+
+.LBL_1_7:
+ btl %r12d, %r13d /* CF = mask bit for the current lane */
+ jc .LBL_1_10
+
+.LBL_1_8:
+ incl %r12d
+ cmpl $16, %r12d /* 16 lanes in total */
+ jl .LBL_1_7
+ kmovw 24(%rsp), %k4
+ cfi_restore(122)
+ kmovw 16(%rsp), %k5
+ cfi_restore(123)
+ kmovw 8(%rsp), %k6
+ cfi_restore(124)
+ kmovw (%rsp), %k7
+ cfi_restore(125)
+ vmovups 128(%rsp), %zmm0 /* reload the result vector, including lanes rewritten by the fallback */
+ movq 40(%rsp), %rsi
+ cfi_restore(4)
+ movq 32(%rsp), %rdi
+ cfi_restore(5)
+ movq 56(%rsp), %r12
+ cfi_restore(12)
+ movq 48(%rsp), %r13
+ cfi_restore(13)
+ jmp .LBL_1_2
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x68, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x60, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x78, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x70, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfa, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x58, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfb, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfc, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xfd, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+
+.LBL_1_10:
+ lea 64(%rsp,%r12,4), %rdi /* rdi -> spilled input element for this lane */
+ lea 128(%rsp,%r12,4), %rsi /* rsi -> result slot for this lane */
+ call __svml_sexpm1_cout_rare_internal
+ jmp .LBL_1_8
+
+END(_ZGVeN16v_expm1f_skx)
+
+ .align 16,0x90
+
+__svml_sexpm1_cout_rare_internal: /* scalar fallback: rdi -> input float, rsi -> output float, status code in eax */
+
+ cfi_startproc
+
+ xorl %eax, %eax /* default status 0 */
+ movss (%rdi), %xmm8 /* xmm8 = x */
+ pxor %xmm0, %xmm0 /* xmm0 = +0.0 */
+ comiss %xmm8, %xmm0
+ ja .LBL_2_8 /* 0 > x: store -1.0f */
+ movzwl 2(%rdi), %edx /* upper 16 bits of x */
+ andl $32640, %edx /* 0x7f80 isolates the exponent field */
+ cmpl $32640, %edx
+ je .LBL_2_9 /* exponent all ones: Inf or NaN */
+ movss .FLT_45(%rip), %xmm0
+ comiss %xmm8, %xmm0
+ jb .LBL_2_6 /* x > .FLT_45: overflow path */
+ comiss .FLT_46(%rip), %xmm8
+ jb .LBL_2_7 /* x < .FLT_46: return without writing the output */
+ movss .FLT_48(%rip), %xmm0
+ mulss %xmm8, %xmm0 /* x * log2(e) */
+ movss %xmm0, -24(%rsp) /* scratch slots live below rsp; no frame is set up here */
+ movss -24(%rsp), %xmm1
+ movss .FLT_49(%rip), %xmm2
+ movss .FLT_56(%rip), %xmm6 /* leading polynomial coefficient */
+ addss %xmm2, %xmm1 /* add the 1.5*2^23 shifter */
+ movss %xmm1, -20(%rsp) /* keep the shifted value; its bits are read back as an integer below */
+ movss -20(%rsp), %xmm3
+ movss .FLT_52(%rip), %xmm7
+ subss %xmm2, %xmm3 /* N = shifted value minus the shifter */
+ movss %xmm3, -24(%rsp)
+ movss -24(%rsp), %xmm4
+ mulss .FLT_50(%rip), %xmm4 /* N * ln2_hi */
+ movss -24(%rsp), %xmm5
+ subss %xmm4, %xmm8
+ mulss .FLT_51(%rip), %xmm5 /* N * ln2_lo */
+ movl -20(%rsp), %edx /* edx = bit pattern of the shifted value */
+ subss %xmm5, %xmm8 /* xmm8 = R = x - N*ln2_hi - N*ln2_lo */
+ mulss %xmm8, %xmm6 /* Horner evaluation in R starts here */
+ shll $23, %edx /* move the low bits of the shifted value into the exponent position */
+ addss .FLT_55(%rip), %xmm6
+ mulss %xmm8, %xmm6
+ addss .FLT_54(%rip), %xmm6
+ mulss %xmm8, %xmm6
+ addss .FLT_53(%rip), %xmm6
+ mulss %xmm8, %xmm6
+ addss %xmm7, %xmm6
+ mulss %xmm8, %xmm6
+ addss %xmm6, %xmm7 /* xmm7 = .FLT_52 + R*(.FLT_52 + R*(...)) */
+ movss %xmm7, -16(%rsp)
+ addl -16(%rsp), %edx /* integer add of the shifted exponent to the polynomial's bit pattern */
+ movl %edx, (%rsi) /* NOTE(review): no explicit subtraction of 1 happens on this path - confirm this is intended for the inputs routed here */
+ ret
+
+.LBL_2_6:
+ movss .FLT_47(%rip), %xmm0
+ mulss %xmm0, %xmm0 /* 0x7f7fffff squared: produced by a real multiply rather than a stored constant */
+ movss %xmm0, -16(%rsp)
+ movl -16(%rsp), %eax
+ movl %eax, (%rsi)
+ movl $3, %eax /* status 3 on the overflow path */
+
+.LBL_2_7:
+ ret
+
+.LBL_2_8:
+ movl $-1082130432, (%rsi) /* 0xbf800000 = -1.0f */
+ ret
+
+.LBL_2_9:
+ movb 3(%rdi), %dl /* top byte of x */
+ andb $-128, %dl /* keep the sign bit only */
+ cmpb $-128, %dl
+ je .LBL_2_11 /* sign bit set */
+
+.LBL_2_10:
+ mulss %xmm8, %xmm8 /* store x*x */
+ movss %xmm8, (%rsi)
+ ret
+
+.LBL_2_11:
+ testl $8388607, (%rdi) /* 0x7fffff: mantissa bits */
+ jne .LBL_2_10 /* non-zero mantissa: NaN, handled as x*x */
+ movss %xmm0, (%rsi) /* xmm0 still holds the +0.0 from the pxor at entry */
+ ret
+
+ cfi_endproc
+
+ .type __svml_sexpm1_cout_rare_internal,@function
+ .size __svml_sexpm1_cout_rare_internal,.-__svml_sexpm1_cout_rare_internal
+
+ .section .rodata, "a"
+ .align 64
+
+__svml_sexpm1_data_internal_avx512: /* 14 rows of 16 x 32-bit words (64 bytes per row); byte offsets are noted on each row's first word */
+ .long 1065353216 /* offset 0: 32-entry table, 1.0f (0x3f800000) up to 0x3ffa83b3; read as Th by the vector code */
+ .long 1065536903
+ .long 1065724611
+ .long 1065916431
+ .long 1066112450
+ .long 1066312762
+ .long 1066517459
+ .long 1066726640
+ .long 1066940400
+ .long 1067158842
+ .long 1067382066
+ .long 1067610179
+ .long 1067843287
+ .long 1068081499
+ .long 1068324927
+ .long 1068573686
+ .long 1068827891
+ .long 1069087663
+ .long 1069353124
+ .long 1069624397
+ .long 1069901610
+ .long 1070184894
+ .long 1070474380
+ .long 1070770206
+ .long 1071072509
+ .long 1071381432
+ .long 1071697119
+ .long 1072019719
+ .long 1072349383
+ .long 1072686266
+ .long 1073030525
+ .long 1073382323
+ .long 0 /* offset 128: 32-entry table of small corrections; read as Tlr by the vector code */
+ .long 3007986186
+ .long 860277610
+ .long 3010384254
+ .long 2991457809
+ .long 3008462297
+ .long 860562562
+ .long 3004532446
+ .long 856238081
+ .long 3001480295
+ .long 857441778
+ .long 815380209
+ .long 3003456168
+ .long 3001196762
+ .long 2986372182
+ .long 3006683458
+ .long 848495278
+ .long 851809756
+ .long 3003311522
+ .long 2995654817
+ .long 833868005
+ .long 3004843819
+ .long 835836658
+ .long 3003498340
+ .long 2994528642
+ .long 3002229827
+ .long 2981408986
+ .long 2983889551
+ .long 2983366846
+ .long 3000350873
+ .long 833659207
+ .long 2987748092
+ .long 1069066811 /* offset 256: 0x3fb8aa3b ~ log2(e) */
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1069066811
+ .long 1220542464 /* offset 320: 0x48c00000 = 1.5*2^18, rounding shifter */
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1220542464
+ .long 1118652779 /* offset 384: 0x42ad496b ~ 86.64, inputs >= this take the scalar fallback */
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 2147483648 /* offset 448: 0x80000000, sign-bit mask */
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 2147483648
+ .long 1060205080 /* offset 512: 0x3f317218 ~ ln(2), high part */
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 1060205080
+ .long 2969756424 /* offset 576: 0xb102e308, small negative low part of ln(2) */
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 2969756424
+ .long 3221225471 /* offset 640: 0xbfffffff, mask that clears bit 30 of the reduced argument */
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 3221225471
+ .long 1042983923 /* offset 704: 0x3e2aaab3 ~ 1/6, polynomial coefficient */
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1042983923
+ .long 1056964854 /* offset 768: 0x3f0000f6 ~ 1/2, polynomial coefficient */
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1056964854
+ .long 1065353216 /* offset 832: 0x3f800000 = 1.0f */
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .long 1065353216
+ .type __svml_sexpm1_data_internal_avx512,@object
+ .size __svml_sexpm1_data_internal_avx512,896 /* 14 rows * 64 bytes */
+ .align 4
+
+.FLT_44:
+ .long 0xbf800000 /* -1.0f; NOTE(review): no reference to this label in the visible code */
+ .type .FLT_44,@object
+ .size .FLT_44,4
+ .align 4
+
+.FLT_45:
+ .long 0x42b17217 /* ~ 88.7228; inputs above this take the overflow path */
+ .type .FLT_45,@object
+ .size .FLT_45,4
+ .align 4
+
+.FLT_46:
+ .long 0xc2cff1b4 /* ~ -103.97; lower cut-off compared against x */
+ .type .FLT_46,@object
+ .size .FLT_46,4
+ .align 4
+
+.FLT_47:
+ .long 0x7f7fffff /* largest finite float; squared on the overflow path */
+ .type .FLT_47,@object
+ .size .FLT_47,4
+ .align 4
+
+.FLT_48:
+ .long 0x3fb8aa3b /* ~ 1.442695 = log2(e) */
+ .type .FLT_48,@object
+ .size .FLT_48,4
+ .align 4
+
+.FLT_49:
+ .long 0x4b400000 /* 1.5*2^23 = 12582912.0, rounding shifter */
+ .type .FLT_49,@object
+ .size .FLT_49,4
+ .align 4
+
+.FLT_50:
+ .long 0x3f317200 /* ~ 0.693146, high part of ln(2) */
+ .type .FLT_50,@object
+ .size .FLT_50,4
+ .align 4
+
+.FLT_51:
+ .long 0x35bfbe8e /* ~ 1.4286e-6, low part of ln(2) */
+ .type .FLT_51,@object
+ .size .FLT_51,4
+ .align 4
+
+.FLT_52:
+ .long 0x3f800001 /* 1.0f plus one ulp; used twice in the polynomial */
+ .type .FLT_52,@object
+ .size .FLT_52,4
+ .align 4
+
+.FLT_53:
+ .long 0x3efffe85 /* ~ 0.49999, polynomial coefficient */
+ .type .FLT_53,@object
+ .size .FLT_53,4
+ .align 4
+
+.FLT_54:
+ .long 0x3e2aa9c6 /* ~ 0.16666, polynomial coefficient */
+ .type .FLT_54,@object
+ .size .FLT_54,4
+ .align 4
+
+.FLT_55:
+ .long 0x3d2bb1b6 /* ~ 0.0419, polynomial coefficient */
+ .type .FLT_55,@object
+ .size .FLT_55,4
+ .align 4
+
+.FLT_56:
+ .long 0x3c0950ef /* ~ 0.00838, polynomial coefficient */
+ .type .FLT_56,@object
+ .size .FLT_56,4
new file mode 100644
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized expm1f, vector length is 4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVbN4v_expm1f _ZGVbN4v_expm1f_sse2 /* rename the symbol before pulling in the generic source below */
+#include "../svml_s_expm1f4_core.S" /* NOTE(review): presumably defines _ZGVbN4v_expm1f; file not visible here - confirm */
new file mode 100644
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized expm1f, vector length is 4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN4v_expm1f /* public name of the 4-lane float expm1 entry point */
+#include "ifunc-mathvec-sse4_1.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR, which are not defined in this file - confirm */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* define SYMBOL_NAME as an ifunc resolved through IFUNC_SELECTOR */
+
+#ifdef SHARED /* only when SHARED is defined: give the __GI_ alias hidden visibility */
+__hidden_ver1 (_ZGVbN4v_expm1f, __GI__ZGVbN4v_expm1f,
+ __redirect__ZGVbN4v_expm1f)
+ __attribute__ ((visibility ("hidden")));
+#endif
new file mode 100644
@@ -0,0 +1,1315 @@
+/* Function expm1f vectorized with SSE4, vector length is 4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * N = (int)(x*2^k/log(2.0)), R = x - N*log(2)/2^k
+ * exp(x) = 2^(N/2^k) * poly(R) is computed in high-low parts
+ * expm1(x) = exp(x)-1 is then obtained via multi-precision computation
+ *
+ *
+ */
+
+#include <sysdep.h>
+
+ .text
+/* _ZGVbN4v_expm1f_sse4: expm1f on four packed floats.
+   In:  %xmm0 = x (4 lanes).   Out: %xmm0 = result (4 lanes).
+
+   "cNNN" below means the 16-byte vector at byte offset NNN of
+   __svml_sexpm1_data_internal.
+
+   Fast path (all lanes):
+     N    = x * c576, rounded to an integer value by adding and then
+            subtracting the shifter .FLT_14
+     R    = x - N * c640 - N * c704
+     T    = table entry selected by the bits of (N + c768), see below
+     poly = R + R*R * (c528 + c512 * R)
+     res  = ((T - 1) + T * poly) | sign bit of x
+   Lanes for which |x| <= c1152 is false (this includes NaN, because
+   cmpnleps is true for unordered operands) are recomputed one at a
+   time by __svml_sexpm1_cout_rare_internal.  */
+ENTRY(_ZGVbN4v_expm1f_sse4)
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-64, %rsp /* 64-byte aligned frame */
+ subq $320, %rsp /* scratch; only the slow path touches it */
+ lea __svml_sexpm1_data_internal(%rip), %rax /* base for the per-lane table loads */
+ movups 576+__svml_sexpm1_data_internal(%rip), %xmm6
+ mulps %xmm0, %xmm6 /* x * c576 */
+ movups .FLT_14(%rip), %xmm2 /* shifter 0x4b400000 in every lane */
+
+/* argument reduction: N in %xmm6, R ends up in %xmm2 */
+ movups 640+__svml_sexpm1_data_internal(%rip), %xmm3
+ addps %xmm2, %xmm6
+ movups 704+__svml_sexpm1_data_internal(%rip), %xmm1
+ subps %xmm2, %xmm6 /* N */
+ mulps %xmm6, %xmm3 /* N * c640 */
+ movaps %xmm0, %xmm2
+ mulps %xmm6, %xmm1 /* N * c704 */
+ subps %xmm3, %xmm2
+
+/* table lookup: (bits of N + c768) & c832 is the per-lane byte offset
+   of an 8-byte table entry */
+ movdqu 832+__svml_sexpm1_data_internal(%rip), %xmm7
+ subps %xmm1, %xmm2 /* R */
+ movups 768+__svml_sexpm1_data_internal(%rip), %xmm1
+ movups 1088+__svml_sexpm1_data_internal(%rip), %xmm3 /* 0x7fffffff mask */
+ addps %xmm6, %xmm1 /* N + c768 */
+ pand %xmm1, %xmm7 /* four byte offsets */
+ movaps %xmm3, %xmm4
+ pshufd $1, %xmm7, %xmm5
+ andps %xmm0, %xmm4 /* |x| */
+ movd %xmm7, %ecx /* offset for lane 0 */
+ andnps %xmm0, %xmm3 /* sign bit of x, OR-ed into the result at the end */
+ movd %xmm5, %r8d /* offset for lane 1 */
+ pshufd $2, %xmm7, %xmm5
+ pshufd $3, %xmm7, %xmm7
+ cmpnleps 1152+__svml_sexpm1_data_internal(%rip), %xmm4 /* lanes needing the slow path */
+ movd %xmm5, %r9d /* offset for lane 2 */
+ movd %xmm7, %r10d /* offset for lane 3 */
+ movmskps %xmm4, %edx /* one bit per flagged lane */
+ movslq %ecx, %rcx
+ movslq %r8d, %r8
+ movslq %r9d, %r9
+ movslq %r10d, %r10
+ movq (%rax,%rcx), %xmm4 /* 8-byte entry (two words) for lane 0 */
+ movq (%rax,%r8), %xmm6 /* ... lane 1 */
+ movq (%rax,%r9), %xmm5 /* ... lane 2 */
+ movq (%rax,%r10), %xmm7 /* ... lane 3 */
+ unpcklps %xmm6, %xmm4
+ unpcklps %xmm7, %xmm5
+ movaps %xmm4, %xmm6
+ movlhps %xmm5, %xmm6 /* first word of each lane's entry */
+ movaps %xmm2, %xmm7
+ shufps $238, %xmm5, %xmm4 /* second word of each lane's entry */
+ movdqu 896+__svml_sexpm1_data_internal(%rip), %xmm5
+ pand %xmm5, %xmm1 /* (bits of N + c768) & c896 */
+
+/* polynomial: poly = R + R*R * (c528 + c512 * R), result in %xmm2 */
+ movups 512+__svml_sexpm1_data_internal(%rip), %xmm5
+ pslld $14, %xmm1 /* move the masked bits up by 14 */
+ mulps %xmm2, %xmm5
+ orps %xmm1, %xmm6 /* first table word | shifted bits */
+ mulps %xmm2, %xmm7 /* R * R */
+ mulps %xmm1, %xmm4 /* second table word * (shifted bits read as float) */
+ addps 528+__svml_sexpm1_data_internal(%rip), %xmm5
+ mulps %xmm7, %xmm5
+ addps %xmm5, %xmm2
+
+/* T-1: c1024 is 0xbf800000 (-1.0f) in every lane */
+ movups 1024+__svml_sexpm1_data_internal(%rip), %xmm5
+ addps %xmm5, %xmm6
+ addps %xmm4, %xmm6 /* T - 1, both table words folded in */
+ movaps %xmm6, %xmm1
+ subps %xmm5, %xmm1 /* (T - 1) - (-1) = T */
+ mulps %xmm1, %xmm2 /* T * poly */
+ addps %xmm2, %xmm6 /* (T - 1) + T * poly */
+ orps %xmm3, %xmm6 /* put back the sign bit of x */
+ testl %edx, %edx
+ jne .LBL_1_3 /* at least one lane was flagged */
+
+/* Common exit: result is expected in %xmm6.  */
+.LBL_1_2:
+ movaps %xmm6, %xmm0
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+
+/* Slow path.  Spill the input to 192(%rsp) and the fast-path result to
+   256(%rsp) so single lanes can be patched in memory, and save
+   %xmm8-%xmm15, %rsi, %rdi, %r12 and %r13 around the scalar calls.  */
+.LBL_1_3:
+ movups %xmm0, 192(%rsp)
+ movups %xmm6, 256(%rsp)
+ xorl %eax, %eax
+ movups %xmm8, 112(%rsp)
+ movups %xmm9, 96(%rsp)
+ movups %xmm10, 80(%rsp)
+ movups %xmm11, 64(%rsp)
+ movups %xmm12, 48(%rsp)
+ movups %xmm13, 32(%rsp)
+ movups %xmm14, 16(%rsp)
+ movups %xmm15, (%rsp)
+ movq %rsi, 136(%rsp)
+ movq %rdi, 128(%rsp)
+ movq %r12, 152(%rsp)
+/* Raw CFI records (first byte 0x10, one per saved register) that pair
+   with the cfi_restore directives further down.  */
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x58, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x19, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x30, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1a, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x20, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1b, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x10, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x00, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1f, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x20, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xfe, 0xff, 0xff, 0x22
+ movl %eax, %r12d /* lane counter, starts at 0 */
+ movq %r13, 144(%rsp)
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r13d /* lane mask produced by movmskps */
+
+/* Visit lanes 0..3; call the scalar routine for each lane whose mask
+   bit is set.  */
+.LBL_1_7:
+ btl %r12d, %r13d
+ jc .LBL_1_10
+
+.LBL_1_8:
+ incl %r12d
+ cmpl $4, %r12d
+ jl .LBL_1_7
+/* All lanes done: restore the saved registers, reload the patched
+   result vector and leave through the common exit.  */
+ movups 112(%rsp), %xmm8
+ cfi_restore(25)
+ movups 96(%rsp), %xmm9
+ cfi_restore(26)
+ movups 80(%rsp), %xmm10
+ cfi_restore(27)
+ movups 64(%rsp), %xmm11
+ cfi_restore(28)
+ movups 48(%rsp), %xmm12
+ cfi_restore(29)
+ movups 32(%rsp), %xmm13
+ cfi_restore(30)
+ movups 16(%rsp), %xmm14
+ cfi_restore(31)
+ movups (%rsp), %xmm15
+ cfi_restore(32)
+ movq 136(%rsp), %rsi
+ cfi_restore(4)
+ movq 128(%rsp), %rdi
+ cfi_restore(5)
+ movq 152(%rsp), %r12
+ cfi_restore(12)
+ movq 144(%rsp), %r13
+ cfi_restore(13)
+ movups 256(%rsp), %xmm6
+ jmp .LBL_1_2
+/* The same CFI records again for the code below, which is reached only
+   from inside the loop while the registers are still spilled.  */
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x58, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x19, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x30, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1a, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x20, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1b, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x10, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x00, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x1f, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x20, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xfe, 0xff, 0xff, 0x22
+
+/* Recompute one lane: %rdi -> that lane of the spilled input,
+   %rsi -> the same lane of the spilled result.  The value the routine
+   returns in %eax is not used here.  */
+.LBL_1_10:
+ lea 192(%rsp,%r12,4), %rdi
+ lea 256(%rsp,%r12,4), %rsi
+ call __svml_sexpm1_cout_rare_internal
+ jmp .LBL_1_8
+
+END(_ZGVbN4v_expm1f_sse4)
+
+ .align 16,0x90
+
+/* __svml_sexpm1_cout_rare_internal: scalar fallback for a single lane.
+   In:   %rdi -> input float x, %rsi -> slot for the result.
+   Out:  %eax = 3 on the overflow path, 0 on every other path.
+   Uses %xmm0-%xmm8 and %edx, and keeps temporaries at negative offsets
+   from %rsp.
+
+   Cases, in the order they are tested:
+     0 > x                       -> store 0xbf800000 (-1.0f)
+     exponent field all ones     -> Inf/NaN handling at .LBL_2_9
+     x > .FLT_46                 -> store .FLT_48 * .FLT_48, return 3
+     x < .FLT_47                 -> return without storing
+     otherwise                   -> n = round (x * .FLT_49),
+                                    r = x - n * .FLT_51 - n * .FLT_52,
+                                    store poly (r) with n added to its
+                                    exponent field.  */
+__svml_sexpm1_cout_rare_internal:
+
+ cfi_startproc
+
+ xorl %eax, %eax /* default return value */
+ movss (%rdi), %xmm8 /* x */
+ pxor %xmm0, %xmm0
+ comiss %xmm8, %xmm0
+ ja .LBL_2_8 /* 0 > x (never taken for NaN) */
+ movzwl 2(%rdi), %edx /* upper 16 bits of x */
+ andl $32640, %edx /* 0x7f80: the exponent field within that halfword */
+ cmpl $32640, %edx
+ je .LBL_2_9 /* exponent all ones: Inf or NaN */
+ movss .FLT_46(%rip), %xmm0
+ comiss %xmm8, %xmm0
+ jb .LBL_2_6 /* x above the upper bound: overflow */
+ comiss .FLT_47(%rip), %xmm8
+ /* NOTE(review): every x < 0 already left through .LBL_2_8, so this
+    branch looks unreachable -- confirm before relying on it.  */
+ jb .LBL_2_7
+ movss .FLT_49(%rip), %xmm0
+ mulss %xmm8, %xmm0 /* x * .FLT_49 */
+ movss %xmm0, -24(%rsp)
+ movss -24(%rsp), %xmm1
+ movss .FLT_50(%rip), %xmm2 /* shifter 0x4b400000 */
+ movss .FLT_57(%rip), %xmm6 /* leading coefficient of the Horner chain */
+ addss %xmm2, %xmm1
+ movss %xmm1, -20(%rsp) /* shifted sum; its low bits are read back as n below */
+ movss -20(%rsp), %xmm3
+ movss .FLT_53(%rip), %xmm7
+ subss %xmm2, %xmm3 /* n as a float */
+ movss %xmm3, -24(%rsp)
+ movss -24(%rsp), %xmm4
+ mulss .FLT_51(%rip), %xmm4 /* n * .FLT_51 */
+ movss -24(%rsp), %xmm5
+ subss %xmm4, %xmm8
+ mulss .FLT_52(%rip), %xmm5 /* n * .FLT_52 */
+ movl -20(%rsp), %edx /* raw bits of the shifted sum */
+ subss %xmm5, %xmm8 /* r */
+ mulss %xmm8, %xmm6
+ shll $23, %edx /* line n up with the float exponent field */
+ addss .FLT_56(%rip), %xmm6
+ mulss %xmm8, %xmm6
+ addss .FLT_55(%rip), %xmm6
+ mulss %xmm8, %xmm6
+ addss .FLT_54(%rip), %xmm6
+ mulss %xmm8, %xmm6
+ addss %xmm7, %xmm6
+ mulss %xmm8, %xmm6
+ addss %xmm6, %xmm7 /* poly (r), Horner form, constant term .FLT_53 */
+ movss %xmm7, -16(%rsp)
+ addl -16(%rsp), %edx /* integer add of n << 23 to the bits of poly (r) */
+ /* NOTE(review): no "- 1" is applied on this path; presumably it is only
+    reached for x so large that exp(x) - 1 rounds to exp(x) -- confirm.  */
+ movl %edx, (%rsi)
+ ret
+
+/* Overflow: store .FLT_48 squared and return 3.  */
+.LBL_2_6:
+ movss .FLT_48(%rip), %xmm0
+ mulss %xmm0, %xmm0
+ movss %xmm0, -16(%rsp)
+ movl -16(%rsp), %eax
+ movl %eax, (%rsi)
+ movl $3, %eax
+
+.LBL_2_7:
+ ret
+
+/* x below zero: the result is -1.0f (bit pattern 0xbf800000).  */
+.LBL_2_8:
+ movl $-1082130432, (%rsi)
+ ret
+
+/* Exponent field all ones: branch on the sign bit of x.  */
+.LBL_2_9:
+ movb 3(%rdi), %dl
+ andb $-128, %dl
+ cmpb $-128, %dl
+ je .LBL_2_11 /* sign bit set */
+
+/* Store x * x.  */
+.LBL_2_10:
+ mulss %xmm8, %xmm8
+ movss %xmm8, (%rsi)
+ ret
+
+/* Sign bit set: any nonzero mantissa bit (mask 0x7fffff) goes back to
+   .LBL_2_10; otherwise store %xmm0, which still holds the zero from
+   the pxor at entry.
+   NOTE(review): x = -Inf is already caught by the "0 > x" test at
+   entry, so the final store looks unreachable -- confirm.  */
+.LBL_2_11:
+ testl $8388607, (%rdi)
+ jne .LBL_2_10
+ movss %xmm0, (%rsi)
+ ret
+
+ cfi_endproc
+
+ .type __svml_sexpm1_cout_rare_internal,@function
+ .size __svml_sexpm1_cout_rare_internal,.-__svml_sexpm1_cout_rare_internal
+
+ .section .rodata, "a"
+ .align 64
+
+/* Constant pool for the expm1f kernel, 1536 bytes in total.
+   Layout: a 64-entry lookup table (8 bytes per entry) followed by
+   broadcast constants.  From offset 576 onwards each constant is four
+   identical 32-bit words padded with zeros to a 64-byte slot.
+   The _ZGVbN4v_expm1f_sse4 kernel reads offsets 0 (table), 512, 528,
+   576, 640, 704, 768, 832, 896, 1024, 1088 and 1152; the remaining
+   slots are not referenced by that kernel.  */
+__svml_sexpm1_data_internal:
+ /* offset 0: lookup table, two 32-bit words per entry */
+ .long 0, 0
+ .long 90112, 958021240
+ .long 184320, 3097385429
+ .long 278528, 3107645322
+ .long 372736, 3106378502
+ .long 466944, 3080612193
+ .long 565248, 3112054766
+ .long 659456, 958049961
+ .long 757760, 959986813
+ .long 860160, 3106149520
+ .long 958464, 956772713
+ .long 1060864, 946741642
+ .long 1163264, 955571435
+ .long 1269760, 3107625502
+ .long 1372160, 958264608
+ .long 1478656, 956671716
+ .long 1585152, 964561458
+ .long 1695744, 923477569
+ .long 1806336, 3098644875
+ .long 1916928, 3088264298
+ .long 2027520, 958811154
+ .long 2142208, 924983155
+ .long 2256896, 923141657
+ .long 2371584, 958576065
+ .long 2490368, 3088362279
+ .long 2609152, 3097170630
+ .long 2727936, 942496373
+ .long 2850816, 3107506507
+ .long 2973696, 3111658168
+ .long 3096576, 3104893523
+ .long 3219456, 956129397
+ .long 3346432, 945949013
+ .long 3473408, 958293631
+ .long 3604480, 3097200438
+ .long 3735552, 3104446228
+ .long 3866624, 3081063917
+ .long 4001792, 3110833173
+ .long 4132864, 963851983
+ .long 4272128, 3102526822
+ .long 4407296, 962173765
+ .long 4546560, 962937387
+ .long 4689920, 3097677269
+ .long 4833280, 3108523826
+ .long 4976640, 3103440963
+ .long 5120000, 957448868
+ .long 5267456, 952956604
+ .long 5419008, 3111931330
+ .long 5566464, 953592564
+ .long 5718016, 958372187
+ .long 5873664, 3099234146
+ .long 6029312, 3104378117
+ .long 6184960, 938771038
+ .long 6344704, 3100132597
+ .long 6504448, 3077380739
+ .long 6668288, 3110019856
+ .long 6832128, 3109205496
+ .long 6995968, 935782365
+ .long 7163904, 3083854118
+ .long 7331840, 957821820
+ .long 7503872, 943431213
+ .long 7675904, 959423141
+ .long 7852032, 937390859
+ .long 8028160, 955037405
+ .long 8208384, 3095001478
+ /* offset 512: polynomial coefficient multiplied by R (0x3e2aaabf) */
+ .long 1042983615, 1042983615, 1042983615, 1042983615
+ /* offset 528: polynomial coefficient added next (0x3f00000f) */
+ .long 1056964623, 1056964623, 1056964623, 1056964623
+ /* offset 544: 32 zero bytes */
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+ /* offset 576: multiplier applied to x (0x42b8aa3b) */
+ .long 1119398459, 1119398459, 1119398459, 1119398459
+ .space 48
+ /* offset 640: first reduction constant (0x3c318000) */
+ .long 1009876992, 1009876992, 1009876992, 1009876992
+ .space 48
+ /* offset 704: second reduction constant (0xb65e8083) */
+ .long 3059646595, 3059646595, 3059646595, 3059646595
+ .space 48
+ /* offset 768: value added to N before its bits are masked (0x49f0fe00) */
+ .long 1240530432, 1240530432, 1240530432, 1240530432
+ .space 48
+ /* offset 832: mask giving the table byte offset (0x1f8) */
+ .long 504, 504, 504, 504
+ .space 48
+ /* offset 896: mask for the bits shifted left by 14 (0x1fe00) */
+ .long 130560, 130560, 130560, 130560
+ .space 48
+ /* offset 960 (0xfffff000) */
+ .long 4294963200, 4294963200, 4294963200, 4294963200
+ .space 48
+ /* offset 1024: -1.0f (0xbf800000) */
+ .long 3212836864, 3212836864, 3212836864, 3212836864
+ .space 48
+ /* offset 1088: mask that clears the sign bit (0x7fffffff) */
+ .long 2147483647, 2147483647, 2147483647, 2147483647
+ .space 48
+ /* offset 1152: |x| bound for the fast path (0x42ad496b) */
+ .long 1118652779, 1118652779, 1118652779, 1118652779
+ .space 48
+ /* offset 1216 */
+ .long 1111009851, 1111009851, 1111009851, 1111009851
+ .space 48
+ /* offset 1280 */
+ .long 1018262040, 1018262040, 1018262040, 1018262040
+ .space 48
+ /* offset 1344 */
+ .long 1245724544, 1245724544, 1245724544, 1245724544
+ .space 48
+ /* offset 1408 */
+ .long 124, 124, 124, 124
+ .space 48
+ /* offset 1472 */
+ .long 32640, 32640, 32640, 32640
+ .space 48
+ .type __svml_sexpm1_data_internal,@object
+ .size __svml_sexpm1_data_internal,1536
+ .align 16
+
+/* Stand-alone float constants, written as raw IEEE-754 bit patterns.
+   .FLT_14 is loaded by the vector kernel; .FLT_46 to .FLT_57 are used by
+   __svml_sexpm1_cout_rare_internal.  Decimal values in the comments are
+   rounded.  */
+.FLT_14:
+ .long 0x4b400000,0x4b400000,0x4b400000,0x4b400000 /* 12582912.0 (1.5 * 2^23) in every lane: rounding shifter */
+ .type .FLT_14,@object
+ .size .FLT_14,16
+ .align 4
+
+.FLT_45:
+ .long 0xbf800000 /* -1.0; not referenced by the code visible in this file */
+ .type .FLT_45,@object
+ .size .FLT_45,4
+ .align 4
+
+.FLT_46:
+ .long 0x42b17217 /* ~88.7228: inputs above this take the overflow path */
+ .type .FLT_46,@object
+ .size .FLT_46,4
+ .align 4
+
+.FLT_47:
+ .long 0xc2cff1b4 /* ~-103.972: lower bound tested by the scalar routine */
+ .type .FLT_47,@object
+ .size .FLT_47,4
+ .align 4
+
+.FLT_48:
+ .long 0x7f7fffff /* largest finite float; squared to produce the overflow result */
+ .type .FLT_48,@object
+ .size .FLT_48,4
+ .align 4
+
+.FLT_49:
+ .long 0x3fb8aa3b /* ~1.442695 (about 1/ln 2): multiplier applied to x */
+ .type .FLT_49,@object
+ .size .FLT_49,4
+ .align 4
+
+.FLT_50:
+ .long 0x4b400000 /* 12582912.0 (1.5 * 2^23): scalar rounding shifter */
+ .type .FLT_50,@object
+ .size .FLT_50,4
+ .align 4
+
+.FLT_51:
+ .long 0x3f317200 /* ~0.6931458: first (larger) part subtracted as n * value */
+ .type .FLT_51,@object
+ .size .FLT_51,4
+ .align 4
+
+.FLT_52:
+ .long 0x35bfbe8e /* ~1.4286e-06: second (small) part subtracted as n * value */
+ .type .FLT_52,@object
+ .size .FLT_52,4
+ .align 4
+
+.FLT_53:
+ .long 0x3f800001 /* ~1.0000001: used twice, as constant term and as r coefficient */
+ .type .FLT_53,@object
+ .size .FLT_53,4
+ .align 4
+
+.FLT_54:
+ .long 0x3efffe85 /* ~0.49999: r^2 coefficient */
+ .type .FLT_54,@object
+ .size .FLT_54,4
+ .align 4
+
+.FLT_55:
+ .long 0x3e2aa9c6 /* ~0.16666: r^3 coefficient */
+ .type .FLT_55,@object
+ .size .FLT_55,4
+ .align 4
+
+.FLT_56:
+ .long 0x3d2bb1b6 /* ~0.04192: r^4 coefficient */
+ .type .FLT_56,@object
+ .size .FLT_56,4
+ .align 4
+
+.FLT_57:
+ .long 0x3c0950ef /* ~0.00838: r^5 coefficient */
+ .type .FLT_57,@object
+ .size .FLT_57,4
new file mode 100644
@@ -0,0 +1,20 @@
+/* SSE version of vectorized expm1f, vector length is 8.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Rename the symbol before pulling in the generic source: every use of
+   _ZGVdN8v_expm1f inside ../svml_s_expm1f8_core.S is emitted as
+   _ZGVdN8v_expm1f_sse_wrapper instead.
+   NOTE(review): presumably this is the variant the ifunc selector
+   falls back to when AVX2 is unavailable -- confirm against the
+   selector header.  */
+#define _ZGVdN8v_expm1f _ZGVdN8v_expm1f_sse_wrapper
+#include "../svml_s_expm1f8_core.S"
new file mode 100644
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized expm1f, vector length is 8.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* SYMBOL_NAME must be defined before the selector header is included.
+   NOTE(review): REDIRECT_NAME and IFUNC_SELECTOR are not defined in this
+   file; presumably ifunc-mathvec-avx2.h derives them from SYMBOL_NAME
+   and picks between the _sse_wrapper and _avx2 implementations --
+   confirm.  */
+#define SYMBOL_NAME _ZGVdN8v_expm1f
+#include "ifunc-mathvec-avx2.h"
+
+/* Define the public symbol as an indirect function whose target is
+   whatever IFUNC_SELECTOR () returns.  */
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+/* Shared builds only: declare the hidden-visibility internal name
+   __GI__ZGVdN8v_expm1f for this symbol.  */
+__hidden_ver1 (_ZGVdN8v_expm1f, __GI__ZGVdN8v_expm1f,
+ __redirect__ZGVdN8v_expm1f)
+ __attribute__ ((visibility ("hidden")));
+#endif
new file mode 100644
@@ -0,0 +1,1143 @@
+/* Function expm1f vectorized with AVX2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * N = (int)(x*2^k/log(2.0)), R = x - N*log(2)/2^k
+ * exp(x) = 2^(N/2^k) * poly(R) is computed in high-low parts
+ * expm1(x) = exp(x)-1 is then obtained via multi-precision computation
+ *
+ *
+ */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(_ZGVdN8v_expm1f_avx2) /* in: ymm0 = 8 packed floats x; out: ymm0 = 8 packed float results */
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp /* rbp keeps the incoming rsp so the aligned frame can be dropped at exit */
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-64, %rsp /* round rsp down to a 64-byte boundary */
+ subq $384, %rsp /* frame: GPR saves at 0..31, x copy at 64, result copy at 128, ymm8-ymm15 saves elsewhere */
+ lea __svml_sexpm1_data_internal(%rip), %rax /* rax = table base for the per-lane loads below */
+ vmovaps %ymm0, %ymm3 /* ymm3 = x, kept intact for the special-case path */
+ vmulps 640+__svml_sexpm1_data_internal(%rip), %ymm3, %ymm4 /* ymm4 = x * const@640 */
+
+/* argument reduction: N = round(x * const@640), R = x - N*const@704 - N*const@768 */
+ vmovups 704+__svml_sexpm1_data_internal(%rip), %ymm2 /* ymm2 = const@704 */
+ vmovups 1152+__svml_sexpm1_data_internal(%rip), %ymm5 /* ymm5 = mask@1152 (0x7fffffff in every word) */
+ vmovups %ymm13, 256(%rsp) /* ymm8-ymm15 are spilled here and reloaded at .LBL_1_2 */
+ vmovups %ymm12, 224(%rsp)
+ vmovups %ymm8, 32(%rsp)
+ vmovups %ymm9, 96(%rsp)
+ vmovups %ymm11, 192(%rsp)
+ vmovups %ymm10, 160(%rsp)
+ vmovups %ymm15, 320(%rsp)
+ vmovups %ymm14, 288(%rsp)
+ vroundps $0, %ymm4, %ymm0 /* ymm0 = N, rounded to nearest */
+ vfnmadd213ps %ymm3, %ymm0, %ymm2 /* ymm2 = x - N*const@704 */
+ vfnmadd231ps 768+__svml_sexpm1_data_internal(%rip), %ymm0, %ymm2 /* ymm2 = R = ymm2 - N*const@768 */
+ vaddps 832+__svml_sexpm1_data_internal(%rip), %ymm0, %ymm0 /* ymm0 = N + const@832; its bit pattern is masked as an integer below */
+
+/* table lookup: one 8-byte entry per lane, addressed by (bits of ymm0) & mask@896 */
+ vandps 896+__svml_sexpm1_data_internal(%rip), %ymm0, %ymm4 /* ymm4 = per-lane byte offsets (mask words are 504) */
+ vandps %ymm5, %ymm3, %ymm6 /* ymm6 = x with the sign bit cleared */
+ vandnps %ymm3, %ymm5, %ymm1 /* ymm1 = sign bit of x only */
+ vcmpnle_uqps 1216+__svml_sexpm1_data_internal(%rip), %ymm6, %ymm7 /* lane set when not (|x| <= const@1216), NaN included */
+ vmovmskps %ymm7, %edx /* edx = 8-bit mask of lanes needing the scalar fallback */
+ vandps 960+__svml_sexpm1_data_internal(%rip), %ymm0, %ymm0 /* ymm0 = bits selected by mask@960, shifted left by 14 below */
+ vmovd %xmm4, %ecx /* offset of lane 0 */
+ vextractf128 $1, %ymm4, %xmm5 /* xmm5 = offsets of lanes 4-7 */
+ vpextrd $2, %xmm4, %r9d /* offset of lane 2 */
+ movslq %ecx, %rcx
+ movslq %r9d, %r9
+ vpextrd $1, %xmm4, %r8d /* offset of lane 1 */
+ movslq %r8d, %r8
+ vmovq (%rax,%rcx), %xmm6 /* entry for lane 0 */
+ vmovq (%rax,%r9), %xmm7 /* entry for lane 2 */
+ vpextrd $1, %xmm5, %ecx /* offset of lane 5 */
+ vpextrd $3, %xmm5, %r9d /* offset of lane 7 */
+ movslq %ecx, %rcx
+ movslq %r9d, %r9
+ vpextrd $3, %xmm4, %r10d /* offset of lane 3 */
+ movslq %r10d, %r10
+ .cfi_escape 0x10, 0xdb, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdc, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdd, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x20, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xde, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdf, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x60, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe0, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe1, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe2, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22
+ vmovq (%rax,%rcx), %xmm12 /* entry for lane 5 */
+ vmovq (%rax,%r9), %xmm13 /* entry for lane 7 */
+ vmovq (%rax,%r8), %xmm8 /* entry for lane 1 */
+ vmovd %xmm5, %r11d /* offset of lane 4 */
+ vmovq (%rax,%r10), %xmm9 /* entry for lane 3 */
+ vunpcklps %xmm9, %xmm8, %xmm4 /* interleave entries of lanes 1 and 3 */
+ vpextrd $2, %xmm5, %r8d /* offset of lane 6 */
+ vunpcklps %xmm13, %xmm12, %xmm5 /* interleave entries of lanes 5 and 7 */
+
+/* polynomial: ymm12 = R + R*R*(const@512*R + const@544) */
+ vmovups 512+__svml_sexpm1_data_internal(%rip), %ymm12
+ vmulps %ymm2, %ymm2, %ymm9 /* ymm9 = R*R */
+ vunpcklps %xmm7, %xmm6, %xmm14 /* interleave entries of lanes 0 and 2 */
+ vfmadd213ps 544+__svml_sexpm1_data_internal(%rip), %ymm2, %ymm12 /* ymm12 = const@512*R + const@544 */
+ movslq %r11d, %r11
+ movslq %r8d, %r8
+ vfmadd213ps %ymm2, %ymm9, %ymm12 /* ymm12 = R*R*ymm12 + R */
+
+/* T-1: rebuild the table value with the shifted bits OR-ed in, then add const@1088 */
+ vmovups 1088+__svml_sexpm1_data_internal(%rip), %ymm9 /* ymm9 = const@1088 (words are 0xbf800000, i.e. -1.0f) */
+ vmovq (%rax,%r11), %xmm10 /* entry for lane 4 */
+ vmovq (%rax,%r8), %xmm11 /* entry for lane 6 */
+ vunpcklps %xmm11, %xmm10, %xmm15 /* interleave entries of lanes 4 and 6 */
+ vpslld $14, %ymm0, %ymm11 /* ymm11 = masked bits of ymm0 shifted left by 14 */
+ vinsertf128 $1, %xmm15, %ymm14, %ymm6 /* ymm6 = entries of even lanes (0,2 | 4,6) */
+ vinsertf128 $1, %xmm5, %ymm4, %ymm7 /* ymm7 = entries of odd lanes (1,3 | 5,7) */
+ vunpcklps %ymm7, %ymm6, %ymm8 /* ymm8 = first word of each entry, in lane order */
+ vorps %ymm11, %ymm8, %ymm10 /* ymm10 = first word | shifted bits */
+ vaddps %ymm9, %ymm10, %ymm2 /* ymm2 = ymm10 + const@1088 */
+ vunpckhps %ymm7, %ymm6, %ymm13 /* ymm13 = second word of each entry, in lane order */
+ vfmadd213ps %ymm2, %ymm11, %ymm13 /* ymm13 = ymm11*ymm13 + ymm2 */
+ vsubps %ymm9, %ymm13, %ymm0 /* ymm0 = ymm13 - const@1088 */
+ vfmadd213ps %ymm13, %ymm12, %ymm0 /* ymm0 = poly*ymm0 + ymm13 */
+ vorps %ymm1, %ymm0, %ymm0 /* OR the sign bit of x into the result */
+ testl %edx, %edx
+ jne .LBL_1_3 /* at least one lane needs the scalar fallback */
+
+.LBL_1_2: /* common exit: reload ymm8-ymm15 and return ymm0 */
+ vmovups 32(%rsp), %ymm8
+ cfi_restore(91)
+ vmovups 96(%rsp), %ymm9
+ cfi_restore(92)
+ vmovups 160(%rsp), %ymm10
+ cfi_restore(93)
+ vmovups 192(%rsp), %ymm11
+ cfi_restore(94)
+ vmovups 224(%rsp), %ymm12
+ cfi_restore(95)
+ vmovups 256(%rsp), %ymm13
+ cfi_restore(96)
+ vmovups 288(%rsp), %ymm14
+ cfi_restore(97)
+ vmovups 320(%rsp), %ymm15
+ cfi_restore(98)
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ .cfi_escape 0x10, 0xdb, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdc, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdd, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x20, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xde, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xdf, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x60, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe0, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe1, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0xe2, 0x00, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22
+
+.LBL_1_3: /* special-case path: spill x and the vector result so lanes can be patched in memory */
+ vmovups %ymm3, 64(%rsp) /* x[0..7] at 64(%rsp) */
+ vmovups %ymm0, 128(%rsp) /* result[0..7] at 128(%rsp) */
+ je .LBL_1_2 /* NOTE(review): reached only via the jne above and the stores leave flags alone, so this looks never taken */
+ xorl %eax, %eax
+ vzeroupper
+ movq %rsi, 8(%rsp) /* save rsi, rdi, r12, r13: they are overwritten below */
+ movq %rdi, (%rsp)
+ movq %r12, 24(%rsp)
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x98, 0xfe, 0xff, 0xff, 0x22
+ movl %eax, %r12d /* r12d = lane index, starting at 0 */
+ movq %r13, 16(%rsp)
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x90, 0xfe, 0xff, 0xff, 0x22
+ movl %edx, %r13d /* r13d = mask of lanes to recompute */
+
+.LBL_1_7: /* loop over the 8 lanes */
+ btl %r12d, %r13d /* CF = mask bit for the current lane */
+ jc .LBL_1_10
+
+.LBL_1_8:
+ incl %r12d
+ cmpl $8, %r12d
+ jl .LBL_1_7
+ movq 8(%rsp), %rsi
+ cfi_restore(4)
+ movq (%rsp), %rdi
+ cfi_restore(5)
+ movq 24(%rsp), %r12
+ cfi_restore(12)
+ movq 16(%rsp), %r13
+ cfi_restore(13)
+ vmovups 128(%rsp), %ymm0 /* reload the result vector, including any patched lanes */
+ jmp .LBL_1_2
+ .cfi_escape 0x10, 0x04, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x05, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x98, 0xfe, 0xff, 0xff, 0x22
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x90, 0xfe, 0xff, 0xff, 0x22
+
+.LBL_1_10: /* recompute one lane with the scalar routine */
+ lea 64(%rsp,%r12,4), %rdi /* rdi = &x[lane] */
+ lea 128(%rsp,%r12,4), %rsi /* rsi = &result[lane] */
+ call __svml_sexpm1_cout_rare_internal /* its eax status is not examined here */
+ jmp .LBL_1_8
+
+END(_ZGVdN8v_expm1f_avx2)
+
+ .align 16,0x90
+
+__svml_sexpm1_cout_rare_internal: /* scalar fallback: rdi -> input float, rsi -> output float, eax = status (0, or 3 on the overflow path) */
+
+ cfi_startproc
+
+ xorl %eax, %eax /* default status 0 */
+ movss (%rdi), %xmm8 /* xmm8 = x */
+ pxor %xmm0, %xmm0 /* xmm0 = +0.0 */
+ comiss %xmm8, %xmm0
+ ja .LBL_2_8 /* 0 > x (ordered): store -1.0f */
+ movzwl 2(%rdi), %edx /* upper 16 bits of x */
+ andl $32640, %edx /* 0x7f80: keep the 8 exponent bits */
+ cmpl $32640, %edx
+ je .LBL_2_9 /* exponent all ones: Inf or NaN */
+ movss .FLT_45(%rip), %xmm0
+ comiss %xmm8, %xmm0
+ jb .LBL_2_6 /* .FLT_45 < x: overflow path */
+ comiss .FLT_46(%rip), %xmm8
+ jb .LBL_2_7 /* x < .FLT_46: return with the output slot left untouched */
+ movss .FLT_48(%rip), %xmm0
+ mulss %xmm8, %xmm0 /* xmm0 = x * .FLT_48 */
+ movss %xmm0, -24(%rsp) /* intermediates are round-tripped through scratch slots below rsp */
+ movss -24(%rsp), %xmm1
+ movss .FLT_49(%rip), %xmm2
+ movss .FLT_56(%rip), %xmm6 /* xmm6 = leading polynomial coefficient */
+ addss %xmm2, %xmm1 /* add .FLT_49 ... */
+ movss %xmm1, -20(%rsp) /* ... and keep the sum; its bits are read as an integer below */
+ movss -20(%rsp), %xmm3
+ movss .FLT_52(%rip), %xmm7
+ subss %xmm2, %xmm3 /* xmm3 = N = (x*.FLT_48 + .FLT_49) - .FLT_49 */
+ movss %xmm3, -24(%rsp)
+ movss -24(%rsp), %xmm4
+ mulss .FLT_50(%rip), %xmm4 /* N * .FLT_50 */
+ movss -24(%rsp), %xmm5
+ subss %xmm4, %xmm8 /* x -= N * .FLT_50 */
+ mulss .FLT_51(%rip), %xmm5 /* N * .FLT_51 */
+ movl -20(%rsp), %edx /* edx = raw bits of the stored sum */
+ subss %xmm5, %xmm8 /* xmm8 = r = x - N*.FLT_50 - N*.FLT_51 */
+ mulss %xmm8, %xmm6 /* Horner evaluation in r starts here */
+ shll $23, %edx /* move the low bits of the sum up by 23 */
+ addss .FLT_55(%rip), %xmm6
+ mulss %xmm8, %xmm6
+ addss .FLT_54(%rip), %xmm6
+ mulss %xmm8, %xmm6
+ addss .FLT_53(%rip), %xmm6
+ mulss %xmm8, %xmm6
+ addss %xmm7, %xmm6
+ mulss %xmm8, %xmm6 /* xmm6 = r*(.FLT_52 + r*(.FLT_53 + r*(.FLT_54 + r*(.FLT_55 + r*.FLT_56)))) */
+ addss %xmm6, %xmm7 /* xmm7 = .FLT_52 + xmm6 */
+ movss %xmm7, -16(%rsp)
+ addl -16(%rsp), %edx /* integer add of the shifted bits to the polynomial's bit pattern */
+ movl %edx, (%rsi) /* store the combined bits as the result */
+ ret
+
+.LBL_2_6: /* overflow path */
+ movss .FLT_47(%rip), %xmm0
+ mulss %xmm0, %xmm0 /* .FLT_47 squared */
+ movss %xmm0, -16(%rsp)
+ movl -16(%rsp), %eax
+ movl %eax, (%rsi) /* store the product */
+ movl $3, %eax /* status 3 */
+
+.LBL_2_7:
+ ret
+
+.LBL_2_8:
+ movl $-1082130432, (%rsi) /* 0xbf800000, the bit pattern of -1.0f */
+ ret
+
+.LBL_2_9: /* Inf or NaN input */
+ movb 3(%rdi), %dl /* top byte of x */
+ andb $-128, %dl /* isolate the sign bit */
+ cmpb $-128, %dl
+ je .LBL_2_11 /* sign bit set */
+
+.LBL_2_10:
+ mulss %xmm8, %xmm8 /* result = x * x */
+ movss %xmm8, (%rsi)
+ ret
+
+.LBL_2_11:
+ testl $8388607, (%rdi) /* 0x7fffff: any mantissa bit set? */
+ jne .LBL_2_10 /* yes: take the x * x path */
+ movss %xmm0, (%rsi) /* no: store xmm0, still +0.0 from the pxor at entry */
+ ret
+
+ cfi_endproc
+
+ .type __svml_sexpm1_cout_rare_internal,@function
+ .size __svml_sexpm1_cout_rare_internal,.-__svml_sexpm1_cout_rare_internal
+
+ .section .rodata, "a"
+ .align 64
+
+__svml_sexpm1_data_internal: /* bytes 0..511: 64 two-word entries loaded by offset; from 512 on: rows of 8 identical words read at fixed byte offsets (512, 544, 640, 704, ..., 1216) */
+ .long 0
+ .long 0
+ .long 90112
+ .long 958021240
+ .long 184320
+ .long 3097385429
+ .long 278528
+ .long 3107645322
+ .long 372736
+ .long 3106378502
+ .long 466944
+ .long 3080612193
+ .long 565248
+ .long 3112054766
+ .long 659456
+ .long 958049961
+ .long 757760
+ .long 959986813
+ .long 860160
+ .long 3106149520
+ .long 958464
+ .long 956772713
+ .long 1060864
+ .long 946741642
+ .long 1163264
+ .long 955571435
+ .long 1269760
+ .long 3107625502
+ .long 1372160
+ .long 958264608
+ .long 1478656
+ .long 956671716
+ .long 1585152
+ .long 964561458
+ .long 1695744
+ .long 923477569
+ .long 1806336
+ .long 3098644875
+ .long 1916928
+ .long 3088264298
+ .long 2027520
+ .long 958811154
+ .long 2142208
+ .long 924983155
+ .long 2256896
+ .long 923141657
+ .long 2371584
+ .long 958576065
+ .long 2490368
+ .long 3088362279
+ .long 2609152
+ .long 3097170630
+ .long 2727936
+ .long 942496373
+ .long 2850816
+ .long 3107506507
+ .long 2973696
+ .long 3111658168
+ .long 3096576
+ .long 3104893523
+ .long 3219456
+ .long 956129397
+ .long 3346432
+ .long 945949013
+ .long 3473408
+ .long 958293631
+ .long 3604480
+ .long 3097200438
+ .long 3735552
+ .long 3104446228
+ .long 3866624
+ .long 3081063917
+ .long 4001792
+ .long 3110833173
+ .long 4132864
+ .long 963851983
+ .long 4272128
+ .long 3102526822
+ .long 4407296
+ .long 962173765
+ .long 4546560
+ .long 962937387
+ .long 4689920
+ .long 3097677269
+ .long 4833280
+ .long 3108523826
+ .long 4976640
+ .long 3103440963
+ .long 5120000
+ .long 957448868
+ .long 5267456
+ .long 952956604
+ .long 5419008
+ .long 3111931330
+ .long 5566464
+ .long 953592564
+ .long 5718016
+ .long 958372187
+ .long 5873664
+ .long 3099234146
+ .long 6029312
+ .long 3104378117
+ .long 6184960
+ .long 938771038
+ .long 6344704
+ .long 3100132597
+ .long 6504448
+ .long 3077380739
+ .long 6668288
+ .long 3110019856
+ .long 6832128
+ .long 3109205496
+ .long 6995968
+ .long 935782365
+ .long 7163904
+ .long 3083854118
+ .long 7331840
+ .long 957821820
+ .long 7503872
+ .long 943431213
+ .long 7675904
+ .long 959423141
+ .long 7852032
+ .long 937390859
+ .long 8028160
+ .long 955037405
+ .long 8208384
+ .long 3095001478
+ .long 1042983615
+ .long 1042983615
+ .long 1042983615
+ .long 1042983615
+ .long 1042983615
+ .long 1042983615
+ .long 1042983615
+ .long 1042983615
+ .long 1056964623
+ .long 1056964623
+ .long 1056964623
+ .long 1056964623
+ .long 1056964623
+ .long 1056964623
+ .long 1056964623
+ .long 1056964623
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 0
+ .long 1119398459
+ .long 1119398459
+ .long 1119398459
+ .long 1119398459
+ .long 1119398459
+ .long 1119398459
+ .long 1119398459
+ .long 1119398459
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1009876992
+ .long 1009876992
+ .long 1009876992
+ .long 1009876992
+ .long 1009876992
+ .long 1009876992
+ .long 1009876992
+ .long 1009876992
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 3059646595
+ .long 3059646595
+ .long 3059646595
+ .long 3059646595
+ .long 3059646595
+ .long 3059646595
+ .long 3059646595
+ .long 3059646595
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1240530432
+ .long 1240530432
+ .long 1240530432
+ .long 1240530432
+ .long 1240530432
+ .long 1240530432
+ .long 1240530432
+ .long 1240530432
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 504
+ .long 504
+ .long 504
+ .long 504
+ .long 504
+ .long 504
+ .long 504
+ .long 504
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 130560
+ .long 130560
+ .long 130560
+ .long 130560
+ .long 130560
+ .long 130560
+ .long 130560
+ .long 130560
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 4294963200
+ .long 4294963200
+ .long 4294963200
+ .long 4294963200
+ .long 4294963200
+ .long 4294963200
+ .long 4294963200
+ .long 4294963200
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 3212836864
+ .long 3212836864
+ .long 3212836864
+ .long 3212836864
+ .long 3212836864
+ .long 3212836864
+ .long 3212836864
+ .long 3212836864
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 2147483647
+ .long 2147483647
+ .long 2147483647
+ .long 2147483647
+ .long 2147483647
+ .long 2147483647
+ .long 2147483647
+ .long 2147483647
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .long 1118652779
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1111009851
+ .long 1111009851
+ .long 1111009851
+ .long 1111009851
+ .long 1111009851
+ .long 1111009851
+ .long 1111009851
+ .long 1111009851
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1018262040
+ .long 1018262040
+ .long 1018262040
+ .long 1018262040
+ .long 1018262040
+ .long 1018262040
+ .long 1018262040
+ .long 1018262040
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 1245724544
+ .long 1245724544
+ .long 1245724544
+ .long 1245724544
+ .long 1245724544
+ .long 1245724544
+ .long 1245724544
+ .long 1245724544
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 124
+ .long 124
+ .long 124
+ .long 124
+ .long 124
+ .long 124
+ .long 124
+ .long 124
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 32640
+ .long 32640
+ .long 32640
+ .long 32640
+ .long 32640
+ .long 32640
+ .long 32640
+ .long 32640
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .type __svml_sexpm1_data_internal,@object
+ .size __svml_sexpm1_data_internal,1600
+ .align 4
+
+.FLT_44:
+ .long 0xbf800000 /* -1.0f; not referenced by any instruction in this file */
+ .type .FLT_44,@object
+ .size .FLT_44,4
+ .align 4
+
+.FLT_45:
+ .long 0x42b17217 /* ~88.7228f; inputs above this take the overflow path */
+ .type .FLT_45,@object
+ .size .FLT_45,4
+ .align 4
+
+.FLT_46:
+ .long 0xc2cff1b4 /* ~-103.972f; inputs below this return without storing */
+ .type .FLT_46,@object
+ .size .FLT_46,4
+ .align 4
+
+.FLT_47:
+ .long 0x7f7fffff /* largest finite float; squared on the overflow path */
+ .type .FLT_47,@object
+ .size .FLT_47,4
+ .align 4
+
+.FLT_48:
+ .long 0x3fb8aa3b /* ~1.442695f (close to 1/ln 2); multiplies x */
+ .type .FLT_48,@object
+ .size .FLT_48,4
+ .align 4
+
+.FLT_49:
+ .long 0x4b400000 /* 12582912.0f = 1.5 * 2^23; added then subtracted to round */
+ .type .FLT_49,@object
+ .size .FLT_49,4
+ .align 4
+
+.FLT_50:
+ .long 0x3f317200 /* ~0.693146f; first term subtracted as N * this */
+ .type .FLT_50,@object
+ .size .FLT_50,4
+ .align 4
+
+.FLT_51:
+ .long 0x35bfbe8e /* ~1.42861e-6f; second term subtracted as N * this */
+ .type .FLT_51,@object
+ .size .FLT_51,4
+ .align 4
+
+.FLT_52:
+ .long 0x3f800001 /* 1.0f + 2^-23; polynomial coefficient, used twice */
+ .type .FLT_52,@object
+ .size .FLT_52,4
+ .align 4
+
+.FLT_53:
+ .long 0x3efffe85 /* ~0.499989f; polynomial coefficient */
+ .type .FLT_53,@object
+ .size .FLT_53,4
+ .align 4
+
+.FLT_54:
+ .long 0x3e2aa9c6 /* ~0.166663f; polynomial coefficient */
+ .type .FLT_54,@object
+ .size .FLT_54,4
+ .align 4
+
+.FLT_55:
+ .long 0x3d2bb1b6 /* ~0.0419175f; polynomial coefficient */
+ .type .FLT_55,@object
+ .size .FLT_55,4
+ .align 4
+
+.FLT_56:
+ .long 0x3c0950ef /* ~0.00838111f; leading polynomial coefficient */
+ .type .FLT_56,@object
+ .size .FLT_56,4
new file mode 100644
@@ -0,0 +1,29 @@
+/* Function expm1 vectorized with SSE2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVbN2v_expm1) /* body is whatever WRAPPER_IMPL_SSE2 expands to for scalar expm1; NOTE(review): presumably one scalar call per lane -- macro not shown here */
+WRAPPER_IMPL_SSE2 expm1
+END (_ZGVbN2v_expm1)
+
+#ifndef USE_MULTIARCH /* the hidden definition is emitted only when USE_MULTIARCH is not defined */
+ libmvec_hidden_def (_ZGVbN2v_expm1)
+#endif
new file mode 100644
@@ -0,0 +1,29 @@
+/* Function expm1 vectorized with AVX2, wrapper version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVdN4v_expm1) /* built by WRAPPER_IMPL_AVX on top of _ZGVbN2v_expm1; NOTE(review): presumably splits the vector into two halves -- macro not shown here */
+WRAPPER_IMPL_AVX _ZGVbN2v_expm1
+END (_ZGVdN4v_expm1)
+
+#ifndef USE_MULTIARCH /* the hidden definition is emitted only when USE_MULTIARCH is not defined */
+ libmvec_hidden_def (_ZGVdN4v_expm1)
+#endif
new file mode 100644
@@ -0,0 +1,25 @@
+/* Function expm1 vectorized in AVX ISA as wrapper to SSE4 ISA version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVcN4v_expm1) /* built by WRAPPER_IMPL_AVX on top of _ZGVbN2v_expm1 -- macro not shown here */
+WRAPPER_IMPL_AVX _ZGVbN2v_expm1
+END (_ZGVcN4v_expm1)
new file mode 100644
@@ -0,0 +1,25 @@
+/* Function expm1 vectorized with AVX-512, wrapper to AVX2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVeN8v_expm1) /* built by WRAPPER_IMPL_AVX512 on top of _ZGVdN4v_expm1 -- macro not shown here */
+WRAPPER_IMPL_AVX512 _ZGVdN4v_expm1
+END (_ZGVeN8v_expm1)
new file mode 100644
@@ -0,0 +1,25 @@
+/* Function expm1f vectorized with AVX-512. Wrapper to AVX2 version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVeN16v_expm1f) /* built by WRAPPER_IMPL_AVX512 on top of _ZGVdN8v_expm1f -- macro not shown here */
+WRAPPER_IMPL_AVX512 _ZGVdN8v_expm1f
+END (_ZGVeN16v_expm1f)
new file mode 100644
@@ -0,0 +1,29 @@
+/* Function expm1f vectorized with SSE2, wrapper version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVbN4v_expm1f) /* body is whatever WRAPPER_IMPL_SSE2 expands to for scalar expm1f; NOTE(review): presumably one scalar call per lane -- macro not shown here */
+WRAPPER_IMPL_SSE2 expm1f
+END (_ZGVbN4v_expm1f)
+
+#ifndef USE_MULTIARCH /* the hidden definition is emitted only when USE_MULTIARCH is not defined */
+ libmvec_hidden_def (_ZGVbN4v_expm1f)
+#endif
new file mode 100644
@@ -0,0 +1,29 @@
+/* Function expm1f vectorized with AVX2, wrapper version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVdN8v_expm1f) /* built by WRAPPER_IMPL_AVX on top of _ZGVbN4v_expm1f; NOTE(review): presumably splits the vector into two halves -- macro not shown here */
+WRAPPER_IMPL_AVX _ZGVbN4v_expm1f
+END (_ZGVdN8v_expm1f)
+
+#ifndef USE_MULTIARCH /* the hidden definition is emitted only when USE_MULTIARCH is not defined */
+ libmvec_hidden_def (_ZGVdN8v_expm1f)
+#endif
new file mode 100644
@@ -0,0 +1,25 @@
+/* Function expm1f vectorized in AVX ISA as wrapper to SSE4 ISA version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVcN8v_expm1f) /* built by WRAPPER_IMPL_AVX on top of _ZGVbN4v_expm1f -- macro not shown here */
+WRAPPER_IMPL_AVX _ZGVbN4v_expm1f
+END (_ZGVcN8v_expm1f)
new file mode 100644
@@ -0,0 +1 @@
+#include "test-double-libmvec-expm1.c"
new file mode 100644
@@ -0,0 +1 @@
+#include "test-double-libmvec-expm1.c"
new file mode 100644
@@ -0,0 +1 @@
+#include "test-double-libmvec-expm1.c"
new file mode 100644
@@ -0,0 +1,3 @@
+#define LIBMVEC_TYPE double /* element type handed to the included test template */
+#define LIBMVEC_FUNC expm1 /* function name handed to the included test template */
+#include "test-vector-abi-arg1.h" /* NOTE(review): presumably the shared one-argument vector-ABI test body -- header not shown here */
@@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVbN2v_erf)
VECTOR_WRAPPER (WRAPPER_NAME (erfc), _ZGVbN2v_erfc)
VECTOR_WRAPPER (WRAPPER_NAME (exp10), _ZGVbN2v_exp10)
VECTOR_WRAPPER (WRAPPER_NAME (exp2), _ZGVbN2v_exp2)
+VECTOR_WRAPPER (WRAPPER_NAME (expm1), _ZGVbN2v_expm1)
#define VEC_INT_TYPE __m128i
@@ -43,6 +43,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVdN4v_erf)
VECTOR_WRAPPER (WRAPPER_NAME (erfc), _ZGVdN4v_erfc)
VECTOR_WRAPPER (WRAPPER_NAME (exp10), _ZGVdN4v_exp10)
VECTOR_WRAPPER (WRAPPER_NAME (exp2), _ZGVdN4v_exp2)
+VECTOR_WRAPPER (WRAPPER_NAME (expm1), _ZGVdN4v_expm1)
#ifndef __ILP32__
# define VEC_INT_TYPE __m256i
@@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVcN4v_erf)
VECTOR_WRAPPER (WRAPPER_NAME (erfc), _ZGVcN4v_erfc)
VECTOR_WRAPPER (WRAPPER_NAME (exp10), _ZGVcN4v_exp10)
VECTOR_WRAPPER (WRAPPER_NAME (exp2), _ZGVcN4v_exp2)
+VECTOR_WRAPPER (WRAPPER_NAME (expm1), _ZGVcN4v_expm1)
#define VEC_INT_TYPE __m128i
@@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVeN8v_erf)
VECTOR_WRAPPER (WRAPPER_NAME (erfc), _ZGVeN8v_erfc)
VECTOR_WRAPPER (WRAPPER_NAME (exp10), _ZGVeN8v_exp10)
VECTOR_WRAPPER (WRAPPER_NAME (exp2), _ZGVeN8v_exp2)
+VECTOR_WRAPPER (WRAPPER_NAME (expm1), _ZGVeN8v_expm1)
#ifndef __ILP32__
# define VEC_INT_TYPE __m512i
new file mode 100644
@@ -0,0 +1 @@
+#include "test-float-libmvec-expm1f.c"
new file mode 100644
@@ -0,0 +1 @@
+#include "test-float-libmvec-expm1f.c"
new file mode 100644
@@ -0,0 +1 @@
+#include "test-float-libmvec-expm1f.c"
new file mode 100644
@@ -0,0 +1,3 @@
+#define LIBMVEC_TYPE float /* element type handed to the included test template */
+#define LIBMVEC_FUNC expm1f /* function name handed to the included test template */
+#include "test-vector-abi-arg1.h" /* NOTE(review): presumably the shared one-argument vector-ABI test body -- header not shown here */
@@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVeN16v_erff)
VECTOR_WRAPPER (WRAPPER_NAME (erfcf), _ZGVeN16v_erfcf)
VECTOR_WRAPPER (WRAPPER_NAME (exp10f), _ZGVeN16v_exp10f)
VECTOR_WRAPPER (WRAPPER_NAME (exp2f), _ZGVeN16v_exp2f)
+VECTOR_WRAPPER (WRAPPER_NAME (expm1f), _ZGVeN16v_expm1f)
#define VEC_INT_TYPE __m512i
@@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVbN4v_erff)
VECTOR_WRAPPER (WRAPPER_NAME (erfcf), _ZGVbN4v_erfcf)
VECTOR_WRAPPER (WRAPPER_NAME (exp10f), _ZGVbN4v_exp10f)
VECTOR_WRAPPER (WRAPPER_NAME (exp2f), _ZGVbN4v_exp2f)
+VECTOR_WRAPPER (WRAPPER_NAME (expm1f), _ZGVbN4v_expm1f)
#define VEC_INT_TYPE __m128i
@@ -43,6 +43,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVdN8v_erff)
VECTOR_WRAPPER (WRAPPER_NAME (erfcf), _ZGVdN8v_erfcf)
VECTOR_WRAPPER (WRAPPER_NAME (exp10f), _ZGVdN8v_exp10f)
VECTOR_WRAPPER (WRAPPER_NAME (exp2f), _ZGVdN8v_exp2f)
+VECTOR_WRAPPER (WRAPPER_NAME (expm1f), _ZGVdN8v_expm1f)
/* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */
#undef VECTOR_WRAPPER_fFF
@@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVcN8v_erff)
VECTOR_WRAPPER (WRAPPER_NAME (erfcf), _ZGVcN8v_erfcf)
VECTOR_WRAPPER (WRAPPER_NAME (exp10f), _ZGVcN8v_exp10f)
VECTOR_WRAPPER (WRAPPER_NAME (exp2f), _ZGVcN8v_exp2f)
+VECTOR_WRAPPER (WRAPPER_NAME (expm1f), _ZGVcN8v_expm1f)
#define VEC_INT_TYPE __m128i