Message ID | 20211229064000.1465621-14-skpgkp2@gmail.com |
---|---|
State | New |
Headers | show |
Series | x86-64: Add vector math functions to libmvec | expand |
On Tue, Dec 28, 2021 at 10:39:55PM -0800, Sunil K Pandey wrote: > Implement vectorized log1p/log1pf containing SSE, AVX, AVX2 and > AVX512 versions for libmvec as per vector ABI. It also contains > accuracy and ABI tests for vector log1p/log1pf with regenerated ulps. > --- > bits/libm-simd-decl-stubs.h | 11 + > math/bits/mathcalls.h | 2 +- > .../unix/sysv/linux/x86_64/libmvec.abilist | 8 + > sysdeps/x86/fpu/bits/math-vector.h | 4 + > .../x86/fpu/finclude/math-vector-fortran.h | 4 + > sysdeps/x86_64/fpu/Makeconfig | 1 + > sysdeps/x86_64/fpu/Versions | 2 + > sysdeps/x86_64/fpu/libm-test-ulps | 20 + > .../fpu/multiarch/svml_d_log1p2_core-sse2.S | 20 + > .../x86_64/fpu/multiarch/svml_d_log1p2_core.c | 27 + > .../fpu/multiarch/svml_d_log1p2_core_sse4.S | 1398 +++++++++++++++++ > .../fpu/multiarch/svml_d_log1p4_core-sse.S | 20 + > .../x86_64/fpu/multiarch/svml_d_log1p4_core.c | 27 + > .../fpu/multiarch/svml_d_log1p4_core_avx2.S | 1383 ++++++++++++++++ > .../fpu/multiarch/svml_d_log1p8_core-avx2.S | 20 + > .../x86_64/fpu/multiarch/svml_d_log1p8_core.c | 27 + > .../fpu/multiarch/svml_d_log1p8_core_avx512.S | 317 ++++ > .../fpu/multiarch/svml_s_log1pf16_core-avx2.S | 20 + > .../fpu/multiarch/svml_s_log1pf16_core.c | 28 + > .../multiarch/svml_s_log1pf16_core_avx512.S | 271 ++++ > .../fpu/multiarch/svml_s_log1pf4_core-sse2.S | 20 + > .../fpu/multiarch/svml_s_log1pf4_core.c | 28 + > .../fpu/multiarch/svml_s_log1pf4_core_sse4.S | 252 +++ > .../fpu/multiarch/svml_s_log1pf8_core-sse.S | 20 + > .../fpu/multiarch/svml_s_log1pf8_core.c | 28 + > .../fpu/multiarch/svml_s_log1pf8_core_avx2.S | 254 +++ > sysdeps/x86_64/fpu/svml_d_log1p2_core.S | 29 + > sysdeps/x86_64/fpu/svml_d_log1p4_core.S | 29 + > sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S | 25 + > sysdeps/x86_64/fpu/svml_d_log1p8_core.S | 25 + > sysdeps/x86_64/fpu/svml_s_log1pf16_core.S | 25 + > sysdeps/x86_64/fpu/svml_s_log1pf4_core.S | 29 + > sysdeps/x86_64/fpu/svml_s_log1pf8_core.S | 29 + > 
sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S | 25 + > .../fpu/test-double-libmvec-log1p-avx.c | 1 + > .../fpu/test-double-libmvec-log1p-avx2.c | 1 + > .../fpu/test-double-libmvec-log1p-avx512f.c | 1 + > .../x86_64/fpu/test-double-libmvec-log1p.c | 3 + > .../x86_64/fpu/test-double-vlen2-wrappers.c | 1 + > .../fpu/test-double-vlen4-avx2-wrappers.c | 1 + > .../x86_64/fpu/test-double-vlen4-wrappers.c | 1 + > .../x86_64/fpu/test-double-vlen8-wrappers.c | 1 + > .../fpu/test-float-libmvec-log1pf-avx.c | 1 + > .../fpu/test-float-libmvec-log1pf-avx2.c | 1 + > .../fpu/test-float-libmvec-log1pf-avx512f.c | 1 + > .../x86_64/fpu/test-float-libmvec-log1pf.c | 3 + > .../x86_64/fpu/test-float-vlen16-wrappers.c | 1 + > .../x86_64/fpu/test-float-vlen4-wrappers.c | 1 + > .../fpu/test-float-vlen8-avx2-wrappers.c | 1 + > .../x86_64/fpu/test-float-vlen8-wrappers.c | 1 + > 50 files changed, 4447 insertions(+), 1 deletion(-) > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c > create mode 100644 
sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p2_core.S > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p4_core.S > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p8_core.S > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf16_core.S > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf4_core.S > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf8_core.S > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p.c > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c > > diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h > index 73252615ca..845246fab9 100644 > --- a/bits/libm-simd-decl-stubs.h > +++ b/bits/libm-simd-decl-stubs.h > @@ -241,4 +241,15 @@ > #define __DECL_SIMD_log2f32x > #define __DECL_SIMD_log2f64x > #define __DECL_SIMD_log2f128x > + > +#define __DECL_SIMD_log1p > +#define __DECL_SIMD_log1pf > +#define __DECL_SIMD_log1pl > +#define __DECL_SIMD_log1pf16 > +#define __DECL_SIMD_log1pf32 > +#define __DECL_SIMD_log1pf64 > +#define __DECL_SIMD_log1pf128 > +#define __DECL_SIMD_log1pf32x > +#define __DECL_SIMD_log1pf64x > +#define __DECL_SIMD_log1pf128x > #endif > diff 
--git a/math/bits/mathcalls.h b/math/bits/mathcalls.h > index bfe52a4666..aa4bc61aa4 100644 > --- a/math/bits/mathcalls.h > +++ b/math/bits/mathcalls.h > @@ -119,7 +119,7 @@ __MATHCALL_VEC (exp10,, (_Mdouble_ __x)); > __MATHCALL_VEC (expm1,, (_Mdouble_ __x)); > > /* Return log(1 + X). */ > -__MATHCALL (log1p,, (_Mdouble_ __x)); > +__MATHCALL_VEC (log1p,, (_Mdouble_ __x)); > > /* Return the base 2 signed integral exponent of X. */ > __MATHCALL (logb,, (_Mdouble_ __x)); > diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist > index fa8b016c5d..68b940606a 100644 > --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist > +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist > @@ -55,6 +55,7 @@ GLIBC_2.35 _ZGVbN2v_exp10 F > GLIBC_2.35 _ZGVbN2v_exp2 F > GLIBC_2.35 _ZGVbN2v_expm1 F > GLIBC_2.35 _ZGVbN2v_log10 F > +GLIBC_2.35 _ZGVbN2v_log1p F > GLIBC_2.35 _ZGVbN2v_log2 F > GLIBC_2.35 _ZGVbN2v_sinh F > GLIBC_2.35 _ZGVbN2vv_atan2 F > @@ -68,6 +69,7 @@ GLIBC_2.35 _ZGVbN4v_exp10f F > GLIBC_2.35 _ZGVbN4v_exp2f F > GLIBC_2.35 _ZGVbN4v_expm1f F > GLIBC_2.35 _ZGVbN4v_log10f F > +GLIBC_2.35 _ZGVbN4v_log1pf F > GLIBC_2.35 _ZGVbN4v_log2f F > GLIBC_2.35 _ZGVbN4v_sinhf F > GLIBC_2.35 _ZGVbN4vv_atan2f F > @@ -81,6 +83,7 @@ GLIBC_2.35 _ZGVcN4v_exp10 F > GLIBC_2.35 _ZGVcN4v_exp2 F > GLIBC_2.35 _ZGVcN4v_expm1 F > GLIBC_2.35 _ZGVcN4v_log10 F > +GLIBC_2.35 _ZGVcN4v_log1p F > GLIBC_2.35 _ZGVcN4v_log2 F > GLIBC_2.35 _ZGVcN4v_sinh F > GLIBC_2.35 _ZGVcN4vv_atan2 F > @@ -94,6 +97,7 @@ GLIBC_2.35 _ZGVcN8v_exp10f F > GLIBC_2.35 _ZGVcN8v_exp2f F > GLIBC_2.35 _ZGVcN8v_expm1f F > GLIBC_2.35 _ZGVcN8v_log10f F > +GLIBC_2.35 _ZGVcN8v_log1pf F > GLIBC_2.35 _ZGVcN8v_log2f F > GLIBC_2.35 _ZGVcN8v_sinhf F > GLIBC_2.35 _ZGVcN8vv_atan2f F > @@ -107,6 +111,7 @@ GLIBC_2.35 _ZGVdN4v_exp10 F > GLIBC_2.35 _ZGVdN4v_exp2 F > GLIBC_2.35 _ZGVdN4v_expm1 F > GLIBC_2.35 _ZGVdN4v_log10 F > +GLIBC_2.35 _ZGVdN4v_log1p F > GLIBC_2.35 _ZGVdN4v_log2 F > GLIBC_2.35 
_ZGVdN4v_sinh F > GLIBC_2.35 _ZGVdN4vv_atan2 F > @@ -120,6 +125,7 @@ GLIBC_2.35 _ZGVdN8v_exp10f F > GLIBC_2.35 _ZGVdN8v_exp2f F > GLIBC_2.35 _ZGVdN8v_expm1f F > GLIBC_2.35 _ZGVdN8v_log10f F > +GLIBC_2.35 _ZGVdN8v_log1pf F > GLIBC_2.35 _ZGVdN8v_log2f F > GLIBC_2.35 _ZGVdN8v_sinhf F > GLIBC_2.35 _ZGVdN8vv_atan2f F > @@ -133,6 +139,7 @@ GLIBC_2.35 _ZGVeN16v_exp10f F > GLIBC_2.35 _ZGVeN16v_exp2f F > GLIBC_2.35 _ZGVeN16v_expm1f F > GLIBC_2.35 _ZGVeN16v_log10f F > +GLIBC_2.35 _ZGVeN16v_log1pf F > GLIBC_2.35 _ZGVeN16v_log2f F > GLIBC_2.35 _ZGVeN16v_sinhf F > GLIBC_2.35 _ZGVeN16vv_atan2f F > @@ -146,6 +153,7 @@ GLIBC_2.35 _ZGVeN8v_exp10 F > GLIBC_2.35 _ZGVeN8v_exp2 F > GLIBC_2.35 _ZGVeN8v_expm1 F > GLIBC_2.35 _ZGVeN8v_log10 F > +GLIBC_2.35 _ZGVeN8v_log1p F > GLIBC_2.35 _ZGVeN8v_log2 F > GLIBC_2.35 _ZGVeN8v_sinh F > GLIBC_2.35 _ZGVeN8vv_atan2 F > diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h > index 59d284a10a..14c9db3bb3 100644 > --- a/sysdeps/x86/fpu/bits/math-vector.h > +++ b/sysdeps/x86/fpu/bits/math-vector.h > @@ -110,6 +110,10 @@ > # define __DECL_SIMD_log2 __DECL_SIMD_x86_64 > # undef __DECL_SIMD_log2f > # define __DECL_SIMD_log2f __DECL_SIMD_x86_64 > +# undef __DECL_SIMD_log1p > +# define __DECL_SIMD_log1p __DECL_SIMD_x86_64 > +# undef __DECL_SIMD_log1pf > +# define __DECL_SIMD_log1pf __DECL_SIMD_x86_64 > > # endif > #endif > diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h > index a2ca9a203f..3dca196432 100644 > --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h > +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h > @@ -54,6 +54,8 @@ > !GCC$ builtin (log10f) attributes simd (notinbranch) if('x86_64') > !GCC$ builtin (log2) attributes simd (notinbranch) if('x86_64') > !GCC$ builtin (log2f) attributes simd (notinbranch) if('x86_64') > +!GCC$ builtin (log1p) attributes simd (notinbranch) if('x86_64') > +!GCC$ builtin (log1pf) attributes simd (notinbranch) 
if('x86_64') > > !GCC$ builtin (cos) attributes simd (notinbranch) if('x32') > !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32') > @@ -93,3 +95,5 @@ > !GCC$ builtin (log10f) attributes simd (notinbranch) if('x32') > !GCC$ builtin (log2) attributes simd (notinbranch) if('x32') > !GCC$ builtin (log2f) attributes simd (notinbranch) if('x32') > +!GCC$ builtin (log1p) attributes simd (notinbranch) if('x32') > +!GCC$ builtin (log1pf) attributes simd (notinbranch) if('x32') > diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig > index 8d6d0915af..378cb06d37 100644 > --- a/sysdeps/x86_64/fpu/Makeconfig > +++ b/sysdeps/x86_64/fpu/Makeconfig > @@ -36,6 +36,7 @@ libmvec-funcs = \ > hypot \ > log \ > log10 \ > + log1p \ > log2 \ > pow \ > sin \ > diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions > index 1b48c2d642..155fb115f3 100644 > --- a/sysdeps/x86_64/fpu/Versions > +++ b/sysdeps/x86_64/fpu/Versions > @@ -23,6 +23,7 @@ libmvec { > _ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2; > _ZGVbN2v_expm1; _ZGVcN4v_expm1; _ZGVdN4v_expm1; _ZGVeN8v_expm1; > _ZGVbN2v_log10; _ZGVcN4v_log10; _ZGVdN4v_log10; _ZGVeN8v_log10; > + _ZGVbN2v_log1p; _ZGVcN4v_log1p; _ZGVdN4v_log1p; _ZGVeN8v_log1p; > _ZGVbN2v_log2; _ZGVcN4v_log2; _ZGVdN4v_log2; _ZGVeN8v_log2; > _ZGVbN2v_sinh; _ZGVcN4v_sinh; _ZGVdN4v_sinh; _ZGVeN8v_sinh; > _ZGVbN2vv_atan2; _ZGVcN4vv_atan2; _ZGVdN4vv_atan2; _ZGVeN8vv_atan2; > @@ -36,6 +37,7 @@ libmvec { > _ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f; > _ZGVbN4v_expm1f; _ZGVcN8v_expm1f; _ZGVdN8v_expm1f; _ZGVeN16v_expm1f; > _ZGVbN4v_log10f; _ZGVcN8v_log10f; _ZGVdN8v_log10f; _ZGVeN16v_log10f; > + _ZGVbN4v_log1pf; _ZGVcN8v_log1pf; _ZGVdN8v_log1pf; _ZGVeN16v_log1pf; > _ZGVbN4v_log2f; _ZGVcN8v_log2f; _ZGVdN8v_log2f; _ZGVeN16v_log2f; > _ZGVbN4v_sinhf; _ZGVcN8v_sinhf; _ZGVdN8v_sinhf; _ZGVeN16v_sinhf; > _ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f; > diff --git 
a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps > index 3b7f3cee6f..a2b15a795b 100644 > --- a/sysdeps/x86_64/fpu/libm-test-ulps > +++ b/sysdeps/x86_64/fpu/libm-test-ulps > @@ -1685,6 +1685,26 @@ float: 2 > float128: 2 > ldouble: 3 > > +Function: "log1p_vlen16": > +float: 2 > + > +Function: "log1p_vlen2": > +double: 1 > + > +Function: "log1p_vlen4": > +double: 1 > +float: 2 > + > +Function: "log1p_vlen4_avx2": > +double: 1 > + > +Function: "log1p_vlen8": > +double: 1 > +float: 2 > + > +Function: "log1p_vlen8_avx2": > +float: 2 > + > Function: "log2": > double: 2 > float: 1 > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S > new file mode 100644 > index 0000000000..8004088346 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S > @@ -0,0 +1,20 @@ > +/* SSE2 version of vectorized log1p, vector length is 2. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#define _ZGVbN2v_log1p _ZGVbN2v_log1p_sse2 > +#include "../svml_d_log1p2_core.S" > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c > new file mode 100644 > index 0000000000..35ca620aba > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c > @@ -0,0 +1,27 @@ > +/* Multiple versions of vectorized log1p, vector length is 2. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define SYMBOL_NAME _ZGVbN2v_log1p > +#include "ifunc-mathvec-sse4_1.h" > + > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > + > +#ifdef SHARED > +__hidden_ver1 (_ZGVbN2v_log1p, __GI__ZGVbN2v_log1p, __redirect__ZGVbN2v_log1p) > + __attribute__ ((visibility ("hidden"))); > +#endif > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S > new file mode 100644 > index 0000000000..9d3f0647b4 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S > @@ -0,0 +1,1398 @@ > +/* Function log1p vectorized with SSE4. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + https://www.gnu.org/licenses/. */ > + > +/* > + * ALGORITHM DESCRIPTION: > + * > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > + * Get short reciprocal approximation Rcp ~ 1/xh > + * R = (Rcp*xh - 1.0) + Rcp*xl > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > + * log(Rcp) is tabulated > + * > + * > + */ > + > +/* Offsets for data table __svml_dlog1p_data_internal > + */ > +#define Log_HA_table 0 > +#define Log_LA_table 8208 > +#define poly_coeff 12320 > +#define ExpMask 12384 > +#define Two10 12400 > +#define MinLog1p 12416 > +#define MaxLog1p 12432 > +#define One 12448 > +#define SgnMask 12464 > +#define XThreshold 12480 > +#define XhMask 12496 > +#define Threshold 12512 > +#define Bias 12528 > +#define Bias1 12544 > +#define ExpMask0 12560 > +#define ExpMask2 12576 > +#define L2 12592 > + > +/* Lookup bias for data table __svml_dlog1p_data_internal. 
*/ > +#define Table_Lookup_Bias -0x405ff0 > + > +#include <sysdep.h> > + > + .text > + .section .text.sse4,"ax",@progbits > +ENTRY(_ZGVbN2v_log1p_sse4) > + pushq %rbp > + cfi_def_cfa_offset(16) > + movq %rsp, %rbp > + cfi_def_cfa(6, 16) > + cfi_offset(6, -16) > + andq $-32, %rsp > + subq $64, %rsp > + movaps %xmm0, %xmm7 > + > +/* SgnMask used by all accuracies */ > + movups SgnMask+__svml_dlog1p_data_internal(%rip), %xmm6 > + lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %rsi > + movaps %xmm6, %xmm8 > + movaps %xmm7, %xmm15 > + movups One+__svml_dlog1p_data_internal(%rip), %xmm0 > + andps %xmm7, %xmm8 > + cmpltpd XThreshold+__svml_dlog1p_data_internal(%rip), %xmm8 > + cmpnlepd MaxLog1p+__svml_dlog1p_data_internal(%rip), %xmm15 > + movaps %xmm0, %xmm4 > + > +/* compute 1+x as high, low parts */ > + movaps %xmm0, %xmm9 > + addpd %xmm7, %xmm4 > + maxpd %xmm7, %xmm9 > + orps XhMask+__svml_dlog1p_data_internal(%rip), %xmm8 > + movaps %xmm0, %xmm5 > + > +/* preserve mantissa, set input exponent to 2^(-10) */ > + movups ExpMask+__svml_dlog1p_data_internal(%rip), %xmm3 > + andps %xmm8, %xmm4 > + andps %xmm4, %xmm3 > + > +/* check range */ > + movaps %xmm7, %xmm8 > + orps Two10+__svml_dlog1p_data_internal(%rip), %xmm3 > + > +/* Compute SignMask for all accuracies, including EP */ > + andnps %xmm7, %xmm6 > + > +/* reciprocal approximation good to at least 11 bits */ > + cvtpd2ps %xmm3, %xmm10 > + minpd %xmm7, %xmm5 > + subpd %xmm4, %xmm9 > + cmpltpd MinLog1p+__svml_dlog1p_data_internal(%rip), %xmm8 > + addpd %xmm9, %xmm5 > + movlhps %xmm10, %xmm10 > + orps %xmm15, %xmm8 > + rcpps %xmm10, %xmm11 > + > +/* combine and get argument value range mask */ > + movmskpd %xmm8, %edx > + > +/* round reciprocal to nearest integer, will have 1+9 mantissa bits */ > + movups .FLT_16(%rip), %xmm13 > + > +/* exponent of X needed to scale Xl */ > + movdqu ExpMask0+__svml_dlog1p_data_internal(%rip), %xmm12 > + cvtps2pd %xmm11, %xmm1 > + addpd %xmm13, %xmm1 > + subpd %xmm13, %xmm1 > 
+ > +/* 2^ (-10-exp(X) ) */ > + movdqu ExpMask2+__svml_dlog1p_data_internal(%rip), %xmm2 > + pand %xmm4, %xmm12 > + psubq %xmm12, %xmm2 > + mulpd %xmm1, %xmm3 > + > +/* scale DblRcp */ > + mulpd %xmm1, %xmm2 > + subpd %xmm0, %xmm3 > + > +/* > + * argument reduction > + * VQFMS( D, R, X, DblRcp1, One ); > + */ > + mulpd %xmm2, %xmm5 > + addpd %xmm5, %xmm3 > + > +/* exponent*log(2.0) */ > + movups Threshold+__svml_dlog1p_data_internal(%rip), %xmm10 > + > +/* exponent bits */ > + psrlq $20, %xmm4 > + pshufd $221, %xmm4, %xmm14 > + > +/* > + * prepare table index > + * table lookup > + */ > + movaps %xmm1, %xmm4 > + cmpltpd %xmm1, %xmm10 > + > +/* biased exponent in DP format */ > + cvtdq2pd %xmm14, %xmm0 > + > +/* polynomial */ > + movups poly_coeff+__svml_dlog1p_data_internal(%rip), %xmm1 > + movaps %xmm3, %xmm5 > + mulpd %xmm3, %xmm1 > + mulpd %xmm3, %xmm5 > + addpd poly_coeff+16+__svml_dlog1p_data_internal(%rip), %xmm1 > + movups poly_coeff+32+__svml_dlog1p_data_internal(%rip), %xmm2 > + psrlq $40, %xmm4 > + mulpd %xmm3, %xmm2 > + mulpd %xmm5, %xmm1 > + addpd poly_coeff+48+__svml_dlog1p_data_internal(%rip), %xmm2 > + movd %xmm4, %eax > + andps Bias+__svml_dlog1p_data_internal(%rip), %xmm10 > + addpd %xmm1, %xmm2 > + > +/* reconstruction */ > + mulpd %xmm2, %xmm5 > + orps Bias1+__svml_dlog1p_data_internal(%rip), %xmm10 > + pshufd $2, %xmm4, %xmm9 > + subpd %xmm10, %xmm0 > + addpd %xmm5, %xmm3 > + movd %xmm9, %ecx > + mulpd L2+__svml_dlog1p_data_internal(%rip), %xmm0 > + movslq %eax, %rax > + movslq %ecx, %rcx > + movsd (%rsi,%rax), %xmm11 > + movhpd (%rsi,%rcx), %xmm11 > + addpd %xmm3, %xmm11 > + addpd %xmm11, %xmm0 > + > +/* OR in the Sign of input argument to produce correct log1p(-0) */ > + orps %xmm6, %xmm0 > + testl %edx, %edx > + > +/* Go to special inputs processing branch */ > + jne L(SPECIAL_VALUES_BRANCH) > + # LOE rbx r12 r13 r14 r15 edx xmm0 xmm7 > + > +/* Restore registers > + * and exit the function > + */ > + > +L(EXIT): > + movq %rbp, %rsp > + popq 
%rbp > + cfi_def_cfa(7, 8) > + cfi_restore(6) > + ret > + cfi_def_cfa(6, 16) > + cfi_offset(6, -16) > + > +/* Branch to process > + * special inputs > + */ > + > +L(SPECIAL_VALUES_BRANCH): > + movups %xmm7, 32(%rsp) > + movups %xmm0, 48(%rsp) > + # LOE rbx r12 r13 r14 r15 edx > + > + xorl %eax, %eax > + movq %r12, 16(%rsp) > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22 > + movl %eax, %r12d > + movq %r13, 8(%rsp) > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 > + movl %edx, %r13d > + movq %r14, (%rsp) > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 > + # LOE rbx r15 r12d r13d > + > +/* Range mask > + * bits check > + */ > + > +L(RANGEMASK_CHECK): > + btl %r12d, %r13d > + > +/* Call scalar math function */ > + jc L(SCALAR_MATH_CALL) > + # LOE rbx r15 r12d r13d > + > +/* Special inputs > + * processing loop > + */ > + > +L(SPECIAL_VALUES_LOOP): > + incl %r12d > + cmpl $2, %r12d > + > +/* Check bits in range mask */ > + jl L(RANGEMASK_CHECK) > + # LOE rbx r15 r12d r13d > + > + movq 16(%rsp), %r12 > + cfi_restore(12) > + movq 8(%rsp), %r13 > + cfi_restore(13) > + movq (%rsp), %r14 > + cfi_restore(14) > + movups 48(%rsp), %xmm0 > + > +/* Go to exit */ > + jmp L(EXIT) > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 
0xff, 0xff, 0xff, 0x22 > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 > + # LOE rbx r12 r13 r14 r15 xmm0 > + > +/* Scalar math function call > + * to process special input > + */ > + > +L(SCALAR_MATH_CALL): > + movl %r12d, %r14d > + movsd 32(%rsp,%r14,8), %xmm0 > + call log1p@PLT > + # LOE rbx r14 r15 r12d r13d xmm0 > + > + movsd %xmm0, 48(%rsp,%r14,8) > + > +/* Process special inputs in loop */ > + jmp L(SPECIAL_VALUES_LOOP) > + # LOE rbx r15 r12d r13d > +END(_ZGVbN2v_log1p_sse4) > + > + .section .rodata, "a" > + .align 16 > + > +#ifdef __svml_dlog1p_data_internal_typedef > +typedef unsigned int VUINT32; > +typedef struct { > + __declspec(align(16)) VUINT32 Log_HA_table[(1<<10)+2][2]; > + __declspec(align(16)) VUINT32 Log_LA_table[(1<<9)+1][2]; > + __declspec(align(16)) VUINT32 poly_coeff[4][2][2]; > + __declspec(align(16)) VUINT32 ExpMask[2][2]; > + __declspec(align(16)) VUINT32 Two10[2][2]; > + __declspec(align(16)) VUINT32 MinLog1p[2][2]; > + __declspec(align(16)) VUINT32 MaxLog1p[2][2]; > + __declspec(align(16)) VUINT32 One[2][2]; > + __declspec(align(16)) VUINT32 SgnMask[2][2]; > + __declspec(align(16)) VUINT32 XThreshold[2][2]; > + __declspec(align(16)) VUINT32 XhMask[2][2]; > + __declspec(align(16)) VUINT32 Threshold[2][2]; > + __declspec(align(16)) VUINT32 Bias[2][2]; > + __declspec(align(16)) VUINT32 Bias1[2][2]; > + __declspec(align(16)) VUINT32 ExpMask0[2][2]; > + __declspec(align(16)) VUINT32 ExpMask2[2][2]; > + __declspec(align(16)) VUINT32 L2[2][2]; > +} __svml_dlog1p_data_internal; > +#endif > 
+__svml_dlog1p_data_internal: > + /* Log_HA_table */ > + .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100 > + .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a > + .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff > + .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a > + .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb > + .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e > + .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b > + .quad 0xc0862347acebaf68, 0xbe1cef3b152048af > + .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e > + .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4 > + .quad 0xc08623537ac30980, 0xbe1cefc4642ee597 > + .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16 > + .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6 > + .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362 > + .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557 > + .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b > + .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed > + .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed > + .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f > + .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce > + .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7 > + .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1 > + .quad 0xc086238206e94218, 0xbe1ceee898588610 > + .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea > + .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6 > + .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6 > + .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165 > + .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1 > + .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b > + .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670 > + .quad 0xc08623a07b28ae60, 0xbe1cef359363787c > + .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c > + .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84 > + .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7 > + .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b > + .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf > + .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62 > + .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b > + .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98 > + .quad 0xc08623c2358ea2a0, 
0xbe1cf0976acada87 > + .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff > + .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798 > + .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e > + .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde > + .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b > + .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c > + .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98 > + .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f > + .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358 > + .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380 > + .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4 > + .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b > + .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2 > + .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4 > + .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400 > + .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7 > + .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a > + .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d > + .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b > + .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575 > + .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951 > + .quad 0xc086241263e87f50, 0xbe1cf16e74768529 > + .quad 0xc0862415f6193658, 0xbe1cefec64b8becb > + .quad 0xc086241986b28f30, 0xbe1cf0838d210baa > + .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11 > + .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805 > + .quad 0xc08624242f008380, 0xbe1ceea988c5a417 > + .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5 > + .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38 > + .quad 0xc086242ec92eaee8, 0xbe1cef0946455411 > + .quad 0xc08624324ecbaf98, 0xbe1cefea60907739 > + .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42 > + .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d > + .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e > + .quad 0xc086244055d2c968, 0xbe1cef345284c119 > + .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219 > + .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114 > + .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189 > + .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f > + .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f > + .quad 
0xc086245532aa04f0, 0xbe1cf0c8099fdfd5 > + .quad 0xc0862458a789e250, 0xbe1cf0b173796a31 > + .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d > + .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb > + .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7 > + .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f > + .quad 0xc0862469d9a591c0, 0xbe1cef503d947663 > + .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2 > + .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc > + .quad 0xc086247419475160, 0xbe1cf03dd9922331 > + .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129 > + .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6 > + .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100 > + .quad 0xc0862481af27c528, 0xbe1cee8a6593278a > + .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7 > + .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8 > + .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002 > + .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4 > + .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c > + .quad 0xc0862495e5179270, 0xbe1cee757f20c326 > + .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4 > + .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97 > + .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb > + .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e > + .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b > + .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80 > + .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71 > + .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9 > + .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139 > + .quad 0xc08624b72472a528, 0xbe1cf031c931c11f > + .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7 > + .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d > + .quad 0xc08624c103245238, 0xbe1cefd492f1ba75 > + .quad 0xc08624c44aacab08, 0xbe1cf1253e154466 > + .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55 > + .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe > + .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f > + .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968 > + .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78 > + .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75 > + .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2 
> + .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d > + .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed > + .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f > + .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65 > + .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078 > + .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a > + .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a > + .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2 > + .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc > + .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501 > + .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7 > + .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c > + .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c > + .quad 0xc0862507f9448db0, 0xbe1cf082da464994 > + .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf > + .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531 > + .quad 0xc08625117667dd78, 0xbe1cf1106599c962 > + .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f > + .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6 > + .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4 > + .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092 > + .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd > + .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7 > + .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25 > + .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d > + .quad 0xc086252dab033898, 0xbe1cf220bba8861f > + .quad 0xc0862530c732b078, 0xbe1cef51e310eae2 > + .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae > + .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8 > + .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171 > + .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408 > + .quad 0xc08625404216d160, 0xbe1cf22d2536f06b > + .quad 0xc08625435715e498, 0xbe1cef6abbf2e268 > + .quad 0xc08625466ae57648, 0xbe1cf093a14789f5 > + .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c > + .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc > + .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157 > + .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997 > + .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff > + .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f > + .quad 0xc086255bd4ac4590, 
0xbe1cf1b4c51113c9 > + .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d > + .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc > + .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9 > + .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5 > + .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b > + .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996 > + .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945 > + .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995 > + .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c > + .quad 0xc086257a09acaae0, 0xbe1cf172c3078947 > + .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22 > + .quad 0xc086258006ae71b8, 0xbe1cefdb80426923 > + .quad 0xc08625830381da08, 0xbe1ceef1391a0372 > + .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13 > + .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83 > + .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9 > + .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0 > + .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81 > + .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766 > + .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b > + .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2 > + .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec > + .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e > + .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7 > + .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780 > + .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11 > + .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219 > + .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160 > + .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495 > + .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5 > + .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5 > + .quad 0xc08625baf725ae28, 0xbe1cf05c80779283 > + .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889 > + .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124 > + .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86 > + .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092 > + .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb > + .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12 > + .quad 0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7 > + .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e > + .quad 
0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701 > + .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812 > + .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e > + .quad 0xc08625dd881990b0, 0xbe1cf14e1325c5e4 > + .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12 > + .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21 > + .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2 > + .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece > + .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12 > + .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad > + .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3 > + .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9 > + .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1 > + .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9 > + .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2 > + .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51 > + .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e > + .quad 0xc08626052294df58, 0xbe1cf1b745c57716 > + .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23 > + .quad 0xc086260abb103458, 0xbe1cef480ff1acd2 > + .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef > + .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5 > + .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6 > + .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a > + .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545 > + .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011 > + .quad 0xc086261e32267e98, 0xbe1cf19917010e96 > + .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985 > + .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3 > + .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c > + .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50 > + .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68 > + .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9 > + .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b > + .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238 > + .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e > + .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d > + .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba > + .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279 > + .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085 > + .quad 0xc086264494738e08, 0xbe1cf06797bd03b2 
> + .quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1 > + .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f > + .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a > + .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39 > + .quad 0xc08626521daf7758, 0xbe1cf252595aceb3 > + .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2 > + .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa > + .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366 > + .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b > + .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0 > + .quad 0xc08626623df56e38, 0xbe1cf080e10b8365 > + .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544 > + .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9 > + .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9 > + .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2 > + .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c > + .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6 > + .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d > + .quad 0xc08626778c3d4798, 0xbe1cefe260819380 > + .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3 > + .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa > + .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1 > + .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52 > + .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd > + .quad 0xc086268762086350, 0xbe1cefaee1edfa35 > + .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936 > + .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed > + .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49 > + .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e > + .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc > + .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840 > + .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be > + .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c > + .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06 > + .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e > + .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3 > + .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68 > + .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5 > + .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986 > + .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d > + .quad 0xc08626b1055e3f70, 
0xbe1cf256adf0ae26 > + .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06 > + .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652 > + .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f > + .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c > + .quad 0xc08626bddc737648, 0xbe1ceec10a020e73 > + .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7 > + .quad 0xc08626c2f9890ca0, 0xbe1cf0cb8f169ffe > + .quad 0xc08626c586da9388, 0xbe1cef7de2452430 > + .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae > + .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d > + .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3 > + .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d > + .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e > + .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64 > + .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2 > + .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d > + .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab > + .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153 > + .quad 0xc08626e164224880, 0xbe1ceeb431709788 > + .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5 > + .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b > + .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93 > + .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8 > + .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2 > + .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6 > + .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef > + .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339 > + .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1 > + .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28 > + .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f > + .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3 > + .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6 > + .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6 > + .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3 > + .quad 0xc086270941934b10, 0xbe1ceefe32981f2c > + .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445 > + .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c > + .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f > + .quad 0xc08627131a321318, 0xbe1cef04ac0fb802 > + .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd > + .quad 
0xc086271801f9d0f8, 0xbe1cefa9b55407b5 > + .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570 > + .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85 > + .quad 0xc086271f58064068, 0xbe1cef092a785e3f > + .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30 > + .quad 0xc086272438546be8, 0xbe1cf210907ded8b > + .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99 > + .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc > + .quad 0xc086272b833b8df0, 0xbe1cf06874992df5 > + .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899 > + .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99 > + .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe > + .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d > + .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8 > + .quad 0xc086273a05367688, 0xbe1cf18656c50806 > + .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a > + .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911 > + .quad 0xc08627413c621848, 0xbe1cf188a4ea680c > + .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80 > + .quad 0xc086274608397868, 0xbe1cf25a328c28e2 > + .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8 > + .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a > + .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228 > + .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c > + .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44 > + .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2 > + .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4 > + .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a > + .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9 > + .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627 > + .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e > + .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee > + .quad 0xc0862764efdbf768, 0xbe1ceeb06e1d0dad > + .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5 > + .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f > + .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312 > + .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85 > + .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011 > + .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7 > + .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da > + .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554 
> + .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377 > + .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd > + .quad 0xc086277eba506158, 0xbe1cf0b911b029f0 > + .quad 0xc08627810e6f4028, 0xbe1cefdc24719766 > + .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7 > + .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec > + .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc > + .quad 0xc086278a58297918, 0xbe1cf053073872bf > + .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947 > + .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234 > + .quad 0xc086279148685aa0, 0xbe1cf162204794a8 > + .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac > + .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3 > + .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388 > + .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5 > + .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f > + .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a > + .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f > + .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f > + .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26 > + .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a > + .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81 > + .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d > + .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893 > + .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0 > + .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8 > + .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00 > + .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2 > + .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4 > + .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7 > + .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3 > + .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d > + .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e > + .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93 > + .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a > + .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9 > + .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f > + .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1 > + .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4 > + .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb > + .quad 0xc08627d567882eb0, 
0xbe1ceeea11a6641b > + .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b > + .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96 > + .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477 > + .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2 > + .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c > + .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875 > + .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522 > + .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57 > + .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e > + .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548 > + .quad 0xc08627edd34756b8, 0xbe1cef36b3366305 > + .quad 0xc08627f007f0a408, 0xbe1cf18134625550 > + .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11 > + .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc > + .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8 > + .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe > + .quad 0xc08627fb06290f90, 0xbe1cf25188430e25 > + .quad 0xc08627fd37324070, 0xbe1ceea1713490f9 > + .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c > + .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b > + .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b > + .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c > + .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a > + .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4 > + .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256 > + .quad 0xc086280eaa003050, 0xbe1cf010ad787fea > + .quad 0xc0862810d5af5880, 0xbe1cee622478393d > + .quad 0xc086281300c7e368, 0xbe1cf01c7482564f > + .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536 > + .quad 0xc086281755366778, 0xbe1cef2edae5837d > + .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9 > + .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8 > + .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83 > + .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4 > + .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9 > + .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2 > + .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d > + .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1 > + .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b > + .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02 > + .quad 
0xc086282efc0ca328, 0xbe1cf0bc139379a9 > + .quad 0xc08628311f099420, 0xbe1cef247a9ec596 > + .quad 0xc086283341749490, 0xbe1cef74bbcc488a > + .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e > + .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810 > + .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8 > + .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065 > + .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e > + .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234 > + .quad 0xc08628422284b168, 0xbe1cf0abf7638127 > + .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058 > + .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c > + .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1 > + .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43 > + .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09 > + .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60 > + .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393 > + .quad 0xc0862853021d4588, 0xbe1cf176adb417f7 > + .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da > + .quad 0xc08628573479b220, 0xbe1ceec34cf49523 > + .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb > + .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b > + .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d > + .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5 > + .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792 > + .quad 0xc0862863be697458, 0xbe1cf097f890c6f8 > + .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc > + .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7 > + .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98 > + .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7 > + .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d > + .quad 0xc086287034d0b690, 0xbe1ceff262d0a248 > + .quad 0xc086287246aab180, 0xbe1cefa7bc194186 > + .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9 > + .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07 > + .quad 0xc086287879041490, 0xbe1cf034803c8a48 > + .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f > + .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7 > + .quad 0xc086287ea6946958, 0xbe1cefb1e4625943 > + .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0 > + .quad 0xc0862882c24faff8, 0xbe1cee9896d016da 
> + .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc > + .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34 > + .quad 0xc0862888e7f699e0, 0xbe1cf05603549486 > + .quad 0xc086288af37750b0, 0xbe1cef50fff513d3 > + .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0 > + .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d > + .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f > + .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed > + .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d > + .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646 > + .quad 0xc0862899356c1150, 0xbe1ceec4501167e9 > + .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f > + .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35 > + .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a > + .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464 > + .quad 0xc08628a355104818, 0xbe1cf0435e2782b0 > + .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c > + .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d > + .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2 > + .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9 > + .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979 > + .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c > + .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32 > + .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2 > + .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303 > + .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880 > + /*== Log_LA_table ==*/ > + .align 16 > + .quad 0x8000000000000000 > + .quad 0xbf5ff802a9ab10e6 > + .quad 0xbf6ff00aa2b10bc0 > + .quad 0xbf77ee11ebd82e94 > + .quad 0xbf7fe02a6b106789 > + .quad 0xbf83e7295d25a7d9 > + .quad 0xbf87dc475f810a77 > + .quad 0xbf8bcf712c74384c > + .quad 0xbf8fc0a8b0fc03e4 > + .quad 0xbf91d7f7eb9eebe7 > + .quad 0xbf93cea44346a575 > + .quad 0xbf95c45a51b8d389 > + .quad 0xbf97b91b07d5b11b > + .quad 0xbf99ace7551cc514 > + .quad 0xbf9b9fc027af9198 > + .quad 0xbf9d91a66c543cc4 > + .quad 0xbf9f829b0e783300 > + .quad 0xbfa0b94f7c196176 > + .quad 0xbfa1b0d98923d980 > + .quad 0xbfa2a7ec2214e873 > + .quad 0xbfa39e87b9febd60 > + .quad 0xbfa494acc34d911c > + .quad 0xbfa58a5bafc8e4d5 > + .quad 
0xbfa67f94f094bd98 > + .quad 0xbfa77458f632dcfc > + .quad 0xbfa868a83083f6cf > + .quad 0xbfa95c830ec8e3eb > + .quad 0xbfaa4fe9ffa3d235 > + .quad 0xbfab42dd711971bf > + .quad 0xbfac355dd0921f2d > + .quad 0xbfad276b8adb0b52 > + .quad 0xbfae19070c276016 > + .quad 0xbfaf0a30c01162a6 > + .quad 0xbfaffae9119b9303 > + .quad 0xbfb075983598e471 > + .quad 0xbfb0ed839b5526fe > + .quad 0xbfb16536eea37ae1 > + .quad 0xbfb1dcb263db1944 > + .quad 0xbfb253f62f0a1417 > + .quad 0xbfb2cb0283f5de1f > + .quad 0xbfb341d7961bd1d1 > + .quad 0xbfb3b87598b1b6ee > + .quad 0xbfb42edcbea646f0 > + .quad 0xbfb4a50d3aa1b040 > + .quad 0xbfb51b073f06183f > + .quad 0xbfb590cafdf01c28 > + .quad 0xbfb60658a93750c4 > + .quad 0xbfb67bb0726ec0fc > + .quad 0xbfb6f0d28ae56b4c > + .quad 0xbfb765bf23a6be13 > + .quad 0xbfb7da766d7b12cd > + .quad 0xbfb84ef898e8282a > + .quad 0xbfb8c345d6319b21 > + .quad 0xbfb9375e55595ede > + .quad 0xbfb9ab42462033ad > + .quad 0xbfba1ef1d8061cd4 > + .quad 0xbfba926d3a4ad563 > + .quad 0xbfbb05b49bee43fe > + .quad 0xbfbb78c82bb0eda1 > + .quad 0xbfbbeba818146765 > + .quad 0xbfbc5e548f5bc743 > + .quad 0xbfbcd0cdbf8c13e1 > + .quad 0xbfbd4313d66cb35d > + .quad 0xbfbdb5270187d927 > + .quad 0xbfbe27076e2af2e6 > + .quad 0xbfbe98b549671467 > + .quad 0xbfbf0a30c01162a6 > + .quad 0xbfbf7b79fec37ddf > + .quad 0xbfbfec9131dbeabb > + .quad 0xbfc02ebb42bf3d4b > + .quad 0xbfc0671512ca596e > + .quad 0xbfc09f561ee719c3 > + .quad 0xbfc0d77e7cd08e59 > + .quad 0xbfc10f8e422539b1 > + .quad 0xbfc14785846742ac > + .quad 0xbfc17f6458fca611 > + .quad 0xbfc1b72ad52f67a0 > + .quad 0xbfc1eed90e2dc2c3 > + .quad 0xbfc2266f190a5acb > + .quad 0xbfc25ded0abc6ad2 > + .quad 0xbfc29552f81ff523 > + .quad 0xbfc2cca0f5f5f251 > + .quad 0xbfc303d718e47fd3 > + .quad 0xbfc33af575770e4f > + .quad 0xbfc371fc201e8f74 > + .quad 0xbfc3a8eb2d31a376 > + .quad 0xbfc3dfc2b0ecc62a > + .quad 0xbfc41682bf727bc0 > + .quad 0xbfc44d2b6ccb7d1e > + .quad 0xbfc483bccce6e3dd > + .quad 0xbfc4ba36f39a55e5 > + .quad 0xbfc4f099f4a230b2 > + 
.quad 0xbfc526e5e3a1b438 > + .quad 0xbfc55d1ad4232d6f > + .quad 0xbfc59338d9982086 > + .quad 0xbfc5c940075972b9 > + .quad 0xbfc5ff3070a793d4 > + .quad 0xbfc6350a28aaa758 > + .quad 0xbfc66acd4272ad51 > + .quad 0xbfc6a079d0f7aad2 > + .quad 0xbfc6d60fe719d21d > + .quad 0xbfc70b8f97a1aa75 > + .quad 0xbfc740f8f54037a5 > + .quad 0xbfc7764c128f2127 > + .quad 0xbfc7ab890210d909 > + .quad 0xbfc7e0afd630c274 > + .quad 0xbfc815c0a14357eb > + .quad 0xbfc84abb75865139 > + .quad 0xbfc87fa06520c911 > + .quad 0xbfc8b46f8223625b > + .quad 0xbfc8e928de886d41 > + .quad 0xbfc91dcc8c340bde > + .quad 0xbfc9525a9cf456b4 > + .quad 0xbfc986d3228180ca > + .quad 0xbfc9bb362e7dfb83 > + .quad 0xbfc9ef83d2769a34 > + .quad 0xbfca23bc1fe2b563 > + .quad 0xbfca57df28244dcd > + .quad 0xbfca8becfc882f19 > + .quad 0xbfcabfe5ae46124c > + .quad 0xbfcaf3c94e80bff3 > + .quad 0xbfcb2797ee46320c > + .quad 0xbfcb5b519e8fb5a4 > + .quad 0xbfcb8ef670420c3b > + .quad 0xbfcbc286742d8cd6 > + .quad 0xbfcbf601bb0e44e2 > + .quad 0xbfcc2968558c18c1 > + .quad 0xbfcc5cba543ae425 > + .quad 0xbfcc8ff7c79a9a22 > + .quad 0xbfccc320c0176502 > + .quad 0xbfccf6354e09c5dc > + .quad 0xbfcd293581b6b3e7 > + .quad 0xbfcd5c216b4fbb91 > + .quad 0xbfcd8ef91af31d5e > + .quad 0xbfcdc1bca0abec7d > + .quad 0xbfcdf46c0c722d2f > + .quad 0xbfce27076e2af2e6 > + .quad 0xbfce598ed5a87e2f > + .quad 0xbfce8c0252aa5a60 > + .quad 0xbfcebe61f4dd7b0b > + .quad 0xbfcef0adcbdc5936 > + .quad 0xbfcf22e5e72f105d > + .quad 0xbfcf550a564b7b37 > + .quad 0xbfcf871b28955045 > + .quad 0xbfcfb9186d5e3e2b > + .quad 0xbfcfeb0233e607cc > + .quad 0xbfd00e6c45ad501d > + .quad 0xbfd0274dc16c232f > + .quad 0xbfd0402594b4d041 > + .quad 0xbfd058f3c703ebc6 > + .quad 0xbfd071b85fcd590d > + .quad 0xbfd08a73667c57af > + .quad 0xbfd0a324e27390e3 > + .quad 0xbfd0bbccdb0d24bd > + .quad 0xbfd0d46b579ab74b > + .quad 0xbfd0ed005f657da4 > + .quad 0xbfd1058bf9ae4ad5 > + .quad 0xbfd11e0e2dad9cb7 > + .quad 0xbfd136870293a8b0 > + .quad 0xbfd14ef67f88685a > + .quad 0xbfd1675cababa60e > 
+ .quad 0xbfd17fb98e15095d > + .quad 0xbfd1980d2dd4236f > + .quad 0xbfd1b05791f07b49 > + .quad 0xbfd1c898c16999fb > + .quad 0xbfd1e0d0c33716be > + .quad 0xbfd1f8ff9e48a2f3 > + .quad 0xbfd211255986160c > + .quad 0xbfd22941fbcf7966 > + .quad 0xbfd241558bfd1404 > + .quad 0xbfd2596010df763a > + .quad 0xbfd27161913f853d > + .quad 0xbfd2895a13de86a3 > + .quad 0xbfd2a1499f762bc9 > + .quad 0xbfd2b9303ab89d25 > + .quad 0xbfd2d10dec508583 > + .quad 0xbfd2e8e2bae11d31 > + .quad 0xbfd300aead06350c > + .quad 0xbfd31871c9544185 > + .quad 0xbfd3302c16586588 > + .quad 0xbfd347dd9a987d55 > + .quad 0xbfd35f865c93293e > + .quad 0xbfd3772662bfd85b > + .quad 0xbfd38ebdb38ed321 > + .quad 0xbfd3a64c556945ea > + .quad 0xbfd3bdd24eb14b6a > + .quad 0xbfd3d54fa5c1f710 > + .quad 0xbfd3ecc460ef5f50 > + .quad 0xbfd404308686a7e4 > + .quad 0xbfd41b941cce0bee > + .quad 0xbfd432ef2a04e814 > + .quad 0xbfd44a41b463c47c > + .quad 0xbfd4618bc21c5ec2 > + .quad 0xbfd478cd5959b3d9 > + .quad 0xbfd49006804009d1 > + .quad 0xbfd4a7373cecf997 > + .quad 0xbfd4be5f957778a1 > + .quad 0xbfd4d57f8fefe27f > + .quad 0xbfd4ec973260026a > + .quad 0xbfd503a682cb1cb3 > + .quad 0xbfd51aad872df82d > + .quad 0xbfd531ac457ee77e > + .quad 0xbfd548a2c3add263 > + .quad 0xbfd55f9107a43ee2 > + .quad 0xbfd5767717455a6c > + .quad 0xbfd58d54f86e02f2 > + .quad 0xbfd5a42ab0f4cfe2 > + .quad 0xbfd5baf846aa1b19 > + .quad 0xbfd5d1bdbf5809ca > + .quad 0xbfd5e87b20c2954a > + .quad 0xbfd5ff3070a793d4 > + .quad 0xbfd615ddb4bec13c > + .quad 0xbfd62c82f2b9c795 > + .quad 0x3fd61965cdb02c1f > + .quad 0x3fd602d08af091ec > + .quad 0x3fd5ec433d5c35ae > + .quad 0x3fd5d5bddf595f30 > + .quad 0x3fd5bf406b543db2 > + .quad 0x3fd5a8cadbbedfa1 > + .quad 0x3fd5925d2b112a59 > + .quad 0x3fd57bf753c8d1fb > + .quad 0x3fd565995069514c > + .quad 0x3fd54f431b7be1a9 > + .quad 0x3fd538f4af8f72fe > + .quad 0x3fd522ae0738a3d8 > + .quad 0x3fd50c6f1d11b97c > + .quad 0x3fd4f637ebba9810 > + .quad 0x3fd4e0086dd8baca > + .quad 0x3fd4c9e09e172c3c > + .quad 0x3fd4b3c077267e9a 
> + .quad 0x3fd49da7f3bcc41f > + .quad 0x3fd487970e958770 > + .quad 0x3fd4718dc271c41b > + .quad 0x3fd45b8c0a17df13 > + .quad 0x3fd44591e0539f49 > + .quad 0x3fd42f9f3ff62642 > + .quad 0x3fd419b423d5e8c7 > + .quad 0x3fd403d086cea79c > + .quad 0x3fd3edf463c1683e > + .quad 0x3fd3d81fb5946dba > + .quad 0x3fd3c25277333184 > + .quad 0x3fd3ac8ca38e5c5f > + .quad 0x3fd396ce359bbf54 > + .quad 0x3fd3811728564cb2 > + .quad 0x3fd36b6776be1117 > + .quad 0x3fd355bf1bd82c8b > + .quad 0x3fd3401e12aecba1 > + .quad 0x3fd32a84565120a8 > + .quad 0x3fd314f1e1d35ce4 > + .quad 0x3fd2ff66b04ea9d4 > + .quad 0x3fd2e9e2bce12286 > + .quad 0x3fd2d46602adccee > + .quad 0x3fd2bef07cdc9354 > + .quad 0x3fd2a982269a3dbf > + .quad 0x3fd2941afb186b7c > + .quad 0x3fd27ebaf58d8c9d > + .quad 0x3fd269621134db92 > + .quad 0x3fd25410494e56c7 > + .quad 0x3fd23ec5991eba49 > + .quad 0x3fd22981fbef797b > + .quad 0x3fd214456d0eb8d4 > + .quad 0x3fd1ff0fe7cf47a7 > + .quad 0x3fd1e9e1678899f4 > + .quad 0x3fd1d4b9e796c245 > + .quad 0x3fd1bf99635a6b95 > + .quad 0x3fd1aa7fd638d33f > + .quad 0x3fd1956d3b9bc2fa > + .quad 0x3fd180618ef18adf > + .quad 0x3fd16b5ccbacfb73 > + .quad 0x3fd1565eed455fc3 > + .quad 0x3fd14167ef367783 > + .quad 0x3fd12c77cd00713b > + .quad 0x3fd1178e8227e47c > + .quad 0x3fd102ac0a35cc1c > + .quad 0x3fd0edd060b78081 > + .quad 0x3fd0d8fb813eb1ef > + .quad 0x3fd0c42d676162e3 > + .quad 0x3fd0af660eb9e279 > + .quad 0x3fd09aa572e6c6d4 > + .quad 0x3fd085eb8f8ae797 > + .quad 0x3fd07138604d5862 > + .quad 0x3fd05c8be0d9635a > + .quad 0x3fd047e60cde83b8 > + .quad 0x3fd03346e0106062 > + .quad 0x3fd01eae5626c691 > + .quad 0x3fd00a1c6adda473 > + .quad 0x3fcfeb2233ea07cd > + .quad 0x3fcfc218be620a5e > + .quad 0x3fcf991c6cb3b379 > + .quad 0x3fcf702d36777df0 > + .quad 0x3fcf474b134df229 > + .quad 0x3fcf1e75fadf9bde > + .quad 0x3fcef5ade4dcffe6 > + .quad 0x3fceccf2c8fe920a > + .quad 0x3fcea4449f04aaf5 > + .quad 0x3fce7ba35eb77e2a > + .quad 0x3fce530effe71012 > + .quad 0x3fce2a877a6b2c12 > + .quad 
0x3fce020cc6235ab5 > + .quad 0x3fcdd99edaf6d7e9 > + .quad 0x3fcdb13db0d48940 > + .quad 0x3fcd88e93fb2f450 > + .quad 0x3fcd60a17f903515 > + .quad 0x3fcd38666871f465 > + .quad 0x3fcd1037f2655e7b > + .quad 0x3fcce816157f1988 > + .quad 0x3fccc000c9db3c52 > + .quad 0x3fcc97f8079d44ec > + .quad 0x3fcc6ffbc6f00f71 > + .quad 0x3fcc480c0005ccd1 > + .quad 0x3fcc2028ab17f9b4 > + .quad 0x3fcbf851c067555f > + .quad 0x3fcbd087383bd8ad > + .quad 0x3fcba8c90ae4ad19 > + .quad 0x3fcb811730b823d2 > + .quad 0x3fcb5971a213acdb > + .quad 0x3fcb31d8575bce3d > + .quad 0x3fcb0a4b48fc1b46 > + .quad 0x3fcae2ca6f672bd4 > + .quad 0x3fcabb55c31693ad > + .quad 0x3fca93ed3c8ad9e3 > + .quad 0x3fca6c90d44b704e > + .quad 0x3fca454082e6ab05 > + .quad 0x3fca1dfc40f1b7f1 > + .quad 0x3fc9f6c407089664 > + .quad 0x3fc9cf97cdce0ec3 > + .quad 0x3fc9a8778debaa38 > + .quad 0x3fc981634011aa75 > + .quad 0x3fc95a5adcf7017f > + .quad 0x3fc9335e5d594989 > + .quad 0x3fc90c6db9fcbcd9 > + .quad 0x3fc8e588ebac2dbf > + .quad 0x3fc8beafeb38fe8c > + .quad 0x3fc897e2b17b19a5 > + .quad 0x3fc871213750e994 > + .quad 0x3fc84a6b759f512f > + .quad 0x3fc823c16551a3c2 > + .quad 0x3fc7fd22ff599d4f > + .quad 0x3fc7d6903caf5ad0 > + .quad 0x3fc7b0091651528c > + .quad 0x3fc7898d85444c73 > + .quad 0x3fc7631d82935a86 > + .quad 0x3fc73cb9074fd14d > + .quad 0x3fc716600c914054 > + .quad 0x3fc6f0128b756abc > + .quad 0x3fc6c9d07d203fc7 > + .quad 0x3fc6a399dabbd383 > + .quad 0x3fc67d6e9d785771 > + .quad 0x3fc6574ebe8c133a > + .quad 0x3fc6313a37335d76 > + .quad 0x3fc60b3100b09476 > + .quad 0x3fc5e533144c1719 > + .quad 0x3fc5bf406b543db2 > + .quad 0x3fc59958ff1d52f1 > + .quad 0x3fc5737cc9018cdd > + .quad 0x3fc54dabc26105d2 > + .quad 0x3fc527e5e4a1b58d > + .quad 0x3fc5022b292f6a45 > + .quad 0x3fc4dc7b897bc1c8 > + .quad 0x3fc4b6d6fefe22a4 > + .quad 0x3fc4913d8333b561 > + .quad 0x3fc46baf0f9f5db7 > + .quad 0x3fc4462b9dc9b3dc > + .quad 0x3fc420b32740fdd4 > + .quad 0x3fc3fb45a59928cc > + .quad 0x3fc3d5e3126bc27f > + .quad 0x3fc3b08b6757f2a9 > + 
.quad 0x3fc38b3e9e027479 > + .quad 0x3fc365fcb0159016 > + .quad 0x3fc340c59741142e > + .quad 0x3fc31b994d3a4f85 > + .quad 0x3fc2f677cbbc0a96 > + .quad 0x3fc2d1610c86813a > + .quad 0x3fc2ac55095f5c59 > + .quad 0x3fc28753bc11aba5 > + .quad 0x3fc2625d1e6ddf57 > + .quad 0x3fc23d712a49c202 > + .quad 0x3fc2188fd9807263 > + .quad 0x3fc1f3b925f25d41 > + .quad 0x3fc1ceed09853752 > + .quad 0x3fc1aa2b7e23f72a > + .quad 0x3fc185747dbecf34 > + .quad 0x3fc160c8024b27b1 > + .quad 0x3fc13c2605c398c3 > + .quad 0x3fc1178e8227e47c > + .quad 0x3fc0f301717cf0fb > + .quad 0x3fc0ce7ecdccc28d > + .quad 0x3fc0aa06912675d5 > + .quad 0x3fc08598b59e3a07 > + .quad 0x3fc06135354d4b18 > + .quad 0x3fc03cdc0a51ec0d > + .quad 0x3fc0188d2ecf6140 > + .quad 0x3fbfe89139dbd566 > + .quad 0x3fbfa01c9db57ce2 > + .quad 0x3fbf57bc7d9005db > + .quad 0x3fbf0f70cdd992e3 > + .quad 0x3fbec739830a1120 > + .quad 0x3fbe7f1691a32d3e > + .quad 0x3fbe3707ee30487b > + .quad 0x3fbdef0d8d466db9 > + .quad 0x3fbda727638446a2 > + .quad 0x3fbd5f55659210e2 > + .quad 0x3fbd179788219364 > + .quad 0x3fbccfedbfee13a8 > + .quad 0x3fbc885801bc4b23 > + .quad 0x3fbc40d6425a5cb1 > + .quad 0x3fbbf968769fca11 > + .quad 0x3fbbb20e936d6974 > + .quad 0x3fbb6ac88dad5b1c > + .quad 0x3fbb23965a52ff00 > + .quad 0x3fbadc77ee5aea8c > + .quad 0x3fba956d3ecade63 > + .quad 0x3fba4e7640b1bc38 > + .quad 0x3fba0792e9277cac > + .quad 0x3fb9c0c32d4d2548 > + .quad 0x3fb97a07024cbe74 > + .quad 0x3fb9335e5d594989 > + .quad 0x3fb8ecc933aeb6e8 > + .quad 0x3fb8a6477a91dc29 > + .quad 0x3fb85fd927506a48 > + .quad 0x3fb8197e2f40e3f0 > + .quad 0x3fb7d33687c293c9 > + .quad 0x3fb78d02263d82d3 > + .quad 0x3fb746e100226ed9 > + .quad 0x3fb700d30aeac0e1 > + .quad 0x3fb6bad83c1883b6 > + .quad 0x3fb674f089365a7a > + .quad 0x3fb62f1be7d77743 > + .quad 0x3fb5e95a4d9791cb > + .quad 0x3fb5a3abb01ade25 > + .quad 0x3fb55e10050e0384 > + .quad 0x3fb518874226130a > + .quad 0x3fb4d3115d207eac > + .quad 0x3fb48dae4bc31018 > + .quad 0x3fb4485e03dbdfad > + .quad 0x3fb403207b414b7f > 
+ .quad 0x3fb3bdf5a7d1ee64 > + .quad 0x3fb378dd7f749714 > + .quad 0x3fb333d7f8183f4b > + .quad 0x3fb2eee507b40301 > + .quad 0x3fb2aa04a44717a5 > + .quad 0x3fb26536c3d8c369 > + .quad 0x3fb2207b5c78549e > + .quad 0x3fb1dbd2643d190b > + .quad 0x3fb1973bd1465567 > + .quad 0x3fb152b799bb3cc9 > + .quad 0x3fb10e45b3cae831 > + .quad 0x3fb0c9e615ac4e17 > + .quad 0x3fb08598b59e3a07 > + .quad 0x3fb0415d89e74444 > + .quad 0x3faffa6911ab9301 > + .quad 0x3faf723b517fc523 > + .quad 0x3faeea31c006b87c > + .quad 0x3fae624c4a0b5e1b > + .quad 0x3fadda8adc67ee4e > + .quad 0x3fad52ed6405d86f > + .quad 0x3faccb73cdddb2cc > + .quad 0x3fac441e06f72a9e > + .quad 0x3fabbcebfc68f420 > + .quad 0x3fab35dd9b58baad > + .quad 0x3faaaef2d0fb10fc > + .quad 0x3faa282b8a936171 > + .quad 0x3fa9a187b573de7c > + .quad 0x3fa91b073efd7314 > + .quad 0x3fa894aa149fb343 > + .quad 0x3fa80e7023d8ccc4 > + .quad 0x3fa788595a3577ba > + .quad 0x3fa70265a550e777 > + .quad 0x3fa67c94f2d4bb58 > + .quad 0x3fa5f6e73078efb8 > + .quad 0x3fa5715c4c03ceef > + .quad 0x3fa4ebf43349e26f > + .quad 0x3fa466aed42de3ea > + .quad 0x3fa3e18c1ca0ae92 > + .quad 0x3fa35c8bfaa1306b > + .quad 0x3fa2d7ae5c3c5bae > + .quad 0x3fa252f32f8d183f > + .quad 0x3fa1ce5a62bc353a > + .quad 0x3fa149e3e4005a8d > + .quad 0x3fa0c58fa19dfaaa > + .quad 0x3fa0415d89e74444 > + .quad 0x3f9f7a9b16782856 > + .quad 0x3f9e72bf2813ce51 > + .quad 0x3f9d6b2725979802 > + .quad 0x3f9c63d2ec14aaf2 > + .quad 0x3f9b5cc258b718e6 > + .quad 0x3f9a55f548c5c43f > + .quad 0x3f994f6b99a24475 > + .quad 0x3f98492528c8cabf > + .quad 0x3f974321d3d006d3 > + .quad 0x3f963d6178690bd6 > + .quad 0x3f9537e3f45f3565 > + .quad 0x3f9432a925980cc1 > + .quad 0x3f932db0ea132e22 > + .quad 0x3f9228fb1fea2e28 > + .quad 0x3f912487a5507f70 > + .quad 0x3f90205658935847 > + .quad 0x3f8e38ce3033310c > + .quad 0x3f8c317384c75f06 > + .quad 0x3f8a2a9c6c170462 > + .quad 0x3f882448a388a2aa > + .quad 0x3f861e77e8b53fc6 > + .quad 0x3f841929f96832f0 > + .quad 0x3f82145e939ef1e9 > + .quad 0x3f8010157588de71 
> + .quad 0x3f7c189cbb0e27fb > + .quad 0x3f78121214586b54 > + .quad 0x3f740c8a747878e2 > + .quad 0x3f70080559588b35 > + .quad 0x3f680904828985c0 > + .quad 0x3f60040155d5889e > + .quad 0x3f50020055655889 > + .quad 0x0000000000000000 > + /*== poly_coeff[4] ==*/ > + .align 16 > + .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */ > + .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */ > + .quad 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */ > + .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */ > + /*== ExpMask ==*/ > + .align 16 > + .quad 0x000fffffffffffff, 0x000fffffffffffff > + /*== Two10 ==*/ > + .align 16 > + .quad 0x3f50000000000000, 0x3f50000000000000 > + /*== MinLog1p = -1+2^(-53) ==*/ > + .align 16 > + .quad 0xbfefffffffffffff, 0xbfefffffffffffff > + /*== MaxLog1p ==*/ > + .align 16 > + .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000 > + /*== One ==*/ > + .align 16 > + .quad 0x3ff0000000000000, 0x3ff0000000000000 > + /*== SgnMask ==*/ > + .align 16 > + .quad 0x7fffffffffffffff, 0x7fffffffffffffff > + /*== XThreshold ==*/ > + .align 16 > + .quad 0x3e00000000000000, 0x3e00000000000000 > + /*== XhMask ==*/ > + .align 16 > + .quad 0xfffffffffffffc00, 0xfffffffffffffc00 > + /*== Threshold ==*/ > + .align 16 > + .quad 0x4086a00000000000, 0x4086a00000000000 > + /*== Bias ==*/ > + .align 16 > + .quad 0x408ff80000000000, 0x408ff80000000000 > + /*== Bias1 ==*/ > + .align 16 > + .quad 0x408ff00000000000, 0x408ff00000000000 > + /*== ExpMask ==*/ > + .align 16 > + .quad 0x7ff0000000000000, 0x7ff0000000000000 > + /*== ExpMask2 ==*/ > + .align 16 > + .quad 0x7f40000000000000, 0x7f40000000000000 > + /*== L2L ==*/ > + .align 16 > + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF > + .align 16 > + .type __svml_dlog1p_data_internal,@object > + .size __svml_dlog1p_data_internal,.-__svml_dlog1p_data_internal > + .space 96, 0x00 > + .align 16 > + > +.FLT_16: > + .long 0x00000000,0x43380000,0x00000000,0x43380000 > + .type .FLT_16,@object > + .size .FLT_16,16 > 
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S > new file mode 100644 > index 0000000000..ec01af680c > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S > @@ -0,0 +1,20 @@ > +/* SSE version of vectorized log1p, vector length is 4. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define _ZGVdN4v_log1p _ZGVdN4v_log1p_sse_wrapper > +#include "../svml_d_log1p4_core.S" > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c > new file mode 100644 > index 0000000000..808f3224ef > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c > @@ -0,0 +1,27 @@ > +/* Multiple versions of vectorized log1p, vector length is 4. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define SYMBOL_NAME _ZGVdN4v_log1p > +#include "ifunc-mathvec-avx2.h" > + > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > + > +#ifdef SHARED > +__hidden_ver1 (_ZGVdN4v_log1p, __GI__ZGVdN4v_log1p, __redirect__ZGVdN4v_log1p) > + __attribute__ ((visibility ("hidden"))); > +#endif > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S > new file mode 100644 > index 0000000000..548538b0ec > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S > @@ -0,0 +1,1383 @@ > +/* Function log1p vectorized with AVX2. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + https://www.gnu.org/licenses/. 
*/ > + > +/* > + * ALGORITHM DESCRIPTION: > + * > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > + * Get short reciprocal approximation Rcp ~ 1/xh > + * R = (Rcp*xh - 1.0) + Rcp*xl > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > + * log(Rcp) is tabulated > + * > + * > + */ > + > +/* Offsets for data table __svml_dlog1p_data_internal > + */ > +#define Log_HA_table 0 > +#define Log_LA_table 8224 > +#define poly_coeff 12352 > +#define ExpMask 12480 > +#define Two10 12512 > +#define MinLog1p 12544 > +#define MaxLog1p 12576 > +#define One 12608 > +#define SgnMask 12640 > +#define XThreshold 12672 > +#define XhMask 12704 > +#define Threshold 12736 > +#define Bias 12768 > +#define Bias1 12800 > +#define ExpMask0 12832 > +#define ExpMask2 12864 > +#define L2 12896 > + > +/* Lookup bias for data table __svml_dlog1p_data_internal. */ > +#define Table_Lookup_Bias -0x405fe0 > + > +#include <sysdep.h> > + > + .text > + .section .text.avx2,"ax",@progbits > +ENTRY(_ZGVdN4v_log1p_avx2) > + pushq %rbp > + cfi_def_cfa_offset(16) > + movq %rsp, %rbp > + cfi_def_cfa(6, 16) > + cfi_offset(6, -16) > + andq $-32, %rsp > + subq $96, %rsp > + lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %r8 > + > +/* SgnMask used by all accuracies */ > + vmovupd SgnMask+__svml_dlog1p_data_internal(%rip), %ymm12 > + vmovupd One+__svml_dlog1p_data_internal(%rip), %ymm7 > + > +/* 2^ (-10-exp(X) ) */ > + vmovupd ExpMask2+__svml_dlog1p_data_internal(%rip), %ymm3 > + vmovapd %ymm0, %ymm9 > + vandpd %ymm12, %ymm9, %ymm10 > + vcmplt_oqpd XThreshold+__svml_dlog1p_data_internal(%rip), %ymm10, %ymm11 > + vaddpd %ymm7, %ymm9, %ymm13 > + > +/* compute 1+x as high, low parts */ > + vmaxpd %ymm9, %ymm7, %ymm15 > + vminpd %ymm9, %ymm7, %ymm6 > + vorpd XhMask+__svml_dlog1p_data_internal(%rip), %ymm11, %ymm14 > + vandpd %ymm14, %ymm13, %ymm4 > + > +/* preserve mantissa, set input exponent to 2^(-10) */ > + vandpd ExpMask+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm5 > + vorpd 
Two10+__svml_dlog1p_data_internal(%rip), %ymm5, %ymm5 > + > +/* reciprocal approximation good to at least 11 bits */ > + vcvtpd2ps %ymm5, %xmm2 > + vsubpd %ymm4, %ymm15, %ymm0 > + > +/* check range */ > + vcmplt_oqpd MinLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm15 > + vrcpps %xmm2, %xmm1 > + vaddpd %ymm0, %ymm6, %ymm6 > + vcmpnle_uqpd MaxLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm0 > + vcvtps2pd %xmm1, %ymm11 > + > +/* exponent of X needed to scale Xl */ > + vandps ExpMask0+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm10 > + vpsubq %ymm10, %ymm3, %ymm13 > + > +/* exponent bits */ > + vpsrlq $20, %ymm4, %ymm4 > + > +/* round reciprocal to nearest integer, will have 1+9 mantissa bits */ > + vroundpd $0, %ymm11, %ymm3 > + > +/* scale DblRcp */ > + vmulpd %ymm13, %ymm3, %ymm2 > + > +/* exponent*log(2.0) */ > + vmovupd Threshold+__svml_dlog1p_data_internal(%rip), %ymm13 > + vfmsub213pd %ymm7, %ymm3, %ymm5 > + > +/* Compute SignMask for all accuracies, including EP */ > + vandnpd %ymm9, %ymm12, %ymm8 > + vorpd %ymm0, %ymm15, %ymm7 > + > +/* > + * prepare table index > + * table lookup > + */ > + vpsrlq $40, %ymm3, %ymm0 > + > +/* > + * argument reduction > + * VQFMS( D, R, X, DblRcp1, One ); > + */ > + vfmadd213pd %ymm5, %ymm2, %ymm6 > + vmovupd poly_coeff+64+__svml_dlog1p_data_internal(%rip), %ymm2 > + vcmplt_oqpd %ymm3, %ymm13, %ymm3 > + vmulpd %ymm6, %ymm6, %ymm5 > + vfmadd213pd poly_coeff+96+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm2 > + > +/* combine and get argument value range mask */ > + vmovmskpd %ymm7, %eax > + vextractf128 $1, %ymm4, %xmm12 > + vshufps $221, %xmm12, %xmm4, %xmm14 > + > +/* biased exponent in DP format */ > + vcvtdq2pd %xmm14, %ymm1 > + vandpd Bias+__svml_dlog1p_data_internal(%rip), %ymm3, %ymm14 > + vorpd Bias1+__svml_dlog1p_data_internal(%rip), %ymm14, %ymm15 > + vsubpd %ymm15, %ymm1, %ymm1 > + vmulpd L2+__svml_dlog1p_data_internal(%rip), %ymm1, %ymm3 > + > +/* polynomial */ > + vmovupd 
poly_coeff+__svml_dlog1p_data_internal(%rip), %ymm1 > + vfmadd213pd poly_coeff+32+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm1 > + vfmadd213pd %ymm2, %ymm5, %ymm1 > + > +/* reconstruction */ > + vfmadd213pd %ymm6, %ymm5, %ymm1 > + vextractf128 $1, %ymm0, %xmm10 > + vmovd %xmm0, %edx > + vmovd %xmm10, %esi > + movslq %edx, %rdx > + vpextrd $2, %xmm0, %ecx > + movslq %esi, %rsi > + vpextrd $2, %xmm10, %edi > + movslq %ecx, %rcx > + movslq %edi, %rdi > + vmovsd (%r8,%rdx), %xmm4 > + vmovsd (%r8,%rsi), %xmm11 > + vmovhpd (%r8,%rcx), %xmm4, %xmm7 > + vmovhpd (%r8,%rdi), %xmm11, %xmm12 > + vinsertf128 $1, %xmm12, %ymm7, %ymm0 > + vaddpd %ymm1, %ymm0, %ymm6 > + vaddpd %ymm6, %ymm3, %ymm0 > + > +/* OR in the Sign of input argument to produce correct log1p(-0) */ > + vorpd %ymm8, %ymm0, %ymm0 > + testl %eax, %eax > + > +/* Go to special inputs processing branch */ > + jne L(SPECIAL_VALUES_BRANCH) > + # LOE rbx r12 r13 r14 r15 eax ymm0 ymm9 > + > +/* Restore registers > + * and exit the function > + */ > + > +L(EXIT): > + movq %rbp, %rsp > + popq %rbp > + cfi_def_cfa(7, 8) > + cfi_restore(6) > + ret > + cfi_def_cfa(6, 16) > + cfi_offset(6, -16) > + > +/* Branch to process > + * special inputs > + */ > + > +L(SPECIAL_VALUES_BRANCH): > + vmovupd %ymm9, 32(%rsp) > + vmovupd %ymm0, 64(%rsp) > + # LOE rbx r12 r13 r14 r15 eax ymm0 > + > + xorl %edx, %edx > + # LOE rbx r12 r13 r14 r15 eax edx > + > + vzeroupper > + movq %r12, 16(%rsp) > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > + movl %edx, %r12d > + movq %r13, 8(%rsp) > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > + movl %eax, %r13d 
> + movq %r14, (%rsp) > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > + # LOE rbx r15 r12d r13d > + > +/* Range mask > + * bits check > + */ > + > +L(RANGEMASK_CHECK): > + btl %r12d, %r13d > + > +/* Call scalar math function */ > + jc L(SCALAR_MATH_CALL) > + # LOE rbx r15 r12d r13d > + > +/* Special inputs > + * processing loop > + */ > + > +L(SPECIAL_VALUES_LOOP): > + incl %r12d > + cmpl $4, %r12d > + > +/* Check bits in range mask */ > + jl L(RANGEMASK_CHECK) > + # LOE rbx r15 r12d r13d > + > + movq 16(%rsp), %r12 > + cfi_restore(12) > + movq 8(%rsp), %r13 > + cfi_restore(13) > + movq (%rsp), %r14 > + cfi_restore(14) > + vmovupd 64(%rsp), %ymm0 > + > +/* Go to exit */ > + jmp L(EXIT) > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > + # LOE rbx r12 r13 r14 r15 ymm0 > + > +/* Scalar math function call > + * to process special input > + */ > + > +L(SCALAR_MATH_CALL): > + movl %r12d, %r14d > + movsd 32(%rsp,%r14,8), %xmm0 > + call log1p@PLT > + # LOE rbx r14 r15 r12d r13d xmm0 > + > + movsd %xmm0, 64(%rsp,%r14,8) > + > +/* Process special inputs in loop */ > + jmp L(SPECIAL_VALUES_LOOP) > + # LOE 
rbx r15 r12d r13d > +END(_ZGVdN4v_log1p_avx2) > + > + .section .rodata, "a" > + .align 32 > + > +#ifdef __svml_dlog1p_data_internal_typedef > +typedef unsigned int VUINT32; > +typedef struct { > + __declspec(align(32)) VUINT32 Log_HA_table[(1<<10)+2][2]; > + __declspec(align(32)) VUINT32 Log_LA_table[(1<<9)+1][2]; > + __declspec(align(32)) VUINT32 poly_coeff[4][4][2]; > + __declspec(align(32)) VUINT32 ExpMask[4][2]; > + __declspec(align(32)) VUINT32 Two10[4][2]; > + __declspec(align(32)) VUINT32 MinLog1p[4][2]; > + __declspec(align(32)) VUINT32 MaxLog1p[4][2]; > + __declspec(align(32)) VUINT32 One[4][2]; > + __declspec(align(32)) VUINT32 SgnMask[4][2]; > + __declspec(align(32)) VUINT32 XThreshold[4][2]; > + __declspec(align(32)) VUINT32 XhMask[4][2]; > + __declspec(align(32)) VUINT32 Threshold[4][2]; > + __declspec(align(32)) VUINT32 Bias[4][2]; > + __declspec(align(32)) VUINT32 Bias1[4][2]; > + __declspec(align(32)) VUINT32 ExpMask0[4][2]; > + __declspec(align(32)) VUINT32 ExpMask2[4][2]; > + __declspec(align(32)) VUINT32 L2[4][2]; > +} __svml_dlog1p_data_internal; > +#endif > +__svml_dlog1p_data_internal: > + /* Log_HA_table */ > + .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100 > + .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a > + .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff > + .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a > + .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb > + .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e > + .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b > + .quad 0xc0862347acebaf68, 0xbe1cef3b152048af > + .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e > + .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4 > + .quad 0xc08623537ac30980, 0xbe1cefc4642ee597 > + .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16 > + .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6 > + .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362 > + .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557 > + .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b > + .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed > + .quad 
0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed > + .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f > + .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce > + .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7 > + .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1 > + .quad 0xc086238206e94218, 0xbe1ceee898588610 > + .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea > + .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6 > + .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6 > + .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165 > + .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1 > + .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b > + .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670 > + .quad 0xc08623a07b28ae60, 0xbe1cef359363787c > + .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c > + .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84 > + .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7 > + .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b > + .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf > + .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62 > + .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b > + .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98 > + .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87 > + .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff > + .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798 > + .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e > + .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde > + .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b > + .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c > + .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98 > + .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f > + .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358 > + .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380 > + .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4 > + .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b > + .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2 > + .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4 > + .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400 > + .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7 > + .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a > + .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d 
> + .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b > + .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575 > + .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951 > + .quad 0xc086241263e87f50, 0xbe1cf16e74768529 > + .quad 0xc0862415f6193658, 0xbe1cefec64b8becb > + .quad 0xc086241986b28f30, 0xbe1cf0838d210baa > + .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11 > + .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805 > + .quad 0xc08624242f008380, 0xbe1ceea988c5a417 > + .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5 > + .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38 > + .quad 0xc086242ec92eaee8, 0xbe1cef0946455411 > + .quad 0xc08624324ecbaf98, 0xbe1cefea60907739 > + .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42 > + .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d > + .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e > + .quad 0xc086244055d2c968, 0xbe1cef345284c119 > + .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219 > + .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114 > + .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189 > + .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f > + .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f > + .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5 > + .quad 0xc0862458a789e250, 0xbe1cf0b173796a31 > + .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d > + .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb > + .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7 > + .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f > + .quad 0xc0862469d9a591c0, 0xbe1cef503d947663 > + .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2 > + .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc > + .quad 0xc086247419475160, 0xbe1cf03dd9922331 > + .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129 > + .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6 > + .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100 > + .quad 0xc0862481af27c528, 0xbe1cee8a6593278a > + .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7 > + .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8 > + .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002 > + .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4 > + .quad 0xc08624928a4eaa20, 
0xbe1cee897636b00c > + .quad 0xc0862495e5179270, 0xbe1cee757f20c326 > + .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4 > + .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97 > + .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb > + .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e > + .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b > + .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80 > + .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71 > + .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9 > + .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139 > + .quad 0xc08624b72472a528, 0xbe1cf031c931c11f > + .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7 > + .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d > + .quad 0xc08624c103245238, 0xbe1cefd492f1ba75 > + .quad 0xc08624c44aacab08, 0xbe1cf1253e154466 > + .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55 > + .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe > + .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f > + .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968 > + .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78 > + .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75 > + .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2 > + .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d > + .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed > + .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f > + .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65 > + .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078 > + .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a > + .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a > + .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2 > + .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc > + .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501 > + .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7 > + .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c > + .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c > + .quad 0xc0862507f9448db0, 0xbe1cf082da464994 > + .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf > + .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531 > + .quad 0xc08625117667dd78, 0xbe1cf1106599c962 > + .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f > + .quad 
0xc0862517c399d0c8, 0xbe1cf0f746994ef6 > + .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4 > + .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092 > + .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd > + .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7 > + .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25 > + .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d > + .quad 0xc086252dab033898, 0xbe1cf220bba8861f > + .quad 0xc0862530c732b078, 0xbe1cef51e310eae2 > + .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae > + .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8 > + .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171 > + .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408 > + .quad 0xc08625404216d160, 0xbe1cf22d2536f06b > + .quad 0xc08625435715e498, 0xbe1cef6abbf2e268 > + .quad 0xc08625466ae57648, 0xbe1cf093a14789f5 > + .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c > + .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc > + .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157 > + .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997 > + .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff > + .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f > + .quad 0xc086255bd4ac4590, 0xbe1cf1b4c51113c9 > + .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d > + .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc > + .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9 > + .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5 > + .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b > + .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996 > + .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945 > + .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995 > + .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c > + .quad 0xc086257a09acaae0, 0xbe1cf172c3078947 > + .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22 > + .quad 0xc086258006ae71b8, 0xbe1cefdb80426923 > + .quad 0xc08625830381da08, 0xbe1ceef1391a0372 > + .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13 > + .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83 > + .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9 > + .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0 > + .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81 
> + .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766 > + .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b > + .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2 > + .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec > + .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e > + .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7 > + .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780 > + .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11 > + .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219 > + .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160 > + .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495 > + .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5 > + .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5 > + .quad 0xc08625baf725ae28, 0xbe1cf05c80779283 > + .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889 > + .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124 > + .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86 > + .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092 > + .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb > + .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12 > + .quad 0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7 > + .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e > + .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701 > + .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812 > + .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e > + .quad 0xc08625dd881990b0, 0xbe1cf14e1325c5e4 > + .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12 > + .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21 > + .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2 > + .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece > + .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12 > + .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad > + .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3 > + .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9 > + .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1 > + .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9 > + .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2 > + .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51 > + .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e > + .quad 0xc08626052294df58, 0xbe1cf1b745c57716 > + .quad 0xc0862607ef4fc868, 
0xbe1cef3d2800ea23 > + .quad 0xc086260abb103458, 0xbe1cef480ff1acd2 > + .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef > + .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5 > + .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6 > + .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a > + .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545 > + .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011 > + .quad 0xc086261e32267e98, 0xbe1cf19917010e96 > + .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985 > + .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3 > + .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c > + .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50 > + .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68 > + .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9 > + .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b > + .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238 > + .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e > + .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d > + .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba > + .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279 > + .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085 > + .quad 0xc086264494738e08, 0xbe1cf06797bd03b2 > + .quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1 > + .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f > + .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a > + .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39 > + .quad 0xc08626521daf7758, 0xbe1cf252595aceb3 > + .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2 > + .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa > + .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366 > + .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b > + .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0 > + .quad 0xc08626623df56e38, 0xbe1cf080e10b8365 > + .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544 > + .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9 > + .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9 > + .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2 > + .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c > + .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6 > + .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d > + .quad 
0xc08626778c3d4798, 0xbe1cefe260819380 > + .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3 > + .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa > + .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1 > + .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52 > + .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd > + .quad 0xc086268762086350, 0xbe1cefaee1edfa35 > + .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936 > + .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed > + .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49 > + .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e > + .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc > + .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840 > + .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be > + .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c > + .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06 > + .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e > + .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3 > + .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68 > + .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5 > + .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986 > + .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d > + .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26 > + .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06 > + .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652 > + .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f > + .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c > + .quad 0xc08626bddc737648, 0xbe1ceec10a020e73 > + .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7 > + .quad 0xc08626c2f9890ca0, 0xbe1cf0cb8f169ffe > + .quad 0xc08626c586da9388, 0xbe1cef7de2452430 > + .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae > + .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d > + .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3 > + .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d > + .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e > + .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64 > + .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2 > + .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d > + .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab > + .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153 
> + .quad 0xc08626e164224880, 0xbe1ceeb431709788 > + .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5 > + .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b > + .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93 > + .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8 > + .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2 > + .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6 > + .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef > + .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339 > + .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1 > + .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28 > + .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f > + .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3 > + .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6 > + .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6 > + .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3 > + .quad 0xc086270941934b10, 0xbe1ceefe32981f2c > + .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445 > + .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c > + .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f > + .quad 0xc08627131a321318, 0xbe1cef04ac0fb802 > + .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd > + .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5 > + .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570 > + .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85 > + .quad 0xc086271f58064068, 0xbe1cef092a785e3f > + .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30 > + .quad 0xc086272438546be8, 0xbe1cf210907ded8b > + .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99 > + .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc > + .quad 0xc086272b833b8df0, 0xbe1cf06874992df5 > + .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899 > + .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99 > + .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe > + .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d > + .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8 > + .quad 0xc086273a05367688, 0xbe1cf18656c50806 > + .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a > + .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911 > + .quad 0xc08627413c621848, 0xbe1cf188a4ea680c > + .quad 0xc0862743a2a9cc80, 
0xbe1cf1d270930c80 > + .quad 0xc086274608397868, 0xbe1cf25a328c28e2 > + .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8 > + .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a > + .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228 > + .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c > + .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44 > + .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2 > + .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4 > + .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a > + .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9 > + .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627 > + .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e > + .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee > + .quad 0xc0862764efdbf768, 0xbe1ceeb06e1d0dad > + .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5 > + .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f > + .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312 > + .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85 > + .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011 > + .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7 > + .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da > + .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554 > + .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377 > + .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd > + .quad 0xc086277eba506158, 0xbe1cf0b911b029f0 > + .quad 0xc08627810e6f4028, 0xbe1cefdc24719766 > + .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7 > + .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec > + .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc > + .quad 0xc086278a58297918, 0xbe1cf053073872bf > + .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947 > + .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234 > + .quad 0xc086279148685aa0, 0xbe1cf162204794a8 > + .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac > + .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3 > + .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388 > + .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5 > + .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f > + .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a > + .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f > + .quad 
0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f > + .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26 > + .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a > + .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81 > + .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d > + .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893 > + .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0 > + .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8 > + .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00 > + .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2 > + .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4 > + .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7 > + .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3 > + .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d > + .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e > + .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93 > + .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a > + .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9 > + .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f > + .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1 > + .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4 > + .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb > + .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b > + .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b > + .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96 > + .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477 > + .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2 > + .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c > + .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875 > + .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522 > + .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57 > + .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e > + .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548 > + .quad 0xc08627edd34756b8, 0xbe1cef36b3366305 > + .quad 0xc08627f007f0a408, 0xbe1cf18134625550 > + .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11 > + .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc > + .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8 > + .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe > + .quad 0xc08627fb06290f90, 0xbe1cf25188430e25 > + .quad 0xc08627fd37324070, 0xbe1ceea1713490f9 
> + .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c > + .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b > + .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b > + .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c > + .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a > + .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4 > + .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256 > + .quad 0xc086280eaa003050, 0xbe1cf010ad787fea > + .quad 0xc0862810d5af5880, 0xbe1cee622478393d > + .quad 0xc086281300c7e368, 0xbe1cf01c7482564f > + .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536 > + .quad 0xc086281755366778, 0xbe1cef2edae5837d > + .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9 > + .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8 > + .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83 > + .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4 > + .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9 > + .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2 > + .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d > + .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1 > + .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b > + .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02 > + .quad 0xc086282efc0ca328, 0xbe1cf0bc139379a9 > + .quad 0xc08628311f099420, 0xbe1cef247a9ec596 > + .quad 0xc086283341749490, 0xbe1cef74bbcc488a > + .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e > + .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810 > + .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8 > + .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065 > + .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e > + .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234 > + .quad 0xc08628422284b168, 0xbe1cf0abf7638127 > + .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058 > + .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c > + .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1 > + .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43 > + .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09 > + .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60 > + .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393 > + .quad 0xc0862853021d4588, 0xbe1cf176adb417f7 > + .quad 0xc08628551b91ef00, 
0xbe1cf0f64f84a8da > + .quad 0xc08628573479b220, 0xbe1ceec34cf49523 > + .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb > + .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b > + .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d > + .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5 > + .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792 > + .quad 0xc0862863be697458, 0xbe1cf097f890c6f8 > + .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc > + .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7 > + .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98 > + .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7 > + .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d > + .quad 0xc086287034d0b690, 0xbe1ceff262d0a248 > + .quad 0xc086287246aab180, 0xbe1cefa7bc194186 > + .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9 > + .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07 > + .quad 0xc086287879041490, 0xbe1cf034803c8a48 > + .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f > + .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7 > + .quad 0xc086287ea6946958, 0xbe1cefb1e4625943 > + .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0 > + .quad 0xc0862882c24faff8, 0xbe1cee9896d016da > + .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc > + .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34 > + .quad 0xc0862888e7f699e0, 0xbe1cf05603549486 > + .quad 0xc086288af37750b0, 0xbe1cef50fff513d3 > + .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0 > + .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d > + .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f > + .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed > + .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d > + .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646 > + .quad 0xc0862899356c1150, 0xbe1ceec4501167e9 > + .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f > + .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35 > + .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a > + .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464 > + .quad 0xc08628a355104818, 0xbe1cf0435e2782b0 > + .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c > + .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d > + .quad 
0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2 > + .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9 > + .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979 > + .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c > + .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32 > + .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2 > + .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303 > + .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880 > + /*== Log_LA_table ==*/ > + .align 32 > + .quad 0x8000000000000000 > + .quad 0xbf5ff802a9ab10e6 > + .quad 0xbf6ff00aa2b10bc0 > + .quad 0xbf77ee11ebd82e94 > + .quad 0xbf7fe02a6b106789 > + .quad 0xbf83e7295d25a7d9 > + .quad 0xbf87dc475f810a77 > + .quad 0xbf8bcf712c74384c > + .quad 0xbf8fc0a8b0fc03e4 > + .quad 0xbf91d7f7eb9eebe7 > + .quad 0xbf93cea44346a575 > + .quad 0xbf95c45a51b8d389 > + .quad 0xbf97b91b07d5b11b > + .quad 0xbf99ace7551cc514 > + .quad 0xbf9b9fc027af9198 > + .quad 0xbf9d91a66c543cc4 > + .quad 0xbf9f829b0e783300 > + .quad 0xbfa0b94f7c196176 > + .quad 0xbfa1b0d98923d980 > + .quad 0xbfa2a7ec2214e873 > + .quad 0xbfa39e87b9febd60 > + .quad 0xbfa494acc34d911c > + .quad 0xbfa58a5bafc8e4d5 > + .quad 0xbfa67f94f094bd98 > + .quad 0xbfa77458f632dcfc > + .quad 0xbfa868a83083f6cf > + .quad 0xbfa95c830ec8e3eb > + .quad 0xbfaa4fe9ffa3d235 > + .quad 0xbfab42dd711971bf > + .quad 0xbfac355dd0921f2d > + .quad 0xbfad276b8adb0b52 > + .quad 0xbfae19070c276016 > + .quad 0xbfaf0a30c01162a6 > + .quad 0xbfaffae9119b9303 > + .quad 0xbfb075983598e471 > + .quad 0xbfb0ed839b5526fe > + .quad 0xbfb16536eea37ae1 > + .quad 0xbfb1dcb263db1944 > + .quad 0xbfb253f62f0a1417 > + .quad 0xbfb2cb0283f5de1f > + .quad 0xbfb341d7961bd1d1 > + .quad 0xbfb3b87598b1b6ee > + .quad 0xbfb42edcbea646f0 > + .quad 0xbfb4a50d3aa1b040 > + .quad 0xbfb51b073f06183f > + .quad 0xbfb590cafdf01c28 > + .quad 0xbfb60658a93750c4 > + .quad 0xbfb67bb0726ec0fc > + .quad 0xbfb6f0d28ae56b4c > + .quad 0xbfb765bf23a6be13 > + .quad 0xbfb7da766d7b12cd > + .quad 0xbfb84ef898e8282a > + .quad 0xbfb8c345d6319b21 > + .quad 0xbfb9375e55595ede > + .quad 
0xbfb9ab42462033ad > + .quad 0xbfba1ef1d8061cd4 > + .quad 0xbfba926d3a4ad563 > + .quad 0xbfbb05b49bee43fe > + .quad 0xbfbb78c82bb0eda1 > + .quad 0xbfbbeba818146765 > + .quad 0xbfbc5e548f5bc743 > + .quad 0xbfbcd0cdbf8c13e1 > + .quad 0xbfbd4313d66cb35d > + .quad 0xbfbdb5270187d927 > + .quad 0xbfbe27076e2af2e6 > + .quad 0xbfbe98b549671467 > + .quad 0xbfbf0a30c01162a6 > + .quad 0xbfbf7b79fec37ddf > + .quad 0xbfbfec9131dbeabb > + .quad 0xbfc02ebb42bf3d4b > + .quad 0xbfc0671512ca596e > + .quad 0xbfc09f561ee719c3 > + .quad 0xbfc0d77e7cd08e59 > + .quad 0xbfc10f8e422539b1 > + .quad 0xbfc14785846742ac > + .quad 0xbfc17f6458fca611 > + .quad 0xbfc1b72ad52f67a0 > + .quad 0xbfc1eed90e2dc2c3 > + .quad 0xbfc2266f190a5acb > + .quad 0xbfc25ded0abc6ad2 > + .quad 0xbfc29552f81ff523 > + .quad 0xbfc2cca0f5f5f251 > + .quad 0xbfc303d718e47fd3 > + .quad 0xbfc33af575770e4f > + .quad 0xbfc371fc201e8f74 > + .quad 0xbfc3a8eb2d31a376 > + .quad 0xbfc3dfc2b0ecc62a > + .quad 0xbfc41682bf727bc0 > + .quad 0xbfc44d2b6ccb7d1e > + .quad 0xbfc483bccce6e3dd > + .quad 0xbfc4ba36f39a55e5 > + .quad 0xbfc4f099f4a230b2 > + .quad 0xbfc526e5e3a1b438 > + .quad 0xbfc55d1ad4232d6f > + .quad 0xbfc59338d9982086 > + .quad 0xbfc5c940075972b9 > + .quad 0xbfc5ff3070a793d4 > + .quad 0xbfc6350a28aaa758 > + .quad 0xbfc66acd4272ad51 > + .quad 0xbfc6a079d0f7aad2 > + .quad 0xbfc6d60fe719d21d > + .quad 0xbfc70b8f97a1aa75 > + .quad 0xbfc740f8f54037a5 > + .quad 0xbfc7764c128f2127 > + .quad 0xbfc7ab890210d909 > + .quad 0xbfc7e0afd630c274 > + .quad 0xbfc815c0a14357eb > + .quad 0xbfc84abb75865139 > + .quad 0xbfc87fa06520c911 > + .quad 0xbfc8b46f8223625b > + .quad 0xbfc8e928de886d41 > + .quad 0xbfc91dcc8c340bde > + .quad 0xbfc9525a9cf456b4 > + .quad 0xbfc986d3228180ca > + .quad 0xbfc9bb362e7dfb83 > + .quad 0xbfc9ef83d2769a34 > + .quad 0xbfca23bc1fe2b563 > + .quad 0xbfca57df28244dcd > + .quad 0xbfca8becfc882f19 > + .quad 0xbfcabfe5ae46124c > + .quad 0xbfcaf3c94e80bff3 > + .quad 0xbfcb2797ee46320c > + .quad 0xbfcb5b519e8fb5a4 > + 
.quad 0xbfcb8ef670420c3b > + .quad 0xbfcbc286742d8cd6 > + .quad 0xbfcbf601bb0e44e2 > + .quad 0xbfcc2968558c18c1 > + .quad 0xbfcc5cba543ae425 > + .quad 0xbfcc8ff7c79a9a22 > + .quad 0xbfccc320c0176502 > + .quad 0xbfccf6354e09c5dc > + .quad 0xbfcd293581b6b3e7 > + .quad 0xbfcd5c216b4fbb91 > + .quad 0xbfcd8ef91af31d5e > + .quad 0xbfcdc1bca0abec7d > + .quad 0xbfcdf46c0c722d2f > + .quad 0xbfce27076e2af2e6 > + .quad 0xbfce598ed5a87e2f > + .quad 0xbfce8c0252aa5a60 > + .quad 0xbfcebe61f4dd7b0b > + .quad 0xbfcef0adcbdc5936 > + .quad 0xbfcf22e5e72f105d > + .quad 0xbfcf550a564b7b37 > + .quad 0xbfcf871b28955045 > + .quad 0xbfcfb9186d5e3e2b > + .quad 0xbfcfeb0233e607cc > + .quad 0xbfd00e6c45ad501d > + .quad 0xbfd0274dc16c232f > + .quad 0xbfd0402594b4d041 > + .quad 0xbfd058f3c703ebc6 > + .quad 0xbfd071b85fcd590d > + .quad 0xbfd08a73667c57af > + .quad 0xbfd0a324e27390e3 > + .quad 0xbfd0bbccdb0d24bd > + .quad 0xbfd0d46b579ab74b > + .quad 0xbfd0ed005f657da4 > + .quad 0xbfd1058bf9ae4ad5 > + .quad 0xbfd11e0e2dad9cb7 > + .quad 0xbfd136870293a8b0 > + .quad 0xbfd14ef67f88685a > + .quad 0xbfd1675cababa60e > + .quad 0xbfd17fb98e15095d > + .quad 0xbfd1980d2dd4236f > + .quad 0xbfd1b05791f07b49 > + .quad 0xbfd1c898c16999fb > + .quad 0xbfd1e0d0c33716be > + .quad 0xbfd1f8ff9e48a2f3 > + .quad 0xbfd211255986160c > + .quad 0xbfd22941fbcf7966 > + .quad 0xbfd241558bfd1404 > + .quad 0xbfd2596010df763a > + .quad 0xbfd27161913f853d > + .quad 0xbfd2895a13de86a3 > + .quad 0xbfd2a1499f762bc9 > + .quad 0xbfd2b9303ab89d25 > + .quad 0xbfd2d10dec508583 > + .quad 0xbfd2e8e2bae11d31 > + .quad 0xbfd300aead06350c > + .quad 0xbfd31871c9544185 > + .quad 0xbfd3302c16586588 > + .quad 0xbfd347dd9a987d55 > + .quad 0xbfd35f865c93293e > + .quad 0xbfd3772662bfd85b > + .quad 0xbfd38ebdb38ed321 > + .quad 0xbfd3a64c556945ea > + .quad 0xbfd3bdd24eb14b6a > + .quad 0xbfd3d54fa5c1f710 > + .quad 0xbfd3ecc460ef5f50 > + .quad 0xbfd404308686a7e4 > + .quad 0xbfd41b941cce0bee > + .quad 0xbfd432ef2a04e814 > + .quad 0xbfd44a41b463c47c > 
+ .quad 0xbfd4618bc21c5ec2 > + .quad 0xbfd478cd5959b3d9 > + .quad 0xbfd49006804009d1 > + .quad 0xbfd4a7373cecf997 > + .quad 0xbfd4be5f957778a1 > + .quad 0xbfd4d57f8fefe27f > + .quad 0xbfd4ec973260026a > + .quad 0xbfd503a682cb1cb3 > + .quad 0xbfd51aad872df82d > + .quad 0xbfd531ac457ee77e > + .quad 0xbfd548a2c3add263 > + .quad 0xbfd55f9107a43ee2 > + .quad 0xbfd5767717455a6c > + .quad 0xbfd58d54f86e02f2 > + .quad 0xbfd5a42ab0f4cfe2 > + .quad 0xbfd5baf846aa1b19 > + .quad 0xbfd5d1bdbf5809ca > + .quad 0xbfd5e87b20c2954a > + .quad 0xbfd5ff3070a793d4 > + .quad 0xbfd615ddb4bec13c > + .quad 0xbfd62c82f2b9c795 > + .quad 0x3fd61965cdb02c1f > + .quad 0x3fd602d08af091ec > + .quad 0x3fd5ec433d5c35ae > + .quad 0x3fd5d5bddf595f30 > + .quad 0x3fd5bf406b543db2 > + .quad 0x3fd5a8cadbbedfa1 > + .quad 0x3fd5925d2b112a59 > + .quad 0x3fd57bf753c8d1fb > + .quad 0x3fd565995069514c > + .quad 0x3fd54f431b7be1a9 > + .quad 0x3fd538f4af8f72fe > + .quad 0x3fd522ae0738a3d8 > + .quad 0x3fd50c6f1d11b97c > + .quad 0x3fd4f637ebba9810 > + .quad 0x3fd4e0086dd8baca > + .quad 0x3fd4c9e09e172c3c > + .quad 0x3fd4b3c077267e9a > + .quad 0x3fd49da7f3bcc41f > + .quad 0x3fd487970e958770 > + .quad 0x3fd4718dc271c41b > + .quad 0x3fd45b8c0a17df13 > + .quad 0x3fd44591e0539f49 > + .quad 0x3fd42f9f3ff62642 > + .quad 0x3fd419b423d5e8c7 > + .quad 0x3fd403d086cea79c > + .quad 0x3fd3edf463c1683e > + .quad 0x3fd3d81fb5946dba > + .quad 0x3fd3c25277333184 > + .quad 0x3fd3ac8ca38e5c5f > + .quad 0x3fd396ce359bbf54 > + .quad 0x3fd3811728564cb2 > + .quad 0x3fd36b6776be1117 > + .quad 0x3fd355bf1bd82c8b > + .quad 0x3fd3401e12aecba1 > + .quad 0x3fd32a84565120a8 > + .quad 0x3fd314f1e1d35ce4 > + .quad 0x3fd2ff66b04ea9d4 > + .quad 0x3fd2e9e2bce12286 > + .quad 0x3fd2d46602adccee > + .quad 0x3fd2bef07cdc9354 > + .quad 0x3fd2a982269a3dbf > + .quad 0x3fd2941afb186b7c > + .quad 0x3fd27ebaf58d8c9d > + .quad 0x3fd269621134db92 > + .quad 0x3fd25410494e56c7 > + .quad 0x3fd23ec5991eba49 > + .quad 0x3fd22981fbef797b > + .quad 0x3fd214456d0eb8d4 
> + .quad 0x3fd1ff0fe7cf47a7 > + .quad 0x3fd1e9e1678899f4 > + .quad 0x3fd1d4b9e796c245 > + .quad 0x3fd1bf99635a6b95 > + .quad 0x3fd1aa7fd638d33f > + .quad 0x3fd1956d3b9bc2fa > + .quad 0x3fd180618ef18adf > + .quad 0x3fd16b5ccbacfb73 > + .quad 0x3fd1565eed455fc3 > + .quad 0x3fd14167ef367783 > + .quad 0x3fd12c77cd00713b > + .quad 0x3fd1178e8227e47c > + .quad 0x3fd102ac0a35cc1c > + .quad 0x3fd0edd060b78081 > + .quad 0x3fd0d8fb813eb1ef > + .quad 0x3fd0c42d676162e3 > + .quad 0x3fd0af660eb9e279 > + .quad 0x3fd09aa572e6c6d4 > + .quad 0x3fd085eb8f8ae797 > + .quad 0x3fd07138604d5862 > + .quad 0x3fd05c8be0d9635a > + .quad 0x3fd047e60cde83b8 > + .quad 0x3fd03346e0106062 > + .quad 0x3fd01eae5626c691 > + .quad 0x3fd00a1c6adda473 > + .quad 0x3fcfeb2233ea07cd > + .quad 0x3fcfc218be620a5e > + .quad 0x3fcf991c6cb3b379 > + .quad 0x3fcf702d36777df0 > + .quad 0x3fcf474b134df229 > + .quad 0x3fcf1e75fadf9bde > + .quad 0x3fcef5ade4dcffe6 > + .quad 0x3fceccf2c8fe920a > + .quad 0x3fcea4449f04aaf5 > + .quad 0x3fce7ba35eb77e2a > + .quad 0x3fce530effe71012 > + .quad 0x3fce2a877a6b2c12 > + .quad 0x3fce020cc6235ab5 > + .quad 0x3fcdd99edaf6d7e9 > + .quad 0x3fcdb13db0d48940 > + .quad 0x3fcd88e93fb2f450 > + .quad 0x3fcd60a17f903515 > + .quad 0x3fcd38666871f465 > + .quad 0x3fcd1037f2655e7b > + .quad 0x3fcce816157f1988 > + .quad 0x3fccc000c9db3c52 > + .quad 0x3fcc97f8079d44ec > + .quad 0x3fcc6ffbc6f00f71 > + .quad 0x3fcc480c0005ccd1 > + .quad 0x3fcc2028ab17f9b4 > + .quad 0x3fcbf851c067555f > + .quad 0x3fcbd087383bd8ad > + .quad 0x3fcba8c90ae4ad19 > + .quad 0x3fcb811730b823d2 > + .quad 0x3fcb5971a213acdb > + .quad 0x3fcb31d8575bce3d > + .quad 0x3fcb0a4b48fc1b46 > + .quad 0x3fcae2ca6f672bd4 > + .quad 0x3fcabb55c31693ad > + .quad 0x3fca93ed3c8ad9e3 > + .quad 0x3fca6c90d44b704e > + .quad 0x3fca454082e6ab05 > + .quad 0x3fca1dfc40f1b7f1 > + .quad 0x3fc9f6c407089664 > + .quad 0x3fc9cf97cdce0ec3 > + .quad 0x3fc9a8778debaa38 > + .quad 0x3fc981634011aa75 > + .quad 0x3fc95a5adcf7017f > + .quad 
0x3fc9335e5d594989 > + .quad 0x3fc90c6db9fcbcd9 > + .quad 0x3fc8e588ebac2dbf > + .quad 0x3fc8beafeb38fe8c > + .quad 0x3fc897e2b17b19a5 > + .quad 0x3fc871213750e994 > + .quad 0x3fc84a6b759f512f > + .quad 0x3fc823c16551a3c2 > + .quad 0x3fc7fd22ff599d4f > + .quad 0x3fc7d6903caf5ad0 > + .quad 0x3fc7b0091651528c > + .quad 0x3fc7898d85444c73 > + .quad 0x3fc7631d82935a86 > + .quad 0x3fc73cb9074fd14d > + .quad 0x3fc716600c914054 > + .quad 0x3fc6f0128b756abc > + .quad 0x3fc6c9d07d203fc7 > + .quad 0x3fc6a399dabbd383 > + .quad 0x3fc67d6e9d785771 > + .quad 0x3fc6574ebe8c133a > + .quad 0x3fc6313a37335d76 > + .quad 0x3fc60b3100b09476 > + .quad 0x3fc5e533144c1719 > + .quad 0x3fc5bf406b543db2 > + .quad 0x3fc59958ff1d52f1 > + .quad 0x3fc5737cc9018cdd > + .quad 0x3fc54dabc26105d2 > + .quad 0x3fc527e5e4a1b58d > + .quad 0x3fc5022b292f6a45 > + .quad 0x3fc4dc7b897bc1c8 > + .quad 0x3fc4b6d6fefe22a4 > + .quad 0x3fc4913d8333b561 > + .quad 0x3fc46baf0f9f5db7 > + .quad 0x3fc4462b9dc9b3dc > + .quad 0x3fc420b32740fdd4 > + .quad 0x3fc3fb45a59928cc > + .quad 0x3fc3d5e3126bc27f > + .quad 0x3fc3b08b6757f2a9 > + .quad 0x3fc38b3e9e027479 > + .quad 0x3fc365fcb0159016 > + .quad 0x3fc340c59741142e > + .quad 0x3fc31b994d3a4f85 > + .quad 0x3fc2f677cbbc0a96 > + .quad 0x3fc2d1610c86813a > + .quad 0x3fc2ac55095f5c59 > + .quad 0x3fc28753bc11aba5 > + .quad 0x3fc2625d1e6ddf57 > + .quad 0x3fc23d712a49c202 > + .quad 0x3fc2188fd9807263 > + .quad 0x3fc1f3b925f25d41 > + .quad 0x3fc1ceed09853752 > + .quad 0x3fc1aa2b7e23f72a > + .quad 0x3fc185747dbecf34 > + .quad 0x3fc160c8024b27b1 > + .quad 0x3fc13c2605c398c3 > + .quad 0x3fc1178e8227e47c > + .quad 0x3fc0f301717cf0fb > + .quad 0x3fc0ce7ecdccc28d > + .quad 0x3fc0aa06912675d5 > + .quad 0x3fc08598b59e3a07 > + .quad 0x3fc06135354d4b18 > + .quad 0x3fc03cdc0a51ec0d > + .quad 0x3fc0188d2ecf6140 > + .quad 0x3fbfe89139dbd566 > + .quad 0x3fbfa01c9db57ce2 > + .quad 0x3fbf57bc7d9005db > + .quad 0x3fbf0f70cdd992e3 > + .quad 0x3fbec739830a1120 > + .quad 0x3fbe7f1691a32d3e > + 
.quad 0x3fbe3707ee30487b > + .quad 0x3fbdef0d8d466db9 > + .quad 0x3fbda727638446a2 > + .quad 0x3fbd5f55659210e2 > + .quad 0x3fbd179788219364 > + .quad 0x3fbccfedbfee13a8 > + .quad 0x3fbc885801bc4b23 > + .quad 0x3fbc40d6425a5cb1 > + .quad 0x3fbbf968769fca11 > + .quad 0x3fbbb20e936d6974 > + .quad 0x3fbb6ac88dad5b1c > + .quad 0x3fbb23965a52ff00 > + .quad 0x3fbadc77ee5aea8c > + .quad 0x3fba956d3ecade63 > + .quad 0x3fba4e7640b1bc38 > + .quad 0x3fba0792e9277cac > + .quad 0x3fb9c0c32d4d2548 > + .quad 0x3fb97a07024cbe74 > + .quad 0x3fb9335e5d594989 > + .quad 0x3fb8ecc933aeb6e8 > + .quad 0x3fb8a6477a91dc29 > + .quad 0x3fb85fd927506a48 > + .quad 0x3fb8197e2f40e3f0 > + .quad 0x3fb7d33687c293c9 > + .quad 0x3fb78d02263d82d3 > + .quad 0x3fb746e100226ed9 > + .quad 0x3fb700d30aeac0e1 > + .quad 0x3fb6bad83c1883b6 > + .quad 0x3fb674f089365a7a > + .quad 0x3fb62f1be7d77743 > + .quad 0x3fb5e95a4d9791cb > + .quad 0x3fb5a3abb01ade25 > + .quad 0x3fb55e10050e0384 > + .quad 0x3fb518874226130a > + .quad 0x3fb4d3115d207eac > + .quad 0x3fb48dae4bc31018 > + .quad 0x3fb4485e03dbdfad > + .quad 0x3fb403207b414b7f > + .quad 0x3fb3bdf5a7d1ee64 > + .quad 0x3fb378dd7f749714 > + .quad 0x3fb333d7f8183f4b > + .quad 0x3fb2eee507b40301 > + .quad 0x3fb2aa04a44717a5 > + .quad 0x3fb26536c3d8c369 > + .quad 0x3fb2207b5c78549e > + .quad 0x3fb1dbd2643d190b > + .quad 0x3fb1973bd1465567 > + .quad 0x3fb152b799bb3cc9 > + .quad 0x3fb10e45b3cae831 > + .quad 0x3fb0c9e615ac4e17 > + .quad 0x3fb08598b59e3a07 > + .quad 0x3fb0415d89e74444 > + .quad 0x3faffa6911ab9301 > + .quad 0x3faf723b517fc523 > + .quad 0x3faeea31c006b87c > + .quad 0x3fae624c4a0b5e1b > + .quad 0x3fadda8adc67ee4e > + .quad 0x3fad52ed6405d86f > + .quad 0x3faccb73cdddb2cc > + .quad 0x3fac441e06f72a9e > + .quad 0x3fabbcebfc68f420 > + .quad 0x3fab35dd9b58baad > + .quad 0x3faaaef2d0fb10fc > + .quad 0x3faa282b8a936171 > + .quad 0x3fa9a187b573de7c > + .quad 0x3fa91b073efd7314 > + .quad 0x3fa894aa149fb343 > + .quad 0x3fa80e7023d8ccc4 > + .quad 0x3fa788595a3577ba > 
+ .quad 0x3fa70265a550e777 > + .quad 0x3fa67c94f2d4bb58 > + .quad 0x3fa5f6e73078efb8 > + .quad 0x3fa5715c4c03ceef > + .quad 0x3fa4ebf43349e26f > + .quad 0x3fa466aed42de3ea > + .quad 0x3fa3e18c1ca0ae92 > + .quad 0x3fa35c8bfaa1306b > + .quad 0x3fa2d7ae5c3c5bae > + .quad 0x3fa252f32f8d183f > + .quad 0x3fa1ce5a62bc353a > + .quad 0x3fa149e3e4005a8d > + .quad 0x3fa0c58fa19dfaaa > + .quad 0x3fa0415d89e74444 > + .quad 0x3f9f7a9b16782856 > + .quad 0x3f9e72bf2813ce51 > + .quad 0x3f9d6b2725979802 > + .quad 0x3f9c63d2ec14aaf2 > + .quad 0x3f9b5cc258b718e6 > + .quad 0x3f9a55f548c5c43f > + .quad 0x3f994f6b99a24475 > + .quad 0x3f98492528c8cabf > + .quad 0x3f974321d3d006d3 > + .quad 0x3f963d6178690bd6 > + .quad 0x3f9537e3f45f3565 > + .quad 0x3f9432a925980cc1 > + .quad 0x3f932db0ea132e22 > + .quad 0x3f9228fb1fea2e28 > + .quad 0x3f912487a5507f70 > + .quad 0x3f90205658935847 > + .quad 0x3f8e38ce3033310c > + .quad 0x3f8c317384c75f06 > + .quad 0x3f8a2a9c6c170462 > + .quad 0x3f882448a388a2aa > + .quad 0x3f861e77e8b53fc6 > + .quad 0x3f841929f96832f0 > + .quad 0x3f82145e939ef1e9 > + .quad 0x3f8010157588de71 > + .quad 0x3f7c189cbb0e27fb > + .quad 0x3f78121214586b54 > + .quad 0x3f740c8a747878e2 > + .quad 0x3f70080559588b35 > + .quad 0x3f680904828985c0 > + .quad 0x3f60040155d5889e > + .quad 0x3f50020055655889 > + .quad 0x0000000000000000 > + /*== poly_coeff[4] ==*/ > + .align 32 > + .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */ > + .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */ > + .quad 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */ > + .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */ > + /*== ExpMask ==*/ > + .align 32 > + .quad 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff > + /*== Two10 ==*/ > + .align 32 > + .quad 0x3f50000000000000, 0x3f50000000000000, 
0x3f50000000000000, 0x3f50000000000000 > + /*== MinLog1p = -1+2^(-53) ==*/ > + .align 32 > + .quad 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff > + /*== MaxLog1p ==*/ > + .align 32 > + .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000 > + /*== One ==*/ > + .align 32 > + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 > + /*== SgnMask ==*/ > + .align 32 > + .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff > + /*== XThreshold ==*/ > + .align 32 > + .quad 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000 > + /*== XhMask ==*/ > + .align 32 > + .quad 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00 > + /*== Threshold ==*/ > + .align 32 > + .quad 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000 > + /*== Bias ==*/ > + .align 32 > + .quad 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000 > + /*== Bias1 ==*/ > + .align 32 > + .quad 0x408ff00000000000, 0x408ff00000000000, 0x408ff00000000000, 0x408ff00000000000 > + /*== ExpMask ==*/ > + .align 32 > + .quad 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 > + /*== ExpMask2 ==*/ > + .align 32 > + .quad 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000 > + /*== L2L ==*/ > + .align 32 > + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF > + .align 32 > + .type __svml_dlog1p_data_internal,@object > + .size __svml_dlog1p_data_internal,.-__svml_dlog1p_data_internal > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S > new file mode 100644 > index 0000000000..ca174a5f52 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S > @@ -0,0 +1,20 @@ > +/* AVX2 version of vectorized log1p, vector 
length is 8. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define _ZGVeN8v_log1p _ZGVeN8v_log1p_avx2_wrapper > +#include "../svml_d_log1p8_core.S" > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c > new file mode 100644 > index 0000000000..0aa35ec8c5 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c > @@ -0,0 +1,27 @@ > +/* Multiple versions of vectorized log1p, vector length is 8. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. 
> + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define SYMBOL_NAME _ZGVeN8v_log1p > +#include "ifunc-mathvec-avx512-skx.h" > + > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > + > +#ifdef SHARED > +__hidden_ver1 (_ZGVeN8v_log1p, __GI__ZGVeN8v_log1p, __redirect__ZGVeN8v_log1p) > + __attribute__ ((visibility ("hidden"))); > +#endif > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S > new file mode 100644 > index 0000000000..5e38ff8d39 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S > @@ -0,0 +1,317 @@ > +/* Function log1p vectorized with AVX-512. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + https://www.gnu.org/licenses/. 
*/ > + > +/* > + * ALGORITHM DESCRIPTION: > + * > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > + * Get short reciprocal approximation Rcp ~ 1/xh > + * R = (Rcp*xh - 1.0) + Rcp*xl > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > + * log(Rcp) is tabulated > + * > + * > + */ > + > +/* Offsets for data table __svml_dlog1p_data_internal_avx512 > + */ > +#define Log_tbl 0 > +#define One 128 > +#define SgnMask 192 > +#define C075 256 > +#define poly_coeff9 320 > +#define poly_coeff8 384 > +#define poly_coeff7 448 > +#define poly_coeff6 512 > +#define poly_coeff5 576 > +#define poly_coeff4 640 > +#define poly_coeff3 704 > +#define poly_coeff2 768 > +#define L2 832 > + > +#include <sysdep.h> > + > + .text > + .section .text.evex512,"ax",@progbits > +ENTRY(_ZGVeN8v_log1p_skx) > + pushq %rbp > + cfi_def_cfa_offset(16) > + movq %rsp, %rbp > + cfi_def_cfa(6, 16) > + cfi_offset(6, -16) > + andq $-64, %rsp > + subq $192, %rsp > + vmovups One+__svml_dlog1p_data_internal_avx512(%rip), %zmm7 > + vmovups SgnMask+__svml_dlog1p_data_internal_avx512(%rip), %zmm14 > + vmovaps %zmm0, %zmm9 > + vaddpd {rn-sae}, %zmm9, %zmm7, %zmm11 > + vandpd %zmm14, %zmm9, %zmm8 > + > +/* compute 1+x as high, low parts */ > + vmaxpd {sae}, %zmm9, %zmm7, %zmm10 > + vminpd {sae}, %zmm9, %zmm7, %zmm12 > + > +/* GetMant(x), normalized to [1,2) for x>=0, NaN for x<0 */ > + vgetmantpd $8, {sae}, %zmm11, %zmm6 > + > +/* GetExp(x) */ > + vgetexppd {sae}, %zmm11, %zmm5 > + vsubpd {rn-sae}, %zmm10, %zmm11, %zmm13 > + > +/* DblRcp ~ 1/Mantissa */ > + vrcp14pd %zmm6, %zmm15 > + > +/* Start polynomial evaluation */ > + vmovups poly_coeff9+__svml_dlog1p_data_internal_avx512(%rip), %zmm10 > + vmovups poly_coeff7+__svml_dlog1p_data_internal_avx512(%rip), %zmm11 > + > +/* Xl */ > + vsubpd {rn-sae}, %zmm13, %zmm12, %zmm2 > + vxorpd %zmm14, %zmm5, %zmm3 > + > +/* round DblRcp to 4 fractional bits (RN mode, no Precision exception) */ > + vrndscalepd $88, {sae}, %zmm15, %zmm4 > + vmovups 
poly_coeff5+__svml_dlog1p_data_internal_avx512(%rip), %zmm12 > + vmovups poly_coeff6+__svml_dlog1p_data_internal_avx512(%rip), %zmm14 > + vmovups poly_coeff3+__svml_dlog1p_data_internal_avx512(%rip), %zmm13 > + > +/* Xl*2^(-Expon) */ > + vscalefpd {rn-sae}, %zmm3, %zmm2, %zmm1 > + > +/* Reduced argument: R = DblRcp*(Mantissa+Xl) - 1 */ > + vfmsub213pd {rn-sae}, %zmm7, %zmm4, %zmm6 > + vmovups __svml_dlog1p_data_internal_avx512(%rip), %zmm3 > + > +/* > + * Table lookup > + * Prepare exponent correction: DblRcp<0.75? > + */ > + vmovups C075+__svml_dlog1p_data_internal_avx512(%rip), %zmm2 > + > +/* Prepare table index */ > + vpsrlq $48, %zmm4, %zmm0 > + vfmadd231pd {rn-sae}, %zmm4, %zmm1, %zmm6 > + vmovups poly_coeff8+__svml_dlog1p_data_internal_avx512(%rip), %zmm1 > + vcmppd $17, {sae}, %zmm2, %zmm4, %k1 > + vcmppd $4, {sae}, %zmm6, %zmm6, %k0 > + vfmadd231pd {rn-sae}, %zmm6, %zmm10, %zmm1 > + vmovups poly_coeff4+__svml_dlog1p_data_internal_avx512(%rip), %zmm10 > + vfmadd231pd {rn-sae}, %zmm6, %zmm11, %zmm14 > + vmovups L2+__svml_dlog1p_data_internal_avx512(%rip), %zmm4 > + vpermt2pd Log_tbl+64+__svml_dlog1p_data_internal_avx512(%rip), %zmm0, %zmm3 > + > +/* add 1 to Expon if DblRcp<0.75 */ > + vaddpd {rn-sae}, %zmm7, %zmm5, %zmm5{%k1} > + > +/* R^2 */ > + vmulpd {rn-sae}, %zmm6, %zmm6, %zmm0 > + vfmadd231pd {rn-sae}, %zmm6, %zmm12, %zmm10 > + vmovups poly_coeff2+__svml_dlog1p_data_internal_avx512(%rip), %zmm12 > + vmulpd {rn-sae}, %zmm0, %zmm0, %zmm15 > + vfmadd231pd {rn-sae}, %zmm6, %zmm13, %zmm12 > + vfmadd213pd {rn-sae}, %zmm14, %zmm0, %zmm1 > + kmovw %k0, %edx > + vfmadd213pd {rn-sae}, %zmm12, %zmm0, %zmm10 > + > +/* polynomial */ > + vfmadd213pd {rn-sae}, %zmm10, %zmm15, %zmm1 > + vfmadd213pd {rn-sae}, %zmm6, %zmm0, %zmm1 > + vaddpd {rn-sae}, %zmm1, %zmm3, %zmm6 > + vfmadd213pd {rn-sae}, %zmm6, %zmm4, %zmm5 > + vorpd %zmm8, %zmm5, %zmm0 > + testl %edx, %edx > + > +/* Go to special inputs processing branch */ > + jne L(SPECIAL_VALUES_BRANCH) > + # LOE rbx r12 
r13 r14 r15 edx zmm0 zmm9 > + > +/* Restore registers > + * and exit the function > + */ > + > +L(EXIT): > + movq %rbp, %rsp > + popq %rbp > + cfi_def_cfa(7, 8) > + cfi_restore(6) > + ret > + cfi_def_cfa(6, 16) > + cfi_offset(6, -16) > + > +/* Branch to process > + * special inputs > + */ > + > +L(SPECIAL_VALUES_BRANCH): > + vmovups %zmm9, 64(%rsp) > + vmovups %zmm0, 128(%rsp) > + # LOE rbx r12 r13 r14 r15 edx zmm0 > + > + xorl %eax, %eax > + # LOE rbx r12 r13 r14 r15 eax edx > + > + vzeroupper > + movq %r12, 16(%rsp) > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > + movl %eax, %r12d > + movq %r13, 8(%rsp) > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > + movl %edx, %r13d > + movq %r14, (%rsp) > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > + # LOE rbx r15 r12d r13d > + > +/* Range mask > + * bits check > + */ > + > +L(RANGEMASK_CHECK): > + btl %r12d, %r13d > + > +/* Call scalar math function */ > + jc L(SCALAR_MATH_CALL) > + # LOE rbx r15 r12d r13d > + > +/* Special inputs > + * processing loop > + */ > + > +L(SPECIAL_VALUES_LOOP): > + incl %r12d > + cmpl $8, %r12d > + > +/* Check bits in range mask */ > + jl L(RANGEMASK_CHECK) > + # LOE rbx r15 r12d r13d > + > + movq 16(%rsp), %r12 > + cfi_restore(12) > + movq 8(%rsp), %r13 > + cfi_restore(13) > + movq (%rsp), %r14 > + cfi_restore(14) > + vmovups 128(%rsp), %zmm0 > + > +/* Go to exit */ > + jmp L(EXIT) > + /* 
DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > + # LOE rbx r12 r13 r14 r15 zmm0 > + > +/* Scalar math function call > + * to process special input > + */ > + > +L(SCALAR_MATH_CALL): > + movl %r12d, %r14d > + movsd 64(%rsp,%r14,8), %xmm0 > + call log1p@PLT > + # LOE rbx r14 r15 r12d r13d xmm0 > + > + movsd %xmm0, 128(%rsp,%r14,8) > + > +/* Process special inputs in loop */ > + jmp L(SPECIAL_VALUES_LOOP) > + # LOE rbx r15 r12d r13d > +END(_ZGVeN8v_log1p_skx) > + > + .section .rodata, "a" > + .align 64 > + > +#ifdef __svml_dlog1p_data_internal_avx512_typedef > +typedef unsigned int VUINT32; > +typedef struct { > + __declspec(align(64)) VUINT32 Log_tbl[16][2]; > + __declspec(align(64)) VUINT32 One[8][2]; > + __declspec(align(64)) VUINT32 SgnMask[8][2]; > + __declspec(align(64)) VUINT32 C075[8][2]; > + __declspec(align(64)) VUINT32 poly_coeff9[8][2]; > + __declspec(align(64)) VUINT32 poly_coeff8[8][2]; > + __declspec(align(64)) VUINT32 poly_coeff7[8][2]; > + __declspec(align(64)) VUINT32 poly_coeff6[8][2]; > + __declspec(align(64)) VUINT32 poly_coeff5[8][2]; > + __declspec(align(64)) VUINT32 poly_coeff4[8][2]; > + __declspec(align(64)) VUINT32 poly_coeff3[8][2]; > + __declspec(align(64)) VUINT32 poly_coeff2[8][2]; > + __declspec(align(64)) VUINT32 L2[8][2]; > + }
__svml_dlog1p_data_internal_avx512; > +#endif > +__svml_dlog1p_data_internal_avx512: > + /*== Log_tbl ==*/ > + .quad 0x0000000000000000 > + .quad 0xbfaf0a30c01162a6 > + .quad 0xbfbe27076e2af2e6 > + .quad 0xbfc5ff3070a793d4 > + .quad 0xbfcc8ff7c79a9a22 > + .quad 0xbfd1675cababa60e > + .quad 0xbfd4618bc21c5ec2 > + .quad 0xbfd739d7f6bbd007 > + .quad 0x3fd269621134db92 > + .quad 0x3fcf991c6cb3b379 > + .quad 0x3fca93ed3c8ad9e3 > + .quad 0x3fc5bf406b543db2 > + .quad 0x3fc1178e8227e47c > + .quad 0x3fb9335e5d594989 > + .quad 0x3fb08598b59e3a07 > + .quad 0x3fa0415d89e74444 > + /*== One ==*/ > + .align 64 > + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 > + /*== SgnMask ==*/ > + .align 64 > + .quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 > + /*== C075 0.75 ==*/ > + .align 64 > + .quad 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000 > + /*== poly_coeff9 ==*/ > + .align 64 > + .quad 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70 > + /*== poly_coeff8 ==*/ > + .align 64 > + .quad 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62 > + /*== poly_coeff7 ==*/ > + .align 64 > + .quad 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF > + /*== poly_coeff6 ==*/ > + .align 64 > + .quad 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 
0xbfc55553FB28DB06, 0xbfc55553FB28DB06 > + /*== poly_coeff5 ==*/ > + .align 64 > + .quad 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C > + /*== poly_coeff4 ==*/ > + .align 64 > + .quad 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD > + /*== poly_coeff3 ==*/ > + .align 64 > + .quad 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466 > + /*== poly_coeff2 ==*/ > + .align 64 > + .quad 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6 > + /*== L2 = log(2) ==*/ > + .align 64 > + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF > + .align 64 > + .type __svml_dlog1p_data_internal_avx512,@object > + .size __svml_dlog1p_data_internal_avx512,.-__svml_dlog1p_data_internal_avx512 > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S > new file mode 100644 > index 0000000000..3c0a0a01a2 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S > @@ -0,0 +1,20 @@ > +/* AVX2 version of vectorized log1pf. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define _ZGVeN16v_log1pf _ZGVeN16v_log1pf_avx2_wrapper > +#include "../svml_s_log1pf16_core.S" > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c > new file mode 100644 > index 0000000000..9af1320547 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c > @@ -0,0 +1,28 @@ > +/* Multiple versions of vectorized log1pf, vector length is 16. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#define SYMBOL_NAME _ZGVeN16v_log1pf > +#include "ifunc-mathvec-avx512-skx.h" > + > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > + > +#ifdef SHARED > +__hidden_ver1 (_ZGVeN16v_log1pf, __GI__ZGVeN16v_log1pf, > + __redirect__ZGVeN16v_log1pf) > + __attribute__ ((visibility ("hidden"))); > +#endif > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S > new file mode 100644 > index 0000000000..78b2fe417f > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S > @@ -0,0 +1,271 @@ > +/* Function log1pf vectorized with AVX-512. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + https://www.gnu.org/licenses/. 
*/ > + > +/* > + * ALGORITHM DESCRIPTION: > + * > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > + * Get short reciprocal approximation Rcp ~ 1/xh > + * R = (Rcp*xh - 1.0) + Rcp*xl > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > + * log(Rcp) is tabulated > + * > + * > + */ > + > +/* Offsets for data table __svml_slog1p_data_internal > + */ > +#define SgnMask 0 > +#define sOne 64 > +#define sPoly_1 128 > +#define sPoly_2 192 > +#define sPoly_3 256 > +#define sPoly_4 320 > +#define sPoly_5 384 > +#define sPoly_6 448 > +#define sPoly_7 512 > +#define sPoly_8 576 > +#define iHiDelta 640 > +#define iLoRange 704 > +#define iBrkValue 768 > +#define iOffExpoMask 832 > +#define sLn2 896 > + > +#include <sysdep.h> > + > + .text > + .section .text.exex512,"ax",@progbits > +ENTRY(_ZGVeN16v_log1pf_skx) > + pushq %rbp > + cfi_def_cfa_offset(16) > + movq %rsp, %rbp > + cfi_def_cfa(6, 16) > + cfi_offset(6, -16) > + andq $-64, %rsp > + subq $192, %rsp > + vmovups sOne+__svml_slog1p_data_internal(%rip), %zmm2 > + > +/* reduction: compute r,n */ > + vmovups iBrkValue+__svml_slog1p_data_internal(%rip), %zmm12 > + vmovups SgnMask+__svml_slog1p_data_internal(%rip), %zmm4 > + vmovaps %zmm0, %zmm3 > + > +/* compute 1+x as high, low parts */ > + vmaxps {sae}, %zmm3, %zmm2, %zmm5 > + vminps {sae}, %zmm3, %zmm2, %zmm7 > + vandnps %zmm3, %zmm4, %zmm1 > + vpternlogd $255, %zmm4, %zmm4, %zmm4 > + vaddps {rn-sae}, %zmm7, %zmm5, %zmm9 > + vpsubd %zmm12, %zmm9, %zmm10 > + vsubps {rn-sae}, %zmm9, %zmm5, %zmm6 > + > +/* check argument value ranges */ > + vpaddd iHiDelta+__svml_slog1p_data_internal(%rip), %zmm9, %zmm8 > + vpsrad $23, %zmm10, %zmm13 > + vmovups sPoly_5+__svml_slog1p_data_internal(%rip), %zmm9 > + vpcmpd $5, iLoRange+__svml_slog1p_data_internal(%rip), %zmm8, %k1 > + vpslld $23, %zmm13, %zmm14 > + vaddps {rn-sae}, %zmm7, %zmm6, %zmm15 > + vcvtdq2ps {rn-sae}, %zmm13, %zmm0 > + vpsubd %zmm14, %zmm2, %zmm13 > + vmovups sPoly_8+__svml_slog1p_data_internal(%rip), 
%zmm7 > + vmovups sPoly_1+__svml_slog1p_data_internal(%rip), %zmm14 > + vmulps {rn-sae}, %zmm13, %zmm15, %zmm6 > + vpandd iOffExpoMask+__svml_slog1p_data_internal(%rip), %zmm10, %zmm11 > + vpaddd %zmm12, %zmm11, %zmm5 > + vmovups sPoly_4+__svml_slog1p_data_internal(%rip), %zmm10 > + vmovups sPoly_3+__svml_slog1p_data_internal(%rip), %zmm11 > + vmovups sPoly_2+__svml_slog1p_data_internal(%rip), %zmm12 > + > +/* polynomial evaluation */ > + vsubps {rn-sae}, %zmm2, %zmm5, %zmm2 > + vaddps {rn-sae}, %zmm6, %zmm2, %zmm15 > + vmovups sPoly_7+__svml_slog1p_data_internal(%rip), %zmm2 > + vfmadd231ps {rn-sae}, %zmm15, %zmm7, %zmm2 > + vpandnd %zmm8, %zmm8, %zmm4{%k1} > + vmovups sPoly_6+__svml_slog1p_data_internal(%rip), %zmm8 > + > +/* combine and get argument value range mask */ > + vptestmd %zmm4, %zmm4, %k0 > + vfmadd213ps {rn-sae}, %zmm8, %zmm15, %zmm2 > + kmovw %k0, %edx > + vfmadd213ps {rn-sae}, %zmm9, %zmm15, %zmm2 > + vfmadd213ps {rn-sae}, %zmm10, %zmm15, %zmm2 > + vfmadd213ps {rn-sae}, %zmm11, %zmm15, %zmm2 > + vfmadd213ps {rn-sae}, %zmm12, %zmm15, %zmm2 > + vfmadd213ps {rn-sae}, %zmm14, %zmm15, %zmm2 > + vmulps {rn-sae}, %zmm15, %zmm2, %zmm4 > + vfmadd213ps {rn-sae}, %zmm15, %zmm15, %zmm4 > + > +/* final reconstruction */ > + vmovups sLn2+__svml_slog1p_data_internal(%rip), %zmm15 > + vfmadd213ps {rn-sae}, %zmm4, %zmm15, %zmm0 > + vorps %zmm1, %zmm0, %zmm0 > + testl %edx, %edx > + > +/* Go to special inputs processing branch */ > + jne L(SPECIAL_VALUES_BRANCH) > + # LOE rbx r12 r13 r14 r15 edx zmm0 zmm3 > + > +/* Restore registers > + * and exit the function > + */ > + > +L(EXIT): > + movq %rbp, %rsp > + popq %rbp > + cfi_def_cfa(7, 8) > + cfi_restore(6) > + ret > + cfi_def_cfa(6, 16) > + cfi_offset(6, -16) > + > +/* Branch to process > + * special inputs > + */ > + > +L(SPECIAL_VALUES_BRANCH): > + vmovups %zmm3, 64(%rsp) > + vmovups %zmm0, 128(%rsp) > + # LOE rbx r12 r13 r14 r15 edx zmm0 > + > + xorl %eax, %eax > + # LOE rbx r12 r13 r14 r15 eax edx > + > + 
vzeroupper > + movq %r12, 16(%rsp) > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > + movl %eax, %r12d > + movq %r13, 8(%rsp) > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > + movl %edx, %r13d > + movq %r14, (%rsp) > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > + # LOE rbx r15 r12d r13d > + > +/* Range mask > + * bits check > + */ > + > +L(RANGEMASK_CHECK): > + btl %r12d, %r13d > + > +/* Call scalar math function */ > + jc L(SCALAR_MATH_CALL) > + # LOE rbx r15 r12d r13d > + > +/* Special inputs > + * processing loop > + */ > + > +L(SPECIAL_VALUES_LOOP): > + incl %r12d > + cmpl $16, %r12d > + > +/* Check bits in range mask */ > + jl L(RANGEMASK_CHECK) > + # LOE rbx r15 r12d r13d > + > + movq 16(%rsp), %r12 > + cfi_restore(12) > + movq 8(%rsp), %r13 > + cfi_restore(13) > + movq (%rsp), %r14 > + cfi_restore(14) > + vmovups 128(%rsp), %zmm0 > + > +/* Go to exit */ > + jmp L(EXIT) > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > + /* 
DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > + # LOE rbx r12 r13 r14 r15 zmm0 > + > +/* Scalar math function call > + * to process special input > + */ > + > +L(SCALAR_MATH_CALL): > + movl %r12d, %r14d > + movss 64(%rsp,%r14,4), %xmm0 > + call log1pf@PLT > + # LOE rbx r14 r15 r12d r13d xmm0 > + > + movss %xmm0, 128(%rsp,%r14,4) > + > +/* Process special inputs in loop */ > + jmp L(SPECIAL_VALUES_LOOP) > + # LOE rbx r15 r12d r13d > +END(_ZGVeN16v_log1pf_skx) > + > + .section .rodata, "a" > + .align 64 > + > +#ifdef __svml_slog1p_data_internal_typedef > +typedef unsigned int VUINT32; > +typedef struct { > + __declspec(align(64)) VUINT32 SgnMask[16][1]; > + __declspec(align(64)) VUINT32 sOne[16][1]; > + __declspec(align(64)) VUINT32 sPoly[8][16][1]; > + __declspec(align(64)) VUINT32 iHiDelta[16][1]; > + __declspec(align(64)) VUINT32 iLoRange[16][1]; > + __declspec(align(64)) VUINT32 iBrkValue[16][1]; > + __declspec(align(64)) VUINT32 iOffExpoMask[16][1]; > + __declspec(align(64)) VUINT32 sLn2[16][1]; > +} __svml_slog1p_data_internal; > +#endif > +__svml_slog1p_data_internal: > + /*== SgnMask ==*/ > + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff > + /*== sOne = SP 1.0 ==*/ > + .align 64 > + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 > + /*== sPoly[] = SP polynomial ==*/ > + .align 64 > + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 
0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ > + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ > + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ > + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ > + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ > + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ > + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ > + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ > + /*== iHiDelta = SP 80000000-7f000000 ==*/ > + .align 64 > + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 
0x01000000 > + /*== iLoRange = SP 00800000+iHiDelta ==*/ > + .align 64 > + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000 > + /*== iBrkValue = SP 2/3 ==*/ > + .align 64 > + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab > + /*== iOffExpoMask = SP significand mask ==*/ > + .align 64 > + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff > + /*== sLn2 = SP ln(2) ==*/ > + .align 64 > + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 > + .align 64 > + .type __svml_slog1p_data_internal,@object > + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S > new file mode 100644 > index 0000000000..913c8290c8 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S > @@ -0,0 +1,20 @@ > +/* SSE2 version of vectorized log1pf, vector length is 4. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define _ZGVbN4v_log1pf _ZGVbN4v_log1pf_sse2 > +#include "../svml_s_log1pf4_core.S" > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c > new file mode 100644 > index 0000000000..b6aff48023 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c > @@ -0,0 +1,28 @@ > +/* Multiple versions of vectorized log1pf, vector length is 4. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#define SYMBOL_NAME _ZGVbN4v_log1pf > +#include "ifunc-mathvec-sse4_1.h" > + > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > + > +#ifdef SHARED > +__hidden_ver1 (_ZGVbN4v_log1pf, __GI__ZGVbN4v_log1pf, > + __redirect__ZGVbN4v_log1pf) > + __attribute__ ((visibility ("hidden"))); > +#endif > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S > new file mode 100644 > index 0000000000..ef1bae58c0 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S > @@ -0,0 +1,252 @@ > +/* Function log1pf vectorized with SSE4. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + https://www.gnu.org/licenses/. 
*/ > + > +/* > + * ALGORITHM DESCRIPTION: > + * > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > + * Get short reciprocal approximation Rcp ~ 1/xh > + * R = (Rcp*xh - 1.0) + Rcp*xl > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > + * log(Rcp) is tabulated > + * > + * > + */ > + > +/* Offsets for data table __svml_slog1p_data_internal > + */ > +#define SgnMask 0 > +#define sOne 16 > +#define sPoly 32 > +#define iHiDelta 160 > +#define iLoRange 176 > +#define iBrkValue 192 > +#define iOffExpoMask 208 > +#define sLn2 224 > + > +#include <sysdep.h> > + > + .text > + .section .text.sse4,"ax",@progbits > +ENTRY(_ZGVbN4v_log1pf_sse4) > + subq $72, %rsp > + cfi_def_cfa_offset(80) > + movups sOne+__svml_slog1p_data_internal(%rip), %xmm7 > + > +/* compute 1+x as high, low parts */ > + movaps %xmm7, %xmm1 > + movaps %xmm7, %xmm5 > + maxps %xmm0, %xmm1 > + minps %xmm0, %xmm5 > + movaps %xmm1, %xmm4 > + > +/* check argument value ranges */ > + movdqu iHiDelta+__svml_slog1p_data_internal(%rip), %xmm2 > + addps %xmm5, %xmm4 > + > +/* reduction: compute r,n */ > + movdqu iBrkValue+__svml_slog1p_data_internal(%rip), %xmm3 > + paddd %xmm4, %xmm2 > + movdqu iOffExpoMask+__svml_slog1p_data_internal(%rip), %xmm8 > + subps %xmm4, %xmm1 > + psubd %xmm3, %xmm4 > + addps %xmm1, %xmm5 > + pand %xmm4, %xmm8 > + psrad $23, %xmm4 > + cvtdq2ps %xmm4, %xmm10 > + pslld $23, %xmm4 > + movaps %xmm7, %xmm1 > + paddd %xmm3, %xmm8 > + psubd %xmm4, %xmm1 > + mulps %xmm5, %xmm1 > + > +/* polynomial evaluation */ > + subps %xmm7, %xmm8 > + > +/* final reconstruction */ > + mulps sLn2+__svml_slog1p_data_internal(%rip), %xmm10 > + addps %xmm8, %xmm1 > + movups sPoly+112+__svml_slog1p_data_internal(%rip), %xmm9 > + mulps %xmm1, %xmm9 > + movdqu iLoRange+__svml_slog1p_data_internal(%rip), %xmm6 > + pcmpgtd %xmm2, %xmm6 > + addps sPoly+96+__svml_slog1p_data_internal(%rip), %xmm9 > + > +/* combine and get argument value range mask */ > + movmskps %xmm6, %edx > + movups 
SgnMask+__svml_slog1p_data_internal(%rip), %xmm11 > + mulps %xmm1, %xmm9 > + andnps %xmm0, %xmm11 > + addps sPoly+80+__svml_slog1p_data_internal(%rip), %xmm9 > + mulps %xmm1, %xmm9 > + addps sPoly+64+__svml_slog1p_data_internal(%rip), %xmm9 > + mulps %xmm1, %xmm9 > + addps sPoly+48+__svml_slog1p_data_internal(%rip), %xmm9 > + mulps %xmm1, %xmm9 > + addps sPoly+32+__svml_slog1p_data_internal(%rip), %xmm9 > + mulps %xmm1, %xmm9 > + addps sPoly+16+__svml_slog1p_data_internal(%rip), %xmm9 > + mulps %xmm1, %xmm9 > + addps sPoly+__svml_slog1p_data_internal(%rip), %xmm9 > + mulps %xmm1, %xmm9 > + mulps %xmm1, %xmm9 > + addps %xmm9, %xmm1 > + addps %xmm10, %xmm1 > + orps %xmm11, %xmm1 > + testl %edx, %edx > + > +/* Go to special inputs processing branch */ > + jne L(SPECIAL_VALUES_BRANCH) > + # LOE rbx rbp r12 r13 r14 r15 edx xmm0 xmm1 > + > +/* Restore registers > + * and exit the function > + */ > + > +L(EXIT): > + movaps %xmm1, %xmm0 > + addq $72, %rsp > + cfi_def_cfa_offset(8) > + ret > + cfi_def_cfa_offset(80) > + > +/* Branch to process > + * special inputs > + */ > + > +L(SPECIAL_VALUES_BRANCH): > + movups %xmm0, 32(%rsp) > + movups %xmm1, 48(%rsp) > + # LOE rbx rbp r12 r13 r14 r15 edx > + > + xorl %eax, %eax > + movq %r12, 16(%rsp) > + cfi_offset(12, -64) > + movl %eax, %r12d > + movq %r13, 8(%rsp) > + cfi_offset(13, -72) > + movl %edx, %r13d > + movq %r14, (%rsp) > + cfi_offset(14, -80) > + # LOE rbx rbp r15 r12d r13d > + > +/* Range mask > + * bits check > + */ > + > +L(RANGEMASK_CHECK): > + btl %r12d, %r13d > + > +/* Call scalar math function */ > + jc L(SCALAR_MATH_CALL) > + # LOE rbx rbp r15 r12d r13d > + > +/* Special inputs > + * processing loop > + */ > + > +L(SPECIAL_VALUES_LOOP): > + incl %r12d > + cmpl $4, %r12d > + > +/* Check bits in range mask */ > + jl L(RANGEMASK_CHECK) > + # LOE rbx rbp r15 r12d r13d > + > + movq 16(%rsp), %r12 > + cfi_restore(12) > + movq 8(%rsp), %r13 > + cfi_restore(13) > + movq (%rsp), %r14 > + cfi_restore(14) > + movups 
48(%rsp), %xmm1 > + > +/* Go to exit */ > + jmp L(EXIT) > + cfi_offset(12, -64) > + cfi_offset(13, -72) > + cfi_offset(14, -80) > + # LOE rbx rbp r12 r13 r14 r15 xmm1 > + > +/* Scalar math function call > + * to process special input > + */ > + > +L(SCALAR_MATH_CALL): > + movl %r12d, %r14d > + movss 32(%rsp,%r14,4), %xmm0 > + call log1pf@PLT > + # LOE rbx rbp r14 r15 r12d r13d xmm0 > + > + movss %xmm0, 48(%rsp,%r14,4) > + > +/* Process special inputs in loop */ > + jmp L(SPECIAL_VALUES_LOOP) > + # LOE rbx rbp r15 r12d r13d > +END(_ZGVbN4v_log1pf_sse4) > + > + .section .rodata, "a" > + .align 16 > + > +#ifdef __svml_slog1p_data_internal_typedef > +typedef unsigned int VUINT32; > +typedef struct { > + __declspec(align(16)) VUINT32 SgnMask[4][1]; > + __declspec(align(16)) VUINT32 sOne[4][1]; > + __declspec(align(16)) VUINT32 sPoly[8][4][1]; > + __declspec(align(16)) VUINT32 iHiDelta[4][1]; > + __declspec(align(16)) VUINT32 iLoRange[4][1]; > + __declspec(align(16)) VUINT32 iBrkValue[4][1]; > + __declspec(align(16)) VUINT32 iOffExpoMask[4][1]; > + __declspec(align(16)) VUINT32 sLn2[4][1]; > +} __svml_slog1p_data_internal; > +#endif > +__svml_slog1p_data_internal: > + /*== SgnMask ==*/ > + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff > + /*== sOne = SP 1.0 ==*/ > + .align 16 > + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 > + /*== sPoly[] = SP polynomial ==*/ > + .align 16 > + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ > + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ > + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ > + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ > + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ > + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ > 
+ .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ > + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ > + /*== iHiDelta = SP 80000000-7f000000 ==*/ > + .align 16 > + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 > + /*== iLoRange = SP 00800000+iHiDelta ==*/ > + .align 16 > + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000 > + /*== iBrkValue = SP 2/3 ==*/ > + .align 16 > + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab > + /*== iOffExpoMask = SP significand mask ==*/ > + .align 16 > + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff > + /*== sLn2 = SP ln(2) ==*/ > + .align 16 > + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 > + .align 16 > + .type __svml_slog1p_data_internal,@object > + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S > new file mode 100644 > index 0000000000..c0b97d89e6 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S > @@ -0,0 +1,20 @@ > +/* SSE version of vectorized log1pf, vector length is 8. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. 
> + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define _ZGVdN8v_log1pf _ZGVdN8v_log1pf_sse_wrapper > +#include "../svml_s_log1pf8_core.S" > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c > new file mode 100644 > index 0000000000..a2bbe37129 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c > @@ -0,0 +1,28 @@ > +/* Multiple versions of vectorized log1pf, vector length is 8. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#define SYMBOL_NAME _ZGVdN8v_log1pf > +#include "ifunc-mathvec-avx2.h" > + > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > + > +#ifdef SHARED > +__hidden_ver1 (_ZGVdN8v_log1pf, __GI__ZGVdN8v_log1pf, > + __redirect__ZGVdN8v_log1pf) > + __attribute__ ((visibility ("hidden"))); > +#endif > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S > new file mode 100644 > index 0000000000..957dc23e3f > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S > @@ -0,0 +1,254 @@ > +/* Function log1pf vectorized with AVX2. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + https://www.gnu.org/licenses/. 
*/ > + > +/* > + * ALGORITHM DESCRIPTION: > + * > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > + * Get short reciprocal approximation Rcp ~ 1/xh > + * R = (Rcp*xh - 1.0) + Rcp*xl > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > + * log(Rcp) is tabulated > + * > + * > + */ > + > +/* Offsets for data table __svml_slog1p_data_internal > + */ > +#define SgnMask 0 > +#define sOne 32 > +#define sPoly 64 > +#define iHiDelta 320 > +#define iLoRange 352 > +#define iBrkValue 384 > +#define iOffExpoMask 416 > +#define sLn2 448 > + > +#include <sysdep.h> > + > + .text > + .section .text.avx2,"ax",@progbits > +ENTRY(_ZGVdN8v_log1pf_avx2) > + pushq %rbp > + cfi_def_cfa_offset(16) > + movq %rsp, %rbp > + cfi_def_cfa(6, 16) > + cfi_offset(6, -16) > + andq $-32, %rsp > + subq $96, %rsp > + vmovups sOne+__svml_slog1p_data_internal(%rip), %ymm2 > + > +/* reduction: compute r,n */ > + vmovups iBrkValue+__svml_slog1p_data_internal(%rip), %ymm13 > + vmovups SgnMask+__svml_slog1p_data_internal(%rip), %ymm4 > + vmovups iLoRange+__svml_slog1p_data_internal(%rip), %ymm8 > + vmovaps %ymm0, %ymm3 > + > +/* compute 1+x as high, low parts */ > + vmaxps %ymm3, %ymm2, %ymm5 > + vminps %ymm3, %ymm2, %ymm6 > + vaddps %ymm6, %ymm5, %ymm10 > + vpsubd %ymm13, %ymm10, %ymm11 > + > +/* check argument value ranges */ > + vpaddd iHiDelta+__svml_slog1p_data_internal(%rip), %ymm10, %ymm9 > + vsubps %ymm10, %ymm5, %ymm7 > + vpsrad $23, %ymm11, %ymm14 > + vpand iOffExpoMask+__svml_slog1p_data_internal(%rip), %ymm11, %ymm12 > + vpslld $23, %ymm14, %ymm15 > + vcvtdq2ps %ymm14, %ymm0 > + vpsubd %ymm15, %ymm2, %ymm14 > + vandnps %ymm3, %ymm4, %ymm1 > + vaddps %ymm7, %ymm6, %ymm4 > + vpaddd %ymm13, %ymm12, %ymm6 > + vmulps %ymm4, %ymm14, %ymm7 > + > +/* polynomial evaluation */ > + vsubps %ymm2, %ymm6, %ymm2 > + vpcmpgtd %ymm9, %ymm8, %ymm5 > + vmovups sPoly+224+__svml_slog1p_data_internal(%rip), %ymm8 > + vaddps %ymm2, %ymm7, %ymm9 > + vfmadd213ps 
sPoly+192+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > + vfmadd213ps sPoly+160+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > + vfmadd213ps sPoly+128+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > + vfmadd213ps sPoly+96+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > + vfmadd213ps sPoly+64+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > + vfmadd213ps sPoly+32+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > + vfmadd213ps sPoly+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > + vmulps %ymm8, %ymm9, %ymm10 > + vfmadd213ps %ymm9, %ymm9, %ymm10 > + > +/* final reconstruction */ > + vfmadd132ps sLn2+__svml_slog1p_data_internal(%rip), %ymm10, %ymm0 > + > +/* combine and get argument value range mask */ > + vmovmskps %ymm5, %edx > + vorps %ymm1, %ymm0, %ymm0 > + testl %edx, %edx > + > +/* Go to special inputs processing branch */ > + jne L(SPECIAL_VALUES_BRANCH) > + # LOE rbx r12 r13 r14 r15 edx ymm0 ymm3 > + > +/* Restore registers > + * and exit the function > + */ > + > +L(EXIT): > + movq %rbp, %rsp > + popq %rbp > + cfi_def_cfa(7, 8) > + cfi_restore(6) > + ret > + cfi_def_cfa(6, 16) > + cfi_offset(6, -16) > + > +/* Branch to process > + * special inputs > + */ > + > +L(SPECIAL_VALUES_BRANCH): > + vmovups %ymm3, 32(%rsp) > + vmovups %ymm0, 64(%rsp) > + # LOE rbx r12 r13 r14 r15 edx ymm0 > + > + xorl %eax, %eax > + # LOE rbx r12 r13 r14 r15 eax edx > + > + vzeroupper > + movq %r12, 16(%rsp) > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > + movl %eax, %r12d > + movq %r13, 8(%rsp) > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > + movl %edx, %r13d > + movq %r14, 
(%rsp) > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > + # LOE rbx r15 r12d r13d > + > +/* Range mask > + * bits check > + */ > + > +L(RANGEMASK_CHECK): > + btl %r12d, %r13d > + > +/* Call scalar math function */ > + jc L(SCALAR_MATH_CALL) > + # LOE rbx r15 r12d r13d > + > +/* Special inputs > + * processing loop > + */ > + > +L(SPECIAL_VALUES_LOOP): > + incl %r12d > + cmpl $8, %r12d > + > +/* Check bits in range mask */ > + jl L(RANGEMASK_CHECK) > + # LOE rbx r15 r12d r13d > + > + movq 16(%rsp), %r12 > + cfi_restore(12) > + movq 8(%rsp), %r13 > + cfi_restore(13) > + movq (%rsp), %r14 > + cfi_restore(14) > + vmovups 64(%rsp), %ymm0 > + > +/* Go to exit */ > + jmp L(EXIT) > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > + # LOE rbx r12 r13 r14 r15 ymm0 > + > +/* Scalar math function call > + * to process special input > + */ > + > +L(SCALAR_MATH_CALL): > + movl %r12d, %r14d > + movss 32(%rsp,%r14,4), %xmm0 > + call log1pf@PLT > + # LOE rbx r14 r15 r12d r13d xmm0 > + > + movss %xmm0, 64(%rsp,%r14,4) > + > +/* Process special inputs in loop */ > + jmp L(SPECIAL_VALUES_LOOP) > + # LOE rbx r15 r12d 
r13d > +END(_ZGVdN8v_log1pf_avx2) > + > + .section .rodata, "a" > + .align 32 > + > +#ifdef __svml_slog1p_data_internal_typedef > +typedef unsigned int VUINT32; > +typedef struct { > + __declspec(align(32)) VUINT32 SgnMask[8][1]; > + __declspec(align(32)) VUINT32 sOne[8][1]; > + __declspec(align(32)) VUINT32 sPoly[8][8][1]; > + __declspec(align(32)) VUINT32 iHiDelta[8][1]; > + __declspec(align(32)) VUINT32 iLoRange[8][1]; > + __declspec(align(32)) VUINT32 iBrkValue[8][1]; > + __declspec(align(32)) VUINT32 iOffExpoMask[8][1]; > + __declspec(align(32)) VUINT32 sLn2[8][1]; > +} __svml_slog1p_data_internal; > +#endif > +__svml_slog1p_data_internal: > + /*== SgnMask ==*/ > + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff > + /*== sOne = SP 1.0 ==*/ > + .align 32 > + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 > + /*== sPoly[] = SP polynomial ==*/ > + .align 32 > + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ > + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ > + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ > + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ > + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ > + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ > + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 
P6 */ > + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ > + /*== iHiDelta = SP 80000000-7f000000 ==*/ > + .align 32 > + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000 > + /*== iLoRange = SP 00800000+iHiDelta ==*/ > + .align 32 > + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000 > + /*== iBrkValue = SP 2/3 ==*/ > + .align 32 > + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab > + /*== iOffExpoMask = SP significand mask ==*/ > + .align 32 > + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff > + /*== sLn2 = SP ln(2) ==*/ > + .align 32 > + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 > + .align 32 > + .type __svml_slog1p_data_internal,@object > + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p2_core.S b/sysdeps/x86_64/fpu/svml_d_log1p2_core.S > new file mode 100644 > index 0000000000..e3f01717d9 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/svml_d_log1p2_core.S > @@ -0,0 +1,29 @@ > +/* Function log1p vectorized with SSE2. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. 
> + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> > +#include "svml_d_wrapper_impl.h" > + > + .text > +ENTRY (_ZGVbN2v_log1p) > +WRAPPER_IMPL_SSE2 log1p > +END (_ZGVbN2v_log1p) > + > +#ifndef USE_MULTIARCH > + libmvec_hidden_def (_ZGVbN2v_log1p) > +#endif > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p4_core.S b/sysdeps/x86_64/fpu/svml_d_log1p4_core.S > new file mode 100644 > index 0000000000..49beb96183 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/svml_d_log1p4_core.S > @@ -0,0 +1,29 @@ > +/* Function log1p vectorized with AVX2, wrapper version. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#include <sysdep.h> > +#include "svml_d_wrapper_impl.h" > + > + .text > +ENTRY (_ZGVdN4v_log1p) > +WRAPPER_IMPL_AVX _ZGVbN2v_log1p > +END (_ZGVdN4v_log1p) > + > +#ifndef USE_MULTIARCH > + libmvec_hidden_def (_ZGVdN4v_log1p) > +#endif > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S > new file mode 100644 > index 0000000000..8b89768b7c > --- /dev/null > +++ b/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S > @@ -0,0 +1,25 @@ > +/* Function log1p vectorized in AVX ISA as wrapper to SSE4 ISA version. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> > +#include "svml_d_wrapper_impl.h" > + > + .text > +ENTRY (_ZGVcN4v_log1p) > +WRAPPER_IMPL_AVX _ZGVbN2v_log1p > +END (_ZGVcN4v_log1p) > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p8_core.S b/sysdeps/x86_64/fpu/svml_d_log1p8_core.S > new file mode 100644 > index 0000000000..54b4d4ede8 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/svml_d_log1p8_core.S > @@ -0,0 +1,25 @@ > +/* Function log1p vectorized with AVX-512, wrapper to AVX2. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> > +#include "svml_d_wrapper_impl.h" > + > + .text > +ENTRY (_ZGVeN8v_log1p) > +WRAPPER_IMPL_AVX512 _ZGVdN4v_log1p > +END (_ZGVeN8v_log1p) > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S > new file mode 100644 > index 0000000000..2c953d00fb > --- /dev/null > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S > @@ -0,0 +1,25 @@ > +/* Function log1pf vectorized with AVX-512. Wrapper to AVX2 version. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#include <sysdep.h> > +#include "svml_s_wrapper_impl.h" > + > + .text > +ENTRY (_ZGVeN16v_log1pf) > +WRAPPER_IMPL_AVX512 _ZGVdN8v_log1pf > +END (_ZGVeN16v_log1pf) > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S > new file mode 100644 > index 0000000000..6f68762eaa > --- /dev/null > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S > @@ -0,0 +1,29 @@ > +/* Function log1pf vectorized with SSE2, wrapper version. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> > +#include "svml_s_wrapper_impl.h" > + > + .text > +ENTRY (_ZGVbN4v_log1pf) > +WRAPPER_IMPL_SSE2 log1pf > +END (_ZGVbN4v_log1pf) > + > +#ifndef USE_MULTIARCH > + libmvec_hidden_def (_ZGVbN4v_log1pf) > +#endif > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S > new file mode 100644 > index 0000000000..74f81283b1 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S > @@ -0,0 +1,29 @@ > +/* Function log1pf vectorized with AVX2, wrapper version. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> > +#include "svml_s_wrapper_impl.h" > + > + .text > +ENTRY (_ZGVdN8v_log1pf) > +WRAPPER_IMPL_AVX _ZGVbN4v_log1pf > +END (_ZGVdN8v_log1pf) > + > +#ifndef USE_MULTIARCH > + libmvec_hidden_def (_ZGVdN8v_log1pf) > +#endif > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S > new file mode 100644 > index 0000000000..f33be0e904 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S > @@ -0,0 +1,25 @@ > +/* Function log1pf vectorized in AVX ISA as wrapper to SSE4 ISA version. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. 
> + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> > +#include "svml_s_wrapper_impl.h" > + > + .text > +ENTRY (_ZGVcN8v_log1pf) > +WRAPPER_IMPL_AVX _ZGVbN4v_log1pf > +END (_ZGVcN8v_log1pf) > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c > new file mode 100644 > index 0000000000..18aa6aaeaa > --- /dev/null > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c > @@ -0,0 +1 @@ > +#include "test-double-libmvec-log1p.c" > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c > new file mode 100644 > index 0000000000..18aa6aaeaa > --- /dev/null > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c > @@ -0,0 +1 @@ > +#include "test-double-libmvec-log1p.c" > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c > new file mode 100644 > index 0000000000..18aa6aaeaa > --- /dev/null > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c > @@ -0,0 +1 @@ > +#include "test-double-libmvec-log1p.c" > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c > new file mode 100644 > index 0000000000..40937f987a > --- /dev/null > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c > @@ -0,0 +1,3 @@ > +#define LIBMVEC_TYPE double > +#define LIBMVEC_FUNC log1p > +#include "test-vector-abi-arg1.h" > diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c > index 08c91ff634..38359b05e3 100644 > --- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c > +++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVbN2v_cbrt) > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), 
_ZGVbN2vv_atan2) > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVbN2v_log10) > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVbN2v_log2) > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVbN2v_log1p) > > #define VEC_INT_TYPE __m128i > > diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c > index a2fb0de309..17701e7731 100644 > --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c > +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c > @@ -43,6 +43,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVdN4v_cbrt) > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVdN4vv_atan2) > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVdN4v_log10) > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVdN4v_log2) > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVdN4v_log1p) > > #ifndef __ILP32__ > # define VEC_INT_TYPE __m256i > diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c > index dc65a4ee25..bba62b2446 100644 > --- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c > +++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVcN4v_cbrt) > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVcN4vv_atan2) > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVcN4v_log10) > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVcN4v_log2) > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVcN4v_log1p) > > #define VEC_INT_TYPE __m128i > > diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c > index 253ee8c906..8a04e13a07 100644 > --- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c > +++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVeN8v_cbrt) > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVeN8vv_atan2) > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVeN8v_log10) > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVeN8v_log2) > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), 
_ZGVeN8v_log1p) > > #ifndef __ILP32__ > # define VEC_INT_TYPE __m512i > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c > new file mode 100644 > index 0000000000..3395decaf4 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c > @@ -0,0 +1 @@ > +#include "test-float-libmvec-log1pf.c" > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c > new file mode 100644 > index 0000000000..3395decaf4 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c > @@ -0,0 +1 @@ > +#include "test-float-libmvec-log1pf.c" > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c > new file mode 100644 > index 0000000000..3395decaf4 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c > @@ -0,0 +1 @@ > +#include "test-float-libmvec-log1pf.c" > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c > new file mode 100644 > index 0000000000..1b36069ded > --- /dev/null > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c > @@ -0,0 +1,3 @@ > +#define LIBMVEC_TYPE float > +#define LIBMVEC_FUNC log1pf > +#include "test-vector-abi-arg1.h" > diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c > index 1c7db5146c..706f52c618 100644 > --- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c > +++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVeN16v_cbrtf) > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVeN16vv_atan2f) > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVeN16v_log10f) > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVeN16v_log2f) > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVeN16v_log1pf) > > #define VEC_INT_TYPE __m512i > > diff --git 
a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c > index 8ec51603b3..ceace4c53a 100644 > --- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c > +++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVbN4v_cbrtf) > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVbN4vv_atan2f) > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVbN4v_log10f) > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVbN4v_log2f) > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVbN4v_log1pf) > > #define VEC_INT_TYPE __m128i > > diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c > index 1cb4553c7a..06a4753409 100644 > --- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c > +++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c > @@ -43,6 +43,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVdN8v_cbrtf) > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVdN8vv_atan2f) > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVdN8v_log10f) > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVdN8v_log2f) > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVdN8v_log1pf) > > /* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */ > #undef VECTOR_WRAPPER_fFF > diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c > index 6ecc1792bb..a87e5298e0 100644 > --- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c > +++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVcN8v_cbrtf) > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVcN8vv_atan2f) > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVcN8v_log10f) > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVcN8v_log2f) > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVcN8v_log1pf) > > #define VEC_INT_TYPE __m128i > > -- > 2.31.1 > LGTM. Reviewed-by: H.J. Lu <hjl.tools@gmail.com> Thanks. H.J.
On Wed, Dec 29, 2021 at 3:43 PM H.J. Lu via Libc-alpha <libc-alpha@sourceware.org> wrote: > > On Tue, Dec 28, 2021 at 10:39:55PM -0800, Sunil K Pandey wrote: > > Implement vectorized log1p/log1pf containing SSE, AVX, AVX2 and > > AVX512 versions for libmvec as per vector ABI. It also contains > > accuracy and ABI tests for vector log1p/log1pf with regenerated ulps. > > --- > > bits/libm-simd-decl-stubs.h | 11 + > > math/bits/mathcalls.h | 2 +- > > .../unix/sysv/linux/x86_64/libmvec.abilist | 8 + > > sysdeps/x86/fpu/bits/math-vector.h | 4 + > > .../x86/fpu/finclude/math-vector-fortran.h | 4 + > > sysdeps/x86_64/fpu/Makeconfig | 1 + > > sysdeps/x86_64/fpu/Versions | 2 + > > sysdeps/x86_64/fpu/libm-test-ulps | 20 + > > .../fpu/multiarch/svml_d_log1p2_core-sse2.S | 20 + > > .../x86_64/fpu/multiarch/svml_d_log1p2_core.c | 27 + > > .../fpu/multiarch/svml_d_log1p2_core_sse4.S | 1398 +++++++++++++++++ > > .../fpu/multiarch/svml_d_log1p4_core-sse.S | 20 + > > .../x86_64/fpu/multiarch/svml_d_log1p4_core.c | 27 + > > .../fpu/multiarch/svml_d_log1p4_core_avx2.S | 1383 ++++++++++++++++ > > .../fpu/multiarch/svml_d_log1p8_core-avx2.S | 20 + > > .../x86_64/fpu/multiarch/svml_d_log1p8_core.c | 27 + > > .../fpu/multiarch/svml_d_log1p8_core_avx512.S | 317 ++++ > > .../fpu/multiarch/svml_s_log1pf16_core-avx2.S | 20 + > > .../fpu/multiarch/svml_s_log1pf16_core.c | 28 + > > .../multiarch/svml_s_log1pf16_core_avx512.S | 271 ++++ > > .../fpu/multiarch/svml_s_log1pf4_core-sse2.S | 20 + > > .../fpu/multiarch/svml_s_log1pf4_core.c | 28 + > > .../fpu/multiarch/svml_s_log1pf4_core_sse4.S | 252 +++ > > .../fpu/multiarch/svml_s_log1pf8_core-sse.S | 20 + > > .../fpu/multiarch/svml_s_log1pf8_core.c | 28 + > > .../fpu/multiarch/svml_s_log1pf8_core_avx2.S | 254 +++ > > sysdeps/x86_64/fpu/svml_d_log1p2_core.S | 29 + > > sysdeps/x86_64/fpu/svml_d_log1p4_core.S | 29 + > > sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S | 25 + > > sysdeps/x86_64/fpu/svml_d_log1p8_core.S | 25 + > > 
sysdeps/x86_64/fpu/svml_s_log1pf16_core.S | 25 + > > sysdeps/x86_64/fpu/svml_s_log1pf4_core.S | 29 + > > sysdeps/x86_64/fpu/svml_s_log1pf8_core.S | 29 + > > sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S | 25 + > > .../fpu/test-double-libmvec-log1p-avx.c | 1 + > > .../fpu/test-double-libmvec-log1p-avx2.c | 1 + > > .../fpu/test-double-libmvec-log1p-avx512f.c | 1 + > > .../x86_64/fpu/test-double-libmvec-log1p.c | 3 + > > .../x86_64/fpu/test-double-vlen2-wrappers.c | 1 + > > .../fpu/test-double-vlen4-avx2-wrappers.c | 1 + > > .../x86_64/fpu/test-double-vlen4-wrappers.c | 1 + > > .../x86_64/fpu/test-double-vlen8-wrappers.c | 1 + > > .../fpu/test-float-libmvec-log1pf-avx.c | 1 + > > .../fpu/test-float-libmvec-log1pf-avx2.c | 1 + > > .../fpu/test-float-libmvec-log1pf-avx512f.c | 1 + > > .../x86_64/fpu/test-float-libmvec-log1pf.c | 3 + > > .../x86_64/fpu/test-float-vlen16-wrappers.c | 1 + > > .../x86_64/fpu/test-float-vlen4-wrappers.c | 1 + > > .../fpu/test-float-vlen8-avx2-wrappers.c | 1 + > > .../x86_64/fpu/test-float-vlen8-wrappers.c | 1 + > > 50 files changed, 4447 insertions(+), 1 deletion(-) > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c > > create mode 100644 
sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S > > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p2_core.S > > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p4_core.S > > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S > > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p8_core.S > > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf16_core.S > > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf4_core.S > > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf8_core.S > > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S > > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c > > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c > > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c > > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p.c > > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c > > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c > > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c > > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c > > > > diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h > > index 73252615ca..845246fab9 100644 > > --- a/bits/libm-simd-decl-stubs.h > > +++ b/bits/libm-simd-decl-stubs.h > > @@ -241,4 +241,15 @@ > > #define __DECL_SIMD_log2f32x > > #define __DECL_SIMD_log2f64x > > #define __DECL_SIMD_log2f128x > > + > > +#define __DECL_SIMD_log1p > > 
+#define __DECL_SIMD_log1pf > > +#define __DECL_SIMD_log1pl > > +#define __DECL_SIMD_log1pf16 > > +#define __DECL_SIMD_log1pf32 > > +#define __DECL_SIMD_log1pf64 > > +#define __DECL_SIMD_log1pf128 > > +#define __DECL_SIMD_log1pf32x > > +#define __DECL_SIMD_log1pf64x > > +#define __DECL_SIMD_log1pf128x > > #endif > > diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h > > index bfe52a4666..aa4bc61aa4 100644 > > --- a/math/bits/mathcalls.h > > +++ b/math/bits/mathcalls.h > > @@ -119,7 +119,7 @@ __MATHCALL_VEC (exp10,, (_Mdouble_ __x)); > > __MATHCALL_VEC (expm1,, (_Mdouble_ __x)); > > > > /* Return log(1 + X). */ > > -__MATHCALL (log1p,, (_Mdouble_ __x)); > > +__MATHCALL_VEC (log1p,, (_Mdouble_ __x)); > > > > /* Return the base 2 signed integral exponent of X. */ > > __MATHCALL (logb,, (_Mdouble_ __x)); > > diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist > > index fa8b016c5d..68b940606a 100644 > > --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist > > +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist > > @@ -55,6 +55,7 @@ GLIBC_2.35 _ZGVbN2v_exp10 F > > GLIBC_2.35 _ZGVbN2v_exp2 F > > GLIBC_2.35 _ZGVbN2v_expm1 F > > GLIBC_2.35 _ZGVbN2v_log10 F > > +GLIBC_2.35 _ZGVbN2v_log1p F > > GLIBC_2.35 _ZGVbN2v_log2 F > > GLIBC_2.35 _ZGVbN2v_sinh F > > GLIBC_2.35 _ZGVbN2vv_atan2 F > > @@ -68,6 +69,7 @@ GLIBC_2.35 _ZGVbN4v_exp10f F > > GLIBC_2.35 _ZGVbN4v_exp2f F > > GLIBC_2.35 _ZGVbN4v_expm1f F > > GLIBC_2.35 _ZGVbN4v_log10f F > > +GLIBC_2.35 _ZGVbN4v_log1pf F > > GLIBC_2.35 _ZGVbN4v_log2f F > > GLIBC_2.35 _ZGVbN4v_sinhf F > > GLIBC_2.35 _ZGVbN4vv_atan2f F > > @@ -81,6 +83,7 @@ GLIBC_2.35 _ZGVcN4v_exp10 F > > GLIBC_2.35 _ZGVcN4v_exp2 F > > GLIBC_2.35 _ZGVcN4v_expm1 F > > GLIBC_2.35 _ZGVcN4v_log10 F > > +GLIBC_2.35 _ZGVcN4v_log1p F > > GLIBC_2.35 _ZGVcN4v_log2 F > > GLIBC_2.35 _ZGVcN4v_sinh F > > GLIBC_2.35 _ZGVcN4vv_atan2 F > > @@ -94,6 +97,7 @@ GLIBC_2.35 _ZGVcN8v_exp10f F > > GLIBC_2.35 _ZGVcN8v_exp2f F > 
> GLIBC_2.35 _ZGVcN8v_expm1f F > > GLIBC_2.35 _ZGVcN8v_log10f F > > +GLIBC_2.35 _ZGVcN8v_log1pf F > > GLIBC_2.35 _ZGVcN8v_log2f F > > GLIBC_2.35 _ZGVcN8v_sinhf F > > GLIBC_2.35 _ZGVcN8vv_atan2f F > > @@ -107,6 +111,7 @@ GLIBC_2.35 _ZGVdN4v_exp10 F > > GLIBC_2.35 _ZGVdN4v_exp2 F > > GLIBC_2.35 _ZGVdN4v_expm1 F > > GLIBC_2.35 _ZGVdN4v_log10 F > > +GLIBC_2.35 _ZGVdN4v_log1p F > > GLIBC_2.35 _ZGVdN4v_log2 F > > GLIBC_2.35 _ZGVdN4v_sinh F > > GLIBC_2.35 _ZGVdN4vv_atan2 F > > @@ -120,6 +125,7 @@ GLIBC_2.35 _ZGVdN8v_exp10f F > > GLIBC_2.35 _ZGVdN8v_exp2f F > > GLIBC_2.35 _ZGVdN8v_expm1f F > > GLIBC_2.35 _ZGVdN8v_log10f F > > +GLIBC_2.35 _ZGVdN8v_log1pf F > > GLIBC_2.35 _ZGVdN8v_log2f F > > GLIBC_2.35 _ZGVdN8v_sinhf F > > GLIBC_2.35 _ZGVdN8vv_atan2f F > > @@ -133,6 +139,7 @@ GLIBC_2.35 _ZGVeN16v_exp10f F > > GLIBC_2.35 _ZGVeN16v_exp2f F > > GLIBC_2.35 _ZGVeN16v_expm1f F > > GLIBC_2.35 _ZGVeN16v_log10f F > > +GLIBC_2.35 _ZGVeN16v_log1pf F > > GLIBC_2.35 _ZGVeN16v_log2f F > > GLIBC_2.35 _ZGVeN16v_sinhf F > > GLIBC_2.35 _ZGVeN16vv_atan2f F > > @@ -146,6 +153,7 @@ GLIBC_2.35 _ZGVeN8v_exp10 F > > GLIBC_2.35 _ZGVeN8v_exp2 F > > GLIBC_2.35 _ZGVeN8v_expm1 F > > GLIBC_2.35 _ZGVeN8v_log10 F > > +GLIBC_2.35 _ZGVeN8v_log1p F > > GLIBC_2.35 _ZGVeN8v_log2 F > > GLIBC_2.35 _ZGVeN8v_sinh F > > GLIBC_2.35 _ZGVeN8vv_atan2 F > > diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h > > index 59d284a10a..14c9db3bb3 100644 > > --- a/sysdeps/x86/fpu/bits/math-vector.h > > +++ b/sysdeps/x86/fpu/bits/math-vector.h > > @@ -110,6 +110,10 @@ > > # define __DECL_SIMD_log2 __DECL_SIMD_x86_64 > > # undef __DECL_SIMD_log2f > > # define __DECL_SIMD_log2f __DECL_SIMD_x86_64 > > +# undef __DECL_SIMD_log1p > > +# define __DECL_SIMD_log1p __DECL_SIMD_x86_64 > > +# undef __DECL_SIMD_log1pf > > +# define __DECL_SIMD_log1pf __DECL_SIMD_x86_64 > > > > # endif > > #endif > > diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h 
> > index a2ca9a203f..3dca196432 100644 > > --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h > > +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h > > @@ -54,6 +54,8 @@ > > !GCC$ builtin (log10f) attributes simd (notinbranch) if('x86_64') > > !GCC$ builtin (log2) attributes simd (notinbranch) if('x86_64') > > !GCC$ builtin (log2f) attributes simd (notinbranch) if('x86_64') > > +!GCC$ builtin (log1p) attributes simd (notinbranch) if('x86_64') > > +!GCC$ builtin (log1pf) attributes simd (notinbranch) if('x86_64') > > > > !GCC$ builtin (cos) attributes simd (notinbranch) if('x32') > > !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32') > > @@ -93,3 +95,5 @@ > > !GCC$ builtin (log10f) attributes simd (notinbranch) if('x32') > > !GCC$ builtin (log2) attributes simd (notinbranch) if('x32') > > !GCC$ builtin (log2f) attributes simd (notinbranch) if('x32') > > +!GCC$ builtin (log1p) attributes simd (notinbranch) if('x32') > > +!GCC$ builtin (log1pf) attributes simd (notinbranch) if('x32') > > diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig > > index 8d6d0915af..378cb06d37 100644 > > --- a/sysdeps/x86_64/fpu/Makeconfig > > +++ b/sysdeps/x86_64/fpu/Makeconfig > > @@ -36,6 +36,7 @@ libmvec-funcs = \ > > hypot \ > > log \ > > log10 \ > > + log1p \ > > log2 \ > > pow \ > > sin \ > > diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions > > index 1b48c2d642..155fb115f3 100644 > > --- a/sysdeps/x86_64/fpu/Versions > > +++ b/sysdeps/x86_64/fpu/Versions > > @@ -23,6 +23,7 @@ libmvec { > > _ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2; > > _ZGVbN2v_expm1; _ZGVcN4v_expm1; _ZGVdN4v_expm1; _ZGVeN8v_expm1; > > _ZGVbN2v_log10; _ZGVcN4v_log10; _ZGVdN4v_log10; _ZGVeN8v_log10; > > + _ZGVbN2v_log1p; _ZGVcN4v_log1p; _ZGVdN4v_log1p; _ZGVeN8v_log1p; > > _ZGVbN2v_log2; _ZGVcN4v_log2; _ZGVdN4v_log2; _ZGVeN8v_log2; > > _ZGVbN2v_sinh; _ZGVcN4v_sinh; _ZGVdN4v_sinh; _ZGVeN8v_sinh; > > _ZGVbN2vv_atan2; _ZGVcN4vv_atan2; 
_ZGVdN4vv_atan2; _ZGVeN8vv_atan2; > > @@ -36,6 +37,7 @@ libmvec { > > _ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f; > > _ZGVbN4v_expm1f; _ZGVcN8v_expm1f; _ZGVdN8v_expm1f; _ZGVeN16v_expm1f; > > _ZGVbN4v_log10f; _ZGVcN8v_log10f; _ZGVdN8v_log10f; _ZGVeN16v_log10f; > > + _ZGVbN4v_log1pf; _ZGVcN8v_log1pf; _ZGVdN8v_log1pf; _ZGVeN16v_log1pf; > > _ZGVbN4v_log2f; _ZGVcN8v_log2f; _ZGVdN8v_log2f; _ZGVeN16v_log2f; > > _ZGVbN4v_sinhf; _ZGVcN8v_sinhf; _ZGVdN8v_sinhf; _ZGVeN16v_sinhf; > > _ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f; > > diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps > > index 3b7f3cee6f..a2b15a795b 100644 > > --- a/sysdeps/x86_64/fpu/libm-test-ulps > > +++ b/sysdeps/x86_64/fpu/libm-test-ulps > > @@ -1685,6 +1685,26 @@ float: 2 > > float128: 2 > > ldouble: 3 > > > > +Function: "log1p_vlen16": > > +float: 2 > > + > > +Function: "log1p_vlen2": > > +double: 1 > > + > > +Function: "log1p_vlen4": > > +double: 1 > > +float: 2 > > + > > +Function: "log1p_vlen4_avx2": > > +double: 1 > > + > > +Function: "log1p_vlen8": > > +double: 1 > > +float: 2 > > + > > +Function: "log1p_vlen8_avx2": > > +float: 2 > > + > > Function: "log2": > > double: 2 > > float: 1 > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S > > new file mode 100644 > > index 0000000000..8004088346 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S > > @@ -0,0 +1,20 @@ > > +/* SSE2 version of vectorized log1p, vector length is 2. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. 
> > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#define _ZGVbN2v_log1p _ZGVbN2v_log1p_sse2 > > +#include "../svml_d_log1p2_core.S" > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c > > new file mode 100644 > > index 0000000000..35ca620aba > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c > > @@ -0,0 +1,27 @@ > > +/* Multiple versions of vectorized log1p, vector length is 2. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#define SYMBOL_NAME _ZGVbN2v_log1p > > +#include "ifunc-mathvec-sse4_1.h" > > + > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > + > > +#ifdef SHARED > > +__hidden_ver1 (_ZGVbN2v_log1p, __GI__ZGVbN2v_log1p, __redirect__ZGVbN2v_log1p) > > + __attribute__ ((visibility ("hidden"))); > > +#endif > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S > > new file mode 100644 > > index 0000000000..9d3f0647b4 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S > > @@ -0,0 +1,1398 @@ > > +/* Function log1p vectorized with SSE4. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + https://www.gnu.org/licenses/. */ > > + > > +/* > > + * ALGORITHM DESCRIPTION: > > + * > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > + * Get short reciprocal approximation Rcp ~ 1/xh > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > + * log(Rcp) is tabulated > > + * > > + * > > + */ > > + > > +/* Offsets for data table __svml_dlog1p_data_internal > > + */ > > +#define Log_HA_table 0 Where is this used? 
> > +#define Log_LA_table 8208 > > +#define poly_coeff 12320 > > +#define ExpMask 12384 > > +#define Two10 12400 > > +#define MinLog1p 12416 > > +#define MaxLog1p 12432 > > +#define One 12448 > > +#define SgnMask 12464 > > +#define XThreshold 12480 > > +#define XhMask 12496 > > +#define Threshold 12512 > > +#define Bias 12528 > > +#define Bias1 12544 > > +#define ExpMask0 12560 > > +#define ExpMask2 12576 > > +#define L2 12592 > > + > > +/* Lookup bias for data table __svml_dlog1p_data_internal. */ > > +#define Table_Lookup_Bias -0x405ff0 > > + > > +#include <sysdep.h> > > + > > + .text > > + .section .text.sse4,"ax",@progbits > > +ENTRY(_ZGVbN2v_log1p_sse4) > > + pushq %rbp > > + cfi_def_cfa_offset(16) > > + movq %rsp, %rbp > > + cfi_def_cfa(6, 16) > > + cfi_offset(6, -16) > > + andq $-32, %rsp > > + subq $64, %rsp > > + movaps %xmm0, %xmm7 > > + > > +/* SgnMask used by all accuracies */ > > + movups SgnMask+__svml_dlog1p_data_internal(%rip), %xmm6 > > + lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %rsi > > + movaps %xmm6, %xmm8 > > + movaps %xmm7, %xmm15 > > + movups One+__svml_dlog1p_data_internal(%rip), %xmm0 > > + andps %xmm7, %xmm8 > > + cmpltpd XThreshold+__svml_dlog1p_data_internal(%rip), %xmm8 > > + cmpnlepd MaxLog1p+__svml_dlog1p_data_internal(%rip), %xmm15 > > + movaps %xmm0, %xmm4 > > + > > +/* compute 1+x as high, low parts */ > > + movaps %xmm0, %xmm9 > > + addpd %xmm7, %xmm4 > > + maxpd %xmm7, %xmm9 > > + orps XhMask+__svml_dlog1p_data_internal(%rip), %xmm8 > > + movaps %xmm0, %xmm5 > > + > > +/* preserve mantissa, set input exponent to 2^(-10) */ > > + movups ExpMask+__svml_dlog1p_data_internal(%rip), %xmm3 > > + andps %xmm8, %xmm4 > > + andps %xmm4, %xmm3 > > + > > +/* check range */ > > + movaps %xmm7, %xmm8 > > + orps Two10+__svml_dlog1p_data_internal(%rip), %xmm3 > > + > > +/* Compute SignMask for all accuracies, including EP */ > > + andnps %xmm7, %xmm6 > > + > > +/* reciprocal approximation good to at least 11 bits */ > > + 
cvtpd2ps %xmm3, %xmm10 > > + minpd %xmm7, %xmm5 > > + subpd %xmm4, %xmm9 > > + cmpltpd MinLog1p+__svml_dlog1p_data_internal(%rip), %xmm8 > > + addpd %xmm9, %xmm5 > > + movlhps %xmm10, %xmm10 > > + orps %xmm15, %xmm8 > > + rcpps %xmm10, %xmm11 > > + > > +/* combine and get argument value range mask */ > > + movmskpd %xmm8, %edx > > + > > +/* round reciprocal to nearest integer, will have 1+9 mantissa bits */ > > + movups .FLT_16(%rip), %xmm13 > > + > > +/* exponent of X needed to scale Xl */ > > + movdqu ExpMask0+__svml_dlog1p_data_internal(%rip), %xmm12 > > + cvtps2pd %xmm11, %xmm1 > > + addpd %xmm13, %xmm1 > > + subpd %xmm13, %xmm1 > > + > > +/* 2^ (-10-exp(X) ) */ > > + movdqu ExpMask2+__svml_dlog1p_data_internal(%rip), %xmm2 > > + pand %xmm4, %xmm12 > > + psubq %xmm12, %xmm2 > > + mulpd %xmm1, %xmm3 > > + > > +/* scale DblRcp */ > > + mulpd %xmm1, %xmm2 > > + subpd %xmm0, %xmm3 > > + > > +/* > > + * argument reduction > > + * VQFMS( D, R, X, DblRcp1, One ); > > + */ > > + mulpd %xmm2, %xmm5 > > + addpd %xmm5, %xmm3 > > + > > +/* exponent*log(2.0) */ > > + movups Threshold+__svml_dlog1p_data_internal(%rip), %xmm10 > > + > > +/* exponent bits */ > > + psrlq $20, %xmm4 > > + pshufd $221, %xmm4, %xmm14 > > + > > +/* > > + * prepare table index > > + * table lookup > > + */ > > + movaps %xmm1, %xmm4 > > + cmpltpd %xmm1, %xmm10 > > + > > +/* biased exponent in DP format */ > > + cvtdq2pd %xmm14, %xmm0 > > + > > +/* polynomial */ > > + movups poly_coeff+__svml_dlog1p_data_internal(%rip), %xmm1 > > + movaps %xmm3, %xmm5 > > + mulpd %xmm3, %xmm1 > > + mulpd %xmm3, %xmm5 > > + addpd poly_coeff+16+__svml_dlog1p_data_internal(%rip), %xmm1 > > + movups poly_coeff+32+__svml_dlog1p_data_internal(%rip), %xmm2 > > + psrlq $40, %xmm4 > > + mulpd %xmm3, %xmm2 > > + mulpd %xmm5, %xmm1 > > + addpd poly_coeff+48+__svml_dlog1p_data_internal(%rip), %xmm2 > > + movd %xmm4, %eax > > + andps Bias+__svml_dlog1p_data_internal(%rip), %xmm10 > > + addpd %xmm1, %xmm2 > > + > > +/* 
reconstruction */ > > + mulpd %xmm2, %xmm5 > > + orps Bias1+__svml_dlog1p_data_internal(%rip), %xmm10 > > + pshufd $2, %xmm4, %xmm9 > > + subpd %xmm10, %xmm0 > > + addpd %xmm5, %xmm3 > > + movd %xmm9, %ecx > > + mulpd L2+__svml_dlog1p_data_internal(%rip), %xmm0 > > + movslq %eax, %rax > > + movslq %ecx, %rcx > > + movsd (%rsi,%rax), %xmm11 > > + movhpd (%rsi,%rcx), %xmm11 > > + addpd %xmm3, %xmm11 > > + addpd %xmm11, %xmm0 > > + > > +/* OR in the Sign of input argument to produce correct log1p(-0) */ > > + orps %xmm6, %xmm0 > > + testl %edx, %edx > > + > > +/* Go to special inputs processing branch */ > > + jne L(SPECIAL_VALUES_BRANCH) > > + # LOE rbx r12 r13 r14 r15 edx xmm0 xmm7 > > + > > +/* Restore registers > > + * and exit the function > > + */ > > + > > +L(EXIT): > > + movq %rbp, %rsp > > + popq %rbp > > + cfi_def_cfa(7, 8) > > + cfi_restore(6) > > + ret > > + cfi_def_cfa(6, 16) > > + cfi_offset(6, -16) > > + > > +/* Branch to process > > + * special inputs > > + */ > > + > > +L(SPECIAL_VALUES_BRANCH): > > + movups %xmm7, 32(%rsp) > > + movups %xmm0, 48(%rsp) > > + # LOE rbx r12 r13 r14 r15 edx > > + > > + xorl %eax, %eax > > + movq %r12, 16(%rsp) > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22 > > + movl %eax, %r12d > > + movq %r13, 8(%rsp) > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 > > + movl %edx, %r13d > > + movq %r14, (%rsp) > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 
0xff, 0xff, 0x22 > > + # LOE rbx r15 r12d r13d > > + > > +/* Range mask > > + * bits check > > + */ > > + > > +L(RANGEMASK_CHECK): > > + btl %r12d, %r13d > > + > > +/* Call scalar math function */ > > + jc L(SCALAR_MATH_CALL) > > + # LOE rbx r15 r12d r13d > > + > > +/* Special inputs > > + * processing loop > > + */ > > + > > +L(SPECIAL_VALUES_LOOP): > > + incl %r12d > > + cmpl $2, %r12d > > + > > +/* Check bits in range mask */ > > + jl L(RANGEMASK_CHECK) > > + # LOE rbx r15 r12d r13d > > + > > + movq 16(%rsp), %r12 > > + cfi_restore(12) > > + movq 8(%rsp), %r13 > > + cfi_restore(13) > > + movq (%rsp), %r14 > > + cfi_restore(14) > > + movups 48(%rsp), %xmm0 > > + > > +/* Go to exit */ > > + jmp L(EXIT) > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22 > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 > > + # LOE rbx r12 r13 r14 r15 xmm0 > > + > > +/* Scalar math function call > > + * to process special input > > + */ > > + > > +L(SCALAR_MATH_CALL): > > + movl %r12d, %r14d > > + movsd 32(%rsp,%r14,8), %xmm0 > > + call log1p@PLT > > + # LOE rbx r14 r15 r12d r13d xmm0 > > + > > + movsd %xmm0, 48(%rsp,%r14,8) > > + > > +/* Process special inputs in loop */ > > + jmp L(SPECIAL_VALUES_LOOP) > > + # LOE rbx r15 r12d r13d > > +END(_ZGVbN2v_log1p_sse4) > > + > > + .section .rodata, "a" > > + .align 16 > > + > > +#ifdef
__svml_dlog1p_data_internal_typedef > > +typedef unsigned int VUINT32; > > +typedef struct { > > + __declspec(align(16)) VUINT32 Log_HA_table[(1<<10)+2][2]; > > + __declspec(align(16)) VUINT32 Log_LA_table[(1<<9)+1][2]; > > + __declspec(align(16)) VUINT32 poly_coeff[4][2][2]; > > + __declspec(align(16)) VUINT32 ExpMask[2][2]; > > + __declspec(align(16)) VUINT32 Two10[2][2]; > > + __declspec(align(16)) VUINT32 MinLog1p[2][2]; > > + __declspec(align(16)) VUINT32 MaxLog1p[2][2]; > > + __declspec(align(16)) VUINT32 One[2][2]; > > + __declspec(align(16)) VUINT32 SgnMask[2][2]; > > + __declspec(align(16)) VUINT32 XThreshold[2][2]; > > + __declspec(align(16)) VUINT32 XhMask[2][2]; > > + __declspec(align(16)) VUINT32 Threshold[2][2]; > > + __declspec(align(16)) VUINT32 Bias[2][2]; > > + __declspec(align(16)) VUINT32 Bias1[2][2]; > > + __declspec(align(16)) VUINT32 ExpMask0[2][2]; > > + __declspec(align(16)) VUINT32 ExpMask2[2][2]; > > + __declspec(align(16)) VUINT32 L2[2][2]; > > +} __svml_dlog1p_data_internal; > > +#endif > > +__svml_dlog1p_data_internal: > > + /* Log_HA_table */ > > + .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100 > > + .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a > > + .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff > > + .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a > > + .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb > > + .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e > > + .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b > > + .quad 0xc0862347acebaf68, 0xbe1cef3b152048af > > + .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e > > + .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4 > > + .quad 0xc08623537ac30980, 0xbe1cefc4642ee597 > > + .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16 > > + .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6 > > + .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362 > > + .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557 > > + .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b > > + .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed > > + .quad 0xc086236ec2b87358, 
0xbe1cefe1dc2cd2ed > > + .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f > > + .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce > > + .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7 > > + .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1 > > + .quad 0xc086238206e94218, 0xbe1ceee898588610 > > + .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea > > + .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6 > > + .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6 > > + .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165 > > + .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1 > > + .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b > > + .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670 > > + .quad 0xc08623a07b28ae60, 0xbe1cef359363787c > > + .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c > > + .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84 > > + .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7 > > + .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b > > + .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf > > + .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62 > > + .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b > > + .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98 > > + .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87 > > + .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff > > + .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798 > > + .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e > > + .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde > > + .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b > > + .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c > > + .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98 > > + .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f > > + .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358 > > + .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380 > > + .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4 > > + .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b > > + .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2 > > + .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4 > > + .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400 > > + .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7 > > + .quad 0xc086240070e45548, 
0xbe1cf0747cd3e03a > > + .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d > > + .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b > > + .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575 > > + .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951 > > + .quad 0xc086241263e87f50, 0xbe1cf16e74768529 > > + .quad 0xc0862415f6193658, 0xbe1cefec64b8becb > > + .quad 0xc086241986b28f30, 0xbe1cf0838d210baa > > + .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11 > > + .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805 > > + .quad 0xc08624242f008380, 0xbe1ceea988c5a417 > > + .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5 > > + .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38 > > + .quad 0xc086242ec92eaee8, 0xbe1cef0946455411 > > + .quad 0xc08624324ecbaf98, 0xbe1cefea60907739 > > + .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42 > > + .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d > > + .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e > > + .quad 0xc086244055d2c968, 0xbe1cef345284c119 > > + .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219 > > + .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114 > > + .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189 > > + .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f > > + .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f > > + .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5 > > + .quad 0xc0862458a789e250, 0xbe1cf0b173796a31 > > + .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d > > + .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb > > + .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7 > > + .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f > > + .quad 0xc0862469d9a591c0, 0xbe1cef503d947663 > > + .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2 > > + .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc > > + .quad 0xc086247419475160, 0xbe1cf03dd9922331 > > + .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129 > > + .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6 > > + .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100 > > + .quad 0xc0862481af27c528, 0xbe1cee8a6593278a > > + .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7 > > + .quad 0xc0862488717af888, 
0xbe1cf0b4b8ed7da8 > > + .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002 > > + .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4 > > + .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c > > + .quad 0xc0862495e5179270, 0xbe1cee757f20c326 > > + .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4 > > + .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97 > > + .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb > > + .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e > > + .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b > > + .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80 > > + .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71 > > + .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9 > > + .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139 > > + .quad 0xc08624b72472a528, 0xbe1cf031c931c11f > > + .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7 > > + .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d > > + .quad 0xc08624c103245238, 0xbe1cefd492f1ba75 > > + .quad 0xc08624c44aacab08, 0xbe1cf1253e154466 > > + .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55 > > + .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe > > + .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f > > + .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968 > > + .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78 > > + .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75 > > + .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2 > > + .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d > > + .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed > > + .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f > > + .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65 > > + .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078 > > + .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a > > + .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a > > + .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2 > > + .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc > > + .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501 > > + .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7 > > + .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c > > + .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c > > + .quad 0xc0862507f9448db0, 
0xbe1cf082da464994 > > + .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf > > + .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531 > > + .quad 0xc08625117667dd78, 0xbe1cf1106599c962 > > + .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f > > + .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6 > > + .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4 > > + .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092 > > + .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd > > + .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7 > > + .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25 > > + .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d > > + .quad 0xc086252dab033898, 0xbe1cf220bba8861f > > + .quad 0xc0862530c732b078, 0xbe1cef51e310eae2 > > + .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae > > + .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8 > > + .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171 > > + .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408 > > + .quad 0xc08625404216d160, 0xbe1cf22d2536f06b > > + .quad 0xc08625435715e498, 0xbe1cef6abbf2e268 > > + .quad 0xc08625466ae57648, 0xbe1cf093a14789f5 > > + .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c > > + .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc > > + .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157 > > + .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997 > > + .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff > > + .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f > > + .quad 0xc086255bd4ac4590, 0xbe1cf1b4c51113c9 > > + .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d > > + .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc > > + .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9 > > + .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5 > > + .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b > > + .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996 > > + .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945 > > + .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995 > > + .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c > > + .quad 0xc086257a09acaae0, 0xbe1cf172c3078947 > > + .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22 > > + .quad 0xc086258006ae71b8, 
0xbe1cefdb80426923 > > + .quad 0xc08625830381da08, 0xbe1ceef1391a0372 > > + .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13 > > + .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83 > > + .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9 > > + .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0 > > + .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81 > > + .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766 > > + .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b > > + .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2 > > + .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec > > + .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e > > + .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7 > > + .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780 > > + .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11 > > + .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219 > > + .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160 > > + .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495 > > + .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5 > > + .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5 > > + .quad 0xc08625baf725ae28, 0xbe1cf05c80779283 > > + .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889 > > + .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124 > > + .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86 > > + .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092 > > + .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb > > + .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12 > > + .quad 0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7 > > + .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e > > + .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701 > > + .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812 > > + .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e > > + .quad 0xc08625dd881990b0, 0xbe1cf14e1325c5e4 > > + .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12 > > + .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21 > > + .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2 > > + .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece > > + .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12 > > + .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad > > + .quad 0xc08625f16dd8da28, 
0xbe1cf1984236a6e3 > > + .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9 > > + .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1 > > + .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9 > > + .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2 > > + .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51 > > + .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e > > + .quad 0xc08626052294df58, 0xbe1cf1b745c57716 > > + .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23 > > + .quad 0xc086260abb103458, 0xbe1cef480ff1acd2 > > + .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef > > + .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5 > > + .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6 > > + .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a > > + .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545 > > + .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011 > > + .quad 0xc086261e32267e98, 0xbe1cf19917010e96 > > + .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985 > > + .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3 > > + .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c > > + .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50 > > + .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68 > > + .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9 > > + .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b > > + .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238 > > + .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e > > + .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d > > + .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba > > + .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279 > > + .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085 > > + .quad 0xc086264494738e08, 0xbe1cf06797bd03b2 > > + .quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1 > > + .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f > > + .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a > > + .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39 > > + .quad 0xc08626521daf7758, 0xbe1cf252595aceb3 > > + .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2 > > + .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa > > + .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366 > > + .quad 0xc086265ce179ebc8, 
0xbe1cef8df00a922b > > + .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0 > > + .quad 0xc08626623df56e38, 0xbe1cf080e10b8365 > > + .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544 > > + .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9 > > + .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9 > > + .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2 > > + .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c > > + .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6 > > + .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d > > + .quad 0xc08626778c3d4798, 0xbe1cefe260819380 > > + .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3 > > + .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa > > + .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1 > > + .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52 > > + .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd > > + .quad 0xc086268762086350, 0xbe1cefaee1edfa35 > > + .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936 > > + .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed > > + .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49 > > + .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e > > + .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc > > + .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840 > > + .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be > > + .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c > > + .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06 > > + .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e > > + .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3 > > + .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68 > > + .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5 > > + .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986 > > + .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d > > + .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26 > > + .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06 > > + .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652 > > + .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f > > + .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c > > + .quad 0xc08626bddc737648, 0xbe1ceec10a020e73 > > + .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7 > > + .quad 0xc08626c2f9890ca0, 
0xbe1cf0cb8f169ffe > > + .quad 0xc08626c586da9388, 0xbe1cef7de2452430 > > + .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae > > + .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d > > + .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3 > > + .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d > > + .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e > > + .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64 > > + .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2 > > + .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d > > + .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab > > + .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153 > > + .quad 0xc08626e164224880, 0xbe1ceeb431709788 > > + .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5 > > + .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b > > + .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93 > > + .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8 > > + .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2 > > + .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6 > > + .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef > > + .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339 > > + .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1 > > + .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28 > > + .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f > > + .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3 > > + .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6 > > + .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6 > > + .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3 > > + .quad 0xc086270941934b10, 0xbe1ceefe32981f2c > > + .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445 > > + .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c > > + .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f > > + .quad 0xc08627131a321318, 0xbe1cef04ac0fb802 > > + .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd > > + .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5 > > + .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570 > > + .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85 > > + .quad 0xc086271f58064068, 0xbe1cef092a785e3f > > + .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30 > > + .quad 0xc086272438546be8, 
0xbe1cf210907ded8b > > + .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99 > > + .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc > > + .quad 0xc086272b833b8df0, 0xbe1cf06874992df5 > > + .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899 > > + .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99 > > + .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe > > + .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d > > + .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8 > > + .quad 0xc086273a05367688, 0xbe1cf18656c50806 > > + .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a > > + .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911 > > + .quad 0xc08627413c621848, 0xbe1cf188a4ea680c > > + .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80 > > + .quad 0xc086274608397868, 0xbe1cf25a328c28e2 > > + .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8 > > + .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a > > + .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228 > > + .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c > > + .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44 > > + .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2 > > + .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4 > > + .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a > > + .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9 > > + .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627 > > + .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e > > + .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee > > + .quad 0xc0862764efdbf768, 0xbe1ceeb06e1d0dad > > + .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5 > > + .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f > > + .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312 > > + .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85 > > + .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011 > > + .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7 > > + .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da > > + .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554 > > + .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377 > > + .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd > > + .quad 0xc086277eba506158, 0xbe1cf0b911b029f0 > > + .quad 0xc08627810e6f4028, 
0xbe1cefdc24719766 > > + .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7 > > + .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec > > + .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc > > + .quad 0xc086278a58297918, 0xbe1cf053073872bf > > + .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947 > > + .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234 > > + .quad 0xc086279148685aa0, 0xbe1cf162204794a8 > > + .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac > > + .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3 > > + .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388 > > + .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5 > > + .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f > > + .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a > > + .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f > > + .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f > > + .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26 > > + .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a > > + .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81 > > + .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d > > + .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893 > > + .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0 > > + .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8 > > + .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00 > > + .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2 > > + .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4 > > + .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7 > > + .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3 > > + .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d > > + .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e > > + .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93 > > + .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a > > + .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9 > > + .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f > > + .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1 > > + .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4 > > + .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb > > + .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b > > + .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b > > + .quad 0xc08627d9ddc5a618, 
0xbe1ceeb7048fad96 > > + .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477 > > + .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2 > > + .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c > > + .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875 > > + .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522 > > + .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57 > > + .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e > > + .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548 > > + .quad 0xc08627edd34756b8, 0xbe1cef36b3366305 > > + .quad 0xc08627f007f0a408, 0xbe1cf18134625550 > > + .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11 > > + .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc > > + .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8 > > + .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe > > + .quad 0xc08627fb06290f90, 0xbe1cf25188430e25 > > + .quad 0xc08627fd37324070, 0xbe1ceea1713490f9 > > + .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c > > + .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b > > + .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b > > + .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c > > + .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a > > + .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4 > > + .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256 > > + .quad 0xc086280eaa003050, 0xbe1cf010ad787fea > > + .quad 0xc0862810d5af5880, 0xbe1cee622478393d > > + .quad 0xc086281300c7e368, 0xbe1cf01c7482564f > > + .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536 > > + .quad 0xc086281755366778, 0xbe1cef2edae5837d > > + .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9 > > + .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8 > > + .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83 > > + .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4 > > + .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9 > > + .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2 > > + .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d > > + .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1 > > + .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b > > + .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02 > > + .quad 0xc086282efc0ca328, 
0xbe1cf0bc139379a9 > > + .quad 0xc08628311f099420, 0xbe1cef247a9ec596 > > + .quad 0xc086283341749490, 0xbe1cef74bbcc488a > > + .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e > > + .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810 > > + .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8 > > + .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065 > > + .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e > > + .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234 > > + .quad 0xc08628422284b168, 0xbe1cf0abf7638127 > > + .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058 > > + .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c > > + .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1 > > + .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43 > > + .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09 > > + .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60 > > + .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393 > > + .quad 0xc0862853021d4588, 0xbe1cf176adb417f7 > > + .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da > > + .quad 0xc08628573479b220, 0xbe1ceec34cf49523 > > + .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb > > + .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b > > + .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d > > + .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5 > > + .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792 > > + .quad 0xc0862863be697458, 0xbe1cf097f890c6f8 > > + .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc > > + .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7 > > + .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98 > > + .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7 > > + .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d > > + .quad 0xc086287034d0b690, 0xbe1ceff262d0a248 > > + .quad 0xc086287246aab180, 0xbe1cefa7bc194186 > > + .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9 > > + .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07 > > + .quad 0xc086287879041490, 0xbe1cf034803c8a48 > > + .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f > > + .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7 > > + .quad 0xc086287ea6946958, 0xbe1cefb1e4625943 > > + .quad 0xc0862880b4b59010, 
0xbe1cf143efdd1fd0 > > + .quad 0xc0862882c24faff8, 0xbe1cee9896d016da > > + .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc > > + .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34 > > + .quad 0xc0862888e7f699e0, 0xbe1cf05603549486 > > + .quad 0xc086288af37750b0, 0xbe1cef50fff513d3 > > + .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0 > > + .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d > > + .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f > > + .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed > > + .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d > > + .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646 > > + .quad 0xc0862899356c1150, 0xbe1ceec4501167e9 > > + .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f > > + .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35 > > + .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a > > + .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464 > > + .quad 0xc08628a355104818, 0xbe1cf0435e2782b0 > > + .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c > > + .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d > > + .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2 > > + .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9 > > + .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979 > > + .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c > > + .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32 > > + .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2 > > + .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303 > > + .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880 > > + /*== Log_LA_table ==*/ > > + .align 16 > > + .quad 0x8000000000000000 > > + .quad 0xbf5ff802a9ab10e6 > > + .quad 0xbf6ff00aa2b10bc0 > > + .quad 0xbf77ee11ebd82e94 > > + .quad 0xbf7fe02a6b106789 > > + .quad 0xbf83e7295d25a7d9 > > + .quad 0xbf87dc475f810a77 > > + .quad 0xbf8bcf712c74384c > > + .quad 0xbf8fc0a8b0fc03e4 > > + .quad 0xbf91d7f7eb9eebe7 > > + .quad 0xbf93cea44346a575 > > + .quad 0xbf95c45a51b8d389 > > + .quad 0xbf97b91b07d5b11b > > + .quad 0xbf99ace7551cc514 > > + .quad 0xbf9b9fc027af9198 > > + .quad 0xbf9d91a66c543cc4 > > + .quad 0xbf9f829b0e783300 > > + .quad 0xbfa0b94f7c196176 
> > + .quad 0xbfa1b0d98923d980 > > + .quad 0xbfa2a7ec2214e873 > > + .quad 0xbfa39e87b9febd60 > > + .quad 0xbfa494acc34d911c > > + .quad 0xbfa58a5bafc8e4d5 > > + .quad 0xbfa67f94f094bd98 > > + .quad 0xbfa77458f632dcfc > > + .quad 0xbfa868a83083f6cf > > + .quad 0xbfa95c830ec8e3eb > > + .quad 0xbfaa4fe9ffa3d235 > > + .quad 0xbfab42dd711971bf > > + .quad 0xbfac355dd0921f2d > > + .quad 0xbfad276b8adb0b52 > > + .quad 0xbfae19070c276016 > > + .quad 0xbfaf0a30c01162a6 > > + .quad 0xbfaffae9119b9303 > > + .quad 0xbfb075983598e471 > > + .quad 0xbfb0ed839b5526fe > > + .quad 0xbfb16536eea37ae1 > > + .quad 0xbfb1dcb263db1944 > > + .quad 0xbfb253f62f0a1417 > > + .quad 0xbfb2cb0283f5de1f > > + .quad 0xbfb341d7961bd1d1 > > + .quad 0xbfb3b87598b1b6ee > > + .quad 0xbfb42edcbea646f0 > > + .quad 0xbfb4a50d3aa1b040 > > + .quad 0xbfb51b073f06183f > > + .quad 0xbfb590cafdf01c28 > > + .quad 0xbfb60658a93750c4 > > + .quad 0xbfb67bb0726ec0fc > > + .quad 0xbfb6f0d28ae56b4c > > + .quad 0xbfb765bf23a6be13 > > + .quad 0xbfb7da766d7b12cd > > + .quad 0xbfb84ef898e8282a > > + .quad 0xbfb8c345d6319b21 > > + .quad 0xbfb9375e55595ede > > + .quad 0xbfb9ab42462033ad > > + .quad 0xbfba1ef1d8061cd4 > > + .quad 0xbfba926d3a4ad563 > > + .quad 0xbfbb05b49bee43fe > > + .quad 0xbfbb78c82bb0eda1 > > + .quad 0xbfbbeba818146765 > > + .quad 0xbfbc5e548f5bc743 > > + .quad 0xbfbcd0cdbf8c13e1 > > + .quad 0xbfbd4313d66cb35d > > + .quad 0xbfbdb5270187d927 > > + .quad 0xbfbe27076e2af2e6 > > + .quad 0xbfbe98b549671467 > > + .quad 0xbfbf0a30c01162a6 > > + .quad 0xbfbf7b79fec37ddf > > + .quad 0xbfbfec9131dbeabb > > + .quad 0xbfc02ebb42bf3d4b > > + .quad 0xbfc0671512ca596e > > + .quad 0xbfc09f561ee719c3 > > + .quad 0xbfc0d77e7cd08e59 > > + .quad 0xbfc10f8e422539b1 > > + .quad 0xbfc14785846742ac > > + .quad 0xbfc17f6458fca611 > > + .quad 0xbfc1b72ad52f67a0 > > + .quad 0xbfc1eed90e2dc2c3 > > + .quad 0xbfc2266f190a5acb > > + .quad 0xbfc25ded0abc6ad2 > > + .quad 0xbfc29552f81ff523 > > + .quad 0xbfc2cca0f5f5f251 > > + .quad 
0xbfc303d718e47fd3 > > + .quad 0xbfc33af575770e4f > > + .quad 0xbfc371fc201e8f74 > > + .quad 0xbfc3a8eb2d31a376 > > + .quad 0xbfc3dfc2b0ecc62a > > + .quad 0xbfc41682bf727bc0 > > + .quad 0xbfc44d2b6ccb7d1e > > + .quad 0xbfc483bccce6e3dd > > + .quad 0xbfc4ba36f39a55e5 > > + .quad 0xbfc4f099f4a230b2 > > + .quad 0xbfc526e5e3a1b438 > > + .quad 0xbfc55d1ad4232d6f > > + .quad 0xbfc59338d9982086 > > + .quad 0xbfc5c940075972b9 > > + .quad 0xbfc5ff3070a793d4 > > + .quad 0xbfc6350a28aaa758 > > + .quad 0xbfc66acd4272ad51 > > + .quad 0xbfc6a079d0f7aad2 > > + .quad 0xbfc6d60fe719d21d > > + .quad 0xbfc70b8f97a1aa75 > > + .quad 0xbfc740f8f54037a5 > > + .quad 0xbfc7764c128f2127 > > + .quad 0xbfc7ab890210d909 > > + .quad 0xbfc7e0afd630c274 > > + .quad 0xbfc815c0a14357eb > > + .quad 0xbfc84abb75865139 > > + .quad 0xbfc87fa06520c911 > > + .quad 0xbfc8b46f8223625b > > + .quad 0xbfc8e928de886d41 > > + .quad 0xbfc91dcc8c340bde > > + .quad 0xbfc9525a9cf456b4 > > + .quad 0xbfc986d3228180ca > > + .quad 0xbfc9bb362e7dfb83 > > + .quad 0xbfc9ef83d2769a34 > > + .quad 0xbfca23bc1fe2b563 > > + .quad 0xbfca57df28244dcd > > + .quad 0xbfca8becfc882f19 > > + .quad 0xbfcabfe5ae46124c > > + .quad 0xbfcaf3c94e80bff3 > > + .quad 0xbfcb2797ee46320c > > + .quad 0xbfcb5b519e8fb5a4 > > + .quad 0xbfcb8ef670420c3b > > + .quad 0xbfcbc286742d8cd6 > > + .quad 0xbfcbf601bb0e44e2 > > + .quad 0xbfcc2968558c18c1 > > + .quad 0xbfcc5cba543ae425 > > + .quad 0xbfcc8ff7c79a9a22 > > + .quad 0xbfccc320c0176502 > > + .quad 0xbfccf6354e09c5dc > > + .quad 0xbfcd293581b6b3e7 > > + .quad 0xbfcd5c216b4fbb91 > > + .quad 0xbfcd8ef91af31d5e > > + .quad 0xbfcdc1bca0abec7d > > + .quad 0xbfcdf46c0c722d2f > > + .quad 0xbfce27076e2af2e6 > > + .quad 0xbfce598ed5a87e2f > > + .quad 0xbfce8c0252aa5a60 > > + .quad 0xbfcebe61f4dd7b0b > > + .quad 0xbfcef0adcbdc5936 > > + .quad 0xbfcf22e5e72f105d > > + .quad 0xbfcf550a564b7b37 > > + .quad 0xbfcf871b28955045 > > + .quad 0xbfcfb9186d5e3e2b > > + .quad 0xbfcfeb0233e607cc > > + .quad 
0xbfd00e6c45ad501d > > + .quad 0xbfd0274dc16c232f > > + .quad 0xbfd0402594b4d041 > > + .quad 0xbfd058f3c703ebc6 > > + .quad 0xbfd071b85fcd590d > > + .quad 0xbfd08a73667c57af > > + .quad 0xbfd0a324e27390e3 > > + .quad 0xbfd0bbccdb0d24bd > > + .quad 0xbfd0d46b579ab74b > > + .quad 0xbfd0ed005f657da4 > > + .quad 0xbfd1058bf9ae4ad5 > > + .quad 0xbfd11e0e2dad9cb7 > > + .quad 0xbfd136870293a8b0 > > + .quad 0xbfd14ef67f88685a > > + .quad 0xbfd1675cababa60e > > + .quad 0xbfd17fb98e15095d > > + .quad 0xbfd1980d2dd4236f > > + .quad 0xbfd1b05791f07b49 > > + .quad 0xbfd1c898c16999fb > > + .quad 0xbfd1e0d0c33716be > > + .quad 0xbfd1f8ff9e48a2f3 > > + .quad 0xbfd211255986160c > > + .quad 0xbfd22941fbcf7966 > > + .quad 0xbfd241558bfd1404 > > + .quad 0xbfd2596010df763a > > + .quad 0xbfd27161913f853d > > + .quad 0xbfd2895a13de86a3 > > + .quad 0xbfd2a1499f762bc9 > > + .quad 0xbfd2b9303ab89d25 > > + .quad 0xbfd2d10dec508583 > > + .quad 0xbfd2e8e2bae11d31 > > + .quad 0xbfd300aead06350c > > + .quad 0xbfd31871c9544185 > > + .quad 0xbfd3302c16586588 > > + .quad 0xbfd347dd9a987d55 > > + .quad 0xbfd35f865c93293e > > + .quad 0xbfd3772662bfd85b > > + .quad 0xbfd38ebdb38ed321 > > + .quad 0xbfd3a64c556945ea > > + .quad 0xbfd3bdd24eb14b6a > > + .quad 0xbfd3d54fa5c1f710 > > + .quad 0xbfd3ecc460ef5f50 > > + .quad 0xbfd404308686a7e4 > > + .quad 0xbfd41b941cce0bee > > + .quad 0xbfd432ef2a04e814 > > + .quad 0xbfd44a41b463c47c > > + .quad 0xbfd4618bc21c5ec2 > > + .quad 0xbfd478cd5959b3d9 > > + .quad 0xbfd49006804009d1 > > + .quad 0xbfd4a7373cecf997 > > + .quad 0xbfd4be5f957778a1 > > + .quad 0xbfd4d57f8fefe27f > > + .quad 0xbfd4ec973260026a > > + .quad 0xbfd503a682cb1cb3 > > + .quad 0xbfd51aad872df82d > > + .quad 0xbfd531ac457ee77e > > + .quad 0xbfd548a2c3add263 > > + .quad 0xbfd55f9107a43ee2 > > + .quad 0xbfd5767717455a6c > > + .quad 0xbfd58d54f86e02f2 > > + .quad 0xbfd5a42ab0f4cfe2 > > + .quad 0xbfd5baf846aa1b19 > > + .quad 0xbfd5d1bdbf5809ca > > + .quad 0xbfd5e87b20c2954a > > + .quad 
0xbfd5ff3070a793d4 > > + .quad 0xbfd615ddb4bec13c > > + .quad 0xbfd62c82f2b9c795 > > + .quad 0x3fd61965cdb02c1f > > + .quad 0x3fd602d08af091ec > > + .quad 0x3fd5ec433d5c35ae > > + .quad 0x3fd5d5bddf595f30 > > + .quad 0x3fd5bf406b543db2 > > + .quad 0x3fd5a8cadbbedfa1 > > + .quad 0x3fd5925d2b112a59 > > + .quad 0x3fd57bf753c8d1fb > > + .quad 0x3fd565995069514c > > + .quad 0x3fd54f431b7be1a9 > > + .quad 0x3fd538f4af8f72fe > > + .quad 0x3fd522ae0738a3d8 > > + .quad 0x3fd50c6f1d11b97c > > + .quad 0x3fd4f637ebba9810 > > + .quad 0x3fd4e0086dd8baca > > + .quad 0x3fd4c9e09e172c3c > > + .quad 0x3fd4b3c077267e9a > > + .quad 0x3fd49da7f3bcc41f > > + .quad 0x3fd487970e958770 > > + .quad 0x3fd4718dc271c41b > > + .quad 0x3fd45b8c0a17df13 > > + .quad 0x3fd44591e0539f49 > > + .quad 0x3fd42f9f3ff62642 > > + .quad 0x3fd419b423d5e8c7 > > + .quad 0x3fd403d086cea79c > > + .quad 0x3fd3edf463c1683e > > + .quad 0x3fd3d81fb5946dba > > + .quad 0x3fd3c25277333184 > > + .quad 0x3fd3ac8ca38e5c5f > > + .quad 0x3fd396ce359bbf54 > > + .quad 0x3fd3811728564cb2 > > + .quad 0x3fd36b6776be1117 > > + .quad 0x3fd355bf1bd82c8b > > + .quad 0x3fd3401e12aecba1 > > + .quad 0x3fd32a84565120a8 > > + .quad 0x3fd314f1e1d35ce4 > > + .quad 0x3fd2ff66b04ea9d4 > > + .quad 0x3fd2e9e2bce12286 > > + .quad 0x3fd2d46602adccee > > + .quad 0x3fd2bef07cdc9354 > > + .quad 0x3fd2a982269a3dbf > > + .quad 0x3fd2941afb186b7c > > + .quad 0x3fd27ebaf58d8c9d > > + .quad 0x3fd269621134db92 > > + .quad 0x3fd25410494e56c7 > > + .quad 0x3fd23ec5991eba49 > > + .quad 0x3fd22981fbef797b > > + .quad 0x3fd214456d0eb8d4 > > + .quad 0x3fd1ff0fe7cf47a7 > > + .quad 0x3fd1e9e1678899f4 > > + .quad 0x3fd1d4b9e796c245 > > + .quad 0x3fd1bf99635a6b95 > > + .quad 0x3fd1aa7fd638d33f > > + .quad 0x3fd1956d3b9bc2fa > > + .quad 0x3fd180618ef18adf > > + .quad 0x3fd16b5ccbacfb73 > > + .quad 0x3fd1565eed455fc3 > > + .quad 0x3fd14167ef367783 > > + .quad 0x3fd12c77cd00713b > > + .quad 0x3fd1178e8227e47c > > + .quad 0x3fd102ac0a35cc1c > > + .quad 
0x3fd0edd060b78081 > > + .quad 0x3fd0d8fb813eb1ef > > + .quad 0x3fd0c42d676162e3 > > + .quad 0x3fd0af660eb9e279 > > + .quad 0x3fd09aa572e6c6d4 > > + .quad 0x3fd085eb8f8ae797 > > + .quad 0x3fd07138604d5862 > > + .quad 0x3fd05c8be0d9635a > > + .quad 0x3fd047e60cde83b8 > > + .quad 0x3fd03346e0106062 > > + .quad 0x3fd01eae5626c691 > > + .quad 0x3fd00a1c6adda473 > > + .quad 0x3fcfeb2233ea07cd > > + .quad 0x3fcfc218be620a5e > > + .quad 0x3fcf991c6cb3b379 > > + .quad 0x3fcf702d36777df0 > > + .quad 0x3fcf474b134df229 > > + .quad 0x3fcf1e75fadf9bde > > + .quad 0x3fcef5ade4dcffe6 > > + .quad 0x3fceccf2c8fe920a > > + .quad 0x3fcea4449f04aaf5 > > + .quad 0x3fce7ba35eb77e2a > > + .quad 0x3fce530effe71012 > > + .quad 0x3fce2a877a6b2c12 > > + .quad 0x3fce020cc6235ab5 > > + .quad 0x3fcdd99edaf6d7e9 > > + .quad 0x3fcdb13db0d48940 > > + .quad 0x3fcd88e93fb2f450 > > + .quad 0x3fcd60a17f903515 > > + .quad 0x3fcd38666871f465 > > + .quad 0x3fcd1037f2655e7b > > + .quad 0x3fcce816157f1988 > > + .quad 0x3fccc000c9db3c52 > > + .quad 0x3fcc97f8079d44ec > > + .quad 0x3fcc6ffbc6f00f71 > > + .quad 0x3fcc480c0005ccd1 > > + .quad 0x3fcc2028ab17f9b4 > > + .quad 0x3fcbf851c067555f > > + .quad 0x3fcbd087383bd8ad > > + .quad 0x3fcba8c90ae4ad19 > > + .quad 0x3fcb811730b823d2 > > + .quad 0x3fcb5971a213acdb > > + .quad 0x3fcb31d8575bce3d > > + .quad 0x3fcb0a4b48fc1b46 > > + .quad 0x3fcae2ca6f672bd4 > > + .quad 0x3fcabb55c31693ad > > + .quad 0x3fca93ed3c8ad9e3 > > + .quad 0x3fca6c90d44b704e > > + .quad 0x3fca454082e6ab05 > > + .quad 0x3fca1dfc40f1b7f1 > > + .quad 0x3fc9f6c407089664 > > + .quad 0x3fc9cf97cdce0ec3 > > + .quad 0x3fc9a8778debaa38 > > + .quad 0x3fc981634011aa75 > > + .quad 0x3fc95a5adcf7017f > > + .quad 0x3fc9335e5d594989 > > + .quad 0x3fc90c6db9fcbcd9 > > + .quad 0x3fc8e588ebac2dbf > > + .quad 0x3fc8beafeb38fe8c > > + .quad 0x3fc897e2b17b19a5 > > + .quad 0x3fc871213750e994 > > + .quad 0x3fc84a6b759f512f > > + .quad 0x3fc823c16551a3c2 > > + .quad 0x3fc7fd22ff599d4f > > + .quad 
0x3fc7d6903caf5ad0 > > + .quad 0x3fc7b0091651528c > > + .quad 0x3fc7898d85444c73 > > + .quad 0x3fc7631d82935a86 > > + .quad 0x3fc73cb9074fd14d > > + .quad 0x3fc716600c914054 > > + .quad 0x3fc6f0128b756abc > > + .quad 0x3fc6c9d07d203fc7 > > + .quad 0x3fc6a399dabbd383 > > + .quad 0x3fc67d6e9d785771 > > + .quad 0x3fc6574ebe8c133a > > + .quad 0x3fc6313a37335d76 > > + .quad 0x3fc60b3100b09476 > > + .quad 0x3fc5e533144c1719 > > + .quad 0x3fc5bf406b543db2 > > + .quad 0x3fc59958ff1d52f1 > > + .quad 0x3fc5737cc9018cdd > > + .quad 0x3fc54dabc26105d2 > > + .quad 0x3fc527e5e4a1b58d > > + .quad 0x3fc5022b292f6a45 > > + .quad 0x3fc4dc7b897bc1c8 > > + .quad 0x3fc4b6d6fefe22a4 > > + .quad 0x3fc4913d8333b561 > > + .quad 0x3fc46baf0f9f5db7 > > + .quad 0x3fc4462b9dc9b3dc > > + .quad 0x3fc420b32740fdd4 > > + .quad 0x3fc3fb45a59928cc > > + .quad 0x3fc3d5e3126bc27f > > + .quad 0x3fc3b08b6757f2a9 > > + .quad 0x3fc38b3e9e027479 > > + .quad 0x3fc365fcb0159016 > > + .quad 0x3fc340c59741142e > > + .quad 0x3fc31b994d3a4f85 > > + .quad 0x3fc2f677cbbc0a96 > > + .quad 0x3fc2d1610c86813a > > + .quad 0x3fc2ac55095f5c59 > > + .quad 0x3fc28753bc11aba5 > > + .quad 0x3fc2625d1e6ddf57 > > + .quad 0x3fc23d712a49c202 > > + .quad 0x3fc2188fd9807263 > > + .quad 0x3fc1f3b925f25d41 > > + .quad 0x3fc1ceed09853752 > > + .quad 0x3fc1aa2b7e23f72a > > + .quad 0x3fc185747dbecf34 > > + .quad 0x3fc160c8024b27b1 > > + .quad 0x3fc13c2605c398c3 > > + .quad 0x3fc1178e8227e47c > > + .quad 0x3fc0f301717cf0fb > > + .quad 0x3fc0ce7ecdccc28d > > + .quad 0x3fc0aa06912675d5 > > + .quad 0x3fc08598b59e3a07 > > + .quad 0x3fc06135354d4b18 > > + .quad 0x3fc03cdc0a51ec0d > > + .quad 0x3fc0188d2ecf6140 > > + .quad 0x3fbfe89139dbd566 > > + .quad 0x3fbfa01c9db57ce2 > > + .quad 0x3fbf57bc7d9005db > > + .quad 0x3fbf0f70cdd992e3 > > + .quad 0x3fbec739830a1120 > > + .quad 0x3fbe7f1691a32d3e > > + .quad 0x3fbe3707ee30487b > > + .quad 0x3fbdef0d8d466db9 > > + .quad 0x3fbda727638446a2 > > + .quad 0x3fbd5f55659210e2 > > + .quad 
0x3fbd179788219364 > > + .quad 0x3fbccfedbfee13a8 > > + .quad 0x3fbc885801bc4b23 > > + .quad 0x3fbc40d6425a5cb1 > > + .quad 0x3fbbf968769fca11 > > + .quad 0x3fbbb20e936d6974 > > + .quad 0x3fbb6ac88dad5b1c > > + .quad 0x3fbb23965a52ff00 > > + .quad 0x3fbadc77ee5aea8c > > + .quad 0x3fba956d3ecade63 > > + .quad 0x3fba4e7640b1bc38 > > + .quad 0x3fba0792e9277cac > > + .quad 0x3fb9c0c32d4d2548 > > + .quad 0x3fb97a07024cbe74 > > + .quad 0x3fb9335e5d594989 > > + .quad 0x3fb8ecc933aeb6e8 > > + .quad 0x3fb8a6477a91dc29 > > + .quad 0x3fb85fd927506a48 > > + .quad 0x3fb8197e2f40e3f0 > > + .quad 0x3fb7d33687c293c9 > > + .quad 0x3fb78d02263d82d3 > > + .quad 0x3fb746e100226ed9 > > + .quad 0x3fb700d30aeac0e1 > > + .quad 0x3fb6bad83c1883b6 > > + .quad 0x3fb674f089365a7a > > + .quad 0x3fb62f1be7d77743 > > + .quad 0x3fb5e95a4d9791cb > > + .quad 0x3fb5a3abb01ade25 > > + .quad 0x3fb55e10050e0384 > > + .quad 0x3fb518874226130a > > + .quad 0x3fb4d3115d207eac > > + .quad 0x3fb48dae4bc31018 > > + .quad 0x3fb4485e03dbdfad > > + .quad 0x3fb403207b414b7f > > + .quad 0x3fb3bdf5a7d1ee64 > > + .quad 0x3fb378dd7f749714 > > + .quad 0x3fb333d7f8183f4b > > + .quad 0x3fb2eee507b40301 > > + .quad 0x3fb2aa04a44717a5 > > + .quad 0x3fb26536c3d8c369 > > + .quad 0x3fb2207b5c78549e > > + .quad 0x3fb1dbd2643d190b > > + .quad 0x3fb1973bd1465567 > > + .quad 0x3fb152b799bb3cc9 > > + .quad 0x3fb10e45b3cae831 > > + .quad 0x3fb0c9e615ac4e17 > > + .quad 0x3fb08598b59e3a07 > > + .quad 0x3fb0415d89e74444 > > + .quad 0x3faffa6911ab9301 > > + .quad 0x3faf723b517fc523 > > + .quad 0x3faeea31c006b87c > > + .quad 0x3fae624c4a0b5e1b > > + .quad 0x3fadda8adc67ee4e > > + .quad 0x3fad52ed6405d86f > > + .quad 0x3faccb73cdddb2cc > > + .quad 0x3fac441e06f72a9e > > + .quad 0x3fabbcebfc68f420 > > + .quad 0x3fab35dd9b58baad > > + .quad 0x3faaaef2d0fb10fc > > + .quad 0x3faa282b8a936171 > > + .quad 0x3fa9a187b573de7c > > + .quad 0x3fa91b073efd7314 > > + .quad 0x3fa894aa149fb343 > > + .quad 0x3fa80e7023d8ccc4 > > + .quad 
0x3fa788595a3577ba > > + .quad 0x3fa70265a550e777 > > + .quad 0x3fa67c94f2d4bb58 > > + .quad 0x3fa5f6e73078efb8 > > + .quad 0x3fa5715c4c03ceef > > + .quad 0x3fa4ebf43349e26f > > + .quad 0x3fa466aed42de3ea > > + .quad 0x3fa3e18c1ca0ae92 > > + .quad 0x3fa35c8bfaa1306b > > + .quad 0x3fa2d7ae5c3c5bae > > + .quad 0x3fa252f32f8d183f > > + .quad 0x3fa1ce5a62bc353a > > + .quad 0x3fa149e3e4005a8d > > + .quad 0x3fa0c58fa19dfaaa > > + .quad 0x3fa0415d89e74444 > > + .quad 0x3f9f7a9b16782856 > > + .quad 0x3f9e72bf2813ce51 > > + .quad 0x3f9d6b2725979802 > > + .quad 0x3f9c63d2ec14aaf2 > > + .quad 0x3f9b5cc258b718e6 > > + .quad 0x3f9a55f548c5c43f > > + .quad 0x3f994f6b99a24475 > > + .quad 0x3f98492528c8cabf > > + .quad 0x3f974321d3d006d3 > > + .quad 0x3f963d6178690bd6 > > + .quad 0x3f9537e3f45f3565 > > + .quad 0x3f9432a925980cc1 > > + .quad 0x3f932db0ea132e22 > > + .quad 0x3f9228fb1fea2e28 > > + .quad 0x3f912487a5507f70 > > + .quad 0x3f90205658935847 > > + .quad 0x3f8e38ce3033310c > > + .quad 0x3f8c317384c75f06 > > + .quad 0x3f8a2a9c6c170462 > > + .quad 0x3f882448a388a2aa > > + .quad 0x3f861e77e8b53fc6 > > + .quad 0x3f841929f96832f0 > > + .quad 0x3f82145e939ef1e9 > > + .quad 0x3f8010157588de71 > > + .quad 0x3f7c189cbb0e27fb > > + .quad 0x3f78121214586b54 > > + .quad 0x3f740c8a747878e2 > > + .quad 0x3f70080559588b35 > > + .quad 0x3f680904828985c0 > > + .quad 0x3f60040155d5889e > > + .quad 0x3f50020055655889 > > + .quad 0x0000000000000000 > > + /*== poly_coeff[4] ==*/ > > + .align 16 > > + .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */ > > + .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */ > > + .quad 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */ > > + .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */ > > + /*== ExpMask ==*/ > > + .align 16 > > + .quad 0x000fffffffffffff, 0x000fffffffffffff > > + /*== Two10 ==*/ > > + .align 16 > > + .quad 0x3f50000000000000, 0x3f50000000000000 > > + /*== MinLog1p = -1+2^(-53) ==*/ > > + .align 16 > > + .quad 
0xbfefffffffffffff, 0xbfefffffffffffff > > + /*== MaxLog1p ==*/ > > + .align 16 > > + .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000 > > + /*== One ==*/ > > + .align 16 > > + .quad 0x3ff0000000000000, 0x3ff0000000000000 > > + /*== SgnMask ==*/ > > + .align 16 > > + .quad 0x7fffffffffffffff, 0x7fffffffffffffff > > + /*== XThreshold ==*/ > > + .align 16 > > + .quad 0x3e00000000000000, 0x3e00000000000000 > > + /*== XhMask ==*/ > > + .align 16 > > + .quad 0xfffffffffffffc00, 0xfffffffffffffc00 > > + /*== Threshold ==*/ > > + .align 16 > > + .quad 0x4086a00000000000, 0x4086a00000000000 > > + /*== Bias ==*/ > > + .align 16 > > + .quad 0x408ff80000000000, 0x408ff80000000000 > > + /*== Bias1 ==*/ > > + .align 16 > > + .quad 0x408ff00000000000, 0x408ff00000000000 > > + /*== ExpMask0 ==*/ > > + .align 16 > > + .quad 0x7ff0000000000000, 0x7ff0000000000000 > > + /*== ExpMask2 ==*/ > > + .align 16 > > + .quad 0x7f40000000000000, 0x7f40000000000000 > > + /*== L2 ==*/ > > + .align 16 > > + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF > > + .align 16 > > + .type __svml_dlog1p_data_internal,@object > > + .size __svml_dlog1p_data_internal,.-__svml_dlog1p_data_internal > > + .space 96, 0x00 > > + .align 16 > > + > > +.FLT_16: > > + .long 0x00000000,0x43380000,0x00000000,0x43380000 > > + .type .FLT_16,@object > > + .size .FLT_16,16 > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S > > new file mode 100644 > > index 0000000000..ec01af680c > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S > > @@ -0,0 +1,20 @@ > > +/* SSE version of vectorized log1p, vector length is 4. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library.
> > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#define _ZGVdN4v_log1p _ZGVdN4v_log1p_sse_wrapper > > +#include "../svml_d_log1p4_core.S" > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c > > new file mode 100644 > > index 0000000000..808f3224ef > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c > > @@ -0,0 +1,27 @@ > > +/* Multiple versions of vectorized log1p, vector length is 4. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. 
> > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#define SYMBOL_NAME _ZGVdN4v_log1p > > +#include "ifunc-mathvec-avx2.h" > > + > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > + > > +#ifdef SHARED > > +__hidden_ver1 (_ZGVdN4v_log1p, __GI__ZGVdN4v_log1p, __redirect__ZGVdN4v_log1p) > > + __attribute__ ((visibility ("hidden"))); > > +#endif > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S > > new file mode 100644 > > index 0000000000..548538b0ec > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S > > @@ -0,0 +1,1383 @@ > > +/* Function log1p vectorized with AVX2. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + https://www.gnu.org/licenses/. 
*/ > > + > > +/* > > + * ALGORITHM DESCRIPTION: > > + * > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > + * Get short reciprocal approximation Rcp ~ 1/xh > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > + * log(Rcp) is tabulated > > + * > > + * > > + */ > > + > > +/* Offsets for data table __svml_dlog1p_data_internal > > + */ > > +#define Log_HA_table 0 > > +#define Log_LA_table 8224 > > +#define poly_coeff 12352 > > +#define ExpMask 12480 > > +#define Two10 12512 > > +#define MinLog1p 12544 > > +#define MaxLog1p 12576 > > +#define One 12608 > > +#define SgnMask 12640 > > +#define XThreshold 12672 > > +#define XhMask 12704 > > +#define Threshold 12736 > > +#define Bias 12768 > > +#define Bias1 12800 > > +#define ExpMask0 12832 > > +#define ExpMask2 12864 > > +#define L2 12896 > > + > > +/* Lookup bias for data table __svml_dlog1p_data_internal. */ > > +#define Table_Lookup_Bias -0x405fe0 > > + > > +#include <sysdep.h> > > + > > + .text > > + .section .text.avx2,"ax",@progbits > > +ENTRY(_ZGVdN4v_log1p_avx2) > > + pushq %rbp > > + cfi_def_cfa_offset(16) > > + movq %rsp, %rbp > > + cfi_def_cfa(6, 16) > > + cfi_offset(6, -16) > > + andq $-32, %rsp > > + subq $96, %rsp > > + lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %r8 > > + > > +/* SgnMask used by all accuracies */ > > + vmovupd SgnMask+__svml_dlog1p_data_internal(%rip), %ymm12 > > + vmovupd One+__svml_dlog1p_data_internal(%rip), %ymm7 > > + > > +/* 2^ (-10-exp(X) ) */ > > + vmovupd ExpMask2+__svml_dlog1p_data_internal(%rip), %ymm3 > > + vmovapd %ymm0, %ymm9 > > + vandpd %ymm12, %ymm9, %ymm10 > > + vcmplt_oqpd XThreshold+__svml_dlog1p_data_internal(%rip), %ymm10, %ymm11 > > + vaddpd %ymm7, %ymm9, %ymm13 > > + > > +/* compute 1+x as high, low parts */ > > + vmaxpd %ymm9, %ymm7, %ymm15 > > + vminpd %ymm9, %ymm7, %ymm6 > > + vorpd XhMask+__svml_dlog1p_data_internal(%rip), %ymm11, %ymm14 > > + vandpd %ymm14, %ymm13, %ymm4 > > + > > +/* 
preserve mantissa, set input exponent to 2^(-10) */ > > + vandpd ExpMask+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm5 > > + vorpd Two10+__svml_dlog1p_data_internal(%rip), %ymm5, %ymm5 > > + > > +/* reciprocal approximation good to at least 11 bits */ > > + vcvtpd2ps %ymm5, %xmm2 > > + vsubpd %ymm4, %ymm15, %ymm0 > > + > > +/* check range */ > > + vcmplt_oqpd MinLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm15 > > + vrcpps %xmm2, %xmm1 > > + vaddpd %ymm0, %ymm6, %ymm6 > > + vcmpnle_uqpd MaxLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm0 > > + vcvtps2pd %xmm1, %ymm11 > > + > > +/* exponent of X needed to scale Xl */ > > + vandps ExpMask0+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm10 > > + vpsubq %ymm10, %ymm3, %ymm13 > > + > > +/* exponent bits */ > > + vpsrlq $20, %ymm4, %ymm4 > > + > > +/* round reciprocal to nearest integer, will have 1+9 mantissa bits */ > > + vroundpd $0, %ymm11, %ymm3 > > + > > +/* scale DblRcp */ > > + vmulpd %ymm13, %ymm3, %ymm2 > > + > > +/* exponent*log(2.0) */ > > + vmovupd Threshold+__svml_dlog1p_data_internal(%rip), %ymm13 > > + vfmsub213pd %ymm7, %ymm3, %ymm5 > > + > > +/* Compute SignMask for all accuracies, including EP */ > > + vandnpd %ymm9, %ymm12, %ymm8 > > + vorpd %ymm0, %ymm15, %ymm7 > > + > > +/* > > + * prepare table index > > + * table lookup > > + */ > > + vpsrlq $40, %ymm3, %ymm0 > > + > > +/* > > + * argument reduction > > + * VQFMS( D, R, X, DblRcp1, One ); > > + */ > > + vfmadd213pd %ymm5, %ymm2, %ymm6 > > + vmovupd poly_coeff+64+__svml_dlog1p_data_internal(%rip), %ymm2 > > + vcmplt_oqpd %ymm3, %ymm13, %ymm3 > > + vmulpd %ymm6, %ymm6, %ymm5 > > + vfmadd213pd poly_coeff+96+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm2 > > + > > +/* combine and get argument value range mask */ > > + vmovmskpd %ymm7, %eax > > + vextractf128 $1, %ymm4, %xmm12 > > + vshufps $221, %xmm12, %xmm4, %xmm14 > > + > > +/* biased exponent in DP format */ > > + vcvtdq2pd %xmm14, %ymm1 > > + vandpd 
Bias+__svml_dlog1p_data_internal(%rip), %ymm3, %ymm14 > > + vorpd Bias1+__svml_dlog1p_data_internal(%rip), %ymm14, %ymm15 > > + vsubpd %ymm15, %ymm1, %ymm1 > > + vmulpd L2+__svml_dlog1p_data_internal(%rip), %ymm1, %ymm3 > > + > > +/* polynomial */ > > + vmovupd poly_coeff+__svml_dlog1p_data_internal(%rip), %ymm1 > > + vfmadd213pd poly_coeff+32+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm1 > > + vfmadd213pd %ymm2, %ymm5, %ymm1 > > + > > +/* reconstruction */ > > + vfmadd213pd %ymm6, %ymm5, %ymm1 > > + vextractf128 $1, %ymm0, %xmm10 > > + vmovd %xmm0, %edx > > + vmovd %xmm10, %esi > > + movslq %edx, %rdx > > + vpextrd $2, %xmm0, %ecx > > + movslq %esi, %rsi > > + vpextrd $2, %xmm10, %edi > > + movslq %ecx, %rcx > > + movslq %edi, %rdi > > + vmovsd (%r8,%rdx), %xmm4 > > + vmovsd (%r8,%rsi), %xmm11 > > + vmovhpd (%r8,%rcx), %xmm4, %xmm7 > > + vmovhpd (%r8,%rdi), %xmm11, %xmm12 > > + vinsertf128 $1, %xmm12, %ymm7, %ymm0 > > + vaddpd %ymm1, %ymm0, %ymm6 > > + vaddpd %ymm6, %ymm3, %ymm0 > > + > > +/* OR in the Sign of input argument to produce correct log1p(-0) */ > > + vorpd %ymm8, %ymm0, %ymm0 > > + testl %eax, %eax > > + > > +/* Go to special inputs processing branch */ > > + jne L(SPECIAL_VALUES_BRANCH) > > + # LOE rbx r12 r13 r14 r15 eax ymm0 ymm9 > > + > > +/* Restore registers > > + * and exit the function > > + */ > > + > > +L(EXIT): > > + movq %rbp, %rsp > > + popq %rbp > > + cfi_def_cfa(7, 8) > > + cfi_restore(6) > > + ret > > + cfi_def_cfa(6, 16) > > + cfi_offset(6, -16) > > + > > +/* Branch to process > > + * special inputs > > + */ > > + > > +L(SPECIAL_VALUES_BRANCH): > > + vmovupd %ymm9, 32(%rsp) > > + vmovupd %ymm0, 64(%rsp) > > + # LOE rbx r12 r13 r14 r15 eax ymm0 > > + > > + xorl %edx, %edx > > + # LOE rbx r12 r13 r14 r15 eax edx > > + > > + vzeroupper > > + movq %r12, 16(%rsp) > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 
0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > > + movl %edx, %r12d > > + movq %r13, 8(%rsp) > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > > + movl %eax, %r13d > > + movq %r14, (%rsp) > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > > + # LOE rbx r15 r12d r13d > > + > > +/* Range mask > > + * bits check > > + */ > > + > > +L(RANGEMASK_CHECK): > > + btl %r12d, %r13d > > + > > +/* Call scalar math function */ > > + jc L(SCALAR_MATH_CALL) > > + # LOE rbx r15 r12d r13d > > + > > +/* Special inputs > > + * processing loop > > + */ > > + > > +L(SPECIAL_VALUES_LOOP): > > + incl %r12d > > + cmpl $4, %r12d > > + > > +/* Check bits in range mask */ > > + jl L(RANGEMASK_CHECK) > > + # LOE rbx r15 r12d r13d > > + > > + movq 16(%rsp), %r12 > > + cfi_restore(12) > > + movq 8(%rsp), %r13 > > + cfi_restore(13) > > + movq (%rsp), %r14 > > + cfi_restore(14) > > + vmovupd 64(%rsp), %ymm0 > > + > > +/* Go to exit */ > > + jmp L(EXIT) > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > > + 
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > > + # LOE rbx r12 r13 r14 r15 ymm0 > > + > > +/* Scalar math function call > > + * to process special input > > + */ > > + > > +L(SCALAR_MATH_CALL): > > + movl %r12d, %r14d > > + movsd 32(%rsp,%r14,8), %xmm0 > > + call log1p@PLT > > + # LOE rbx r14 r15 r12d r13d xmm0 > > + > > + movsd %xmm0, 64(%rsp,%r14,8) > > + > > +/* Process special inputs in loop */ > > + jmp L(SPECIAL_VALUES_LOOP) > > + # LOE rbx r15 r12d r13d > > +END(_ZGVdN4v_log1p_avx2) > > + > > + .section .rodata, "a" > > + .align 32 > > + > > +#ifdef __svml_dlog1p_data_internal_typedef > > +typedef unsigned int VUINT32; > > +typedef struct { > > + __declspec(align(32)) VUINT32 Log_HA_table[(1<<10)+2][2]; > > + __declspec(align(32)) VUINT32 Log_LA_table[(1<<9)+1][2]; > > + __declspec(align(32)) VUINT32 poly_coeff[4][4][2]; > > + __declspec(align(32)) VUINT32 ExpMask[4][2]; > > + __declspec(align(32)) VUINT32 Two10[4][2]; > > + __declspec(align(32)) VUINT32 MinLog1p[4][2]; > > + __declspec(align(32)) VUINT32 MaxLog1p[4][2]; > > + __declspec(align(32)) VUINT32 One[4][2]; > > + __declspec(align(32)) VUINT32 SgnMask[4][2]; > > + __declspec(align(32)) VUINT32 XThreshold[4][2]; > > + __declspec(align(32)) VUINT32 XhMask[4][2]; > > + __declspec(align(32)) VUINT32 Threshold[4][2]; > > + __declspec(align(32)) VUINT32 Bias[4][2]; > > + __declspec(align(32)) VUINT32 Bias1[4][2]; > > + __declspec(align(32)) VUINT32 ExpMask0[4][2]; > > + __declspec(align(32)) VUINT32 ExpMask2[4][2]; > > + __declspec(align(32)) VUINT32 L2[4][2]; > > +} __svml_dlog1p_data_internal; > > +#endif > > +__svml_dlog1p_data_internal: > > + /* Log_HA_table */ > > + .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100 > > + .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a > > + .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff > > + .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a > > + .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb > > + .quad 
0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e > > + .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b > > + .quad 0xc0862347acebaf68, 0xbe1cef3b152048af > > + .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e > > + .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4 > > + .quad 0xc08623537ac30980, 0xbe1cefc4642ee597 > > + .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16 > > + .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6 > > + .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362 > > + .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557 > > + .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b > > + .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed > > + .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed > > + .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f > > + .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce > > + .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7 > > + .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1 > > + .quad 0xc086238206e94218, 0xbe1ceee898588610 > > + .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea > > + .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6 > > + .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6 > > + .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165 > > + .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1 > > + .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b > > + .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670 > > + .quad 0xc08623a07b28ae60, 0xbe1cef359363787c > > + .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c > > + .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84 > > + .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7 > > + .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b > > + .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf > > + .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62 > > + .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b > > + .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98 > > + .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87 > > + .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff > > + .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798 > > + .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e > > + .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde > > + .quad 
0xc08623d4b5b47b20, 0xbe1cf17fece44f2b > > + .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c > > + .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98 > > + .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f > > + .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358 > > + .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380 > > + .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4 > > + .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b > > + .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2 > > + .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4 > > + .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400 > > + .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7 > > + .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a > > + .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d > > + .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b > > + .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575 > > + .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951 > > + .quad 0xc086241263e87f50, 0xbe1cf16e74768529 > > + .quad 0xc0862415f6193658, 0xbe1cefec64b8becb > > + .quad 0xc086241986b28f30, 0xbe1cf0838d210baa > > + .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11 > > + .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805 > > + .quad 0xc08624242f008380, 0xbe1ceea988c5a417 > > + .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5 > > + .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38 > > + .quad 0xc086242ec92eaee8, 0xbe1cef0946455411 > > + .quad 0xc08624324ecbaf98, 0xbe1cefea60907739 > > + .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42 > > + .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d > > + .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e > > + .quad 0xc086244055d2c968, 0xbe1cef345284c119 > > + .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219 > > + .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114 > > + .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189 > > + .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f > > + .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f > > + .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5 > > + .quad 0xc0862458a789e250, 0xbe1cf0b173796a31 > > + .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d > > + .quad 
0xc086245f8cd1da60, 0xbe1cf1916a723ceb > > + .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7 > > + .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f > > + .quad 0xc0862469d9a591c0, 0xbe1cef503d947663 > > + .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2 > > + .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc > > + .quad 0xc086247419475160, 0xbe1cf03dd9922331 > > + .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129 > > + .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6 > > + .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100 > > + .quad 0xc0862481af27c528, 0xbe1cee8a6593278a > > + .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7 > > + .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8 > > + .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002 > > + .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4 > > + .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c > > + .quad 0xc0862495e5179270, 0xbe1cee757f20c326 > > + .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4 > > + .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97 > > + .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb > > + .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e > > + .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b > > + .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80 > > + .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71 > > + .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9 > > + .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139 > > + .quad 0xc08624b72472a528, 0xbe1cf031c931c11f > > + .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7 > > + .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d > > + .quad 0xc08624c103245238, 0xbe1cefd492f1ba75 > > + .quad 0xc08624c44aacab08, 0xbe1cf1253e154466 > > + .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55 > > + .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe > > + .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f > > + .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968 > > + .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78 > > + .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75 > > + .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2 > > + .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d > > + .quad 
0xc08624e192946bf0, 0xbe1ceecc12b400ed > > + .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f > > + .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65 > > + .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078 > > + .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a > > + .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a > > + .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2 > > + .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc > > + .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501 > > + .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7 > > + .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c > > + .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c > > + .quad 0xc0862507f9448db0, 0xbe1cf082da464994 > > + .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf > > + .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531 > > + .quad 0xc08625117667dd78, 0xbe1cf1106599c962 > > + .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f > > + .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6 > > + .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4 > > + .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092 > > + .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd > > + .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7 > > + .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25 > > + .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d > > + .quad 0xc086252dab033898, 0xbe1cf220bba8861f > > + .quad 0xc0862530c732b078, 0xbe1cef51e310eae2 > > + .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae > > + .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8 > > + .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171 > > + .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408 > > + .quad 0xc08625404216d160, 0xbe1cf22d2536f06b > > + .quad 0xc08625435715e498, 0xbe1cef6abbf2e268 > > + .quad 0xc08625466ae57648, 0xbe1cf093a14789f5 > > + .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c > > + .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc > > + .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157 > > + .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997 > > + .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff > > + .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f > > + .quad 
0xc086255bd4ac4590, 0xbe1cf1b4c51113c9 > > + .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d > > + .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc > > + .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9 > > + .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5 > > + .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b > > + .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996 > > + .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945 > > + .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995 > > + .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c > > + .quad 0xc086257a09acaae0, 0xbe1cf172c3078947 > > + .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22 > > + .quad 0xc086258006ae71b8, 0xbe1cefdb80426923 > > + .quad 0xc08625830381da08, 0xbe1ceef1391a0372 > > + .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13 > > + .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83 > > + .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9 > > + .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0 > > + .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81 > > + .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766 > > + .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b > > + .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2 > > + .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec > > + .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e > > + .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7 > > + .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780 > > + .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11 > > + .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219 > > + .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160 > > + .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495 > > + .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5 > > + .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5 > > + .quad 0xc08625baf725ae28, 0xbe1cf05c80779283 > > + .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889 > > + .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124 > > + .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86 > > + .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092 > > + .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb > > + .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12 > > + .quad 
0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7 > > + .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e > > + .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701 > > + .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812 > > + .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e > > + .quad 0xc08625dd881990b0, 0xbe1cf14e1325c5e4 > > + .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12 > > + .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21 > > + .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2 > > + .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece > > + .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12 > > + .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad > > + .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3 > > + .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9 > > + .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1 > > + .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9 > > + .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2 > > + .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51 > > + .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e > > + .quad 0xc08626052294df58, 0xbe1cf1b745c57716 > > + .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23 > > + .quad 0xc086260abb103458, 0xbe1cef480ff1acd2 > > + .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef > > + .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5 > > + .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6 > > + .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a > > + .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545 > > + .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011 > > + .quad 0xc086261e32267e98, 0xbe1cf19917010e96 > > + .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985 > > + .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3 > > + .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c > > + .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50 > > + .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68 > > + .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9 > > + .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b > > + .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238 > > + .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e > > + .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d > > + .quad 
0xc086263c6a438ef0, 0xbe1cf1812ee72dba > > + .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279 > > + .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085 > > + .quad 0xc086264494738e08, 0xbe1cf06797bd03b2 > > + .quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1 > > + .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f > > + .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a > > + .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39 > > + .quad 0xc08626521daf7758, 0xbe1cf252595aceb3 > > + .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2 > > + .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa > > + .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366 > > + .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b > > + .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0 > > + .quad 0xc08626623df56e38, 0xbe1cf080e10b8365 > > + .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544 > > + .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9 > > + .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9 > > + .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2 > > + .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c > > + .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6 > > + .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d > > + .quad 0xc08626778c3d4798, 0xbe1cefe260819380 > > + .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3 > > + .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa > > + .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1 > > + .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52 > > + .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd > > + .quad 0xc086268762086350, 0xbe1cefaee1edfa35 > > + .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936 > > + .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed > > + .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49 > > + .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e > > + .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc > > + .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840 > > + .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be > > + .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c > > + .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06 > > + .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e > > + .quad 
0xc08626a4198be4a8, 0xbe1cefef936752b3 > > + .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68 > > + .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5 > > + .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986 > > + .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d > > + .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26 > > + .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06 > > + .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652 > > + .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f > > + .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c > > + .quad 0xc08626bddc737648, 0xbe1ceec10a020e73 > > + .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7 > > + .quad 0xc08626c2f9890ca0, 0xbe1cf0cb8f169ffe > > + .quad 0xc08626c586da9388, 0xbe1cef7de2452430 > > + .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae > > + .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d > > + .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3 > > + .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d > > + .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e > > + .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64 > > + .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2 > > + .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d > > + .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab > > + .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153 > > + .quad 0xc08626e164224880, 0xbe1ceeb431709788 > > + .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5 > > + .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b > > + .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93 > > + .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8 > > + .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2 > > + .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6 > > + .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef > > + .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339 > > + .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1 > > + .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28 > > + .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f > > + .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3 > > + .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6 > > + .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6 > > + .quad 
0xc0862706c985b1c0, 0xbe1cef82a44990f3 > > + .quad 0xc086270941934b10, 0xbe1ceefe32981f2c > > + .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445 > > + .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c > > + .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f > > + .quad 0xc08627131a321318, 0xbe1cef04ac0fb802 > > + .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd > > + .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5 > > + .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570 > > + .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85 > > + .quad 0xc086271f58064068, 0xbe1cef092a785e3f > > + .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30 > > + .quad 0xc086272438546be8, 0xbe1cf210907ded8b > > + .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99 > > + .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc > > + .quad 0xc086272b833b8df0, 0xbe1cf06874992df5 > > + .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899 > > + .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99 > > + .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe > > + .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d > > + .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8 > > + .quad 0xc086273a05367688, 0xbe1cf18656c50806 > > + .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a > > + .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911 > > + .quad 0xc08627413c621848, 0xbe1cf188a4ea680c > > + .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80 > > + .quad 0xc086274608397868, 0xbe1cf25a328c28e2 > > + .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8 > > + .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a > > + .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228 > > + .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c > > + .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44 > > + .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2 > > + .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4 > > + .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a > > + .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9 > > + .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627 > > + .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e > > + .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee > > + .quad 
0xc0862764efdbf768, 0xbe1ceeb06e1d0dad > > + .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5 > > + .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f > > + .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312 > > + .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85 > > + .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011 > > + .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7 > > + .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da > > + .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554 > > + .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377 > > + .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd > > + .quad 0xc086277eba506158, 0xbe1cf0b911b029f0 > > + .quad 0xc08627810e6f4028, 0xbe1cefdc24719766 > > + .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7 > > + .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec > > + .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc > > + .quad 0xc086278a58297918, 0xbe1cf053073872bf > > + .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947 > > + .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234 > > + .quad 0xc086279148685aa0, 0xbe1cf162204794a8 > > + .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac > > + .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3 > > + .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388 > > + .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5 > > + .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f > > + .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a > > + .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f > > + .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f > > + .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26 > > + .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a > > + .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81 > > + .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d > > + .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893 > > + .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0 > > + .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8 > > + .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00 > > + .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2 > > + .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4 > > + .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7 > > + .quad 
0xc08627bef2b3d2b0, 0xbe1cf211497127e3 > > + .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d > > + .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e > > + .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93 > > + .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a > > + .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9 > > + .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f > > + .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1 > > + .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4 > > + .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb > > + .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b > > + .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b > > + .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96 > > + .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477 > > + .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2 > > + .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c > > + .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875 > > + .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522 > > + .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57 > > + .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e > > + .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548 > > + .quad 0xc08627edd34756b8, 0xbe1cef36b3366305 > > + .quad 0xc08627f007f0a408, 0xbe1cf18134625550 > > + .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11 > > + .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc > > + .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8 > > + .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe > > + .quad 0xc08627fb06290f90, 0xbe1cf25188430e25 > > + .quad 0xc08627fd37324070, 0xbe1ceea1713490f9 > > + .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c > > + .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b > > + .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b > > + .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c > > + .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a > > + .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4 > > + .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256 > > + .quad 0xc086280eaa003050, 0xbe1cf010ad787fea > > + .quad 0xc0862810d5af5880, 0xbe1cee622478393d > > + .quad 0xc086281300c7e368, 0xbe1cf01c7482564f > > + .quad 
0xc08628152b4a22a0, 0xbe1cf0de20d33536 > > + .quad 0xc086281755366778, 0xbe1cef2edae5837d > > + .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9 > > + .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8 > > + .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83 > > + .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4 > > + .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9 > > + .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2 > > + .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d > > + .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1 > > + .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b > > + .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02 > > + .quad 0xc086282efc0ca328, 0xbe1cf0bc139379a9 > > + .quad 0xc08628311f099420, 0xbe1cef247a9ec596 > > + .quad 0xc086283341749490, 0xbe1cef74bbcc488a > > + .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e > > + .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810 > > + .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8 > > + .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065 > > + .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e > > + .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234 > > + .quad 0xc08628422284b168, 0xbe1cf0abf7638127 > > + .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058 > > + .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c > > + .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1 > > + .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43 > > + .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09 > > + .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60 > > + .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393 > > + .quad 0xc0862853021d4588, 0xbe1cf176adb417f7 > > + .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da > > + .quad 0xc08628573479b220, 0xbe1ceec34cf49523 > > + .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb > > + .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b > > + .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d > > + .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5 > > + .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792 > > + .quad 0xc0862863be697458, 0xbe1cf097f890c6f8 > > + .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc > > + .quad 
0xc0862867e809cf00, 0xbe1ceeffb239dac7 > > + .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98 > > + .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7 > > + .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d > > + .quad 0xc086287034d0b690, 0xbe1ceff262d0a248 > > + .quad 0xc086287246aab180, 0xbe1cefa7bc194186 > > + .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9 > > + .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07 > > + .quad 0xc086287879041490, 0xbe1cf034803c8a48 > > + .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f > > + .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7 > > + .quad 0xc086287ea6946958, 0xbe1cefb1e4625943 > > + .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0 > > + .quad 0xc0862882c24faff8, 0xbe1cee9896d016da > > + .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc > > + .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34 > > + .quad 0xc0862888e7f699e0, 0xbe1cf05603549486 > > + .quad 0xc086288af37750b0, 0xbe1cef50fff513d3 > > + .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0 > > + .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d > > + .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f > > + .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed > > + .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d > > + .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646 > > + .quad 0xc0862899356c1150, 0xbe1ceec4501167e9 > > + .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f > > + .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35 > > + .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a > > + .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464 > > + .quad 0xc08628a355104818, 0xbe1cf0435e2782b0 > > + .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c > > + .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d > > + .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2 > > + .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9 > > + .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979 > > + .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c > > + .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32 > > + .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2 > > + .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303 > > + .quad 
0xc08628b76e3a4180, 0xbe1cf05cd5eed880 > > + /*== Log_LA_table ==*/ > > + .align 32 > > + .quad 0x8000000000000000 > > + .quad 0xbf5ff802a9ab10e6 > > + .quad 0xbf6ff00aa2b10bc0 > > + .quad 0xbf77ee11ebd82e94 > > + .quad 0xbf7fe02a6b106789 > > + .quad 0xbf83e7295d25a7d9 > > + .quad 0xbf87dc475f810a77 > > + .quad 0xbf8bcf712c74384c > > + .quad 0xbf8fc0a8b0fc03e4 > > + .quad 0xbf91d7f7eb9eebe7 > > + .quad 0xbf93cea44346a575 > > + .quad 0xbf95c45a51b8d389 > > + .quad 0xbf97b91b07d5b11b > > + .quad 0xbf99ace7551cc514 > > + .quad 0xbf9b9fc027af9198 > > + .quad 0xbf9d91a66c543cc4 > > + .quad 0xbf9f829b0e783300 > > + .quad 0xbfa0b94f7c196176 > > + .quad 0xbfa1b0d98923d980 > > + .quad 0xbfa2a7ec2214e873 > > + .quad 0xbfa39e87b9febd60 > > + .quad 0xbfa494acc34d911c > > + .quad 0xbfa58a5bafc8e4d5 > > + .quad 0xbfa67f94f094bd98 > > + .quad 0xbfa77458f632dcfc > > + .quad 0xbfa868a83083f6cf > > + .quad 0xbfa95c830ec8e3eb > > + .quad 0xbfaa4fe9ffa3d235 > > + .quad 0xbfab42dd711971bf > > + .quad 0xbfac355dd0921f2d > > + .quad 0xbfad276b8adb0b52 > > + .quad 0xbfae19070c276016 > > + .quad 0xbfaf0a30c01162a6 > > + .quad 0xbfaffae9119b9303 > > + .quad 0xbfb075983598e471 > > + .quad 0xbfb0ed839b5526fe > > + .quad 0xbfb16536eea37ae1 > > + .quad 0xbfb1dcb263db1944 > > + .quad 0xbfb253f62f0a1417 > > + .quad 0xbfb2cb0283f5de1f > > + .quad 0xbfb341d7961bd1d1 > > + .quad 0xbfb3b87598b1b6ee > > + .quad 0xbfb42edcbea646f0 > > + .quad 0xbfb4a50d3aa1b040 > > + .quad 0xbfb51b073f06183f > > + .quad 0xbfb590cafdf01c28 > > + .quad 0xbfb60658a93750c4 > > + .quad 0xbfb67bb0726ec0fc > > + .quad 0xbfb6f0d28ae56b4c > > + .quad 0xbfb765bf23a6be13 > > + .quad 0xbfb7da766d7b12cd > > + .quad 0xbfb84ef898e8282a > > + .quad 0xbfb8c345d6319b21 > > + .quad 0xbfb9375e55595ede > > + .quad 0xbfb9ab42462033ad > > + .quad 0xbfba1ef1d8061cd4 > > + .quad 0xbfba926d3a4ad563 > > + .quad 0xbfbb05b49bee43fe > > + .quad 0xbfbb78c82bb0eda1 > > + .quad 0xbfbbeba818146765 > > + .quad 0xbfbc5e548f5bc743 > > + .quad 
0xbfbcd0cdbf8c13e1 > > + .quad 0xbfbd4313d66cb35d > > + .quad 0xbfbdb5270187d927 > > + .quad 0xbfbe27076e2af2e6 > > + .quad 0xbfbe98b549671467 > > + .quad 0xbfbf0a30c01162a6 > > + .quad 0xbfbf7b79fec37ddf > > + .quad 0xbfbfec9131dbeabb > > + .quad 0xbfc02ebb42bf3d4b > > + .quad 0xbfc0671512ca596e > > + .quad 0xbfc09f561ee719c3 > > + .quad 0xbfc0d77e7cd08e59 > > + .quad 0xbfc10f8e422539b1 > > + .quad 0xbfc14785846742ac > > + .quad 0xbfc17f6458fca611 > > + .quad 0xbfc1b72ad52f67a0 > > + .quad 0xbfc1eed90e2dc2c3 > > + .quad 0xbfc2266f190a5acb > > + .quad 0xbfc25ded0abc6ad2 > > + .quad 0xbfc29552f81ff523 > > + .quad 0xbfc2cca0f5f5f251 > > + .quad 0xbfc303d718e47fd3 > > + .quad 0xbfc33af575770e4f > > + .quad 0xbfc371fc201e8f74 > > + .quad 0xbfc3a8eb2d31a376 > > + .quad 0xbfc3dfc2b0ecc62a > > + .quad 0xbfc41682bf727bc0 > > + .quad 0xbfc44d2b6ccb7d1e > > + .quad 0xbfc483bccce6e3dd > > + .quad 0xbfc4ba36f39a55e5 > > + .quad 0xbfc4f099f4a230b2 > > + .quad 0xbfc526e5e3a1b438 > > + .quad 0xbfc55d1ad4232d6f > > + .quad 0xbfc59338d9982086 > > + .quad 0xbfc5c940075972b9 > > + .quad 0xbfc5ff3070a793d4 > > + .quad 0xbfc6350a28aaa758 > > + .quad 0xbfc66acd4272ad51 > > + .quad 0xbfc6a079d0f7aad2 > > + .quad 0xbfc6d60fe719d21d > > + .quad 0xbfc70b8f97a1aa75 > > + .quad 0xbfc740f8f54037a5 > > + .quad 0xbfc7764c128f2127 > > + .quad 0xbfc7ab890210d909 > > + .quad 0xbfc7e0afd630c274 > > + .quad 0xbfc815c0a14357eb > > + .quad 0xbfc84abb75865139 > > + .quad 0xbfc87fa06520c911 > > + .quad 0xbfc8b46f8223625b > > + .quad 0xbfc8e928de886d41 > > + .quad 0xbfc91dcc8c340bde > > + .quad 0xbfc9525a9cf456b4 > > + .quad 0xbfc986d3228180ca > > + .quad 0xbfc9bb362e7dfb83 > > + .quad 0xbfc9ef83d2769a34 > > + .quad 0xbfca23bc1fe2b563 > > + .quad 0xbfca57df28244dcd > > + .quad 0xbfca8becfc882f19 > > + .quad 0xbfcabfe5ae46124c > > + .quad 0xbfcaf3c94e80bff3 > > + .quad 0xbfcb2797ee46320c > > + .quad 0xbfcb5b519e8fb5a4 > > + .quad 0xbfcb8ef670420c3b > > + .quad 0xbfcbc286742d8cd6 > > + .quad 
0xbfcbf601bb0e44e2 > > + .quad 0xbfcc2968558c18c1 > > + .quad 0xbfcc5cba543ae425 > > + .quad 0xbfcc8ff7c79a9a22 > > + .quad 0xbfccc320c0176502 > > + .quad 0xbfccf6354e09c5dc > > + .quad 0xbfcd293581b6b3e7 > > + .quad 0xbfcd5c216b4fbb91 > > + .quad 0xbfcd8ef91af31d5e > > + .quad 0xbfcdc1bca0abec7d > > + .quad 0xbfcdf46c0c722d2f > > + .quad 0xbfce27076e2af2e6 > > + .quad 0xbfce598ed5a87e2f > > + .quad 0xbfce8c0252aa5a60 > > + .quad 0xbfcebe61f4dd7b0b > > + .quad 0xbfcef0adcbdc5936 > > + .quad 0xbfcf22e5e72f105d > > + .quad 0xbfcf550a564b7b37 > > + .quad 0xbfcf871b28955045 > > + .quad 0xbfcfb9186d5e3e2b > > + .quad 0xbfcfeb0233e607cc > > + .quad 0xbfd00e6c45ad501d > > + .quad 0xbfd0274dc16c232f > > + .quad 0xbfd0402594b4d041 > > + .quad 0xbfd058f3c703ebc6 > > + .quad 0xbfd071b85fcd590d > > + .quad 0xbfd08a73667c57af > > + .quad 0xbfd0a324e27390e3 > > + .quad 0xbfd0bbccdb0d24bd > > + .quad 0xbfd0d46b579ab74b > > + .quad 0xbfd0ed005f657da4 > > + .quad 0xbfd1058bf9ae4ad5 > > + .quad 0xbfd11e0e2dad9cb7 > > + .quad 0xbfd136870293a8b0 > > + .quad 0xbfd14ef67f88685a > > + .quad 0xbfd1675cababa60e > > + .quad 0xbfd17fb98e15095d > > + .quad 0xbfd1980d2dd4236f > > + .quad 0xbfd1b05791f07b49 > > + .quad 0xbfd1c898c16999fb > > + .quad 0xbfd1e0d0c33716be > > + .quad 0xbfd1f8ff9e48a2f3 > > + .quad 0xbfd211255986160c > > + .quad 0xbfd22941fbcf7966 > > + .quad 0xbfd241558bfd1404 > > + .quad 0xbfd2596010df763a > > + .quad 0xbfd27161913f853d > > + .quad 0xbfd2895a13de86a3 > > + .quad 0xbfd2a1499f762bc9 > > + .quad 0xbfd2b9303ab89d25 > > + .quad 0xbfd2d10dec508583 > > + .quad 0xbfd2e8e2bae11d31 > > + .quad 0xbfd300aead06350c > > + .quad 0xbfd31871c9544185 > > + .quad 0xbfd3302c16586588 > > + .quad 0xbfd347dd9a987d55 > > + .quad 0xbfd35f865c93293e > > + .quad 0xbfd3772662bfd85b > > + .quad 0xbfd38ebdb38ed321 > > + .quad 0xbfd3a64c556945ea > > + .quad 0xbfd3bdd24eb14b6a > > + .quad 0xbfd3d54fa5c1f710 > > + .quad 0xbfd3ecc460ef5f50 > > + .quad 0xbfd404308686a7e4 > > + .quad 
0xbfd41b941cce0bee > > + .quad 0xbfd432ef2a04e814 > > + .quad 0xbfd44a41b463c47c > > + .quad 0xbfd4618bc21c5ec2 > > + .quad 0xbfd478cd5959b3d9 > > + .quad 0xbfd49006804009d1 > > + .quad 0xbfd4a7373cecf997 > > + .quad 0xbfd4be5f957778a1 > > + .quad 0xbfd4d57f8fefe27f > > + .quad 0xbfd4ec973260026a > > + .quad 0xbfd503a682cb1cb3 > > + .quad 0xbfd51aad872df82d > > + .quad 0xbfd531ac457ee77e > > + .quad 0xbfd548a2c3add263 > > + .quad 0xbfd55f9107a43ee2 > > + .quad 0xbfd5767717455a6c > > + .quad 0xbfd58d54f86e02f2 > > + .quad 0xbfd5a42ab0f4cfe2 > > + .quad 0xbfd5baf846aa1b19 > > + .quad 0xbfd5d1bdbf5809ca > > + .quad 0xbfd5e87b20c2954a > > + .quad 0xbfd5ff3070a793d4 > > + .quad 0xbfd615ddb4bec13c > > + .quad 0xbfd62c82f2b9c795 > > + .quad 0x3fd61965cdb02c1f > > + .quad 0x3fd602d08af091ec > > + .quad 0x3fd5ec433d5c35ae > > + .quad 0x3fd5d5bddf595f30 > > + .quad 0x3fd5bf406b543db2 > > + .quad 0x3fd5a8cadbbedfa1 > > + .quad 0x3fd5925d2b112a59 > > + .quad 0x3fd57bf753c8d1fb > > + .quad 0x3fd565995069514c > > + .quad 0x3fd54f431b7be1a9 > > + .quad 0x3fd538f4af8f72fe > > + .quad 0x3fd522ae0738a3d8 > > + .quad 0x3fd50c6f1d11b97c > > + .quad 0x3fd4f637ebba9810 > > + .quad 0x3fd4e0086dd8baca > > + .quad 0x3fd4c9e09e172c3c > > + .quad 0x3fd4b3c077267e9a > > + .quad 0x3fd49da7f3bcc41f > > + .quad 0x3fd487970e958770 > > + .quad 0x3fd4718dc271c41b > > + .quad 0x3fd45b8c0a17df13 > > + .quad 0x3fd44591e0539f49 > > + .quad 0x3fd42f9f3ff62642 > > + .quad 0x3fd419b423d5e8c7 > > + .quad 0x3fd403d086cea79c > > + .quad 0x3fd3edf463c1683e > > + .quad 0x3fd3d81fb5946dba > > + .quad 0x3fd3c25277333184 > > + .quad 0x3fd3ac8ca38e5c5f > > + .quad 0x3fd396ce359bbf54 > > + .quad 0x3fd3811728564cb2 > > + .quad 0x3fd36b6776be1117 > > + .quad 0x3fd355bf1bd82c8b > > + .quad 0x3fd3401e12aecba1 > > + .quad 0x3fd32a84565120a8 > > + .quad 0x3fd314f1e1d35ce4 > > + .quad 0x3fd2ff66b04ea9d4 > > + .quad 0x3fd2e9e2bce12286 > > + .quad 0x3fd2d46602adccee > > + .quad 0x3fd2bef07cdc9354 > > + .quad 
0x3fd2a982269a3dbf > > + .quad 0x3fd2941afb186b7c > > + .quad 0x3fd27ebaf58d8c9d > > + .quad 0x3fd269621134db92 > > + .quad 0x3fd25410494e56c7 > > + .quad 0x3fd23ec5991eba49 > > + .quad 0x3fd22981fbef797b > > + .quad 0x3fd214456d0eb8d4 > > + .quad 0x3fd1ff0fe7cf47a7 > > + .quad 0x3fd1e9e1678899f4 > > + .quad 0x3fd1d4b9e796c245 > > + .quad 0x3fd1bf99635a6b95 > > + .quad 0x3fd1aa7fd638d33f > > + .quad 0x3fd1956d3b9bc2fa > > + .quad 0x3fd180618ef18adf > > + .quad 0x3fd16b5ccbacfb73 > > + .quad 0x3fd1565eed455fc3 > > + .quad 0x3fd14167ef367783 > > + .quad 0x3fd12c77cd00713b > > + .quad 0x3fd1178e8227e47c > > + .quad 0x3fd102ac0a35cc1c > > + .quad 0x3fd0edd060b78081 > > + .quad 0x3fd0d8fb813eb1ef > > + .quad 0x3fd0c42d676162e3 > > + .quad 0x3fd0af660eb9e279 > > + .quad 0x3fd09aa572e6c6d4 > > + .quad 0x3fd085eb8f8ae797 > > + .quad 0x3fd07138604d5862 > > + .quad 0x3fd05c8be0d9635a > > + .quad 0x3fd047e60cde83b8 > > + .quad 0x3fd03346e0106062 > > + .quad 0x3fd01eae5626c691 > > + .quad 0x3fd00a1c6adda473 > > + .quad 0x3fcfeb2233ea07cd > > + .quad 0x3fcfc218be620a5e > > + .quad 0x3fcf991c6cb3b379 > > + .quad 0x3fcf702d36777df0 > > + .quad 0x3fcf474b134df229 > > + .quad 0x3fcf1e75fadf9bde > > + .quad 0x3fcef5ade4dcffe6 > > + .quad 0x3fceccf2c8fe920a > > + .quad 0x3fcea4449f04aaf5 > > + .quad 0x3fce7ba35eb77e2a > > + .quad 0x3fce530effe71012 > > + .quad 0x3fce2a877a6b2c12 > > + .quad 0x3fce020cc6235ab5 > > + .quad 0x3fcdd99edaf6d7e9 > > + .quad 0x3fcdb13db0d48940 > > + .quad 0x3fcd88e93fb2f450 > > + .quad 0x3fcd60a17f903515 > > + .quad 0x3fcd38666871f465 > > + .quad 0x3fcd1037f2655e7b > > + .quad 0x3fcce816157f1988 > > + .quad 0x3fccc000c9db3c52 > > + .quad 0x3fcc97f8079d44ec > > + .quad 0x3fcc6ffbc6f00f71 > > + .quad 0x3fcc480c0005ccd1 > > + .quad 0x3fcc2028ab17f9b4 > > + .quad 0x3fcbf851c067555f > > + .quad 0x3fcbd087383bd8ad > > + .quad 0x3fcba8c90ae4ad19 > > + .quad 0x3fcb811730b823d2 > > + .quad 0x3fcb5971a213acdb > > + .quad 0x3fcb31d8575bce3d > > + .quad 
0x3fcb0a4b48fc1b46 > > + .quad 0x3fcae2ca6f672bd4 > > + .quad 0x3fcabb55c31693ad > > + .quad 0x3fca93ed3c8ad9e3 > > + .quad 0x3fca6c90d44b704e > > + .quad 0x3fca454082e6ab05 > > + .quad 0x3fca1dfc40f1b7f1 > > + .quad 0x3fc9f6c407089664 > > + .quad 0x3fc9cf97cdce0ec3 > > + .quad 0x3fc9a8778debaa38 > > + .quad 0x3fc981634011aa75 > > + .quad 0x3fc95a5adcf7017f > > + .quad 0x3fc9335e5d594989 > > + .quad 0x3fc90c6db9fcbcd9 > > + .quad 0x3fc8e588ebac2dbf > > + .quad 0x3fc8beafeb38fe8c > > + .quad 0x3fc897e2b17b19a5 > > + .quad 0x3fc871213750e994 > > + .quad 0x3fc84a6b759f512f > > + .quad 0x3fc823c16551a3c2 > > + .quad 0x3fc7fd22ff599d4f > > + .quad 0x3fc7d6903caf5ad0 > > + .quad 0x3fc7b0091651528c > > + .quad 0x3fc7898d85444c73 > > + .quad 0x3fc7631d82935a86 > > + .quad 0x3fc73cb9074fd14d > > + .quad 0x3fc716600c914054 > > + .quad 0x3fc6f0128b756abc > > + .quad 0x3fc6c9d07d203fc7 > > + .quad 0x3fc6a399dabbd383 > > + .quad 0x3fc67d6e9d785771 > > + .quad 0x3fc6574ebe8c133a > > + .quad 0x3fc6313a37335d76 > > + .quad 0x3fc60b3100b09476 > > + .quad 0x3fc5e533144c1719 > > + .quad 0x3fc5bf406b543db2 > > + .quad 0x3fc59958ff1d52f1 > > + .quad 0x3fc5737cc9018cdd > > + .quad 0x3fc54dabc26105d2 > > + .quad 0x3fc527e5e4a1b58d > > + .quad 0x3fc5022b292f6a45 > > + .quad 0x3fc4dc7b897bc1c8 > > + .quad 0x3fc4b6d6fefe22a4 > > + .quad 0x3fc4913d8333b561 > > + .quad 0x3fc46baf0f9f5db7 > > + .quad 0x3fc4462b9dc9b3dc > > + .quad 0x3fc420b32740fdd4 > > + .quad 0x3fc3fb45a59928cc > > + .quad 0x3fc3d5e3126bc27f > > + .quad 0x3fc3b08b6757f2a9 > > + .quad 0x3fc38b3e9e027479 > > + .quad 0x3fc365fcb0159016 > > + .quad 0x3fc340c59741142e > > + .quad 0x3fc31b994d3a4f85 > > + .quad 0x3fc2f677cbbc0a96 > > + .quad 0x3fc2d1610c86813a > > + .quad 0x3fc2ac55095f5c59 > > + .quad 0x3fc28753bc11aba5 > > + .quad 0x3fc2625d1e6ddf57 > > + .quad 0x3fc23d712a49c202 > > + .quad 0x3fc2188fd9807263 > > + .quad 0x3fc1f3b925f25d41 > > + .quad 0x3fc1ceed09853752 > > + .quad 0x3fc1aa2b7e23f72a > > + .quad 
0x3fc185747dbecf34 > > + .quad 0x3fc160c8024b27b1 > > + .quad 0x3fc13c2605c398c3 > > + .quad 0x3fc1178e8227e47c > > + .quad 0x3fc0f301717cf0fb > > + .quad 0x3fc0ce7ecdccc28d > > + .quad 0x3fc0aa06912675d5 > > + .quad 0x3fc08598b59e3a07 > > + .quad 0x3fc06135354d4b18 > > + .quad 0x3fc03cdc0a51ec0d > > + .quad 0x3fc0188d2ecf6140 > > + .quad 0x3fbfe89139dbd566 > > + .quad 0x3fbfa01c9db57ce2 > > + .quad 0x3fbf57bc7d9005db > > + .quad 0x3fbf0f70cdd992e3 > > + .quad 0x3fbec739830a1120 > > + .quad 0x3fbe7f1691a32d3e > > + .quad 0x3fbe3707ee30487b > > + .quad 0x3fbdef0d8d466db9 > > + .quad 0x3fbda727638446a2 > > + .quad 0x3fbd5f55659210e2 > > + .quad 0x3fbd179788219364 > > + .quad 0x3fbccfedbfee13a8 > > + .quad 0x3fbc885801bc4b23 > > + .quad 0x3fbc40d6425a5cb1 > > + .quad 0x3fbbf968769fca11 > > + .quad 0x3fbbb20e936d6974 > > + .quad 0x3fbb6ac88dad5b1c > > + .quad 0x3fbb23965a52ff00 > > + .quad 0x3fbadc77ee5aea8c > > + .quad 0x3fba956d3ecade63 > > + .quad 0x3fba4e7640b1bc38 > > + .quad 0x3fba0792e9277cac > > + .quad 0x3fb9c0c32d4d2548 > > + .quad 0x3fb97a07024cbe74 > > + .quad 0x3fb9335e5d594989 > > + .quad 0x3fb8ecc933aeb6e8 > > + .quad 0x3fb8a6477a91dc29 > > + .quad 0x3fb85fd927506a48 > > + .quad 0x3fb8197e2f40e3f0 > > + .quad 0x3fb7d33687c293c9 > > + .quad 0x3fb78d02263d82d3 > > + .quad 0x3fb746e100226ed9 > > + .quad 0x3fb700d30aeac0e1 > > + .quad 0x3fb6bad83c1883b6 > > + .quad 0x3fb674f089365a7a > > + .quad 0x3fb62f1be7d77743 > > + .quad 0x3fb5e95a4d9791cb > > + .quad 0x3fb5a3abb01ade25 > > + .quad 0x3fb55e10050e0384 > > + .quad 0x3fb518874226130a > > + .quad 0x3fb4d3115d207eac > > + .quad 0x3fb48dae4bc31018 > > + .quad 0x3fb4485e03dbdfad > > + .quad 0x3fb403207b414b7f > > + .quad 0x3fb3bdf5a7d1ee64 > > + .quad 0x3fb378dd7f749714 > > + .quad 0x3fb333d7f8183f4b > > + .quad 0x3fb2eee507b40301 > > + .quad 0x3fb2aa04a44717a5 > > + .quad 0x3fb26536c3d8c369 > > + .quad 0x3fb2207b5c78549e > > + .quad 0x3fb1dbd2643d190b > > + .quad 0x3fb1973bd1465567 > > + .quad 
0x3fb152b799bb3cc9 > > + .quad 0x3fb10e45b3cae831 > > + .quad 0x3fb0c9e615ac4e17 > > + .quad 0x3fb08598b59e3a07 > > + .quad 0x3fb0415d89e74444 > > + .quad 0x3faffa6911ab9301 > > + .quad 0x3faf723b517fc523 > > + .quad 0x3faeea31c006b87c > > + .quad 0x3fae624c4a0b5e1b > > + .quad 0x3fadda8adc67ee4e > > + .quad 0x3fad52ed6405d86f > > + .quad 0x3faccb73cdddb2cc > > + .quad 0x3fac441e06f72a9e > > + .quad 0x3fabbcebfc68f420 > > + .quad 0x3fab35dd9b58baad > > + .quad 0x3faaaef2d0fb10fc > > + .quad 0x3faa282b8a936171 > > + .quad 0x3fa9a187b573de7c > > + .quad 0x3fa91b073efd7314 > > + .quad 0x3fa894aa149fb343 > > + .quad 0x3fa80e7023d8ccc4 > > + .quad 0x3fa788595a3577ba > > + .quad 0x3fa70265a550e777 > > + .quad 0x3fa67c94f2d4bb58 > > + .quad 0x3fa5f6e73078efb8 > > + .quad 0x3fa5715c4c03ceef > > + .quad 0x3fa4ebf43349e26f > > + .quad 0x3fa466aed42de3ea > > + .quad 0x3fa3e18c1ca0ae92 > > + .quad 0x3fa35c8bfaa1306b > > + .quad 0x3fa2d7ae5c3c5bae > > + .quad 0x3fa252f32f8d183f > > + .quad 0x3fa1ce5a62bc353a > > + .quad 0x3fa149e3e4005a8d > > + .quad 0x3fa0c58fa19dfaaa > > + .quad 0x3fa0415d89e74444 > > + .quad 0x3f9f7a9b16782856 > > + .quad 0x3f9e72bf2813ce51 > > + .quad 0x3f9d6b2725979802 > > + .quad 0x3f9c63d2ec14aaf2 > > + .quad 0x3f9b5cc258b718e6 > > + .quad 0x3f9a55f548c5c43f > > + .quad 0x3f994f6b99a24475 > > + .quad 0x3f98492528c8cabf > > + .quad 0x3f974321d3d006d3 > > + .quad 0x3f963d6178690bd6 > > + .quad 0x3f9537e3f45f3565 > > + .quad 0x3f9432a925980cc1 > > + .quad 0x3f932db0ea132e22 > > + .quad 0x3f9228fb1fea2e28 > > + .quad 0x3f912487a5507f70 > > + .quad 0x3f90205658935847 > > + .quad 0x3f8e38ce3033310c > > + .quad 0x3f8c317384c75f06 > > + .quad 0x3f8a2a9c6c170462 > > + .quad 0x3f882448a388a2aa > > + .quad 0x3f861e77e8b53fc6 > > + .quad 0x3f841929f96832f0 > > + .quad 0x3f82145e939ef1e9 > > + .quad 0x3f8010157588de71 > > + .quad 0x3f7c189cbb0e27fb > > + .quad 0x3f78121214586b54 > > + .quad 0x3f740c8a747878e2 > > + .quad 0x3f70080559588b35 > > + .quad 
0x3f680904828985c0 > > + .quad 0x3f60040155d5889e > > + .quad 0x3f50020055655889 > > + .quad 0x0000000000000000 > > + /*== poly_coeff[4] ==*/ > > + .align 32 > > + .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */ > > + .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */ > > + .quad 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */ > > + .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */ > > + /*== ExpMask ==*/ > > + .align 32 > > + .quad 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff > > + /*== Two10 ==*/ > > + .align 32 > > + .quad 0x3f50000000000000, 0x3f50000000000000, 0x3f50000000000000, 0x3f50000000000000 > > + /*== MinLog1p = -1+2^(-53) ==*/ > > + .align 32 > > + .quad 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff > > + /*== MaxLog1p ==*/ > > + .align 32 > > + .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000 > > + /*== One ==*/ > > + .align 32 > > + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 > > + /*== SgnMask ==*/ > > + .align 32 > > + .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff > > + /*== XThreshold ==*/ > > + .align 32 > > + .quad 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000 > > + /*== XhMask ==*/ > > + .align 32 > > + .quad 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00 > > + /*== Threshold ==*/ > > + .align 32 > > + .quad 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000 > > + /*== Bias ==*/ > > + .align 32 > > + .quad 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000 > > + /*== Bias1 ==*/ > > + .align 32 > > + .quad 0x408ff00000000000, 0x408ff00000000000, 
0x408ff00000000000, 0x408ff00000000000 > > + /*== ExpMask ==*/ > > + .align 32 > > + .quad 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 > > + /*== ExpMask2 ==*/ > > + .align 32 > > + .quad 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000 > > + /*== L2L ==*/ > > + .align 32 > > + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF > > + .align 32 > > + .type __svml_dlog1p_data_internal,@object > > + .size __svml_dlog1p_data_internal,.-__svml_dlog1p_data_internal > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S > > new file mode 100644 > > index 0000000000..ca174a5f52 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S > > @@ -0,0 +1,20 @@ > > +/* AVX2 version of vectorized log1p, vector length is 8. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#define _ZGVeN8v_log1p _ZGVeN8v_log1p_avx2_wrapper > > +#include "../svml_d_log1p8_core.S" > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c > > new file mode 100644 > > index 0000000000..0aa35ec8c5 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c > > @@ -0,0 +1,27 @@ > > +/* Multiple versions of vectorized log1p, vector length is 8. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#define SYMBOL_NAME _ZGVeN8v_log1p > > +#include "ifunc-mathvec-avx512-skx.h" > > + > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > + > > +#ifdef SHARED > > +__hidden_ver1 (_ZGVeN8v_log1p, __GI__ZGVeN8v_log1p, __redirect__ZGVeN8v_log1p) > > + __attribute__ ((visibility ("hidden"))); > > +#endif > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S > > new file mode 100644 > > index 0000000000..5e38ff8d39 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S > > @@ -0,0 +1,317 @@ > > +/* Function log1p vectorized with AVX-512. 
> > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + https://www.gnu.org/licenses/. */ > > + > > +/* > > + * ALGORITHM DESCRIPTION: > > + * > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > + * Get short reciprocal approximation Rcp ~ 1/xh > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > + * log(Rcp) is tabulated > > + * > > + * > > + */ > > + > > +/* Offsets for data table __svml_dlog1p_data_internal_avx512 > > + */ > > +#define Log_tbl 0 > > +#define One 128 > > +#define SgnMask 192 > > +#define C075 256 > > +#define poly_coeff9 320 > > +#define poly_coeff8 384 > > +#define poly_coeff7 448 > > +#define poly_coeff6 512 > > +#define poly_coeff5 576 > > +#define poly_coeff4 640 > > +#define poly_coeff3 704 > > +#define poly_coeff2 768 > > +#define L2 832 > > + > > +#include <sysdep.h> > > + > > + .text > > + .section .text.evex512,"ax",@progbits > > +ENTRY(_ZGVeN8v_log1p_skx) > > + pushq %rbp > > + cfi_def_cfa_offset(16) > > + movq %rsp, %rbp > > + cfi_def_cfa(6, 16) > > + cfi_offset(6, -16) > > + andq $-64, %rsp > > + subq $192, %rsp > > + vmovups One+__svml_dlog1p_data_internal_avx512(%rip), %zmm7 > > + vmovups SgnMask+__svml_dlog1p_data_internal_avx512(%rip), 
%zmm14 > > + vmovaps %zmm0, %zmm9 > > + vaddpd {rn-sae}, %zmm9, %zmm7, %zmm11 > > + vandpd %zmm14, %zmm9, %zmm8 > > + > > +/* compute 1+x as high, low parts */ > > + vmaxpd {sae}, %zmm9, %zmm7, %zmm10 > > + vminpd {sae}, %zmm9, %zmm7, %zmm12 > > + > > +/* GetMant(x), normalized to [1,2) for x>=0, NaN for x<0 */ > > + vgetmantpd $8, {sae}, %zmm11, %zmm6 > > + > > +/* GetExp(x) */ > > + vgetexppd {sae}, %zmm11, %zmm5 > > + vsubpd {rn-sae}, %zmm10, %zmm11, %zmm13 > > + > > +/* DblRcp ~ 1/Mantissa */ > > + vrcp14pd %zmm6, %zmm15 > > + > > +/* Start polynomial evaluation */ > > + vmovups poly_coeff9+__svml_dlog1p_data_internal_avx512(%rip), %zmm10 > > + vmovups poly_coeff7+__svml_dlog1p_data_internal_avx512(%rip), %zmm11 > > + > > +/* Xl */ > > + vsubpd {rn-sae}, %zmm13, %zmm12, %zmm2 > > + vxorpd %zmm14, %zmm5, %zmm3 > > + > > +/* round DblRcp to 4 fractional bits (RN mode, no Precision exception) */ > > + vrndscalepd $88, {sae}, %zmm15, %zmm4 > > + vmovups poly_coeff5+__svml_dlog1p_data_internal_avx512(%rip), %zmm12 > > + vmovups poly_coeff6+__svml_dlog1p_data_internal_avx512(%rip), %zmm14 > > + vmovups poly_coeff3+__svml_dlog1p_data_internal_avx512(%rip), %zmm13 > > + > > +/* Xl*2^(-Expon) */ > > + vscalefpd {rn-sae}, %zmm3, %zmm2, %zmm1 > > + > > +/* Reduced argument: R = DblRcp*(Mantissa+Xl) - 1 */ > > + vfmsub213pd {rn-sae}, %zmm7, %zmm4, %zmm6 > > + vmovups __svml_dlog1p_data_internal_avx512(%rip), %zmm3 > > + > > +/* > > + * Table lookup > > + * Prepare exponent correction: DblRcp<0.75? 
> > + */ > > + vmovups C075+__svml_dlog1p_data_internal_avx512(%rip), %zmm2 > > + > > +/* Prepare table index */ > > + vpsrlq $48, %zmm4, %zmm0 > > + vfmadd231pd {rn-sae}, %zmm4, %zmm1, %zmm6 > > + vmovups poly_coeff8+__svml_dlog1p_data_internal_avx512(%rip), %zmm1 > > + vcmppd $17, {sae}, %zmm2, %zmm4, %k1 > > + vcmppd $4, {sae}, %zmm6, %zmm6, %k0 > > + vfmadd231pd {rn-sae}, %zmm6, %zmm10, %zmm1 > > + vmovups poly_coeff4+__svml_dlog1p_data_internal_avx512(%rip), %zmm10 > > + vfmadd231pd {rn-sae}, %zmm6, %zmm11, %zmm14 > > + vmovups L2+__svml_dlog1p_data_internal_avx512(%rip), %zmm4 > > + vpermt2pd Log_tbl+64+__svml_dlog1p_data_internal_avx512(%rip), %zmm0, %zmm3 > > + > > +/* add 1 to Expon if DblRcp<0.75 */ > > + vaddpd {rn-sae}, %zmm7, %zmm5, %zmm5{%k1} > > + > > +/* R^2 */ > > + vmulpd {rn-sae}, %zmm6, %zmm6, %zmm0 > > + vfmadd231pd {rn-sae}, %zmm6, %zmm12, %zmm10 > > + vmovups poly_coeff2+__svml_dlog1p_data_internal_avx512(%rip), %zmm12 > > + vmulpd {rn-sae}, %zmm0, %zmm0, %zmm15 > > + vfmadd231pd {rn-sae}, %zmm6, %zmm13, %zmm12 > > + vfmadd213pd {rn-sae}, %zmm14, %zmm0, %zmm1 > > + kmovw %k0, %edx > > + vfmadd213pd {rn-sae}, %zmm12, %zmm0, %zmm10 > > + > > +/* polynomial */ > > + vfmadd213pd {rn-sae}, %zmm10, %zmm15, %zmm1 > > + vfmadd213pd {rn-sae}, %zmm6, %zmm0, %zmm1 > > + vaddpd {rn-sae}, %zmm1, %zmm3, %zmm6 > > + vfmadd213pd {rn-sae}, %zmm6, %zmm4, %zmm5 > > + vorpd %zmm8, %zmm5, %zmm0 > > + testl %edx, %edx > > + > > +/* Go to special inputs processing branch */ > > + jne L(SPECIAL_VALUES_BRANCH) > > + # LOE rbx r12 r13 r14 r15 edx zmm0 zmm9 > > + > > +/* Restore registers > > + * and exit the function > > + */ > > + > > +L(EXIT): > > + movq %rbp, %rsp > > + popq %rbp > > + cfi_def_cfa(7, 8) > > + cfi_restore(6) > > + ret > > + cfi_def_cfa(6, 16) > > + cfi_offset(6, -16) > > + > > +/* Branch to process > > + * special inputs > > + */ > > + > > +L(SPECIAL_VALUES_BRANCH): > > + vmovups %zmm9, 64(%rsp) > > + vmovups %zmm0, 128(%rsp) > > + # LOE rbx r12 r13 
r14 r15 edx zmm0 > > + > > + xorl %eax, %eax > > + # LOE rbx r12 r13 r14 r15 eax edx > > + > > + vzeroupper > > + movq %r12, 16(%rsp) > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > > + movl %eax, %r12d > > + movq %r13, 8(%rsp) > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > > + movl %edx, %r13d > > + movq %r14, (%rsp) > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > > + # LOE rbx r15 r12d r13d > > + > > +/* Range mask > > + * bits check > > + */ > > + > > +L(RANGEMASK_CHECK): > > + btl %r12d, %r13d > > + > > +/* Call scalar math function */ > > + jc L(SCALAR_MATH_CALL) > > + # LOE rbx r15 r12d r13d > > + > > +/* Special inputs > > + * processing loop > > + */ > > + > > +L(SPECIAL_VALUES_LOOP): > > + incl %r12d > > + cmpl $8, %r12d > > + > > +/* Check bits in range mask */ > > + jl L(RANGEMASK_CHECK) > > + # LOE rbx r15 r12d r13d > > + > > + movq 16(%rsp), %r12 > > + cfi_restore(12) > > + movq 8(%rsp), %r13 > > + cfi_restore(13) > > + movq (%rsp), %r14 > > + cfi_restore(14) > > + vmovups 128(%rsp), %zmm0 > > + > > +/* Go to exit */ > > + jmp L(EXIT) > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; 
DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > > + # LOE rbx r12 r13 r14 r15 zmm0 > > + > > +/* Scalar math function call > > + * to process special input > > + */ > > + > > +L(SCALAR_MATH_CALL): > > + movl %r12d, %r14d > > + movsd 64(%rsp,%r14,8), %xmm0 > > + call log1p@PLT > > + # LOE rbx r14 r15 r12d r13d xmm0 > > + > > + movsd %xmm0, 128(%rsp,%r14,8) > > + > > +/* Process special inputs in loop */ > > + jmp L(SPECIAL_VALUES_LOOP) > > + # LOE rbx r15 r12d r13d > > +END(_ZGVeN8v_log1p_skx) > > + > > + .section .rodata, "a" > > + .align 64 > > + > > +#ifdef __svml_dlog1p_data_internal_avx512_typedef > > +typedef unsigned int VUINT32; > > +typedef struct { > > + __declspec(align(64)) VUINT32 Log_tbl[16][2]; > > + __declspec(align(64)) VUINT32 One[8][2]; > > + __declspec(align(64)) VUINT32 SgnMask[8][2]; > > + __declspec(align(64)) VUINT32 C075[8][2]; > > + __declspec(align(64)) VUINT32 poly_coeff9[8][2]; > > + __declspec(align(64)) VUINT32 poly_coeff8[8][2]; > > + __declspec(align(64)) VUINT32 poly_coeff7[8][2]; > > + __declspec(align(64)) VUINT32 poly_coeff6[8][2]; > > + __declspec(align(64)) VUINT32 poly_coeff5[8][2]; > > + __declspec(align(64)) VUINT32 poly_coeff4[8][2]; > > + __declspec(align(64)) VUINT32 poly_coeff3[8][2]; > > + __declspec(align(64)) VUINT32 poly_coeff2[8][2]; > > + __declspec(align(64)) VUINT32 L2[8][2]; > > + } __svml_dlog1p_data_internal_avx512; > > +#endif > > +__svml_dlog1p_data_internal_avx512: > > + /*== Log_tbl ==*/ > > + .quad 0x0000000000000000 > > + .quad 0xbfaf0a30c01162a6 > > + .quad 0xbfbe27076e2af2e6 > > + .quad 0xbfc5ff3070a793d4 > > + .quad
0xbfcc8ff7c79a9a22 > > + .quad 0xbfd1675cababa60e > > + .quad 0xbfd4618bc21c5ec2 > > + .quad 0xbfd739d7f6bbd007 > > + .quad 0x3fd269621134db92 > > + .quad 0x3fcf991c6cb3b379 > > + .quad 0x3fca93ed3c8ad9e3 > > + .quad 0x3fc5bf406b543db2 > > + .quad 0x3fc1178e8227e47c > > + .quad 0x3fb9335e5d594989 > > + .quad 0x3fb08598b59e3a07 > > + .quad 0x3fa0415d89e74444 > > + /*== One ==*/ > > + .align 64 > > + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 > > + /*== SgnMask ==*/ > > + .align 64 > > + .quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 > > + /*== C075 0.75 ==*/ > > + .align 64 > > + .quad 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000 > > + /*== poly_coeff9 ==*/ > > + .align 64 > > + .quad 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70 > > + /*== poly_coeff8 ==*/ > > + .align 64 > > + .quad 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62 > > + /*== poly_coeff7 ==*/ > > + .align 64 > > + .quad 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF > > + /*== poly_coeff6 ==*/ > > + .align 64 > > + .quad 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06 > > + /*== poly_coeff5 ==*/ > > + .align 64 > > + .quad 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 
0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C > > + /*== poly_coeff4 ==*/ > > + .align 64 > > + .quad 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD > > + /*== poly_coeff3 ==*/ > > + .align 64 > > + .quad 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466 > > + /*== poly_coeff2 ==*/ > > + .align 64 > > + .quad 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6 > > + /*== L2 = log(2) ==*/ > > + .align 64 > > + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF > > + .align 64 > > + .type __svml_dlog1p_data_internal_avx512,@object > > + .size __svml_dlog1p_data_internal_avx512,.-__svml_dlog1p_data_internal_avx512 > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S > > new file mode 100644 > > index 0000000000..3c0a0a01a2 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S > > @@ -0,0 +1,20 @@ > > +/* AVX2 version of vectorized log1pf. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. 
> > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#define _ZGVeN16v_log1pf _ZGVeN16v_log1pf_avx2_wrapper > > +#include "../svml_s_log1pf16_core.S" > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c > > new file mode 100644 > > index 0000000000..9af1320547 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c > > @@ -0,0 +1,28 @@ > > +/* Multiple versions of vectorized log1pf, vector length is 16. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#define SYMBOL_NAME _ZGVeN16v_log1pf > > +#include "ifunc-mathvec-avx512-skx.h" > > + > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > + > > +#ifdef SHARED > > +__hidden_ver1 (_ZGVeN16v_log1pf, __GI__ZGVeN16v_log1pf, > > + __redirect__ZGVeN16v_log1pf) > > + __attribute__ ((visibility ("hidden"))); > > +#endif > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S > > new file mode 100644 > > index 0000000000..78b2fe417f > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S > > @@ -0,0 +1,271 @@ > > +/* Function log1pf vectorized with AVX-512. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + https://www.gnu.org/licenses/. 
*/ > > + > > +/* > > + * ALGORITHM DESCRIPTION: > > + * > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > + * Get short reciprocal approximation Rcp ~ 1/xh > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > + * log(Rcp) is tabulated > > + * > > + * > > + */ > > + > > +/* Offsets for data table __svml_slog1p_data_internal > > + */ > > +#define SgnMask 0 > > +#define sOne 64 > > +#define sPoly_1 128 > > +#define sPoly_2 192 > > +#define sPoly_3 256 > > +#define sPoly_4 320 > > +#define sPoly_5 384 > > +#define sPoly_6 448 > > +#define sPoly_7 512 > > +#define sPoly_8 576 > > +#define iHiDelta 640 > > +#define iLoRange 704 > > +#define iBrkValue 768 > > +#define iOffExpoMask 832 > > +#define sLn2 896 > > + > > +#include <sysdep.h> > > + > > + .text > > + .section .text.exex512,"ax",@progbits > > +ENTRY(_ZGVeN16v_log1pf_skx) > > + pushq %rbp > > + cfi_def_cfa_offset(16) > > + movq %rsp, %rbp > > + cfi_def_cfa(6, 16) > > + cfi_offset(6, -16) > > + andq $-64, %rsp > > + subq $192, %rsp > > + vmovups sOne+__svml_slog1p_data_internal(%rip), %zmm2 > > + > > +/* reduction: compute r,n */ > > + vmovups iBrkValue+__svml_slog1p_data_internal(%rip), %zmm12 > > + vmovups SgnMask+__svml_slog1p_data_internal(%rip), %zmm4 > > + vmovaps %zmm0, %zmm3 > > + > > +/* compute 1+x as high, low parts */ > > + vmaxps {sae}, %zmm3, %zmm2, %zmm5 > > + vminps {sae}, %zmm3, %zmm2, %zmm7 > > + vandnps %zmm3, %zmm4, %zmm1 > > + vpternlogd $255, %zmm4, %zmm4, %zmm4 > > + vaddps {rn-sae}, %zmm7, %zmm5, %zmm9 > > + vpsubd %zmm12, %zmm9, %zmm10 > > + vsubps {rn-sae}, %zmm9, %zmm5, %zmm6 > > + > > +/* check argument value ranges */ > > + vpaddd iHiDelta+__svml_slog1p_data_internal(%rip), %zmm9, %zmm8 > > + vpsrad $23, %zmm10, %zmm13 > > + vmovups sPoly_5+__svml_slog1p_data_internal(%rip), %zmm9 > > + vpcmpd $5, iLoRange+__svml_slog1p_data_internal(%rip), %zmm8, %k1 > > + vpslld $23, %zmm13, %zmm14 > > + vaddps {rn-sae}, %zmm7, %zmm6, 
%zmm15 > > + vcvtdq2ps {rn-sae}, %zmm13, %zmm0 > > + vpsubd %zmm14, %zmm2, %zmm13 > > + vmovups sPoly_8+__svml_slog1p_data_internal(%rip), %zmm7 > > + vmovups sPoly_1+__svml_slog1p_data_internal(%rip), %zmm14 > > + vmulps {rn-sae}, %zmm13, %zmm15, %zmm6 > > + vpandd iOffExpoMask+__svml_slog1p_data_internal(%rip), %zmm10, %zmm11 > > + vpaddd %zmm12, %zmm11, %zmm5 > > + vmovups sPoly_4+__svml_slog1p_data_internal(%rip), %zmm10 > > + vmovups sPoly_3+__svml_slog1p_data_internal(%rip), %zmm11 > > + vmovups sPoly_2+__svml_slog1p_data_internal(%rip), %zmm12 > > + > > +/* polynomial evaluation */ > > + vsubps {rn-sae}, %zmm2, %zmm5, %zmm2 > > + vaddps {rn-sae}, %zmm6, %zmm2, %zmm15 > > + vmovups sPoly_7+__svml_slog1p_data_internal(%rip), %zmm2 > > + vfmadd231ps {rn-sae}, %zmm15, %zmm7, %zmm2 > > + vpandnd %zmm8, %zmm8, %zmm4{%k1} > > + vmovups sPoly_6+__svml_slog1p_data_internal(%rip), %zmm8 > > + > > +/* combine and get argument value range mask */ > > + vptestmd %zmm4, %zmm4, %k0 > > + vfmadd213ps {rn-sae}, %zmm8, %zmm15, %zmm2 > > + kmovw %k0, %edx > > + vfmadd213ps {rn-sae}, %zmm9, %zmm15, %zmm2 > > + vfmadd213ps {rn-sae}, %zmm10, %zmm15, %zmm2 > > + vfmadd213ps {rn-sae}, %zmm11, %zmm15, %zmm2 > > + vfmadd213ps {rn-sae}, %zmm12, %zmm15, %zmm2 > > + vfmadd213ps {rn-sae}, %zmm14, %zmm15, %zmm2 > > + vmulps {rn-sae}, %zmm15, %zmm2, %zmm4 > > + vfmadd213ps {rn-sae}, %zmm15, %zmm15, %zmm4 > > + > > +/* final reconstruction */ > > + vmovups sLn2+__svml_slog1p_data_internal(%rip), %zmm15 > > + vfmadd213ps {rn-sae}, %zmm4, %zmm15, %zmm0 > > + vorps %zmm1, %zmm0, %zmm0 > > + testl %edx, %edx > > + > > +/* Go to special inputs processing branch */ > > + jne L(SPECIAL_VALUES_BRANCH) > > + # LOE rbx r12 r13 r14 r15 edx zmm0 zmm3 > > + > > +/* Restore registers > > + * and exit the function > > + */ > > + > > +L(EXIT): > > + movq %rbp, %rsp > > + popq %rbp > > + cfi_def_cfa(7, 8) > > + cfi_restore(6) > > + ret > > + cfi_def_cfa(6, 16) > > + cfi_offset(6, -16) > > + > > +/* Branch 
to process > > + * special inputs > > + */ > > + > > +L(SPECIAL_VALUES_BRANCH): > > + vmovups %zmm3, 64(%rsp) > > + vmovups %zmm0, 128(%rsp) > > + # LOE rbx r12 r13 r14 r15 edx zmm0 > > + > > + xorl %eax, %eax > > + # LOE rbx r12 r13 r14 r15 eax edx > > + > > + vzeroupper > > + movq %r12, 16(%rsp) > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > > + movl %eax, %r12d > > + movq %r13, 8(%rsp) > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > > + movl %edx, %r13d > > + movq %r14, (%rsp) > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > > + # LOE rbx r15 r12d r13d > > + > > +/* Range mask > > + * bits check > > + */ > > + > > +L(RANGEMASK_CHECK): > > + btl %r12d, %r13d > > + > > +/* Call scalar math function */ > > + jc L(SCALAR_MATH_CALL) > > + # LOE rbx r15 r12d r13d > > + > > +/* Special inputs > > + * processing loop > > + */ > > + > > +L(SPECIAL_VALUES_LOOP): > > + incl %r12d > > + cmpl $16, %r12d > > + > > +/* Check bits in range mask */ > > + jl L(RANGEMASK_CHECK) > > + # LOE rbx r15 r12d r13d > > + > > + movq 16(%rsp), %r12 > > + cfi_restore(12) > > + movq 8(%rsp), %r13 > > + cfi_restore(13) > > + movq (%rsp), %r14 > > + cfi_restore(14) > > + vmovups 128(%rsp), %zmm0 > > + > > +/* Go to exit */ > > + jmp L(EXIT) > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > > + .cfi_escape 0x10, 
0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > > + # LOE rbx r12 r13 r14 r15 zmm0 > > + > > +/* Scalar math function call > > + * to process special input > > + */ > > + > > +L(SCALAR_MATH_CALL): > > + movl %r12d, %r14d > > + movss 64(%rsp,%r14,4), %xmm0 > > + call log1pf@PLT > > + # LOE rbx r14 r15 r12d r13d xmm0 > > + > > + movss %xmm0, 128(%rsp,%r14,4) > > + > > +/* Process special inputs in loop */ > > + jmp L(SPECIAL_VALUES_LOOP) > > + # LOE rbx r15 r12d r13d > > +END(_ZGVeN16v_log1pf_skx) > > + > > + .section .rodata, "a" > > + .align 64 > > + > > +#ifdef __svml_slog1p_data_internal_typedef > > +typedef unsigned int VUINT32; > > +typedef struct { > > + __declspec(align(64)) VUINT32 SgnMask[16][1]; > > + __declspec(align(64)) VUINT32 sOne[16][1]; > > + __declspec(align(64)) VUINT32 sPoly[8][16][1]; > > + __declspec(align(64)) VUINT32 iHiDelta[16][1]; > > + __declspec(align(64)) VUINT32 iLoRange[16][1]; > > + __declspec(align(64)) VUINT32 iBrkValue[16][1]; > > + __declspec(align(64)) VUINT32 iOffExpoMask[16][1]; > > + __declspec(align(64)) VUINT32 sLn2[16][1]; > > +} __svml_slog1p_data_internal; > > +#endif > > +__svml_slog1p_data_internal: > > + /*== SgnMask ==*/ > > + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff > > + /*== sOne = SP 1.0 ==*/ > > + .align 64 > > + .long
0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 > > + /*== sPoly[] = SP polynomial ==*/ > > + .align 64 > > + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ > > + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ > > + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ > > + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ > > + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ > > + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ > > + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ > > + .long 0x3e0d84ed, 0x3e0d84ed, 
0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ > > + /*== iHiDelta = SP 80000000-7f000000 ==*/ > > + .align 64 > > + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000 > > + /*== iLoRange = SP 00800000+iHiDelta ==*/ > > + .align 64 > > + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000 > > + /*== iBrkValue = SP 2/3 ==*/ > > + .align 64 > > + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab > > + /*== iOffExpoMask = SP significand mask ==*/ > > + .align 64 > > + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff > > + /*== sLn2 = SP ln(2) ==*/ > > + .align 64 > > + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 > > + .align 64 > > + .type __svml_slog1p_data_internal,@object > > + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S > > new file mode 100644 > > index 0000000000..913c8290c8 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S > > @@ -0,0 +1,20 @@ > > +/* SSE2 version of vectorized log1pf, vector 
length is 4. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#define _ZGVbN4v_log1pf _ZGVbN4v_log1pf_sse2 > > +#include "../svml_s_log1pf4_core.S" > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c > > new file mode 100644 > > index 0000000000..b6aff48023 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c > > @@ -0,0 +1,28 @@ > > +/* Multiple versions of vectorized log1pf, vector length is 4. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. 
> > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#define SYMBOL_NAME _ZGVbN4v_log1pf > > +#include "ifunc-mathvec-sse4_1.h" > > + > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > + > > +#ifdef SHARED > > +__hidden_ver1 (_ZGVbN4v_log1pf, __GI__ZGVbN4v_log1pf, > > + __redirect__ZGVbN4v_log1pf) > > + __attribute__ ((visibility ("hidden"))); > > +#endif > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S > > new file mode 100644 > > index 0000000000..ef1bae58c0 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S > > @@ -0,0 +1,252 @@ > > +/* Function log1pf vectorized with SSE4. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + https://www.gnu.org/licenses/. 
*/ > > + > > +/* > > + * ALGORITHM DESCRIPTION: > > + * > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > + * Get short reciprocal approximation Rcp ~ 1/xh > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > + * log(Rcp) is tabulated > > + * > > + * > > + */ > > + > > +/* Offsets for data table __svml_slog1p_data_internal > > + */ > > +#define SgnMask 0 > > +#define sOne 16 > > +#define sPoly 32 > > +#define iHiDelta 160 > > +#define iLoRange 176 > > +#define iBrkValue 192 > > +#define iOffExpoMask 208 > > +#define sLn2 224 > > + > > +#include <sysdep.h> > > + > > + .text > > + .section .text.sse4,"ax",@progbits > > +ENTRY(_ZGVbN4v_log1pf_sse4) > > + subq $72, %rsp > > + cfi_def_cfa_offset(80) > > + movups sOne+__svml_slog1p_data_internal(%rip), %xmm7 > > + > > +/* compute 1+x as high, low parts */ > > + movaps %xmm7, %xmm1 > > + movaps %xmm7, %xmm5 > > + maxps %xmm0, %xmm1 > > + minps %xmm0, %xmm5 > > + movaps %xmm1, %xmm4 > > + > > +/* check argument value ranges */ > > + movdqu iHiDelta+__svml_slog1p_data_internal(%rip), %xmm2 > > + addps %xmm5, %xmm4 > > + > > +/* reduction: compute r,n */ > > + movdqu iBrkValue+__svml_slog1p_data_internal(%rip), %xmm3 > > + paddd %xmm4, %xmm2 > > + movdqu iOffExpoMask+__svml_slog1p_data_internal(%rip), %xmm8 > > + subps %xmm4, %xmm1 > > + psubd %xmm3, %xmm4 > > + addps %xmm1, %xmm5 > > + pand %xmm4, %xmm8 > > + psrad $23, %xmm4 > > + cvtdq2ps %xmm4, %xmm10 > > + pslld $23, %xmm4 > > + movaps %xmm7, %xmm1 > > + paddd %xmm3, %xmm8 > > + psubd %xmm4, %xmm1 > > + mulps %xmm5, %xmm1 > > + > > +/* polynomial evaluation */ > > + subps %xmm7, %xmm8 > > + > > +/* final reconstruction */ > > + mulps sLn2+__svml_slog1p_data_internal(%rip), %xmm10 > > + addps %xmm8, %xmm1 > > + movups sPoly+112+__svml_slog1p_data_internal(%rip), %xmm9 > > + mulps %xmm1, %xmm9 > > + movdqu iLoRange+__svml_slog1p_data_internal(%rip), %xmm6 > > + pcmpgtd %xmm2, %xmm6 > > + addps 
sPoly+96+__svml_slog1p_data_internal(%rip), %xmm9 > > + > > +/* combine and get argument value range mask */ > > + movmskps %xmm6, %edx > > + movups SgnMask+__svml_slog1p_data_internal(%rip), %xmm11 > > + mulps %xmm1, %xmm9 > > + andnps %xmm0, %xmm11 > > + addps sPoly+80+__svml_slog1p_data_internal(%rip), %xmm9 > > + mulps %xmm1, %xmm9 > > + addps sPoly+64+__svml_slog1p_data_internal(%rip), %xmm9 > > + mulps %xmm1, %xmm9 > > + addps sPoly+48+__svml_slog1p_data_internal(%rip), %xmm9 > > + mulps %xmm1, %xmm9 > > + addps sPoly+32+__svml_slog1p_data_internal(%rip), %xmm9 > > + mulps %xmm1, %xmm9 > > + addps sPoly+16+__svml_slog1p_data_internal(%rip), %xmm9 > > + mulps %xmm1, %xmm9 > > + addps sPoly+__svml_slog1p_data_internal(%rip), %xmm9 > > + mulps %xmm1, %xmm9 > > + mulps %xmm1, %xmm9 > > + addps %xmm9, %xmm1 > > + addps %xmm10, %xmm1 > > + orps %xmm11, %xmm1 > > + testl %edx, %edx > > + > > +/* Go to special inputs processing branch */ > > + jne L(SPECIAL_VALUES_BRANCH) > > + # LOE rbx rbp r12 r13 r14 r15 edx xmm0 xmm1 > > + > > +/* Restore registers > > + * and exit the function > > + */ > > + > > +L(EXIT): > > + movaps %xmm1, %xmm0 > > + addq $72, %rsp > > + cfi_def_cfa_offset(8) > > + ret > > + cfi_def_cfa_offset(80) > > + > > +/* Branch to process > > + * special inputs > > + */ > > + > > +L(SPECIAL_VALUES_BRANCH): > > + movups %xmm0, 32(%rsp) > > + movups %xmm1, 48(%rsp) > > + # LOE rbx rbp r12 r13 r14 r15 edx > > + > > + xorl %eax, %eax > > + movq %r12, 16(%rsp) > > + cfi_offset(12, -64) > > + movl %eax, %r12d > > + movq %r13, 8(%rsp) > > + cfi_offset(13, -72) > > + movl %edx, %r13d > > + movq %r14, (%rsp) > > + cfi_offset(14, -80) > > + # LOE rbx rbp r15 r12d r13d > > + > > +/* Range mask > > + * bits check > > + */ > > + > > +L(RANGEMASK_CHECK): > > + btl %r12d, %r13d > > + > > +/* Call scalar math function */ > > + jc L(SCALAR_MATH_CALL) > > + # LOE rbx rbp r15 r12d r13d > > + > > +/* Special inputs > > + * processing loop > > + */ > > + > > 
+L(SPECIAL_VALUES_LOOP): > > + incl %r12d > > + cmpl $4, %r12d > > + > > +/* Check bits in range mask */ > > + jl L(RANGEMASK_CHECK) > > + # LOE rbx rbp r15 r12d r13d > > + > > + movq 16(%rsp), %r12 > > + cfi_restore(12) > > + movq 8(%rsp), %r13 > > + cfi_restore(13) > > + movq (%rsp), %r14 > > + cfi_restore(14) > > + movups 48(%rsp), %xmm1 > > + > > +/* Go to exit */ > > + jmp L(EXIT) > > + cfi_offset(12, -64) > > + cfi_offset(13, -72) > > + cfi_offset(14, -80) > > + # LOE rbx rbp r12 r13 r14 r15 xmm1 > > + > > +/* Scalar math function call > > + * to process special input > > + */ > > + > > +L(SCALAR_MATH_CALL): > > + movl %r12d, %r14d > > + movss 32(%rsp,%r14,4), %xmm0 > > + call log1pf@PLT > > + # LOE rbx rbp r14 r15 r12d r13d xmm0 > > + > > + movss %xmm0, 48(%rsp,%r14,4) > > + > > +/* Process special inputs in loop */ > > + jmp L(SPECIAL_VALUES_LOOP) > > + # LOE rbx rbp r15 r12d r13d > > +END(_ZGVbN4v_log1pf_sse4) > > + > > + .section .rodata, "a" > > + .align 16 > > + > > +#ifdef __svml_slog1p_data_internal_typedef > > +typedef unsigned int VUINT32; > > +typedef struct { > > + __declspec(align(16)) VUINT32 SgnMask[4][1]; > > + __declspec(align(16)) VUINT32 sOne[4][1]; > > + __declspec(align(16)) VUINT32 sPoly[8][4][1]; > > + __declspec(align(16)) VUINT32 iHiDelta[4][1]; > > + __declspec(align(16)) VUINT32 iLoRange[4][1]; > > + __declspec(align(16)) VUINT32 iBrkValue[4][1]; > > + __declspec(align(16)) VUINT32 iOffExpoMask[4][1]; > > + __declspec(align(16)) VUINT32 sLn2[4][1]; > > +} __svml_slog1p_data_internal; > > +#endif > > +__svml_slog1p_data_internal: > > + /*== SgnMask ==*/ > > + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff > > + /*== sOne = SP 1.0 ==*/ > > + .align 16 > > + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 > > + /*== sPoly[] = SP polynomial ==*/ > > + .align 16 > > + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ > > + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /*
3.3333265781402587890625000e-01 P1 */ > > + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ > > + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ > > + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ > > + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ > > + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ > > + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ > > + /*== iHiDelta = SP 80000000-7f000000 ==*/ > > + .align 16 > > + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 > > + /*== iLoRange = SP 00800000+iHiDelta ==*/ > > + .align 16 > > + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000 > > + /*== iBrkValue = SP 2/3 ==*/ > > + .align 16 > > + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab > > + /*== iOffExpoMask = SP significand mask ==*/ > > + .align 16 > > + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff > > + /*== sLn2 = SP ln(2) ==*/ > > + .align 16 > > + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 > > + .align 16 > > + .type __svml_slog1p_data_internal,@object > > + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S > > new file mode 100644 > > index 0000000000..c0b97d89e6 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S > > @@ -0,0 +1,20 @@ > > +/* SSE version of vectorized log1pf, vector length is 8. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. 
> > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#define _ZGVdN8v_log1pf _ZGVdN8v_log1pf_sse_wrapper > > +#include "../svml_s_log1pf8_core.S" > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c > > new file mode 100644 > > index 0000000000..a2bbe37129 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c > > @@ -0,0 +1,28 @@ > > +/* Multiple versions of vectorized log1pf, vector length is 8. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. 
> > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#define SYMBOL_NAME _ZGVdN8v_log1pf > > +#include "ifunc-mathvec-avx2.h" > > + > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > + > > +#ifdef SHARED > > +__hidden_ver1 (_ZGVdN8v_log1pf, __GI__ZGVdN8v_log1pf, > > + __redirect__ZGVdN8v_log1pf) > > + __attribute__ ((visibility ("hidden"))); > > +#endif > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S > > new file mode 100644 > > index 0000000000..957dc23e3f > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S > > @@ -0,0 +1,254 @@ > > +/* Function log1pf vectorized with AVX2. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + https://www.gnu.org/licenses/. 
*/ > > + > > +/* > > + * ALGORITHM DESCRIPTION: > > + * > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > + * Get short reciprocal approximation Rcp ~ 1/xh > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > + * log(Rcp) is tabulated > > + * > > + * > > + */ > > + > > +/* Offsets for data table __svml_slog1p_data_internal > > + */ > > +#define SgnMask 0 > > +#define sOne 32 > > +#define sPoly 64 > > +#define iHiDelta 320 > > +#define iLoRange 352 > > +#define iBrkValue 384 > > +#define iOffExpoMask 416 > > +#define sLn2 448 > > + > > +#include <sysdep.h> > > + > > + .text > > + .section .text.avx2,"ax",@progbits > > +ENTRY(_ZGVdN8v_log1pf_avx2) > > + pushq %rbp > > + cfi_def_cfa_offset(16) > > + movq %rsp, %rbp > > + cfi_def_cfa(6, 16) > > + cfi_offset(6, -16) > > + andq $-32, %rsp > > + subq $96, %rsp > > + vmovups sOne+__svml_slog1p_data_internal(%rip), %ymm2 > > + > > +/* reduction: compute r,n */ > > + vmovups iBrkValue+__svml_slog1p_data_internal(%rip), %ymm13 > > + vmovups SgnMask+__svml_slog1p_data_internal(%rip), %ymm4 > > + vmovups iLoRange+__svml_slog1p_data_internal(%rip), %ymm8 > > + vmovaps %ymm0, %ymm3 > > + > > +/* compute 1+x as high, low parts */ > > + vmaxps %ymm3, %ymm2, %ymm5 > > + vminps %ymm3, %ymm2, %ymm6 > > + vaddps %ymm6, %ymm5, %ymm10 > > + vpsubd %ymm13, %ymm10, %ymm11 > > + > > +/* check argument value ranges */ > > + vpaddd iHiDelta+__svml_slog1p_data_internal(%rip), %ymm10, %ymm9 > > + vsubps %ymm10, %ymm5, %ymm7 > > + vpsrad $23, %ymm11, %ymm14 > > + vpand iOffExpoMask+__svml_slog1p_data_internal(%rip), %ymm11, %ymm12 > > + vpslld $23, %ymm14, %ymm15 > > + vcvtdq2ps %ymm14, %ymm0 > > + vpsubd %ymm15, %ymm2, %ymm14 > > + vandnps %ymm3, %ymm4, %ymm1 > > + vaddps %ymm7, %ymm6, %ymm4 > > + vpaddd %ymm13, %ymm12, %ymm6 > > + vmulps %ymm4, %ymm14, %ymm7 > > + > > +/* polynomial evaluation */ > > + vsubps %ymm2, %ymm6, %ymm2 > > + vpcmpgtd %ymm9, %ymm8, %ymm5 > > + vmovups 
sPoly+224+__svml_slog1p_data_internal(%rip), %ymm8 > > + vaddps %ymm2, %ymm7, %ymm9 > > + vfmadd213ps sPoly+192+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > + vfmadd213ps sPoly+160+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > + vfmadd213ps sPoly+128+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > + vfmadd213ps sPoly+96+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > + vfmadd213ps sPoly+64+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > + vfmadd213ps sPoly+32+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > + vfmadd213ps sPoly+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > + vmulps %ymm8, %ymm9, %ymm10 > > + vfmadd213ps %ymm9, %ymm9, %ymm10 > > + > > +/* final reconstruction */ > > + vfmadd132ps sLn2+__svml_slog1p_data_internal(%rip), %ymm10, %ymm0 > > + > > +/* combine and get argument value range mask */ > > + vmovmskps %ymm5, %edx > > + vorps %ymm1, %ymm0, %ymm0 > > + testl %edx, %edx > > + > > +/* Go to special inputs processing branch */ > > + jne L(SPECIAL_VALUES_BRANCH) > > + # LOE rbx r12 r13 r14 r15 edx ymm0 ymm3 > > + > > +/* Restore registers > > + * and exit the function > > + */ > > + > > +L(EXIT): > > + movq %rbp, %rsp > > + popq %rbp > > + cfi_def_cfa(7, 8) > > + cfi_restore(6) > > + ret > > + cfi_def_cfa(6, 16) > > + cfi_offset(6, -16) > > + > > +/* Branch to process > > + * special inputs > > + */ > > + > > +L(SPECIAL_VALUES_BRANCH): > > + vmovups %ymm3, 32(%rsp) > > + vmovups %ymm0, 64(%rsp) > > + # LOE rbx r12 r13 r14 r15 edx ymm0 > > + > > + xorl %eax, %eax > > + # LOE rbx r12 r13 r14 r15 eax edx > > + > > + vzeroupper > > + movq %r12, 16(%rsp) > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > > + movl %eax, %r12d > > + movq %r13, 8(%rsp) > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: 
-32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > > + movl %edx, %r13d > > + movq %r14, (%rsp) > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > > + # LOE rbx r15 r12d r13d > > + > > +/* Range mask > > + * bits check > > + */ > > + > > +L(RANGEMASK_CHECK): > > + btl %r12d, %r13d > > + > > +/* Call scalar math function */ > > + jc L(SCALAR_MATH_CALL) > > + # LOE rbx r15 r12d r13d > > + > > +/* Special inputs > > + * processing loop > > + */ > > + > > +L(SPECIAL_VALUES_LOOP): > > + incl %r12d > > + cmpl $8, %r12d > > + > > +/* Check bits in range mask */ > > + jl L(RANGEMASK_CHECK) > > + # LOE rbx r15 r12d r13d > > + > > + movq 16(%rsp), %r12 > > + cfi_restore(12) > > + movq 8(%rsp), %r13 > > + cfi_restore(13) > > + movq (%rsp), %r14 > > + cfi_restore(14) > > + vmovups 64(%rsp), %ymm0 > > + > > +/* Go to exit */ > > + jmp L(EXIT) > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > > + # LOE rbx r12 r13 r14 r15 ymm0 > > + > > +/* Scalar math function call > > + * to 
process special input > > + */ > > + > > +L(SCALAR_MATH_CALL): > > + movl %r12d, %r14d > > + movss 32(%rsp,%r14,4), %xmm0 > > + call log1pf@PLT > > + # LOE rbx r14 r15 r12d r13d xmm0 > > + > > + movss %xmm0, 64(%rsp,%r14,4) > > + > > +/* Process special inputs in loop */ > > + jmp L(SPECIAL_VALUES_LOOP) > > + # LOE rbx r15 r12d r13d > > +END(_ZGVdN8v_log1pf_avx2) > > + > > + .section .rodata, "a" > > + .align 32 > > + > > +#ifdef __svml_slog1p_data_internal_typedef > > +typedef unsigned int VUINT32; > > +typedef struct { > > + __declspec(align(32)) VUINT32 SgnMask[8][1]; > > + __declspec(align(32)) VUINT32 sOne[8][1]; > > + __declspec(align(32)) VUINT32 sPoly[8][8][1]; > > + __declspec(align(32)) VUINT32 iHiDelta[8][1]; > > + __declspec(align(32)) VUINT32 iLoRange[8][1]; > > + __declspec(align(32)) VUINT32 iBrkValue[8][1]; > > + __declspec(align(32)) VUINT32 iOffExpoMask[8][1]; > > + __declspec(align(32)) VUINT32 sLn2[8][1]; > > +} __svml_slog1p_data_internal; > > +#endif > > +__svml_slog1p_data_internal: > > + /*== SgnMask ==*/ > > + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff > > + /*== sOne = SP 1.0 ==*/ > > + .align 32 > > + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 > > + /*== sPoly[] = SP polynomial ==*/ > > + .align 32 > > + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ > > + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ > > + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ > > + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ > > + .long 0xbe28ad37, 0xbe28ad37, 
0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ > > + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ > > + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ > > + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ > > + /*== iHiDelta = SP 80000000-7f000000 ==*/ > > + .align 32 > > + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000 > > + /*== iLoRange = SP 00800000+iHiDelta ==*/ > > + .align 32 > > + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000 > > + /*== iBrkValue = SP 2/3 ==*/ > > + .align 32 > > + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab > > + /*== iOffExpoMask = SP significand mask ==*/ > > + .align 32 > > + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff > > + /*== sLn2 = SP ln(2) ==*/ > > + .align 32 > > + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 > > + .align 32 > > + .type __svml_slog1p_data_internal,@object > > + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal > > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p2_core.S b/sysdeps/x86_64/fpu/svml_d_log1p2_core.S > > new file mode 100644 > > index 0000000000..e3f01717d9 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/svml_d_log1p2_core.S > > @@ -0,0 +1,29 @@ > > +/* Function log1p vectorized with SSE2. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. 
> > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#include <sysdep.h> > > +#include "svml_d_wrapper_impl.h" > > + > > + .text > > +ENTRY (_ZGVbN2v_log1p) > > +WRAPPER_IMPL_SSE2 log1p > > +END (_ZGVbN2v_log1p) > > + > > +#ifndef USE_MULTIARCH > > + libmvec_hidden_def (_ZGVbN2v_log1p) > > +#endif > > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p4_core.S b/sysdeps/x86_64/fpu/svml_d_log1p4_core.S > > new file mode 100644 > > index 0000000000..49beb96183 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/svml_d_log1p4_core.S > > @@ -0,0 +1,29 @@ > > +/* Function log1p vectorized with AVX2, wrapper version. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. 
> > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#include <sysdep.h> > > +#include "svml_d_wrapper_impl.h" > > + > > + .text > > +ENTRY (_ZGVdN4v_log1p) > > +WRAPPER_IMPL_AVX _ZGVbN2v_log1p > > +END (_ZGVdN4v_log1p) > > + > > +#ifndef USE_MULTIARCH > > + libmvec_hidden_def (_ZGVdN4v_log1p) > > +#endif > > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S > > new file mode 100644 > > index 0000000000..8b89768b7c > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S > > @@ -0,0 +1,25 @@ > > +/* Function log1p vectorized in AVX ISA as wrapper to SSE4 ISA version. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#include <sysdep.h> > > +#include "svml_d_wrapper_impl.h" > > + > > + .text > > +ENTRY (_ZGVcN4v_log1p) > > +WRAPPER_IMPL_AVX _ZGVbN2v_log1p > > +END (_ZGVcN4v_log1p) > > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p8_core.S b/sysdeps/x86_64/fpu/svml_d_log1p8_core.S > > new file mode 100644 > > index 0000000000..54b4d4ede8 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/svml_d_log1p8_core.S > > @@ -0,0 +1,25 @@ > > +/* Function log1p vectorized with AVX-512, wrapper to AVX2. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#include <sysdep.h> > > +#include "svml_d_wrapper_impl.h" > > + > > + .text > > +ENTRY (_ZGVeN8v_log1p) > > +WRAPPER_IMPL_AVX512 _ZGVdN4v_log1p > > +END (_ZGVeN8v_log1p) > > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S > > new file mode 100644 > > index 0000000000..2c953d00fb > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S > > @@ -0,0 +1,25 @@ > > +/* Function log1pf vectorized with AVX-512. Wrapper to AVX2 version. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. 
> > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#include <sysdep.h> > > +#include "svml_s_wrapper_impl.h" > > + > > + .text > > +ENTRY (_ZGVeN16v_log1pf) > > +WRAPPER_IMPL_AVX512 _ZGVdN8v_log1pf > > +END (_ZGVeN16v_log1pf) > > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S > > new file mode 100644 > > index 0000000000..6f68762eaa > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S > > @@ -0,0 +1,29 @@ > > +/* Function log1pf vectorized with SSE2, wrapper version. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. 
> > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#include <sysdep.h> > > +#include "svml_s_wrapper_impl.h" > > + > > + .text > > +ENTRY (_ZGVbN4v_log1pf) > > +WRAPPER_IMPL_SSE2 log1pf > > +END (_ZGVbN4v_log1pf) > > + > > +#ifndef USE_MULTIARCH > > + libmvec_hidden_def (_ZGVbN4v_log1pf) > > +#endif > > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S > > new file mode 100644 > > index 0000000000..74f81283b1 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S > > @@ -0,0 +1,29 @@ > > +/* Function log1pf vectorized with AVX2, wrapper version. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#include <sysdep.h> > > +#include "svml_s_wrapper_impl.h" > > + > > + .text > > +ENTRY (_ZGVdN8v_log1pf) > > +WRAPPER_IMPL_AVX _ZGVbN4v_log1pf > > +END (_ZGVdN8v_log1pf) > > + > > +#ifndef USE_MULTIARCH > > + libmvec_hidden_def (_ZGVdN8v_log1pf) > > +#endif > > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S > > new file mode 100644 > > index 0000000000..f33be0e904 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S > > @@ -0,0 +1,25 @@ > > +/* Function log1pf vectorized in AVX ISA as wrapper to SSE4 ISA version. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#include <sysdep.h> > > +#include "svml_s_wrapper_impl.h" > > + > > + .text > > +ENTRY (_ZGVcN8v_log1pf) > > +WRAPPER_IMPL_AVX _ZGVbN4v_log1pf > > +END (_ZGVcN8v_log1pf) > > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c > > new file mode 100644 > > index 0000000000..18aa6aaeaa > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c > > @@ -0,0 +1 @@ > > +#include "test-double-libmvec-log1p.c" > > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c > > new file mode 100644 > > index 0000000000..18aa6aaeaa > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c > > @@ -0,0 +1 @@ > > +#include "test-double-libmvec-log1p.c" > > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c > > new file mode 100644 > > index 0000000000..18aa6aaeaa > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c > > @@ -0,0 +1 @@ > > +#include "test-double-libmvec-log1p.c" > > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c > > new file mode 100644 > > index 0000000000..40937f987a > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c > > @@ -0,0 +1,3 @@ > > +#define LIBMVEC_TYPE double > > +#define LIBMVEC_FUNC log1p > > +#include "test-vector-abi-arg1.h" > > diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c > > index 08c91ff634..38359b05e3 100644 > > --- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c > > +++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVbN2v_cbrt) > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVbN2vv_atan2) > > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVbN2v_log10) > > 
VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVbN2v_log2) > > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVbN2v_log1p) > > > > #define VEC_INT_TYPE __m128i > > > > diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c > > index a2fb0de309..17701e7731 100644 > > --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c > > +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c > > @@ -43,6 +43,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVdN4v_cbrt) > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVdN4vv_atan2) > > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVdN4v_log10) > > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVdN4v_log2) > > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVdN4v_log1p) > > > > #ifndef __ILP32__ > > # define VEC_INT_TYPE __m256i > > diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c > > index dc65a4ee25..bba62b2446 100644 > > --- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c > > +++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVcN4v_cbrt) > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVcN4vv_atan2) > > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVcN4v_log10) > > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVcN4v_log2) > > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVcN4v_log1p) > > > > #define VEC_INT_TYPE __m128i > > > > diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c > > index 253ee8c906..8a04e13a07 100644 > > --- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c > > +++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVeN8v_cbrt) > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVeN8vv_atan2) > > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVeN8v_log10) > > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVeN8v_log2) > > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), 
_ZGVeN8v_log1p) > > > > #ifndef __ILP32__ > > # define VEC_INT_TYPE __m512i > > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c > > new file mode 100644 > > index 0000000000..3395decaf4 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c > > @@ -0,0 +1 @@ > > +#include "test-float-libmvec-log1pf.c" > > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c > > new file mode 100644 > > index 0000000000..3395decaf4 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c > > @@ -0,0 +1 @@ > > +#include "test-float-libmvec-log1pf.c" > > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c > > new file mode 100644 > > index 0000000000..3395decaf4 > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c > > @@ -0,0 +1 @@ > > +#include "test-float-libmvec-log1pf.c" > > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c > > new file mode 100644 > > index 0000000000..1b36069ded > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c > > @@ -0,0 +1,3 @@ > > +#define LIBMVEC_TYPE float > > +#define LIBMVEC_FUNC log1pf > > +#include "test-vector-abi-arg1.h" > > diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c > > index 1c7db5146c..706f52c618 100644 > > --- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c > > +++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVeN16v_cbrtf) > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVeN16vv_atan2f) > > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVeN16v_log10f) > > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVeN16v_log2f) > > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), 
_ZGVeN16v_log1pf) > > > > #define VEC_INT_TYPE __m512i > > > > diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c > > index 8ec51603b3..ceace4c53a 100644 > > --- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c > > +++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVbN4v_cbrtf) > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVbN4vv_atan2f) > > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVbN4v_log10f) > > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVbN4v_log2f) > > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVbN4v_log1pf) > > > > #define VEC_INT_TYPE __m128i > > > > diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c > > index 1cb4553c7a..06a4753409 100644 > > --- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c > > +++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c > > @@ -43,6 +43,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVdN8v_cbrtf) > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVdN8vv_atan2f) > > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVdN8v_log10f) > > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVdN8v_log2f) > > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVdN8v_log1pf) > > > > /* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. 
*/ > > #undef VECTOR_WRAPPER_fFF > > diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c > > index 6ecc1792bb..a87e5298e0 100644 > > --- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c > > +++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVcN8v_cbrtf) > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVcN8vv_atan2f) > > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVcN8v_log10f) > > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVcN8v_log2f) > > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVcN8v_log1pf) > > > > #define VEC_INT_TYPE __m128i > > > > -- > > 2.31.1 > > > > LGTM. > > Reviewed-by: H.J. Lu <hjl.tools@gmail.com> > > Thanks. > > > H.J.
On Wed, Dec 29, 2021 at 3:28 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Wed, Dec 29, 2021 at 3:43 PM H.J. Lu via Libc-alpha > <libc-alpha@sourceware.org> wrote: > > > > On Tue, Dec 28, 2021 at 10:39:55PM -0800, Sunil K Pandey wrote: > > > Implement vectorized log1p/log1pf containing SSE, AVX, AVX2 and > > > AVX512 versions for libmvec as per vector ABI. It also contains > > > accuracy and ABI tests for vector log1p/log1pf with regenerated ulps. > > > --- > > > bits/libm-simd-decl-stubs.h | 11 + > > > math/bits/mathcalls.h | 2 +- > > > .../unix/sysv/linux/x86_64/libmvec.abilist | 8 + > > > sysdeps/x86/fpu/bits/math-vector.h | 4 + > > > .../x86/fpu/finclude/math-vector-fortran.h | 4 + > > > sysdeps/x86_64/fpu/Makeconfig | 1 + > > > sysdeps/x86_64/fpu/Versions | 2 + > > > sysdeps/x86_64/fpu/libm-test-ulps | 20 + > > > .../fpu/multiarch/svml_d_log1p2_core-sse2.S | 20 + > > > .../x86_64/fpu/multiarch/svml_d_log1p2_core.c | 27 + > > > .../fpu/multiarch/svml_d_log1p2_core_sse4.S | 1398 +++++++++++++++++ > > > .../fpu/multiarch/svml_d_log1p4_core-sse.S | 20 + > > > .../x86_64/fpu/multiarch/svml_d_log1p4_core.c | 27 + > > > .../fpu/multiarch/svml_d_log1p4_core_avx2.S | 1383 ++++++++++++++++ > > > .../fpu/multiarch/svml_d_log1p8_core-avx2.S | 20 + > > > .../x86_64/fpu/multiarch/svml_d_log1p8_core.c | 27 + > > > .../fpu/multiarch/svml_d_log1p8_core_avx512.S | 317 ++++ > > > .../fpu/multiarch/svml_s_log1pf16_core-avx2.S | 20 + > > > .../fpu/multiarch/svml_s_log1pf16_core.c | 28 + > > > .../multiarch/svml_s_log1pf16_core_avx512.S | 271 ++++ > > > .../fpu/multiarch/svml_s_log1pf4_core-sse2.S | 20 + > > > .../fpu/multiarch/svml_s_log1pf4_core.c | 28 + > > > .../fpu/multiarch/svml_s_log1pf4_core_sse4.S | 252 +++ > > > .../fpu/multiarch/svml_s_log1pf8_core-sse.S | 20 + > > > .../fpu/multiarch/svml_s_log1pf8_core.c | 28 + > > > .../fpu/multiarch/svml_s_log1pf8_core_avx2.S | 254 +++ > > > sysdeps/x86_64/fpu/svml_d_log1p2_core.S | 29 + > > > 
sysdeps/x86_64/fpu/svml_d_log1p4_core.S | 29 + > > > sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S | 25 + > > > sysdeps/x86_64/fpu/svml_d_log1p8_core.S | 25 + > > > sysdeps/x86_64/fpu/svml_s_log1pf16_core.S | 25 + > > > sysdeps/x86_64/fpu/svml_s_log1pf4_core.S | 29 + > > > sysdeps/x86_64/fpu/svml_s_log1pf8_core.S | 29 + > > > sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S | 25 + > > > .../fpu/test-double-libmvec-log1p-avx.c | 1 + > > > .../fpu/test-double-libmvec-log1p-avx2.c | 1 + > > > .../fpu/test-double-libmvec-log1p-avx512f.c | 1 + > > > .../x86_64/fpu/test-double-libmvec-log1p.c | 3 + > > > .../x86_64/fpu/test-double-vlen2-wrappers.c | 1 + > > > .../fpu/test-double-vlen4-avx2-wrappers.c | 1 + > > > .../x86_64/fpu/test-double-vlen4-wrappers.c | 1 + > > > .../x86_64/fpu/test-double-vlen8-wrappers.c | 1 + > > > .../fpu/test-float-libmvec-log1pf-avx.c | 1 + > > > .../fpu/test-float-libmvec-log1pf-avx2.c | 1 + > > > .../fpu/test-float-libmvec-log1pf-avx512f.c | 1 + > > > .../x86_64/fpu/test-float-libmvec-log1pf.c | 3 + > > > .../x86_64/fpu/test-float-vlen16-wrappers.c | 1 + > > > .../x86_64/fpu/test-float-vlen4-wrappers.c | 1 + > > > .../fpu/test-float-vlen8-avx2-wrappers.c | 1 + > > > .../x86_64/fpu/test-float-vlen8-wrappers.c | 1 + > > > 50 files changed, 4447 insertions(+), 1 deletion(-) > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c > > > create mode 100644 
sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c > > > create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S > > > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p2_core.S > > > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p4_core.S > > > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S > > > create mode 100644 sysdeps/x86_64/fpu/svml_d_log1p8_core.S > > > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf16_core.S > > > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf4_core.S > > > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf8_core.S > > > create mode 100644 sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S > > > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c > > > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c > > > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c > > > create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-log1p.c > > > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c > > > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c > > > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c > > > create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c > > > > > > diff --git a/bits/libm-simd-decl-stubs.h 
b/bits/libm-simd-decl-stubs.h > > > index 73252615ca..845246fab9 100644 > > > --- a/bits/libm-simd-decl-stubs.h > > > +++ b/bits/libm-simd-decl-stubs.h > > > @@ -241,4 +241,15 @@ > > > #define __DECL_SIMD_log2f32x > > > #define __DECL_SIMD_log2f64x > > > #define __DECL_SIMD_log2f128x > > > + > > > +#define __DECL_SIMD_log1p > > > +#define __DECL_SIMD_log1pf > > > +#define __DECL_SIMD_log1pl > > > +#define __DECL_SIMD_log1pf16 > > > +#define __DECL_SIMD_log1pf32 > > > +#define __DECL_SIMD_log1pf64 > > > +#define __DECL_SIMD_log1pf128 > > > +#define __DECL_SIMD_log1pf32x > > > +#define __DECL_SIMD_log1pf64x > > > +#define __DECL_SIMD_log1pf128x > > > #endif > > > diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h > > > index bfe52a4666..aa4bc61aa4 100644 > > > --- a/math/bits/mathcalls.h > > > +++ b/math/bits/mathcalls.h > > > @@ -119,7 +119,7 @@ __MATHCALL_VEC (exp10,, (_Mdouble_ __x)); > > > __MATHCALL_VEC (expm1,, (_Mdouble_ __x)); > > > > > > /* Return log(1 + X). */ > > > -__MATHCALL (log1p,, (_Mdouble_ __x)); > > > +__MATHCALL_VEC (log1p,, (_Mdouble_ __x)); > > > > > > /* Return the base 2 signed integral exponent of X. 
*/ > > > __MATHCALL (logb,, (_Mdouble_ __x)); > > > diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist > > > index fa8b016c5d..68b940606a 100644 > > > --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist > > > +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist > > > @@ -55,6 +55,7 @@ GLIBC_2.35 _ZGVbN2v_exp10 F > > > GLIBC_2.35 _ZGVbN2v_exp2 F > > > GLIBC_2.35 _ZGVbN2v_expm1 F > > > GLIBC_2.35 _ZGVbN2v_log10 F > > > +GLIBC_2.35 _ZGVbN2v_log1p F > > > GLIBC_2.35 _ZGVbN2v_log2 F > > > GLIBC_2.35 _ZGVbN2v_sinh F > > > GLIBC_2.35 _ZGVbN2vv_atan2 F > > > @@ -68,6 +69,7 @@ GLIBC_2.35 _ZGVbN4v_exp10f F > > > GLIBC_2.35 _ZGVbN4v_exp2f F > > > GLIBC_2.35 _ZGVbN4v_expm1f F > > > GLIBC_2.35 _ZGVbN4v_log10f F > > > +GLIBC_2.35 _ZGVbN4v_log1pf F > > > GLIBC_2.35 _ZGVbN4v_log2f F > > > GLIBC_2.35 _ZGVbN4v_sinhf F > > > GLIBC_2.35 _ZGVbN4vv_atan2f F > > > @@ -81,6 +83,7 @@ GLIBC_2.35 _ZGVcN4v_exp10 F > > > GLIBC_2.35 _ZGVcN4v_exp2 F > > > GLIBC_2.35 _ZGVcN4v_expm1 F > > > GLIBC_2.35 _ZGVcN4v_log10 F > > > +GLIBC_2.35 _ZGVcN4v_log1p F > > > GLIBC_2.35 _ZGVcN4v_log2 F > > > GLIBC_2.35 _ZGVcN4v_sinh F > > > GLIBC_2.35 _ZGVcN4vv_atan2 F > > > @@ -94,6 +97,7 @@ GLIBC_2.35 _ZGVcN8v_exp10f F > > > GLIBC_2.35 _ZGVcN8v_exp2f F > > > GLIBC_2.35 _ZGVcN8v_expm1f F > > > GLIBC_2.35 _ZGVcN8v_log10f F > > > +GLIBC_2.35 _ZGVcN8v_log1pf F > > > GLIBC_2.35 _ZGVcN8v_log2f F > > > GLIBC_2.35 _ZGVcN8v_sinhf F > > > GLIBC_2.35 _ZGVcN8vv_atan2f F > > > @@ -107,6 +111,7 @@ GLIBC_2.35 _ZGVdN4v_exp10 F > > > GLIBC_2.35 _ZGVdN4v_exp2 F > > > GLIBC_2.35 _ZGVdN4v_expm1 F > > > GLIBC_2.35 _ZGVdN4v_log10 F > > > +GLIBC_2.35 _ZGVdN4v_log1p F > > > GLIBC_2.35 _ZGVdN4v_log2 F > > > GLIBC_2.35 _ZGVdN4v_sinh F > > > GLIBC_2.35 _ZGVdN4vv_atan2 F > > > @@ -120,6 +125,7 @@ GLIBC_2.35 _ZGVdN8v_exp10f F > > > GLIBC_2.35 _ZGVdN8v_exp2f F > > > GLIBC_2.35 _ZGVdN8v_expm1f F > > > GLIBC_2.35 _ZGVdN8v_log10f F > > > +GLIBC_2.35 _ZGVdN8v_log1pf F > > > GLIBC_2.35 
_ZGVdN8v_log2f F > > > GLIBC_2.35 _ZGVdN8v_sinhf F > > > GLIBC_2.35 _ZGVdN8vv_atan2f F > > > @@ -133,6 +139,7 @@ GLIBC_2.35 _ZGVeN16v_exp10f F > > > GLIBC_2.35 _ZGVeN16v_exp2f F > > > GLIBC_2.35 _ZGVeN16v_expm1f F > > > GLIBC_2.35 _ZGVeN16v_log10f F > > > +GLIBC_2.35 _ZGVeN16v_log1pf F > > > GLIBC_2.35 _ZGVeN16v_log2f F > > > GLIBC_2.35 _ZGVeN16v_sinhf F > > > GLIBC_2.35 _ZGVeN16vv_atan2f F > > > @@ -146,6 +153,7 @@ GLIBC_2.35 _ZGVeN8v_exp10 F > > > GLIBC_2.35 _ZGVeN8v_exp2 F > > > GLIBC_2.35 _ZGVeN8v_expm1 F > > > GLIBC_2.35 _ZGVeN8v_log10 F > > > +GLIBC_2.35 _ZGVeN8v_log1p F > > > GLIBC_2.35 _ZGVeN8v_log2 F > > > GLIBC_2.35 _ZGVeN8v_sinh F > > > GLIBC_2.35 _ZGVeN8vv_atan2 F > > > diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h > > > index 59d284a10a..14c9db3bb3 100644 > > > --- a/sysdeps/x86/fpu/bits/math-vector.h > > > +++ b/sysdeps/x86/fpu/bits/math-vector.h > > > @@ -110,6 +110,10 @@ > > > # define __DECL_SIMD_log2 __DECL_SIMD_x86_64 > > > # undef __DECL_SIMD_log2f > > > # define __DECL_SIMD_log2f __DECL_SIMD_x86_64 > > > +# undef __DECL_SIMD_log1p > > > +# define __DECL_SIMD_log1p __DECL_SIMD_x86_64 > > > +# undef __DECL_SIMD_log1pf > > > +# define __DECL_SIMD_log1pf __DECL_SIMD_x86_64 > > > > > > # endif > > > #endif > > > diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h > > > index a2ca9a203f..3dca196432 100644 > > > --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h > > > +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h > > > @@ -54,6 +54,8 @@ > > > !GCC$ builtin (log10f) attributes simd (notinbranch) if('x86_64') > > > !GCC$ builtin (log2) attributes simd (notinbranch) if('x86_64') > > > !GCC$ builtin (log2f) attributes simd (notinbranch) if('x86_64') > > > +!GCC$ builtin (log1p) attributes simd (notinbranch) if('x86_64') > > > +!GCC$ builtin (log1pf) attributes simd (notinbranch) if('x86_64') > > > > > > !GCC$ builtin (cos) attributes simd (notinbranch) 
if('x32') > > > !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32') > > > @@ -93,3 +95,5 @@ > > > !GCC$ builtin (log10f) attributes simd (notinbranch) if('x32') > > > !GCC$ builtin (log2) attributes simd (notinbranch) if('x32') > > > !GCC$ builtin (log2f) attributes simd (notinbranch) if('x32') > > > +!GCC$ builtin (log1p) attributes simd (notinbranch) if('x32') > > > +!GCC$ builtin (log1pf) attributes simd (notinbranch) if('x32') > > > diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig > > > index 8d6d0915af..378cb06d37 100644 > > > --- a/sysdeps/x86_64/fpu/Makeconfig > > > +++ b/sysdeps/x86_64/fpu/Makeconfig > > > @@ -36,6 +36,7 @@ libmvec-funcs = \ > > > hypot \ > > > log \ > > > log10 \ > > > + log1p \ > > > log2 \ > > > pow \ > > > sin \ > > > diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions > > > index 1b48c2d642..155fb115f3 100644 > > > --- a/sysdeps/x86_64/fpu/Versions > > > +++ b/sysdeps/x86_64/fpu/Versions > > > @@ -23,6 +23,7 @@ libmvec { > > > _ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2; > > > _ZGVbN2v_expm1; _ZGVcN4v_expm1; _ZGVdN4v_expm1; _ZGVeN8v_expm1; > > > _ZGVbN2v_log10; _ZGVcN4v_log10; _ZGVdN4v_log10; _ZGVeN8v_log10; > > > + _ZGVbN2v_log1p; _ZGVcN4v_log1p; _ZGVdN4v_log1p; _ZGVeN8v_log1p; > > > _ZGVbN2v_log2; _ZGVcN4v_log2; _ZGVdN4v_log2; _ZGVeN8v_log2; > > > _ZGVbN2v_sinh; _ZGVcN4v_sinh; _ZGVdN4v_sinh; _ZGVeN8v_sinh; > > > _ZGVbN2vv_atan2; _ZGVcN4vv_atan2; _ZGVdN4vv_atan2; _ZGVeN8vv_atan2; > > > @@ -36,6 +37,7 @@ libmvec { > > > _ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f; > > > _ZGVbN4v_expm1f; _ZGVcN8v_expm1f; _ZGVdN8v_expm1f; _ZGVeN16v_expm1f; > > > _ZGVbN4v_log10f; _ZGVcN8v_log10f; _ZGVdN8v_log10f; _ZGVeN16v_log10f; > > > + _ZGVbN4v_log1pf; _ZGVcN8v_log1pf; _ZGVdN8v_log1pf; _ZGVeN16v_log1pf; > > > _ZGVbN4v_log2f; _ZGVcN8v_log2f; _ZGVdN8v_log2f; _ZGVeN16v_log2f; > > > _ZGVbN4v_sinhf; _ZGVcN8v_sinhf; _ZGVdN8v_sinhf; _ZGVeN16v_sinhf; > > > 
_ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f; > > > diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps > > > index 3b7f3cee6f..a2b15a795b 100644 > > > --- a/sysdeps/x86_64/fpu/libm-test-ulps > > > +++ b/sysdeps/x86_64/fpu/libm-test-ulps > > > @@ -1685,6 +1685,26 @@ float: 2 > > > float128: 2 > > > ldouble: 3 > > > > > > +Function: "log1p_vlen16": > > > +float: 2 > > > + > > > +Function: "log1p_vlen2": > > > +double: 1 > > > + > > > +Function: "log1p_vlen4": > > > +double: 1 > > > +float: 2 > > > + > > > +Function: "log1p_vlen4_avx2": > > > +double: 1 > > > + > > > +Function: "log1p_vlen8": > > > +double: 1 > > > +float: 2 > > > + > > > +Function: "log1p_vlen8_avx2": > > > +float: 2 > > > + > > > Function: "log2": > > > double: 2 > > > float: 1 > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S > > > new file mode 100644 > > > index 0000000000..8004088346 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S > > > @@ -0,0 +1,20 @@ > > > +/* SSE2 version of vectorized log1p, vector length is 2. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. 
> > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#define _ZGVbN2v_log1p _ZGVbN2v_log1p_sse2 > > > +#include "../svml_d_log1p2_core.S" > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c > > > new file mode 100644 > > > index 0000000000..35ca620aba > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c > > > @@ -0,0 +1,27 @@ > > > +/* Multiple versions of vectorized log1p, vector length is 2. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. 
*/ > > > + > > > +#define SYMBOL_NAME _ZGVbN2v_log1p > > > +#include "ifunc-mathvec-sse4_1.h" > > > + > > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > > + > > > +#ifdef SHARED > > > +__hidden_ver1 (_ZGVbN2v_log1p, __GI__ZGVbN2v_log1p, __redirect__ZGVbN2v_log1p) > > > + __attribute__ ((visibility ("hidden"))); > > > +#endif > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S > > > new file mode 100644 > > > index 0000000000..9d3f0647b4 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S > > > @@ -0,0 +1,1398 @@ > > > +/* Function log1p vectorized with SSE4. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + https://www.gnu.org/licenses/. 
*/ > > > + > > > +/* > > > + * ALGORITHM DESCRIPTION: > > > + * > > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > > + * Get short reciprocal approximation Rcp ~ 1/xh > > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > > + * log(Rcp) is tabulated > > > + * > > > + * > > > + */ > > > + > > > +/* Offsets for data table __svml_dlog1p_data_internal > > > + */ > > > +#define Log_HA_table 0 > > Where is this used? This field isn't used directly, but accessed via table lookup code. A macro is defined for each field used, directly and indirectly. > > > > +#define Log_LA_table 8208 > > > +#define poly_coeff 12320 > > > +#define ExpMask 12384 > > > +#define Two10 12400 > > > +#define MinLog1p 12416 > > > +#define MaxLog1p 12432 > > > +#define One 12448 > > > +#define SgnMask 12464 > > > +#define XThreshold 12480 > > > +#define XhMask 12496 > > > +#define Threshold 12512 > > > +#define Bias 12528 > > > +#define Bias1 12544 > > > +#define ExpMask0 12560 > > > +#define ExpMask2 12576 > > > +#define L2 12592 > > > + > > > +/* Lookup bias for data table __svml_dlog1p_data_internal. 
*/ > > > +#define Table_Lookup_Bias -0x405ff0 > > > + > > > +#include <sysdep.h> > > > + > > > + .text > > > + .section .text.sse4,"ax",@progbits > > > +ENTRY(_ZGVbN2v_log1p_sse4) > > > + pushq %rbp > > > + cfi_def_cfa_offset(16) > > > + movq %rsp, %rbp > > > + cfi_def_cfa(6, 16) > > > + cfi_offset(6, -16) > > > + andq $-32, %rsp > > > + subq $64, %rsp > > > + movaps %xmm0, %xmm7 > > > + > > > +/* SgnMask used by all accuracies */ > > > + movups SgnMask+__svml_dlog1p_data_internal(%rip), %xmm6 > > > + lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %rsi > > > + movaps %xmm6, %xmm8 > > > + movaps %xmm7, %xmm15 > > > + movups One+__svml_dlog1p_data_internal(%rip), %xmm0 > > > + andps %xmm7, %xmm8 > > > + cmpltpd XThreshold+__svml_dlog1p_data_internal(%rip), %xmm8 > > > + cmpnlepd MaxLog1p+__svml_dlog1p_data_internal(%rip), %xmm15 > > > + movaps %xmm0, %xmm4 > > > + > > > +/* compute 1+x as high, low parts */ > > > + movaps %xmm0, %xmm9 > > > + addpd %xmm7, %xmm4 > > > + maxpd %xmm7, %xmm9 > > > + orps XhMask+__svml_dlog1p_data_internal(%rip), %xmm8 > > > + movaps %xmm0, %xmm5 > > > + > > > +/* preserve mantissa, set input exponent to 2^(-10) */ > > > + movups ExpMask+__svml_dlog1p_data_internal(%rip), %xmm3 > > > + andps %xmm8, %xmm4 > > > + andps %xmm4, %xmm3 > > > + > > > +/* check range */ > > > + movaps %xmm7, %xmm8 > > > + orps Two10+__svml_dlog1p_data_internal(%rip), %xmm3 > > > + > > > +/* Compute SignMask for all accuracies, including EP */ > > > + andnps %xmm7, %xmm6 > > > + > > > +/* reciprocal approximation good to at least 11 bits */ > > > + cvtpd2ps %xmm3, %xmm10 > > > + minpd %xmm7, %xmm5 > > > + subpd %xmm4, %xmm9 > > > + cmpltpd MinLog1p+__svml_dlog1p_data_internal(%rip), %xmm8 > > > + addpd %xmm9, %xmm5 > > > + movlhps %xmm10, %xmm10 > > > + orps %xmm15, %xmm8 > > > + rcpps %xmm10, %xmm11 > > > + > > > +/* combine and get argument value range mask */ > > > + movmskpd %xmm8, %edx > > > + > > > +/* round reciprocal to nearest integer, will 
have 1+9 mantissa bits */ > > > + movups .FLT_16(%rip), %xmm13 > > > + > > > +/* exponent of X needed to scale Xl */ > > > + movdqu ExpMask0+__svml_dlog1p_data_internal(%rip), %xmm12 > > > + cvtps2pd %xmm11, %xmm1 > > > + addpd %xmm13, %xmm1 > > > + subpd %xmm13, %xmm1 > > > + > > > +/* 2^ (-10-exp(X) ) */ > > > + movdqu ExpMask2+__svml_dlog1p_data_internal(%rip), %xmm2 > > > + pand %xmm4, %xmm12 > > > + psubq %xmm12, %xmm2 > > > + mulpd %xmm1, %xmm3 > > > + > > > +/* scale DblRcp */ > > > + mulpd %xmm1, %xmm2 > > > + subpd %xmm0, %xmm3 > > > + > > > +/* > > > + * argument reduction > > > + * VQFMS( D, R, X, DblRcp1, One ); > > > + */ > > > + mulpd %xmm2, %xmm5 > > > + addpd %xmm5, %xmm3 > > > + > > > +/* exponent*log(2.0) */ > > > + movups Threshold+__svml_dlog1p_data_internal(%rip), %xmm10 > > > + > > > +/* exponent bits */ > > > + psrlq $20, %xmm4 > > > + pshufd $221, %xmm4, %xmm14 > > > + > > > +/* > > > + * prepare table index > > > + * table lookup > > > + */ > > > + movaps %xmm1, %xmm4 > > > + cmpltpd %xmm1, %xmm10 > > > + > > > +/* biased exponent in DP format */ > > > + cvtdq2pd %xmm14, %xmm0 > > > + > > > +/* polynomial */ > > > + movups poly_coeff+__svml_dlog1p_data_internal(%rip), %xmm1 > > > + movaps %xmm3, %xmm5 > > > + mulpd %xmm3, %xmm1 > > > + mulpd %xmm3, %xmm5 > > > + addpd poly_coeff+16+__svml_dlog1p_data_internal(%rip), %xmm1 > > > + movups poly_coeff+32+__svml_dlog1p_data_internal(%rip), %xmm2 > > > + psrlq $40, %xmm4 > > > + mulpd %xmm3, %xmm2 > > > + mulpd %xmm5, %xmm1 > > > + addpd poly_coeff+48+__svml_dlog1p_data_internal(%rip), %xmm2 > > > + movd %xmm4, %eax > > > + andps Bias+__svml_dlog1p_data_internal(%rip), %xmm10 > > > + addpd %xmm1, %xmm2 > > > + > > > +/* reconstruction */ > > > + mulpd %xmm2, %xmm5 > > > + orps Bias1+__svml_dlog1p_data_internal(%rip), %xmm10 > > > + pshufd $2, %xmm4, %xmm9 > > > + subpd %xmm10, %xmm0 > > > + addpd %xmm5, %xmm3 > > > + movd %xmm9, %ecx > > > + mulpd L2+__svml_dlog1p_data_internal(%rip), %xmm0 > > > 
+ movslq %eax, %rax > > > + movslq %ecx, %rcx > > > + movsd (%rsi,%rax), %xmm11 > > > + movhpd (%rsi,%rcx), %xmm11 > > > + addpd %xmm3, %xmm11 > > > + addpd %xmm11, %xmm0 > > > + > > > +/* OR in the Sign of input argument to produce correct log1p(-0) */ > > > + orps %xmm6, %xmm0 > > > + testl %edx, %edx > > > + > > > +/* Go to special inputs processing branch */ > > > + jne L(SPECIAL_VALUES_BRANCH) > > > + # LOE rbx r12 r13 r14 r15 edx xmm0 xmm7 > > > + > > > +/* Restore registers > > > + * and exit the function > > > + */ > > > + > > > +L(EXIT): > > > + movq %rbp, %rsp > > > + popq %rbp > > > + cfi_def_cfa(7, 8) > > > + cfi_restore(6) > > > + ret > > > + cfi_def_cfa(6, 16) > > > + cfi_offset(6, -16) > > > + > > > +/* Branch to process > > > + * special inputs > > > + */ > > > + > > > +L(SPECIAL_VALUES_BRANCH): > > > + movups %xmm7, 32(%rsp) > > > + movups %xmm0, 48(%rsp) > > > + # LOE rbx r12 r13 r14 r15 edx > > > + > > > + xorl %eax, %eax > > > + movq %r12, 16(%rsp) > > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22 > > > + movl %eax, %r12d > > > + movq %r13, 8(%rsp) > > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 > > > + movl %edx, %r13d > > > + movq %r14, (%rsp) > > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 > > > + # LOE rbx r15 r12d r13d > > > + > > > +/* Range mask > > > + * bits check > > > + */ > > > + > > > +L(RANGEMASK_CHECK): > > > + btl %r12d, %r13d > 
> > + > > > +/* Call scalar math function */ > > > + jc L(SCALAR_MATH_CALL) > > > + # LOE rbx r15 r12d r13d > > > + > > > +/* Special inputs > > > + * processing loop > > > + */ > > > + > > > +L(SPECIAL_VALUES_LOOP): > > > + incl %r12d > > > + cmpl $2, %r12d > > > + > > > +/* Check bits in range mask */ > > > + jl L(RANGEMASK_CHECK) > > > + # LOE rbx r15 r12d r13d > > > + > > > + movq 16(%rsp), %r12 > > > + cfi_restore(12) > > > + movq 8(%rsp), %r13 > > > + cfi_restore(13) > > > + movq (%rsp), %r14 > > > + cfi_restore(14) > > > + movups 48(%rsp), %xmm0 > > > + > > > +/* Go to exit */ > > > + jmp L(EXIT) > > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22 > > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 > > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 > > > + # LOE rbx r12 r13 r14 r15 xmm0 > > > + > > > +/* Scalar math function call > > > + * to process special input > > > + */ > > > + > > > +L(SCALAR_MATH_CALL): > > > + movl %r12d, %r14d > > > + movsd 32(%rsp,%r14,8), %xmm0 > > > + call log1p@PLT > > > + # LOE rbx r14 r15 r12d r13d xmm0 > > > + > > > + movsd %xmm0, 48(%rsp,%r14,8) > > > + > > > +/* Process special inputs in loop */ > > > + jmp L(SPECIAL_VALUES_LOOP) > > > + # LOE rbx r15 r12d r13d > > > +END(_ZGVbN2v_log1p_sse4) > > > + > > > + .section .rodata, "a" > > > + .align 16 > > > + > > > +#ifdef __svml_dlog1p_data_internal_typedef > > > +typedef unsigned int 
VUINT32; > > > +typedef struct { > > > + __declspec(align(16)) VUINT32 Log_HA_table[(1<<10)+2][2]; > > > + __declspec(align(16)) VUINT32 Log_LA_table[(1<<9)+1][2]; > > > + __declspec(align(16)) VUINT32 poly_coeff[4][2][2]; > > > + __declspec(align(16)) VUINT32 ExpMask[2][2]; > > > + __declspec(align(16)) VUINT32 Two10[2][2]; > > > + __declspec(align(16)) VUINT32 MinLog1p[2][2]; > > > + __declspec(align(16)) VUINT32 MaxLog1p[2][2]; > > > + __declspec(align(16)) VUINT32 One[2][2]; > > > + __declspec(align(16)) VUINT32 SgnMask[2][2]; > > > + __declspec(align(16)) VUINT32 XThreshold[2][2]; > > > + __declspec(align(16)) VUINT32 XhMask[2][2]; > > > + __declspec(align(16)) VUINT32 Threshold[2][2]; > > > + __declspec(align(16)) VUINT32 Bias[2][2]; > > > + __declspec(align(16)) VUINT32 Bias1[2][2]; > > > + __declspec(align(16)) VUINT32 ExpMask0[2][2]; > > > + __declspec(align(16)) VUINT32 ExpMask2[2][2]; > > > + __declspec(align(16)) VUINT32 L2[2][2]; > > > +} __svml_dlog1p_data_internal; > > > +#endif > > > +__svml_dlog1p_data_internal: > > > + /* Log_HA_table */ > > > + .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100 > > > + .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a > > > + .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff > > > + .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a > > > + .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb > > > + .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e > > > + .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b > > > + .quad 0xc0862347acebaf68, 0xbe1cef3b152048af > > > + .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e > > > + .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4 > > > + .quad 0xc08623537ac30980, 0xbe1cefc4642ee597 > > > + .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16 > > > + .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6 > > > + .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362 > > > + .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557 > > > + .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b > > > + .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed > > > + .quad 
0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed > > > + .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f > > > + .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce > > > + .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7 > > > + .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1 > > > + .quad 0xc086238206e94218, 0xbe1ceee898588610 > > > + .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea > > > + .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6 > > > + .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6 > > > + .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165 > > > + .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1 > > > + .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b > > > + .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670 > > > + .quad 0xc08623a07b28ae60, 0xbe1cef359363787c > > > + .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c > > > + .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84 > > > + .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7 > > > + .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b > > > + .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf > > > + .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62 > > > + .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b > > > + .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98 > > > + .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87 > > > + .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff > > > + .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798 > > > + .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e > > > + .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde > > > + .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b > > > + .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c > > > + .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98 > > > + .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f > > > + .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358 > > > + .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380 > > > + .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4 > > > + .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b > > > + .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2 > > > + .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4 > > > + .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400 
> > > + .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7 > > > + .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a > > > + .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d > > > + .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b > > > + .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575 > > > + .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951 > > > + .quad 0xc086241263e87f50, 0xbe1cf16e74768529 > > > + .quad 0xc0862415f6193658, 0xbe1cefec64b8becb > > > + .quad 0xc086241986b28f30, 0xbe1cf0838d210baa > > > + .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11 > > > + .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805 > > > + .quad 0xc08624242f008380, 0xbe1ceea988c5a417 > > > + .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5 > > > + .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38 > > > + .quad 0xc086242ec92eaee8, 0xbe1cef0946455411 > > > + .quad 0xc08624324ecbaf98, 0xbe1cefea60907739 > > > + .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42 > > > + .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d > > > + .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e > > > + .quad 0xc086244055d2c968, 0xbe1cef345284c119 > > > + .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219 > > > + .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114 > > > + .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189 > > > + .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f > > > + .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f > > > + .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5 > > > + .quad 0xc0862458a789e250, 0xbe1cf0b173796a31 > > > + .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d > > > + .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb > > > + .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7 > > > + .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f > > > + .quad 0xc0862469d9a591c0, 0xbe1cef503d947663 > > > + .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2 > > > + .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc > > > + .quad 0xc086247419475160, 0xbe1cf03dd9922331 > > > + .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129 > > > + .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6 > > > + .quad 0xc086247e4bd8bd10, 
0xbe1ceec0ac56d100 > > > + .quad 0xc0862481af27c528, 0xbe1cee8a6593278a > > > + .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7 > > > + .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8 > > > + .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002 > > > + .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4 > > > + .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c > > > + .quad 0xc0862495e5179270, 0xbe1cee757f20c326 > > > + .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4 > > > + .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97 > > > + .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb > > > + .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e > > > + .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b > > > + .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80 > > > + .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71 > > > + .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9 > > > + .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139 > > > + .quad 0xc08624b72472a528, 0xbe1cf031c931c11f > > > + .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7 > > > + .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d > > > + .quad 0xc08624c103245238, 0xbe1cefd492f1ba75 > > > + .quad 0xc08624c44aacab08, 0xbe1cf1253e154466 > > > + .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55 > > > + .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe > > > + .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f > > > + .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968 > > > + .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78 > > > + .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75 > > > + .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2 > > > + .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d > > > + .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed > > > + .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f > > > + .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65 > > > + .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078 > > > + .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a > > > + .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a > > > + .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2 > > > + .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc > > > + .quad 
0xc08624fb40cf6390, 0xbe1cef4dd186e501 > > > + .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7 > > > + .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c > > > + .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c > > > + .quad 0xc0862507f9448db0, 0xbe1cf082da464994 > > > + .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf > > > + .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531 > > > + .quad 0xc08625117667dd78, 0xbe1cf1106599c962 > > > + .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f > > > + .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6 > > > + .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4 > > > + .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092 > > > + .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd > > > + .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7 > > > + .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25 > > > + .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d > > > + .quad 0xc086252dab033898, 0xbe1cf220bba8861f > > > + .quad 0xc0862530c732b078, 0xbe1cef51e310eae2 > > > + .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae > > > + .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8 > > > + .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171 > > > + .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408 > > > + .quad 0xc08625404216d160, 0xbe1cf22d2536f06b > > > + .quad 0xc08625435715e498, 0xbe1cef6abbf2e268 > > > + .quad 0xc08625466ae57648, 0xbe1cf093a14789f5 > > > + .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c > > > + .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc > > > + .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157 > > > + .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997 > > > + .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff > > > + .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f > > > + .quad 0xc086255bd4ac4590, 0xbe1cf1b4c51113c9 > > > + .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d > > > + .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc > > > + .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9 > > > + .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5 > > > + .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b > > > + .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996 
> > > + .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945 > > > + .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995 > > > + .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c > > > + .quad 0xc086257a09acaae0, 0xbe1cf172c3078947 > > > + .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22 > > > + .quad 0xc086258006ae71b8, 0xbe1cefdb80426923 > > > + .quad 0xc08625830381da08, 0xbe1ceef1391a0372 > > > + .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13 > > > + .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83 > > > + .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9 > > > + .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0 > > > + .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81 > > > + .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766 > > > + .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b > > > + .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2 > > > + .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec > > > + .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e > > > + .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7 > > > + .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780 > > > + .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11 > > > + .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219 > > > + .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160 > > > + .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495 > > > + .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5 > > > + .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5 > > > + .quad 0xc08625baf725ae28, 0xbe1cf05c80779283 > > > + .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889 > > > + .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124 > > > + .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86 > > > + .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092 > > > + .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb > > > + .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12 > > > + .quad 0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7 > > > + .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e > > > + .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701 > > > + .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812 > > > + .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e > > > + .quad 0xc08625dd881990b0, 
0xbe1cf14e1325c5e4 > > > + .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12 > > > + .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21 > > > + .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2 > > > + .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece > > > + .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12 > > > + .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad > > > + .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3 > > > + .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9 > > > + .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1 > > > + .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9 > > > + .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2 > > > + .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51 > > > + .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e > > > + .quad 0xc08626052294df58, 0xbe1cf1b745c57716 > > > + .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23 > > > + .quad 0xc086260abb103458, 0xbe1cef480ff1acd2 > > > + .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef > > > + .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5 > > > + .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6 > > > + .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a > > > + .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545 > > > + .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011 > > > + .quad 0xc086261e32267e98, 0xbe1cf19917010e96 > > > + .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985 > > > + .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3 > > > + .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c > > > + .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50 > > > + .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68 > > > + .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9 > > > + .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b > > > + .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238 > > > + .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e > > > + .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d > > > + .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba > > > + .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279 > > > + .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085 > > > + .quad 0xc086264494738e08, 0xbe1cf06797bd03b2 > > > + .quad 
0xc08626474b5536b8, 0xbe1cef91b9b7ffc1 > > > + .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f > > > + .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a > > > + .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39 > > > + .quad 0xc08626521daf7758, 0xbe1cf252595aceb3 > > > + .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2 > > > + .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa > > > + .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366 > > > + .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b > > > + .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0 > > > + .quad 0xc08626623df56e38, 0xbe1cf080e10b8365 > > > + .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544 > > > + .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9 > > > + .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9 > > > + .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2 > > > + .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c > > > + .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6 > > > + .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d > > > + .quad 0xc08626778c3d4798, 0xbe1cefe260819380 > > > + .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3 > > > + .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa > > > + .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1 > > > + .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52 > > > + .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd > > > + .quad 0xc086268762086350, 0xbe1cefaee1edfa35 > > > + .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936 > > > + .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed > > > + .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49 > > > + .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e > > > + .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc > > > + .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840 > > > + .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be > > > + .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c > > > + .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06 > > > + .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e > > > + .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3 > > > + .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68 > > > + .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5 
> > > + .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986 > > > + .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d > > > + .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26 > > > + .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06 > > > + .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652 > > > + .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f > > > + .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c > > > + .quad 0xc08626bddc737648, 0xbe1ceec10a020e73 > > > + .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7 > > > + .quad 0xc08626c2f9890ca0, 0xbe1cf0cb8f169ffe > > > + .quad 0xc08626c586da9388, 0xbe1cef7de2452430 > > > + .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae > > > + .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d > > > + .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3 > > > + .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d > > > + .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e > > > + .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64 > > > + .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2 > > > + .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d > > > + .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab > > > + .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153 > > > + .quad 0xc08626e164224880, 0xbe1ceeb431709788 > > > + .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5 > > > + .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b > > > + .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93 > > > + .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8 > > > + .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2 > > > + .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6 > > > + .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef > > > + .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339 > > > + .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1 > > > + .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28 > > > + .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f > > > + .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3 > > > + .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6 > > > + .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6 > > > + .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3 > > > + .quad 0xc086270941934b10, 
0xbe1ceefe32981f2c > > > + .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445 > > > + .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c > > > + .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f > > > + .quad 0xc08627131a321318, 0xbe1cef04ac0fb802 > > > + .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd > > > + .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5 > > > + .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570 > > > + .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85 > > > + .quad 0xc086271f58064068, 0xbe1cef092a785e3f > > > + .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30 > > > + .quad 0xc086272438546be8, 0xbe1cf210907ded8b > > > + .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99 > > > + .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc > > > + .quad 0xc086272b833b8df0, 0xbe1cf06874992df5 > > > + .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899 > > > + .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99 > > > + .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe > > > + .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d > > > + .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8 > > > + .quad 0xc086273a05367688, 0xbe1cf18656c50806 > > > + .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a > > > + .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911 > > > + .quad 0xc08627413c621848, 0xbe1cf188a4ea680c > > > + .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80 > > > + .quad 0xc086274608397868, 0xbe1cf25a328c28e2 > > > + .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8 > > > + .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a > > > + .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228 > > > + .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c > > > + .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44 > > > + .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2 > > > + .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4 > > > + .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a > > > + .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9 > > > + .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627 > > > + .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e > > > + .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee > > > + .quad 
0xc0862764efdbf768, 0xbe1ceeb06e1d0dad > > > + .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5 > > > + .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f > > > + .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312 > > > + .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85 > > > + .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011 > > > + .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7 > > > + .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da > > > + .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554 > > > + .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377 > > > + .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd > > > + .quad 0xc086277eba506158, 0xbe1cf0b911b029f0 > > > + .quad 0xc08627810e6f4028, 0xbe1cefdc24719766 > > > + .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7 > > > + .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec > > > + .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc > > > + .quad 0xc086278a58297918, 0xbe1cf053073872bf > > > + .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947 > > > + .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234 > > > + .quad 0xc086279148685aa0, 0xbe1cf162204794a8 > > > + .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac > > > + .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3 > > > + .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388 > > > + .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5 > > > + .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f > > > + .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a > > > + .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f > > > + .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f > > > + .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26 > > > + .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a > > > + .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81 > > > + .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d > > > + .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893 > > > + .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0 > > > + .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8 > > > + .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00 > > > + .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2 > > > + .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4 
> > > + .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7 > > > + .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3 > > > + .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d > > > + .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e > > > + .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93 > > > + .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a > > > + .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9 > > > + .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f > > > + .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1 > > > + .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4 > > > + .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb > > > + .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b > > > + .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b > > > + .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96 > > > + .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477 > > > + .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2 > > > + .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c > > > + .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875 > > > + .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522 > > > + .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57 > > > + .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e > > > + .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548 > > > + .quad 0xc08627edd34756b8, 0xbe1cef36b3366305 > > > + .quad 0xc08627f007f0a408, 0xbe1cf18134625550 > > > + .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11 > > > + .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc > > > + .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8 > > > + .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe > > > + .quad 0xc08627fb06290f90, 0xbe1cf25188430e25 > > > + .quad 0xc08627fd37324070, 0xbe1ceea1713490f9 > > > + .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c > > > + .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b > > > + .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b > > > + .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c > > > + .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a > > > + .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4 > > > + .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256 > > > + .quad 0xc086280eaa003050, 
0xbe1cf010ad787fea > > > + .quad 0xc0862810d5af5880, 0xbe1cee622478393d > > > + .quad 0xc086281300c7e368, 0xbe1cf01c7482564f > > > + .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536 > > > + .quad 0xc086281755366778, 0xbe1cef2edae5837d > > > + .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9 > > > + .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8 > > > + .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83 > > > + .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4 > > > + .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9 > > > + .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2 > > > + .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d > > > + .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1 > > > + .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b > > > + .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02 > > > + .quad 0xc086282efc0ca328, 0xbe1cf0bc139379a9 > > > + .quad 0xc08628311f099420, 0xbe1cef247a9ec596 > > > + .quad 0xc086283341749490, 0xbe1cef74bbcc488a > > > + .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e > > > + .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810 > > > + .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8 > > > + .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065 > > > + .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e > > > + .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234 > > > + .quad 0xc08628422284b168, 0xbe1cf0abf7638127 > > > + .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058 > > > + .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c > > > + .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1 > > > + .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43 > > > + .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09 > > > + .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60 > > > + .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393 > > > + .quad 0xc0862853021d4588, 0xbe1cf176adb417f7 > > > + .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da > > > + .quad 0xc08628573479b220, 0xbe1ceec34cf49523 > > > + .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb > > > + .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b > > > + .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d > > > + .quad 
0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5 > > > + .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792 > > > + .quad 0xc0862863be697458, 0xbe1cf097f890c6f8 > > > + .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc > > > + .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7 > > > + .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98 > > > + .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7 > > > + .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d > > > + .quad 0xc086287034d0b690, 0xbe1ceff262d0a248 > > > + .quad 0xc086287246aab180, 0xbe1cefa7bc194186 > > > + .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9 > > > + .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07 > > > + .quad 0xc086287879041490, 0xbe1cf034803c8a48 > > > + .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f > > > + .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7 > > > + .quad 0xc086287ea6946958, 0xbe1cefb1e4625943 > > > + .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0 > > > + .quad 0xc0862882c24faff8, 0xbe1cee9896d016da > > > + .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc > > > + .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34 > > > + .quad 0xc0862888e7f699e0, 0xbe1cf05603549486 > > > + .quad 0xc086288af37750b0, 0xbe1cef50fff513d3 > > > + .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0 > > > + .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d > > > + .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f > > > + .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed > > > + .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d > > > + .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646 > > > + .quad 0xc0862899356c1150, 0xbe1ceec4501167e9 > > > + .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f > > > + .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35 > > > + .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a > > > + .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464 > > > + .quad 0xc08628a355104818, 0xbe1cf0435e2782b0 > > > + .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c > > > + .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d > > > + .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2 > > > + .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9 
> > > + .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979 > > > + .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c > > > + .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32 > > > + .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2 > > > + .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303 > > > + .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880 > > > + /*== Log_LA_table ==*/ > > > + .align 16 > > > + .quad 0x8000000000000000 > > > + .quad 0xbf5ff802a9ab10e6 > > > + .quad 0xbf6ff00aa2b10bc0 > > > + .quad 0xbf77ee11ebd82e94 > > > + .quad 0xbf7fe02a6b106789 > > > + .quad 0xbf83e7295d25a7d9 > > > + .quad 0xbf87dc475f810a77 > > > + .quad 0xbf8bcf712c74384c > > > + .quad 0xbf8fc0a8b0fc03e4 > > > + .quad 0xbf91d7f7eb9eebe7 > > > + .quad 0xbf93cea44346a575 > > > + .quad 0xbf95c45a51b8d389 > > > + .quad 0xbf97b91b07d5b11b > > > + .quad 0xbf99ace7551cc514 > > > + .quad 0xbf9b9fc027af9198 > > > + .quad 0xbf9d91a66c543cc4 > > > + .quad 0xbf9f829b0e783300 > > > + .quad 0xbfa0b94f7c196176 > > > + .quad 0xbfa1b0d98923d980 > > > + .quad 0xbfa2a7ec2214e873 > > > + .quad 0xbfa39e87b9febd60 > > > + .quad 0xbfa494acc34d911c > > > + .quad 0xbfa58a5bafc8e4d5 > > > + .quad 0xbfa67f94f094bd98 > > > + .quad 0xbfa77458f632dcfc > > > + .quad 0xbfa868a83083f6cf > > > + .quad 0xbfa95c830ec8e3eb > > > + .quad 0xbfaa4fe9ffa3d235 > > > + .quad 0xbfab42dd711971bf > > > + .quad 0xbfac355dd0921f2d > > > + .quad 0xbfad276b8adb0b52 > > > + .quad 0xbfae19070c276016 > > > + .quad 0xbfaf0a30c01162a6 > > > + .quad 0xbfaffae9119b9303 > > > + .quad 0xbfb075983598e471 > > > + .quad 0xbfb0ed839b5526fe > > > + .quad 0xbfb16536eea37ae1 > > > + .quad 0xbfb1dcb263db1944 > > > + .quad 0xbfb253f62f0a1417 > > > + .quad 0xbfb2cb0283f5de1f > > > + .quad 0xbfb341d7961bd1d1 > > > + .quad 0xbfb3b87598b1b6ee > > > + .quad 0xbfb42edcbea646f0 > > > + .quad 0xbfb4a50d3aa1b040 > > > + .quad 0xbfb51b073f06183f > > > + .quad 0xbfb590cafdf01c28 > > > + .quad 0xbfb60658a93750c4 > > > + .quad 0xbfb67bb0726ec0fc > > > + .quad 0xbfb6f0d28ae56b4c > > > + .quad 
0xbfb765bf23a6be13 > > > + .quad 0xbfb7da766d7b12cd > > > + .quad 0xbfb84ef898e8282a > > > + .quad 0xbfb8c345d6319b21 > > > + .quad 0xbfb9375e55595ede > > > + .quad 0xbfb9ab42462033ad > > > + .quad 0xbfba1ef1d8061cd4 > > > + .quad 0xbfba926d3a4ad563 > > > + .quad 0xbfbb05b49bee43fe > > > + .quad 0xbfbb78c82bb0eda1 > > > + .quad 0xbfbbeba818146765 > > > + .quad 0xbfbc5e548f5bc743 > > > + .quad 0xbfbcd0cdbf8c13e1 > > > + .quad 0xbfbd4313d66cb35d > > > + .quad 0xbfbdb5270187d927 > > > + .quad 0xbfbe27076e2af2e6 > > > + .quad 0xbfbe98b549671467 > > > + .quad 0xbfbf0a30c01162a6 > > > + .quad 0xbfbf7b79fec37ddf > > > + .quad 0xbfbfec9131dbeabb > > > + .quad 0xbfc02ebb42bf3d4b > > > + .quad 0xbfc0671512ca596e > > > + .quad 0xbfc09f561ee719c3 > > > + .quad 0xbfc0d77e7cd08e59 > > > + .quad 0xbfc10f8e422539b1 > > > + .quad 0xbfc14785846742ac > > > + .quad 0xbfc17f6458fca611 > > > + .quad 0xbfc1b72ad52f67a0 > > > + .quad 0xbfc1eed90e2dc2c3 > > > + .quad 0xbfc2266f190a5acb > > > + .quad 0xbfc25ded0abc6ad2 > > > + .quad 0xbfc29552f81ff523 > > > + .quad 0xbfc2cca0f5f5f251 > > > + .quad 0xbfc303d718e47fd3 > > > + .quad 0xbfc33af575770e4f > > > + .quad 0xbfc371fc201e8f74 > > > + .quad 0xbfc3a8eb2d31a376 > > > + .quad 0xbfc3dfc2b0ecc62a > > > + .quad 0xbfc41682bf727bc0 > > > + .quad 0xbfc44d2b6ccb7d1e > > > + .quad 0xbfc483bccce6e3dd > > > + .quad 0xbfc4ba36f39a55e5 > > > + .quad 0xbfc4f099f4a230b2 > > > + .quad 0xbfc526e5e3a1b438 > > > + .quad 0xbfc55d1ad4232d6f > > > + .quad 0xbfc59338d9982086 > > > + .quad 0xbfc5c940075972b9 > > > + .quad 0xbfc5ff3070a793d4 > > > + .quad 0xbfc6350a28aaa758 > > > + .quad 0xbfc66acd4272ad51 > > > + .quad 0xbfc6a079d0f7aad2 > > > + .quad 0xbfc6d60fe719d21d > > > + .quad 0xbfc70b8f97a1aa75 > > > + .quad 0xbfc740f8f54037a5 > > > + .quad 0xbfc7764c128f2127 > > > + .quad 0xbfc7ab890210d909 > > > + .quad 0xbfc7e0afd630c274 > > > + .quad 0xbfc815c0a14357eb > > > + .quad 0xbfc84abb75865139 > > > + .quad 0xbfc87fa06520c911 > > > + .quad 0xbfc8b46f8223625b 
> > > + .quad 0xbfc8e928de886d41 > > > + .quad 0xbfc91dcc8c340bde > > > + .quad 0xbfc9525a9cf456b4 > > > + .quad 0xbfc986d3228180ca > > > + .quad 0xbfc9bb362e7dfb83 > > > + .quad 0xbfc9ef83d2769a34 > > > + .quad 0xbfca23bc1fe2b563 > > > + .quad 0xbfca57df28244dcd > > > + .quad 0xbfca8becfc882f19 > > > + .quad 0xbfcabfe5ae46124c > > > + .quad 0xbfcaf3c94e80bff3 > > > + .quad 0xbfcb2797ee46320c > > > + .quad 0xbfcb5b519e8fb5a4 > > > + .quad 0xbfcb8ef670420c3b > > > + .quad 0xbfcbc286742d8cd6 > > > + .quad 0xbfcbf601bb0e44e2 > > > + .quad 0xbfcc2968558c18c1 > > > + .quad 0xbfcc5cba543ae425 > > > + .quad 0xbfcc8ff7c79a9a22 > > > + .quad 0xbfccc320c0176502 > > > + .quad 0xbfccf6354e09c5dc > > > + .quad 0xbfcd293581b6b3e7 > > > + .quad 0xbfcd5c216b4fbb91 > > > + .quad 0xbfcd8ef91af31d5e > > > + .quad 0xbfcdc1bca0abec7d > > > + .quad 0xbfcdf46c0c722d2f > > > + .quad 0xbfce27076e2af2e6 > > > + .quad 0xbfce598ed5a87e2f > > > + .quad 0xbfce8c0252aa5a60 > > > + .quad 0xbfcebe61f4dd7b0b > > > + .quad 0xbfcef0adcbdc5936 > > > + .quad 0xbfcf22e5e72f105d > > > + .quad 0xbfcf550a564b7b37 > > > + .quad 0xbfcf871b28955045 > > > + .quad 0xbfcfb9186d5e3e2b > > > + .quad 0xbfcfeb0233e607cc > > > + .quad 0xbfd00e6c45ad501d > > > + .quad 0xbfd0274dc16c232f > > > + .quad 0xbfd0402594b4d041 > > > + .quad 0xbfd058f3c703ebc6 > > > + .quad 0xbfd071b85fcd590d > > > + .quad 0xbfd08a73667c57af > > > + .quad 0xbfd0a324e27390e3 > > > + .quad 0xbfd0bbccdb0d24bd > > > + .quad 0xbfd0d46b579ab74b > > > + .quad 0xbfd0ed005f657da4 > > > + .quad 0xbfd1058bf9ae4ad5 > > > + .quad 0xbfd11e0e2dad9cb7 > > > + .quad 0xbfd136870293a8b0 > > > + .quad 0xbfd14ef67f88685a > > > + .quad 0xbfd1675cababa60e > > > + .quad 0xbfd17fb98e15095d > > > + .quad 0xbfd1980d2dd4236f > > > + .quad 0xbfd1b05791f07b49 > > > + .quad 0xbfd1c898c16999fb > > > + .quad 0xbfd1e0d0c33716be > > > + .quad 0xbfd1f8ff9e48a2f3 > > > + .quad 0xbfd211255986160c > > > + .quad 0xbfd22941fbcf7966 > > > + .quad 0xbfd241558bfd1404 > > > + .quad 
0xbfd2596010df763a > > > + .quad 0xbfd27161913f853d > > > + .quad 0xbfd2895a13de86a3 > > > + .quad 0xbfd2a1499f762bc9 > > > + .quad 0xbfd2b9303ab89d25 > > > + .quad 0xbfd2d10dec508583 > > > + .quad 0xbfd2e8e2bae11d31 > > > + .quad 0xbfd300aead06350c > > > + .quad 0xbfd31871c9544185 > > > + .quad 0xbfd3302c16586588 > > > + .quad 0xbfd347dd9a987d55 > > > + .quad 0xbfd35f865c93293e > > > + .quad 0xbfd3772662bfd85b > > > + .quad 0xbfd38ebdb38ed321 > > > + .quad 0xbfd3a64c556945ea > > > + .quad 0xbfd3bdd24eb14b6a > > > + .quad 0xbfd3d54fa5c1f710 > > > + .quad 0xbfd3ecc460ef5f50 > > > + .quad 0xbfd404308686a7e4 > > > + .quad 0xbfd41b941cce0bee > > > + .quad 0xbfd432ef2a04e814 > > > + .quad 0xbfd44a41b463c47c > > > + .quad 0xbfd4618bc21c5ec2 > > > + .quad 0xbfd478cd5959b3d9 > > > + .quad 0xbfd49006804009d1 > > > + .quad 0xbfd4a7373cecf997 > > > + .quad 0xbfd4be5f957778a1 > > > + .quad 0xbfd4d57f8fefe27f > > > + .quad 0xbfd4ec973260026a > > > + .quad 0xbfd503a682cb1cb3 > > > + .quad 0xbfd51aad872df82d > > > + .quad 0xbfd531ac457ee77e > > > + .quad 0xbfd548a2c3add263 > > > + .quad 0xbfd55f9107a43ee2 > > > + .quad 0xbfd5767717455a6c > > > + .quad 0xbfd58d54f86e02f2 > > > + .quad 0xbfd5a42ab0f4cfe2 > > > + .quad 0xbfd5baf846aa1b19 > > > + .quad 0xbfd5d1bdbf5809ca > > > + .quad 0xbfd5e87b20c2954a > > > + .quad 0xbfd5ff3070a793d4 > > > + .quad 0xbfd615ddb4bec13c > > > + .quad 0xbfd62c82f2b9c795 > > > + .quad 0x3fd61965cdb02c1f > > > + .quad 0x3fd602d08af091ec > > > + .quad 0x3fd5ec433d5c35ae > > > + .quad 0x3fd5d5bddf595f30 > > > + .quad 0x3fd5bf406b543db2 > > > + .quad 0x3fd5a8cadbbedfa1 > > > + .quad 0x3fd5925d2b112a59 > > > + .quad 0x3fd57bf753c8d1fb > > > + .quad 0x3fd565995069514c > > > + .quad 0x3fd54f431b7be1a9 > > > + .quad 0x3fd538f4af8f72fe > > > + .quad 0x3fd522ae0738a3d8 > > > + .quad 0x3fd50c6f1d11b97c > > > + .quad 0x3fd4f637ebba9810 > > > + .quad 0x3fd4e0086dd8baca > > > + .quad 0x3fd4c9e09e172c3c > > > + .quad 0x3fd4b3c077267e9a > > > + .quad 0x3fd49da7f3bcc41f 
> > > + .quad 0x3fd487970e958770 > > > + .quad 0x3fd4718dc271c41b > > > + .quad 0x3fd45b8c0a17df13 > > > + .quad 0x3fd44591e0539f49 > > > + .quad 0x3fd42f9f3ff62642 > > > + .quad 0x3fd419b423d5e8c7 > > > + .quad 0x3fd403d086cea79c > > > + .quad 0x3fd3edf463c1683e > > > + .quad 0x3fd3d81fb5946dba > > > + .quad 0x3fd3c25277333184 > > > + .quad 0x3fd3ac8ca38e5c5f > > > + .quad 0x3fd396ce359bbf54 > > > + .quad 0x3fd3811728564cb2 > > > + .quad 0x3fd36b6776be1117 > > > + .quad 0x3fd355bf1bd82c8b > > > + .quad 0x3fd3401e12aecba1 > > > + .quad 0x3fd32a84565120a8 > > > + .quad 0x3fd314f1e1d35ce4 > > > + .quad 0x3fd2ff66b04ea9d4 > > > + .quad 0x3fd2e9e2bce12286 > > > + .quad 0x3fd2d46602adccee > > > + .quad 0x3fd2bef07cdc9354 > > > + .quad 0x3fd2a982269a3dbf > > > + .quad 0x3fd2941afb186b7c > > > + .quad 0x3fd27ebaf58d8c9d > > > + .quad 0x3fd269621134db92 > > > + .quad 0x3fd25410494e56c7 > > > + .quad 0x3fd23ec5991eba49 > > > + .quad 0x3fd22981fbef797b > > > + .quad 0x3fd214456d0eb8d4 > > > + .quad 0x3fd1ff0fe7cf47a7 > > > + .quad 0x3fd1e9e1678899f4 > > > + .quad 0x3fd1d4b9e796c245 > > > + .quad 0x3fd1bf99635a6b95 > > > + .quad 0x3fd1aa7fd638d33f > > > + .quad 0x3fd1956d3b9bc2fa > > > + .quad 0x3fd180618ef18adf > > > + .quad 0x3fd16b5ccbacfb73 > > > + .quad 0x3fd1565eed455fc3 > > > + .quad 0x3fd14167ef367783 > > > + .quad 0x3fd12c77cd00713b > > > + .quad 0x3fd1178e8227e47c > > > + .quad 0x3fd102ac0a35cc1c > > > + .quad 0x3fd0edd060b78081 > > > + .quad 0x3fd0d8fb813eb1ef > > > + .quad 0x3fd0c42d676162e3 > > > + .quad 0x3fd0af660eb9e279 > > > + .quad 0x3fd09aa572e6c6d4 > > > + .quad 0x3fd085eb8f8ae797 > > > + .quad 0x3fd07138604d5862 > > > + .quad 0x3fd05c8be0d9635a > > > + .quad 0x3fd047e60cde83b8 > > > + .quad 0x3fd03346e0106062 > > > + .quad 0x3fd01eae5626c691 > > > + .quad 0x3fd00a1c6adda473 > > > + .quad 0x3fcfeb2233ea07cd > > > + .quad 0x3fcfc218be620a5e > > > + .quad 0x3fcf991c6cb3b379 > > > + .quad 0x3fcf702d36777df0 > > > + .quad 0x3fcf474b134df229 > > > + .quad 
0x3fcf1e75fadf9bde > > > + .quad 0x3fcef5ade4dcffe6 > > > + .quad 0x3fceccf2c8fe920a > > > + .quad 0x3fcea4449f04aaf5 > > > + .quad 0x3fce7ba35eb77e2a > > > + .quad 0x3fce530effe71012 > > > + .quad 0x3fce2a877a6b2c12 > > > + .quad 0x3fce020cc6235ab5 > > > + .quad 0x3fcdd99edaf6d7e9 > > > + .quad 0x3fcdb13db0d48940 > > > + .quad 0x3fcd88e93fb2f450 > > > + .quad 0x3fcd60a17f903515 > > > + .quad 0x3fcd38666871f465 > > > + .quad 0x3fcd1037f2655e7b > > > + .quad 0x3fcce816157f1988 > > > + .quad 0x3fccc000c9db3c52 > > > + .quad 0x3fcc97f8079d44ec > > > + .quad 0x3fcc6ffbc6f00f71 > > > + .quad 0x3fcc480c0005ccd1 > > > + .quad 0x3fcc2028ab17f9b4 > > > + .quad 0x3fcbf851c067555f > > > + .quad 0x3fcbd087383bd8ad > > > + .quad 0x3fcba8c90ae4ad19 > > > + .quad 0x3fcb811730b823d2 > > > + .quad 0x3fcb5971a213acdb > > > + .quad 0x3fcb31d8575bce3d > > > + .quad 0x3fcb0a4b48fc1b46 > > > + .quad 0x3fcae2ca6f672bd4 > > > + .quad 0x3fcabb55c31693ad > > > + .quad 0x3fca93ed3c8ad9e3 > > > + .quad 0x3fca6c90d44b704e > > > + .quad 0x3fca454082e6ab05 > > > + .quad 0x3fca1dfc40f1b7f1 > > > + .quad 0x3fc9f6c407089664 > > > + .quad 0x3fc9cf97cdce0ec3 > > > + .quad 0x3fc9a8778debaa38 > > > + .quad 0x3fc981634011aa75 > > > + .quad 0x3fc95a5adcf7017f > > > + .quad 0x3fc9335e5d594989 > > > + .quad 0x3fc90c6db9fcbcd9 > > > + .quad 0x3fc8e588ebac2dbf > > > + .quad 0x3fc8beafeb38fe8c > > > + .quad 0x3fc897e2b17b19a5 > > > + .quad 0x3fc871213750e994 > > > + .quad 0x3fc84a6b759f512f > > > + .quad 0x3fc823c16551a3c2 > > > + .quad 0x3fc7fd22ff599d4f > > > + .quad 0x3fc7d6903caf5ad0 > > > + .quad 0x3fc7b0091651528c > > > + .quad 0x3fc7898d85444c73 > > > + .quad 0x3fc7631d82935a86 > > > + .quad 0x3fc73cb9074fd14d > > > + .quad 0x3fc716600c914054 > > > + .quad 0x3fc6f0128b756abc > > > + .quad 0x3fc6c9d07d203fc7 > > > + .quad 0x3fc6a399dabbd383 > > > + .quad 0x3fc67d6e9d785771 > > > + .quad 0x3fc6574ebe8c133a > > > + .quad 0x3fc6313a37335d76 > > > + .quad 0x3fc60b3100b09476 > > > + .quad 0x3fc5e533144c1719 
> > > + .quad 0x3fc5bf406b543db2 > > > + .quad 0x3fc59958ff1d52f1 > > > + .quad 0x3fc5737cc9018cdd > > > + .quad 0x3fc54dabc26105d2 > > > + .quad 0x3fc527e5e4a1b58d > > > + .quad 0x3fc5022b292f6a45 > > > + .quad 0x3fc4dc7b897bc1c8 > > > + .quad 0x3fc4b6d6fefe22a4 > > > + .quad 0x3fc4913d8333b561 > > > + .quad 0x3fc46baf0f9f5db7 > > > + .quad 0x3fc4462b9dc9b3dc > > > + .quad 0x3fc420b32740fdd4 > > > + .quad 0x3fc3fb45a59928cc > > > + .quad 0x3fc3d5e3126bc27f > > > + .quad 0x3fc3b08b6757f2a9 > > > + .quad 0x3fc38b3e9e027479 > > > + .quad 0x3fc365fcb0159016 > > > + .quad 0x3fc340c59741142e > > > + .quad 0x3fc31b994d3a4f85 > > > + .quad 0x3fc2f677cbbc0a96 > > > + .quad 0x3fc2d1610c86813a > > > + .quad 0x3fc2ac55095f5c59 > > > + .quad 0x3fc28753bc11aba5 > > > + .quad 0x3fc2625d1e6ddf57 > > > + .quad 0x3fc23d712a49c202 > > > + .quad 0x3fc2188fd9807263 > > > + .quad 0x3fc1f3b925f25d41 > > > + .quad 0x3fc1ceed09853752 > > > + .quad 0x3fc1aa2b7e23f72a > > > + .quad 0x3fc185747dbecf34 > > > + .quad 0x3fc160c8024b27b1 > > > + .quad 0x3fc13c2605c398c3 > > > + .quad 0x3fc1178e8227e47c > > > + .quad 0x3fc0f301717cf0fb > > > + .quad 0x3fc0ce7ecdccc28d > > > + .quad 0x3fc0aa06912675d5 > > > + .quad 0x3fc08598b59e3a07 > > > + .quad 0x3fc06135354d4b18 > > > + .quad 0x3fc03cdc0a51ec0d > > > + .quad 0x3fc0188d2ecf6140 > > > + .quad 0x3fbfe89139dbd566 > > > + .quad 0x3fbfa01c9db57ce2 > > > + .quad 0x3fbf57bc7d9005db > > > + .quad 0x3fbf0f70cdd992e3 > > > + .quad 0x3fbec739830a1120 > > > + .quad 0x3fbe7f1691a32d3e > > > + .quad 0x3fbe3707ee30487b > > > + .quad 0x3fbdef0d8d466db9 > > > + .quad 0x3fbda727638446a2 > > > + .quad 0x3fbd5f55659210e2 > > > + .quad 0x3fbd179788219364 > > > + .quad 0x3fbccfedbfee13a8 > > > + .quad 0x3fbc885801bc4b23 > > > + .quad 0x3fbc40d6425a5cb1 > > > + .quad 0x3fbbf968769fca11 > > > + .quad 0x3fbbb20e936d6974 > > > + .quad 0x3fbb6ac88dad5b1c > > > + .quad 0x3fbb23965a52ff00 > > > + .quad 0x3fbadc77ee5aea8c > > > + .quad 0x3fba956d3ecade63 > > > + .quad 
0x3fba4e7640b1bc38 > > > + .quad 0x3fba0792e9277cac > > > + .quad 0x3fb9c0c32d4d2548 > > > + .quad 0x3fb97a07024cbe74 > > > + .quad 0x3fb9335e5d594989 > > > + .quad 0x3fb8ecc933aeb6e8 > > > + .quad 0x3fb8a6477a91dc29 > > > + .quad 0x3fb85fd927506a48 > > > + .quad 0x3fb8197e2f40e3f0 > > > + .quad 0x3fb7d33687c293c9 > > > + .quad 0x3fb78d02263d82d3 > > > + .quad 0x3fb746e100226ed9 > > > + .quad 0x3fb700d30aeac0e1 > > > + .quad 0x3fb6bad83c1883b6 > > > + .quad 0x3fb674f089365a7a > > > + .quad 0x3fb62f1be7d77743 > > > + .quad 0x3fb5e95a4d9791cb > > > + .quad 0x3fb5a3abb01ade25 > > > + .quad 0x3fb55e10050e0384 > > > + .quad 0x3fb518874226130a > > > + .quad 0x3fb4d3115d207eac > > > + .quad 0x3fb48dae4bc31018 > > > + .quad 0x3fb4485e03dbdfad > > > + .quad 0x3fb403207b414b7f > > > + .quad 0x3fb3bdf5a7d1ee64 > > > + .quad 0x3fb378dd7f749714 > > > + .quad 0x3fb333d7f8183f4b > > > + .quad 0x3fb2eee507b40301 > > > + .quad 0x3fb2aa04a44717a5 > > > + .quad 0x3fb26536c3d8c369 > > > + .quad 0x3fb2207b5c78549e > > > + .quad 0x3fb1dbd2643d190b > > > + .quad 0x3fb1973bd1465567 > > > + .quad 0x3fb152b799bb3cc9 > > > + .quad 0x3fb10e45b3cae831 > > > + .quad 0x3fb0c9e615ac4e17 > > > + .quad 0x3fb08598b59e3a07 > > > + .quad 0x3fb0415d89e74444 > > > + .quad 0x3faffa6911ab9301 > > > + .quad 0x3faf723b517fc523 > > > + .quad 0x3faeea31c006b87c > > > + .quad 0x3fae624c4a0b5e1b > > > + .quad 0x3fadda8adc67ee4e > > > + .quad 0x3fad52ed6405d86f > > > + .quad 0x3faccb73cdddb2cc > > > + .quad 0x3fac441e06f72a9e > > > + .quad 0x3fabbcebfc68f420 > > > + .quad 0x3fab35dd9b58baad > > > + .quad 0x3faaaef2d0fb10fc > > > + .quad 0x3faa282b8a936171 > > > + .quad 0x3fa9a187b573de7c > > > + .quad 0x3fa91b073efd7314 > > > + .quad 0x3fa894aa149fb343 > > > + .quad 0x3fa80e7023d8ccc4 > > > + .quad 0x3fa788595a3577ba > > > + .quad 0x3fa70265a550e777 > > > + .quad 0x3fa67c94f2d4bb58 > > > + .quad 0x3fa5f6e73078efb8 > > > + .quad 0x3fa5715c4c03ceef > > > + .quad 0x3fa4ebf43349e26f > > > + .quad 0x3fa466aed42de3ea 
> > > + .quad 0x3fa3e18c1ca0ae92 > > > + .quad 0x3fa35c8bfaa1306b > > > + .quad 0x3fa2d7ae5c3c5bae > > > + .quad 0x3fa252f32f8d183f > > > + .quad 0x3fa1ce5a62bc353a > > > + .quad 0x3fa149e3e4005a8d > > > + .quad 0x3fa0c58fa19dfaaa > > > + .quad 0x3fa0415d89e74444 > > > + .quad 0x3f9f7a9b16782856 > > > + .quad 0x3f9e72bf2813ce51 > > > + .quad 0x3f9d6b2725979802 > > > + .quad 0x3f9c63d2ec14aaf2 > > > + .quad 0x3f9b5cc258b718e6 > > > + .quad 0x3f9a55f548c5c43f > > > + .quad 0x3f994f6b99a24475 > > > + .quad 0x3f98492528c8cabf > > > + .quad 0x3f974321d3d006d3 > > > + .quad 0x3f963d6178690bd6 > > > + .quad 0x3f9537e3f45f3565 > > > + .quad 0x3f9432a925980cc1 > > > + .quad 0x3f932db0ea132e22 > > > + .quad 0x3f9228fb1fea2e28 > > > + .quad 0x3f912487a5507f70 > > > + .quad 0x3f90205658935847 > > > + .quad 0x3f8e38ce3033310c > > > + .quad 0x3f8c317384c75f06 > > > + .quad 0x3f8a2a9c6c170462 > > > + .quad 0x3f882448a388a2aa > > > + .quad 0x3f861e77e8b53fc6 > > > + .quad 0x3f841929f96832f0 > > > + .quad 0x3f82145e939ef1e9 > > > + .quad 0x3f8010157588de71 > > > + .quad 0x3f7c189cbb0e27fb > > > + .quad 0x3f78121214586b54 > > > + .quad 0x3f740c8a747878e2 > > > + .quad 0x3f70080559588b35 > > > + .quad 0x3f680904828985c0 > > > + .quad 0x3f60040155d5889e > > > + .quad 0x3f50020055655889 > > > + .quad 0x0000000000000000 > > > + /*== poly_coeff[4] ==*/ > > > + .align 16 > > > + .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */ > > > + .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */ > > > + .quad 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */ > > > + .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */ > > > + /*== ExpMask ==*/ > > > + .align 16 > > > + .quad 0x000fffffffffffff, 0x000fffffffffffff > > > + /*== Two10 ==*/ > > > + .align 16 > > > + .quad 0x3f50000000000000, 0x3f50000000000000 > > > + /*== MinLog1p = -1+2^(-53) ==*/ > > > + .align 16 > > > + .quad 0xbfefffffffffffff, 0xbfefffffffffffff > > > + /*== MaxLog1p ==*/ > > > + .align 16 > > > + .quad 
0x7f3ffffffffff000, 0x7f3ffffffffff000 > > > + /*== One ==*/ > > > + .align 16 > > > + .quad 0x3ff0000000000000, 0x3ff0000000000000 > > > + /*== SgnMask ==*/ > > > + .align 16 > > > + .quad 0x7fffffffffffffff, 0x7fffffffffffffff > > > + /*== XThreshold ==*/ > > > + .align 16 > > > + .quad 0x3e00000000000000, 0x3e00000000000000 > > > + /*== XhMask ==*/ > > > + .align 16 > > > + .quad 0xfffffffffffffc00, 0xfffffffffffffc00 > > > + /*== Threshold ==*/ > > > + .align 16 > > > + .quad 0x4086a00000000000, 0x4086a00000000000 > > > + /*== Bias ==*/ > > > + .align 16 > > > + .quad 0x408ff80000000000, 0x408ff80000000000 > > > + /*== Bias1 ==*/ > > > + .align 16 > > > + .quad 0x408ff00000000000, 0x408ff00000000000 > > > + /*== ExpMask ==*/ > > > + .align 16 > > > + .quad 0x7ff0000000000000, 0x7ff0000000000000 > > > + /*== ExpMask2 ==*/ > > > + .align 16 > > > + .quad 0x7f40000000000000, 0x7f40000000000000 > > > + /*== L2L ==*/ > > > + .align 16 > > > + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF > > > + .align 16 > > > + .type __svml_dlog1p_data_internal,@object > > > + .size __svml_dlog1p_data_internal,.-__svml_dlog1p_data_internal > > > + .space 96, 0x00 > > > + .align 16 > > > + > > > +.FLT_16: > > > + .long 0x00000000,0x43380000,0x00000000,0x43380000 > > > + .type .FLT_16,@object > > > + .size .FLT_16,16 > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S > > > new file mode 100644 > > > index 0000000000..ec01af680c > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S > > > @@ -0,0 +1,20 @@ > > > +/* SSE version of vectorized log1p, vector length is 4. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. 
> > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#define _ZGVdN4v_log1p _ZGVdN4v_log1p_sse_wrapper > > > +#include "../svml_d_log1p4_core.S" > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c > > > new file mode 100644 > > > index 0000000000..808f3224ef > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c > > > @@ -0,0 +1,27 @@ > > > +/* Multiple versions of vectorized log1p, vector length is 4. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. 
> > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#define SYMBOL_NAME _ZGVdN4v_log1p > > > +#include "ifunc-mathvec-avx2.h" > > > + > > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > > + > > > +#ifdef SHARED > > > +__hidden_ver1 (_ZGVdN4v_log1p, __GI__ZGVdN4v_log1p, __redirect__ZGVdN4v_log1p) > > > + __attribute__ ((visibility ("hidden"))); > > > +#endif > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S > > > new file mode 100644 > > > index 0000000000..548538b0ec > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S > > > @@ -0,0 +1,1383 @@ > > > +/* Function log1p vectorized with AVX2. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + https://www.gnu.org/licenses/. 
*/ > > > + > > > +/* > > > + * ALGORITHM DESCRIPTION: > > > + * > > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > > + * Get short reciprocal approximation Rcp ~ 1/xh > > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > > + * log(Rcp) is tabulated > > > + * > > > + * > > > + */ > > > + > > > +/* Offsets for data table __svml_dlog1p_data_internal > > > + */ > > > +#define Log_HA_table 0 > > > +#define Log_LA_table 8224 > > > +#define poly_coeff 12352 > > > +#define ExpMask 12480 > > > +#define Two10 12512 > > > +#define MinLog1p 12544 > > > +#define MaxLog1p 12576 > > > +#define One 12608 > > > +#define SgnMask 12640 > > > +#define XThreshold 12672 > > > +#define XhMask 12704 > > > +#define Threshold 12736 > > > +#define Bias 12768 > > > +#define Bias1 12800 > > > +#define ExpMask0 12832 > > > +#define ExpMask2 12864 > > > +#define L2 12896 > > > + > > > +/* Lookup bias for data table __svml_dlog1p_data_internal. */ > > > +#define Table_Lookup_Bias -0x405fe0 > > > + > > > +#include <sysdep.h> > > > + > > > + .text > > > + .section .text.avx2,"ax",@progbits > > > +ENTRY(_ZGVdN4v_log1p_avx2) > > > + pushq %rbp > > > + cfi_def_cfa_offset(16) > > > + movq %rsp, %rbp > > > + cfi_def_cfa(6, 16) > > > + cfi_offset(6, -16) > > > + andq $-32, %rsp > > > + subq $96, %rsp > > > + lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %r8 > > > + > > > +/* SgnMask used by all accuracies */ > > > + vmovupd SgnMask+__svml_dlog1p_data_internal(%rip), %ymm12 > > > + vmovupd One+__svml_dlog1p_data_internal(%rip), %ymm7 > > > + > > > +/* 2^ (-10-exp(X) ) */ > > > + vmovupd ExpMask2+__svml_dlog1p_data_internal(%rip), %ymm3 > > > + vmovapd %ymm0, %ymm9 > > > + vandpd %ymm12, %ymm9, %ymm10 > > > + vcmplt_oqpd XThreshold+__svml_dlog1p_data_internal(%rip), %ymm10, %ymm11 > > > + vaddpd %ymm7, %ymm9, %ymm13 > > > + > > > +/* compute 1+x as high, low parts */ > > > + vmaxpd %ymm9, %ymm7, %ymm15 > > > + vminpd %ymm9, 
%ymm7, %ymm6 > > > + vorpd XhMask+__svml_dlog1p_data_internal(%rip), %ymm11, %ymm14 > > > + vandpd %ymm14, %ymm13, %ymm4 > > > + > > > +/* preserve mantissa, set input exponent to 2^(-10) */ > > > + vandpd ExpMask+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm5 > > > + vorpd Two10+__svml_dlog1p_data_internal(%rip), %ymm5, %ymm5 > > > + > > > +/* reciprocal approximation good to at least 11 bits */ > > > + vcvtpd2ps %ymm5, %xmm2 > > > + vsubpd %ymm4, %ymm15, %ymm0 > > > + > > > +/* check range */ > > > + vcmplt_oqpd MinLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm15 > > > + vrcpps %xmm2, %xmm1 > > > + vaddpd %ymm0, %ymm6, %ymm6 > > > + vcmpnle_uqpd MaxLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm0 > > > + vcvtps2pd %xmm1, %ymm11 > > > + > > > +/* exponent of X needed to scale Xl */ > > > + vandps ExpMask0+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm10 > > > + vpsubq %ymm10, %ymm3, %ymm13 > > > + > > > +/* exponent bits */ > > > + vpsrlq $20, %ymm4, %ymm4 > > > + > > > +/* round reciprocal to nearest integer, will have 1+9 mantissa bits */ > > > + vroundpd $0, %ymm11, %ymm3 > > > + > > > +/* scale DblRcp */ > > > + vmulpd %ymm13, %ymm3, %ymm2 > > > + > > > +/* exponent*log(2.0) */ > > > + vmovupd Threshold+__svml_dlog1p_data_internal(%rip), %ymm13 > > > + vfmsub213pd %ymm7, %ymm3, %ymm5 > > > + > > > +/* Compute SignMask for all accuracies, including EP */ > > > + vandnpd %ymm9, %ymm12, %ymm8 > > > + vorpd %ymm0, %ymm15, %ymm7 > > > + > > > +/* > > > + * prepare table index > > > + * table lookup > > > + */ > > > + vpsrlq $40, %ymm3, %ymm0 > > > + > > > +/* > > > + * argument reduction > > > + * VQFMS( D, R, X, DblRcp1, One ); > > > + */ > > > + vfmadd213pd %ymm5, %ymm2, %ymm6 > > > + vmovupd poly_coeff+64+__svml_dlog1p_data_internal(%rip), %ymm2 > > > + vcmplt_oqpd %ymm3, %ymm13, %ymm3 > > > + vmulpd %ymm6, %ymm6, %ymm5 > > > + vfmadd213pd poly_coeff+96+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm2 > > > + > > > +/* combine and get argument value 
range mask */ > > > + vmovmskpd %ymm7, %eax > > > + vextractf128 $1, %ymm4, %xmm12 > > > + vshufps $221, %xmm12, %xmm4, %xmm14 > > > + > > > +/* biased exponent in DP format */ > > > + vcvtdq2pd %xmm14, %ymm1 > > > + vandpd Bias+__svml_dlog1p_data_internal(%rip), %ymm3, %ymm14 > > > + vorpd Bias1+__svml_dlog1p_data_internal(%rip), %ymm14, %ymm15 > > > + vsubpd %ymm15, %ymm1, %ymm1 > > > + vmulpd L2+__svml_dlog1p_data_internal(%rip), %ymm1, %ymm3 > > > + > > > +/* polynomial */ > > > + vmovupd poly_coeff+__svml_dlog1p_data_internal(%rip), %ymm1 > > > + vfmadd213pd poly_coeff+32+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm1 > > > + vfmadd213pd %ymm2, %ymm5, %ymm1 > > > + > > > +/* reconstruction */ > > > + vfmadd213pd %ymm6, %ymm5, %ymm1 > > > + vextractf128 $1, %ymm0, %xmm10 > > > + vmovd %xmm0, %edx > > > + vmovd %xmm10, %esi > > > + movslq %edx, %rdx > > > + vpextrd $2, %xmm0, %ecx > > > + movslq %esi, %rsi > > > + vpextrd $2, %xmm10, %edi > > > + movslq %ecx, %rcx > > > + movslq %edi, %rdi > > > + vmovsd (%r8,%rdx), %xmm4 > > > + vmovsd (%r8,%rsi), %xmm11 > > > + vmovhpd (%r8,%rcx), %xmm4, %xmm7 > > > + vmovhpd (%r8,%rdi), %xmm11, %xmm12 > > > + vinsertf128 $1, %xmm12, %ymm7, %ymm0 > > > + vaddpd %ymm1, %ymm0, %ymm6 > > > + vaddpd %ymm6, %ymm3, %ymm0 > > > + > > > +/* OR in the Sign of input argument to produce correct log1p(-0) */ > > > + vorpd %ymm8, %ymm0, %ymm0 > > > + testl %eax, %eax > > > + > > > +/* Go to special inputs processing branch */ > > > + jne L(SPECIAL_VALUES_BRANCH) > > > + # LOE rbx r12 r13 r14 r15 eax ymm0 ymm9 > > > + > > > +/* Restore registers > > > + * and exit the function > > > + */ > > > + > > > +L(EXIT): > > > + movq %rbp, %rsp > > > + popq %rbp > > > + cfi_def_cfa(7, 8) > > > + cfi_restore(6) > > > + ret > > > + cfi_def_cfa(6, 16) > > > + cfi_offset(6, -16) > > > + > > > +/* Branch to process > > > + * special inputs > > > + */ > > > + > > > +L(SPECIAL_VALUES_BRANCH): > > > + vmovupd %ymm9, 32(%rsp) > > > + vmovupd %ymm0, 64(%rsp) 
> > > + # LOE rbx r12 r13 r14 r15 eax ymm0 > > > + > > > + xorl %edx, %edx > > > + # LOE rbx r12 r13 r14 r15 eax edx > > > + > > > + vzeroupper > > > + movq %r12, 16(%rsp) > > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > > > + movl %edx, %r12d > > > + movq %r13, 8(%rsp) > > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > > > + movl %eax, %r13d > > > + movq %r14, (%rsp) > > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > > > + # LOE rbx r15 r12d r13d > > > + > > > +/* Range mask > > > + * bits check > > > + */ > > > + > > > +L(RANGEMASK_CHECK): > > > + btl %r12d, %r13d > > > + > > > +/* Call scalar math function */ > > > + jc L(SCALAR_MATH_CALL) > > > + # LOE rbx r15 r12d r13d > > > + > > > +/* Special inputs > > > + * processing loop > > > + */ > > > + > > > +L(SPECIAL_VALUES_LOOP): > > > + incl %r12d > > > + cmpl $4, %r12d > > > + > > > +/* Check bits in range mask */ > > > + jl L(RANGEMASK_CHECK) > > > + # LOE rbx r15 r12d r13d > > > + > > > + movq 16(%rsp), %r12 > > > + cfi_restore(12) > > > + movq 8(%rsp), %r13 > > > + cfi_restore(13) > > > + movq (%rsp), %r14 > > > + cfi_restore(14) > > > + vmovupd 64(%rsp), %ymm0 > > > + > > > +/* Go to exit */ > > > + jmp L(EXIT) > > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 
0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > > > + # LOE rbx r12 r13 r14 r15 ymm0 > > > + > > > +/* Scalar math function call > > > + * to process special input > > > + */ > > > + > > > +L(SCALAR_MATH_CALL): > > > + movl %r12d, %r14d > > > + movsd 32(%rsp,%r14,8), %xmm0 > > > + call log1p@PLT > > > + # LOE rbx r14 r15 r12d r13d xmm0 > > > + > > > + movsd %xmm0, 64(%rsp,%r14,8) > > > + > > > +/* Process special inputs in loop */ > > > + jmp L(SPECIAL_VALUES_LOOP) > > > + # LOE rbx r15 r12d r13d > > > +END(_ZGVdN4v_log1p_avx2) > > > + > > > + .section .rodata, "a" > > > + .align 32 > > > + > > > +#ifdef __svml_dlog1p_data_internal_typedef > > > +typedef unsigned int VUINT32; > > > +typedef struct { > > > + __declspec(align(32)) VUINT32 Log_HA_table[(1<<10)+2][2]; > > > + __declspec(align(32)) VUINT32 Log_LA_table[(1<<9)+1][2]; > > > + __declspec(align(32)) VUINT32 poly_coeff[4][4][2]; > > > + __declspec(align(32)) VUINT32 ExpMask[4][2]; > > > + __declspec(align(32)) VUINT32 Two10[4][2]; > > > + __declspec(align(32)) VUINT32 MinLog1p[4][2]; > > > + __declspec(align(32)) VUINT32 MaxLog1p[4][2]; > > > + __declspec(align(32)) VUINT32 One[4][2]; > > > + __declspec(align(32)) VUINT32 SgnMask[4][2]; > > > + __declspec(align(32)) VUINT32 XThreshold[4][2]; > > > + __declspec(align(32)) VUINT32 XhMask[4][2]; > > > + __declspec(align(32)) VUINT32 Threshold[4][2]; > > > + __declspec(align(32)) VUINT32 Bias[4][2]; > > > + __declspec(align(32)) VUINT32 Bias1[4][2]; > > 
> + __declspec(align(32)) VUINT32 ExpMask0[4][2]; > > > + __declspec(align(32)) VUINT32 ExpMask2[4][2]; > > > + __declspec(align(32)) VUINT32 L2[4][2]; > > > +} __svml_dlog1p_data_internal; > > > +#endif > > > +__svml_dlog1p_data_internal: > > > + /* Log_HA_table */ > > > + .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100 > > > + .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a > > > + .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff > > > + .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a > > > + .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb > > > + .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e > > > + .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b > > > + .quad 0xc0862347acebaf68, 0xbe1cef3b152048af > > > + .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e > > > + .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4 > > > + .quad 0xc08623537ac30980, 0xbe1cefc4642ee597 > > > + .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16 > > > + .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6 > > > + .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362 > > > + .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557 > > > + .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b > > > + .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed > > > + .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed > > > + .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f > > > + .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce > > > + .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7 > > > + .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1 > > > + .quad 0xc086238206e94218, 0xbe1ceee898588610 > > > + .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea > > > + .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6 > > > + .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6 > > > + .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165 > > > + .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1 > > > + .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b > > > + .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670 > > > + .quad 0xc08623a07b28ae60, 0xbe1cef359363787c > > > + .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c > > > + .quad 0xc08623a8063d8338, 
0xbe1cefe43c02aa84 > > > + .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7 > > > + .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b > > > + .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf > > > + .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62 > > > + .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b > > > + .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98 > > > + .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87 > > > + .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff > > > + .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798 > > > + .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e > > > + .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde > > > + .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b > > > + .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c > > > + .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98 > > > + .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f > > > + .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358 > > > + .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380 > > > + .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4 > > > + .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b > > > + .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2 > > > + .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4 > > > + .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400 > > > + .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7 > > > + .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a > > > + .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d > > > + .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b > > > + .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575 > > > + .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951 > > > + .quad 0xc086241263e87f50, 0xbe1cf16e74768529 > > > + .quad 0xc0862415f6193658, 0xbe1cefec64b8becb > > > + .quad 0xc086241986b28f30, 0xbe1cf0838d210baa > > > + .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11 > > > + .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805 > > > + .quad 0xc08624242f008380, 0xbe1ceea988c5a417 > > > + .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5 > > > + .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38 > > > + .quad 0xc086242ec92eaee8, 0xbe1cef0946455411 > > > + .quad 
0xc08624324ecbaf98, 0xbe1cefea60907739 > > > + .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42 > > > + .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d > > > + .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e > > > + .quad 0xc086244055d2c968, 0xbe1cef345284c119 > > > + .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219 > > > + .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114 > > > + .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189 > > > + .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f > > > + .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f > > > + .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5 > > > + .quad 0xc0862458a789e250, 0xbe1cf0b173796a31 > > > + .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d > > > + .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb > > > + .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7 > > > + .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f > > > + .quad 0xc0862469d9a591c0, 0xbe1cef503d947663 > > > + .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2 > > > + .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc > > > + .quad 0xc086247419475160, 0xbe1cf03dd9922331 > > > + .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129 > > > + .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6 > > > + .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100 > > > + .quad 0xc0862481af27c528, 0xbe1cee8a6593278a > > > + .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7 > > > + .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8 > > > + .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002 > > > + .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4 > > > + .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c > > > + .quad 0xc0862495e5179270, 0xbe1cee757f20c326 > > > + .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4 > > > + .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97 > > > + .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb > > > + .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e > > > + .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b > > > + .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80 > > > + .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71 > > > + .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9 
> > > + .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139 > > > + .quad 0xc08624b72472a528, 0xbe1cf031c931c11f > > > + .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7 > > > + .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d > > > + .quad 0xc08624c103245238, 0xbe1cefd492f1ba75 > > > + .quad 0xc08624c44aacab08, 0xbe1cf1253e154466 > > > + .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55 > > > + .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe > > > + .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f > > > + .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968 > > > + .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78 > > > + .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75 > > > + .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2 > > > + .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d > > > + .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed > > > + .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f > > > + .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65 > > > + .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078 > > > + .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a > > > + .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a > > > + .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2 > > > + .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc > > > + .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501 > > > + .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7 > > > + .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c > > > + .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c > > > + .quad 0xc0862507f9448db0, 0xbe1cf082da464994 > > > + .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf > > > + .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531 > > > + .quad 0xc08625117667dd78, 0xbe1cf1106599c962 > > > + .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f > > > + .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6 > > > + .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4 > > > + .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092 > > > + .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd > > > + .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7 > > > + .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25 > > > + .quad 0xc086252a8d9dc150, 
0xbe1cf15d521c169d > > > + .quad 0xc086252dab033898, 0xbe1cf220bba8861f > > > + .quad 0xc0862530c732b078, 0xbe1cef51e310eae2 > > > + .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae > > > + .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8 > > > + .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171 > > > + .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408 > > > + .quad 0xc08625404216d160, 0xbe1cf22d2536f06b > > > + .quad 0xc08625435715e498, 0xbe1cef6abbf2e268 > > > + .quad 0xc08625466ae57648, 0xbe1cf093a14789f5 > > > + .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c > > > + .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc > > > + .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157 > > > + .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997 > > > + .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff > > > + .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f > > > + .quad 0xc086255bd4ac4590, 0xbe1cf1b4c51113c9 > > > + .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d > > > + .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc > > > + .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9 > > > + .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5 > > > + .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b > > > + .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996 > > > + .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945 > > > + .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995 > > > + .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c > > > + .quad 0xc086257a09acaae0, 0xbe1cf172c3078947 > > > + .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22 > > > + .quad 0xc086258006ae71b8, 0xbe1cefdb80426923 > > > + .quad 0xc08625830381da08, 0xbe1ceef1391a0372 > > > + .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13 > > > + .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83 > > > + .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9 > > > + .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0 > > > + .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81 > > > + .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766 > > > + .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b > > > + .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2 > > > + .quad 
0xc086259db530f4c8, 0xbe1cf250fa457dec > > > + .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e > > > + .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7 > > > + .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780 > > > + .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11 > > > + .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219 > > > + .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160 > > > + .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495 > > > + .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5 > > > + .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5 > > > + .quad 0xc08625baf725ae28, 0xbe1cf05c80779283 > > > + .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889 > > > + .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124 > > > + .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86 > > > + .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092 > > > + .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb > > > + .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12 > > > + .quad 0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7 > > > + .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e > > > + .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701 > > > + .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812 > > > + .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e > > > + .quad 0xc08625dd881990b0, 0xbe1cf14e1325c5e4 > > > + .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12 > > > + .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21 > > > + .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2 > > > + .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece > > > + .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12 > > > + .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad > > > + .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3 > > > + .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9 > > > + .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1 > > > + .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9 > > > + .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2 > > > + .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51 > > > + .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e > > > + .quad 0xc08626052294df58, 0xbe1cf1b745c57716 > > > + .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23 
> > > + .quad 0xc086260abb103458, 0xbe1cef480ff1acd2 > > > + .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef > > > + .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5 > > > + .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6 > > > + .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a > > > + .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545 > > > + .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011 > > > + .quad 0xc086261e32267e98, 0xbe1cf19917010e96 > > > + .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985 > > > + .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3 > > > + .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c > > > + .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50 > > > + .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68 > > > + .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9 > > > + .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b > > > + .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238 > > > + .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e > > > + .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d > > > + .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba > > > + .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279 > > > + .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085 > > > + .quad 0xc086264494738e08, 0xbe1cf06797bd03b2 > > > + .quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1 > > > + .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f > > > + .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a > > > + .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39 > > > + .quad 0xc08626521daf7758, 0xbe1cf252595aceb3 > > > + .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2 > > > + .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa > > > + .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366 > > > + .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b > > > + .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0 > > > + .quad 0xc08626623df56e38, 0xbe1cf080e10b8365 > > > + .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544 > > > + .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9 > > > + .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9 > > > + .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2 > > > + .quad 0xc086266f95871da8, 
0xbe1cf0b8457ffb0c > > > + .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6 > > > + .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d > > > + .quad 0xc08626778c3d4798, 0xbe1cefe260819380 > > > + .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3 > > > + .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa > > > + .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1 > > > + .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52 > > > + .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd > > > + .quad 0xc086268762086350, 0xbe1cefaee1edfa35 > > > + .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936 > > > + .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed > > > + .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49 > > > + .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e > > > + .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc > > > + .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840 > > > + .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be > > > + .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c > > > + .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06 > > > + .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e > > > + .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3 > > > + .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68 > > > + .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5 > > > + .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986 > > > + .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d > > > + .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26 > > > + .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06 > > > + .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652 > > > + .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f > > > + .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c > > > + .quad 0xc08626bddc737648, 0xbe1ceec10a020e73 > > > + .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7 > > > + .quad 0xc08626c2f9890ca0, 0xbe1cf0cb8f169ffe > > > + .quad 0xc08626c586da9388, 0xbe1cef7de2452430 > > > + .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae > > > + .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d > > > + .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3 > > > + .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d > > > + .quad 
0xc08626d23d4a4310, 0xbe1cefdc455d9d7e > > > + .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64 > > > + .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2 > > > + .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d > > > + .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab > > > + .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153 > > > + .quad 0xc08626e164224880, 0xbe1ceeb431709788 > > > + .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5 > > > + .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b > > > + .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93 > > > + .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8 > > > + .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2 > > > + .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6 > > > + .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef > > > + .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339 > > > + .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1 > > > + .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28 > > > + .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f > > > + .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3 > > > + .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6 > > > + .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6 > > > + .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3 > > > + .quad 0xc086270941934b10, 0xbe1ceefe32981f2c > > > + .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445 > > > + .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c > > > + .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f > > > + .quad 0xc08627131a321318, 0xbe1cef04ac0fb802 > > > + .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd > > > + .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5 > > > + .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570 > > > + .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85 > > > + .quad 0xc086271f58064068, 0xbe1cef092a785e3f > > > + .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30 > > > + .quad 0xc086272438546be8, 0xbe1cf210907ded8b > > > + .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99 > > > + .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc > > > + .quad 0xc086272b833b8df0, 0xbe1cf06874992df5 > > > + .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899 
> > > + .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99 > > > + .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe > > > + .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d > > > + .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8 > > > + .quad 0xc086273a05367688, 0xbe1cf18656c50806 > > > + .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a > > > + .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911 > > > + .quad 0xc08627413c621848, 0xbe1cf188a4ea680c > > > + .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80 > > > + .quad 0xc086274608397868, 0xbe1cf25a328c28e2 > > > + .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8 > > > + .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a > > > + .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228 > > > + .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c > > > + .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44 > > > + .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2 > > > + .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4 > > > + .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a > > > + .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9 > > > + .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627 > > > + .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e > > > + .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee > > > + .quad 0xc0862764efdbf768, 0xbe1ceeb06e1d0dad > > > + .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5 > > > + .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f > > > + .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312 > > > + .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85 > > > + .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011 > > > + .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7 > > > + .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da > > > + .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554 > > > + .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377 > > > + .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd > > > + .quad 0xc086277eba506158, 0xbe1cf0b911b029f0 > > > + .quad 0xc08627810e6f4028, 0xbe1cefdc24719766 > > > + .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7 > > > + .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec > > > + .quad 0xc086278806bd85c0, 
0xbe1cf008292e52fc > > > + .quad 0xc086278a58297918, 0xbe1cf053073872bf > > > + .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947 > > > + .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234 > > > + .quad 0xc086279148685aa0, 0xbe1cf162204794a8 > > > + .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac > > > + .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3 > > > + .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388 > > > + .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5 > > > + .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f > > > + .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a > > > + .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f > > > + .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f > > > + .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26 > > > + .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a > > > + .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81 > > > + .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d > > > + .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893 > > > + .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0 > > > + .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8 > > > + .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00 > > > + .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2 > > > + .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4 > > > + .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7 > > > + .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3 > > > + .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d > > > + .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e > > > + .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93 > > > + .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a > > > + .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9 > > > + .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f > > > + .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1 > > > + .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4 > > > + .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb > > > + .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b > > > + .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b > > > + .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96 > > > + .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477 > > > + .quad 
0xc08627de51876ee8, 0xbe1cf060d4b8b5c2 > > > + .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c > > > + .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875 > > > + .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522 > > > + .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57 > > > + .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e > > > + .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548 > > > + .quad 0xc08627edd34756b8, 0xbe1cef36b3366305 > > > + .quad 0xc08627f007f0a408, 0xbe1cf18134625550 > > > + .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11 > > > + .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc > > > + .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8 > > > + .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe > > > + .quad 0xc08627fb06290f90, 0xbe1cf25188430e25 > > > + .quad 0xc08627fd37324070, 0xbe1ceea1713490f9 > > > + .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c > > > + .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b > > > + .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b > > > + .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c > > > + .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a > > > + .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4 > > > + .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256 > > > + .quad 0xc086280eaa003050, 0xbe1cf010ad787fea > > > + .quad 0xc0862810d5af5880, 0xbe1cee622478393d > > > + .quad 0xc086281300c7e368, 0xbe1cf01c7482564f > > > + .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536 > > > + .quad 0xc086281755366778, 0xbe1cef2edae5837d > > > + .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9 > > > + .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8 > > > + .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83 > > > + .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4 > > > + .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9 > > > + .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2 > > > + .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d > > > + .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1 > > > + .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b > > > + .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02 > > > + .quad 0xc086282efc0ca328, 0xbe1cf0bc139379a9 
> > > + .quad 0xc08628311f099420, 0xbe1cef247a9ec596 > > > + .quad 0xc086283341749490, 0xbe1cef74bbcc488a > > > + .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e > > > + .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810 > > > + .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8 > > > + .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065 > > > + .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e > > > + .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234 > > > + .quad 0xc08628422284b168, 0xbe1cf0abf7638127 > > > + .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058 > > > + .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c > > > + .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1 > > > + .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43 > > > + .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09 > > > + .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60 > > > + .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393 > > > + .quad 0xc0862853021d4588, 0xbe1cf176adb417f7 > > > + .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da > > > + .quad 0xc08628573479b220, 0xbe1ceec34cf49523 > > > + .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb > > > + .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b > > > + .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d > > > + .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5 > > > + .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792 > > > + .quad 0xc0862863be697458, 0xbe1cf097f890c6f8 > > > + .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc > > > + .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7 > > > + .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98 > > > + .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7 > > > + .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d > > > + .quad 0xc086287034d0b690, 0xbe1ceff262d0a248 > > > + .quad 0xc086287246aab180, 0xbe1cefa7bc194186 > > > + .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9 > > > + .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07 > > > + .quad 0xc086287879041490, 0xbe1cf034803c8a48 > > > + .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f > > > + .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7 > > > + .quad 0xc086287ea6946958, 
0xbe1cefb1e4625943 > > > + .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0 > > > + .quad 0xc0862882c24faff8, 0xbe1cee9896d016da > > > + .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc > > > + .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34 > > > + .quad 0xc0862888e7f699e0, 0xbe1cf05603549486 > > > + .quad 0xc086288af37750b0, 0xbe1cef50fff513d3 > > > + .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0 > > > + .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d > > > + .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f > > > + .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed > > > + .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d > > > + .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646 > > > + .quad 0xc0862899356c1150, 0xbe1ceec4501167e9 > > > + .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f > > > + .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35 > > > + .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a > > > + .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464 > > > + .quad 0xc08628a355104818, 0xbe1cf0435e2782b0 > > > + .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c > > > + .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d > > > + .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2 > > > + .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9 > > > + .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979 > > > + .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c > > > + .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32 > > > + .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2 > > > + .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303 > > > + .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880 > > > + /*== Log_LA_table ==*/ > > > + .align 32 > > > + .quad 0x8000000000000000 > > > + .quad 0xbf5ff802a9ab10e6 > > > + .quad 0xbf6ff00aa2b10bc0 > > > + .quad 0xbf77ee11ebd82e94 > > > + .quad 0xbf7fe02a6b106789 > > > + .quad 0xbf83e7295d25a7d9 > > > + .quad 0xbf87dc475f810a77 > > > + .quad 0xbf8bcf712c74384c > > > + .quad 0xbf8fc0a8b0fc03e4 > > > + .quad 0xbf91d7f7eb9eebe7 > > > + .quad 0xbf93cea44346a575 > > > + .quad 0xbf95c45a51b8d389 > > > + .quad 0xbf97b91b07d5b11b > > > + .quad 
0xbf99ace7551cc514 > > > + .quad 0xbf9b9fc027af9198 > > > + .quad 0xbf9d91a66c543cc4 > > > + .quad 0xbf9f829b0e783300 > > > + .quad 0xbfa0b94f7c196176 > > > + .quad 0xbfa1b0d98923d980 > > > + .quad 0xbfa2a7ec2214e873 > > > + .quad 0xbfa39e87b9febd60 > > > + .quad 0xbfa494acc34d911c > > > + .quad 0xbfa58a5bafc8e4d5 > > > + .quad 0xbfa67f94f094bd98 > > > + .quad 0xbfa77458f632dcfc > > > + .quad 0xbfa868a83083f6cf > > > + .quad 0xbfa95c830ec8e3eb > > > + .quad 0xbfaa4fe9ffa3d235 > > > + .quad 0xbfab42dd711971bf > > > + .quad 0xbfac355dd0921f2d > > > + .quad 0xbfad276b8adb0b52 > > > + .quad 0xbfae19070c276016 > > > + .quad 0xbfaf0a30c01162a6 > > > + .quad 0xbfaffae9119b9303 > > > + .quad 0xbfb075983598e471 > > > + .quad 0xbfb0ed839b5526fe > > > + .quad 0xbfb16536eea37ae1 > > > + .quad 0xbfb1dcb263db1944 > > > + .quad 0xbfb253f62f0a1417 > > > + .quad 0xbfb2cb0283f5de1f > > > + .quad 0xbfb341d7961bd1d1 > > > + .quad 0xbfb3b87598b1b6ee > > > + .quad 0xbfb42edcbea646f0 > > > + .quad 0xbfb4a50d3aa1b040 > > > + .quad 0xbfb51b073f06183f > > > + .quad 0xbfb590cafdf01c28 > > > + .quad 0xbfb60658a93750c4 > > > + .quad 0xbfb67bb0726ec0fc > > > + .quad 0xbfb6f0d28ae56b4c > > > + .quad 0xbfb765bf23a6be13 > > > + .quad 0xbfb7da766d7b12cd > > > + .quad 0xbfb84ef898e8282a > > > + .quad 0xbfb8c345d6319b21 > > > + .quad 0xbfb9375e55595ede > > > + .quad 0xbfb9ab42462033ad > > > + .quad 0xbfba1ef1d8061cd4 > > > + .quad 0xbfba926d3a4ad563 > > > + .quad 0xbfbb05b49bee43fe > > > + .quad 0xbfbb78c82bb0eda1 > > > + .quad 0xbfbbeba818146765 > > > + .quad 0xbfbc5e548f5bc743 > > > + .quad 0xbfbcd0cdbf8c13e1 > > > + .quad 0xbfbd4313d66cb35d > > > + .quad 0xbfbdb5270187d927 > > > + .quad 0xbfbe27076e2af2e6 > > > + .quad 0xbfbe98b549671467 > > > + .quad 0xbfbf0a30c01162a6 > > > + .quad 0xbfbf7b79fec37ddf > > > + .quad 0xbfbfec9131dbeabb > > > + .quad 0xbfc02ebb42bf3d4b > > > + .quad 0xbfc0671512ca596e > > > + .quad 0xbfc09f561ee719c3 > > > + .quad 0xbfc0d77e7cd08e59 > > > + .quad 0xbfc10f8e422539b1 
> > > + .quad 0xbfc14785846742ac > > > + .quad 0xbfc17f6458fca611 > > > + .quad 0xbfc1b72ad52f67a0 > > > + .quad 0xbfc1eed90e2dc2c3 > > > + .quad 0xbfc2266f190a5acb > > > + .quad 0xbfc25ded0abc6ad2 > > > + .quad 0xbfc29552f81ff523 > > > + .quad 0xbfc2cca0f5f5f251 > > > + .quad 0xbfc303d718e47fd3 > > > + .quad 0xbfc33af575770e4f > > > + .quad 0xbfc371fc201e8f74 > > > + .quad 0xbfc3a8eb2d31a376 > > > + .quad 0xbfc3dfc2b0ecc62a > > > + .quad 0xbfc41682bf727bc0 > > > + .quad 0xbfc44d2b6ccb7d1e > > > + .quad 0xbfc483bccce6e3dd > > > + .quad 0xbfc4ba36f39a55e5 > > > + .quad 0xbfc4f099f4a230b2 > > > + .quad 0xbfc526e5e3a1b438 > > > + .quad 0xbfc55d1ad4232d6f > > > + .quad 0xbfc59338d9982086 > > > + .quad 0xbfc5c940075972b9 > > > + .quad 0xbfc5ff3070a793d4 > > > + .quad 0xbfc6350a28aaa758 > > > + .quad 0xbfc66acd4272ad51 > > > + .quad 0xbfc6a079d0f7aad2 > > > + .quad 0xbfc6d60fe719d21d > > > + .quad 0xbfc70b8f97a1aa75 > > > + .quad 0xbfc740f8f54037a5 > > > + .quad 0xbfc7764c128f2127 > > > + .quad 0xbfc7ab890210d909 > > > + .quad 0xbfc7e0afd630c274 > > > + .quad 0xbfc815c0a14357eb > > > + .quad 0xbfc84abb75865139 > > > + .quad 0xbfc87fa06520c911 > > > + .quad 0xbfc8b46f8223625b > > > + .quad 0xbfc8e928de886d41 > > > + .quad 0xbfc91dcc8c340bde > > > + .quad 0xbfc9525a9cf456b4 > > > + .quad 0xbfc986d3228180ca > > > + .quad 0xbfc9bb362e7dfb83 > > > + .quad 0xbfc9ef83d2769a34 > > > + .quad 0xbfca23bc1fe2b563 > > > + .quad 0xbfca57df28244dcd > > > + .quad 0xbfca8becfc882f19 > > > + .quad 0xbfcabfe5ae46124c > > > + .quad 0xbfcaf3c94e80bff3 > > > + .quad 0xbfcb2797ee46320c > > > + .quad 0xbfcb5b519e8fb5a4 > > > + .quad 0xbfcb8ef670420c3b > > > + .quad 0xbfcbc286742d8cd6 > > > + .quad 0xbfcbf601bb0e44e2 > > > + .quad 0xbfcc2968558c18c1 > > > + .quad 0xbfcc5cba543ae425 > > > + .quad 0xbfcc8ff7c79a9a22 > > > + .quad 0xbfccc320c0176502 > > > + .quad 0xbfccf6354e09c5dc > > > + .quad 0xbfcd293581b6b3e7 > > > + .quad 0xbfcd5c216b4fbb91 > > > + .quad 0xbfcd8ef91af31d5e > > > + .quad 
0xbfcdc1bca0abec7d > > > + .quad 0xbfcdf46c0c722d2f > > > + .quad 0xbfce27076e2af2e6 > > > + .quad 0xbfce598ed5a87e2f > > > + .quad 0xbfce8c0252aa5a60 > > > + .quad 0xbfcebe61f4dd7b0b > > > + .quad 0xbfcef0adcbdc5936 > > > + .quad 0xbfcf22e5e72f105d > > > + .quad 0xbfcf550a564b7b37 > > > + .quad 0xbfcf871b28955045 > > > + .quad 0xbfcfb9186d5e3e2b > > > + .quad 0xbfcfeb0233e607cc > > > + .quad 0xbfd00e6c45ad501d > > > + .quad 0xbfd0274dc16c232f > > > + .quad 0xbfd0402594b4d041 > > > + .quad 0xbfd058f3c703ebc6 > > > + .quad 0xbfd071b85fcd590d > > > + .quad 0xbfd08a73667c57af > > > + .quad 0xbfd0a324e27390e3 > > > + .quad 0xbfd0bbccdb0d24bd > > > + .quad 0xbfd0d46b579ab74b > > > + .quad 0xbfd0ed005f657da4 > > > + .quad 0xbfd1058bf9ae4ad5 > > > + .quad 0xbfd11e0e2dad9cb7 > > > + .quad 0xbfd136870293a8b0 > > > + .quad 0xbfd14ef67f88685a > > > + .quad 0xbfd1675cababa60e > > > + .quad 0xbfd17fb98e15095d > > > + .quad 0xbfd1980d2dd4236f > > > + .quad 0xbfd1b05791f07b49 > > > + .quad 0xbfd1c898c16999fb > > > + .quad 0xbfd1e0d0c33716be > > > + .quad 0xbfd1f8ff9e48a2f3 > > > + .quad 0xbfd211255986160c > > > + .quad 0xbfd22941fbcf7966 > > > + .quad 0xbfd241558bfd1404 > > > + .quad 0xbfd2596010df763a > > > + .quad 0xbfd27161913f853d > > > + .quad 0xbfd2895a13de86a3 > > > + .quad 0xbfd2a1499f762bc9 > > > + .quad 0xbfd2b9303ab89d25 > > > + .quad 0xbfd2d10dec508583 > > > + .quad 0xbfd2e8e2bae11d31 > > > + .quad 0xbfd300aead06350c > > > + .quad 0xbfd31871c9544185 > > > + .quad 0xbfd3302c16586588 > > > + .quad 0xbfd347dd9a987d55 > > > + .quad 0xbfd35f865c93293e > > > + .quad 0xbfd3772662bfd85b > > > + .quad 0xbfd38ebdb38ed321 > > > + .quad 0xbfd3a64c556945ea > > > + .quad 0xbfd3bdd24eb14b6a > > > + .quad 0xbfd3d54fa5c1f710 > > > + .quad 0xbfd3ecc460ef5f50 > > > + .quad 0xbfd404308686a7e4 > > > + .quad 0xbfd41b941cce0bee > > > + .quad 0xbfd432ef2a04e814 > > > + .quad 0xbfd44a41b463c47c > > > + .quad 0xbfd4618bc21c5ec2 > > > + .quad 0xbfd478cd5959b3d9 > > > + .quad 0xbfd49006804009d1 
> > > + .quad 0xbfd4a7373cecf997 > > > + .quad 0xbfd4be5f957778a1 > > > + .quad 0xbfd4d57f8fefe27f > > > + .quad 0xbfd4ec973260026a > > > + .quad 0xbfd503a682cb1cb3 > > > + .quad 0xbfd51aad872df82d > > > + .quad 0xbfd531ac457ee77e > > > + .quad 0xbfd548a2c3add263 > > > + .quad 0xbfd55f9107a43ee2 > > > + .quad 0xbfd5767717455a6c > > > + .quad 0xbfd58d54f86e02f2 > > > + .quad 0xbfd5a42ab0f4cfe2 > > > + .quad 0xbfd5baf846aa1b19 > > > + .quad 0xbfd5d1bdbf5809ca > > > + .quad 0xbfd5e87b20c2954a > > > + .quad 0xbfd5ff3070a793d4 > > > + .quad 0xbfd615ddb4bec13c > > > + .quad 0xbfd62c82f2b9c795 > > > + .quad 0x3fd61965cdb02c1f > > > + .quad 0x3fd602d08af091ec > > > + .quad 0x3fd5ec433d5c35ae > > > + .quad 0x3fd5d5bddf595f30 > > > + .quad 0x3fd5bf406b543db2 > > > + .quad 0x3fd5a8cadbbedfa1 > > > + .quad 0x3fd5925d2b112a59 > > > + .quad 0x3fd57bf753c8d1fb > > > + .quad 0x3fd565995069514c > > > + .quad 0x3fd54f431b7be1a9 > > > + .quad 0x3fd538f4af8f72fe > > > + .quad 0x3fd522ae0738a3d8 > > > + .quad 0x3fd50c6f1d11b97c > > > + .quad 0x3fd4f637ebba9810 > > > + .quad 0x3fd4e0086dd8baca > > > + .quad 0x3fd4c9e09e172c3c > > > + .quad 0x3fd4b3c077267e9a > > > + .quad 0x3fd49da7f3bcc41f > > > + .quad 0x3fd487970e958770 > > > + .quad 0x3fd4718dc271c41b > > > + .quad 0x3fd45b8c0a17df13 > > > + .quad 0x3fd44591e0539f49 > > > + .quad 0x3fd42f9f3ff62642 > > > + .quad 0x3fd419b423d5e8c7 > > > + .quad 0x3fd403d086cea79c > > > + .quad 0x3fd3edf463c1683e > > > + .quad 0x3fd3d81fb5946dba > > > + .quad 0x3fd3c25277333184 > > > + .quad 0x3fd3ac8ca38e5c5f > > > + .quad 0x3fd396ce359bbf54 > > > + .quad 0x3fd3811728564cb2 > > > + .quad 0x3fd36b6776be1117 > > > + .quad 0x3fd355bf1bd82c8b > > > + .quad 0x3fd3401e12aecba1 > > > + .quad 0x3fd32a84565120a8 > > > + .quad 0x3fd314f1e1d35ce4 > > > + .quad 0x3fd2ff66b04ea9d4 > > > + .quad 0x3fd2e9e2bce12286 > > > + .quad 0x3fd2d46602adccee > > > + .quad 0x3fd2bef07cdc9354 > > > + .quad 0x3fd2a982269a3dbf > > > + .quad 0x3fd2941afb186b7c > > > + .quad 
0x3fd27ebaf58d8c9d > > > + .quad 0x3fd269621134db92 > > > + .quad 0x3fd25410494e56c7 > > > + .quad 0x3fd23ec5991eba49 > > > + .quad 0x3fd22981fbef797b > > > + .quad 0x3fd214456d0eb8d4 > > > + .quad 0x3fd1ff0fe7cf47a7 > > > + .quad 0x3fd1e9e1678899f4 > > > + .quad 0x3fd1d4b9e796c245 > > > + .quad 0x3fd1bf99635a6b95 > > > + .quad 0x3fd1aa7fd638d33f > > > + .quad 0x3fd1956d3b9bc2fa > > > + .quad 0x3fd180618ef18adf > > > + .quad 0x3fd16b5ccbacfb73 > > > + .quad 0x3fd1565eed455fc3 > > > + .quad 0x3fd14167ef367783 > > > + .quad 0x3fd12c77cd00713b > > > + .quad 0x3fd1178e8227e47c > > > + .quad 0x3fd102ac0a35cc1c > > > + .quad 0x3fd0edd060b78081 > > > + .quad 0x3fd0d8fb813eb1ef > > > + .quad 0x3fd0c42d676162e3 > > > + .quad 0x3fd0af660eb9e279 > > > + .quad 0x3fd09aa572e6c6d4 > > > + .quad 0x3fd085eb8f8ae797 > > > + .quad 0x3fd07138604d5862 > > > + .quad 0x3fd05c8be0d9635a > > > + .quad 0x3fd047e60cde83b8 > > > + .quad 0x3fd03346e0106062 > > > + .quad 0x3fd01eae5626c691 > > > + .quad 0x3fd00a1c6adda473 > > > + .quad 0x3fcfeb2233ea07cd > > > + .quad 0x3fcfc218be620a5e > > > + .quad 0x3fcf991c6cb3b379 > > > + .quad 0x3fcf702d36777df0 > > > + .quad 0x3fcf474b134df229 > > > + .quad 0x3fcf1e75fadf9bde > > > + .quad 0x3fcef5ade4dcffe6 > > > + .quad 0x3fceccf2c8fe920a > > > + .quad 0x3fcea4449f04aaf5 > > > + .quad 0x3fce7ba35eb77e2a > > > + .quad 0x3fce530effe71012 > > > + .quad 0x3fce2a877a6b2c12 > > > + .quad 0x3fce020cc6235ab5 > > > + .quad 0x3fcdd99edaf6d7e9 > > > + .quad 0x3fcdb13db0d48940 > > > + .quad 0x3fcd88e93fb2f450 > > > + .quad 0x3fcd60a17f903515 > > > + .quad 0x3fcd38666871f465 > > > + .quad 0x3fcd1037f2655e7b > > > + .quad 0x3fcce816157f1988 > > > + .quad 0x3fccc000c9db3c52 > > > + .quad 0x3fcc97f8079d44ec > > > + .quad 0x3fcc6ffbc6f00f71 > > > + .quad 0x3fcc480c0005ccd1 > > > + .quad 0x3fcc2028ab17f9b4 > > > + .quad 0x3fcbf851c067555f > > > + .quad 0x3fcbd087383bd8ad > > > + .quad 0x3fcba8c90ae4ad19 > > > + .quad 0x3fcb811730b823d2 > > > + .quad 0x3fcb5971a213acdb 
> > > + .quad 0x3fcb31d8575bce3d > > > + .quad 0x3fcb0a4b48fc1b46 > > > + .quad 0x3fcae2ca6f672bd4 > > > + .quad 0x3fcabb55c31693ad > > > + .quad 0x3fca93ed3c8ad9e3 > > > + .quad 0x3fca6c90d44b704e > > > + .quad 0x3fca454082e6ab05 > > > + .quad 0x3fca1dfc40f1b7f1 > > > + .quad 0x3fc9f6c407089664 > > > + .quad 0x3fc9cf97cdce0ec3 > > > + .quad 0x3fc9a8778debaa38 > > > + .quad 0x3fc981634011aa75 > > > + .quad 0x3fc95a5adcf7017f > > > + .quad 0x3fc9335e5d594989 > > > + .quad 0x3fc90c6db9fcbcd9 > > > + .quad 0x3fc8e588ebac2dbf > > > + .quad 0x3fc8beafeb38fe8c > > > + .quad 0x3fc897e2b17b19a5 > > > + .quad 0x3fc871213750e994 > > > + .quad 0x3fc84a6b759f512f > > > + .quad 0x3fc823c16551a3c2 > > > + .quad 0x3fc7fd22ff599d4f > > > + .quad 0x3fc7d6903caf5ad0 > > > + .quad 0x3fc7b0091651528c > > > + .quad 0x3fc7898d85444c73 > > > + .quad 0x3fc7631d82935a86 > > > + .quad 0x3fc73cb9074fd14d > > > + .quad 0x3fc716600c914054 > > > + .quad 0x3fc6f0128b756abc > > > + .quad 0x3fc6c9d07d203fc7 > > > + .quad 0x3fc6a399dabbd383 > > > + .quad 0x3fc67d6e9d785771 > > > + .quad 0x3fc6574ebe8c133a > > > + .quad 0x3fc6313a37335d76 > > > + .quad 0x3fc60b3100b09476 > > > + .quad 0x3fc5e533144c1719 > > > + .quad 0x3fc5bf406b543db2 > > > + .quad 0x3fc59958ff1d52f1 > > > + .quad 0x3fc5737cc9018cdd > > > + .quad 0x3fc54dabc26105d2 > > > + .quad 0x3fc527e5e4a1b58d > > > + .quad 0x3fc5022b292f6a45 > > > + .quad 0x3fc4dc7b897bc1c8 > > > + .quad 0x3fc4b6d6fefe22a4 > > > + .quad 0x3fc4913d8333b561 > > > + .quad 0x3fc46baf0f9f5db7 > > > + .quad 0x3fc4462b9dc9b3dc > > > + .quad 0x3fc420b32740fdd4 > > > + .quad 0x3fc3fb45a59928cc > > > + .quad 0x3fc3d5e3126bc27f > > > + .quad 0x3fc3b08b6757f2a9 > > > + .quad 0x3fc38b3e9e027479 > > > + .quad 0x3fc365fcb0159016 > > > + .quad 0x3fc340c59741142e > > > + .quad 0x3fc31b994d3a4f85 > > > + .quad 0x3fc2f677cbbc0a96 > > > + .quad 0x3fc2d1610c86813a > > > + .quad 0x3fc2ac55095f5c59 > > > + .quad 0x3fc28753bc11aba5 > > > + .quad 0x3fc2625d1e6ddf57 > > > + .quad 
0x3fc23d712a49c202 > > > + .quad 0x3fc2188fd9807263 > > > + .quad 0x3fc1f3b925f25d41 > > > + .quad 0x3fc1ceed09853752 > > > + .quad 0x3fc1aa2b7e23f72a > > > + .quad 0x3fc185747dbecf34 > > > + .quad 0x3fc160c8024b27b1 > > > + .quad 0x3fc13c2605c398c3 > > > + .quad 0x3fc1178e8227e47c > > > + .quad 0x3fc0f301717cf0fb > > > + .quad 0x3fc0ce7ecdccc28d > > > + .quad 0x3fc0aa06912675d5 > > > + .quad 0x3fc08598b59e3a07 > > > + .quad 0x3fc06135354d4b18 > > > + .quad 0x3fc03cdc0a51ec0d > > > + .quad 0x3fc0188d2ecf6140 > > > + .quad 0x3fbfe89139dbd566 > > > + .quad 0x3fbfa01c9db57ce2 > > > + .quad 0x3fbf57bc7d9005db > > > + .quad 0x3fbf0f70cdd992e3 > > > + .quad 0x3fbec739830a1120 > > > + .quad 0x3fbe7f1691a32d3e > > > + .quad 0x3fbe3707ee30487b > > > + .quad 0x3fbdef0d8d466db9 > > > + .quad 0x3fbda727638446a2 > > > + .quad 0x3fbd5f55659210e2 > > > + .quad 0x3fbd179788219364 > > > + .quad 0x3fbccfedbfee13a8 > > > + .quad 0x3fbc885801bc4b23 > > > + .quad 0x3fbc40d6425a5cb1 > > > + .quad 0x3fbbf968769fca11 > > > + .quad 0x3fbbb20e936d6974 > > > + .quad 0x3fbb6ac88dad5b1c > > > + .quad 0x3fbb23965a52ff00 > > > + .quad 0x3fbadc77ee5aea8c > > > + .quad 0x3fba956d3ecade63 > > > + .quad 0x3fba4e7640b1bc38 > > > + .quad 0x3fba0792e9277cac > > > + .quad 0x3fb9c0c32d4d2548 > > > + .quad 0x3fb97a07024cbe74 > > > + .quad 0x3fb9335e5d594989 > > > + .quad 0x3fb8ecc933aeb6e8 > > > + .quad 0x3fb8a6477a91dc29 > > > + .quad 0x3fb85fd927506a48 > > > + .quad 0x3fb8197e2f40e3f0 > > > + .quad 0x3fb7d33687c293c9 > > > + .quad 0x3fb78d02263d82d3 > > > + .quad 0x3fb746e100226ed9 > > > + .quad 0x3fb700d30aeac0e1 > > > + .quad 0x3fb6bad83c1883b6 > > > + .quad 0x3fb674f089365a7a > > > + .quad 0x3fb62f1be7d77743 > > > + .quad 0x3fb5e95a4d9791cb > > > + .quad 0x3fb5a3abb01ade25 > > > + .quad 0x3fb55e10050e0384 > > > + .quad 0x3fb518874226130a > > > + .quad 0x3fb4d3115d207eac > > > + .quad 0x3fb48dae4bc31018 > > > + .quad 0x3fb4485e03dbdfad > > > + .quad 0x3fb403207b414b7f > > > + .quad 0x3fb3bdf5a7d1ee64 
> > > + .quad 0x3fb378dd7f749714 > > > + .quad 0x3fb333d7f8183f4b > > > + .quad 0x3fb2eee507b40301 > > > + .quad 0x3fb2aa04a44717a5 > > > + .quad 0x3fb26536c3d8c369 > > > + .quad 0x3fb2207b5c78549e > > > + .quad 0x3fb1dbd2643d190b > > > + .quad 0x3fb1973bd1465567 > > > + .quad 0x3fb152b799bb3cc9 > > > + .quad 0x3fb10e45b3cae831 > > > + .quad 0x3fb0c9e615ac4e17 > > > + .quad 0x3fb08598b59e3a07 > > > + .quad 0x3fb0415d89e74444 > > > + .quad 0x3faffa6911ab9301 > > > + .quad 0x3faf723b517fc523 > > > + .quad 0x3faeea31c006b87c > > > + .quad 0x3fae624c4a0b5e1b > > > + .quad 0x3fadda8adc67ee4e > > > + .quad 0x3fad52ed6405d86f > > > + .quad 0x3faccb73cdddb2cc > > > + .quad 0x3fac441e06f72a9e > > > + .quad 0x3fabbcebfc68f420 > > > + .quad 0x3fab35dd9b58baad > > > + .quad 0x3faaaef2d0fb10fc > > > + .quad 0x3faa282b8a936171 > > > + .quad 0x3fa9a187b573de7c > > > + .quad 0x3fa91b073efd7314 > > > + .quad 0x3fa894aa149fb343 > > > + .quad 0x3fa80e7023d8ccc4 > > > + .quad 0x3fa788595a3577ba > > > + .quad 0x3fa70265a550e777 > > > + .quad 0x3fa67c94f2d4bb58 > > > + .quad 0x3fa5f6e73078efb8 > > > + .quad 0x3fa5715c4c03ceef > > > + .quad 0x3fa4ebf43349e26f > > > + .quad 0x3fa466aed42de3ea > > > + .quad 0x3fa3e18c1ca0ae92 > > > + .quad 0x3fa35c8bfaa1306b > > > + .quad 0x3fa2d7ae5c3c5bae > > > + .quad 0x3fa252f32f8d183f > > > + .quad 0x3fa1ce5a62bc353a > > > + .quad 0x3fa149e3e4005a8d > > > + .quad 0x3fa0c58fa19dfaaa > > > + .quad 0x3fa0415d89e74444 > > > + .quad 0x3f9f7a9b16782856 > > > + .quad 0x3f9e72bf2813ce51 > > > + .quad 0x3f9d6b2725979802 > > > + .quad 0x3f9c63d2ec14aaf2 > > > + .quad 0x3f9b5cc258b718e6 > > > + .quad 0x3f9a55f548c5c43f > > > + .quad 0x3f994f6b99a24475 > > > + .quad 0x3f98492528c8cabf > > > + .quad 0x3f974321d3d006d3 > > > + .quad 0x3f963d6178690bd6 > > > + .quad 0x3f9537e3f45f3565 > > > + .quad 0x3f9432a925980cc1 > > > + .quad 0x3f932db0ea132e22 > > > + .quad 0x3f9228fb1fea2e28 > > > + .quad 0x3f912487a5507f70 > > > + .quad 0x3f90205658935847 > > > + .quad 
0x3f8e38ce3033310c > > > + .quad 0x3f8c317384c75f06 > > > + .quad 0x3f8a2a9c6c170462 > > > + .quad 0x3f882448a388a2aa > > > + .quad 0x3f861e77e8b53fc6 > > > + .quad 0x3f841929f96832f0 > > > + .quad 0x3f82145e939ef1e9 > > > + .quad 0x3f8010157588de71 > > > + .quad 0x3f7c189cbb0e27fb > > > + .quad 0x3f78121214586b54 > > > + .quad 0x3f740c8a747878e2 > > > + .quad 0x3f70080559588b35 > > > + .quad 0x3f680904828985c0 > > > + .quad 0x3f60040155d5889e > > > + .quad 0x3f50020055655889 > > > + .quad 0x0000000000000000 > > > + /*== poly_coeff[4] ==*/ > > > + .align 32 > > > + .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */ > > > + .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */ > > > + .quad 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */ > > > + .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */ > > > + /*== ExpMask ==*/ > > > + .align 32 > > > + .quad 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff > > > + /*== Two10 ==*/ > > > + .align 32 > > > + .quad 0x3f50000000000000, 0x3f50000000000000, 0x3f50000000000000, 0x3f50000000000000 > > > + /*== MinLog1p = -1+2^(-53) ==*/ > > > + .align 32 > > > + .quad 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff > > > + /*== MaxLog1p ==*/ > > > + .align 32 > > > + .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000 > > > + /*== One ==*/ > > > + .align 32 > > > + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 > > > + /*== SgnMask ==*/ > > > + .align 32 > > > + .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff > > > + /*== XThreshold ==*/ > > > + .align 32 > > > + .quad 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000 > > > + /*== XhMask ==*/ > > > + .align 
32 > > > + .quad 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00 > > > + /*== Threshold ==*/ > > > + .align 32 > > > + .quad 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000 > > > + /*== Bias ==*/ > > > + .align 32 > > > + .quad 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000 > > > + /*== Bias1 ==*/ > > > + .align 32 > > > + .quad 0x408ff00000000000, 0x408ff00000000000, 0x408ff00000000000, 0x408ff00000000000 > > > + /*== ExpMask ==*/ > > > + .align 32 > > > + .quad 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 > > > + /*== ExpMask2 ==*/ > > > + .align 32 > > > + .quad 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000 > > > + /*== L2L ==*/ > > > + .align 32 > > > + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF > > > + .align 32 > > > + .type __svml_dlog1p_data_internal,@object > > > + .size __svml_dlog1p_data_internal,.-__svml_dlog1p_data_internal > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S > > > new file mode 100644 > > > index 0000000000..ca174a5f52 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S > > > @@ -0,0 +1,20 @@ > > > +/* AVX2 version of vectorized log1p, vector length is 8. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. 
> > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#define _ZGVeN8v_log1p _ZGVeN8v_log1p_avx2_wrapper > > > +#include "../svml_d_log1p8_core.S" > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c > > > new file mode 100644 > > > index 0000000000..0aa35ec8c5 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c > > > @@ -0,0 +1,27 @@ > > > +/* Multiple versions of vectorized log1p, vector length is 8. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. 
*/ > > > + > > > +#define SYMBOL_NAME _ZGVeN8v_log1p > > > +#include "ifunc-mathvec-avx512-skx.h" > > > + > > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > > + > > > +#ifdef SHARED > > > +__hidden_ver1 (_ZGVeN8v_log1p, __GI__ZGVeN8v_log1p, __redirect__ZGVeN8v_log1p) > > > + __attribute__ ((visibility ("hidden"))); > > > +#endif > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S > > > new file mode 100644 > > > index 0000000000..5e38ff8d39 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S > > > @@ -0,0 +1,317 @@ > > > +/* Function log1p vectorized with AVX-512. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + https://www.gnu.org/licenses/. 
*/ > > > + > > > +/* > > > + * ALGORITHM DESCRIPTION: > > > + * > > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > > + * Get short reciprocal approximation Rcp ~ 1/xh > > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > > + * log(Rcp) is tabulated > > > + * > > > + * > > > + */ > > > + > > > +/* Offsets for data table __svml_dlog1p_data_internal_avx512 > > > + */ > > > +#define Log_tbl 0 > > > +#define One 128 > > > +#define SgnMask 192 > > > +#define C075 256 > > > +#define poly_coeff9 320 > > > +#define poly_coeff8 384 > > > +#define poly_coeff7 448 > > > +#define poly_coeff6 512 > > > +#define poly_coeff5 576 > > > +#define poly_coeff4 640 > > > +#define poly_coeff3 704 > > > +#define poly_coeff2 768 > > > +#define L2 832 > > > + > > > +#include <sysdep.h> > > > + > > > + .text > > > + .section .text.evex512,"ax",@progbits > > > +ENTRY(_ZGVeN8v_log1p_skx) > > > + pushq %rbp > > > + cfi_def_cfa_offset(16) > > > + movq %rsp, %rbp > > > + cfi_def_cfa(6, 16) > > > + cfi_offset(6, -16) > > > + andq $-64, %rsp > > > + subq $192, %rsp > > > + vmovups One+__svml_dlog1p_data_internal_avx512(%rip), %zmm7 > > > + vmovups SgnMask+__svml_dlog1p_data_internal_avx512(%rip), %zmm14 > > > + vmovaps %zmm0, %zmm9 > > > + vaddpd {rn-sae}, %zmm9, %zmm7, %zmm11 > > > + vandpd %zmm14, %zmm9, %zmm8 > > > + > > > +/* compute 1+x as high, low parts */ > > > + vmaxpd {sae}, %zmm9, %zmm7, %zmm10 > > > + vminpd {sae}, %zmm9, %zmm7, %zmm12 > > > + > > > +/* GetMant(x), normalized to [1,2) for x>=0, NaN for x<0 */ > > > + vgetmantpd $8, {sae}, %zmm11, %zmm6 > > > + > > > +/* GetExp(x) */ > > > + vgetexppd {sae}, %zmm11, %zmm5 > > > + vsubpd {rn-sae}, %zmm10, %zmm11, %zmm13 > > > + > > > +/* DblRcp ~ 1/Mantissa */ > > > + vrcp14pd %zmm6, %zmm15 > > > + > > > +/* Start polynomial evaluation */ > > > + vmovups poly_coeff9+__svml_dlog1p_data_internal_avx512(%rip), %zmm10 > > > + vmovups 
poly_coeff7+__svml_dlog1p_data_internal_avx512(%rip), %zmm11 > > > + > > > +/* Xl */ > > > + vsubpd {rn-sae}, %zmm13, %zmm12, %zmm2 > > > + vxorpd %zmm14, %zmm5, %zmm3 > > > + > > > +/* round DblRcp to 4 fractional bits (RN mode, no Precision exception) */ > > > + vrndscalepd $88, {sae}, %zmm15, %zmm4 > > > + vmovups poly_coeff5+__svml_dlog1p_data_internal_avx512(%rip), %zmm12 > > > + vmovups poly_coeff6+__svml_dlog1p_data_internal_avx512(%rip), %zmm14 > > > + vmovups poly_coeff3+__svml_dlog1p_data_internal_avx512(%rip), %zmm13 > > > + > > > +/* Xl*2^(-Expon) */ > > > + vscalefpd {rn-sae}, %zmm3, %zmm2, %zmm1 > > > + > > > +/* Reduced argument: R = DblRcp*(Mantissa+Xl) - 1 */ > > > + vfmsub213pd {rn-sae}, %zmm7, %zmm4, %zmm6 > > > + vmovups __svml_dlog1p_data_internal_avx512(%rip), %zmm3 > > > + > > > +/* > > > + * Table lookup > > > + * Prepare exponent correction: DblRcp<0.75? > > > + */ > > > + vmovups C075+__svml_dlog1p_data_internal_avx512(%rip), %zmm2 > > > + > > > +/* Prepare table index */ > > > + vpsrlq $48, %zmm4, %zmm0 > > > + vfmadd231pd {rn-sae}, %zmm4, %zmm1, %zmm6 > > > + vmovups poly_coeff8+__svml_dlog1p_data_internal_avx512(%rip), %zmm1 > > > + vcmppd $17, {sae}, %zmm2, %zmm4, %k1 > > > + vcmppd $4, {sae}, %zmm6, %zmm6, %k0 > > > + vfmadd231pd {rn-sae}, %zmm6, %zmm10, %zmm1 > > > + vmovups poly_coeff4+__svml_dlog1p_data_internal_avx512(%rip), %zmm10 > > > + vfmadd231pd {rn-sae}, %zmm6, %zmm11, %zmm14 > > > + vmovups L2+__svml_dlog1p_data_internal_avx512(%rip), %zmm4 > > > + vpermt2pd Log_tbl+64+__svml_dlog1p_data_internal_avx512(%rip), %zmm0, %zmm3 > > > + > > > +/* add 1 to Expon if DblRcp<0.75 */ > > > + vaddpd {rn-sae}, %zmm7, %zmm5, %zmm5{%k1} > > > + > > > +/* R^2 */ > > > + vmulpd {rn-sae}, %zmm6, %zmm6, %zmm0 > > > + vfmadd231pd {rn-sae}, %zmm6, %zmm12, %zmm10 > > > + vmovups poly_coeff2+__svml_dlog1p_data_internal_avx512(%rip), %zmm12 > > > + vmulpd {rn-sae}, %zmm0, %zmm0, %zmm15 > > > + vfmadd231pd {rn-sae}, %zmm6, %zmm13, %zmm12 > > > + 
vfmadd213pd {rn-sae}, %zmm14, %zmm0, %zmm1 > > > + kmovw %k0, %edx > > > + vfmadd213pd {rn-sae}, %zmm12, %zmm0, %zmm10 > > > + > > > +/* polynomial */ > > > + vfmadd213pd {rn-sae}, %zmm10, %zmm15, %zmm1 > > > + vfmadd213pd {rn-sae}, %zmm6, %zmm0, %zmm1 > > > + vaddpd {rn-sae}, %zmm1, %zmm3, %zmm6 > > > + vfmadd213pd {rn-sae}, %zmm6, %zmm4, %zmm5 > > > + vorpd %zmm8, %zmm5, %zmm0 > > > + testl %edx, %edx > > > + > > > +/* Go to special inputs processing branch */ > > > + jne L(SPECIAL_VALUES_BRANCH) > > > + # LOE rbx r12 r13 r14 r15 edx zmm0 zmm9 > > > + > > > +/* Restore registers > > > + * and exit the function > > > + */ > > > + > > > +L(EXIT): > > > + movq %rbp, %rsp > > > + popq %rbp > > > + cfi_def_cfa(7, 8) > > > + cfi_restore(6) > > > + ret > > > + cfi_def_cfa(6, 16) > > > + cfi_offset(6, -16) > > > + > > > +/* Branch to process > > > + * special inputs > > > + */ > > > + > > > +L(SPECIAL_VALUES_BRANCH): > > > + vmovups %zmm9, 64(%rsp) > > > + vmovups %zmm0, 128(%rsp) > > > + # LOE rbx r12 r13 r14 r15 edx zmm0 > > > + > > > + xorl %eax, %eax > > > + # LOE rbx r12 r13 r14 r15 eax edx > > > + > > > + vzeroupper > > > + movq %r12, 16(%rsp) > > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > > > + movl %eax, %r12d > > > + movq %r13, 8(%rsp) > > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > > > + movl %edx, %r13d > > > + movq %r14, (%rsp) > > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 
0x40, 0xff, 0xff, 0xff, 0x22 > > > + # LOE rbx r15 r12d r13d > > > + > > > +/* Range mask > > > + * bits check > > > + */ > > > + > > > +L(RANGEMASK_CHECK): > > > + btl %r12d, %r13d > > > + > > > +/* Call scalar math function */ > > > + jc L(SCALAR_MATH_CALL) > > > + # LOE rbx r15 r12d r13d > > > + > > > +/* Special inputs > > > + * processing loop > > > + */ > > > + > > > +L(SPECIAL_VALUES_LOOP): > > > + incl %r12d > > > + cmpl $8, %r12d > > > + > > > +/* Check bits in range mask */ > > > + jl L(RANGEMASK_CHECK) > > > + # LOE rbx r15 r12d r13d > > > + > > > + movq 16(%rsp), %r12 > > > + cfi_restore(12) > > > + movq 8(%rsp), %r13 > > > + cfi_restore(13) > > > + movq (%rsp), %r14 > > > + cfi_restore(14) > > > + vmovups 128(%rsp), %zmm0 > > > + > > > +/* Go to exit */ > > > + jmp L(EXIT) > > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > > > + # LOE rbx r12 r13 r14 r15 zmm0 > > > + > > > +/* Scalar math function call > > > + * to process special input > > > + */ > > > + > > > +L(SCALAR_MATH_CALL): > > > + movl %r12d, %r14d > > > + movsd 64(%rsp,%r14,8), %xmm0 > > > + call log1p@PLT > > > + # LOE rbx r14 r15 r12d r13d xmm0 > > > + > > > + movsd %xmm0, 128(%rsp,%r14,8) > > > + > > > +/* Process special inputs in loop */ > > > + jmp L(SPECIAL_VALUES_LOOP) > > > + # LOE 
rbx r15 r12d r13d > > > +END(_ZGVeN8v_log1p_skx) > > > + > > > + .section .rodata, "a" > > > + .align 64 > > > + > > > +#ifdef __svml_dlog1p_data_internal_avx512_typedef > > > +typedef unsigned int VUINT32; > > > +typedef struct { > > > + __declspec(align(64)) VUINT32 Log_tbl[16][2]; > > > + __declspec(align(64)) VUINT32 One[8][2]; > > > + __declspec(align(64)) VUINT32 SgnMask[8][2]; > > > + __declspec(align(64)) VUINT32 C075[8][2]; > > > + __declspec(align(64)) VUINT32 poly_coeff9[8][2]; > > > + __declspec(align(64)) VUINT32 poly_coeff8[8][2]; > > > + __declspec(align(64)) VUINT32 poly_coeff7[8][2]; > > > + __declspec(align(64)) VUINT32 poly_coeff6[8][2]; > > > + __declspec(align(64)) VUINT32 poly_coeff5[8][2]; > > > + __declspec(align(64)) VUINT32 poly_coeff4[8][2]; > > > + __declspec(align(64)) VUINT32 poly_coeff3[8][2]; > > > + __declspec(align(64)) VUINT32 poly_coeff2[8][2]; > > > + __declspec(align(64)) VUINT32 L2[8][2]; > > > + } __svml_dlog1p_data_internal_avx512; > > > +#endif > > > +__svml_dlog1p_data_internal_avx512: > > > + /*== Log_tbl ==*/ > > > + .quad 0x0000000000000000 > > > + .quad 0xbfaf0a30c01162a6 > > > + .quad 0xbfbe27076e2af2e6 > > > + .quad 0xbfc5ff3070a793d4 > > > + .quad 0xbfcc8ff7c79a9a22 > > > + .quad 0xbfd1675cababa60e > > > + .quad 0xbfd4618bc21c5ec2 > > > + .quad 0xbfd739d7f6bbd007 > > > + .quad 0x3fd269621134db92 > > > + .quad 0x3fcf991c6cb3b379 > > > + .quad 0x3fca93ed3c8ad9e3 > > > + .quad 0x3fc5bf406b543db2 > > > + .quad 0x3fc1178e8227e47c > > > + .quad 0x3fb9335e5d594989 > > > + .quad 0x3fb08598b59e3a07 > > > + .quad 0x3fa0415d89e74444 > > > + /*== One ==*/ > > > + .align 64 > > > + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 > > > + /*== SgnMask ==*/ > > > + .align 64 > > > + .quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 
0x8000000000000000, 0x8000000000000000 > > > + /*== C075 0.75 ==*/ > > > + .align 64 > > > + .quad 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000 > > > + /*== poly_coeff9 ==*/ > > > + .align 64 > > > + .quad 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70 > > > + /*== poly_coeff8 ==*/ > > > + .align 64 > > > + .quad 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62 > > > + /*== poly_coeff7 ==*/ > > > + .align 64 > > > + .quad 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF > > > + /*== poly_coeff6 ==*/ > > > + .align 64 > > > + .quad 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06 > > > + /*== poly_coeff5 ==*/ > > > + .align 64 > > > + .quad 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C > > > + /*== poly_coeff4 ==*/ > > > + .align 64 > > > + .quad 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD > > > + /*== poly_coeff3 ==*/ > > > + .align 64 > > > + .quad 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466 > > > + /*== poly_coeff2 ==*/ > > > + .align 64 > > > + .quad 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 
0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6 > > > + /*== L2 = log(2) ==*/ > > > + .align 64 > > > + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF > > > + .align 64 > > > + .type __svml_dlog1p_data_internal_avx512,@object > > > + .size __svml_dlog1p_data_internal_avx512,.-__svml_dlog1p_data_internal_avx512 > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S > > > new file mode 100644 > > > index 0000000000..3c0a0a01a2 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S > > > @@ -0,0 +1,20 @@ > > > +/* AVX2 version of vectorized log1pf. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. 
*/ > > > + > > > +#define _ZGVeN16v_log1pf _ZGVeN16v_log1pf_avx2_wrapper > > > +#include "../svml_s_log1pf16_core.S" > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c > > > new file mode 100644 > > > index 0000000000..9af1320547 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c > > > @@ -0,0 +1,28 @@ > > > +/* Multiple versions of vectorized log1pf, vector length is 16. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. 
*/ > > > + > > > +#define SYMBOL_NAME _ZGVeN16v_log1pf > > > +#include "ifunc-mathvec-avx512-skx.h" > > > + > > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > > + > > > +#ifdef SHARED > > > +__hidden_ver1 (_ZGVeN16v_log1pf, __GI__ZGVeN16v_log1pf, > > > + __redirect__ZGVeN16v_log1pf) > > > + __attribute__ ((visibility ("hidden"))); > > > +#endif > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S > > > new file mode 100644 > > > index 0000000000..78b2fe417f > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S > > > @@ -0,0 +1,271 @@ > > > +/* Function log1pf vectorized with AVX-512. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + https://www.gnu.org/licenses/. 
*/ > > > + > > > +/* > > > + * ALGORITHM DESCRIPTION: > > > + * > > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > > + * Get short reciprocal approximation Rcp ~ 1/xh > > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > > + * log(Rcp) is tabulated > > > + * > > > + * > > > + */ > > > + > > > +/* Offsets for data table __svml_slog1p_data_internal > > > + */ > > > +#define SgnMask 0 > > > +#define sOne 64 > > > +#define sPoly_1 128 > > > +#define sPoly_2 192 > > > +#define sPoly_3 256 > > > +#define sPoly_4 320 > > > +#define sPoly_5 384 > > > +#define sPoly_6 448 > > > +#define sPoly_7 512 > > > +#define sPoly_8 576 > > > +#define iHiDelta 640 > > > +#define iLoRange 704 > > > +#define iBrkValue 768 > > > +#define iOffExpoMask 832 > > > +#define sLn2 896 > > > + > > > +#include <sysdep.h> > > > + > > > + .text > > > + .section .text.evex512,"ax",@progbits > > > +ENTRY(_ZGVeN16v_log1pf_skx) > > > + pushq %rbp > > > + cfi_def_cfa_offset(16) > > > + movq %rsp, %rbp > > > + cfi_def_cfa(6, 16) > > > + cfi_offset(6, -16) > > > + andq $-64, %rsp > > > + subq $192, %rsp > > > + vmovups sOne+__svml_slog1p_data_internal(%rip), %zmm2 > > > + > > > +/* reduction: compute r,n */ > > > + vmovups iBrkValue+__svml_slog1p_data_internal(%rip), %zmm12 > > > + vmovups SgnMask+__svml_slog1p_data_internal(%rip), %zmm4 > > > + vmovaps %zmm0, %zmm3 > > > + > > > +/* compute 1+x as high, low parts */ > > > + vmaxps {sae}, %zmm3, %zmm2, %zmm5 > > > + vminps {sae}, %zmm3, %zmm2, %zmm7 > > > + vandnps %zmm3, %zmm4, %zmm1 > > > + vpternlogd $255, %zmm4, %zmm4, %zmm4 > > > + vaddps {rn-sae}, %zmm7, %zmm5, %zmm9 > > > + vpsubd %zmm12, %zmm9, %zmm10 > > > + vsubps {rn-sae}, %zmm9, %zmm5, %zmm6 > > > + > > > +/* check argument value ranges */ > > > + vpaddd iHiDelta+__svml_slog1p_data_internal(%rip), %zmm9, %zmm8 > > > + vpsrad $23, %zmm10, %zmm13 > > > + vmovups sPoly_5+__svml_slog1p_data_internal(%rip), %zmm9 > > > + vpcmpd $5, 
iLoRange+__svml_slog1p_data_internal(%rip), %zmm8, %k1 > > > + vpslld $23, %zmm13, %zmm14 > > > + vaddps {rn-sae}, %zmm7, %zmm6, %zmm15 > > > + vcvtdq2ps {rn-sae}, %zmm13, %zmm0 > > > + vpsubd %zmm14, %zmm2, %zmm13 > > > + vmovups sPoly_8+__svml_slog1p_data_internal(%rip), %zmm7 > > > + vmovups sPoly_1+__svml_slog1p_data_internal(%rip), %zmm14 > > > + vmulps {rn-sae}, %zmm13, %zmm15, %zmm6 > > > + vpandd iOffExpoMask+__svml_slog1p_data_internal(%rip), %zmm10, %zmm11 > > > + vpaddd %zmm12, %zmm11, %zmm5 > > > + vmovups sPoly_4+__svml_slog1p_data_internal(%rip), %zmm10 > > > + vmovups sPoly_3+__svml_slog1p_data_internal(%rip), %zmm11 > > > + vmovups sPoly_2+__svml_slog1p_data_internal(%rip), %zmm12 > > > + > > > +/* polynomial evaluation */ > > > + vsubps {rn-sae}, %zmm2, %zmm5, %zmm2 > > > + vaddps {rn-sae}, %zmm6, %zmm2, %zmm15 > > > + vmovups sPoly_7+__svml_slog1p_data_internal(%rip), %zmm2 > > > + vfmadd231ps {rn-sae}, %zmm15, %zmm7, %zmm2 > > > + vpandnd %zmm8, %zmm8, %zmm4{%k1} > > > + vmovups sPoly_6+__svml_slog1p_data_internal(%rip), %zmm8 > > > + > > > +/* combine and get argument value range mask */ > > > + vptestmd %zmm4, %zmm4, %k0 > > > + vfmadd213ps {rn-sae}, %zmm8, %zmm15, %zmm2 > > > + kmovw %k0, %edx > > > + vfmadd213ps {rn-sae}, %zmm9, %zmm15, %zmm2 > > > + vfmadd213ps {rn-sae}, %zmm10, %zmm15, %zmm2 > > > + vfmadd213ps {rn-sae}, %zmm11, %zmm15, %zmm2 > > > + vfmadd213ps {rn-sae}, %zmm12, %zmm15, %zmm2 > > > + vfmadd213ps {rn-sae}, %zmm14, %zmm15, %zmm2 > > > + vmulps {rn-sae}, %zmm15, %zmm2, %zmm4 > > > + vfmadd213ps {rn-sae}, %zmm15, %zmm15, %zmm4 > > > + > > > +/* final reconstruction */ > > > + vmovups sLn2+__svml_slog1p_data_internal(%rip), %zmm15 > > > + vfmadd213ps {rn-sae}, %zmm4, %zmm15, %zmm0 > > > + vorps %zmm1, %zmm0, %zmm0 > > > + testl %edx, %edx > > > + > > > +/* Go to special inputs processing branch */ > > > + jne L(SPECIAL_VALUES_BRANCH) > > > + # LOE rbx r12 r13 r14 r15 edx zmm0 zmm3 > > > + > > > +/* Restore registers > > > + * 
and exit the function > > > + */ > > > + > > > +L(EXIT): > > > + movq %rbp, %rsp > > > + popq %rbp > > > + cfi_def_cfa(7, 8) > > > + cfi_restore(6) > > > + ret > > > + cfi_def_cfa(6, 16) > > > + cfi_offset(6, -16) > > > + > > > +/* Branch to process > > > + * special inputs > > > + */ > > > + > > > +L(SPECIAL_VALUES_BRANCH): > > > + vmovups %zmm3, 64(%rsp) > > > + vmovups %zmm0, 128(%rsp) > > > + # LOE rbx r12 r13 r14 r15 edx zmm0 > > > + > > > + xorl %eax, %eax > > > + # LOE rbx r12 r13 r14 r15 eax edx > > > + > > > + vzeroupper > > > + movq %r12, 16(%rsp) > > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > > > + movl %eax, %r12d > > > + movq %r13, 8(%rsp) > > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > > > + movl %edx, %r13d > > > + movq %r14, (%rsp) > > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > > > + # LOE rbx r15 r12d r13d > > > + > > > +/* Range mask > > > + * bits check > > > + */ > > > + > > > +L(RANGEMASK_CHECK): > > > + btl %r12d, %r13d > > > + > > > +/* Call scalar math function */ > > > + jc L(SCALAR_MATH_CALL) > > > + # LOE rbx r15 r12d r13d > > > + > > > +/* Special inputs > > > + * processing loop > > > + */ > > > + > > > +L(SPECIAL_VALUES_LOOP): > > > + incl %r12d > > > + cmpl $16, %r12d > > > + > > > +/* Check bits in range mask */ > > > + jl L(RANGEMASK_CHECK) > > > + # LOE rbx r15 r12d r13d > > > + > > > + movq 16(%rsp), %r12 > > > + 
cfi_restore(12) > > > + movq 8(%rsp), %r13 > > > + cfi_restore(13) > > > + movq (%rsp), %r14 > > > + cfi_restore(14) > > > + vmovups 128(%rsp), %zmm0 > > > + > > > +/* Go to exit */ > > > + jmp L(EXIT) > > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 > > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 > > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 > > > + # LOE rbx r12 r13 r14 r15 zmm0 > > > + > > > +/* Scalar math function call > > > + * to process special input > > > + */ > > > + > > > +L(SCALAR_MATH_CALL): > > > + movl %r12d, %r14d > > > + movss 64(%rsp,%r14,4), %xmm0 > > > + call log1pf@PLT > > > + # LOE rbx r14 r15 r12d r13d xmm0 > > > + > > > + movss %xmm0, 128(%rsp,%r14,4) > > > + > > > +/* Process special inputs in loop */ > > > + jmp L(SPECIAL_VALUES_LOOP) > > > + # LOE rbx r15 r12d r13d > > > +END(_ZGVeN16v_log1pf_skx) > > > + > > > + .section .rodata, "a" > > > + .align 64 > > > + > > > +#ifdef __svml_slog1p_data_internal_typedef > > > +typedef unsigned int VUINT32; > > > +typedef struct { > > > + __declspec(align(64)) VUINT32 SgnMask[16][1]; > > > + __declspec(align(64)) VUINT32 sOne[16][1]; > > > + __declspec(align(64)) VUINT32 sPoly[8][16][1]; > > > + __declspec(align(64)) VUINT32 iHiDelta[16][1]; > > > + __declspec(align(64)) VUINT32 iLoRange[16][1]; > > > + __declspec(align(64)) VUINT32 iBrkValue[16][1]; > > > + __declspec(align(64)) VUINT32 
iOffExpoMask[16][1]; > > > + __declspec(align(64)) VUINT32 sLn2[16][1]; > > > +} __svml_slog1p_data_internal; > > > +#endif > > > +__svml_slog1p_data_internal: > > > + /*== SgnMask ==*/ > > > + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff > > > + /*== sOne = SP 1.0 ==*/ > > > + .align 64 > > > + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 > > > + /*== sPoly[] = SP polynomial ==*/ > > > + .align 64 > > > + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ > > > + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ > > > + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ > > > + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ > > > + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ > > > + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 
0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ > > > + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ > > > + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ > > > + /*== iHiDelta = SP 80000000-7f000000 ==*/ > > > + .align 64 > > > + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000 > > > + /*== iLoRange = SP 00800000+iHiDelta ==*/ > > > + .align 64 > > > + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000 > > > + /*== iBrkValue = SP 2/3 ==*/ > > > + .align 64 > > > + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab > > > + /*== iOffExpoMask = SP significand mask ==*/ > > > + .align 64 > > > + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff > > > + /*== sLn2 = SP ln(2) ==*/ > > > + .align 64 > > > + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 
0x3f317218, 0x3f317218, 0x3f317218 > > > + .align 64 > > > + .type __svml_slog1p_data_internal,@object > > > + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S > > > new file mode 100644 > > > index 0000000000..913c8290c8 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S > > > @@ -0,0 +1,20 @@ > > > +/* SSE2 version of vectorized log1pf, vector length is 4. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#define _ZGVbN4v_log1pf _ZGVbN4v_log1pf_sse2 > > > +#include "../svml_s_log1pf4_core.S" > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c > > > new file mode 100644 > > > index 0000000000..b6aff48023 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c > > > @@ -0,0 +1,28 @@ > > > +/* Multiple versions of vectorized log1pf, vector length is 4. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. 
> > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#define SYMBOL_NAME _ZGVbN4v_log1pf > > > +#include "ifunc-mathvec-sse4_1.h" > > > + > > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > > + > > > +#ifdef SHARED > > > +__hidden_ver1 (_ZGVbN4v_log1pf, __GI__ZGVbN4v_log1pf, > > > + __redirect__ZGVbN4v_log1pf) > > > + __attribute__ ((visibility ("hidden"))); > > > +#endif > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S > > > new file mode 100644 > > > index 0000000000..ef1bae58c0 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S > > > @@ -0,0 +1,252 @@ > > > +/* Function log1pf vectorized with SSE4. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. 
> > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + https://www.gnu.org/licenses/. */ > > > + > > > +/* > > > + * ALGORITHM DESCRIPTION: > > > + * > > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > > + * Get short reciprocal approximation Rcp ~ 1/xh > > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > > + * log(Rcp) is tabulated > > > + * > > > + * > > > + */ > > > + > > > +/* Offsets for data table __svml_slog1p_data_internal > > > + */ > > > +#define SgnMask 0 > > > +#define sOne 16 > > > +#define sPoly 32 > > > +#define iHiDelta 160 > > > +#define iLoRange 176 > > > +#define iBrkValue 192 > > > +#define iOffExpoMask 208 > > > +#define sLn2 224 > > > + > > > +#include <sysdep.h> > > > + > > > + .text > > > + .section .text.sse4,"ax",@progbits > > > +ENTRY(_ZGVbN4v_log1pf_sse4) > > > + subq $72, %rsp > > > + cfi_def_cfa_offset(80) > > > + movups sOne+__svml_slog1p_data_internal(%rip), %xmm7 > > > + > > > +/* compute 1+x as high, low parts */ > > > + movaps %xmm7, %xmm1 > > > + movaps %xmm7, %xmm5 > > > + maxps %xmm0, %xmm1 > > > + minps %xmm0, %xmm5 > > > + movaps %xmm1, %xmm4 > > > + > > > +/* check argument value ranges */ > > > + movdqu iHiDelta+__svml_slog1p_data_internal(%rip), %xmm2 > > > + addps %xmm5, %xmm4 > > > + > > > +/* reduction: compute r,n */ > > > + movdqu iBrkValue+__svml_slog1p_data_internal(%rip), %xmm3 > > > + paddd %xmm4, %xmm2 > > > + movdqu iOffExpoMask+__svml_slog1p_data_internal(%rip), %xmm8 > > > + subps %xmm4, %xmm1 > > > + psubd %xmm3, %xmm4 > > > + addps %xmm1, %xmm5 > > > + pand %xmm4, %xmm8 > 
> > + psrad $23, %xmm4 > > > + cvtdq2ps %xmm4, %xmm10 > > > + pslld $23, %xmm4 > > > + movaps %xmm7, %xmm1 > > > + paddd %xmm3, %xmm8 > > > + psubd %xmm4, %xmm1 > > > + mulps %xmm5, %xmm1 > > > + > > > +/* polynomial evaluation */ > > > + subps %xmm7, %xmm8 > > > + > > > +/* final reconstruction */ > > > + mulps sLn2+__svml_slog1p_data_internal(%rip), %xmm10 > > > + addps %xmm8, %xmm1 > > > + movups sPoly+112+__svml_slog1p_data_internal(%rip), %xmm9 > > > + mulps %xmm1, %xmm9 > > > + movdqu iLoRange+__svml_slog1p_data_internal(%rip), %xmm6 > > > + pcmpgtd %xmm2, %xmm6 > > > + addps sPoly+96+__svml_slog1p_data_internal(%rip), %xmm9 > > > + > > > +/* combine and get argument value range mask */ > > > + movmskps %xmm6, %edx > > > + movups SgnMask+__svml_slog1p_data_internal(%rip), %xmm11 > > > + mulps %xmm1, %xmm9 > > > + andnps %xmm0, %xmm11 > > > + addps sPoly+80+__svml_slog1p_data_internal(%rip), %xmm9 > > > + mulps %xmm1, %xmm9 > > > + addps sPoly+64+__svml_slog1p_data_internal(%rip), %xmm9 > > > + mulps %xmm1, %xmm9 > > > + addps sPoly+48+__svml_slog1p_data_internal(%rip), %xmm9 > > > + mulps %xmm1, %xmm9 > > > + addps sPoly+32+__svml_slog1p_data_internal(%rip), %xmm9 > > > + mulps %xmm1, %xmm9 > > > + addps sPoly+16+__svml_slog1p_data_internal(%rip), %xmm9 > > > + mulps %xmm1, %xmm9 > > > + addps sPoly+__svml_slog1p_data_internal(%rip), %xmm9 > > > + mulps %xmm1, %xmm9 > > > + mulps %xmm1, %xmm9 > > > + addps %xmm9, %xmm1 > > > + addps %xmm10, %xmm1 > > > + orps %xmm11, %xmm1 > > > + testl %edx, %edx > > > + > > > +/* Go to special inputs processing branch */ > > > + jne L(SPECIAL_VALUES_BRANCH) > > > + # LOE rbx rbp r12 r13 r14 r15 edx xmm0 xmm1 > > > + > > > +/* Restore registers > > > + * and exit the function > > > + */ > > > + > > > +L(EXIT): > > > + movaps %xmm1, %xmm0 > > > + addq $72, %rsp > > > + cfi_def_cfa_offset(8) > > > + ret > > > + cfi_def_cfa_offset(80) > > > + > > > +/* Branch to process > > > + * special inputs > > > + */ > > > + > > > 
+L(SPECIAL_VALUES_BRANCH): > > > + movups %xmm0, 32(%rsp) > > > + movups %xmm1, 48(%rsp) > > > + # LOE rbx rbp r12 r13 r14 r15 edx > > > + > > > + xorl %eax, %eax > > > + movq %r12, 16(%rsp) > > > + cfi_offset(12, -64) > > > + movl %eax, %r12d > > > + movq %r13, 8(%rsp) > > > + cfi_offset(13, -72) > > > + movl %edx, %r13d > > > + movq %r14, (%rsp) > > > + cfi_offset(14, -80) > > > + # LOE rbx rbp r15 r12d r13d > > > + > > > +/* Range mask > > > + * bits check > > > + */ > > > + > > > +L(RANGEMASK_CHECK): > > > + btl %r12d, %r13d > > > + > > > +/* Call scalar math function */ > > > + jc L(SCALAR_MATH_CALL) > > > + # LOE rbx rbp r15 r12d r13d > > > + > > > +/* Special inputs > > > + * processing loop > > > + */ > > > + > > > +L(SPECIAL_VALUES_LOOP): > > > + incl %r12d > > > + cmpl $4, %r12d > > > + > > > +/* Check bits in range mask */ > > > + jl L(RANGEMASK_CHECK) > > > + # LOE rbx rbp r15 r12d r13d > > > + > > > + movq 16(%rsp), %r12 > > > + cfi_restore(12) > > > + movq 8(%rsp), %r13 > > > + cfi_restore(13) > > > + movq (%rsp), %r14 > > > + cfi_restore(14) > > > + movups 48(%rsp), %xmm1 > > > + > > > +/* Go to exit */ > > > + jmp L(EXIT) > > > + cfi_offset(12, -64) > > > + cfi_offset(13, -72) > > > + cfi_offset(14, -80) > > > + # LOE rbx rbp r12 r13 r14 r15 xmm1 > > > + > > > +/* Scalar math function call > > > + * to process special input > > > + */ > > > + > > > +L(SCALAR_MATH_CALL): > > > + movl %r12d, %r14d > > > + movss 32(%rsp,%r14,4), %xmm0 > > > + call log1pf@PLT > > > + # LOE rbx rbp r14 r15 r12d r13d xmm0 > > > + > > > + movss %xmm0, 48(%rsp,%r14,4) > > > + > > > +/* Process special inputs in loop */ > > > + jmp L(SPECIAL_VALUES_LOOP) > > > + # LOE rbx rbp r15 r12d r13d > > > +END(_ZGVbN4v_log1pf_sse4) > > > + > > > + .section .rodata, "a" > > > + .align 16 > > > + > > > +#ifdef __svml_slog1p_data_internal_typedef > > > +typedef unsigned int VUINT32; > > > +typedef struct { > > > + __declspec(align(16)) VUINT32 SgnMask[4][1]; > > > + __declspec(align(16)) 
VUINT32 sOne[4][1]; > > > + __declspec(align(16)) VUINT32 sPoly[8][4][1]; > > > + __declspec(align(16)) VUINT32 iHiDelta[4][1]; > > > + __declspec(align(16)) VUINT32 iLoRange[4][1]; > > > + __declspec(align(16)) VUINT32 iBrkValue[4][1]; > > > + __declspec(align(16)) VUINT32 iOffExpoMask[4][1]; > > > + __declspec(align(16)) VUINT32 sLn2[4][1]; > > > +} __svml_slog1p_data_internal; > > > +#endif > > > +__svml_slog1p_data_internal: > > > + /*== SgnMask ==*/ > > > + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff > > > + /*== sOne = SP 1.0 ==*/ > > > + .align 16 > > > + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 > > > + /*== sPoly[] = SP polynomial ==*/ > > > + .align 16 > > > + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ > > > + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ > > > + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ > > > + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ > > > + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ > > > + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ > > > + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ > > > + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ > > > + /*== iHiDelta = SP 80000000-7f000000 ==*/ > > > + .align 16 > > > + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 > > > + /*== iLoRange = SP 00800000+iHiDelta ==*/ > > > + .align 16 > > > + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000 > > > + /*== iBrkValue = SP 2/3 ==*/ > > > + .align 16 > > > + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab > > > + /*== iOffExpoMask = SP significand mask ==*/ > > > + .align 16 > > > + .long 0x007fffff, 0x007fffff, 
0x007fffff, 0x007fffff > > > + /*== sLn2 = SP ln(2) ==*/ > > > + .align 16 > > > + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 > > > + .align 16 > > > + .type __svml_slog1p_data_internal,@object > > > + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S > > > new file mode 100644 > > > index 0000000000..c0b97d89e6 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S > > > @@ -0,0 +1,20 @@ > > > +/* SSE version of vectorized log1pf, vector length is 8. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#define _ZGVdN8v_log1pf _ZGVdN8v_log1pf_sse_wrapper > > > +#include "../svml_s_log1pf8_core.S" > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c > > > new file mode 100644 > > > index 0000000000..a2bbe37129 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c > > > @@ -0,0 +1,28 @@ > > > +/* Multiple versions of vectorized log1pf, vector length is 8. 
> > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#define SYMBOL_NAME _ZGVdN8v_log1pf > > > +#include "ifunc-mathvec-avx2.h" > > > + > > > +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); > > > + > > > +#ifdef SHARED > > > +__hidden_ver1 (_ZGVdN8v_log1pf, __GI__ZGVdN8v_log1pf, > > > + __redirect__ZGVdN8v_log1pf) > > > + __attribute__ ((visibility ("hidden"))); > > > +#endif > > > diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S > > > new file mode 100644 > > > index 0000000000..957dc23e3f > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S > > > @@ -0,0 +1,254 @@ > > > +/* Function log1pf vectorized with AVX2. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. 
> > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + https://www.gnu.org/licenses/. */ > > > + > > > +/* > > > + * ALGORITHM DESCRIPTION: > > > + * > > > + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) > > > + * Get short reciprocal approximation Rcp ~ 1/xh > > > + * R = (Rcp*xh - 1.0) + Rcp*xl > > > + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) > > > + * log(Rcp) is tabulated > > > + * > > > + * > > > + */ > > > + > > > +/* Offsets for data table __svml_slog1p_data_internal > > > + */ > > > +#define SgnMask 0 > > > +#define sOne 32 > > > +#define sPoly 64 > > > +#define iHiDelta 320 > > > +#define iLoRange 352 > > > +#define iBrkValue 384 > > > +#define iOffExpoMask 416 > > > +#define sLn2 448 > > > + > > > +#include <sysdep.h> > > > + > > > + .text > > > + .section .text.avx2,"ax",@progbits > > > +ENTRY(_ZGVdN8v_log1pf_avx2) > > > + pushq %rbp > > > + cfi_def_cfa_offset(16) > > > + movq %rsp, %rbp > > > + cfi_def_cfa(6, 16) > > > + cfi_offset(6, -16) > > > + andq $-32, %rsp > > > + subq $96, %rsp > > > + vmovups sOne+__svml_slog1p_data_internal(%rip), %ymm2 > > > + > > > +/* reduction: compute r,n */ > > > + vmovups iBrkValue+__svml_slog1p_data_internal(%rip), %ymm13 > > > + vmovups SgnMask+__svml_slog1p_data_internal(%rip), %ymm4 > > > + vmovups iLoRange+__svml_slog1p_data_internal(%rip), %ymm8 > > > + vmovaps %ymm0, %ymm3 > > > + > > > +/* compute 1+x as high, low parts */ > > > + vmaxps %ymm3, %ymm2, %ymm5 > > > + vminps %ymm3, %ymm2, %ymm6 > > > + vaddps %ymm6, %ymm5, %ymm10 > > > + vpsubd %ymm13, %ymm10, %ymm11 > > > + > > > +/* check argument value ranges */ > > > + 
vpaddd iHiDelta+__svml_slog1p_data_internal(%rip), %ymm10, %ymm9 > > > + vsubps %ymm10, %ymm5, %ymm7 > > > + vpsrad $23, %ymm11, %ymm14 > > > + vpand iOffExpoMask+__svml_slog1p_data_internal(%rip), %ymm11, %ymm12 > > > + vpslld $23, %ymm14, %ymm15 > > > + vcvtdq2ps %ymm14, %ymm0 > > > + vpsubd %ymm15, %ymm2, %ymm14 > > > + vandnps %ymm3, %ymm4, %ymm1 > > > + vaddps %ymm7, %ymm6, %ymm4 > > > + vpaddd %ymm13, %ymm12, %ymm6 > > > + vmulps %ymm4, %ymm14, %ymm7 > > > + > > > +/* polynomial evaluation */ > > > + vsubps %ymm2, %ymm6, %ymm2 > > > + vpcmpgtd %ymm9, %ymm8, %ymm5 > > > + vmovups sPoly+224+__svml_slog1p_data_internal(%rip), %ymm8 > > > + vaddps %ymm2, %ymm7, %ymm9 > > > + vfmadd213ps sPoly+192+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > > + vfmadd213ps sPoly+160+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > > + vfmadd213ps sPoly+128+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > > + vfmadd213ps sPoly+96+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > > + vfmadd213ps sPoly+64+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > > + vfmadd213ps sPoly+32+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > > + vfmadd213ps sPoly+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 > > > + vmulps %ymm8, %ymm9, %ymm10 > > > + vfmadd213ps %ymm9, %ymm9, %ymm10 > > > + > > > +/* final reconstruction */ > > > + vfmadd132ps sLn2+__svml_slog1p_data_internal(%rip), %ymm10, %ymm0 > > > + > > > +/* combine and get argument value range mask */ > > > + vmovmskps %ymm5, %edx > > > + vorps %ymm1, %ymm0, %ymm0 > > > + testl %edx, %edx > > > + > > > +/* Go to special inputs processing branch */ > > > + jne L(SPECIAL_VALUES_BRANCH) > > > + # LOE rbx r12 r13 r14 r15 edx ymm0 ymm3 > > > + > > > +/* Restore registers > > > + * and exit the function > > > + */ > > > + > > > +L(EXIT): > > > + movq %rbp, %rsp > > > + popq %rbp > > > + cfi_def_cfa(7, 8) > > > + cfi_restore(6) > > > + ret > > > + cfi_def_cfa(6, 16) > > > + cfi_offset(6, -16) > > > + > > > +/* Branch to 
process > > > + * special inputs > > > + */ > > > + > > > +L(SPECIAL_VALUES_BRANCH): > > > + vmovups %ymm3, 32(%rsp) > > > + vmovups %ymm0, 64(%rsp) > > > + # LOE rbx r12 r13 r14 r15 edx ymm0 > > > + > > > + xorl %eax, %eax > > > + # LOE rbx r12 r13 r14 r15 eax edx > > > + > > > + vzeroupper > > > + movq %r12, 16(%rsp) > > > + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > > > + movl %eax, %r12d > > > + movq %r13, 8(%rsp) > > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > > > + movl %edx, %r13d > > > + movq %r14, (%rsp) > > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > > > + # LOE rbx r15 r12d r13d > > > + > > > +/* Range mask > > > + * bits check > > > + */ > > > + > > > +L(RANGEMASK_CHECK): > > > + btl %r12d, %r13d > > > + > > > +/* Call scalar math function */ > > > + jc L(SCALAR_MATH_CALL) > > > + # LOE rbx r15 r12d r13d > > > + > > > +/* Special inputs > > > + * processing loop > > > + */ > > > + > > > +L(SPECIAL_VALUES_LOOP): > > > + incl %r12d > > > + cmpl $8, %r12d > > > + > > > +/* Check bits in range mask */ > > > + jl L(RANGEMASK_CHECK) > > > + # LOE rbx r15 r12d r13d > > > + > > > + movq 16(%rsp), %r12 > > > + cfi_restore(12) > > > + movq 8(%rsp), %r13 > > > + cfi_restore(13) > > > + movq (%rsp), %r14 > > > + cfi_restore(14) > > > + vmovups 64(%rsp), %ymm0 > > > + > > > +/* Go to exit */ > > > + jmp L(EXIT) > > > + /* DW_CFA_expression: r12 (r12) 
(DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 > > > + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 > > > + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ > > > + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 > > > + # LOE rbx r12 r13 r14 r15 ymm0 > > > + > > > +/* Scalar math function call > > > + * to process special input > > > + */ > > > + > > > +L(SCALAR_MATH_CALL): > > > + movl %r12d, %r14d > > > + movss 32(%rsp,%r14,4), %xmm0 > > > + call log1pf@PLT > > > + # LOE rbx r14 r15 r12d r13d xmm0 > > > + > > > + movss %xmm0, 64(%rsp,%r14,4) > > > + > > > +/* Process special inputs in loop */ > > > + jmp L(SPECIAL_VALUES_LOOP) > > > + # LOE rbx r15 r12d r13d > > > +END(_ZGVdN8v_log1pf_avx2) > > > + > > > + .section .rodata, "a" > > > + .align 32 > > > + > > > +#ifdef __svml_slog1p_data_internal_typedef > > > +typedef unsigned int VUINT32; > > > +typedef struct { > > > + __declspec(align(32)) VUINT32 SgnMask[8][1]; > > > + __declspec(align(32)) VUINT32 sOne[8][1]; > > > + __declspec(align(32)) VUINT32 sPoly[8][8][1]; > > > + __declspec(align(32)) VUINT32 iHiDelta[8][1]; > > > + __declspec(align(32)) VUINT32 iLoRange[8][1]; > > > + __declspec(align(32)) VUINT32 iBrkValue[8][1]; > > > + __declspec(align(32)) VUINT32 iOffExpoMask[8][1]; > > > + __declspec(align(32)) VUINT32 sLn2[8][1]; > > > +} __svml_slog1p_data_internal; > > > +#endif > > > +__svml_slog1p_data_internal: > > > + /*== SgnMask ==*/ > > > + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 
0x7fffffff, 0x7fffffff, 0x7fffffff > > > + /*== sOne = SP 1.0 ==*/ > > > + .align 32 > > > + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 > > > + /*== sPoly[] = SP polynomial ==*/ > > > + .align 32 > > > + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ > > > + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ > > > + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ > > > + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ > > > + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ > > > + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ > > > + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ > > > + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ > > > + /*== iHiDelta = SP 80000000-7f000000 ==*/ > > > + .align 32 > > > + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000 > > > + /*== iLoRange = SP 00800000+iHiDelta ==*/ > > > + .align 32 > > > + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000 > > > + /*== iBrkValue = SP 2/3 ==*/ > > > + .align 32 > > > + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab > > > + /*== 
iOffExpoMask = SP significand mask ==*/ > > > + .align 32 > > > + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff > > > + /*== sLn2 = SP ln(2) ==*/ > > > + .align 32 > > > + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 > > > + .align 32 > > > + .type __svml_slog1p_data_internal,@object > > > + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal > > > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p2_core.S b/sysdeps/x86_64/fpu/svml_d_log1p2_core.S > > > new file mode 100644 > > > index 0000000000..e3f01717d9 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/svml_d_log1p2_core.S > > > @@ -0,0 +1,29 @@ > > > +/* Function log1p vectorized with SSE2. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. 
*/ > > > + > > > +#include <sysdep.h> > > > +#include "svml_d_wrapper_impl.h" > > > + > > > + .text > > > +ENTRY (_ZGVbN2v_log1p) > > > +WRAPPER_IMPL_SSE2 log1p > > > +END (_ZGVbN2v_log1p) > > > + > > > +#ifndef USE_MULTIARCH > > > + libmvec_hidden_def (_ZGVbN2v_log1p) > > > +#endif > > > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p4_core.S b/sysdeps/x86_64/fpu/svml_d_log1p4_core.S > > > new file mode 100644 > > > index 0000000000..49beb96183 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/svml_d_log1p4_core.S > > > @@ -0,0 +1,29 @@ > > > +/* Function log1p vectorized with AVX2, wrapper version. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. 
*/ > > > + > > > +#include <sysdep.h> > > > +#include "svml_d_wrapper_impl.h" > > > + > > > + .text > > > +ENTRY (_ZGVdN4v_log1p) > > > +WRAPPER_IMPL_AVX _ZGVbN2v_log1p > > > +END (_ZGVdN4v_log1p) > > > + > > > +#ifndef USE_MULTIARCH > > > + libmvec_hidden_def (_ZGVdN4v_log1p) > > > +#endif > > > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S > > > new file mode 100644 > > > index 0000000000..8b89768b7c > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S > > > @@ -0,0 +1,25 @@ > > > +/* Function log1p vectorized in AVX ISA as wrapper to SSE4 ISA version. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. 
*/ > > > + > > > +#include <sysdep.h> > > > +#include "svml_d_wrapper_impl.h" > > > + > > > + .text > > > +ENTRY (_ZGVcN4v_log1p) > > > +WRAPPER_IMPL_AVX _ZGVbN2v_log1p > > > +END (_ZGVcN4v_log1p) > > > diff --git a/sysdeps/x86_64/fpu/svml_d_log1p8_core.S b/sysdeps/x86_64/fpu/svml_d_log1p8_core.S > > > new file mode 100644 > > > index 0000000000..54b4d4ede8 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/svml_d_log1p8_core.S > > > @@ -0,0 +1,25 @@ > > > +/* Function log1p vectorized with AVX-512, wrapper to AVX2. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#include <sysdep.h> > > > +#include "svml_d_wrapper_impl.h" > > > + > > > + .text > > > +ENTRY (_ZGVeN8v_log1p) > > > +WRAPPER_IMPL_AVX512 _ZGVdN4v_log1p > > > +END (_ZGVeN8v_log1p) > > > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S > > > new file mode 100644 > > > index 0000000000..2c953d00fb > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S > > > @@ -0,0 +1,25 @@ > > > +/* Function log1pf vectorized with AVX-512. Wrapper to AVX2 version. > > > + Copyright (C) 2021 Free Software Foundation, Inc. 
> > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#include <sysdep.h> > > > +#include "svml_s_wrapper_impl.h" > > > + > > > + .text > > > +ENTRY (_ZGVeN16v_log1pf) > > > +WRAPPER_IMPL_AVX512 _ZGVdN8v_log1pf > > > +END (_ZGVeN16v_log1pf) > > > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S > > > new file mode 100644 > > > index 0000000000..6f68762eaa > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S > > > @@ -0,0 +1,29 @@ > > > +/* Function log1pf vectorized with SSE2, wrapper version. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#include <sysdep.h> > > > +#include "svml_s_wrapper_impl.h" > > > + > > > + .text > > > +ENTRY (_ZGVbN4v_log1pf) > > > +WRAPPER_IMPL_SSE2 log1pf > > > +END (_ZGVbN4v_log1pf) > > > + > > > +#ifndef USE_MULTIARCH > > > + libmvec_hidden_def (_ZGVbN4v_log1pf) > > > +#endif > > > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S > > > new file mode 100644 > > > index 0000000000..74f81283b1 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S > > > @@ -0,0 +1,29 @@ > > > +/* Function log1pf vectorized with AVX2, wrapper version. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. 
*/ > > > + > > > +#include <sysdep.h> > > > +#include "svml_s_wrapper_impl.h" > > > + > > > + .text > > > +ENTRY (_ZGVdN8v_log1pf) > > > +WRAPPER_IMPL_AVX _ZGVbN4v_log1pf > > > +END (_ZGVdN8v_log1pf) > > > + > > > +#ifndef USE_MULTIARCH > > > + libmvec_hidden_def (_ZGVdN8v_log1pf) > > > +#endif > > > diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S > > > new file mode 100644 > > > index 0000000000..f33be0e904 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S > > > @@ -0,0 +1,25 @@ > > > +/* Function log1pf vectorized in AVX ISA as wrapper to SSE4 ISA version. > > > + Copyright (C) 2021 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. 
*/ > > > + > > > +#include <sysdep.h> > > > +#include "svml_s_wrapper_impl.h" > > > + > > > + .text > > > +ENTRY (_ZGVcN8v_log1pf) > > > +WRAPPER_IMPL_AVX _ZGVbN4v_log1pf > > > +END (_ZGVcN8v_log1pf) > > > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c > > > new file mode 100644 > > > index 0000000000..18aa6aaeaa > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c > > > @@ -0,0 +1 @@ > > > +#include "test-double-libmvec-log1p.c" > > > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c > > > new file mode 100644 > > > index 0000000000..18aa6aaeaa > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c > > > @@ -0,0 +1 @@ > > > +#include "test-double-libmvec-log1p.c" > > > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c > > > new file mode 100644 > > > index 0000000000..18aa6aaeaa > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c > > > @@ -0,0 +1 @@ > > > +#include "test-double-libmvec-log1p.c" > > > diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c > > > new file mode 100644 > > > index 0000000000..40937f987a > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c > > > @@ -0,0 +1,3 @@ > > > +#define LIBMVEC_TYPE double > > > +#define LIBMVEC_FUNC log1p > > > +#include "test-vector-abi-arg1.h" > > > diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c > > > index 08c91ff634..38359b05e3 100644 > > > --- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c > > > +++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c > > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVbN2v_cbrt) > > > VECTOR_WRAPPER_ff (WRAPPER_NAME 
(atan2), _ZGVbN2vv_atan2) > > > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVbN2v_log10) > > > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVbN2v_log2) > > > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVbN2v_log1p) > > > > > > #define VEC_INT_TYPE __m128i > > > > > > diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c > > > index a2fb0de309..17701e7731 100644 > > > --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c > > > +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c > > > @@ -43,6 +43,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVdN4v_cbrt) > > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVdN4vv_atan2) > > > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVdN4v_log10) > > > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVdN4v_log2) > > > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVdN4v_log1p) > > > > > > #ifndef __ILP32__ > > > # define VEC_INT_TYPE __m256i > > > diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c > > > index dc65a4ee25..bba62b2446 100644 > > > --- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c > > > +++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c > > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVcN4v_cbrt) > > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVcN4vv_atan2) > > > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVcN4v_log10) > > > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVcN4v_log2) > > > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVcN4v_log1p) > > > > > > #define VEC_INT_TYPE __m128i > > > > > > diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c > > > index 253ee8c906..8a04e13a07 100644 > > > --- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c > > > +++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c > > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVeN8v_cbrt) > > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVeN8vv_atan2) 
> > > VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVeN8v_log10) > > > VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVeN8v_log2) > > > +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVeN8v_log1p) > > > > > > #ifndef __ILP32__ > > > # define VEC_INT_TYPE __m512i > > > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c > > > new file mode 100644 > > > index 0000000000..3395decaf4 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c > > > @@ -0,0 +1 @@ > > > +#include "test-float-libmvec-log1pf.c" > > > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c > > > new file mode 100644 > > > index 0000000000..3395decaf4 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c > > > @@ -0,0 +1 @@ > > > +#include "test-float-libmvec-log1pf.c" > > > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c > > > new file mode 100644 > > > index 0000000000..3395decaf4 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c > > > @@ -0,0 +1 @@ > > > +#include "test-float-libmvec-log1pf.c" > > > diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c > > > new file mode 100644 > > > index 0000000000..1b36069ded > > > --- /dev/null > > > +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c > > > @@ -0,0 +1,3 @@ > > > +#define LIBMVEC_TYPE float > > > +#define LIBMVEC_FUNC log1pf > > > +#include "test-vector-abi-arg1.h" > > > diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c > > > index 1c7db5146c..706f52c618 100644 > > > --- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c > > > +++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c > > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), 
_ZGVeN16v_cbrtf) > > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVeN16vv_atan2f) > > > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVeN16v_log10f) > > > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVeN16v_log2f) > > > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVeN16v_log1pf) > > > > > > #define VEC_INT_TYPE __m512i > > > > > > diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c > > > index 8ec51603b3..ceace4c53a 100644 > > > --- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c > > > +++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c > > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVbN4v_cbrtf) > > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVbN4vv_atan2f) > > > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVbN4v_log10f) > > > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVbN4v_log2f) > > > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVbN4v_log1pf) > > > > > > #define VEC_INT_TYPE __m128i > > > > > > diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c > > > index 1cb4553c7a..06a4753409 100644 > > > --- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c > > > +++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c > > > @@ -43,6 +43,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVdN8v_cbrtf) > > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVdN8vv_atan2f) > > > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVdN8v_log10f) > > > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVdN8v_log2f) > > > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVdN8v_log1pf) > > > > > > /* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. 
*/ > > > #undef VECTOR_WRAPPER_fFF > > > diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c > > > index 6ecc1792bb..a87e5298e0 100644 > > > --- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c > > > +++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c > > > @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVcN8v_cbrtf) > > > VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVcN8vv_atan2f) > > > VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVcN8v_log10f) > > > VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVcN8v_log2f) > > > +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVcN8v_log1pf) > > > > > > #define VEC_INT_TYPE __m128i > > > > > > -- > > > 2.31.1 > > > > > > > LGTM. > > > > Reviewed-by: H.J. Lu <hjl.tools@gmail.com> > > > > Thanks. > > > > > > H.J.
diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h index 73252615ca..845246fab9 100644 --- a/bits/libm-simd-decl-stubs.h +++ b/bits/libm-simd-decl-stubs.h @@ -241,4 +241,15 @@ #define __DECL_SIMD_log2f32x #define __DECL_SIMD_log2f64x #define __DECL_SIMD_log2f128x + +#define __DECL_SIMD_log1p +#define __DECL_SIMD_log1pf +#define __DECL_SIMD_log1pl +#define __DECL_SIMD_log1pf16 +#define __DECL_SIMD_log1pf32 +#define __DECL_SIMD_log1pf64 +#define __DECL_SIMD_log1pf128 +#define __DECL_SIMD_log1pf32x +#define __DECL_SIMD_log1pf64x +#define __DECL_SIMD_log1pf128x #endif diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index bfe52a4666..aa4bc61aa4 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -119,7 +119,7 @@ __MATHCALL_VEC (exp10,, (_Mdouble_ __x)); __MATHCALL_VEC (expm1,, (_Mdouble_ __x)); /* Return log(1 + X). */ -__MATHCALL (log1p,, (_Mdouble_ __x)); +__MATHCALL_VEC (log1p,, (_Mdouble_ __x)); /* Return the base 2 signed integral exponent of X. 
*/ __MATHCALL (logb,, (_Mdouble_ __x)); diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist index fa8b016c5d..68b940606a 100644 --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist @@ -55,6 +55,7 @@ GLIBC_2.35 _ZGVbN2v_exp10 F GLIBC_2.35 _ZGVbN2v_exp2 F GLIBC_2.35 _ZGVbN2v_expm1 F GLIBC_2.35 _ZGVbN2v_log10 F +GLIBC_2.35 _ZGVbN2v_log1p F GLIBC_2.35 _ZGVbN2v_log2 F GLIBC_2.35 _ZGVbN2v_sinh F GLIBC_2.35 _ZGVbN2vv_atan2 F @@ -68,6 +69,7 @@ GLIBC_2.35 _ZGVbN4v_exp10f F GLIBC_2.35 _ZGVbN4v_exp2f F GLIBC_2.35 _ZGVbN4v_expm1f F GLIBC_2.35 _ZGVbN4v_log10f F +GLIBC_2.35 _ZGVbN4v_log1pf F GLIBC_2.35 _ZGVbN4v_log2f F GLIBC_2.35 _ZGVbN4v_sinhf F GLIBC_2.35 _ZGVbN4vv_atan2f F @@ -81,6 +83,7 @@ GLIBC_2.35 _ZGVcN4v_exp10 F GLIBC_2.35 _ZGVcN4v_exp2 F GLIBC_2.35 _ZGVcN4v_expm1 F GLIBC_2.35 _ZGVcN4v_log10 F +GLIBC_2.35 _ZGVcN4v_log1p F GLIBC_2.35 _ZGVcN4v_log2 F GLIBC_2.35 _ZGVcN4v_sinh F GLIBC_2.35 _ZGVcN4vv_atan2 F @@ -94,6 +97,7 @@ GLIBC_2.35 _ZGVcN8v_exp10f F GLIBC_2.35 _ZGVcN8v_exp2f F GLIBC_2.35 _ZGVcN8v_expm1f F GLIBC_2.35 _ZGVcN8v_log10f F +GLIBC_2.35 _ZGVcN8v_log1pf F GLIBC_2.35 _ZGVcN8v_log2f F GLIBC_2.35 _ZGVcN8v_sinhf F GLIBC_2.35 _ZGVcN8vv_atan2f F @@ -107,6 +111,7 @@ GLIBC_2.35 _ZGVdN4v_exp10 F GLIBC_2.35 _ZGVdN4v_exp2 F GLIBC_2.35 _ZGVdN4v_expm1 F GLIBC_2.35 _ZGVdN4v_log10 F +GLIBC_2.35 _ZGVdN4v_log1p F GLIBC_2.35 _ZGVdN4v_log2 F GLIBC_2.35 _ZGVdN4v_sinh F GLIBC_2.35 _ZGVdN4vv_atan2 F @@ -120,6 +125,7 @@ GLIBC_2.35 _ZGVdN8v_exp10f F GLIBC_2.35 _ZGVdN8v_exp2f F GLIBC_2.35 _ZGVdN8v_expm1f F GLIBC_2.35 _ZGVdN8v_log10f F +GLIBC_2.35 _ZGVdN8v_log1pf F GLIBC_2.35 _ZGVdN8v_log2f F GLIBC_2.35 _ZGVdN8v_sinhf F GLIBC_2.35 _ZGVdN8vv_atan2f F @@ -133,6 +139,7 @@ GLIBC_2.35 _ZGVeN16v_exp10f F GLIBC_2.35 _ZGVeN16v_exp2f F GLIBC_2.35 _ZGVeN16v_expm1f F GLIBC_2.35 _ZGVeN16v_log10f F +GLIBC_2.35 _ZGVeN16v_log1pf F GLIBC_2.35 _ZGVeN16v_log2f F GLIBC_2.35 _ZGVeN16v_sinhf F 
GLIBC_2.35 _ZGVeN16vv_atan2f F @@ -146,6 +153,7 @@ GLIBC_2.35 _ZGVeN8v_exp10 F GLIBC_2.35 _ZGVeN8v_exp2 F GLIBC_2.35 _ZGVeN8v_expm1 F GLIBC_2.35 _ZGVeN8v_log10 F +GLIBC_2.35 _ZGVeN8v_log1p F GLIBC_2.35 _ZGVeN8v_log2 F GLIBC_2.35 _ZGVeN8v_sinh F GLIBC_2.35 _ZGVeN8vv_atan2 F diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h index 59d284a10a..14c9db3bb3 100644 --- a/sysdeps/x86/fpu/bits/math-vector.h +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -110,6 +110,10 @@ # define __DECL_SIMD_log2 __DECL_SIMD_x86_64 # undef __DECL_SIMD_log2f # define __DECL_SIMD_log2f __DECL_SIMD_x86_64 +# undef __DECL_SIMD_log1p +# define __DECL_SIMD_log1p __DECL_SIMD_x86_64 +# undef __DECL_SIMD_log1pf +# define __DECL_SIMD_log1pf __DECL_SIMD_x86_64 # endif #endif diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h index a2ca9a203f..3dca196432 100644 --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h @@ -54,6 +54,8 @@ !GCC$ builtin (log10f) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (log2) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (log2f) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (log1p) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (log1pf) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (cos) attributes simd (notinbranch) if('x32') !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32') @@ -93,3 +95,5 @@ !GCC$ builtin (log10f) attributes simd (notinbranch) if('x32') !GCC$ builtin (log2) attributes simd (notinbranch) if('x32') !GCC$ builtin (log2f) attributes simd (notinbranch) if('x32') +!GCC$ builtin (log1p) attributes simd (notinbranch) if('x32') +!GCC$ builtin (log1pf) attributes simd (notinbranch) if('x32') diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig index 8d6d0915af..378cb06d37 100644 --- a/sysdeps/x86_64/fpu/Makeconfig +++ 
b/sysdeps/x86_64/fpu/Makeconfig @@ -36,6 +36,7 @@ libmvec-funcs = \ hypot \ log \ log10 \ + log1p \ log2 \ pow \ sin \ diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions index 1b48c2d642..155fb115f3 100644 --- a/sysdeps/x86_64/fpu/Versions +++ b/sysdeps/x86_64/fpu/Versions @@ -23,6 +23,7 @@ libmvec { _ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2; _ZGVbN2v_expm1; _ZGVcN4v_expm1; _ZGVdN4v_expm1; _ZGVeN8v_expm1; _ZGVbN2v_log10; _ZGVcN4v_log10; _ZGVdN4v_log10; _ZGVeN8v_log10; + _ZGVbN2v_log1p; _ZGVcN4v_log1p; _ZGVdN4v_log1p; _ZGVeN8v_log1p; _ZGVbN2v_log2; _ZGVcN4v_log2; _ZGVdN4v_log2; _ZGVeN8v_log2; _ZGVbN2v_sinh; _ZGVcN4v_sinh; _ZGVdN4v_sinh; _ZGVeN8v_sinh; _ZGVbN2vv_atan2; _ZGVcN4vv_atan2; _ZGVdN4vv_atan2; _ZGVeN8vv_atan2; @@ -36,6 +37,7 @@ libmvec { _ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f; _ZGVbN4v_expm1f; _ZGVcN8v_expm1f; _ZGVdN8v_expm1f; _ZGVeN16v_expm1f; _ZGVbN4v_log10f; _ZGVcN8v_log10f; _ZGVdN8v_log10f; _ZGVeN16v_log10f; + _ZGVbN4v_log1pf; _ZGVcN8v_log1pf; _ZGVdN8v_log1pf; _ZGVeN16v_log1pf; _ZGVbN4v_log2f; _ZGVcN8v_log2f; _ZGVdN8v_log2f; _ZGVeN16v_log2f; _ZGVbN4v_sinhf; _ZGVcN8v_sinhf; _ZGVdN8v_sinhf; _ZGVeN16v_sinhf; _ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f; diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 3b7f3cee6f..a2b15a795b 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1685,6 +1685,26 @@ float: 2 float128: 2 ldouble: 3 +Function: "log1p_vlen16": +float: 2 + +Function: "log1p_vlen2": +double: 1 + +Function: "log1p_vlen4": +double: 1 +float: 2 + +Function: "log1p_vlen4_avx2": +double: 1 + +Function: "log1p_vlen8": +double: 1 +float: 2 + +Function: "log1p_vlen8_avx2": +float: 2 + Function: "log2": double: 2 float: 1 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S new file mode 100644 index 0000000000..8004088346 
--- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized log1p, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define _ZGVbN2v_log1p _ZGVbN2v_log1p_sse2 +#include "../svml_d_log1p2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c new file mode 100644 index 0000000000..35ca620aba --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized log1p, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_log1p +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_log1p, __GI__ZGVbN2v_log1p, __redirect__ZGVbN2v_log1p) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S new file mode 100644 index 0000000000..9d3f0647b4 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S @@ -0,0 +1,1398 @@ +/* Function log1p vectorized with SSE4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. 
*/ + +/* + * ALGORITHM DESCRIPTION: + * + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) + * Get short reciprocal approximation Rcp ~ 1/xh + * R = (Rcp*xh - 1.0) + Rcp*xl + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) + * log(Rcp) is tabulated + * + * + */ + +/* Offsets for data table __svml_dlog1p_data_internal + */ +#define Log_HA_table 0 +#define Log_LA_table 8208 +#define poly_coeff 12320 +#define ExpMask 12384 +#define Two10 12400 +#define MinLog1p 12416 +#define MaxLog1p 12432 +#define One 12448 +#define SgnMask 12464 +#define XThreshold 12480 +#define XhMask 12496 +#define Threshold 12512 +#define Bias 12528 +#define Bias1 12544 +#define ExpMask0 12560 +#define ExpMask2 12576 +#define L2 12592 + +/* Lookup bias for data table __svml_dlog1p_data_internal. */ +#define Table_Lookup_Bias -0x405ff0 + +#include <sysdep.h> + + .text + .section .text.sse4,"ax",@progbits +ENTRY(_ZGVbN2v_log1p_sse4) + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-32, %rsp /* align the stack pointer down to 32 bytes */ + subq $64, %rsp /* reserve 64 bytes of stack scratch space */ + movaps %xmm0, %xmm7 /* xmm7 = copy of the xmm0 argument, kept for the special-input path */ + +/* SgnMask used by all accuracies */ + movups SgnMask+__svml_dlog1p_data_internal(%rip), %xmm6 + lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %rsi + movaps %xmm6, %xmm8 + movaps %xmm7, %xmm15 + movups One+__svml_dlog1p_data_internal(%rip), %xmm0 + andps %xmm7, %xmm8 + cmpltpd XThreshold+__svml_dlog1p_data_internal(%rip), %xmm8 + cmpnlepd MaxLog1p+__svml_dlog1p_data_internal(%rip), %xmm15 + movaps %xmm0, %xmm4 + +/* compute 1+x as high, low parts */ + movaps %xmm0, %xmm9 + addpd %xmm7, %xmm4 + maxpd %xmm7, %xmm9 + orps XhMask+__svml_dlog1p_data_internal(%rip), %xmm8 + movaps %xmm0, %xmm5 + +/* preserve mantissa, set input exponent to 2^(-10) */ + movups ExpMask+__svml_dlog1p_data_internal(%rip), %xmm3 + andps %xmm8, %xmm4 + andps %xmm4, %xmm3 + +/* check range */ + movaps %xmm7, %xmm8 + orps Two10+__svml_dlog1p_data_internal(%rip), %xmm3 + +/* Compute SignMask for all accuracies, including
EP */ + andnps %xmm7, %xmm6 + +/* reciprocal approximation good to at least 11 bits */ + cvtpd2ps %xmm3, %xmm10 + minpd %xmm7, %xmm5 + subpd %xmm4, %xmm9 + cmpltpd MinLog1p+__svml_dlog1p_data_internal(%rip), %xmm8 + addpd %xmm9, %xmm5 + movlhps %xmm10, %xmm10 + orps %xmm15, %xmm8 + rcpps %xmm10, %xmm11 + +/* combine and get argument value range mask */ + movmskpd %xmm8, %edx + +/* round reciprocal to nearest integer, will have 1+9 mantissa bits */ + movups .FLT_16(%rip), %xmm13 + +/* exponent of X needed to scale Xl */ + movdqu ExpMask0+__svml_dlog1p_data_internal(%rip), %xmm12 + cvtps2pd %xmm11, %xmm1 + addpd %xmm13, %xmm1 + subpd %xmm13, %xmm1 + +/* 2^(-10 - exp(X)) */ + movdqu ExpMask2+__svml_dlog1p_data_internal(%rip), %xmm2 + pand %xmm4, %xmm12 + psubq %xmm12, %xmm2 + mulpd %xmm1, %xmm3 + +/* scale DblRcp */ + mulpd %xmm1, %xmm2 + subpd %xmm0, %xmm3 + +/* + * argument reduction + * VQFMS( D, R, X, DblRcp1, One ); + */ + mulpd %xmm2, %xmm5 + addpd %xmm5, %xmm3 + +/* exponent*log(2.0) */ + movups Threshold+__svml_dlog1p_data_internal(%rip), %xmm10 + +/* exponent bits */ + psrlq $20, %xmm4 + pshufd $221, %xmm4, %xmm14 + +/* + * prepare table index + * table lookup + */ + movaps %xmm1, %xmm4 + cmpltpd %xmm1, %xmm10 + +/* biased exponent in DP format */ + cvtdq2pd %xmm14, %xmm0 + +/* polynomial */ + movups poly_coeff+__svml_dlog1p_data_internal(%rip), %xmm1 + movaps %xmm3, %xmm5 + mulpd %xmm3, %xmm1 + mulpd %xmm3, %xmm5 + addpd poly_coeff+16+__svml_dlog1p_data_internal(%rip), %xmm1 + movups poly_coeff+32+__svml_dlog1p_data_internal(%rip), %xmm2 + psrlq $40, %xmm4 + mulpd %xmm3, %xmm2 + mulpd %xmm5, %xmm1 + addpd poly_coeff+48+__svml_dlog1p_data_internal(%rip), %xmm2 + movd %xmm4, %eax + andps Bias+__svml_dlog1p_data_internal(%rip), %xmm10 + addpd %xmm1, %xmm2 + +/* reconstruction */ + mulpd %xmm2, %xmm5 + orps Bias1+__svml_dlog1p_data_internal(%rip), %xmm10 + pshufd $2, %xmm4, %xmm9 + subpd %xmm10, %xmm0 + addpd %xmm5, %xmm3 + movd %xmm9, %ecx + mulpd
L2+__svml_dlog1p_data_internal(%rip), %xmm0 + movslq %eax, %rax + movslq %ecx, %rcx + movsd (%rsi,%rax), %xmm11 + movhpd (%rsi,%rcx), %xmm11 + addpd %xmm3, %xmm11 + addpd %xmm11, %xmm0 + +/* OR in the Sign of input argument to produce correct log1p(-0) */ + orps %xmm6, %xmm0 + testl %edx, %edx + +/* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx r12 r13 r14 r15 edx xmm0 xmm7 + +/* Restore registers + * and exit the function + */ + +L(EXIT): + movq %rbp, %rsp + popq %rbp + cfi_def_cfa(7, 8) + cfi_restore(6) + ret + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + +/* Branch to process + * special inputs + */ + +L(SPECIAL_VALUES_BRANCH): + movups %xmm7, 32(%rsp) /* spill the input vector to 32(%rsp) */ + movups %xmm0, 48(%rsp) /* spill the vector-path result to 48(%rsp) */ + # LOE rbx r12 r13 r14 r15 edx + + xorl %eax, %eax + movq %r12, 16(%rsp) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22 + movl %eax, %r12d /* r12d = element index, starting at 0 */ + movq %r13, 8(%rsp) + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 + movl %edx, %r13d /* r13d = range mask taken from edx */ + movq %r14, (%rsp) + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r15 r12d r13d + +/* Range mask + * bits check + */ + +L(RANGEMASK_CHECK): + btl %r12d, %r13d /* CF = range-mask bit of the current element */ + +/* Call scalar math function */ + jc L(SCALAR_MATH_CALL) + # LOE rbx r15 r12d r13d + +/* Special inputs + * processing loop + */ + +L(SPECIAL_VALUES_LOOP): + incl %r12d + cmpl $2, %r12d /* loop bound: two elements are checked */ + +/* Check bits in range mask */ + jl L(RANGEMASK_CHECK) + # LOE rbx r15 r12d r13d + + movq 16(%rsp),
%r12 + cfi_restore(12) + movq 8(%rsp), %r13 + cfi_restore(13) + movq (%rsp), %r14 + cfi_restore(14) + movups 48(%rsp), %xmm0 /* reload the result vector, including any elements rewritten by the scalar path */ + +/* Go to exit */ + jmp L(EXIT) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r12 r13 r14 r15 xmm0 + +/* Scalar math function call + * to process special input + */ + +L(SCALAR_MATH_CALL): + movl %r12d, %r14d /* r14 = current element index (movl zero-extends) */ + movsd 32(%rsp,%r14,8), %xmm0 /* load that element of the spilled input */ + call log1p@PLT + # LOE rbx r14 r15 r12d r13d xmm0 + + movsd %xmm0, 48(%rsp,%r14,8) /* overwrite the same element of the spilled result */ + +/* Process special inputs in loop */ + jmp L(SPECIAL_VALUES_LOOP) + # LOE rbx r15 r12d r13d +END(_ZGVbN2v_log1p_sse4) + + .section .rodata, "a" + .align 16 + +#ifdef __svml_dlog1p_data_internal_typedef +typedef unsigned int VUINT32; +typedef struct { + __declspec(align(16)) VUINT32 Log_HA_table[(1<<10)+2][2]; + __declspec(align(16)) VUINT32 Log_LA_table[(1<<9)+1][2]; + __declspec(align(16)) VUINT32 poly_coeff[4][2][2]; + __declspec(align(16)) VUINT32 ExpMask[2][2]; + __declspec(align(16)) VUINT32 Two10[2][2]; + __declspec(align(16)) VUINT32 MinLog1p[2][2]; + __declspec(align(16)) VUINT32 MaxLog1p[2][2]; + __declspec(align(16)) VUINT32 One[2][2]; + __declspec(align(16)) VUINT32 SgnMask[2][2]; + __declspec(align(16)) VUINT32 XThreshold[2][2]; + __declspec(align(16)) VUINT32 XhMask[2][2]; + __declspec(align(16)) VUINT32 Threshold[2][2]; +
__declspec(align(16)) VUINT32 Bias[2][2]; + __declspec(align(16)) VUINT32 Bias1[2][2]; + __declspec(align(16)) VUINT32 ExpMask0[2][2]; + __declspec(align(16)) VUINT32 ExpMask2[2][2]; + __declspec(align(16)) VUINT32 L2[2][2]; +} __svml_dlog1p_data_internal; +#endif +__svml_dlog1p_data_internal: + /* Log_HA_table */ + .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100 + .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a + .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff + .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a + .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb + .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e + .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b + .quad 0xc0862347acebaf68, 0xbe1cef3b152048af + .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e + .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4 + .quad 0xc08623537ac30980, 0xbe1cefc4642ee597 + .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16 + .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6 + .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362 + .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557 + .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b + .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed + .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed + .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f + .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce + .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7 + .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1 + .quad 0xc086238206e94218, 0xbe1ceee898588610 + .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea + .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6 + .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6 + .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165 + .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1 + .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b + .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670 + .quad 0xc08623a07b28ae60, 0xbe1cef359363787c + .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c + .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84 + .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7 + .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b + .quad 0xc08623b349975d98, 
0xbe1cf1bae76dfbcf + .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62 + .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b + .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98 + .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87 + .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff + .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798 + .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e + .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde + .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b + .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c + .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98 + .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f + .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358 + .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380 + .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4 + .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b + .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2 + .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4 + .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400 + .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7 + .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a + .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d + .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b + .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575 + .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951 + .quad 0xc086241263e87f50, 0xbe1cf16e74768529 + .quad 0xc0862415f6193658, 0xbe1cefec64b8becb + .quad 0xc086241986b28f30, 0xbe1cf0838d210baa + .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11 + .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805 + .quad 0xc08624242f008380, 0xbe1ceea988c5a417 + .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5 + .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38 + .quad 0xc086242ec92eaee8, 0xbe1cef0946455411 + .quad 0xc08624324ecbaf98, 0xbe1cefea60907739 + .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42 + .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d + .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e + .quad 0xc086244055d2c968, 0xbe1cef345284c119 + .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219 + .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114 + .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189 + 
.quad 0xc086244e446c1398, 0xbe1ceff54b2a406f + .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f + .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5 + .quad 0xc0862458a789e250, 0xbe1cf0b173796a31 + .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d + .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb + .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7 + .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f + .quad 0xc0862469d9a591c0, 0xbe1cef503d947663 + .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2 + .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc + .quad 0xc086247419475160, 0xbe1cf03dd9922331 + .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129 + .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6 + .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100 + .quad 0xc0862481af27c528, 0xbe1cee8a6593278a + .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7 + .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8 + .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002 + .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4 + .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c + .quad 0xc0862495e5179270, 0xbe1cee757f20c326 + .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4 + .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97 + .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb + .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e + .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b + .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80 + .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71 + .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9 + .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139 + .quad 0xc08624b72472a528, 0xbe1cf031c931c11f + .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7 + .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d + .quad 0xc08624c103245238, 0xbe1cefd492f1ba75 + .quad 0xc08624c44aacab08, 0xbe1cf1253e154466 + .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55 + .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe + .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f + .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968 + .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78 + .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75 + .quad 0xc08624db1a0f6b00, 
0xbe1cef1e860623f2 + .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d + .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed + .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f + .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65 + .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078 + .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a + .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a + .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2 + .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc + .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501 + .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7 + .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c + .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c + .quad 0xc0862507f9448db0, 0xbe1cf082da464994 + .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf + .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531 + .quad 0xc08625117667dd78, 0xbe1cf1106599c962 + .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f + .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6 + .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4 + .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092 + .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd + .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7 + .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25 + .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d + .quad 0xc086252dab033898, 0xbe1cf220bba8861f + .quad 0xc0862530c732b078, 0xbe1cef51e310eae2 + .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae + .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8 + .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171 + .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408 + .quad 0xc08625404216d160, 0xbe1cf22d2536f06b + .quad 0xc08625435715e498, 0xbe1cef6abbf2e268 + .quad 0xc08625466ae57648, 0xbe1cf093a14789f5 + .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c + .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc + .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157 + .quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997 + .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff + .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f + .quad 0xc086255bd4ac4590, 0xbe1cf1b4c51113c9 + .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d + 
.quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc + .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9 + .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5 + .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b + .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996 + .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945 + .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995 + .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c + .quad 0xc086257a09acaae0, 0xbe1cf172c3078947 + .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22 + .quad 0xc086258006ae71b8, 0xbe1cefdb80426923 + .quad 0xc08625830381da08, 0xbe1ceef1391a0372 + .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13 + .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83 + .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9 + .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0 + .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81 + .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766 + .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b + .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2 + .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec + .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e + .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7 + .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780 + .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11 + .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219 + .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160 + .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495 + .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5 + .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5 + .quad 0xc08625baf725ae28, 0xbe1cf05c80779283 + .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889 + .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124 + .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86 + .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092 + .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb + .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12 + .quad 0xc08625cf332b68b0, 0xbe1cf0a1bfdc44c7 + .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e + .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701 + .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812 + .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e + .quad 0xc08625dd881990b0, 
0xbe1cf14e1325c5e4 + .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12 + .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21 + .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2 + .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece + .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12 + .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad + .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3 + .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9 + .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1 + .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9 + .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2 + .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51 + .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e + .quad 0xc08626052294df58, 0xbe1cf1b745c57716 + .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23 + .quad 0xc086260abb103458, 0xbe1cef480ff1acd2 + .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef + .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5 + .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6 + .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a + .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545 + .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011 + .quad 0xc086261e32267e98, 0xbe1cf19917010e96 + .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985 + .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3 + .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c + .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50 + .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68 + .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9 + .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b + .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238 + .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e + .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d + .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba + .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279 + .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085 + .quad 0xc086264494738e08, 0xbe1cf06797bd03b2 + .quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1 + .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f + .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a + .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39 + .quad 0xc08626521daf7758, 0xbe1cf252595aceb3 + 
.quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2 + .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa + .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366 + .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b + .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0 + .quad 0xc08626623df56e38, 0xbe1cf080e10b8365 + .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544 + .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9 + .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9 + .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2 + .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c + .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6 + .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d + .quad 0xc08626778c3d4798, 0xbe1cefe260819380 + .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3 + .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa + .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1 + .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52 + .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd + .quad 0xc086268762086350, 0xbe1cefaee1edfa35 + .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936 + .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed + .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49 + .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e + .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc + .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840 + .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be + .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c + .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06 + .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e + .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3 + .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68 + .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5 + .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986 + .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d + .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26 + .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06 + .quad 0xc08626b62aace5c8, 0xbe1cf2159fb93652 + .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f + .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c + .quad 0xc08626bddc737648, 0xbe1ceec10a020e73 + .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7 + .quad 0xc08626c2f9890ca0, 
0xbe1cf0cb8f169ffe + .quad 0xc08626c586da9388, 0xbe1cef7de2452430 + .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae + .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d + .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3 + .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d + .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e + .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64 + .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2 + .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d + .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab + .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153 + .quad 0xc08626e164224880, 0xbe1ceeb431709788 + .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5 + .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b + .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93 + .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8 + .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2 + .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6 + .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef + .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339 + .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1 + .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28 + .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f + .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3 + .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6 + .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6 + .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3 + .quad 0xc086270941934b10, 0xbe1ceefe32981f2c + .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445 + .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c + .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f + .quad 0xc08627131a321318, 0xbe1cef04ac0fb802 + .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd + .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5 + .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570 + .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85 + .quad 0xc086271f58064068, 0xbe1cef092a785e3f + .quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30 + .quad 0xc086272438546be8, 0xbe1cf210907ded8b + .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99 + .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc + .quad 0xc086272b833b8df0, 0xbe1cf06874992df5 + 
.quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899 + .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99 + .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe + .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d + .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8 + .quad 0xc086273a05367688, 0xbe1cf18656c50806 + .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a + .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911 + .quad 0xc08627413c621848, 0xbe1cf188a4ea680c + .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80 + .quad 0xc086274608397868, 0xbe1cf25a328c28e2 + .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8 + .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a + .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228 + .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c + .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44 + .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2 + .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4 + .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a + .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9 + .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627 + .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e + .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee + .quad 0xc0862764efdbf768, 0xbe1ceeb06e1d0dad + .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5 + .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f + .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312 + .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85 + .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011 + .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7 + .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da + .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554 + .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377 + .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd + .quad 0xc086277eba506158, 0xbe1cf0b911b029f0 + .quad 0xc08627810e6f4028, 0xbe1cefdc24719766 + .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7 + .quad 0xc0862785b4a56dd8, 0xbe1cf1e0afb349ec + .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc + .quad 0xc086278a58297918, 0xbe1cf053073872bf + .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947 + .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234 + .quad 0xc086279148685aa0, 
0xbe1cf162204794a8 + .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac + .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3 + .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388 + .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5 + .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f + .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a + .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f + .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f + .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26 + .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a + .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81 + .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d + .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893 + .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0 + .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8 + .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00 + .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2 + .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4 + .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7 + .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3 + .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d + .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e + .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93 + .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a + .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9 + .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f + .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1 + .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4 + .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb + .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b + .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b + .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96 + .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477 + .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2 + .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c + .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875 + .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522 + .quad 0xc08627e731a2f848, 0xbe1cee6a24623d57 + .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e + .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548 + .quad 0xc08627edd34756b8, 0xbe1cef36b3366305 + .quad 0xc08627f007f0a408, 0xbe1cf18134625550 + 
.quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11 + .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc + .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8 + .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe + .quad 0xc08627fb06290f90, 0xbe1cf25188430e25 + .quad 0xc08627fd37324070, 0xbe1ceea1713490f9 + .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c + .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b + .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b + .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c + .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a + .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4 + .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256 + .quad 0xc086280eaa003050, 0xbe1cf010ad787fea + .quad 0xc0862810d5af5880, 0xbe1cee622478393d + .quad 0xc086281300c7e368, 0xbe1cf01c7482564f + .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536 + .quad 0xc086281755366778, 0xbe1cef2edae5837d + .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9 + .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8 + .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83 + .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4 + .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9 + .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2 + .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d + .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1 + .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b + .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02 + .quad 0xc086282efc0ca328, 0xbe1cf0bc139379a9 + .quad 0xc08628311f099420, 0xbe1cef247a9ec596 + .quad 0xc086283341749490, 0xbe1cef74bbcc488a + .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e + .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810 + .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8 + .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065 + .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e + .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234 + .quad 0xc08628422284b168, 0xbe1cf0abf7638127 + .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058 + .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c + .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1 + .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43 + .quad 0xc086284cb26efd90, 
0xbe1cf23addba6e09 + .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60 + .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393 + .quad 0xc0862853021d4588, 0xbe1cf176adb417f7 + .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da + .quad 0xc08628573479b220, 0xbe1ceec34cf49523 + .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb + .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b + .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d + .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5 + .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792 + .quad 0xc0862863be697458, 0xbe1cf097f890c6f8 + .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc + .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7 + .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98 + .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7 + .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d + .quad 0xc086287034d0b690, 0xbe1ceff262d0a248 + .quad 0xc086287246aab180, 0xbe1cefa7bc194186 + .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9 + .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07 + .quad 0xc086287879041490, 0xbe1cf034803c8a48 + .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f + .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7 + .quad 0xc086287ea6946958, 0xbe1cefb1e4625943 + .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0 + .quad 0xc0862882c24faff8, 0xbe1cee9896d016da + .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc + .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34 + .quad 0xc0862888e7f699e0, 0xbe1cf05603549486 + .quad 0xc086288af37750b0, 0xbe1cef50fff513d3 + .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0 + .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d + .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f + .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed + .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d + .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646 + .quad 0xc0862899356c1150, 0xbe1ceec4501167e9 + .quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f + .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35 + .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a + .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464 + .quad 0xc08628a355104818, 0xbe1cf0435e2782b0 + 
.quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c + .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d + .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2 + .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9 + .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979 + .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c + .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32 + .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2 + .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303 + .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880 + /*== Log_LA_table ==*/ + .align 16 + .quad 0x8000000000000000 + .quad 0xbf5ff802a9ab10e6 + .quad 0xbf6ff00aa2b10bc0 + .quad 0xbf77ee11ebd82e94 + .quad 0xbf7fe02a6b106789 + .quad 0xbf83e7295d25a7d9 + .quad 0xbf87dc475f810a77 + .quad 0xbf8bcf712c74384c + .quad 0xbf8fc0a8b0fc03e4 + .quad 0xbf91d7f7eb9eebe7 + .quad 0xbf93cea44346a575 + .quad 0xbf95c45a51b8d389 + .quad 0xbf97b91b07d5b11b + .quad 0xbf99ace7551cc514 + .quad 0xbf9b9fc027af9198 + .quad 0xbf9d91a66c543cc4 + .quad 0xbf9f829b0e783300 + .quad 0xbfa0b94f7c196176 + .quad 0xbfa1b0d98923d980 + .quad 0xbfa2a7ec2214e873 + .quad 0xbfa39e87b9febd60 + .quad 0xbfa494acc34d911c + .quad 0xbfa58a5bafc8e4d5 + .quad 0xbfa67f94f094bd98 + .quad 0xbfa77458f632dcfc + .quad 0xbfa868a83083f6cf + .quad 0xbfa95c830ec8e3eb + .quad 0xbfaa4fe9ffa3d235 + .quad 0xbfab42dd711971bf + .quad 0xbfac355dd0921f2d + .quad 0xbfad276b8adb0b52 + .quad 0xbfae19070c276016 + .quad 0xbfaf0a30c01162a6 + .quad 0xbfaffae9119b9303 + .quad 0xbfb075983598e471 + .quad 0xbfb0ed839b5526fe + .quad 0xbfb16536eea37ae1 + .quad 0xbfb1dcb263db1944 + .quad 0xbfb253f62f0a1417 + .quad 0xbfb2cb0283f5de1f + .quad 0xbfb341d7961bd1d1 + .quad 0xbfb3b87598b1b6ee + .quad 0xbfb42edcbea646f0 + .quad 0xbfb4a50d3aa1b040 + .quad 0xbfb51b073f06183f + .quad 0xbfb590cafdf01c28 + .quad 0xbfb60658a93750c4 + .quad 0xbfb67bb0726ec0fc + .quad 0xbfb6f0d28ae56b4c + .quad 0xbfb765bf23a6be13 + .quad 0xbfb7da766d7b12cd + .quad 0xbfb84ef898e8282a + .quad 0xbfb8c345d6319b21 + .quad 0xbfb9375e55595ede + .quad 0xbfb9ab42462033ad + .quad 
0xbfba1ef1d8061cd4 + .quad 0xbfba926d3a4ad563 + .quad 0xbfbb05b49bee43fe + .quad 0xbfbb78c82bb0eda1 + .quad 0xbfbbeba818146765 + .quad 0xbfbc5e548f5bc743 + .quad 0xbfbcd0cdbf8c13e1 + .quad 0xbfbd4313d66cb35d + .quad 0xbfbdb5270187d927 + .quad 0xbfbe27076e2af2e6 + .quad 0xbfbe98b549671467 + .quad 0xbfbf0a30c01162a6 + .quad 0xbfbf7b79fec37ddf + .quad 0xbfbfec9131dbeabb + .quad 0xbfc02ebb42bf3d4b + .quad 0xbfc0671512ca596e + .quad 0xbfc09f561ee719c3 + .quad 0xbfc0d77e7cd08e59 + .quad 0xbfc10f8e422539b1 + .quad 0xbfc14785846742ac + .quad 0xbfc17f6458fca611 + .quad 0xbfc1b72ad52f67a0 + .quad 0xbfc1eed90e2dc2c3 + .quad 0xbfc2266f190a5acb + .quad 0xbfc25ded0abc6ad2 + .quad 0xbfc29552f81ff523 + .quad 0xbfc2cca0f5f5f251 + .quad 0xbfc303d718e47fd3 + .quad 0xbfc33af575770e4f + .quad 0xbfc371fc201e8f74 + .quad 0xbfc3a8eb2d31a376 + .quad 0xbfc3dfc2b0ecc62a + .quad 0xbfc41682bf727bc0 + .quad 0xbfc44d2b6ccb7d1e + .quad 0xbfc483bccce6e3dd + .quad 0xbfc4ba36f39a55e5 + .quad 0xbfc4f099f4a230b2 + .quad 0xbfc526e5e3a1b438 + .quad 0xbfc55d1ad4232d6f + .quad 0xbfc59338d9982086 + .quad 0xbfc5c940075972b9 + .quad 0xbfc5ff3070a793d4 + .quad 0xbfc6350a28aaa758 + .quad 0xbfc66acd4272ad51 + .quad 0xbfc6a079d0f7aad2 + .quad 0xbfc6d60fe719d21d + .quad 0xbfc70b8f97a1aa75 + .quad 0xbfc740f8f54037a5 + .quad 0xbfc7764c128f2127 + .quad 0xbfc7ab890210d909 + .quad 0xbfc7e0afd630c274 + .quad 0xbfc815c0a14357eb + .quad 0xbfc84abb75865139 + .quad 0xbfc87fa06520c911 + .quad 0xbfc8b46f8223625b + .quad 0xbfc8e928de886d41 + .quad 0xbfc91dcc8c340bde + .quad 0xbfc9525a9cf456b4 + .quad 0xbfc986d3228180ca + .quad 0xbfc9bb362e7dfb83 + .quad 0xbfc9ef83d2769a34 + .quad 0xbfca23bc1fe2b563 + .quad 0xbfca57df28244dcd + .quad 0xbfca8becfc882f19 + .quad 0xbfcabfe5ae46124c + .quad 0xbfcaf3c94e80bff3 + .quad 0xbfcb2797ee46320c + .quad 0xbfcb5b519e8fb5a4 + .quad 0xbfcb8ef670420c3b + .quad 0xbfcbc286742d8cd6 + .quad 0xbfcbf601bb0e44e2 + .quad 0xbfcc2968558c18c1 + .quad 0xbfcc5cba543ae425 + .quad 0xbfcc8ff7c79a9a22 + .quad 
0xbfccc320c0176502 + .quad 0xbfccf6354e09c5dc + .quad 0xbfcd293581b6b3e7 + .quad 0xbfcd5c216b4fbb91 + .quad 0xbfcd8ef91af31d5e + .quad 0xbfcdc1bca0abec7d + .quad 0xbfcdf46c0c722d2f + .quad 0xbfce27076e2af2e6 + .quad 0xbfce598ed5a87e2f + .quad 0xbfce8c0252aa5a60 + .quad 0xbfcebe61f4dd7b0b + .quad 0xbfcef0adcbdc5936 + .quad 0xbfcf22e5e72f105d + .quad 0xbfcf550a564b7b37 + .quad 0xbfcf871b28955045 + .quad 0xbfcfb9186d5e3e2b + .quad 0xbfcfeb0233e607cc + .quad 0xbfd00e6c45ad501d + .quad 0xbfd0274dc16c232f + .quad 0xbfd0402594b4d041 + .quad 0xbfd058f3c703ebc6 + .quad 0xbfd071b85fcd590d + .quad 0xbfd08a73667c57af + .quad 0xbfd0a324e27390e3 + .quad 0xbfd0bbccdb0d24bd + .quad 0xbfd0d46b579ab74b + .quad 0xbfd0ed005f657da4 + .quad 0xbfd1058bf9ae4ad5 + .quad 0xbfd11e0e2dad9cb7 + .quad 0xbfd136870293a8b0 + .quad 0xbfd14ef67f88685a + .quad 0xbfd1675cababa60e + .quad 0xbfd17fb98e15095d + .quad 0xbfd1980d2dd4236f + .quad 0xbfd1b05791f07b49 + .quad 0xbfd1c898c16999fb + .quad 0xbfd1e0d0c33716be + .quad 0xbfd1f8ff9e48a2f3 + .quad 0xbfd211255986160c + .quad 0xbfd22941fbcf7966 + .quad 0xbfd241558bfd1404 + .quad 0xbfd2596010df763a + .quad 0xbfd27161913f853d + .quad 0xbfd2895a13de86a3 + .quad 0xbfd2a1499f762bc9 + .quad 0xbfd2b9303ab89d25 + .quad 0xbfd2d10dec508583 + .quad 0xbfd2e8e2bae11d31 + .quad 0xbfd300aead06350c + .quad 0xbfd31871c9544185 + .quad 0xbfd3302c16586588 + .quad 0xbfd347dd9a987d55 + .quad 0xbfd35f865c93293e + .quad 0xbfd3772662bfd85b + .quad 0xbfd38ebdb38ed321 + .quad 0xbfd3a64c556945ea + .quad 0xbfd3bdd24eb14b6a + .quad 0xbfd3d54fa5c1f710 + .quad 0xbfd3ecc460ef5f50 + .quad 0xbfd404308686a7e4 + .quad 0xbfd41b941cce0bee + .quad 0xbfd432ef2a04e814 + .quad 0xbfd44a41b463c47c + .quad 0xbfd4618bc21c5ec2 + .quad 0xbfd478cd5959b3d9 + .quad 0xbfd49006804009d1 + .quad 0xbfd4a7373cecf997 + .quad 0xbfd4be5f957778a1 + .quad 0xbfd4d57f8fefe27f + .quad 0xbfd4ec973260026a + .quad 0xbfd503a682cb1cb3 + .quad 0xbfd51aad872df82d + .quad 0xbfd531ac457ee77e + .quad 0xbfd548a2c3add263 + .quad 
0xbfd55f9107a43ee2 + .quad 0xbfd5767717455a6c + .quad 0xbfd58d54f86e02f2 + .quad 0xbfd5a42ab0f4cfe2 + .quad 0xbfd5baf846aa1b19 + .quad 0xbfd5d1bdbf5809ca + .quad 0xbfd5e87b20c2954a + .quad 0xbfd5ff3070a793d4 + .quad 0xbfd615ddb4bec13c + .quad 0xbfd62c82f2b9c795 + .quad 0x3fd61965cdb02c1f + .quad 0x3fd602d08af091ec + .quad 0x3fd5ec433d5c35ae + .quad 0x3fd5d5bddf595f30 + .quad 0x3fd5bf406b543db2 + .quad 0x3fd5a8cadbbedfa1 + .quad 0x3fd5925d2b112a59 + .quad 0x3fd57bf753c8d1fb + .quad 0x3fd565995069514c + .quad 0x3fd54f431b7be1a9 + .quad 0x3fd538f4af8f72fe + .quad 0x3fd522ae0738a3d8 + .quad 0x3fd50c6f1d11b97c + .quad 0x3fd4f637ebba9810 + .quad 0x3fd4e0086dd8baca + .quad 0x3fd4c9e09e172c3c + .quad 0x3fd4b3c077267e9a + .quad 0x3fd49da7f3bcc41f + .quad 0x3fd487970e958770 + .quad 0x3fd4718dc271c41b + .quad 0x3fd45b8c0a17df13 + .quad 0x3fd44591e0539f49 + .quad 0x3fd42f9f3ff62642 + .quad 0x3fd419b423d5e8c7 + .quad 0x3fd403d086cea79c + .quad 0x3fd3edf463c1683e + .quad 0x3fd3d81fb5946dba + .quad 0x3fd3c25277333184 + .quad 0x3fd3ac8ca38e5c5f + .quad 0x3fd396ce359bbf54 + .quad 0x3fd3811728564cb2 + .quad 0x3fd36b6776be1117 + .quad 0x3fd355bf1bd82c8b + .quad 0x3fd3401e12aecba1 + .quad 0x3fd32a84565120a8 + .quad 0x3fd314f1e1d35ce4 + .quad 0x3fd2ff66b04ea9d4 + .quad 0x3fd2e9e2bce12286 + .quad 0x3fd2d46602adccee + .quad 0x3fd2bef07cdc9354 + .quad 0x3fd2a982269a3dbf + .quad 0x3fd2941afb186b7c + .quad 0x3fd27ebaf58d8c9d + .quad 0x3fd269621134db92 + .quad 0x3fd25410494e56c7 + .quad 0x3fd23ec5991eba49 + .quad 0x3fd22981fbef797b + .quad 0x3fd214456d0eb8d4 + .quad 0x3fd1ff0fe7cf47a7 + .quad 0x3fd1e9e1678899f4 + .quad 0x3fd1d4b9e796c245 + .quad 0x3fd1bf99635a6b95 + .quad 0x3fd1aa7fd638d33f + .quad 0x3fd1956d3b9bc2fa + .quad 0x3fd180618ef18adf + .quad 0x3fd16b5ccbacfb73 + .quad 0x3fd1565eed455fc3 + .quad 0x3fd14167ef367783 + .quad 0x3fd12c77cd00713b + .quad 0x3fd1178e8227e47c + .quad 0x3fd102ac0a35cc1c + .quad 0x3fd0edd060b78081 + .quad 0x3fd0d8fb813eb1ef + .quad 0x3fd0c42d676162e3 + .quad 
0x3fd0af660eb9e279 + .quad 0x3fd09aa572e6c6d4 + .quad 0x3fd085eb8f8ae797 + .quad 0x3fd07138604d5862 + .quad 0x3fd05c8be0d9635a + .quad 0x3fd047e60cde83b8 + .quad 0x3fd03346e0106062 + .quad 0x3fd01eae5626c691 + .quad 0x3fd00a1c6adda473 + .quad 0x3fcfeb2233ea07cd + .quad 0x3fcfc218be620a5e + .quad 0x3fcf991c6cb3b379 + .quad 0x3fcf702d36777df0 + .quad 0x3fcf474b134df229 + .quad 0x3fcf1e75fadf9bde + .quad 0x3fcef5ade4dcffe6 + .quad 0x3fceccf2c8fe920a + .quad 0x3fcea4449f04aaf5 + .quad 0x3fce7ba35eb77e2a + .quad 0x3fce530effe71012 + .quad 0x3fce2a877a6b2c12 + .quad 0x3fce020cc6235ab5 + .quad 0x3fcdd99edaf6d7e9 + .quad 0x3fcdb13db0d48940 + .quad 0x3fcd88e93fb2f450 + .quad 0x3fcd60a17f903515 + .quad 0x3fcd38666871f465 + .quad 0x3fcd1037f2655e7b + .quad 0x3fcce816157f1988 + .quad 0x3fccc000c9db3c52 + .quad 0x3fcc97f8079d44ec + .quad 0x3fcc6ffbc6f00f71 + .quad 0x3fcc480c0005ccd1 + .quad 0x3fcc2028ab17f9b4 + .quad 0x3fcbf851c067555f + .quad 0x3fcbd087383bd8ad + .quad 0x3fcba8c90ae4ad19 + .quad 0x3fcb811730b823d2 + .quad 0x3fcb5971a213acdb + .quad 0x3fcb31d8575bce3d + .quad 0x3fcb0a4b48fc1b46 + .quad 0x3fcae2ca6f672bd4 + .quad 0x3fcabb55c31693ad + .quad 0x3fca93ed3c8ad9e3 + .quad 0x3fca6c90d44b704e + .quad 0x3fca454082e6ab05 + .quad 0x3fca1dfc40f1b7f1 + .quad 0x3fc9f6c407089664 + .quad 0x3fc9cf97cdce0ec3 + .quad 0x3fc9a8778debaa38 + .quad 0x3fc981634011aa75 + .quad 0x3fc95a5adcf7017f + .quad 0x3fc9335e5d594989 + .quad 0x3fc90c6db9fcbcd9 + .quad 0x3fc8e588ebac2dbf + .quad 0x3fc8beafeb38fe8c + .quad 0x3fc897e2b17b19a5 + .quad 0x3fc871213750e994 + .quad 0x3fc84a6b759f512f + .quad 0x3fc823c16551a3c2 + .quad 0x3fc7fd22ff599d4f + .quad 0x3fc7d6903caf5ad0 + .quad 0x3fc7b0091651528c + .quad 0x3fc7898d85444c73 + .quad 0x3fc7631d82935a86 + .quad 0x3fc73cb9074fd14d + .quad 0x3fc716600c914054 + .quad 0x3fc6f0128b756abc + .quad 0x3fc6c9d07d203fc7 + .quad 0x3fc6a399dabbd383 + .quad 0x3fc67d6e9d785771 + .quad 0x3fc6574ebe8c133a + .quad 0x3fc6313a37335d76 + .quad 0x3fc60b3100b09476 + .quad 
0x3fc5e533144c1719 + .quad 0x3fc5bf406b543db2 + .quad 0x3fc59958ff1d52f1 + .quad 0x3fc5737cc9018cdd + .quad 0x3fc54dabc26105d2 + .quad 0x3fc527e5e4a1b58d + .quad 0x3fc5022b292f6a45 + .quad 0x3fc4dc7b897bc1c8 + .quad 0x3fc4b6d6fefe22a4 + .quad 0x3fc4913d8333b561 + .quad 0x3fc46baf0f9f5db7 + .quad 0x3fc4462b9dc9b3dc + .quad 0x3fc420b32740fdd4 + .quad 0x3fc3fb45a59928cc + .quad 0x3fc3d5e3126bc27f + .quad 0x3fc3b08b6757f2a9 + .quad 0x3fc38b3e9e027479 + .quad 0x3fc365fcb0159016 + .quad 0x3fc340c59741142e + .quad 0x3fc31b994d3a4f85 + .quad 0x3fc2f677cbbc0a96 + .quad 0x3fc2d1610c86813a + .quad 0x3fc2ac55095f5c59 + .quad 0x3fc28753bc11aba5 + .quad 0x3fc2625d1e6ddf57 + .quad 0x3fc23d712a49c202 + .quad 0x3fc2188fd9807263 + .quad 0x3fc1f3b925f25d41 + .quad 0x3fc1ceed09853752 + .quad 0x3fc1aa2b7e23f72a + .quad 0x3fc185747dbecf34 + .quad 0x3fc160c8024b27b1 + .quad 0x3fc13c2605c398c3 + .quad 0x3fc1178e8227e47c + .quad 0x3fc0f301717cf0fb + .quad 0x3fc0ce7ecdccc28d + .quad 0x3fc0aa06912675d5 + .quad 0x3fc08598b59e3a07 + .quad 0x3fc06135354d4b18 + .quad 0x3fc03cdc0a51ec0d + .quad 0x3fc0188d2ecf6140 + .quad 0x3fbfe89139dbd566 + .quad 0x3fbfa01c9db57ce2 + .quad 0x3fbf57bc7d9005db + .quad 0x3fbf0f70cdd992e3 + .quad 0x3fbec739830a1120 + .quad 0x3fbe7f1691a32d3e + .quad 0x3fbe3707ee30487b + .quad 0x3fbdef0d8d466db9 + .quad 0x3fbda727638446a2 + .quad 0x3fbd5f55659210e2 + .quad 0x3fbd179788219364 + .quad 0x3fbccfedbfee13a8 + .quad 0x3fbc885801bc4b23 + .quad 0x3fbc40d6425a5cb1 + .quad 0x3fbbf968769fca11 + .quad 0x3fbbb20e936d6974 + .quad 0x3fbb6ac88dad5b1c + .quad 0x3fbb23965a52ff00 + .quad 0x3fbadc77ee5aea8c + .quad 0x3fba956d3ecade63 + .quad 0x3fba4e7640b1bc38 + .quad 0x3fba0792e9277cac + .quad 0x3fb9c0c32d4d2548 + .quad 0x3fb97a07024cbe74 + .quad 0x3fb9335e5d594989 + .quad 0x3fb8ecc933aeb6e8 + .quad 0x3fb8a6477a91dc29 + .quad 0x3fb85fd927506a48 + .quad 0x3fb8197e2f40e3f0 + .quad 0x3fb7d33687c293c9 + .quad 0x3fb78d02263d82d3 + .quad 0x3fb746e100226ed9 + .quad 0x3fb700d30aeac0e1 + .quad 
0x3fb6bad83c1883b6 + .quad 0x3fb674f089365a7a + .quad 0x3fb62f1be7d77743 + .quad 0x3fb5e95a4d9791cb + .quad 0x3fb5a3abb01ade25 + .quad 0x3fb55e10050e0384 + .quad 0x3fb518874226130a + .quad 0x3fb4d3115d207eac + .quad 0x3fb48dae4bc31018 + .quad 0x3fb4485e03dbdfad + .quad 0x3fb403207b414b7f + .quad 0x3fb3bdf5a7d1ee64 + .quad 0x3fb378dd7f749714 + .quad 0x3fb333d7f8183f4b + .quad 0x3fb2eee507b40301 + .quad 0x3fb2aa04a44717a5 + .quad 0x3fb26536c3d8c369 + .quad 0x3fb2207b5c78549e + .quad 0x3fb1dbd2643d190b + .quad 0x3fb1973bd1465567 + .quad 0x3fb152b799bb3cc9 + .quad 0x3fb10e45b3cae831 + .quad 0x3fb0c9e615ac4e17 + .quad 0x3fb08598b59e3a07 + .quad 0x3fb0415d89e74444 + .quad 0x3faffa6911ab9301 + .quad 0x3faf723b517fc523 + .quad 0x3faeea31c006b87c + .quad 0x3fae624c4a0b5e1b + .quad 0x3fadda8adc67ee4e + .quad 0x3fad52ed6405d86f + .quad 0x3faccb73cdddb2cc + .quad 0x3fac441e06f72a9e + .quad 0x3fabbcebfc68f420 + .quad 0x3fab35dd9b58baad + .quad 0x3faaaef2d0fb10fc + .quad 0x3faa282b8a936171 + .quad 0x3fa9a187b573de7c + .quad 0x3fa91b073efd7314 + .quad 0x3fa894aa149fb343 + .quad 0x3fa80e7023d8ccc4 + .quad 0x3fa788595a3577ba + .quad 0x3fa70265a550e777 + .quad 0x3fa67c94f2d4bb58 + .quad 0x3fa5f6e73078efb8 + .quad 0x3fa5715c4c03ceef + .quad 0x3fa4ebf43349e26f + .quad 0x3fa466aed42de3ea + .quad 0x3fa3e18c1ca0ae92 + .quad 0x3fa35c8bfaa1306b + .quad 0x3fa2d7ae5c3c5bae + .quad 0x3fa252f32f8d183f + .quad 0x3fa1ce5a62bc353a + .quad 0x3fa149e3e4005a8d + .quad 0x3fa0c58fa19dfaaa + .quad 0x3fa0415d89e74444 + .quad 0x3f9f7a9b16782856 + .quad 0x3f9e72bf2813ce51 + .quad 0x3f9d6b2725979802 + .quad 0x3f9c63d2ec14aaf2 + .quad 0x3f9b5cc258b718e6 + .quad 0x3f9a55f548c5c43f + .quad 0x3f994f6b99a24475 + .quad 0x3f98492528c8cabf + .quad 0x3f974321d3d006d3 + .quad 0x3f963d6178690bd6 + .quad 0x3f9537e3f45f3565 + .quad 0x3f9432a925980cc1 + .quad 0x3f932db0ea132e22 + .quad 0x3f9228fb1fea2e28 + .quad 0x3f912487a5507f70 + .quad 0x3f90205658935847 + .quad 0x3f8e38ce3033310c + .quad 0x3f8c317384c75f06 + .quad 
0x3f8a2a9c6c170462 + .quad 0x3f882448a388a2aa + .quad 0x3f861e77e8b53fc6 + .quad 0x3f841929f96832f0 + .quad 0x3f82145e939ef1e9 + .quad 0x3f8010157588de71 + .quad 0x3f7c189cbb0e27fb + .quad 0x3f78121214586b54 + .quad 0x3f740c8a747878e2 + .quad 0x3f70080559588b35 + .quad 0x3f680904828985c0 + .quad 0x3f60040155d5889e + .quad 0x3f50020055655889 + .quad 0x0000000000000000 + /*== poly_coeff[4] ==*/ + .align 16 + .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */ + .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */ + .quad 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */ + .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */ + /*== ExpMask ==*/ + .align 16 + .quad 0x000fffffffffffff, 0x000fffffffffffff + /*== Two10 ==*/ + .align 16 + .quad 0x3f50000000000000, 0x3f50000000000000 + /*== MinLog1p = -1+2^(-53) ==*/ + .align 16 + .quad 0xbfefffffffffffff, 0xbfefffffffffffff + /*== MaxLog1p ==*/ + .align 16 + .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000 + /*== One ==*/ + .align 16 + .quad 0x3ff0000000000000, 0x3ff0000000000000 + /*== SgnMask ==*/ + .align 16 + .quad 0x7fffffffffffffff, 0x7fffffffffffffff + /*== XThreshold ==*/ + .align 16 + .quad 0x3e00000000000000, 0x3e00000000000000 + /*== XhMask ==*/ + .align 16 + .quad 0xfffffffffffffc00, 0xfffffffffffffc00 + /*== Threshold ==*/ + .align 16 + .quad 0x4086a00000000000, 0x4086a00000000000 + /*== Bias ==*/ + .align 16 + .quad 0x408ff80000000000, 0x408ff80000000000 + /*== Bias1 ==*/ + .align 16 + .quad 0x408ff00000000000, 0x408ff00000000000 + /*== ExpMask ==*/ + .align 16 + .quad 0x7ff0000000000000, 0x7ff0000000000000 + /*== ExpMask2 ==*/ + .align 16 + .quad 0x7f40000000000000, 0x7f40000000000000 + /*== L2L ==*/ + .align 16 + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF + .align 16 + .type __svml_dlog1p_data_internal,@object + .size __svml_dlog1p_data_internal,.-__svml_dlog1p_data_internal + .space 96, 0x00 + .align 16 + +.FLT_16: + .long 0x00000000,0x43380000,0x00000000,0x43380000 + .type .FLT_16,@object 
+ .size .FLT_16,16 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S new file mode 100644 index 0000000000..ec01af680c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized log1p, vector length is 4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define _ZGVdN4v_log1p _ZGVdN4v_log1p_sse_wrapper /* rename first, so the file included next is presumably assembled under the _sse_wrapper name */ +#include "../svml_d_log1p4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c new file mode 100644 index 0000000000..808f3224ef --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized log1p, vector length is 4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVdN4v_log1p +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* NOTE(review): REDIRECT_NAME and IFUNC_SELECTOR are not defined in this file; presumably supplied by ifunc-mathvec-avx2.h */ + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4v_log1p, __GI__ZGVdN4v_log1p, __redirect__ZGVdN4v_log1p) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S new file mode 100644 index 0000000000..548538b0ec --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S @@ -0,0 +1,1383 @@ +/* Function log1p vectorized with AVX2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>.
 */ + +/* + * ALGORITHM DESCRIPTION: + * + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) + * Get short reciprocal approximation Rcp ~ 1/xh + * R = (Rcp*xh - 1.0) + Rcp*xl + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) + * log(Rcp) is tabulated + * Lanes with x < MinLog1p or with !(x <= MaxLog1p) (NaN included) are + * recomputed one lane at a time by calling scalar log1p + */ + +/* Offsets for data table __svml_dlog1p_data_internal + */ +#define Log_HA_table 0 +#define Log_LA_table 8224 +#define poly_coeff 12352 +#define ExpMask 12480 +#define Two10 12512 +#define MinLog1p 12544 +#define MaxLog1p 12576 +#define One 12608 +#define SgnMask 12640 +#define XThreshold 12672 +#define XhMask 12704 +#define Threshold 12736 +#define Bias 12768 +#define Bias1 12800 +#define ExpMask0 12832 +#define ExpMask2 12864 +#define L2 12896 + +/* Lookup bias for data table __svml_dlog1p_data_internal. */ +#define Table_Lookup_Bias -0x405fe0 + +#include <sysdep.h> + + .text + .section .text.avx2,"ax",@progbits +ENTRY(_ZGVdN4v_log1p_avx2) + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-32, %rsp + subq $96, %rsp + lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %r8 + +/* SgnMask used by all accuracies */ + vmovupd SgnMask+__svml_dlog1p_data_internal(%rip), %ymm12 + vmovupd One+__svml_dlog1p_data_internal(%rip), %ymm7 + +/* 2^ (-10-exp(X) ) */ + vmovupd ExpMask2+__svml_dlog1p_data_internal(%rip), %ymm3 + vmovapd %ymm0, %ymm9 + vandpd %ymm12, %ymm9, %ymm10 + vcmplt_oqpd XThreshold+__svml_dlog1p_data_internal(%rip), %ymm10, %ymm11 + vaddpd %ymm7, %ymm9, %ymm13 + +/* compute 1+x as high, low parts */ + vmaxpd %ymm9, %ymm7, %ymm15 + vminpd %ymm9, %ymm7, %ymm6 + vorpd XhMask+__svml_dlog1p_data_internal(%rip), %ymm11, %ymm14 + vandpd %ymm14, %ymm13, %ymm4 + +/* preserve mantissa, set input exponent to 2^(-10) */ + vandpd ExpMask+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm5 + vorpd Two10+__svml_dlog1p_data_internal(%rip), %ymm5, %ymm5 + +/* reciprocal approximation good to at least 11 bits */ + vcvtpd2ps %ymm5, %xmm2 + vsubpd
%ymm4, %ymm15, %ymm0 + +/* check range */ + vcmplt_oqpd MinLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm15 + vrcpps %xmm2, %xmm1 + vaddpd %ymm0, %ymm6, %ymm6 + vcmpnle_uqpd MaxLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm0 + vcvtps2pd %xmm1, %ymm11 + +/* exponent of X needed to scale Xl */ + vandps ExpMask0+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm10 + vpsubq %ymm10, %ymm3, %ymm13 + +/* exponent bits */ + vpsrlq $20, %ymm4, %ymm4 + +/* round reciprocal to nearest integer, will have 1+9 mantissa bits */ + vroundpd $0, %ymm11, %ymm3 + +/* scale DblRcp */ + vmulpd %ymm13, %ymm3, %ymm2 + +/* load Threshold for the compare with the rounded reciprocal below; ymm5 = ymm3*ymm5 - One */ + vmovupd Threshold+__svml_dlog1p_data_internal(%rip), %ymm13 + vfmsub213pd %ymm7, %ymm3, %ymm5 + +/* Compute SignMask for all accuracies, including EP */ + vandnpd %ymm9, %ymm12, %ymm8 + vorpd %ymm0, %ymm15, %ymm7 + +/* + * prepare table index + * table lookup + */ + vpsrlq $40, %ymm3, %ymm0 + +/* + * argument reduction + * VQFMS( D, R, X, DblRcp1, One ); + */ + vfmadd213pd %ymm5, %ymm2, %ymm6 + vmovupd poly_coeff+64+__svml_dlog1p_data_internal(%rip), %ymm2 + vcmplt_oqpd %ymm3, %ymm13, %ymm3 + vmulpd %ymm6, %ymm6, %ymm5 + vfmadd213pd poly_coeff+96+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm2 + +/* combine and get argument value range mask */ + vmovmskpd %ymm7, %eax + vextractf128 $1, %ymm4, %xmm12 + vshufps $221, %xmm12, %xmm4, %xmm14 + +/* biased exponent in DP format; then subtract the bias built from Bias/Bias1 and multiply by the L2 constant */ + vcvtdq2pd %xmm14, %ymm1 + vandpd Bias+__svml_dlog1p_data_internal(%rip), %ymm3, %ymm14 + vorpd Bias1+__svml_dlog1p_data_internal(%rip), %ymm14, %ymm15 + vsubpd %ymm15, %ymm1, %ymm1 + vmulpd L2+__svml_dlog1p_data_internal(%rip), %ymm1, %ymm3 + +/* polynomial */ + vmovupd poly_coeff+__svml_dlog1p_data_internal(%rip), %ymm1 + vfmadd213pd poly_coeff+32+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm1 + vfmadd213pd %ymm2, %ymm5, %ymm1 + +/* reconstruction */ + vfmadd213pd %ymm6, %ymm5, %ymm1 + vextractf128 $1, %ymm0, %xmm10 + vmovd %xmm0, %edx + vmovd %xmm10, %esi + movslq
%edx, %rdx + vpextrd $2, %xmm0, %ecx + movslq %esi, %rsi + vpextrd $2, %xmm10, %edi + movslq %ecx, %rcx + movslq %edi, %rdi + vmovsd (%r8,%rdx), %xmm4 + vmovsd (%r8,%rsi), %xmm11 + vmovhpd (%r8,%rcx), %xmm4, %xmm7 + vmovhpd (%r8,%rdi), %xmm11, %xmm12 + vinsertf128 $1, %xmm12, %ymm7, %ymm0 + vaddpd %ymm1, %ymm0, %ymm6 + vaddpd %ymm6, %ymm3, %ymm0 + +/* OR in the Sign of input argument to produce correct log1p(-0) */ + vorpd %ymm8, %ymm0, %ymm0 + testl %eax, %eax + +/* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx r12 r13 r14 r15 eax ymm0 ymm9 + +/* Restore registers + * and exit the function + */ + +L(EXIT): + movq %rbp, %rsp + popq %rbp + cfi_def_cfa(7, 8) + cfi_restore(6) + ret + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + +/* Branch to process special inputs: + * spill the input (ymm9) to 32(%rsp) and the fast-path result (ymm0) to 64(%rsp) + */ + +L(SPECIAL_VALUES_BRANCH): + vmovupd %ymm9, 32(%rsp) + vmovupd %ymm0, 64(%rsp) + # LOE rbx r12 r13 r14 r15 eax ymm0 + + xorl %edx, %edx + # LOE rbx r12 r13 r14 r15 eax edx + + vzeroupper + movq %r12, 16(%rsp) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 + movl %edx, %r12d + movq %r13, 8(%rsp) + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 + movl %eax, %r13d + movq %r14, (%rsp) + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r15 r12d r13d + +/* Range mask bits check: + * r13d holds the lane mask from vmovmskpd, r12d the lane index (starts at 0) + */ + +L(RANGEMASK_CHECK): + btl %r12d, %r13d + +/* Call scalar math function */ + jc L(SCALAR_MATH_CALL) + #
LOE rbx r15 r12d r13d + +/* Special inputs + * processing loop + */ + +L(SPECIAL_VALUES_LOOP): + incl %r12d + cmpl $4, %r12d + +/* Check bits in range mask */ + jl L(RANGEMASK_CHECK) + # LOE rbx r15 r12d r13d + + movq 16(%rsp), %r12 + cfi_restore(12) + movq 8(%rsp), %r13 + cfi_restore(13) + movq (%rsp), %r14 + cfi_restore(14) + vmovupd 64(%rsp), %ymm0 + +/* Go to exit */ + jmp L(EXIT) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r12 r13 r14 r15 ymm0 + +/* Scalar math function call + * to process special input: lane r12d of the saved input -> log1p -> same lane of the saved result + */ + +L(SCALAR_MATH_CALL): + movl %r12d, %r14d + movsd 32(%rsp,%r14,8), %xmm0 + call log1p@PLT + # LOE rbx r14 r15 r12d r13d xmm0 + + movsd %xmm0, 64(%rsp,%r14,8) + +/* Process special inputs in loop */ + jmp L(SPECIAL_VALUES_LOOP) + # LOE rbx r15 r12d r13d +END(_ZGVdN4v_log1p_avx2) + + .section .rodata, "a" + .align 32 + +#ifdef __svml_dlog1p_data_internal_typedef +typedef unsigned int VUINT32; +typedef struct { + __declspec(align(32)) VUINT32 Log_HA_table[(1<<10)+2][2]; + __declspec(align(32)) VUINT32 Log_LA_table[(1<<9)+1][2]; + __declspec(align(32)) VUINT32 poly_coeff[4][4][2]; + __declspec(align(32)) VUINT32 ExpMask[4][2]; + __declspec(align(32)) VUINT32 Two10[4][2]; + __declspec(align(32)) VUINT32 MinLog1p[4][2]; + __declspec(align(32)) VUINT32 MaxLog1p[4][2]; +
__declspec(align(32)) VUINT32 One[4][2]; + __declspec(align(32)) VUINT32 SgnMask[4][2]; + __declspec(align(32)) VUINT32 XThreshold[4][2]; + __declspec(align(32)) VUINT32 XhMask[4][2]; + __declspec(align(32)) VUINT32 Threshold[4][2]; + __declspec(align(32)) VUINT32 Bias[4][2]; + __declspec(align(32)) VUINT32 Bias1[4][2]; + __declspec(align(32)) VUINT32 ExpMask0[4][2]; + __declspec(align(32)) VUINT32 ExpMask2[4][2]; + __declspec(align(32)) VUINT32 L2[4][2]; +} __svml_dlog1p_data_internal; +#endif +__svml_dlog1p_data_internal: + /* Log_HA_table */ + .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100 + .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a + .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff + .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a + .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb + .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e + .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b + .quad 0xc0862347acebaf68, 0xbe1cef3b152048af + .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e + .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4 + .quad 0xc08623537ac30980, 0xbe1cefc4642ee597 + .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16 + .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6 + .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362 + .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557 + .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b + .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed + .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed + .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f + .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce + .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7 + .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1 + .quad 0xc086238206e94218, 0xbe1ceee898588610 + .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea + .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6 + .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6 + .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165 + .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1 + .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b + .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670 + .quad 0xc08623a07b28ae60, 
0xbe1cef359363787c + .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c + .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84 + .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7 + .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b + .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf + .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62 + .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b + .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98 + .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87 + .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff + .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798 + .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e + .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde + .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b + .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c + .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98 + .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f + .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358 + .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380 + .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4 + .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b + .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2 + .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4 + .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400 + .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7 + .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a + .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d + .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b + .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575 + .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951 + .quad 0xc086241263e87f50, 0xbe1cf16e74768529 + .quad 0xc0862415f6193658, 0xbe1cefec64b8becb + .quad 0xc086241986b28f30, 0xbe1cf0838d210baa + .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11 + .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805 + .quad 0xc08624242f008380, 0xbe1ceea988c5a417 + .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5 + .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38 + .quad 0xc086242ec92eaee8, 0xbe1cef0946455411 + .quad 0xc08624324ecbaf98, 0xbe1cefea60907739 + .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42 + .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d + 
.quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e + .quad 0xc086244055d2c968, 0xbe1cef345284c119 + .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219 + .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114 + .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189 + .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f + .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f + .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5 + .quad 0xc0862458a789e250, 0xbe1cf0b173796a31 + .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d + .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb + .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7 + .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f + .quad 0xc0862469d9a591c0, 0xbe1cef503d947663 + .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2 + .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc + .quad 0xc086247419475160, 0xbe1cf03dd9922331 + .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129 + .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6 + .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100 + .quad 0xc0862481af27c528, 0xbe1cee8a6593278a + .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7 + .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8 + .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002 + .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4 + .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c + .quad 0xc0862495e5179270, 0xbe1cee757f20c326 + .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4 + .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97 + .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb + .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e + .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b + .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80 + .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71 + .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9 + .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139 + .quad 0xc08624b72472a528, 0xbe1cf031c931c11f + .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7 + .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d + .quad 0xc08624c103245238, 0xbe1cefd492f1ba75 + .quad 0xc08624c44aacab08, 0xbe1cf1253e154466 + .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55 + .quad 0xc08624cad5b7aa58, 
0xbe1cf1f08dd048fe + .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f + .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968 + .quad 0xc08624d49c4a4b78, 0xbe1cee97b556ed78 + .quad 0xc08624d7dbd56750, 0xbe1cf1b14b6acb75 + .quad 0xc08624db1a0f6b00, 0xbe1cef1e860623f2 + .quad 0xc08624de56f96758, 0xbe1ceeaf4d156f3d + .quad 0xc08624e192946bf0, 0xbe1ceecc12b400ed + .quad 0xc08624e4cce18710, 0xbe1cf180c40c794f + .quad 0xc08624e805e1c5c8, 0xbe1cf185a08f7f65 + .quad 0xc08624eb3d9633d8, 0xbe1cef45fc924078 + .quad 0xc08624ee73ffdbb0, 0xbe1cf1e4f457f32a + .quad 0xc08624f1a91fc6a0, 0xbe1cf040147b8a5a + .quad 0xc08624f4dcf6fc98, 0xbe1cf1effca0dfb2 + .quad 0xc08624f80f868468, 0xbe1cf0470146e5bc + .quad 0xc08624fb40cf6390, 0xbe1cef4dd186e501 + .quad 0xc08624fe70d29e60, 0xbe1ceebe257f66c7 + .quad 0xc08625019f9137f0, 0xbe1ceefb7a1c395c + .quad 0xc0862504cd0c3220, 0xbe1cf209dedfed8c + .quad 0xc0862507f9448db0, 0xbe1cf082da464994 + .quad 0xc086250b243b4a18, 0xbe1cee88694a73cf + .quad 0xc086250e4df165a0, 0xbe1cf0b61e8f0531 + .quad 0xc08625117667dd78, 0xbe1cf1106599c962 + .quad 0xc08625149d9fad98, 0xbe1ceff1ee88af1f + .quad 0xc0862517c399d0c8, 0xbe1cf0f746994ef6 + .quad 0xc086251ae85740b8, 0xbe1cefe8a1d077e4 + .quad 0xc086251e0bd8f5e0, 0xbe1cf1a1da036092 + .quad 0xc08625212e1fe7a8, 0xbe1cf0f8a7786fcd + .quad 0xc08625244f2d0c48, 0xbe1cefa1174a07a7 + .quad 0xc08625276f0158d8, 0xbe1cef1043aa5b25 + .quad 0xc086252a8d9dc150, 0xbe1cf15d521c169d + .quad 0xc086252dab033898, 0xbe1cf220bba8861f + .quad 0xc0862530c732b078, 0xbe1cef51e310eae2 + .quad 0xc0862533e22d1988, 0xbe1cf222fcedd8ae + .quad 0xc0862536fbf36370, 0xbe1cefdb4da4bda8 + .quad 0xc086253a14867ca0, 0xbe1ceeafc1112171 + .quad 0xc086253d2be75280, 0xbe1cee99dfb4b408 + .quad 0xc08625404216d160, 0xbe1cf22d2536f06b + .quad 0xc08625435715e498, 0xbe1cef6abbf2e268 + .quad 0xc08625466ae57648, 0xbe1cf093a14789f5 + .quad 0xc08625497d866fa0, 0xbe1cf0f93655603c + .quad 0xc086254c8ef9b8b8, 0xbe1cf1cc40c9aafc + .quad 0xc086254f9f4038a8, 0xbe1ceeea5f4e9157 + 
.quad 0xc0862552ae5ad568, 0xbe1cefa9f52d4997 + .quad 0xc0862555bc4a7400, 0xbe1cefa490a638ff + .quad 0xc0862558c90ff868, 0xbe1cef7fcf797d6f + .quad 0xc086255bd4ac4590, 0xbe1cf1b4c51113c9 + .quad 0xc086255edf203d78, 0xbe1cef55e5b4a55d + .quad 0xc0862561e86cc100, 0xbe1cf0d37a25f9dc + .quad 0xc0862564f092b028, 0xbe1ceebe9efc19d9 + .quad 0xc0862567f792e9d8, 0xbe1cee8ad30a57b5 + .quad 0xc086256afd6e4c08, 0xbe1cef4e1817b90b + .quad 0xc086256e0225b3b8, 0xbe1cee7fa9229996 + .quad 0xc086257105b9fce0, 0xbe1cf0b54963d945 + .quad 0xc0862574082c0298, 0xbe1cee5f2f3c7995 + .quad 0xc0862577097c9ee0, 0xbe1cf0828e303a2c + .quad 0xc086257a09acaae0, 0xbe1cf172c3078947 + .quad 0xc086257d08bcfec0, 0xbe1cf189252afa22 + .quad 0xc086258006ae71b8, 0xbe1cefdb80426923 + .quad 0xc08625830381da08, 0xbe1ceef1391a0372 + .quad 0xc0862585ff380d00, 0xbe1cf17720c78d13 + .quad 0xc0862588f9d1df18, 0xbe1ceef1f9027d83 + .quad 0xc086258bf35023b8, 0xbe1cf06fac99dec9 + .quad 0xc086258eebb3ad78, 0xbe1cf1373eeb45c0 + .quad 0xc0862591e2fd4e00, 0xbe1cef777536bb81 + .quad 0xc0862594d92dd600, 0xbe1cf0f43ca40766 + .quad 0xc0862597ce461558, 0xbe1cefb2cfc6766b + .quad 0xc086259ac246daf0, 0xbe1ceea49e64ffa2 + .quad 0xc086259db530f4c8, 0xbe1cf250fa457dec + .quad 0xc08625a0a7053018, 0xbe1cf17d8bb2a44e + .quad 0xc08625a397c45918, 0xbe1cf1d5906d54b7 + .quad 0xc08625a6876f3b30, 0xbe1cf08fe7b31780 + .quad 0xc08625a97606a0e0, 0xbe1cef13edfc9d11 + .quad 0xc08625ac638b53c8, 0xbe1cef9d2b107219 + .quad 0xc08625af4ffe1cb0, 0xbe1cf1ddd4ff6160 + .quad 0xc08625b23b5fc390, 0xbe1cefa02a996495 + .quad 0xc08625b525b10f68, 0xbe1cf166a7e37ee5 + .quad 0xc08625b80ef2c680, 0xbe1cef0b171068a5 + .quad 0xc08625baf725ae28, 0xbe1cf05c80779283 + .quad 0xc08625bdde4a8af0, 0xbe1cf1bbfbffb889 + .quad 0xc08625c0c4622090, 0xbe1cf0b8666c0124 + .quad 0xc08625c3a96d31e0, 0xbe1cf0a8fcf47a86 + .quad 0xc08625c68d6c80f0, 0xbe1cef46e18cb092 + .quad 0xc08625c97060cef0, 0xbe1cf1458a350efb + .quad 0xc08625cc524adc58, 0xbe1ceeea1dadce12 + .quad 0xc08625cf332b68b0, 
0xbe1cf0a1bfdc44c7 + .quad 0xc08625d2130332d0, 0xbe1cef96d02da73e + .quad 0xc08625d4f1d2f8a8, 0xbe1cf2451c3c7701 + .quad 0xc08625d7cf9b7778, 0xbe1cf10d08f83812 + .quad 0xc08625daac5d6ba0, 0xbe1ceec5b4895c5e + .quad 0xc08625dd881990b0, 0xbe1cf14e1325c5e4 + .quad 0xc08625e062d0a188, 0xbe1cf21d0904be12 + .quad 0xc08625e33c835838, 0xbe1ceed0839bcf21 + .quad 0xc08625e615326df0, 0xbe1cf1bb944889d2 + .quad 0xc08625e8ecde9b48, 0xbe1cee738e85eece + .quad 0xc08625ebc38897e0, 0xbe1cf25c2bc6ef12 + .quad 0xc08625ee99311ac8, 0xbe1cf132b70a41ad + .quad 0xc08625f16dd8da28, 0xbe1cf1984236a6e3 + .quad 0xc08625f441808b78, 0xbe1cf19ae74998f9 + .quad 0xc08625f71428e370, 0xbe1cef3e175d61a1 + .quad 0xc08625f9e5d295f8, 0xbe1cf101f9868fd9 + .quad 0xc08625fcb67e5658, 0xbe1cee69db83dcd2 + .quad 0xc08625ff862cd6f8, 0xbe1cf081b636af51 + .quad 0xc086260254dec9a8, 0xbe1cee62c7d59b3e + .quad 0xc08626052294df58, 0xbe1cf1b745c57716 + .quad 0xc0862607ef4fc868, 0xbe1cef3d2800ea23 + .quad 0xc086260abb103458, 0xbe1cef480ff1acd2 + .quad 0xc086260d85d6d200, 0xbe1cf2424c9a17ef + .quad 0xc08626104fa44f90, 0xbe1cf12cfde90fd5 + .quad 0xc086261318795a68, 0xbe1cf21f590dd5b6 + .quad 0xc0862615e0569f48, 0xbe1cf0c50f9cd28a + .quad 0xc0862618a73cca30, 0xbe1ceedbdb520545 + .quad 0xc086261b6d2c8668, 0xbe1cf0b030396011 + .quad 0xc086261e32267e98, 0xbe1cf19917010e96 + .quad 0xc0862620f62b5cb0, 0xbe1cf07331355985 + .quad 0xc0862623b93bc9e8, 0xbe1cf01ae921a1c3 + .quad 0xc08626267b586ed0, 0xbe1cefe5cf0dbf0c + .quad 0xc08626293c81f348, 0xbe1cf01b258aeb50 + .quad 0xc086262bfcb8fe88, 0xbe1cee6b9e7f4c68 + .quad 0xc086262ebbfe3710, 0xbe1cee684a9b21c9 + .quad 0xc08626317a5242b8, 0xbe1cf1f8bcde9a8b + .quad 0xc086263437b5c6c0, 0xbe1cf1d063d36238 + .quad 0xc0862636f42967a8, 0xbe1cf1e31a19075e + .quad 0xc0862639afadc950, 0xbe1cf1d8efdf7e7d + .quad 0xc086263c6a438ef0, 0xbe1cf1812ee72dba + .quad 0xc086263f23eb5b18, 0xbe1cf1449a9a2279 + .quad 0xc0862641dca5cfb8, 0xbe1cee96edce5085 + .quad 0xc086264494738e08, 0xbe1cf06797bd03b2 + 
.quad 0xc08626474b5536b8, 0xbe1cef91b9b7ffc1 + .quad 0xc086264a014b69c0, 0xbe1cef4b6721278f + .quad 0xc086264cb656c678, 0xbe1cf1942925eb4a + .quad 0xc086264f6a77eba8, 0xbe1cefa2c7bc2e39 + .quad 0xc08626521daf7758, 0xbe1cf252595aceb3 + .quad 0xc0862654cffe0718, 0xbe1cee8e9ae47ec2 + .quad 0xc0862657816437a8, 0xbe1cf1bf913828fa + .quad 0xc086265a31e2a558, 0xbe1cf23475d6b366 + .quad 0xc086265ce179ebc8, 0xbe1cef8df00a922b + .quad 0xc086265f902aa5f0, 0xbe1cef279bfa43e0 + .quad 0xc08626623df56e38, 0xbe1cf080e10b8365 + .quad 0xc0862664eadade70, 0xbe1cf1a518f9b544 + .quad 0xc086266796db8fd0, 0xbe1cef9308fed9e9 + .quad 0xc086266a41f81ae8, 0xbe1ceea3ae6b19c9 + .quad 0xc086266cec3117b8, 0xbe1ceef06003d4c2 + .quad 0xc086266f95871da8, 0xbe1cf0b8457ffb0c + .quad 0xc08626723dfac390, 0xbe1cf0c526745ad6 + .quad 0xc0862674e58c9fa8, 0xbe1cf0cf91ff7b5d + .quad 0xc08626778c3d4798, 0xbe1cefe260819380 + .quad 0xc086267a320d5070, 0xbe1ceebd90aa27a3 + .quad 0xc086267cd6fd4ea8, 0xbe1cf0388121dffa + .quad 0xc086267f7b0dd630, 0xbe1cf1a3881435f1 + .quad 0xc08626821e3f7a68, 0xbe1cef28e9d9ac52 + .quad 0xc0862684c092ce08, 0xbe1cf02d300062dd + .quad 0xc086268762086350, 0xbe1cefaee1edfa35 + .quad 0xc086268a02a0cbe0, 0xbe1cf0a5a052e936 + .quad 0xc086268ca25c98d8, 0xbe1cee60a4a497ed + .quad 0xc086268f413c5ab0, 0xbe1cf0e4a5d0cf49 + .quad 0xc0862691df40a170, 0xbe1cf149235a4e6e + .quad 0xc08626947c69fc80, 0xbe1cf215180b9fcc + .quad 0xc086269718b8fac8, 0xbe1cef9b156a9840 + .quad 0xc0862699b42e2a90, 0xbe1cf054c91441be + .quad 0xc086269c4eca19a8, 0xbe1cf13ded26512c + .quad 0xc086269ee88d5550, 0xbe1cf22ea4d8ac06 + .quad 0xc08626a181786a40, 0xbe1cf2354666ee2e + .quad 0xc08626a4198be4a8, 0xbe1cefef936752b3 + .quad 0xc08626a6b0c85020, 0xbe1cf1e360a9db68 + .quad 0xc08626a9472e37d8, 0xbe1ceed6aeb812c5 + .quad 0xc08626abdcbe2650, 0xbe1cf227340b4986 + .quad 0xc08626ae7178a5b0, 0xbe1cf0215a0cbe0d + .quad 0xc08626b1055e3f70, 0xbe1cf256adf0ae26 + .quad 0xc08626b3986f7ca8, 0xbe1ceff3c67aed06 + .quad 0xc08626b62aace5c8, 
0xbe1cf2159fb93652 + .quad 0xc08626b8bc1702e0, 0xbe1cf01e6dbd1c7f + .quad 0xc08626bb4cae5b60, 0xbe1cf009e75d1c0c + .quad 0xc08626bddc737648, 0xbe1ceec10a020e73 + .quad 0xc08626c06b66da08, 0xbe1cf06d5783eee7 + .quad 0xc08626c2f9890ca0, 0xbe1cf0cb8f169ffe + .quad 0xc08626c586da9388, 0xbe1cef7de2452430 + .quad 0xc08626c8135bf3b0, 0xbe1cf05da6f783ae + .quad 0xc08626ca9f0db198, 0xbe1cefcc877d681d + .quad 0xc08626cd29f05138, 0xbe1cef0531954ab3 + .quad 0xc08626cfb4045608, 0xbe1cf06b8565ea3d + .quad 0xc08626d23d4a4310, 0xbe1cefdc455d9d7e + .quad 0xc08626d4c5c29ad0, 0xbe1ceefc47e8fa64 + .quad 0xc08626d74d6ddf48, 0xbe1cf1872bf033f2 + .quad 0xc08626d9d44c9210, 0xbe1cf19d91087f9d + .quad 0xc08626dc5a5f3438, 0xbe1cf012d444c6ab + .quad 0xc08626dedfa64650, 0xbe1cf0ba528ee153 + .quad 0xc08626e164224880, 0xbe1ceeb431709788 + .quad 0xc08626e3e7d3ba60, 0xbe1cf0b9af31a6a5 + .quad 0xc08626e66abb1b28, 0xbe1cf168fb2e135b + .quad 0xc08626e8ecd8e990, 0xbe1cef9097461c93 + .quad 0xc08626eb6e2da3d0, 0xbe1cee7a434735d8 + .quad 0xc08626edeeb9c7a8, 0xbe1cf235732b86f2 + .quad 0xc08626f06e7dd280, 0xbe1cefe1510b89e6 + .quad 0xc08626f2ed7a4120, 0xbe1cf1f64b9b80ef + .quad 0xc08626f56baf9000, 0xbe1cf08f320ca339 + .quad 0xc08626f7e91e3b08, 0xbe1cf1b1de2808a1 + .quad 0xc08626fa65c6bdc0, 0xbe1cf1976d778b28 + .quad 0xc08626fce1a99338, 0xbe1ceef40a4f076f + .quad 0xc08626ff5cc73600, 0xbe1cef3e45869ce3 + .quad 0xc0862701d7202048, 0xbe1ceef601b4c9d6 + .quad 0xc086270450b4cbc0, 0xbe1cf1eaf0b57fd6 + .quad 0xc0862706c985b1c0, 0xbe1cef82a44990f3 + .quad 0xc086270941934b10, 0xbe1ceefe32981f2c + .quad 0xc086270bb8de1018, 0xbe1cefbf6f5a0445 + .quad 0xc086270e2f6678d0, 0xbe1cf18dba75792c + .quad 0xc0862710a52cfcc8, 0xbe1cf0da64ce995f + .quad 0xc08627131a321318, 0xbe1cef04ac0fb802 + .quad 0xc08627158e763268, 0xbe1cee9d4e2ad9bd + .quad 0xc086271801f9d0f8, 0xbe1cefa9b55407b5 + .quad 0xc086271a74bd64a0, 0xbe1cefe6bd329570 + .quad 0xc086271ce6c162c8, 0xbe1cef0b1205dc85 + .quad 0xc086271f58064068, 0xbe1cef092a785e3f + 
.quad 0xc0862721c88c7210, 0xbe1cf050dcdaac30 + .quad 0xc086272438546be8, 0xbe1cf210907ded8b + .quad 0xc0862726a75ea1b8, 0xbe1cee760be44f99 + .quad 0xc086272915ab86c0, 0xbe1ceeeee07c2bcc + .quad 0xc086272b833b8df0, 0xbe1cf06874992df5 + .quad 0xc086272df00f29d0, 0xbe1cef8fac5d4899 + .quad 0xc08627305c26cc70, 0xbe1cf1103241cc99 + .quad 0xc0862732c782e788, 0xbe1cf1d35fef83fe + .quad 0xc08627353223ec68, 0xbe1cef3ec8133e1d + .quad 0xc08627379c0a4be8, 0xbe1cef7261daccd8 + .quad 0xc086273a05367688, 0xbe1cf18656c50806 + .quad 0xc086273c6da8dc68, 0xbe1cf1c8736e049a + .quad 0xc086273ed561ed38, 0xbe1cf1f93bff4911 + .quad 0xc08627413c621848, 0xbe1cf188a4ea680c + .quad 0xc0862743a2a9cc80, 0xbe1cf1d270930c80 + .quad 0xc086274608397868, 0xbe1cf25a328c28e2 + .quad 0xc08627486d118a28, 0xbe1cf106f90aa3b8 + .quad 0xc086274ad1326f80, 0xbe1cee5e9d2e885a + .quad 0xc086274d349c95c0, 0xbe1cf1c0bac27228 + .quad 0xc086274f975069f8, 0xbe1cf1a1500f9b1c + .quad 0xc0862751f94e58c0, 0xbe1cefc30663ac44 + .quad 0xc08627545a96ce48, 0xbe1cf17123e427a2 + .quad 0xc0862756bb2a3678, 0xbe1cefb92749fea4 + .quad 0xc08627591b08fcc0, 0xbe1cefa40e1ea74a + .quad 0xc086275b7a338c40, 0xbe1cee6f4612c3e9 + .quad 0xc086275dd8aa4fa8, 0xbe1cf1c54a053627 + .quad 0xc0862760366db168, 0xbe1ceff5eb503d9e + .quad 0xc0862762937e1b70, 0xbe1cf02e47f10cee + .quad 0xc0862764efdbf768, 0xbe1ceeb06e1d0dad + .quad 0xc08627674b87ae88, 0xbe1cf10aadd6dba5 + .quad 0xc0862769a681a9c0, 0xbe1cf24e9913d30f + .quad 0xc086276c00ca51a0, 0xbe1cef47b301e312 + .quad 0xc086276e5a620e48, 0xbe1ceeb1cefc2e85 + .quad 0xc0862770b3494788, 0xbe1cf16f1fbbe011 + .quad 0xc08627730b8064e8, 0xbe1ceebdf75174c7 + .quad 0xc08627756307cd70, 0xbe1cf06e3871a0da + .quad 0xc0862777b9dfe7f0, 0xbe1cef16799fd554 + .quad 0xc086277a10091ac0, 0xbe1cf248dabf5377 + .quad 0xc086277c6583cc00, 0xbe1cf0c78d92a2cd + .quad 0xc086277eba506158, 0xbe1cf0b911b029f0 + .quad 0xc08627810e6f4028, 0xbe1cefdc24719766 + .quad 0xc086278361e0cd70, 0xbe1cefbb6562b7e7 + .quad 0xc0862785b4a56dd8, 
0xbe1cf1e0afb349ec + .quad 0xc086278806bd85c0, 0xbe1cf008292e52fc + .quad 0xc086278a58297918, 0xbe1cf053073872bf + .quad 0xc086278ca8e9ab88, 0xbe1cf17a0a55a947 + .quad 0xc086278ef8fe8068, 0xbe1ceeffb0b60234 + .quad 0xc086279148685aa0, 0xbe1cf162204794a8 + .quad 0xc086279397279ce0, 0xbe1cf24cc8cb48ac + .quad 0xc0862795e53ca978, 0xbe1cf0c9be68d5c3 + .quad 0xc086279832a7e258, 0xbe1cf172cd3d7388 + .quad 0xc086279a7f69a930, 0xbe1ceea2465fbce5 + .quad 0xc086279ccb825f40, 0xbe1cf0a386d2500f + .quad 0xc086279f16f26590, 0xbe1cf1e338ddc18a + .quad 0xc08627a161ba1cd0, 0xbe1cef1f5049867f + .quad 0xc08627a3abd9e548, 0xbe1cef96c1ea8b1f + .quad 0xc08627a5f5521f00, 0xbe1cf138f6fd3c26 + .quad 0xc08627a83e2329b0, 0xbe1cf0d4fcbfdf3a + .quad 0xc08627aa864d64b0, 0xbe1cf24870c12c81 + .quad 0xc08627accdd12f18, 0xbe1cf0ae2a56348d + .quad 0xc08627af14aee7a0, 0xbe1cee8ca1a9b893 + .quad 0xc08627b15ae6eca8, 0xbe1cf20414d637b0 + .quad 0xc08627b3a0799c60, 0xbe1cf0fc6b7b12d8 + .quad 0xc08627b5e5675488, 0xbe1cf152d93c4a00 + .quad 0xc08627b829b072a0, 0xbe1cf1073f9b77c2 + .quad 0xc08627ba6d5553d8, 0xbe1cee694f97d5a4 + .quad 0xc08627bcb0565500, 0xbe1cf0456b8239d7 + .quad 0xc08627bef2b3d2b0, 0xbe1cf211497127e3 + .quad 0xc08627c1346e2930, 0xbe1cf01856c0384d + .quad 0xc08627c37585b468, 0xbe1cefa7dd05479e + .quad 0xc08627c5b5fad000, 0xbe1cef3ae8e50b93 + .quad 0xc08627c7f5cdd750, 0xbe1ceea5f32fdd3a + .quad 0xc08627ca34ff2560, 0xbe1cef424caeb8d9 + .quad 0xc08627cc738f14f0, 0xbe1cf0194d07a81f + .quad 0xc08627ceb17e0070, 0xbe1cf20f452000c1 + .quad 0xc08627d0eecc4210, 0xbe1cf00e356218e4 + .quad 0xc08627d32b7a33a0, 0xbe1cef30484b4bcb + .quad 0xc08627d567882eb0, 0xbe1ceeea11a6641b + .quad 0xc08627d7a2f68c80, 0xbe1cf13492d5bd7b + .quad 0xc08627d9ddc5a618, 0xbe1ceeb7048fad96 + .quad 0xc08627dc17f5d418, 0xbe1ceef0666f0477 + .quad 0xc08627de51876ee8, 0xbe1cf060d4b8b5c2 + .quad 0xc08627e08a7acea8, 0xbe1cf0b2a4b6ff8c + .quad 0xc08627e2c2d04b28, 0xbe1cf0e34809a875 + .quad 0xc08627e4fa883bf0, 0xbe1cf16bf74a3522 + 
.quad 0xc08627e731a2f848, 0xbe1cee6a24623d57 + .quad 0xc08627e96820d718, 0xbe1cefc7b4f1528e + .quad 0xc08627eb9e022f18, 0xbe1cf163051f3548 + .quad 0xc08627edd34756b8, 0xbe1cef36b3366305 + .quad 0xc08627f007f0a408, 0xbe1cf18134625550 + .quad 0xc08627f23bfe6cf0, 0xbe1cf0ec32ec1a11 + .quad 0xc08627f46f710700, 0xbe1ceeb3b64f3edc + .quad 0xc08627f6a248c778, 0xbe1cf0cd15805bc8 + .quad 0xc08627f8d4860368, 0xbe1cf20db3bddebe + .quad 0xc08627fb06290f90, 0xbe1cf25188430e25 + .quad 0xc08627fd37324070, 0xbe1ceea1713490f9 + .quad 0xc08627ff67a1ea28, 0xbe1cf159521d234c + .quad 0xc0862801977860b8, 0xbe1cf24dfe50783b + .quad 0xc0862803c6b5f7d0, 0xbe1ceef2ef89a60b + .quad 0xc0862805f55b02c8, 0xbe1cee7fc919d62c + .quad 0xc08628082367d4c0, 0xbe1cf215a7fb513a + .quad 0xc086280a50dcc0a8, 0xbe1cf0e4401c5ed4 + .quad 0xc086280c7dba1910, 0xbe1cf04ec734d256 + .quad 0xc086280eaa003050, 0xbe1cf010ad787fea + .quad 0xc0862810d5af5880, 0xbe1cee622478393d + .quad 0xc086281300c7e368, 0xbe1cf01c7482564f + .quad 0xc08628152b4a22a0, 0xbe1cf0de20d33536 + .quad 0xc086281755366778, 0xbe1cef2edae5837d + .quad 0xc08628197e8d02f0, 0xbe1cf0a345318cc9 + .quad 0xc086281ba74e45d8, 0xbe1cf20085aa34b8 + .quad 0xc086281dcf7a80c0, 0xbe1cef5fa845ad83 + .quad 0xc086281ff71203e0, 0xbe1cf050d1df69c4 + .quad 0xc08628221e151f48, 0xbe1ceffe43c035b9 + .quad 0xc0862824448422b8, 0xbe1cf14f3018d3c2 + .quad 0xc08628266a5f5dc0, 0xbe1cef0a5fbae83d + .quad 0xc08628288fa71f98, 0xbe1ceff8a95b72a1 + .quad 0xc086282ab45bb750, 0xbe1cef073aa9849b + .quad 0xc086282cd87d73a8, 0xbe1cef69b3835c02 + .quad 0xc086282efc0ca328, 0xbe1cf0bc139379a9 + .quad 0xc08628311f099420, 0xbe1cef247a9ec596 + .quad 0xc086283341749490, 0xbe1cef74bbcc488a + .quad 0xc0862835634df248, 0xbe1cef4bc42e7b8e + .quad 0xc08628378495fad0, 0xbe1cf136d4d5a810 + .quad 0xc0862839a54cfb80, 0xbe1cf0d290b24dd8 + .quad 0xc086283bc5734168, 0xbe1ceeebde8e0065 + .quad 0xc086283de5091950, 0xbe1cf1a09f60aa1e + .quad 0xc0862840040ecfe0, 0xbe1cf0803947a234 + .quad 0xc08628422284b168, 
0xbe1cf0abf7638127 + .quad 0xc0862844406b0a08, 0xbe1cf0f73ee12058 + .quad 0xc08628465dc225a0, 0xbe1cf2079971b26c + .quad 0xc08628487a8a4fe0, 0xbe1cee74957564b1 + .quad 0xc086284a96c3d420, 0xbe1ceee77c1b7d43 + .quad 0xc086284cb26efd90, 0xbe1cf23addba6e09 + .quad 0xc086284ecd8c1730, 0xbe1cf199f4a1da60 + .quad 0xc0862850e81b6bb0, 0xbe1cf09fdea81393 + .quad 0xc0862853021d4588, 0xbe1cf176adb417f7 + .quad 0xc08628551b91ef00, 0xbe1cf0f64f84a8da + .quad 0xc08628573479b220, 0xbe1ceec34cf49523 + .quad 0xc08628594cd4d8a8, 0xbe1cf16d60fbe0bb + .quad 0xc086285b64a3ac40, 0xbe1cee8de7acfc7b + .quad 0xc086285d7be67630, 0xbe1ceee6256cce8d + .quad 0xc086285f929d7fa0, 0xbe1cee7d66a3d8a5 + .quad 0xc0862861a8c91170, 0xbe1cf0bef8265792 + .quad 0xc0862863be697458, 0xbe1cf097f890c6f8 + .quad 0xc0862865d37ef0c8, 0xbe1cf09502d5c3fc + .quad 0xc0862867e809cf00, 0xbe1ceeffb239dac7 + .quad 0xc0862869fc0a56f8, 0xbe1cf1fbfff95c98 + .quad 0xc086286c0f80d090, 0xbe1cefa57ad3eef7 + .quad 0xc086286e226d8348, 0xbe1cf22c58b9183d + .quad 0xc086287034d0b690, 0xbe1ceff262d0a248 + .quad 0xc086287246aab180, 0xbe1cefa7bc194186 + .quad 0xc086287457fbbb08, 0xbe1cf06782d784d9 + .quad 0xc086287668c419e0, 0xbe1cf1d44d0eaa07 + .quad 0xc086287879041490, 0xbe1cf034803c8a48 + .quad 0xc086287a88bbf158, 0xbe1cf08e84916b6f + .quad 0xc086287c97ebf650, 0xbe1cf0c4d3dc1bc7 + .quad 0xc086287ea6946958, 0xbe1cefb1e4625943 + .quad 0xc0862880b4b59010, 0xbe1cf143efdd1fd0 + .quad 0xc0862882c24faff8, 0xbe1cee9896d016da + .quad 0xc0862884cf630e38, 0xbe1cf2186072f2cc + .quad 0xc0862886dbefeff0, 0xbe1cef9217633d34 + .quad 0xc0862888e7f699e0, 0xbe1cf05603549486 + .quad 0xc086288af37750b0, 0xbe1cef50fff513d3 + .quad 0xc086288cfe7258c0, 0xbe1cf127713b32d0 + .quad 0xc086288f08e7f650, 0xbe1cf05015520f3d + .quad 0xc086289112d86d58, 0xbe1cf12eb458b26f + .quad 0xc08628931c4401a8, 0xbe1cf22eae2887ed + .quad 0xc0862895252af6e0, 0xbe1cefdd6656dd2d + .quad 0xc08628972d8d9058, 0xbe1cf1048ea4e646 + .quad 0xc0862899356c1150, 0xbe1ceec4501167e9 + 
.quad 0xc086289b3cc6bcb8, 0xbe1cf0ad52becc3f + .quad 0xc086289d439dd568, 0xbe1cf0daa4e00e35 + .quad 0xc086289f49f19df8, 0xbe1cf00b80de8d6a + .quad 0xc08628a14fc258c8, 0xbe1cf1bcf2ea8464 + .quad 0xc08628a355104818, 0xbe1cf0435e2782b0 + .quad 0xc08628a559dbade0, 0xbe1cf0e3e1a5f56c + .quad 0xc08628a75e24cbf8, 0xbe1cefed9d5a721d + .quad 0xc08628a961ebe3f8, 0xbe1cf0d2d74321e2 + .quad 0xc08628ab65313750, 0xbe1cf24200eb55e9 + .quad 0xc08628ad67f50740, 0xbe1cf23e9d7cf979 + .quad 0xc08628af6a3794d0, 0xbe1cf23a088f421c + .quad 0xc08628b16bf920e0, 0xbe1cef2c1de1ab32 + .quad 0xc08628b36d39ec08, 0xbe1cf1abc231f7b2 + .quad 0xc08628b56dfa36d0, 0xbe1cf2074d5ba303 + .quad 0xc08628b76e3a4180, 0xbe1cf05cd5eed880 + /*== Log_LA_table ==*/ + .align 32 + .quad 0x8000000000000000 + .quad 0xbf5ff802a9ab10e6 + .quad 0xbf6ff00aa2b10bc0 + .quad 0xbf77ee11ebd82e94 + .quad 0xbf7fe02a6b106789 + .quad 0xbf83e7295d25a7d9 + .quad 0xbf87dc475f810a77 + .quad 0xbf8bcf712c74384c + .quad 0xbf8fc0a8b0fc03e4 + .quad 0xbf91d7f7eb9eebe7 + .quad 0xbf93cea44346a575 + .quad 0xbf95c45a51b8d389 + .quad 0xbf97b91b07d5b11b + .quad 0xbf99ace7551cc514 + .quad 0xbf9b9fc027af9198 + .quad 0xbf9d91a66c543cc4 + .quad 0xbf9f829b0e783300 + .quad 0xbfa0b94f7c196176 + .quad 0xbfa1b0d98923d980 + .quad 0xbfa2a7ec2214e873 + .quad 0xbfa39e87b9febd60 + .quad 0xbfa494acc34d911c + .quad 0xbfa58a5bafc8e4d5 + .quad 0xbfa67f94f094bd98 + .quad 0xbfa77458f632dcfc + .quad 0xbfa868a83083f6cf + .quad 0xbfa95c830ec8e3eb + .quad 0xbfaa4fe9ffa3d235 + .quad 0xbfab42dd711971bf + .quad 0xbfac355dd0921f2d + .quad 0xbfad276b8adb0b52 + .quad 0xbfae19070c276016 + .quad 0xbfaf0a30c01162a6 + .quad 0xbfaffae9119b9303 + .quad 0xbfb075983598e471 + .quad 0xbfb0ed839b5526fe + .quad 0xbfb16536eea37ae1 + .quad 0xbfb1dcb263db1944 + .quad 0xbfb253f62f0a1417 + .quad 0xbfb2cb0283f5de1f + .quad 0xbfb341d7961bd1d1 + .quad 0xbfb3b87598b1b6ee + .quad 0xbfb42edcbea646f0 + .quad 0xbfb4a50d3aa1b040 + .quad 0xbfb51b073f06183f + .quad 0xbfb590cafdf01c28 + .quad 
0xbfb60658a93750c4 + .quad 0xbfb67bb0726ec0fc + .quad 0xbfb6f0d28ae56b4c + .quad 0xbfb765bf23a6be13 + .quad 0xbfb7da766d7b12cd + .quad 0xbfb84ef898e8282a + .quad 0xbfb8c345d6319b21 + .quad 0xbfb9375e55595ede + .quad 0xbfb9ab42462033ad + .quad 0xbfba1ef1d8061cd4 + .quad 0xbfba926d3a4ad563 + .quad 0xbfbb05b49bee43fe + .quad 0xbfbb78c82bb0eda1 + .quad 0xbfbbeba818146765 + .quad 0xbfbc5e548f5bc743 + .quad 0xbfbcd0cdbf8c13e1 + .quad 0xbfbd4313d66cb35d + .quad 0xbfbdb5270187d927 + .quad 0xbfbe27076e2af2e6 + .quad 0xbfbe98b549671467 + .quad 0xbfbf0a30c01162a6 + .quad 0xbfbf7b79fec37ddf + .quad 0xbfbfec9131dbeabb + .quad 0xbfc02ebb42bf3d4b + .quad 0xbfc0671512ca596e + .quad 0xbfc09f561ee719c3 + .quad 0xbfc0d77e7cd08e59 + .quad 0xbfc10f8e422539b1 + .quad 0xbfc14785846742ac + .quad 0xbfc17f6458fca611 + .quad 0xbfc1b72ad52f67a0 + .quad 0xbfc1eed90e2dc2c3 + .quad 0xbfc2266f190a5acb + .quad 0xbfc25ded0abc6ad2 + .quad 0xbfc29552f81ff523 + .quad 0xbfc2cca0f5f5f251 + .quad 0xbfc303d718e47fd3 + .quad 0xbfc33af575770e4f + .quad 0xbfc371fc201e8f74 + .quad 0xbfc3a8eb2d31a376 + .quad 0xbfc3dfc2b0ecc62a + .quad 0xbfc41682bf727bc0 + .quad 0xbfc44d2b6ccb7d1e + .quad 0xbfc483bccce6e3dd + .quad 0xbfc4ba36f39a55e5 + .quad 0xbfc4f099f4a230b2 + .quad 0xbfc526e5e3a1b438 + .quad 0xbfc55d1ad4232d6f + .quad 0xbfc59338d9982086 + .quad 0xbfc5c940075972b9 + .quad 0xbfc5ff3070a793d4 + .quad 0xbfc6350a28aaa758 + .quad 0xbfc66acd4272ad51 + .quad 0xbfc6a079d0f7aad2 + .quad 0xbfc6d60fe719d21d + .quad 0xbfc70b8f97a1aa75 + .quad 0xbfc740f8f54037a5 + .quad 0xbfc7764c128f2127 + .quad 0xbfc7ab890210d909 + .quad 0xbfc7e0afd630c274 + .quad 0xbfc815c0a14357eb + .quad 0xbfc84abb75865139 + .quad 0xbfc87fa06520c911 + .quad 0xbfc8b46f8223625b + .quad 0xbfc8e928de886d41 + .quad 0xbfc91dcc8c340bde + .quad 0xbfc9525a9cf456b4 + .quad 0xbfc986d3228180ca + .quad 0xbfc9bb362e7dfb83 + .quad 0xbfc9ef83d2769a34 + .quad 0xbfca23bc1fe2b563 + .quad 0xbfca57df28244dcd + .quad 0xbfca8becfc882f19 + .quad 0xbfcabfe5ae46124c + .quad 
0xbfcaf3c94e80bff3 + .quad 0xbfcb2797ee46320c + .quad 0xbfcb5b519e8fb5a4 + .quad 0xbfcb8ef670420c3b + .quad 0xbfcbc286742d8cd6 + .quad 0xbfcbf601bb0e44e2 + .quad 0xbfcc2968558c18c1 + .quad 0xbfcc5cba543ae425 + .quad 0xbfcc8ff7c79a9a22 + .quad 0xbfccc320c0176502 + .quad 0xbfccf6354e09c5dc + .quad 0xbfcd293581b6b3e7 + .quad 0xbfcd5c216b4fbb91 + .quad 0xbfcd8ef91af31d5e + .quad 0xbfcdc1bca0abec7d + .quad 0xbfcdf46c0c722d2f + .quad 0xbfce27076e2af2e6 + .quad 0xbfce598ed5a87e2f + .quad 0xbfce8c0252aa5a60 + .quad 0xbfcebe61f4dd7b0b + .quad 0xbfcef0adcbdc5936 + .quad 0xbfcf22e5e72f105d + .quad 0xbfcf550a564b7b37 + .quad 0xbfcf871b28955045 + .quad 0xbfcfb9186d5e3e2b + .quad 0xbfcfeb0233e607cc + .quad 0xbfd00e6c45ad501d + .quad 0xbfd0274dc16c232f + .quad 0xbfd0402594b4d041 + .quad 0xbfd058f3c703ebc6 + .quad 0xbfd071b85fcd590d + .quad 0xbfd08a73667c57af + .quad 0xbfd0a324e27390e3 + .quad 0xbfd0bbccdb0d24bd + .quad 0xbfd0d46b579ab74b + .quad 0xbfd0ed005f657da4 + .quad 0xbfd1058bf9ae4ad5 + .quad 0xbfd11e0e2dad9cb7 + .quad 0xbfd136870293a8b0 + .quad 0xbfd14ef67f88685a + .quad 0xbfd1675cababa60e + .quad 0xbfd17fb98e15095d + .quad 0xbfd1980d2dd4236f + .quad 0xbfd1b05791f07b49 + .quad 0xbfd1c898c16999fb + .quad 0xbfd1e0d0c33716be + .quad 0xbfd1f8ff9e48a2f3 + .quad 0xbfd211255986160c + .quad 0xbfd22941fbcf7966 + .quad 0xbfd241558bfd1404 + .quad 0xbfd2596010df763a + .quad 0xbfd27161913f853d + .quad 0xbfd2895a13de86a3 + .quad 0xbfd2a1499f762bc9 + .quad 0xbfd2b9303ab89d25 + .quad 0xbfd2d10dec508583 + .quad 0xbfd2e8e2bae11d31 + .quad 0xbfd300aead06350c + .quad 0xbfd31871c9544185 + .quad 0xbfd3302c16586588 + .quad 0xbfd347dd9a987d55 + .quad 0xbfd35f865c93293e + .quad 0xbfd3772662bfd85b + .quad 0xbfd38ebdb38ed321 + .quad 0xbfd3a64c556945ea + .quad 0xbfd3bdd24eb14b6a + .quad 0xbfd3d54fa5c1f710 + .quad 0xbfd3ecc460ef5f50 + .quad 0xbfd404308686a7e4 + .quad 0xbfd41b941cce0bee + .quad 0xbfd432ef2a04e814 + .quad 0xbfd44a41b463c47c + .quad 0xbfd4618bc21c5ec2 + .quad 0xbfd478cd5959b3d9 + .quad 
0xbfd49006804009d1 + .quad 0xbfd4a7373cecf997 + .quad 0xbfd4be5f957778a1 + .quad 0xbfd4d57f8fefe27f + .quad 0xbfd4ec973260026a + .quad 0xbfd503a682cb1cb3 + .quad 0xbfd51aad872df82d + .quad 0xbfd531ac457ee77e + .quad 0xbfd548a2c3add263 + .quad 0xbfd55f9107a43ee2 + .quad 0xbfd5767717455a6c + .quad 0xbfd58d54f86e02f2 + .quad 0xbfd5a42ab0f4cfe2 + .quad 0xbfd5baf846aa1b19 + .quad 0xbfd5d1bdbf5809ca + .quad 0xbfd5e87b20c2954a + .quad 0xbfd5ff3070a793d4 + .quad 0xbfd615ddb4bec13c + .quad 0xbfd62c82f2b9c795 + .quad 0x3fd61965cdb02c1f + .quad 0x3fd602d08af091ec + .quad 0x3fd5ec433d5c35ae + .quad 0x3fd5d5bddf595f30 + .quad 0x3fd5bf406b543db2 + .quad 0x3fd5a8cadbbedfa1 + .quad 0x3fd5925d2b112a59 + .quad 0x3fd57bf753c8d1fb + .quad 0x3fd565995069514c + .quad 0x3fd54f431b7be1a9 + .quad 0x3fd538f4af8f72fe + .quad 0x3fd522ae0738a3d8 + .quad 0x3fd50c6f1d11b97c + .quad 0x3fd4f637ebba9810 + .quad 0x3fd4e0086dd8baca + .quad 0x3fd4c9e09e172c3c + .quad 0x3fd4b3c077267e9a + .quad 0x3fd49da7f3bcc41f + .quad 0x3fd487970e958770 + .quad 0x3fd4718dc271c41b + .quad 0x3fd45b8c0a17df13 + .quad 0x3fd44591e0539f49 + .quad 0x3fd42f9f3ff62642 + .quad 0x3fd419b423d5e8c7 + .quad 0x3fd403d086cea79c + .quad 0x3fd3edf463c1683e + .quad 0x3fd3d81fb5946dba + .quad 0x3fd3c25277333184 + .quad 0x3fd3ac8ca38e5c5f + .quad 0x3fd396ce359bbf54 + .quad 0x3fd3811728564cb2 + .quad 0x3fd36b6776be1117 + .quad 0x3fd355bf1bd82c8b + .quad 0x3fd3401e12aecba1 + .quad 0x3fd32a84565120a8 + .quad 0x3fd314f1e1d35ce4 + .quad 0x3fd2ff66b04ea9d4 + .quad 0x3fd2e9e2bce12286 + .quad 0x3fd2d46602adccee + .quad 0x3fd2bef07cdc9354 + .quad 0x3fd2a982269a3dbf + .quad 0x3fd2941afb186b7c + .quad 0x3fd27ebaf58d8c9d + .quad 0x3fd269621134db92 + .quad 0x3fd25410494e56c7 + .quad 0x3fd23ec5991eba49 + .quad 0x3fd22981fbef797b + .quad 0x3fd214456d0eb8d4 + .quad 0x3fd1ff0fe7cf47a7 + .quad 0x3fd1e9e1678899f4 + .quad 0x3fd1d4b9e796c245 + .quad 0x3fd1bf99635a6b95 + .quad 0x3fd1aa7fd638d33f + .quad 0x3fd1956d3b9bc2fa + .quad 0x3fd180618ef18adf + .quad 
0x3fd16b5ccbacfb73 + .quad 0x3fd1565eed455fc3 + .quad 0x3fd14167ef367783 + .quad 0x3fd12c77cd00713b + .quad 0x3fd1178e8227e47c + .quad 0x3fd102ac0a35cc1c + .quad 0x3fd0edd060b78081 + .quad 0x3fd0d8fb813eb1ef + .quad 0x3fd0c42d676162e3 + .quad 0x3fd0af660eb9e279 + .quad 0x3fd09aa572e6c6d4 + .quad 0x3fd085eb8f8ae797 + .quad 0x3fd07138604d5862 + .quad 0x3fd05c8be0d9635a + .quad 0x3fd047e60cde83b8 + .quad 0x3fd03346e0106062 + .quad 0x3fd01eae5626c691 + .quad 0x3fd00a1c6adda473 + .quad 0x3fcfeb2233ea07cd + .quad 0x3fcfc218be620a5e + .quad 0x3fcf991c6cb3b379 + .quad 0x3fcf702d36777df0 + .quad 0x3fcf474b134df229 + .quad 0x3fcf1e75fadf9bde + .quad 0x3fcef5ade4dcffe6 + .quad 0x3fceccf2c8fe920a + .quad 0x3fcea4449f04aaf5 + .quad 0x3fce7ba35eb77e2a + .quad 0x3fce530effe71012 + .quad 0x3fce2a877a6b2c12 + .quad 0x3fce020cc6235ab5 + .quad 0x3fcdd99edaf6d7e9 + .quad 0x3fcdb13db0d48940 + .quad 0x3fcd88e93fb2f450 + .quad 0x3fcd60a17f903515 + .quad 0x3fcd38666871f465 + .quad 0x3fcd1037f2655e7b + .quad 0x3fcce816157f1988 + .quad 0x3fccc000c9db3c52 + .quad 0x3fcc97f8079d44ec + .quad 0x3fcc6ffbc6f00f71 + .quad 0x3fcc480c0005ccd1 + .quad 0x3fcc2028ab17f9b4 + .quad 0x3fcbf851c067555f + .quad 0x3fcbd087383bd8ad + .quad 0x3fcba8c90ae4ad19 + .quad 0x3fcb811730b823d2 + .quad 0x3fcb5971a213acdb + .quad 0x3fcb31d8575bce3d + .quad 0x3fcb0a4b48fc1b46 + .quad 0x3fcae2ca6f672bd4 + .quad 0x3fcabb55c31693ad + .quad 0x3fca93ed3c8ad9e3 + .quad 0x3fca6c90d44b704e + .quad 0x3fca454082e6ab05 + .quad 0x3fca1dfc40f1b7f1 + .quad 0x3fc9f6c407089664 + .quad 0x3fc9cf97cdce0ec3 + .quad 0x3fc9a8778debaa38 + .quad 0x3fc981634011aa75 + .quad 0x3fc95a5adcf7017f + .quad 0x3fc9335e5d594989 + .quad 0x3fc90c6db9fcbcd9 + .quad 0x3fc8e588ebac2dbf + .quad 0x3fc8beafeb38fe8c + .quad 0x3fc897e2b17b19a5 + .quad 0x3fc871213750e994 + .quad 0x3fc84a6b759f512f + .quad 0x3fc823c16551a3c2 + .quad 0x3fc7fd22ff599d4f + .quad 0x3fc7d6903caf5ad0 + .quad 0x3fc7b0091651528c + .quad 0x3fc7898d85444c73 + .quad 0x3fc7631d82935a86 + .quad 
0x3fc73cb9074fd14d + .quad 0x3fc716600c914054 + .quad 0x3fc6f0128b756abc + .quad 0x3fc6c9d07d203fc7 + .quad 0x3fc6a399dabbd383 + .quad 0x3fc67d6e9d785771 + .quad 0x3fc6574ebe8c133a + .quad 0x3fc6313a37335d76 + .quad 0x3fc60b3100b09476 + .quad 0x3fc5e533144c1719 + .quad 0x3fc5bf406b543db2 + .quad 0x3fc59958ff1d52f1 + .quad 0x3fc5737cc9018cdd + .quad 0x3fc54dabc26105d2 + .quad 0x3fc527e5e4a1b58d + .quad 0x3fc5022b292f6a45 + .quad 0x3fc4dc7b897bc1c8 + .quad 0x3fc4b6d6fefe22a4 + .quad 0x3fc4913d8333b561 + .quad 0x3fc46baf0f9f5db7 + .quad 0x3fc4462b9dc9b3dc + .quad 0x3fc420b32740fdd4 + .quad 0x3fc3fb45a59928cc + .quad 0x3fc3d5e3126bc27f + .quad 0x3fc3b08b6757f2a9 + .quad 0x3fc38b3e9e027479 + .quad 0x3fc365fcb0159016 + .quad 0x3fc340c59741142e + .quad 0x3fc31b994d3a4f85 + .quad 0x3fc2f677cbbc0a96 + .quad 0x3fc2d1610c86813a + .quad 0x3fc2ac55095f5c59 + .quad 0x3fc28753bc11aba5 + .quad 0x3fc2625d1e6ddf57 + .quad 0x3fc23d712a49c202 + .quad 0x3fc2188fd9807263 + .quad 0x3fc1f3b925f25d41 + .quad 0x3fc1ceed09853752 + .quad 0x3fc1aa2b7e23f72a + .quad 0x3fc185747dbecf34 + .quad 0x3fc160c8024b27b1 + .quad 0x3fc13c2605c398c3 + .quad 0x3fc1178e8227e47c + .quad 0x3fc0f301717cf0fb + .quad 0x3fc0ce7ecdccc28d + .quad 0x3fc0aa06912675d5 + .quad 0x3fc08598b59e3a07 + .quad 0x3fc06135354d4b18 + .quad 0x3fc03cdc0a51ec0d + .quad 0x3fc0188d2ecf6140 + .quad 0x3fbfe89139dbd566 + .quad 0x3fbfa01c9db57ce2 + .quad 0x3fbf57bc7d9005db + .quad 0x3fbf0f70cdd992e3 + .quad 0x3fbec739830a1120 + .quad 0x3fbe7f1691a32d3e + .quad 0x3fbe3707ee30487b + .quad 0x3fbdef0d8d466db9 + .quad 0x3fbda727638446a2 + .quad 0x3fbd5f55659210e2 + .quad 0x3fbd179788219364 + .quad 0x3fbccfedbfee13a8 + .quad 0x3fbc885801bc4b23 + .quad 0x3fbc40d6425a5cb1 + .quad 0x3fbbf968769fca11 + .quad 0x3fbbb20e936d6974 + .quad 0x3fbb6ac88dad5b1c + .quad 0x3fbb23965a52ff00 + .quad 0x3fbadc77ee5aea8c + .quad 0x3fba956d3ecade63 + .quad 0x3fba4e7640b1bc38 + .quad 0x3fba0792e9277cac + .quad 0x3fb9c0c32d4d2548 + .quad 0x3fb97a07024cbe74 + .quad 
0x3fb9335e5d594989 + .quad 0x3fb8ecc933aeb6e8 + .quad 0x3fb8a6477a91dc29 + .quad 0x3fb85fd927506a48 + .quad 0x3fb8197e2f40e3f0 + .quad 0x3fb7d33687c293c9 + .quad 0x3fb78d02263d82d3 + .quad 0x3fb746e100226ed9 + .quad 0x3fb700d30aeac0e1 + .quad 0x3fb6bad83c1883b6 + .quad 0x3fb674f089365a7a + .quad 0x3fb62f1be7d77743 + .quad 0x3fb5e95a4d9791cb + .quad 0x3fb5a3abb01ade25 + .quad 0x3fb55e10050e0384 + .quad 0x3fb518874226130a + .quad 0x3fb4d3115d207eac + .quad 0x3fb48dae4bc31018 + .quad 0x3fb4485e03dbdfad + .quad 0x3fb403207b414b7f + .quad 0x3fb3bdf5a7d1ee64 + .quad 0x3fb378dd7f749714 + .quad 0x3fb333d7f8183f4b + .quad 0x3fb2eee507b40301 + .quad 0x3fb2aa04a44717a5 + .quad 0x3fb26536c3d8c369 + .quad 0x3fb2207b5c78549e + .quad 0x3fb1dbd2643d190b + .quad 0x3fb1973bd1465567 + .quad 0x3fb152b799bb3cc9 + .quad 0x3fb10e45b3cae831 + .quad 0x3fb0c9e615ac4e17 + .quad 0x3fb08598b59e3a07 + .quad 0x3fb0415d89e74444 + .quad 0x3faffa6911ab9301 + .quad 0x3faf723b517fc523 + .quad 0x3faeea31c006b87c + .quad 0x3fae624c4a0b5e1b + .quad 0x3fadda8adc67ee4e + .quad 0x3fad52ed6405d86f + .quad 0x3faccb73cdddb2cc + .quad 0x3fac441e06f72a9e + .quad 0x3fabbcebfc68f420 + .quad 0x3fab35dd9b58baad + .quad 0x3faaaef2d0fb10fc + .quad 0x3faa282b8a936171 + .quad 0x3fa9a187b573de7c + .quad 0x3fa91b073efd7314 + .quad 0x3fa894aa149fb343 + .quad 0x3fa80e7023d8ccc4 + .quad 0x3fa788595a3577ba + .quad 0x3fa70265a550e777 + .quad 0x3fa67c94f2d4bb58 + .quad 0x3fa5f6e73078efb8 + .quad 0x3fa5715c4c03ceef + .quad 0x3fa4ebf43349e26f + .quad 0x3fa466aed42de3ea + .quad 0x3fa3e18c1ca0ae92 + .quad 0x3fa35c8bfaa1306b + .quad 0x3fa2d7ae5c3c5bae + .quad 0x3fa252f32f8d183f + .quad 0x3fa1ce5a62bc353a + .quad 0x3fa149e3e4005a8d + .quad 0x3fa0c58fa19dfaaa + .quad 0x3fa0415d89e74444 + .quad 0x3f9f7a9b16782856 + .quad 0x3f9e72bf2813ce51 + .quad 0x3f9d6b2725979802 + .quad 0x3f9c63d2ec14aaf2 + .quad 0x3f9b5cc258b718e6 + .quad 0x3f9a55f548c5c43f + .quad 0x3f994f6b99a24475 + .quad 0x3f98492528c8cabf + .quad 0x3f974321d3d006d3 + .quad 
0x3f963d6178690bd6 + .quad 0x3f9537e3f45f3565 + .quad 0x3f9432a925980cc1 + .quad 0x3f932db0ea132e22 + .quad 0x3f9228fb1fea2e28 + .quad 0x3f912487a5507f70 + .quad 0x3f90205658935847 + .quad 0x3f8e38ce3033310c + .quad 0x3f8c317384c75f06 + .quad 0x3f8a2a9c6c170462 + .quad 0x3f882448a388a2aa + .quad 0x3f861e77e8b53fc6 + .quad 0x3f841929f96832f0 + .quad 0x3f82145e939ef1e9 + .quad 0x3f8010157588de71 + .quad 0x3f7c189cbb0e27fb + .quad 0x3f78121214586b54 + .quad 0x3f740c8a747878e2 + .quad 0x3f70080559588b35 + .quad 0x3f680904828985c0 + .quad 0x3f60040155d5889e + .quad 0x3f50020055655889 + .quad 0x0000000000000000 + /*== poly_coeff[4] ==*/ + .align 32 + .quad 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A, 0x3fc9999CACDB4D0A /* coeff4 */ + .quad 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1, 0xbfd0000148058EE1 /* coeff3 */ + .quad 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5, 0x3fd55555555543C5 /* coeff2 */ + .quad 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F, 0xbfdFFFFFFFFFF81F /* coeff1 */ + /*== ExpMask ==*/ + .align 32 + .quad 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff + /*== Two10 ==*/ + .align 32 + .quad 0x3f50000000000000, 0x3f50000000000000, 0x3f50000000000000, 0x3f50000000000000 + /*== MinLog1p = -1+2^(-53) ==*/ + .align 32 + .quad 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff, 0xbfefffffffffffff + /*== MaxLog1p ==*/ + .align 32 + .quad 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000, 0x7f3ffffffffff000 + /*== One ==*/ + .align 32 + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 + /*== SgnMask ==*/ + .align 32 + .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff + /*== XThreshold ==*/ + .align 32 + .quad 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000, 0x3e00000000000000 + /*== XhMask ==*/ + .align 32 + .quad 0xfffffffffffffc00, 0xfffffffffffffc00, 0xfffffffffffffc00, 
0xfffffffffffffc00 + /*== Threshold ==*/ + .align 32 + .quad 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000, 0x4086a00000000000 + /*== Bias ==*/ + .align 32 + .quad 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000, 0x408ff80000000000 + /*== Bias1 ==*/ + .align 32 + .quad 0x408ff00000000000, 0x408ff00000000000, 0x408ff00000000000, 0x408ff00000000000 + /*== ExpMask ==*/ + .align 32 + .quad 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 + /*== ExpMask2 ==*/ + .align 32 + .quad 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000, 0x7f40000000000000 + /*== L2L ==*/ + .align 32 + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF + .align 32 + .type __svml_dlog1p_data_internal,@object + .size __svml_dlog1p_data_internal,.-__svml_dlog1p_data_internal diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S new file mode 100644 index 0000000000..ca174a5f52 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized log1p, vector length is 8. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define _ZGVeN8v_log1p _ZGVeN8v_log1p_avx2_wrapper +#include "../svml_d_log1p8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c new file mode 100644 index 0000000000..0aa35ec8c5 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized log1p, vector length is 8: _ZGVeN8v_log1p is defined as an IFUNC whose implementation is picked by IFUNC_SELECTOR (presumably provided by ifunc-mathvec-avx512-skx.h -- confirm there). + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVeN8v_log1p +#include "ifunc-mathvec-avx512-skx.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8v_log1p, __GI__ZGVeN8v_log1p, __redirect__ZGVeN8v_log1p) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S new file mode 100644 index 0000000000..5e38ff8d39 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p8_core_avx512.S @@ -0,0 +1,317 @@ +/* Function log1p vectorized with AVX-512. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. */ + +/* + * ALGORITHM DESCRIPTION: + * + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) + * Get short reciprocal approximation Rcp ~ 1/xh + * R = (Rcp*xh - 1.0) + Rcp*xl + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) + * log(Rcp) is tabulated + * + * + */ + +/* Offsets for data table __svml_dlog1p_data_internal_avx512 + */ +#define Log_tbl 0 +#define One 128 +#define SgnMask 192 +#define C075 256 +#define poly_coeff9 320 +#define poly_coeff8 384 +#define poly_coeff7 448 +#define poly_coeff6 512 +#define poly_coeff5 576 +#define poly_coeff4 640 +#define poly_coeff3 704 +#define poly_coeff2 768 +#define L2 832 + +#include <sysdep.h> + + .text + .section .text.evex512,"ax",@progbits +ENTRY(_ZGVeN8v_log1p_skx) + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-64, %rsp + subq $192, %rsp + vmovups One+__svml_dlog1p_data_internal_avx512(%rip), %zmm7 + vmovups SgnMask+__svml_dlog1p_data_internal_avx512(%rip), %zmm14 + vmovaps %zmm0, %zmm9 + vaddpd {rn-sae}, %zmm9, %zmm7, %zmm11 + vandpd %zmm14, %zmm9, %zmm8 + +/* compute 1+x as high, low parts */ + vmaxpd {sae}, %zmm9, %zmm7, %zmm10 + vminpd {sae}, %zmm9, %zmm7, %zmm12 + +/* GetMant(x), normalized to [1,2) for x>=0, NaN for x<0 */ + vgetmantpd $8, {sae}, %zmm11, %zmm6 + +/* GetExp(x) */ + 
vgetexppd {sae}, %zmm11, %zmm5 + vsubpd {rn-sae}, %zmm10, %zmm11, %zmm13 + +/* DblRcp ~ 1/Mantissa */ + vrcp14pd %zmm6, %zmm15 + +/* Start polynomial evaluation */ + vmovups poly_coeff9+__svml_dlog1p_data_internal_avx512(%rip), %zmm10 + vmovups poly_coeff7+__svml_dlog1p_data_internal_avx512(%rip), %zmm11 + +/* Xl */ + vsubpd {rn-sae}, %zmm13, %zmm12, %zmm2 + vxorpd %zmm14, %zmm5, %zmm3 + +/* round DblRcp to 4 fractional bits (RN mode, no Precision exception) */ + vrndscalepd $88, {sae}, %zmm15, %zmm4 + vmovups poly_coeff5+__svml_dlog1p_data_internal_avx512(%rip), %zmm12 + vmovups poly_coeff6+__svml_dlog1p_data_internal_avx512(%rip), %zmm14 + vmovups poly_coeff3+__svml_dlog1p_data_internal_avx512(%rip), %zmm13 + +/* Xl*2^(-Expon) */ + vscalefpd {rn-sae}, %zmm3, %zmm2, %zmm1 + +/* Reduced argument: R = DblRcp*(Mantissa+Xl) - 1 */ + vfmsub213pd {rn-sae}, %zmm7, %zmm4, %zmm6 + vmovups __svml_dlog1p_data_internal_avx512(%rip), %zmm3 + +/* + * Table lookup + * Prepare exponent correction: DblRcp<0.75? 
+ */ + vmovups C075+__svml_dlog1p_data_internal_avx512(%rip), %zmm2 + +/* Prepare table index */ + vpsrlq $48, %zmm4, %zmm0 + vfmadd231pd {rn-sae}, %zmm4, %zmm1, %zmm6 + vmovups poly_coeff8+__svml_dlog1p_data_internal_avx512(%rip), %zmm1 + vcmppd $17, {sae}, %zmm2, %zmm4, %k1 + vcmppd $4, {sae}, %zmm6, %zmm6, %k0 + vfmadd231pd {rn-sae}, %zmm6, %zmm10, %zmm1 + vmovups poly_coeff4+__svml_dlog1p_data_internal_avx512(%rip), %zmm10 + vfmadd231pd {rn-sae}, %zmm6, %zmm11, %zmm14 + vmovups L2+__svml_dlog1p_data_internal_avx512(%rip), %zmm4 + vpermt2pd Log_tbl+64+__svml_dlog1p_data_internal_avx512(%rip), %zmm0, %zmm3 + +/* add 1 to Expon if DblRcp<0.75 */ + vaddpd {rn-sae}, %zmm7, %zmm5, %zmm5{%k1} + +/* R^2 */ + vmulpd {rn-sae}, %zmm6, %zmm6, %zmm0 + vfmadd231pd {rn-sae}, %zmm6, %zmm12, %zmm10 + vmovups poly_coeff2+__svml_dlog1p_data_internal_avx512(%rip), %zmm12 + vmulpd {rn-sae}, %zmm0, %zmm0, %zmm15 + vfmadd231pd {rn-sae}, %zmm6, %zmm13, %zmm12 + vfmadd213pd {rn-sae}, %zmm14, %zmm0, %zmm1 + kmovw %k0, %edx + vfmadd213pd {rn-sae}, %zmm12, %zmm0, %zmm10 + +/* polynomial */ + vfmadd213pd {rn-sae}, %zmm10, %zmm15, %zmm1 + vfmadd213pd {rn-sae}, %zmm6, %zmm0, %zmm1 + vaddpd {rn-sae}, %zmm1, %zmm3, %zmm6 + vfmadd213pd {rn-sae}, %zmm6, %zmm4, %zmm5 + vorpd %zmm8, %zmm5, %zmm0 + testl %edx, %edx + +/* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx r12 r13 r14 r15 edx zmm0 zmm9 + +/* Restore registers + * and exit the function + */ + +L(EXIT): + movq %rbp, %rsp + popq %rbp + cfi_def_cfa(7, 8) + cfi_restore(6) + ret + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + +/* Branch to process + * special inputs + */ + +L(SPECIAL_VALUES_BRANCH): + vmovups %zmm9, 64(%rsp) + vmovups %zmm0, 128(%rsp) + # LOE rbx r12 r13 r14 r15 edx zmm0 + + xorl %eax, %eax + # LOE rbx r12 r13 r14 r15 eax edx + + vzeroupper + movq %r12, 16(%rsp) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ + 
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 + movl %eax, %r12d + movq %r13, 8(%rsp) + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 + movl %edx, %r13d + movq %r14, (%rsp) + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r15 r12d r13d + +/* Range mask + * bits check + */ + +L(RANGEMASK_CHECK): + btl %r12d, %r13d + +/* Call scalar math function */ + jc L(SCALAR_MATH_CALL) + # LOE rbx r15 r12d r13d + +/* Special inputs + * processing loop + */ + +L(SPECIAL_VALUES_LOOP): + incl %r12d + cmpl $8, %r12d + +/* Check bits in range mask */ + jl L(RANGEMASK_CHECK) + # LOE rbx r15 r12d r13d + + movq 16(%rsp), %r12 + cfi_restore(12) + movq 8(%rsp), %r13 + cfi_restore(13) + movq (%rsp), %r14 + cfi_restore(14) + vmovups 128(%rsp), %zmm0 + +/* Go to exit */ + jmp L(EXIT) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r12 r13 r14 r15 zmm0 + +/* 
Scalar math function call + * to process special input + */ + +L(SCALAR_MATH_CALL): + movl %r12d, %r14d + movsd 64(%rsp,%r14,8), %xmm0 + call log1p@PLT + # LOE rbx r14 r15 r12d r13d xmm0 + + movsd %xmm0, 128(%rsp,%r14,8) + +/* Process special inputs in loop */ + jmp L(SPECIAL_VALUES_LOOP) + # LOE rbx r15 r12d r13d +END(_ZGVeN8v_log1p_skx) + + .section .rodata, "a" + .align 64 + +#ifdef __svml_dlog1p_data_internal_avx512_typedef +typedef unsigned int VUINT32; +typedef struct { + __declspec(align(64)) VUINT32 Log_tbl[16][2]; + __declspec(align(64)) VUINT32 One[8][2]; + __declspec(align(64)) VUINT32 SgnMask[8][2]; + __declspec(align(64)) VUINT32 C075[8][2]; + __declspec(align(64)) VUINT32 poly_coeff9[8][2]; + __declspec(align(64)) VUINT32 poly_coeff8[8][2]; + __declspec(align(64)) VUINT32 poly_coeff7[8][2]; + __declspec(align(64)) VUINT32 poly_coeff6[8][2]; + __declspec(align(64)) VUINT32 poly_coeff5[8][2]; + __declspec(align(64)) VUINT32 poly_coeff4[8][2]; + __declspec(align(64)) VUINT32 poly_coeff3[8][2]; + __declspec(align(64)) VUINT32 poly_coeff2[8][2]; + __declspec(align(64)) VUINT32 L2[8][2]; + } __svml_dlog1p_data_internal_avx512; +#endif +__svml_dlog1p_data_internal_avx512: + /*== Log_tbl ==*/ + .quad 0x0000000000000000 + .quad 0xbfaf0a30c01162a6 + .quad 0xbfbe27076e2af2e6 + .quad 0xbfc5ff3070a793d4 + .quad 0xbfcc8ff7c79a9a22 + .quad 0xbfd1675cababa60e + .quad 0xbfd4618bc21c5ec2 + .quad 0xbfd739d7f6bbd007 + .quad 0x3fd269621134db92 + .quad 0x3fcf991c6cb3b379 + .quad 0x3fca93ed3c8ad9e3 + .quad 0x3fc5bf406b543db2 + .quad 0x3fc1178e8227e47c + .quad 0x3fb9335e5d594989 + .quad 0x3fb08598b59e3a07 + .quad 0x3fa0415d89e74444 + /*== One ==*/ + .align 64 + .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 + /*== SgnMask ==*/ + .align 64 + .quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000,
0x8000000000000000, 0x8000000000000000, 0x8000000000000000 + /*== C075 0.75 ==*/ + .align 64 + .quad 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000 + /*== poly_coeff9 ==*/ + .align 64 + .quad 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70, 0x3fbC81CD309D7C70 + /*== poly_coeff8 ==*/ + .align 64 + .quad 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62, 0xbfc007357E93AF62 + /*== poly_coeff7 ==*/ + .align 64 + .quad 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF, 0x3fc249229CEE81EF + /*== poly_coeff6 ==*/ + .align 64 + .quad 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06, 0xbfc55553FB28DB06 + /*== poly_coeff5 ==*/ + .align 64 + .quad 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C, 0x3fc9999999CC9F5C + /*== poly_coeff4 ==*/ + .align 64 + .quad 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD, 0xbfd00000000C05BD + /*== poly_coeff3 ==*/ + .align 64 + .quad 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466, 0x3fd5555555555466 + /*== poly_coeff2 ==*/ + .align 64 + .quad 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6, 0xbfdFFFFFFFFFFFC6 + /*== L2 = log(2) ==*/ + .align 64 + .quad 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 
0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF, 0x3fe62E42FEFA39EF + .align 64 + .type __svml_dlog1p_data_internal_avx512,@object + .size __svml_dlog1p_data_internal_avx512,.-__svml_dlog1p_data_internal_avx512 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S new file mode 100644 index 0000000000..3c0a0a01a2 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized log1pf. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define _ZGVeN16v_log1pf _ZGVeN16v_log1pf_avx2_wrapper +#include "../svml_s_log1pf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c new file mode 100644 index 0000000000..9af1320547 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized log1pf, vector length is 16. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVeN16v_log1pf +#include "ifunc-mathvec-avx512-skx.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16v_log1pf, __GI__ZGVeN16v_log1pf, + __redirect__ZGVeN16v_log1pf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S new file mode 100644 index 0000000000..78b2fe417f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf16_core_avx512.S @@ -0,0 +1,271 @@ +/* Function log1pf vectorized with AVX-512. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. */ + +/* + * ALGORITHM DESCRIPTION: + * + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) + * Get short reciprocal approximation Rcp ~ 1/xh + * R = (Rcp*xh - 1.0) + Rcp*xl + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) + * log(Rcp) is tabulated + * + * + */ + +/* Offsets for data table __svml_slog1p_data_internal + */ +#define SgnMask 0 +#define sOne 64 +#define sPoly_1 128 +#define sPoly_2 192 +#define sPoly_3 256 +#define sPoly_4 320 +#define sPoly_5 384 +#define sPoly_6 448 +#define sPoly_7 512 +#define sPoly_8 576 +#define iHiDelta 640 +#define iLoRange 704 +#define iBrkValue 768 +#define iOffExpoMask 832 +#define sLn2 896 + +#include <sysdep.h> + + .text + .section .text.evex512,"ax",@progbits +ENTRY(_ZGVeN16v_log1pf_skx) + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-64, %rsp + subq $192, %rsp + vmovups sOne+__svml_slog1p_data_internal(%rip), %zmm2 + +/* reduction: compute r,n */ + vmovups iBrkValue+__svml_slog1p_data_internal(%rip), %zmm12 + vmovups SgnMask+__svml_slog1p_data_internal(%rip), %zmm4 + vmovaps %zmm0, %zmm3 + +/* compute 1+x as high, low parts */ + vmaxps {sae}, %zmm3, %zmm2, %zmm5 + vminps {sae}, %zmm3, %zmm2, %zmm7 + vandnps %zmm3, %zmm4, %zmm1 + vpternlogd $255, %zmm4, %zmm4, %zmm4 + vaddps {rn-sae}, %zmm7, %zmm5, %zmm9 + vpsubd %zmm12, %zmm9, %zmm10 + vsubps {rn-sae}, %zmm9, %zmm5, %zmm6 + +/* check argument value ranges */ + vpaddd iHiDelta+__svml_slog1p_data_internal(%rip), %zmm9, %zmm8 + vpsrad $23, %zmm10, %zmm13 + vmovups sPoly_5+__svml_slog1p_data_internal(%rip), %zmm9 + vpcmpd $5, iLoRange+__svml_slog1p_data_internal(%rip), %zmm8, %k1 + vpslld $23, %zmm13, %zmm14 + vaddps {rn-sae}, %zmm7, %zmm6, %zmm15 + vcvtdq2ps {rn-sae}, %zmm13, %zmm0 + vpsubd %zmm14, %zmm2, %zmm13 + vmovups
sPoly_8+__svml_slog1p_data_internal(%rip), %zmm7 + vmovups sPoly_1+__svml_slog1p_data_internal(%rip), %zmm14 + vmulps {rn-sae}, %zmm13, %zmm15, %zmm6 + vpandd iOffExpoMask+__svml_slog1p_data_internal(%rip), %zmm10, %zmm11 + vpaddd %zmm12, %zmm11, %zmm5 + vmovups sPoly_4+__svml_slog1p_data_internal(%rip), %zmm10 + vmovups sPoly_3+__svml_slog1p_data_internal(%rip), %zmm11 + vmovups sPoly_2+__svml_slog1p_data_internal(%rip), %zmm12 + +/* polynomial evaluation */ + vsubps {rn-sae}, %zmm2, %zmm5, %zmm2 + vaddps {rn-sae}, %zmm6, %zmm2, %zmm15 + vmovups sPoly_7+__svml_slog1p_data_internal(%rip), %zmm2 + vfmadd231ps {rn-sae}, %zmm15, %zmm7, %zmm2 + vpandnd %zmm8, %zmm8, %zmm4{%k1} + vmovups sPoly_6+__svml_slog1p_data_internal(%rip), %zmm8 + +/* combine and get argument value range mask */ + vptestmd %zmm4, %zmm4, %k0 + vfmadd213ps {rn-sae}, %zmm8, %zmm15, %zmm2 + kmovw %k0, %edx + vfmadd213ps {rn-sae}, %zmm9, %zmm15, %zmm2 + vfmadd213ps {rn-sae}, %zmm10, %zmm15, %zmm2 + vfmadd213ps {rn-sae}, %zmm11, %zmm15, %zmm2 + vfmadd213ps {rn-sae}, %zmm12, %zmm15, %zmm2 + vfmadd213ps {rn-sae}, %zmm14, %zmm15, %zmm2 + vmulps {rn-sae}, %zmm15, %zmm2, %zmm4 + vfmadd213ps {rn-sae}, %zmm15, %zmm15, %zmm4 + +/* final reconstruction */ + vmovups sLn2+__svml_slog1p_data_internal(%rip), %zmm15 + vfmadd213ps {rn-sae}, %zmm4, %zmm15, %zmm0 + vorps %zmm1, %zmm0, %zmm0 + testl %edx, %edx + +/* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx r12 r13 r14 r15 edx zmm0 zmm3 + +/* Restore registers + * and exit the function + */ + +L(EXIT): + movq %rbp, %rsp + popq %rbp + cfi_def_cfa(7, 8) + cfi_restore(6) + ret + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + +/* Branch to process + * special inputs + */ + +L(SPECIAL_VALUES_BRANCH): + vmovups %zmm3, 64(%rsp) + vmovups %zmm0, 128(%rsp) + # LOE rbx r12 r13 r14 r15 edx zmm0 + + xorl %eax, %eax + # LOE rbx r12 r13 r14 r15 eax edx + + vzeroupper + movq %r12, 16(%rsp) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; 
DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 + movl %eax, %r12d + movq %r13, 8(%rsp) + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 + movl %edx, %r13d + movq %r14, (%rsp) + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r15 r12d r13d + +/* Range mask + * bits check + */ + +L(RANGEMASK_CHECK): + btl %r12d, %r13d + +/* Call scalar math function */ + jc L(SCALAR_MATH_CALL) + # LOE rbx r15 r12d r13d + +/* Special inputs + * processing loop + */ + +L(SPECIAL_VALUES_LOOP): + incl %r12d + cmpl $16, %r12d + +/* Check bits in range mask */ + jl L(RANGEMASK_CHECK) + # LOE rbx r15 r12d r13d + + movq 16(%rsp), %r12 + cfi_restore(12) + movq 8(%rsp), %r13 + cfi_restore(13) + movq (%rsp), %r14 + cfi_restore(14) + vmovups 128(%rsp), %zmm0 + +/* Go to exit */ + jmp L(EXIT) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 
0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r12 r13 r14 r15 zmm0 + +/* Scalar math function call + * to process special input + */ + +L(SCALAR_MATH_CALL): + movl %r12d, %r14d + movss 64(%rsp,%r14,4), %xmm0 + call log1pf@PLT + # LOE rbx r14 r15 r12d r13d xmm0 + + movss %xmm0, 128(%rsp,%r14,4) + +/* Process special inputs in loop */ + jmp L(SPECIAL_VALUES_LOOP) + # LOE rbx r15 r12d r13d +END(_ZGVeN16v_log1pf_skx) + + .section .rodata, "a" + .align 64 + +#ifdef __svml_slog1p_data_internal_typedef +typedef unsigned int VUINT32; +typedef struct { + __declspec(align(64)) VUINT32 SgnMask[16][1]; + __declspec(align(64)) VUINT32 sOne[16][1]; + __declspec(align(64)) VUINT32 sPoly[8][16][1]; + __declspec(align(64)) VUINT32 iHiDelta[16][1]; + __declspec(align(64)) VUINT32 iLoRange[16][1]; + __declspec(align(64)) VUINT32 iBrkValue[16][1]; + __declspec(align(64)) VUINT32 iOffExpoMask[16][1]; + __declspec(align(64)) VUINT32 sLn2[16][1]; +} __svml_slog1p_data_internal; +#endif +__svml_slog1p_data_internal: + /*== SgnMask ==*/ + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff + /*== sOne = SP 1.0 ==*/ + .align 64 + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 + /*== sPoly[] = SP polynomial ==*/ + .align 64 + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01
P1 */ + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ + /*== iHiDelta = SP 80000000-7f000000 ==*/ + .align 64 + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000 + /*== iLoRange = SP 00800000+iHiDelta ==*/ + .align 64 + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000 + /*== iBrkValue = SP 2/3 ==*/ + .align 
64 + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab + /*== iOffExpoMask = SP significand mask ==*/ + .align 64 + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff + /*== sLn2 = SP ln(2) ==*/ + .align 64 + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 + .align 64 + .type __svml_slog1p_data_internal,@object + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S new file mode 100644 index 0000000000..913c8290c8 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized log1pf, vector length is 4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define _ZGVbN4v_log1pf _ZGVbN4v_log1pf_sse2 +#include "../svml_s_log1pf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c new file mode 100644 index 0000000000..b6aff48023 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized log1pf, vector length is 4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN4v_log1pf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4v_log1pf, __GI__ZGVbN4v_log1pf, + __redirect__ZGVbN4v_log1pf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S new file mode 100644 index 0000000000..ef1bae58c0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf4_core_sse4.S @@ -0,0 +1,252 @@ +/* Function log1pf vectorized with SSE4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. */ + +/* + * ALGORITHM DESCRIPTION: + * + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) + * Get short reciprocal approximation Rcp ~ 1/xh + * R = (Rcp*xh - 1.0) + Rcp*xl + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) + * log(Rcp) is tabulated + * + * + */ + +/* Offsets for data table __svml_slog1p_data_internal + */ +#define SgnMask 0 +#define sOne 16 +#define sPoly 32 +#define iHiDelta 160 +#define iLoRange 176 +#define iBrkValue 192 +#define iOffExpoMask 208 +#define sLn2 224 + +#include <sysdep.h> + + .text + .section .text.sse4,"ax",@progbits +ENTRY(_ZGVbN4v_log1pf_sse4) + subq $72, %rsp + cfi_def_cfa_offset(80) + movups sOne+__svml_slog1p_data_internal(%rip), %xmm7 + +/* compute 1+x as high, low parts */ + movaps %xmm7, %xmm1 + movaps %xmm7, %xmm5 + maxps %xmm0, %xmm1 + minps %xmm0, %xmm5 + movaps %xmm1, %xmm4 + +/* check argument value ranges */ + movdqu iHiDelta+__svml_slog1p_data_internal(%rip), %xmm2 + addps %xmm5, %xmm4 + +/* reduction: compute r,n */ + movdqu iBrkValue+__svml_slog1p_data_internal(%rip), %xmm3 + paddd %xmm4, %xmm2 + movdqu iOffExpoMask+__svml_slog1p_data_internal(%rip), %xmm8 + subps %xmm4, %xmm1 + psubd %xmm3, %xmm4 + addps %xmm1, %xmm5 + pand %xmm4, %xmm8 + psrad $23, %xmm4 + cvtdq2ps %xmm4, %xmm10 + pslld $23, %xmm4 + movaps %xmm7, %xmm1 + paddd %xmm3, 
%xmm8 + psubd %xmm4, %xmm1 + mulps %xmm5, %xmm1 + +/* polynomial evaluation */ + subps %xmm7, %xmm8 + +/* final reconstruction */ + mulps sLn2+__svml_slog1p_data_internal(%rip), %xmm10 + addps %xmm8, %xmm1 + movups sPoly+112+__svml_slog1p_data_internal(%rip), %xmm9 + mulps %xmm1, %xmm9 + movdqu iLoRange+__svml_slog1p_data_internal(%rip), %xmm6 + pcmpgtd %xmm2, %xmm6 + addps sPoly+96+__svml_slog1p_data_internal(%rip), %xmm9 + +/* combine and get argument value range mask */ + movmskps %xmm6, %edx + movups SgnMask+__svml_slog1p_data_internal(%rip), %xmm11 + mulps %xmm1, %xmm9 + andnps %xmm0, %xmm11 + addps sPoly+80+__svml_slog1p_data_internal(%rip), %xmm9 + mulps %xmm1, %xmm9 + addps sPoly+64+__svml_slog1p_data_internal(%rip), %xmm9 + mulps %xmm1, %xmm9 + addps sPoly+48+__svml_slog1p_data_internal(%rip), %xmm9 + mulps %xmm1, %xmm9 + addps sPoly+32+__svml_slog1p_data_internal(%rip), %xmm9 + mulps %xmm1, %xmm9 + addps sPoly+16+__svml_slog1p_data_internal(%rip), %xmm9 + mulps %xmm1, %xmm9 + addps sPoly+__svml_slog1p_data_internal(%rip), %xmm9 + mulps %xmm1, %xmm9 + mulps %xmm1, %xmm9 + addps %xmm9, %xmm1 + addps %xmm10, %xmm1 + orps %xmm11, %xmm1 + testl %edx, %edx + +/* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx rbp r12 r13 r14 r15 edx xmm0 xmm1 + +/* Restore registers + * and exit the function + */ + +L(EXIT): + movaps %xmm1, %xmm0 + addq $72, %rsp + cfi_def_cfa_offset(8) + ret + cfi_def_cfa_offset(80) + +/* Branch to process + * special inputs + */ + +L(SPECIAL_VALUES_BRANCH): + movups %xmm0, 32(%rsp) + movups %xmm1, 48(%rsp) + # LOE rbx rbp r12 r13 r14 r15 edx + + xorl %eax, %eax + movq %r12, 16(%rsp) + cfi_offset(12, -64) + movl %eax, %r12d + movq %r13, 8(%rsp) + cfi_offset(13, -72) + movl %edx, %r13d + movq %r14, (%rsp) + cfi_offset(14, -80) + # LOE rbx rbp r15 r12d r13d + +/* Range mask + * bits check + */ + +L(RANGEMASK_CHECK): + btl %r12d, %r13d + +/* Call scalar math function */ + jc L(SCALAR_MATH_CALL) + # LOE rbx rbp 
r15 r12d r13d + +/* Special inputs + * processing loop + */ + +L(SPECIAL_VALUES_LOOP): + incl %r12d + cmpl $4, %r12d + +/* Check bits in range mask */ + jl L(RANGEMASK_CHECK) + # LOE rbx rbp r15 r12d r13d + + movq 16(%rsp), %r12 + cfi_restore(12) + movq 8(%rsp), %r13 + cfi_restore(13) + movq (%rsp), %r14 + cfi_restore(14) + movups 48(%rsp), %xmm1 + +/* Go to exit */ + jmp L(EXIT) + cfi_offset(12, -64) + cfi_offset(13, -72) + cfi_offset(14, -80) + # LOE rbx rbp r12 r13 r14 r15 xmm1 + +/* Scalar math function call + * to process special input + */ + +L(SCALAR_MATH_CALL): + movl %r12d, %r14d + movss 32(%rsp,%r14,4), %xmm0 + call log1pf@PLT + # LOE rbx rbp r14 r15 r12d r13d xmm0 + + movss %xmm0, 48(%rsp,%r14,4) + +/* Process special inputs in loop */ + jmp L(SPECIAL_VALUES_LOOP) + # LOE rbx rbp r15 r12d r13d +END(_ZGVbN4v_log1pf_sse4) + + .section .rodata, "a" + .align 16 + +#ifdef __svml_slog1p_data_internal_typedef +typedef unsigned int VUINT32; +typedef struct { + __declspec(align(16)) VUINT32 SgnMask[4][1]; + __declspec(align(16)) VUINT32 sOne[4][1]; + __declspec(align(16)) VUINT32 sPoly[8][4][1]; + __declspec(align(16)) VUINT32 iHiDelta[4][1]; + __declspec(align(16)) VUINT32 iLoRange[4][1]; + __declspec(align(16)) VUINT32 iBrkValue[4][1]; + __declspec(align(16)) VUINT32 iOffExpoMask[4][1]; + __declspec(align(16)) VUINT32 sLn2[4][1]; +} __svml_slog1p_data_internal; +#endif +__svml_slog1p_data_internal: + /*== SgnMask ==*/ + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff + /*== sOne = SP 1.0 ==*/ + .align 16 + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 + /*== sPoly[] = SP polynomial ==*/ + .align 16 + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /*
2.0007920265197753906250000e-01 P3 */ + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ + /*== iHiDelta = SP 80000000-7f000000 ==*/ + .align 16 + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 + /*== iLoRange = SP 00800000+iHiDelta ==*/ + .align 16 + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000 + /*== iBrkValue = SP 2/3 ==*/ + .align 16 + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab + /*== iOffExpoMask = SP significand mask ==*/ + .align 16 + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff + /*== sLn2 = SP ln(2) ==*/ + .align 16 + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 + .align 16 + .type __svml_slog1p_data_internal,@object + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S new file mode 100644 index 0000000000..c0b97d89e6 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized log1pf, vector length is 8. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define _ZGVdN8v_log1pf _ZGVdN8v_log1pf_sse_wrapper +#include "../svml_s_log1pf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c new file mode 100644 index 0000000000..a2bbe37129 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized log1pf, vector length is 8. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN8v_log1pf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8v_log1pf, __GI__ZGVdN8v_log1pf, + __redirect__ZGVdN8v_log1pf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S new file mode 100644 index 0000000000..957dc23e3f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_log1pf8_core_avx2.S @@ -0,0 +1,254 @@ +/* Function log1pf vectorized with AVX2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. 
*/ + +/* + * ALGORITHM DESCRIPTION: + * + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) + * Get short reciprocal approximation Rcp ~ 1/xh + * R = (Rcp*xh - 1.0) + Rcp*xl + * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) + * log(Rcp) is tabulated + * + * + */ + +/* Offsets for data table __svml_slog1p_data_internal + */ +#define SgnMask 0 +#define sOne 32 +#define sPoly 64 +#define iHiDelta 320 +#define iLoRange 352 +#define iBrkValue 384 +#define iOffExpoMask 416 +#define sLn2 448 + +#include <sysdep.h> + + .text + .section .text.avx2,"ax",@progbits +ENTRY(_ZGVdN8v_log1pf_avx2) + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-32, %rsp + subq $96, %rsp + vmovups sOne+__svml_slog1p_data_internal(%rip), %ymm2 + +/* reduction: compute r,n */ + vmovups iBrkValue+__svml_slog1p_data_internal(%rip), %ymm13 + vmovups SgnMask+__svml_slog1p_data_internal(%rip), %ymm4 + vmovups iLoRange+__svml_slog1p_data_internal(%rip), %ymm8 + vmovaps %ymm0, %ymm3 + +/* compute 1+x as high, low parts */ + vmaxps %ymm3, %ymm2, %ymm5 + vminps %ymm3, %ymm2, %ymm6 + vaddps %ymm6, %ymm5, %ymm10 + vpsubd %ymm13, %ymm10, %ymm11 + +/* check argument value ranges */ + vpaddd iHiDelta+__svml_slog1p_data_internal(%rip), %ymm10, %ymm9 + vsubps %ymm10, %ymm5, %ymm7 + vpsrad $23, %ymm11, %ymm14 + vpand iOffExpoMask+__svml_slog1p_data_internal(%rip), %ymm11, %ymm12 + vpslld $23, %ymm14, %ymm15 + vcvtdq2ps %ymm14, %ymm0 + vpsubd %ymm15, %ymm2, %ymm14 + vandnps %ymm3, %ymm4, %ymm1 + vaddps %ymm7, %ymm6, %ymm4 + vpaddd %ymm13, %ymm12, %ymm6 + vmulps %ymm4, %ymm14, %ymm7 + +/* polynomial evaluation */ + vsubps %ymm2, %ymm6, %ymm2 + vpcmpgtd %ymm9, %ymm8, %ymm5 + vmovups sPoly+224+__svml_slog1p_data_internal(%rip), %ymm8 + vaddps %ymm2, %ymm7, %ymm9 + vfmadd213ps sPoly+192+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 + vfmadd213ps sPoly+160+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 + vfmadd213ps 
sPoly+128+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 + vfmadd213ps sPoly+96+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 + vfmadd213ps sPoly+64+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 + vfmadd213ps sPoly+32+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 + vfmadd213ps sPoly+__svml_slog1p_data_internal(%rip), %ymm9, %ymm8 + vmulps %ymm8, %ymm9, %ymm10 + vfmadd213ps %ymm9, %ymm9, %ymm10 + +/* final reconstruction */ + vfmadd132ps sLn2+__svml_slog1p_data_internal(%rip), %ymm10, %ymm0 + +/* combine and get argument value range mask */ + vmovmskps %ymm5, %edx + vorps %ymm1, %ymm0, %ymm0 + testl %edx, %edx + +/* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx r12 r13 r14 r15 edx ymm0 ymm3 + +/* Restore registers + * and exit the function + */ + +L(EXIT): + movq %rbp, %rsp + popq %rbp + cfi_def_cfa(7, 8) + cfi_restore(6) + ret + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + +/* Branch to process + * special inputs + */ + +L(SPECIAL_VALUES_BRANCH): + vmovups %ymm3, 32(%rsp) + vmovups %ymm0, 64(%rsp) + # LOE rbx r12 r13 r14 r15 edx ymm0 + + xorl %eax, %eax + # LOE rbx r12 r13 r14 r15 eax edx + + vzeroupper + movq %r12, 16(%rsp) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 + movl %eax, %r12d + movq %r13, 8(%rsp) + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 + movl %edx, %r13d + movq %r14, (%rsp) + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx 
r15 r12d r13d + +/* Range mask + * bits check + */ + +L(RANGEMASK_CHECK): + btl %r12d, %r13d + +/* Call scalar math function */ + jc L(SCALAR_MATH_CALL) + # LOE rbx r15 r12d r13d + +/* Special inputs + * processing loop + */ + +L(SPECIAL_VALUES_LOOP): + incl %r12d + cmpl $8, %r12d + +/* Check bits in range mask */ + jl L(RANGEMASK_CHECK) + # LOE rbx r15 r12d r13d + + movq 16(%rsp), %r12 + cfi_restore(12) + movq 8(%rsp), %r13 + cfi_restore(13) + movq (%rsp), %r14 + cfi_restore(14) + vmovups 64(%rsp), %ymm0 + +/* Go to exit */ + jmp L(EXIT) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r12 r13 r14 r15 ymm0 + +/* Scalar math function call + * to process special input + */ + +L(SCALAR_MATH_CALL): + movl %r12d, %r14d + movss 32(%rsp,%r14,4), %xmm0 + call log1pf@PLT + # LOE rbx r14 r15 r12d r13d xmm0 + + movss %xmm0, 64(%rsp,%r14,4) + +/* Process special inputs in loop */ + jmp L(SPECIAL_VALUES_LOOP) + # LOE rbx r15 r12d r13d +END(_ZGVdN8v_log1pf_avx2) + + .section .rodata, "a" + .align 32 + +#ifdef __svml_slog1p_data_internal_typedef +typedef unsigned int VUINT32; +typedef struct { + __declspec(align(32)) VUINT32 SgnMask[8][1]; + __declspec(align(32)) VUINT32 sOne[8][1]; + __declspec(align(32)) VUINT32 sPoly[8][8][1]; + __declspec(align(32)) VUINT32 iHiDelta[8][1]; + __declspec(align(32)) 
VUINT32 iLoRange[8][1]; + __declspec(align(32)) VUINT32 iBrkValue[8][1]; + __declspec(align(32)) VUINT32 iOffExpoMask[8][1]; + __declspec(align(32)) VUINT32 sLn2[8][1]; +} __svml_slog1p_data_internal; +#endif +__svml_slog1p_data_internal: + /*== SgnMask ==*/ + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff + /*== sOne = SP 1.0 ==*/ + .align 32 + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 + /*== sPoly[] = SP polynomial ==*/ + .align 32 + .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ + .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ + .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ + .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ + .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ + .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ + .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ + .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ + /*== iHiDelta = SP 80000000-7f000000 ==*/ + .align 32 + .long 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000 + /*== iLoRange = SP 00800000+iHiDelta ==*/ + .align 32 + .long 0x01800000, 0x01800000, 0x01800000, 0x01800000, 0x01800000, 
0x01800000, 0x01800000, 0x01800000 + /*== iBrkValue = SP 2/3 ==*/ + .align 32 + .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab + /*== iOffExpoMask = SP significand mask ==*/ + .align 32 + .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff + /*== sLn2 = SP ln(2) ==*/ + .align 32 + .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 + .align 32 + .type __svml_slog1p_data_internal,@object + .size __svml_slog1p_data_internal,.-__svml_slog1p_data_internal diff --git a/sysdeps/x86_64/fpu/svml_d_log1p2_core.S b/sysdeps/x86_64/fpu/svml_d_log1p2_core.S new file mode 100644 index 0000000000..e3f01717d9 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_log1p2_core.S @@ -0,0 +1,29 @@ +/* Function log1p vectorized with SSE2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include "svml_d_wrapper_impl.h" + + .text +ENTRY (_ZGVbN2v_log1p) +WRAPPER_IMPL_SSE2 log1p +END (_ZGVbN2v_log1p) + +#ifndef USE_MULTIARCH + libmvec_hidden_def (_ZGVbN2v_log1p) +#endif diff --git a/sysdeps/x86_64/fpu/svml_d_log1p4_core.S b/sysdeps/x86_64/fpu/svml_d_log1p4_core.S new file mode 100644 index 0000000000..49beb96183 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_log1p4_core.S @@ -0,0 +1,29 @@ +/* Function log1p vectorized with AVX2, wrapper version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "svml_d_wrapper_impl.h" + + .text +ENTRY (_ZGVdN4v_log1p) +WRAPPER_IMPL_AVX _ZGVbN2v_log1p +END (_ZGVdN4v_log1p) + +#ifndef USE_MULTIARCH + libmvec_hidden_def (_ZGVdN4v_log1p) +#endif diff --git a/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S new file mode 100644 index 0000000000..8b89768b7c --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_log1p4_core_avx.S @@ -0,0 +1,25 @@ +/* Function log1p vectorized in AVX ISA as wrapper to SSE4 ISA version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "svml_d_wrapper_impl.h" + + .text +ENTRY (_ZGVcN4v_log1p) +WRAPPER_IMPL_AVX _ZGVbN2v_log1p +END (_ZGVcN4v_log1p) diff --git a/sysdeps/x86_64/fpu/svml_d_log1p8_core.S b/sysdeps/x86_64/fpu/svml_d_log1p8_core.S new file mode 100644 index 0000000000..54b4d4ede8 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_log1p8_core.S @@ -0,0 +1,25 @@ +/* Function log1p vectorized with AVX-512, wrapper to AVX2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include "svml_d_wrapper_impl.h" + + .text +ENTRY (_ZGVeN8v_log1p) +WRAPPER_IMPL_AVX512 _ZGVdN4v_log1p +END (_ZGVeN8v_log1p) diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S new file mode 100644 index 0000000000..2c953d00fb --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_s_log1pf16_core.S @@ -0,0 +1,25 @@ +/* Function log1pf vectorized with AVX-512. Wrapper to AVX2 version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "svml_s_wrapper_impl.h" + + .text +ENTRY (_ZGVeN16v_log1pf) +WRAPPER_IMPL_AVX512 _ZGVdN8v_log1pf +END (_ZGVeN16v_log1pf) diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S new file mode 100644 index 0000000000..6f68762eaa --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_s_log1pf4_core.S @@ -0,0 +1,29 @@ +/* Function log1pf vectorized with SSE2, wrapper version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "svml_s_wrapper_impl.h" + + .text +ENTRY (_ZGVbN4v_log1pf) +WRAPPER_IMPL_SSE2 log1pf +END (_ZGVbN4v_log1pf) + +#ifndef USE_MULTIARCH + libmvec_hidden_def (_ZGVbN4v_log1pf) +#endif diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S b/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S new file mode 100644 index 0000000000..74f81283b1 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_s_log1pf8_core.S @@ -0,0 +1,29 @@ +/* Function log1pf vectorized with AVX2, wrapper version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include "svml_s_wrapper_impl.h" + + .text +ENTRY (_ZGVdN8v_log1pf) +WRAPPER_IMPL_AVX _ZGVbN4v_log1pf +END (_ZGVdN8v_log1pf) + +#ifndef USE_MULTIARCH + libmvec_hidden_def (_ZGVdN8v_log1pf) +#endif diff --git a/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S new file mode 100644 index 0000000000..f33be0e904 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_s_log1pf8_core_avx.S @@ -0,0 +1,25 @@ +/* Function log1pf vectorized in AVX ISA as wrapper to SSE4 ISA version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include "svml_s_wrapper_impl.h" + + .text +ENTRY (_ZGVcN8v_log1pf) +WRAPPER_IMPL_AVX _ZGVbN4v_log1pf +END (_ZGVcN8v_log1pf) diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c new file mode 100644 index 0000000000..18aa6aaeaa --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx.c @@ -0,0 +1 @@ +#include "test-double-libmvec-log1p.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c new file mode 100644 index 0000000000..18aa6aaeaa --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx2.c @@ -0,0 +1 @@ +#include "test-double-libmvec-log1p.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c new file mode 100644 index 0000000000..18aa6aaeaa --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p-avx512f.c @@ -0,0 +1 @@ +#include "test-double-libmvec-log1p.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c b/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c new file mode 100644 index 0000000000..40937f987a --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-log1p.c @@ -0,0 +1,3 @@ +#define LIBMVEC_TYPE double +#define LIBMVEC_FUNC log1p +#include "test-vector-abi-arg1.h" diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c index 08c91ff634..38359b05e3 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVbN2v_cbrt) VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVbN2vv_atan2) VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVbN2v_log10) VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVbN2v_log2) +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVbN2v_log1p) #define VEC_INT_TYPE __m128i diff --git 
a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c index a2fb0de309..17701e7731 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c @@ -43,6 +43,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVdN4v_cbrt) VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVdN4vv_atan2) VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVdN4v_log10) VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVdN4v_log2) +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVdN4v_log1p) #ifndef __ILP32__ # define VEC_INT_TYPE __m256i diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c index dc65a4ee25..bba62b2446 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVcN4v_cbrt) VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVcN4vv_atan2) VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVcN4v_log10) VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVcN4v_log2) +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVcN4v_log1p) #define VEC_INT_TYPE __m128i diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c index 253ee8c906..8a04e13a07 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVeN8v_cbrt) VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVeN8vv_atan2) VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVeN8v_log10) VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVeN8v_log2) +VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVeN8v_log1p) #ifndef __ILP32__ # define VEC_INT_TYPE __m512i diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c new file mode 100644 index 0000000000..3395decaf4 --- /dev/null +++ 
b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx.c @@ -0,0 +1 @@ +#include "test-float-libmvec-log1pf.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c new file mode 100644 index 0000000000..3395decaf4 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx2.c @@ -0,0 +1 @@ +#include "test-float-libmvec-log1pf.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c new file mode 100644 index 0000000000..3395decaf4 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf-avx512f.c @@ -0,0 +1 @@ +#include "test-float-libmvec-log1pf.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c new file mode 100644 index 0000000000..1b36069ded --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-log1pf.c @@ -0,0 +1,3 @@ +#define LIBMVEC_TYPE float +#define LIBMVEC_FUNC log1pf +#include "test-vector-abi-arg1.h" diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c index 1c7db5146c..706f52c618 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVeN16v_cbrtf) VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVeN16vv_atan2f) VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVeN16v_log10f) VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVeN16v_log2f) +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVeN16v_log1pf) #define VEC_INT_TYPE __m512i diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c index 8ec51603b3..ceace4c53a 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVbN4v_cbrtf) VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), 
_ZGVbN4vv_atan2f) VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVbN4v_log10f) VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVbN4v_log2f) +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVbN4v_log1pf) #define VEC_INT_TYPE __m128i diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c index 1cb4553c7a..06a4753409 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c @@ -43,6 +43,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVdN8v_cbrtf) VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVdN8vv_atan2f) VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVdN8v_log10f) VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVdN8v_log2f) +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVdN8v_log1pf) /* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */ #undef VECTOR_WRAPPER_fFF diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c index 6ecc1792bb..a87e5298e0 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c @@ -40,6 +40,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVcN8v_cbrtf) VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVcN8vv_atan2f) VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVcN8v_log10f) VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVcN8v_log2f) +VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVcN8v_log1pf) #define VEC_INT_TYPE __m128i