diff mbox series

[v3,01/18] x86-64: Add vector atan/atanf implementation to libmvec

Message ID 20211227150457.1680245-2-skpgkp2@gmail.com
State New
Headers show
Series x86-64: Add vector math functions to libmvec | expand

Commit Message

Sunil Pandey Dec. 27, 2021, 3:04 p.m. UTC
Implement vectorized atan/atanf containing SSE, AVX, AVX2 and
AVX512 versions for libmvec as per vector ABI.  It also contains
accuracy and ABI tests for vector atan/atanf with regenerated ulps.
---
 bits/libm-simd-decl-stubs.h                   |  11 +
 math/bits/mathcalls.h                         |   2 +-
 .../unix/sysv/linux/x86_64/libmvec.abilist    |   8 +
 sysdeps/x86/fpu/bits/math-vector.h            |   4 +
 .../x86/fpu/finclude/math-vector-fortran.h    |   4 +
 sysdeps/x86_64/fpu/Makeconfig                 |   1 +
 sysdeps/x86_64/fpu/Versions                   |   2 +
 sysdeps/x86_64/fpu/libm-test-ulps             |  20 ++
 .../fpu/multiarch/svml_d_atan2_core-sse2.S    |  20 ++
 .../x86_64/fpu/multiarch/svml_d_atan2_core.c  |  27 ++
 .../fpu/multiarch/svml_d_atan2_core_sse4.S    | 245 ++++++++++++++++++
 .../fpu/multiarch/svml_d_atan4_core-sse.S     |  20 ++
 .../x86_64/fpu/multiarch/svml_d_atan4_core.c  |  27 ++
 .../fpu/multiarch/svml_d_atan4_core_avx2.S    | 225 ++++++++++++++++
 .../fpu/multiarch/svml_d_atan8_core-avx2.S    |  20 ++
 .../x86_64/fpu/multiarch/svml_d_atan8_core.c  |  27 ++
 .../fpu/multiarch/svml_d_atan8_core_avx512.S  | 213 +++++++++++++++
 .../fpu/multiarch/svml_s_atanf16_core-avx2.S  |  20 ++
 .../fpu/multiarch/svml_s_atanf16_core.c       |  28 ++
 .../multiarch/svml_s_atanf16_core_avx512.S    | 174 +++++++++++++
 .../fpu/multiarch/svml_s_atanf4_core-sse2.S   |  20 ++
 .../x86_64/fpu/multiarch/svml_s_atanf4_core.c |  28 ++
 .../fpu/multiarch/svml_s_atanf4_core_sse4.S   | 164 ++++++++++++
 .../fpu/multiarch/svml_s_atanf8_core-sse.S    |  20 ++
 .../x86_64/fpu/multiarch/svml_s_atanf8_core.c |  28 ++
 .../fpu/multiarch/svml_s_atanf8_core_avx2.S   | 148 +++++++++++
 sysdeps/x86_64/fpu/svml_d_atan2_core.S        |  29 +++
 sysdeps/x86_64/fpu/svml_d_atan4_core.S        |  29 +++
 sysdeps/x86_64/fpu/svml_d_atan4_core_avx.S    |  25 ++
 sysdeps/x86_64/fpu/svml_d_atan8_core.S        |  25 ++
 sysdeps/x86_64/fpu/svml_s_atanf16_core.S      |  25 ++
 sysdeps/x86_64/fpu/svml_s_atanf4_core.S       |  29 +++
 sysdeps/x86_64/fpu/svml_s_atanf8_core.S       |  29 +++
 sysdeps/x86_64/fpu/svml_s_atanf8_core_avx.S   |  25 ++
 .../x86_64/fpu/test-double-libmvec-atan-avx.c |   1 +
 .../fpu/test-double-libmvec-atan-avx2.c       |   1 +
 .../fpu/test-double-libmvec-atan-avx512f.c    |   1 +
 sysdeps/x86_64/fpu/test-double-libmvec-atan.c |   3 +
 .../x86_64/fpu/test-double-vlen2-wrappers.c   |   1 +
 .../fpu/test-double-vlen4-avx2-wrappers.c     |   1 +
 .../x86_64/fpu/test-double-vlen4-wrappers.c   |   1 +
 .../x86_64/fpu/test-double-vlen8-wrappers.c   |   1 +
 .../x86_64/fpu/test-float-libmvec-atanf-avx.c |   1 +
 .../fpu/test-float-libmvec-atanf-avx2.c       |   1 +
 .../fpu/test-float-libmvec-atanf-avx512f.c    |   1 +
 sysdeps/x86_64/fpu/test-float-libmvec-atanf.c |   3 +
 .../x86_64/fpu/test-float-vlen16-wrappers.c   |   1 +
 .../x86_64/fpu/test-float-vlen4-wrappers.c    |   1 +
 .../fpu/test-float-vlen8-avx2-wrappers.c      |   1 +
 .../x86_64/fpu/test-float-vlen8-wrappers.c    |   1 +
 50 files changed, 1741 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core-sse.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core_avx2.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core-avx2.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core_avx512.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core-avx2.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core-sse2.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core_sse4.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core-sse.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core_avx2.S
 create mode 100644 sysdeps/x86_64/fpu/svml_d_atan2_core.S
 create mode 100644 sysdeps/x86_64/fpu/svml_d_atan4_core.S
 create mode 100644 sysdeps/x86_64/fpu/svml_d_atan4_core_avx.S
 create mode 100644 sysdeps/x86_64/fpu/svml_d_atan8_core.S
 create mode 100644 sysdeps/x86_64/fpu/svml_s_atanf16_core.S
 create mode 100644 sysdeps/x86_64/fpu/svml_s_atanf4_core.S
 create mode 100644 sysdeps/x86_64/fpu/svml_s_atanf8_core.S
 create mode 100644 sysdeps/x86_64/fpu/svml_s_atanf8_core_avx.S
 create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atan-avx.c
 create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atan-avx2.c
 create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atan-avx512f.c
 create mode 100644 sysdeps/x86_64/fpu/test-double-libmvec-atan.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx2.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx512f.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-libmvec-atanf.c
diff mbox series

Patch

diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h
index 2ccdd1fc53..b4647ca918 100644
--- a/bits/libm-simd-decl-stubs.h
+++ b/bits/libm-simd-decl-stubs.h
@@ -109,4 +109,15 @@ 
 #define __DECL_SIMD_acosf32x
 #define __DECL_SIMD_acosf64x
 #define __DECL_SIMD_acosf128x
+
+#define __DECL_SIMD_atan
+#define __DECL_SIMD_atanf
+#define __DECL_SIMD_atanl
+#define __DECL_SIMD_atanf16
+#define __DECL_SIMD_atanf32
+#define __DECL_SIMD_atanf64
+#define __DECL_SIMD_atanf128
+#define __DECL_SIMD_atanf32x
+#define __DECL_SIMD_atanf64x
+#define __DECL_SIMD_atanf128x
 #endif
diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h
index 2cc6654208..3e27c21f21 100644
--- a/math/bits/mathcalls.h
+++ b/math/bits/mathcalls.h
@@ -54,7 +54,7 @@  __MATHCALL_VEC (acos,, (_Mdouble_ __x));
 /* Arc sine of X.  */
 __MATHCALL (asin,, (_Mdouble_ __x));
 /* Arc tangent of X.  */
-__MATHCALL (atan,, (_Mdouble_ __x));
+__MATHCALL_VEC (atan,, (_Mdouble_ __x));
 /* Arc tangent of Y/X.  */
 __MATHCALL (atan2,, (_Mdouble_ __y, _Mdouble_ __x));
 
diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
index b37b55777e..a93258db6f 100644
--- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
@@ -47,10 +47,18 @@  GLIBC_2.22 _ZGVeN8v_sin F
 GLIBC_2.22 _ZGVeN8vv_pow F
 GLIBC_2.22 _ZGVeN8vvv_sincos F
 GLIBC_2.35 _ZGVbN2v_acos F
+GLIBC_2.35 _ZGVbN2v_atan F
 GLIBC_2.35 _ZGVbN4v_acosf F
+GLIBC_2.35 _ZGVbN4v_atanf F
 GLIBC_2.35 _ZGVcN4v_acos F
+GLIBC_2.35 _ZGVcN4v_atan F
 GLIBC_2.35 _ZGVcN8v_acosf F
+GLIBC_2.35 _ZGVcN8v_atanf F
 GLIBC_2.35 _ZGVdN4v_acos F
+GLIBC_2.35 _ZGVdN4v_atan F
 GLIBC_2.35 _ZGVdN8v_acosf F
+GLIBC_2.35 _ZGVdN8v_atanf F
 GLIBC_2.35 _ZGVeN16v_acosf F
+GLIBC_2.35 _ZGVeN16v_atanf F
 GLIBC_2.35 _ZGVeN8v_acos F
+GLIBC_2.35 _ZGVeN8v_atan F
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h
index dabb74cbb9..1c0e5c5e35 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86/fpu/bits/math-vector.h
@@ -62,6 +62,10 @@ 
 #  define __DECL_SIMD_acos __DECL_SIMD_x86_64
 #  undef __DECL_SIMD_acosf
 #  define __DECL_SIMD_acosf __DECL_SIMD_x86_64
+#  undef __DECL_SIMD_atan
+#  define __DECL_SIMD_atan __DECL_SIMD_x86_64
+#  undef __DECL_SIMD_atanf
+#  define __DECL_SIMD_atanf __DECL_SIMD_x86_64
 
 # endif
 #endif
diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
index 4bcbd1fbce..ddcccb11d7 100644
--- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h
+++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
@@ -30,6 +30,8 @@ 
 !GCC$ builtin (powf) attributes simd (notinbranch) if('x86_64')
 !GCC$ builtin (acos) attributes simd (notinbranch) if('x86_64')
 !GCC$ builtin (acosf) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (atan) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (atanf) attributes simd (notinbranch) if('x86_64')
 
 !GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
@@ -45,3 +47,5 @@ 
 !GCC$ builtin (powf) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (acos) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (acosf) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (atan) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (atanf) attributes simd (notinbranch) if('x32')
diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig
index 7acf1f306c..dae0887f13 100644
--- a/sysdeps/x86_64/fpu/Makeconfig
+++ b/sysdeps/x86_64/fpu/Makeconfig
@@ -23,6 +23,7 @@  postclean-generated += libmvec.mk
 # Define for both math and mathvec directories.
 libmvec-funcs = \
   acos \
+  atan \
   cos \
   exp \
   log \
diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions
index 2985fe7ca7..424f6d526e 100644
--- a/sysdeps/x86_64/fpu/Versions
+++ b/sysdeps/x86_64/fpu/Versions
@@ -15,6 +15,8 @@  libmvec {
   }
   GLIBC_2.35 {
     _ZGVbN2v_acos; _ZGVcN4v_acos; _ZGVdN4v_acos; _ZGVeN8v_acos;
+    _ZGVbN2v_atan; _ZGVcN4v_atan; _ZGVdN4v_atan; _ZGVeN8v_atan;
     _ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf;
+    _ZGVbN4v_atanf; _ZGVcN8v_atanf; _ZGVdN8v_atanf; _ZGVeN16v_atanf;
   }
 }
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 6c12976c82..2e64e59803 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -164,6 +164,26 @@  float: 2
 float128: 2
 ldouble: 1
 
+Function: "atan_vlen16":
+float: 1
+
+Function: "atan_vlen2":
+double: 1
+
+Function: "atan_vlen4":
+double: 1
+float: 1
+
+Function: "atan_vlen4_avx2":
+double: 1
+
+Function: "atan_vlen8":
+double: 1
+float: 1
+
+Function: "atan_vlen8_avx2":
+float: 1
+
 Function: "atanh":
 double: 2
 float: 2
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S
new file mode 100644
index 0000000000..115e5223aa
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S
@@ -0,0 +1,20 @@ 
+/* SSE2 version of vectorized atan, vector length is 2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN2v_atan _ZGVbN2v_atan_sse2
+#include "../svml_d_atan2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c
new file mode 100644
index 0000000000..93f079ffcb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c
@@ -0,0 +1,27 @@ 
+/* Multiple versions of vectorized atan, vector length is 2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN2v_atan
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_atan, __GI__ZGVbN2v_atan, __redirect__ZGVbN2v_atan)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S
new file mode 100644
index 0000000000..6ce845b96c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S
@@ -0,0 +1,245 @@ 
+/* Function atan vectorized with SSE4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   https://www.gnu.org/licenses/.  */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ *      For    0.0    <= x <=  7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x)
+ *      For  7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x)
+ *      For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x)
+ *      For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x)
+ *      For 39.0/16.0 <= x <=    inf   : atan(x) = atan(inf) + atan(s), where s=-1.0/x
+ *      Where atan(s) ~= s+s^3*Poly11(s^2) on interval |s|<7.0/16.0.
+ *
+ */
+
+/* Offsets for data table __svml_datan_data_internal_avx512
+ */
+#define AbsMask                       	0
+#define Shifter                       	16
+#define MaxThreshold                  	32
+#define MOne                          	48
+#define One                           	64
+#define LargeX                        	80
+#define Zero                          	96
+#define Tbl_H                         	112
+#define Tbl_L                         	368
+#define dIndexMed                     	624
+#define Pi2                           	640
+#define Pi2_low                       	656
+#define coeff                         	672
+
+#include <sysdep.h>
+
+        .text
+	.section .text.sse4,"ax",@progbits
+ENTRY(_ZGVbN2v_atan_sse4)
+        lea       Tbl_H+128+__svml_datan_data_internal_avx512(%rip), %rcx
+        movups    __svml_datan_data_internal_avx512(%rip), %xmm4
+        movups    Shifter+__svml_datan_data_internal_avx512(%rip), %xmm3
+        andps     %xmm0, %xmm4
+        movaps    %xmm3, %xmm12
+        movaps    %xmm4, %xmm5
+        addpd     %xmm4, %xmm12
+        movaps    %xmm12, %xmm7
+
+/*
+ * table lookup sequence
+ * VPERMUTE not available
+ */
+        movaps    %xmm12, %xmm10
+        subpd     %xmm3, %xmm7
+        subpd     %xmm7, %xmm5
+        mulpd     %xmm4, %xmm7
+        movups    MaxThreshold+__svml_datan_data_internal_avx512(%rip), %xmm2
+        psllq     $3, %xmm10
+
+/* saturate X range */
+        movups    LargeX+__svml_datan_data_internal_avx512(%rip), %xmm8
+        pxor      %xmm4, %xmm0
+        cmplepd   %xmm4, %xmm2
+        addpd     One+__svml_datan_data_internal_avx512(%rip), %xmm7
+        minpd     %xmm4, %xmm8
+        movups    MOne+__svml_datan_data_internal_avx512(%rip), %xmm6
+        movaps    %xmm2, %xmm1
+        movaps    %xmm2, %xmm9
+        andnps    %xmm5, %xmm1
+        andps     %xmm2, %xmm6
+        andnps    %xmm7, %xmm9
+        andps     %xmm2, %xmm8
+        orps      %xmm6, %xmm1
+        orps      %xmm8, %xmm9
+
+/* R+Rl = DiffX/Y */
+        divpd     %xmm9, %xmm1
+        pand      .FLT_11(%rip), %xmm10
+
+/* set table value to Pi/2 for large X */
+        movups    Pi2+__svml_datan_data_internal_avx512(%rip), %xmm4
+        movd      %xmm10, %eax
+        andps     %xmm2, %xmm4
+        pshufd    $2, %xmm10, %xmm11
+        movaps    %xmm2, %xmm10
+
+/* polynomial evaluation */
+        movaps    %xmm1, %xmm2
+        mulpd     %xmm1, %xmm2
+        movd      %xmm11, %edx
+        movups    coeff+__svml_datan_data_internal_avx512(%rip), %xmm5
+        movaps    %xmm2, %xmm7
+        movups    704+__svml_datan_data_internal_avx512(%rip), %xmm6
+        movaps    %xmm2, %xmm9
+        mulpd     %xmm2, %xmm5
+        mulpd     %xmm2, %xmm7
+        addpd     688+__svml_datan_data_internal_avx512(%rip), %xmm5
+        mulpd     %xmm2, %xmm6
+        mulpd     %xmm7, %xmm5
+        addpd     720+__svml_datan_data_internal_avx512(%rip), %xmm6
+        mulpd     %xmm1, %xmm9
+        addpd     %xmm5, %xmm6
+        movups    736+__svml_datan_data_internal_avx512(%rip), %xmm8
+        mulpd     %xmm2, %xmm8
+        mulpd     %xmm6, %xmm7
+        addpd     752+__svml_datan_data_internal_avx512(%rip), %xmm8
+        addpd     %xmm7, %xmm8
+        mulpd     %xmm8, %xmm9
+        movups    dIndexMed+__svml_datan_data_internal_avx512(%rip), %xmm14
+        cmplepd   %xmm12, %xmm14
+        addpd     %xmm9, %xmm1
+        movslq    %eax, %rax
+        movaps    %xmm14, %xmm3
+        movslq    %edx, %rdx
+        movsd     -128(%rax,%rcx), %xmm13
+        movsd     (%rcx,%rax), %xmm15
+        movhpd    -128(%rdx,%rcx), %xmm13
+        movhpd    (%rcx,%rdx), %xmm15
+        andnps    %xmm13, %xmm3
+        andps     %xmm14, %xmm15
+        orps      %xmm15, %xmm3
+        andnps    %xmm3, %xmm10
+        orps      %xmm4, %xmm10
+        addpd     %xmm1, %xmm10
+        pxor      %xmm10, %xmm0
+        ret
+
+END(_ZGVbN2v_atan_sse4)
+
+        .section .rodata, "a"
+        .align 16
+
+#ifdef __svml_datan_data_internal_avx512_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+        __declspec(align(16)) VUINT32 AbsMask[2][2];
+        __declspec(align(16)) VUINT32 Shifter[2][2];
+        __declspec(align(16)) VUINT32 MaxThreshold[2][2];
+        __declspec(align(16)) VUINT32 MOne[2][2];
+        __declspec(align(16)) VUINT32 One[2][2];
+        __declspec(align(16)) VUINT32 LargeX[2][2];
+        __declspec(align(16)) VUINT32 Zero[2][2];
+        __declspec(align(16)) VUINT32 Tbl_H[32][2];
+        __declspec(align(16)) VUINT32 Tbl_L[32][2];
+        __declspec(align(16)) VUINT32 dIndexMed[2][2];
+        __declspec(align(16)) VUINT32 Pi2[2][2];
+        __declspec(align(16)) VUINT32 Pi2_low[2][2];
+        __declspec(align(16)) VUINT32 coeff[6][2][2];
+    } __svml_datan_data_internal_avx512;
+#endif
+__svml_datan_data_internal_avx512:
+        /*== AbsMask ==*/
+        .quad 0x7fffffffffffffff, 0x7fffffffffffffff
+        /*== Shifter ==*/
+        .align 16
+        .quad 0x4318000000000000, 0x4318000000000000
+        /*== MaxThreshold ==*/
+        .align 16
+        .quad 0x401f800000000000, 0x401f800000000000
+        /*== MOne ==*/
+        .align 16
+        .quad 0xbff0000000000000, 0xbff0000000000000
+        /*== One ==*/
+        .align 16
+        .quad 0x3ff0000000000000, 0x3ff0000000000000
+        /*== LargeX ==*/
+        .align 16
+        .quad 0x47f0000000000000, 0x47f0000000000000
+        /*== Zero ==*/
+        .align 16
+        .quad 0x0000000000000000, 0x0000000000000000
+        /*== Tbl_H ==*/
+        .align 16
+        .quad 0x0000000000000000, 0x3fcf5b75f92c80dd
+        .quad 0x3fddac670561bb4f, 0x3fe4978fa3269ee1
+        .quad 0x3fe921fb54442d18, 0x3fecac7c57846f9e
+        .quad 0x3fef730bd281f69b, 0x3ff0d38f2c5ba09f
+        .quad 0x3ff1b6e192ebbe44, 0x3ff270ef55a53a25
+        .quad 0x3ff30b6d796a4da8, 0x3ff38d6a6ce13353
+        .quad 0x3ff3fc176b7a8560, 0x3ff45b54837351a0
+        .quad 0x3ff4ae10fc6589a5, 0x3ff4f68dea672617
+        .quad 0x3ff5368c951e9cfd, 0x3ff56f6f33a3e6a7
+        .quad 0x3ff5a25052114e60, 0x3ff5d013c41adabd
+        .quad 0x3ff5f97315254857, 0x3ff61f06c6a92b89
+        .quad 0x3ff6414d44094c7c, 0x3ff660b02c736a06
+        .quad 0x3ff67d8863bc99bd, 0x3ff698213a9d5053
+        .quad 0x3ff6b0bae830c070, 0x3ff6c78c7edeb195
+        .quad 0x3ff6dcc57bb565fd, 0x3ff6f08f07435fec
+        .quad 0x3ff7030cf9403197, 0x3ff7145eac2088a4
+        /*== Tbl_L ==*/
+        .align 16
+        .quad 0x0000000000000000, 0x3c68ab6e3cf7afbd
+        .quad 0x3c7a2b7f222f65e2, 0x3c72419a87f2a458
+        .quad 0x3c81a62633145c07, 0x3c80dae13ad18a6b
+        .quad 0x3c7007887af0cbbd, 0xbc9bd0dc231bfd70
+        .quad 0x3c9b1b466a88828e, 0xbc9a66b1af5f84fb
+        .quad 0x3c96254cb03bb199, 0xbc812c77e8a80f5c
+        .quad 0xbc4441a3bd3f1084, 0x3c79e4a72eedacc4
+        .quad 0xbc93b03e8a27f555, 0x3c9934f9f2b0020e
+        .quad 0xbc996f47948a99f1, 0xbc7df6edd6f1ec3b
+        .quad 0x3c78c2d0c89de218, 0x3c9f82bba194dd5d
+        .quad 0xbc831151a43b51ca, 0xbc8487d50bceb1a5
+        .quad 0xbc9c5f60a65c7397, 0xbc7acb6afb332a0f
+        .quad 0xbc99b7bd2e1e8c9c, 0xbc9b9839085189e3
+        .quad 0xbc97d1ab82ffb70b, 0x3c99239ad620ffe2
+        .quad 0xbc929c86447928e7, 0xbc8957a7170df016
+        .quad 0xbc7cbe1896221608, 0xbc9fda5797b32a0b
+        /*== dIndexMed ==*/
+        .align 16
+        .quad 0x4318000000000010, 0x4318000000000010
+        /*== Pi2 ==*/
+        .align 16
+        .quad 0x3ff921fb54442d18, 0x3ff921fb54442d18
+        /*== Pi2_low ==*/
+        .align 16
+        .quad 0x3c91a62633145c07, 0x3c91a62633145c07
+        /*== coeff6 ==*/
+        .align 16
+        .quad 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97
+        .quad 0xbfb74257c46790cc, 0xbfb74257c46790cc
+        .quad 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0
+        .quad 0xbfc249248eef04da, 0xbfc249248eef04da
+        .quad 0x3fc999999998741e, 0x3fc999999998741e
+        .quad 0xbfd555555555554d, 0xbfd555555555554d
+        .align 16
+        .type	__svml_datan_data_internal_avx512,@object
+        .size	__svml_datan_data_internal_avx512,.-__svml_datan_data_internal_avx512
+        .align 16
+
+.FLT_11:
+        .long	0x00000078,0x00000000,0x00000078,0x00000000
+        .type	.FLT_11,@object
+        .size	.FLT_11,16
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core-sse.S
new file mode 100644
index 0000000000..79c48dbc91
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core-sse.S
@@ -0,0 +1,20 @@ 
+/* SSE version of vectorized atan, vector length is 4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN4v_atan _ZGVdN4v_atan_sse_wrapper
+#include "../svml_d_atan4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core.c
new file mode 100644
index 0000000000..64ce66b9fd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core.c
@@ -0,0 +1,27 @@ 
+/* Multiple versions of vectorized atan, vector length is 4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN4v_atan
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4v_atan, __GI__ZGVdN4v_atan, __redirect__ZGVdN4v_atan)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core_avx2.S
new file mode 100644
index 0000000000..3e2597d8ec
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core_avx2.S
@@ -0,0 +1,225 @@ 
+/* Function atan vectorized with AVX2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   https://www.gnu.org/licenses/.  */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ *      For    0.0    <= x <=  7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x)
+ *      For  7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x)
+ *      For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x)
+ *      For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x)
+ *      For 39.0/16.0 <= x <=    inf   : atan(x) = atan(inf) + atan(s), where s=-1.0/x
+ *      Where atan(s) ~= s+s^3*Poly11(s^2) on interval |s|<7.0/16.0.
+ *
+ */
+
+/* Offsets for data table __svml_datan_data_internal_avx512
+ */
+#define AbsMask                       	0
+#define Shifter                       	32
+#define MaxThreshold                  	64
+#define MOne                          	96
+#define One                           	128
+#define LargeX                        	160
+#define Zero                          	192
+#define Tbl_H                         	224
+#define Tbl_L                         	480
+#define dIndexMed                     	736
+#define Pi2                           	768
+#define Pi2_low                       	800
+#define coeff                         	832
+
+#include <sysdep.h>
+
+        .text
+	.section .text.avx2,"ax",@progbits
+ENTRY(_ZGVdN4v_atan_avx2)
+        lea       Tbl_H+128+__svml_datan_data_internal_avx512(%rip), %rdi
+        vmovupd   Shifter+__svml_datan_data_internal_avx512(%rip), %ymm4
+        vmovupd   One+__svml_datan_data_internal_avx512(%rip), %ymm9
+
+/* saturate X range */
+        vmovupd   LargeX+__svml_datan_data_internal_avx512(%rip), %ymm6
+        vandpd    __svml_datan_data_internal_avx512(%rip), %ymm0, %ymm7
+        vaddpd    %ymm4, %ymm7, %ymm2
+        vcmpge_oqpd MaxThreshold+__svml_datan_data_internal_avx512(%rip), %ymm7, %ymm3
+        vminpd    %ymm7, %ymm6, %ymm10
+        vsubpd    %ymm4, %ymm2, %ymm5
+
+/*
+ * table lookup sequence
+ * VPERMUTE not available
+ */
+        vpsllq    $3, %ymm2, %ymm13
+        vsubpd    %ymm5, %ymm7, %ymm8
+        vcmpge_oqpd dIndexMed+__svml_datan_data_internal_avx512(%rip), %ymm2, %ymm2
+        vfmadd231pd %ymm7, %ymm5, %ymm9
+        vpand     .FLT_11(%rip), %ymm13, %ymm14
+        vblendvpd %ymm3, MOne+__svml_datan_data_internal_avx512(%rip), %ymm8, %ymm11
+        vblendvpd %ymm3, %ymm10, %ymm9, %ymm12
+        vxorpd    %ymm0, %ymm7, %ymm1
+
+/* R+Rl = DiffX/Y */
+        vdivpd    %ymm12, %ymm11, %ymm0
+        vextractf128 $1, %ymm14, %xmm4
+        vmovd     %xmm14, %eax
+        vmovd     %xmm4, %ecx
+        movslq    %eax, %rax
+        vpextrd   $2, %xmm14, %edx
+        movslq    %ecx, %rcx
+        vpextrd   $2, %xmm4, %esi
+        movslq    %edx, %rdx
+        movslq    %esi, %rsi
+        vmovsd    -128(%rax,%rdi), %xmm15
+        vmovsd    (%rdi,%rax), %xmm7
+        vmovsd    -128(%rcx,%rdi), %xmm5
+        vmovsd    (%rdi,%rcx), %xmm9
+        vmovhpd   -128(%rdx,%rdi), %xmm15, %xmm15
+        vmovhpd   (%rdi,%rdx), %xmm7, %xmm8
+        vmovhpd   -128(%rsi,%rdi), %xmm5, %xmm6
+        vmovhpd   (%rdi,%rsi), %xmm9, %xmm10
+
+/* polynomial evaluation */
+        vmulpd    %ymm0, %ymm0, %ymm5
+        vmulpd    %ymm5, %ymm5, %ymm4
+        vinsertf128 $1, %xmm6, %ymm15, %ymm11
+        vinsertf128 $1, %xmm10, %ymm8, %ymm12
+        vblendvpd %ymm2, %ymm12, %ymm11, %ymm13
+        vmovupd   coeff+__svml_datan_data_internal_avx512(%rip), %ymm8
+        vmovupd   896+__svml_datan_data_internal_avx512(%rip), %ymm2
+        vmulpd    %ymm5, %ymm0, %ymm6
+        vfmadd213pd 864+__svml_datan_data_internal_avx512(%rip), %ymm5, %ymm8
+        vfmadd213pd 928+__svml_datan_data_internal_avx512(%rip), %ymm5, %ymm2
+
+/* set table value to Pi/2 for large X */
+        vblendvpd %ymm3, Pi2+__svml_datan_data_internal_avx512(%rip), %ymm13, %ymm7
+        vmovupd   960+__svml_datan_data_internal_avx512(%rip), %ymm3
+        vfmadd213pd %ymm2, %ymm4, %ymm8
+        vfmadd213pd 992+__svml_datan_data_internal_avx512(%rip), %ymm3, %ymm5
+        vfmadd213pd %ymm5, %ymm4, %ymm8
+        vfmadd213pd %ymm0, %ymm6, %ymm8
+        vaddpd    %ymm8, %ymm7, %ymm0
+        vxorpd    %ymm1, %ymm0, %ymm0
+        ret
+
+END(_ZGVdN4v_atan_avx2)
+
+        .section .rodata, "a"
+        .align 32
+
+.FLT_11:
+        .long	0x00000078,0x00000000,0x00000078,0x00000000,0x00000078,0x00000000,0x00000078,0x00000000
+        .type	.FLT_11,@object
+        .size	.FLT_11,32
+        .align 32
+
+#ifdef __svml_datan_data_internal_avx512_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+        __declspec(align(32)) VUINT32 AbsMask[4][2];
+        __declspec(align(32)) VUINT32 Shifter[4][2];
+        __declspec(align(32)) VUINT32 MaxThreshold[4][2];
+        __declspec(align(32)) VUINT32 MOne[4][2];
+        __declspec(align(32)) VUINT32 One[4][2];
+        __declspec(align(32)) VUINT32 LargeX[4][2];
+        __declspec(align(32)) VUINT32 Zero[4][2];
+        __declspec(align(32)) VUINT32 Tbl_H[32][2];
+        __declspec(align(32)) VUINT32 Tbl_L[32][2];
+        __declspec(align(32)) VUINT32 dIndexMed[4][2];
+        __declspec(align(32)) VUINT32 Pi2[4][2];
+        __declspec(align(32)) VUINT32 Pi2_low[4][2];
+        __declspec(align(32)) VUINT32 coeff[6][4][2];
+    } __svml_datan_data_internal_avx512;
+#endif
+__svml_datan_data_internal_avx512:
+        /*== AbsMask ==*/
+        .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff
+        /*== Shifter ==*/
+        .align 32
+        .quad 0x4318000000000000, 0x4318000000000000, 0x4318000000000000, 0x4318000000000000
+        /*== MaxThreshold ==*/
+        .align 32
+        .quad 0x401f800000000000, 0x401f800000000000, 0x401f800000000000, 0x401f800000000000
+        /*== MOne ==*/
+        .align 32
+        .quad 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000
+        /*== One ==*/
+        .align 32
+        .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
+        /*== LargeX ==*/
+        .align 32
+        .quad 0x47f0000000000000, 0x47f0000000000000, 0x47f0000000000000, 0x47f0000000000000
+        /*== Zero ==*/
+        .align 32
+        .quad 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
+        /*== Tbl_H ==*/
+        .align 32
+        .quad 0x0000000000000000, 0x3fcf5b75f92c80dd
+        .quad 0x3fddac670561bb4f, 0x3fe4978fa3269ee1
+        .quad 0x3fe921fb54442d18, 0x3fecac7c57846f9e
+        .quad 0x3fef730bd281f69b, 0x3ff0d38f2c5ba09f
+        .quad 0x3ff1b6e192ebbe44, 0x3ff270ef55a53a25
+        .quad 0x3ff30b6d796a4da8, 0x3ff38d6a6ce13353
+        .quad 0x3ff3fc176b7a8560, 0x3ff45b54837351a0
+        .quad 0x3ff4ae10fc6589a5, 0x3ff4f68dea672617
+        .quad 0x3ff5368c951e9cfd, 0x3ff56f6f33a3e6a7
+        .quad 0x3ff5a25052114e60, 0x3ff5d013c41adabd
+        .quad 0x3ff5f97315254857, 0x3ff61f06c6a92b89
+        .quad 0x3ff6414d44094c7c, 0x3ff660b02c736a06
+        .quad 0x3ff67d8863bc99bd, 0x3ff698213a9d5053
+        .quad 0x3ff6b0bae830c070, 0x3ff6c78c7edeb195
+        .quad 0x3ff6dcc57bb565fd, 0x3ff6f08f07435fec
+        .quad 0x3ff7030cf9403197, 0x3ff7145eac2088a4
+        /*== Tbl_L ==*/
+        .align 32
+        .quad 0x0000000000000000, 0x3c68ab6e3cf7afbd
+        .quad 0x3c7a2b7f222f65e2, 0x3c72419a87f2a458
+        .quad 0x3c81a62633145c07, 0x3c80dae13ad18a6b
+        .quad 0x3c7007887af0cbbd, 0xbc9bd0dc231bfd70
+        .quad 0x3c9b1b466a88828e, 0xbc9a66b1af5f84fb
+        .quad 0x3c96254cb03bb199, 0xbc812c77e8a80f5c
+        .quad 0xbc4441a3bd3f1084, 0x3c79e4a72eedacc4
+        .quad 0xbc93b03e8a27f555, 0x3c9934f9f2b0020e
+        .quad 0xbc996f47948a99f1, 0xbc7df6edd6f1ec3b
+        .quad 0x3c78c2d0c89de218, 0x3c9f82bba194dd5d
+        .quad 0xbc831151a43b51ca, 0xbc8487d50bceb1a5
+        .quad 0xbc9c5f60a65c7397, 0xbc7acb6afb332a0f
+        .quad 0xbc99b7bd2e1e8c9c, 0xbc9b9839085189e3
+        .quad 0xbc97d1ab82ffb70b, 0x3c99239ad620ffe2
+        .quad 0xbc929c86447928e7, 0xbc8957a7170df016
+        .quad 0xbc7cbe1896221608, 0xbc9fda5797b32a0b
+        /*== dIndexMed ==*/
+        .align 32
+        .quad 0x4318000000000010, 0x4318000000000010, 0x4318000000000010, 0x4318000000000010
+        /*== Pi2 ==*/
+        .align 32
+        .quad 0x3ff921fb54442d18, 0x3ff921fb54442d18, 0x3ff921fb54442d18, 0x3ff921fb54442d18
+        /*== Pi2_low ==*/
+        .align 32
+        .quad 0x3c91a62633145c07, 0x3c91a62633145c07, 0x3c91a62633145c07, 0x3c91a62633145c07
+        /*== coeff6 ==*/
+        .align 32
+        .quad 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97
+        .quad 0xbfb74257c46790cc, 0xbfb74257c46790cc, 0xbfb74257c46790cc, 0xbfb74257c46790cc
+        .quad 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0
+        .quad 0xbfc249248eef04da, 0xbfc249248eef04da, 0xbfc249248eef04da, 0xbfc249248eef04da
+        .quad 0x3fc999999998741e, 0x3fc999999998741e, 0x3fc999999998741e, 0x3fc999999998741e
+        .quad 0xbfd555555555554d, 0xbfd555555555554d, 0xbfd555555555554d, 0xbfd555555555554d
+        .align 32
+        .type	__svml_datan_data_internal_avx512,@object
+        .size	__svml_datan_data_internal_avx512,.-__svml_datan_data_internal_avx512
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core-avx2.S
new file mode 100644
index 0000000000..723734e10b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core-avx2.S
@@ -0,0 +1,20 @@ 
+/* AVX2 version of vectorized atan, vector length is 8.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN8v_atan _ZGVeN8v_atan_avx2_wrapper  /* Every use of _ZGVeN8v_atan in the file included below now expands to the _avx2_wrapper name.  */
+#include "../svml_d_atan8_core.S"  /* Generic 8-lane source, assembled here under the renamed symbol.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core.c
new file mode 100644
index 0000000000..e97a41b6bc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core.c
@@ -0,0 +1,27 @@ 
+/* Multiple versions of vectorized atan, vector length is 8.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN8v_atan  /* Symbol handled by this file; consumed by the macros below.  */
+#include "ifunc-mathvec-avx512-skx.h"  /* REDIRECT_NAME and IFUNC_SELECTOR are not defined in this file; presumably they come from this header -- confirm.  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());  /* Binds SYMBOL_NAME to whatever IFUNC_SELECTOR () returns.  */
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8v_atan, __GI__ZGVeN8v_atan, __redirect__ZGVeN8v_atan)
+  __attribute__ ((visibility ("hidden")));  /* Only in SHARED builds: hidden-visibility declaration involving the __GI_ name.  */
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core_avx512.S
new file mode 100644
index 0000000000..fa6cb47308
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core_avx512.S
@@ -0,0 +1,213 @@ 
+/* Function atan vectorized with AVX-512.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ *      For    0.0    <= x <=  7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x)
+ *      For  7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x)
+ *      For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x)
+ *      For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x)
+ *      For 39.0/16.0 <= x <=    inf   : atan(x) = atan(inf) + atan(s), where s=-1.0/x
+ *      Where atan(s) ~= s+s^3*Poly11(s^2) on interval |s|<7.0/16.0.
+ *
+ */
+
+/* Offsets for data table __svml_datan_data_internal_avx512
+ */
+#define AbsMask                       	0	/* Byte offsets; each field is one 64-byte vector unless noted.  The routine reads AbsMask via the bare table symbol.  */
+#define Shifter                       	64
+#define MaxThreshold                  	128
+#define MOne                          	192
+#define One                           	256
+#define LargeX                        	320
+#define Zero                          	384	/* Not referenced by name in _ZGVeN8v_atan_skx.  */
+#define Tbl_H                         	448	/* 32 doubles = 256 bytes, hence the jump to 704.  */
+#define dIndexMed                     	704
+#define Pi2                           	768
+#define coeff_1                       	832	/* coeff_1 .. coeff_6 are six consecutive 64-byte vectors.  */
+#define coeff_2                       	896
+#define coeff_3                       	960
+#define coeff_4                       	1024
+#define coeff_5                       	1088
+#define coeff_6                       	1152
+
+#include <sysdep.h>
+
+        .text
+	.section .text.evex512,"ax",@progbits
+ENTRY(_ZGVeN8v_atan_skx)  /* In: zmm0 = 8 doubles x.  Out: zmm0 = atan (x) per lane.  No stack use, no calls.  */
+        vmovups   Shifter+__svml_datan_data_internal_avx512(%rip), %zmm4  /* zmm4 = 1.5*2^50 */
+        vmovups   MaxThreshold+__svml_datan_data_internal_avx512(%rip), %zmm3  /* zmm3 = 7.875 */
+        vmovups   One+__svml_datan_data_internal_avx512(%rip), %zmm9  /* zmm9 = 1.0 */
+
+/* saturate X range */
+        vmovups   LargeX+__svml_datan_data_internal_avx512(%rip), %zmm7  /* zmm7 = 2^128 */
+        vandpd    __svml_datan_data_internal_avx512(%rip), %zmm0, %zmm8  /* zmm8 = |x| (AbsMask sits at offset 0) */
+
+/* R+Rl = DiffX/Y */
+        vbroadcastsd .FLT_10(%rip), %zmm15  /* zmm15 = 1.0, seed for the reciprocal refinement below */
+        vaddpd    {rn-sae}, %zmm4, %zmm8, %zmm2  /* zmm2 = |x| + Shifter; its low mantissa bits hold round(4*|x|) */
+        vxorpd    %zmm0, %zmm8, %zmm1  /* zmm1 = x ^ |x| = sign bit of x */
+        vcmppd    $29, {sae}, %zmm3, %zmm8, %k2  /* k2 = (|x| >= MaxThreshold); predicate 29 is GE_OQ */
+
+/* round to 2 bits after binary point */
+        vreducepd $40, {sae}, %zmm8, %zmm6  /* zmm6 = |x| - (|x| rounded to a multiple of 1/4) */
+        vsubpd    {rn-sae}, %zmm4, %zmm2, %zmm5  /* zmm5 = |x| rounded to a multiple of 1/4 */
+
+/*
+ * if|X|>=MaxThreshold, set DiffX=-1
+ * VMSUB(D, DiffX, LargeMask, Zero, One);
+ */
+        vblendmpd MOne+__svml_datan_data_internal_avx512(%rip), %zmm6, %zmm10{%k2}  /* zmm10 = DiffX = k2 ? -1.0 : zmm6 */
+        vfmadd231pd {rn-sae}, %zmm8, %zmm5, %zmm9  /* zmm9 = Y = 1.0 + zmm5*|x| */
+        vmovups   dIndexMed+__svml_datan_data_internal_avx512(%rip), %zmm5  /* zmm5 = Shifter + 4.0, i.e. table index 16 */
+
+/* table lookup sequence */
+        vmovups   Tbl_H+__svml_datan_data_internal_avx512(%rip), %zmm6  /* zmm6 = Tbl_H[0..7] */
+        vgetmantpd $0, {sae}, %zmm10, %zmm14  /* zmm14 = mantissa of DiffX in [1,2), source sign kept */
+        vgetexppd {sae}, %zmm10, %zmm11  /* zmm11 = exponent of DiffX */
+        vmovups   coeff_5+__svml_datan_data_internal_avx512(%rip), %zmm10  /* zmm10 reused: coeff_5 */
+
+/*
+ * if|X|>=MaxThreshold, set Y=X
+ * VMADD(D, Y, LargeMask, X, Zero);
+ */
+        vminpd    {sae}, %zmm8, %zmm7, %zmm9{%k2}  /* where k2: Y = min (LargeX, |x|) */
+        vcmppd    $29, {sae}, %zmm5, %zmm2, %k1  /* k1 = (table index >= 16) */
+        vmovups   Tbl_H+128+__svml_datan_data_internal_avx512(%rip), %zmm7  /* zmm7 = Tbl_H[16..23] */
+        vmovups   coeff_1+__svml_datan_data_internal_avx512(%rip), %zmm8  /* zmm8 reused: coeff_1 */
+        vgetmantpd $0, {sae}, %zmm9, %zmm3  /* zmm3 = mantissa of Y */
+        vgetexppd {sae}, %zmm9, %zmm12  /* zmm12 = exponent of Y */
+        vmovups   coeff_3+__svml_datan_data_internal_avx512(%rip), %zmm9  /* zmm9 reused: coeff_3 */
+        vpermt2pd Tbl_H+64+__svml_datan_data_internal_avx512(%rip), %zmm2, %zmm6  /* zmm6 = Tbl_H[index & 15] */
+        vsubpd    {rn-sae}, %zmm12, %zmm11, %zmm4  /* zmm4 = exponent(DiffX) - exponent(Y) */
+        vpermt2pd Tbl_H+192+__svml_datan_data_internal_avx512(%rip), %zmm2, %zmm7  /* zmm7 = Tbl_H[16 + (index & 15)] */
+        vrcp14pd  %zmm3, %zmm13  /* zmm13 = rcp, approximate 1/mantissa(Y) */
+        vmovups   coeff_4+__svml_datan_data_internal_avx512(%rip), %zmm12  /* zmm12 reused: coeff_4 */
+        vmovups   coeff_6+__svml_datan_data_internal_avx512(%rip), %zmm11  /* zmm11 reused: coeff_6 */
+        vblendmpd %zmm7, %zmm6, %zmm2{%k1}  /* zmm2 = k1 ? zmm7 : zmm6, the selected table entry */
+        vmulpd    {rn-sae}, %zmm13, %zmm14, %zmm0  /* zmm0 = q = mantissa(DiffX) * rcp */
+        vfnmadd231pd {rn-sae}, %zmm3, %zmm13, %zmm15  /* zmm15 = e = 1.0 - mantissa(Y)*rcp */
+        vfnmadd213pd {rn-sae}, %zmm14, %zmm0, %zmm3  /* zmm3 = mantissa(DiffX) - q*mantissa(Y) */
+        vfmadd213pd {rn-sae}, %zmm15, %zmm15, %zmm15  /* zmm15 = e + e*e */
+        vfmadd213pd {rn-sae}, %zmm13, %zmm13, %zmm15  /* zmm15 = rcp + rcp*(e + e*e), refined reciprocal */
+        vfmadd213pd {rn-sae}, %zmm0, %zmm15, %zmm3  /* zmm3 = q + zmm3 * refined reciprocal */
+        vscalefpd {rn-sae}, %zmm4, %zmm3, %zmm0  /* zmm0 = R = zmm3 * 2^zmm4 */
+
+/* set table value to Pi/2 for large X */
+        vblendmpd Pi2+__svml_datan_data_internal_avx512(%rip), %zmm2, %zmm3{%k2}  /* zmm3 = k2 ? Pi2 : table entry */
+        vmovups   coeff_2+__svml_datan_data_internal_avx512(%rip), %zmm2  /* zmm2 reused: coeff_2 */
+
+/* polynomial evaluation */
+        vmulpd    {rn-sae}, %zmm0, %zmm0, %zmm14  /* zmm14 = R^2 */
+        vmulpd    {rn-sae}, %zmm14, %zmm14, %zmm13  /* zmm13 = R^4 */
+        vmulpd    {rn-sae}, %zmm0, %zmm14, %zmm15  /* zmm15 = R^3 */
+        vfmadd231pd {rn-sae}, %zmm14, %zmm8, %zmm2  /* zmm2 = coeff_1*R^2 + coeff_2 */
+        vfmadd231pd {rn-sae}, %zmm14, %zmm9, %zmm12  /* zmm12 = coeff_3*R^2 + coeff_4 */
+        vfmadd213pd {rn-sae}, %zmm11, %zmm10, %zmm14  /* zmm14 = coeff_5*R^2 + coeff_6 */
+        vfmadd213pd {rn-sae}, %zmm12, %zmm13, %zmm2  /* zmm2 = zmm2*R^4 + zmm12 */
+        vfmadd213pd {rn-sae}, %zmm14, %zmm13, %zmm2  /* zmm2 = zmm2*R^4 + zmm14 */
+        vfmadd213pd {rn-sae}, %zmm0, %zmm15, %zmm2  /* zmm2 = R + R^3 * polynomial */
+        vaddpd    {rn-sae}, %zmm3, %zmm2, %zmm0  /* add the table entry (or Pi2) */
+        vxorpd    %zmm1, %zmm0, %zmm0  /* put the sign of x back */
+        ret
+
+END(_ZGVeN8v_atan_skx)
+
+        .section .rodata, "a"
+        .align 64
+
+#ifdef __svml_datan_data_internal_avx512_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+        __declspec(align(64)) VUINT32 AbsMask[8][2];
+        __declspec(align(64)) VUINT32 Shifter[8][2];
+        __declspec(align(64)) VUINT32 MaxThreshold[8][2];
+        __declspec(align(64)) VUINT32 MOne[8][2];
+        __declspec(align(64)) VUINT32 One[8][2];
+        __declspec(align(64)) VUINT32 LargeX[8][2];
+        __declspec(align(64)) VUINT32 Zero[8][2];
+        __declspec(align(64)) VUINT32 Tbl_H[32][2];
+        __declspec(align(64)) VUINT32 dIndexMed[8][2];
+        __declspec(align(64)) VUINT32 Pi2[8][2];
+        __declspec(align(64)) VUINT32 coeff[6][8][2];
+    } __svml_datan_data_internal_avx512;
+#endif
+__svml_datan_data_internal_avx512:
+        /*== AbsMask ==*/
+        .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff
+        /*== Shifter ==*/
+        .align 64
+        .quad 0x4318000000000000, 0x4318000000000000, 0x4318000000000000, 0x4318000000000000, 0x4318000000000000, 0x4318000000000000, 0x4318000000000000, 0x4318000000000000
+        /*== MaxThreshold ==*/
+        .align 64
+        .quad 0x401f800000000000, 0x401f800000000000, 0x401f800000000000, 0x401f800000000000, 0x401f800000000000, 0x401f800000000000, 0x401f800000000000, 0x401f800000000000
+        /*== MOne ==*/
+        .align 64
+        .quad 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000
+        /*== One ==*/
+        .align 64
+        .quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
+        /*== LargeX ==*/
+        .align 64
+        .quad 0x47f0000000000000, 0x47f0000000000000, 0x47f0000000000000, 0x47f0000000000000, 0x47f0000000000000, 0x47f0000000000000, 0x47f0000000000000, 0x47f0000000000000
+        /*== Zero ==*/
+        .align 64
+        .quad 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
+        /*== Tbl_H ==*/
+        .align 64
+        .quad 0x0000000000000000, 0x3fcf5b75f92c80dd
+        .quad 0x3fddac670561bb4f, 0x3fe4978fa3269ee1
+        .quad 0x3fe921fb54442d18, 0x3fecac7c57846f9e
+        .quad 0x3fef730bd281f69b, 0x3ff0d38f2c5ba09f
+        .quad 0x3ff1b6e192ebbe44, 0x3ff270ef55a53a25
+        .quad 0x3ff30b6d796a4da8, 0x3ff38d6a6ce13353
+        .quad 0x3ff3fc176b7a8560, 0x3ff45b54837351a0
+        .quad 0x3ff4ae10fc6589a5, 0x3ff4f68dea672617
+        .quad 0x3ff5368c951e9cfd, 0x3ff56f6f33a3e6a7
+        .quad 0x3ff5a25052114e60, 0x3ff5d013c41adabd
+        .quad 0x3ff5f97315254857, 0x3ff61f06c6a92b89
+        .quad 0x3ff6414d44094c7c, 0x3ff660b02c736a06
+        .quad 0x3ff67d8863bc99bd, 0x3ff698213a9d5053
+        .quad 0x3ff6b0bae830c070, 0x3ff6c78c7edeb195
+        .quad 0x3ff6dcc57bb565fd, 0x3ff6f08f07435fec
+        .quad 0x3ff7030cf9403197, 0x3ff7145eac2088a4
+        /*== dIndexMed ==*/
+        .align 64
+        .quad 0x4318000000000010, 0x4318000000000010, 0x4318000000000010, 0x4318000000000010, 0x4318000000000010, 0x4318000000000010, 0x4318000000000010, 0x4318000000000010
+        /*== Pi2 ==*/
+        .align 64
+        .quad 0x3ff921fb54442d18, 0x3ff921fb54442d18, 0x3ff921fb54442d18, 0x3ff921fb54442d18, 0x3ff921fb54442d18, 0x3ff921fb54442d18, 0x3ff921fb54442d18, 0x3ff921fb54442d18
+        /*== coeff6 ==*/
+        .align 64
+        .quad 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97, 0x3fb2e9b9f5c4fe97
+        .quad 0xbfb74257c46790cc, 0xbfb74257c46790cc, 0xbfb74257c46790cc, 0xbfb74257c46790cc, 0xbfb74257c46790cc, 0xbfb74257c46790cc, 0xbfb74257c46790cc, 0xbfb74257c46790cc
+        .quad 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0, 0x3fbc71bfeff916a0
+        .quad 0xbfc249248eef04da, 0xbfc249248eef04da, 0xbfc249248eef04da, 0xbfc249248eef04da, 0xbfc249248eef04da, 0xbfc249248eef04da, 0xbfc249248eef04da, 0xbfc249248eef04da
+        .quad 0x3fc999999998741e, 0x3fc999999998741e, 0x3fc999999998741e, 0x3fc999999998741e, 0x3fc999999998741e, 0x3fc999999998741e, 0x3fc999999998741e, 0x3fc999999998741e
+        .quad 0xbfd555555555554d, 0xbfd555555555554d, 0xbfd555555555554d, 0xbfd555555555554d, 0xbfd555555555554d, 0xbfd555555555554d, 0xbfd555555555554d, 0xbfd555555555554d
+        .align 64
+        .type	__svml_datan_data_internal_avx512,@object
+        .size	__svml_datan_data_internal_avx512,.-__svml_datan_data_internal_avx512
+        .align 8
+
+.FLT_10:
+        .long	0x00000000,0x3ff00000
+        .type	.FLT_10,@object
+        .size	.FLT_10,8
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core-avx2.S
new file mode 100644
index 0000000000..27623cdf16
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core-avx2.S
@@ -0,0 +1,20 @@ 
+/* AVX2 version of vectorized atanf, vector length is 16.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN16v_atanf _ZGVeN16v_atanf_avx2_wrapper  /* Every use of _ZGVeN16v_atanf in the file included below now expands to the _avx2_wrapper name.  */
+#include "../svml_s_atanf16_core.S"  /* Generic 16-lane source, assembled here under the renamed symbol.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core.c
new file mode 100644
index 0000000000..940de26615
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core.c
@@ -0,0 +1,28 @@ 
+/* Multiple versions of vectorized atanf, vector length is 16.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN16v_atanf  /* Symbol handled by this file; consumed by the macros below.  */
+#include "ifunc-mathvec-avx512-skx.h"  /* REDIRECT_NAME and IFUNC_SELECTOR are not defined in this file; presumably they come from this header -- confirm.  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());  /* Binds SYMBOL_NAME to whatever IFUNC_SELECTOR () returns.  */
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16v_atanf, __GI__ZGVeN16v_atanf,
+	       __redirect__ZGVeN16v_atanf)
+  __attribute__ ((visibility ("hidden")));  /* Only in SHARED builds: hidden-visibility declaration involving the __GI_ name.  */
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S
new file mode 100644
index 0000000000..4a37f03e69
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S
@@ -0,0 +1,174 @@ 
+/* Function atanf vectorized with AVX-512.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ *      For    0.0    <= x <=  7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x)
+ *      For  7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x)
+ *      For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x)
+ *      For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x)
+ *      For 39.0/16.0 <= x <=    inf   : atan(x) = atan(inf) + atan(s), where s=-1.0/x
+ *      Where atan(s) ~= s+s^3*Poly11(s^2) on interval |s|<7.0/16.0.
+ *
+ */
+
+/* Offsets for data table __svml_satan_data_internal_avx512
+ */
+#define AbsMask                       	0	/* Byte offsets; each field is one 64-byte vector unless noted.  The routine reads AbsMask via the bare table symbol.  */
+#define Shifter                       	64
+#define MaxThreshold                  	128
+#define MOne                          	192
+#define One                           	256
+#define LargeX                        	320
+#define Zero                          	384	/* Not referenced by name in _ZGVeN16v_atanf_skx.  */
+#define Tbl_H                         	448	/* 32 floats = 128 bytes, hence the jump to 576.  */
+#define Pi2                           	576
+#define coeff_1                       	640	/* coeff_1 .. coeff_3 are three consecutive 64-byte vectors.  */
+#define coeff_2                       	704
+#define coeff_3                       	768
+
+#include <sysdep.h>
+
+        .text
+	.section .text.evex512,"ax",@progbits
+ENTRY(_ZGVeN16v_atanf_skx)  /* In: zmm0 = 16 floats x.  Out: zmm0 = atanf (x) per lane.  No stack use, no calls.  */
+        vandps    __svml_satan_data_internal_avx512(%rip), %zmm0, %zmm7  /* zmm7 = |x| (AbsMask sits at offset 0) */
+        vmovups   MaxThreshold+__svml_satan_data_internal_avx512(%rip), %zmm3  /* zmm3 = 7.75 */
+        vmovups   One+__svml_satan_data_internal_avx512(%rip), %zmm8  /* zmm8 = 1.0 */
+
+/* round to 2 bits after binary point */
+        vreduceps $40, {sae}, %zmm7, %zmm5  /* zmm5 = DiffX = |x| - (|x| rounded to a multiple of 1/4) */
+
+/* saturate X range */
+        vmovups   LargeX+__svml_satan_data_internal_avx512(%rip), %zmm6  /* zmm6 = 2^32 */
+        vmovups   Shifter+__svml_satan_data_internal_avx512(%rip), %zmm2  /* zmm2 = 2^21 */
+        vcmpps    $29, {sae}, %zmm3, %zmm7, %k1  /* k1 = (|x| >= MaxThreshold); predicate 29 is GE_OQ */
+
+/* table lookup sequence */
+        vmovups   Tbl_H+__svml_satan_data_internal_avx512(%rip), %zmm3  /* zmm3 = Tbl_H[0..15] */
+        vsubps    {rn-sae}, %zmm5, %zmm7, %zmm4  /* zmm4 = |x| - DiffX = |x| rounded to a multiple of 1/4 */
+        vaddps    {rn-sae}, %zmm2, %zmm7, %zmm1  /* zmm1 = |x| + Shifter; its low mantissa bits hold round(4*|x|) */
+        vxorps    %zmm0, %zmm7, %zmm0  /* zmm0 = x ^ |x| = sign bit of x */
+        vfmadd231ps {rn-sae}, %zmm7, %zmm4, %zmm8  /* zmm8 = Y = 1.0 + zmm4*|x| */
+        vmovups   coeff_2+__svml_satan_data_internal_avx512(%rip), %zmm4  /* zmm4 reused: coeff_2 */
+
+/* if|X|>=MaxThreshold, set DiffX=-1 */
+        vblendmps MOne+__svml_satan_data_internal_avx512(%rip), %zmm5, %zmm9{%k1}  /* zmm9 = DiffX = k1 ? -1.0 : zmm5 */
+        vmovups   coeff_3+__svml_satan_data_internal_avx512(%rip), %zmm5  /* zmm5 reused: coeff_3 */
+
+/* if|X|>=MaxThreshold, set Y=X */
+        vminps    {sae}, %zmm7, %zmm6, %zmm8{%k1}  /* where k1: Y = min (LargeX, |x|) */
+
+/* R+Rl = DiffX/Y */
+        vgetmantps $0, {sae}, %zmm9, %zmm12  /* zmm12 = mantissa of DiffX in [1,2), source sign kept */
+        vgetexpps {sae}, %zmm9, %zmm10  /* zmm10 = exponent of DiffX */
+        vpermt2ps Tbl_H+64+__svml_satan_data_internal_avx512(%rip), %zmm1, %zmm3  /* zmm3 = Tbl_H[index & 31] */
+        vgetmantps $0, {sae}, %zmm8, %zmm15  /* zmm15 = mantissa of Y */
+        vgetexpps {sae}, %zmm8, %zmm11  /* zmm11 = exponent of Y */
+        vmovups   coeff_1+__svml_satan_data_internal_avx512(%rip), %zmm1  /* zmm1 reused: coeff_1 */
+
+/* set table value to Pi/2 for large X */
+        vblendmps Pi2+__svml_satan_data_internal_avx512(%rip), %zmm3, %zmm9{%k1}  /* zmm9 = k1 ? Pi2 : table entry */
+        vrcp14ps  %zmm15, %zmm13  /* zmm13 = rcp, approximate 1/mantissa(Y) */
+        vsubps    {rn-sae}, %zmm11, %zmm10, %zmm2  /* zmm2 = exponent(DiffX) - exponent(Y) */
+        vmulps    {rn-sae}, %zmm13, %zmm12, %zmm14  /* zmm14 = q = mantissa(DiffX) * rcp */
+        vfnmadd213ps {rn-sae}, %zmm12, %zmm14, %zmm15  /* zmm15 = mantissa(DiffX) - q*mantissa(Y) */
+        vfmadd213ps {rn-sae}, %zmm14, %zmm13, %zmm15  /* zmm15 = q + rcp*zmm15, one correction step */
+        vscalefps {rn-sae}, %zmm2, %zmm15, %zmm7  /* zmm7 = R = zmm15 * 2^zmm2 */
+
+/* polynomial evaluation */
+        vmulps    {rn-sae}, %zmm7, %zmm7, %zmm8  /* zmm8 = R^2 */
+        vmulps    {rn-sae}, %zmm7, %zmm8, %zmm6  /* zmm6 = R^3 */
+        vfmadd231ps {rn-sae}, %zmm8, %zmm1, %zmm4  /* zmm4 = coeff_1*R^2 + coeff_2 */
+        vfmadd213ps {rn-sae}, %zmm5, %zmm4, %zmm8  /* zmm8 = zmm4*R^2 + coeff_3 */
+        vfmadd213ps {rn-sae}, %zmm7, %zmm6, %zmm8  /* zmm8 = R + R^3 * polynomial */
+        vaddps    {rn-sae}, %zmm9, %zmm8, %zmm10  /* add the table entry (or Pi2) */
+        vxorps    %zmm0, %zmm10, %zmm0  /* put the sign of x back */
+        ret
+
+END(_ZGVeN16v_atanf_skx)
+
+        .section .rodata, "a"
+        .align 64
+
+#ifdef __svml_satan_data_internal_avx512_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+        __declspec(align(64)) VUINT32 AbsMask[16][1];
+        __declspec(align(64)) VUINT32 Shifter[16][1];
+        __declspec(align(64)) VUINT32 MaxThreshold[16][1];
+        __declspec(align(64)) VUINT32 MOne[16][1];
+        __declspec(align(64)) VUINT32 One[16][1];
+        __declspec(align(64)) VUINT32 LargeX[16][1];
+        __declspec(align(64)) VUINT32 Zero[16][1];
+        __declspec(align(64)) VUINT32 Tbl_H[32][1];
+        __declspec(align(64)) VUINT32 Pi2[16][1];
+        __declspec(align(64)) VUINT32 coeff[3][16][1];
+    } __svml_satan_data_internal_avx512;
+#endif
+__svml_satan_data_internal_avx512:
+        /*== AbsMask ==*/
+        .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
+        /*== Shifter ==*/
+        .align 64
+        .long 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000
+        /*== MaxThreshold ==*/
+        .align 64
+        .long 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000
+        /*== MOne ==*/
+        .align 64
+        .long 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000
+        /*== One ==*/
+        .align 64
+        .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
+        /*== LargeX ==*/
+        .align 64
+        .long 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000
+        /*== Zero ==*/
+        .align 64
+        .long 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
+        /*== Tbl_H ==*/
+        .align 64
+        .long 0x00000000, 0x3e7adbb0
+        .long 0x3eed6338, 0x3f24bc7d
+        .long 0x3f490fdb, 0x3f6563e3
+        .long 0x3f7b985f, 0x3f869c79
+        .long 0x3f8db70d, 0x3f93877b
+        .long 0x3f985b6c, 0x3f9c6b53
+        .long 0x3f9fe0bb, 0x3fa2daa4
+        .long 0x3fa57088, 0x3fa7b46f
+        .long 0x3fa9b465, 0x3fab7b7a
+        .long 0x3fad1283, 0x3fae809e
+        .long 0x3fafcb99, 0x3fb0f836
+        .long 0x3fb20a6a, 0x3fb30581
+        .long 0x3fb3ec43, 0x3fb4c10a
+        .long 0x3fb585d7, 0x3fb63c64
+        .long 0x3fb6e62c, 0x3fb78478
+        .long 0x3fb81868, 0x3fb8a2f5
+        /*== Pi2 ==*/
+        .align 64
+        .long 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB
+        /*== coeff3 ==*/
+        .align 64
+        .long 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de
+        .long 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2
+        .long 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa
+        .align 64
+        .type	__svml_satan_data_internal_avx512,@object
+        .size	__svml_satan_data_internal_avx512,.-__svml_satan_data_internal_avx512
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core-sse2.S
new file mode 100644
index 0000000000..fe81170666
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core-sse2.S
@@ -0,0 +1,20 @@ 
+/* SSE2 version of vectorized atanf, vector length is 4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN4v_atanf _ZGVbN4v_atanf_sse2  /* Every use of _ZGVbN4v_atanf in the file included below now expands to the _sse2 name.  */
+#include "../svml_s_atanf4_core.S"  /* Generic 4-lane source, assembled here under the renamed symbol.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core.c
new file mode 100644
index 0000000000..975ece6812
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core.c
@@ -0,0 +1,28 @@ 
+/* Multiple versions of vectorized atanf, vector length is 4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN4v_atanf  /* Symbol handled by this file; consumed by the macros below.  */
+#include "ifunc-mathvec-sse4_1.h"  /* REDIRECT_NAME and IFUNC_SELECTOR are not defined in this file; presumably they come from this header -- confirm.  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());  /* Binds SYMBOL_NAME to whatever IFUNC_SELECTOR () returns.  */
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4v_atanf, __GI__ZGVbN4v_atanf,
+	       __redirect__ZGVbN4v_atanf)
+  __attribute__ ((visibility ("hidden")));  /* Only in SHARED builds: hidden-visibility declaration involving the __GI_ name.  */
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core_sse4.S
new file mode 100644
index 0000000000..c58a894e10
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core_sse4.S
@@ -0,0 +1,164 @@ 
+/* Function atanf vectorized with SSE4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ *      For    0.0    <= x <=  7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x)
+ *      For  7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x)
+ *      For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x)
+ *      For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x)
+ *      For 39.0/16.0 <= x <=    inf   : atan(x) = atan(inf) + atan(s), where s=-1.0/x
+ *      Where atan(s) ~= s+s^3*Poly11(s^2) on interval |s|<7.0/16.0.
+ *
+ */
+
+/* Offsets for data table __svml_satan_data_internal
+ */
+#define _sSIGN_MASK                   	0
+#define _sABS_MASK                    	16
+#define _sONE                         	32
+#define _sPIO2                        	48
+#define _sPC8                         	64
+#define _sPC7                         	80
+#define _sPC6                         	96
+#define _sPC5                         	112
+#define _sPC4                         	128
+#define _sPC3                         	144
+#define _sPC2                         	160
+#define _sPC1                         	176
+#define _sPC0                         	192
+
+#include <sysdep.h>
+
+        .text
+	.section .text.sse4,"ax",@progbits
+ENTRY(_ZGVbN4v_atanf_sse4)
+/*
+ * atanf for the four packed floats in %xmm0; the result is
+ * returned in %xmm0.  The input is reduced with minps/maxps to
+ * min(|x|,1)/max(|x|,1), the sign is patched in with bit masks,
+ * and one polynomial in r^2 is evaluated for every lane.
+ * All constants are loaded from __svml_satan_data_internal.
+ */
+        movups    _sABS_MASK+__svml_satan_data_internal(%rip), %xmm2
+
+/*
+ * 1) If x>1,      then r=-1/x, PIO2=Pi/2
+ * 2) If -1<=x<=1, then r=x,    PIO2=0
+ * 3) If x<-1,     then r=-1/x, PIO2=-Pi/2
+ */
+        movups    _sONE+__svml_satan_data_internal(%rip), %xmm1
+        andps     %xmm0, %xmm2  /* xmm2 = |x| */
+        movaps    %xmm2, %xmm9
+        movaps    %xmm1, %xmm3
+        cmpleps   %xmm1, %xmm9  /* xmm9 = lane mask, all ones where |x| <= 1.0 */
+        maxps     %xmm2, %xmm3  /* xmm3 = max(1.0, |x|) */
+        minps     %xmm2, %xmm1  /* xmm1 = min(1.0, |x|) */
+        divps     %xmm3, %xmm1  /* xmm1 = min/max, i.e. |x| or 1/|x| */
+        movups    __svml_satan_data_internal(%rip), %xmm4  /* _sSIGN_MASK (offset 0) */
+        movaps    %xmm9, %xmm10  /* keep the |x| <= 1.0 mask for the PIO2 select */
+        andps     %xmm4, %xmm0  /* xmm0 = sign bit of x */
+        andnps    %xmm4, %xmm9  /* xmm9 = sign-flip bit, set only where |x| > 1.0 */
+        pxor      %xmm0, %xmm9  /* combine with the sign of x */
+        pxor      %xmm1, %xmm9  /* xmm9 = r (x, or -1/x where |x| > 1.0) */
+
+/* Polynomial in r^2, evaluated as two chains over r^4. */
+        movaps    %xmm9, %xmm8
+        mulps     %xmm9, %xmm8  /* xmm8 = r^2 */
+        movaps    %xmm8, %xmm7
+        mulps     %xmm8, %xmm7  /* xmm7 = r^4 */
+        movups    _sPC8+__svml_satan_data_internal(%rip), %xmm6
+        mulps     %xmm7, %xmm6  /* chain A: PC8*r^4 */
+        movups    _sPC7+__svml_satan_data_internal(%rip), %xmm5
+        mulps     %xmm7, %xmm5  /* chain B: PC7*r^4 */
+        addps     _sPC6+__svml_satan_data_internal(%rip), %xmm6
+        mulps     %xmm7, %xmm6
+        addps     _sPC5+__svml_satan_data_internal(%rip), %xmm5
+        mulps     %xmm7, %xmm5
+        addps     _sPC4+__svml_satan_data_internal(%rip), %xmm6
+        mulps     %xmm7, %xmm6
+        addps     _sPC3+__svml_satan_data_internal(%rip), %xmm5
+        mulps     %xmm5, %xmm7  /* xmm7 = chain B * r^4 (r^4 itself is no longer needed) */
+        addps     _sPC2+__svml_satan_data_internal(%rip), %xmm6
+        mulps     %xmm8, %xmm6  /* chain A * r^2 */
+        addps     _sPC1+__svml_satan_data_internal(%rip), %xmm7
+        andnps    _sPIO2+__svml_satan_data_internal(%rip), %xmm10  /* Pi/2 where |x| > 1.0, else 0 */
+        addps     %xmm6, %xmm7  /* merge chains A and B */
+        mulps     %xmm7, %xmm8  /* xmm8 = r^2 * merged polynomial */
+        pxor      %xmm0, %xmm10  /* give the Pi/2 term the sign of x */
+        addps     _sPC0+__svml_satan_data_internal(%rip), %xmm8
+
+/* Reconstruction: r * polynomial + signed Pi/2 term. */
+        mulps     %xmm8, %xmm9
+        addps     %xmm9, %xmm10
+        movaps    %xmm10, %xmm0  /* return value in xmm0 */
+        ret
+
+END(_ZGVbN4v_atanf_sse4)
+
+        .section .rodata, "a"
+        .align 16
+
+#ifdef __svml_satan_data_internal_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+        __declspec(align(16)) VUINT32 _sSIGN_MASK[4][1];
+        __declspec(align(16)) VUINT32 _sABS_MASK[4][1];
+        __declspec(align(16)) VUINT32 _sONE[4][1];
+        __declspec(align(16)) VUINT32 _sPIO2[4][1];
+        __declspec(align(16)) VUINT32 _sPC8[4][1];
+        __declspec(align(16)) VUINT32 _sPC7[4][1];
+        __declspec(align(16)) VUINT32 _sPC6[4][1];
+        __declspec(align(16)) VUINT32 _sPC5[4][1];
+        __declspec(align(16)) VUINT32 _sPC4[4][1];
+        __declspec(align(16)) VUINT32 _sPC3[4][1];
+        __declspec(align(16)) VUINT32 _sPC2[4][1];
+        __declspec(align(16)) VUINT32 _sPC1[4][1];
+        __declspec(align(16)) VUINT32 _sPC0[4][1];
+} __svml_satan_data_internal;
+#endif
+__svml_satan_data_internal:
+        .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 //_sSIGN_MASK
+        .align 16
+        .long 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF //_sABS_MASK
+        .align 16
+        .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 //_sONE
+        .align 16
+        .long 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB //_sPIO2
+        .align 16
+        .long 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0 //_sPC8
+        .align 16
+        .long 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631 //_sPC7
+        .align 16
+        .long 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384 //_sPC6
+        .align 16
+        .long 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629 //_sPC5
+        .align 16
+        .long 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474 //_sPC4
+        .align 16
+        .long 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8 //_sPC3
+        .align 16
+        .long 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F //_sPC2
+        .align 16
+        .long 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49 //_sPC1
+        .align 16
+        .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 //_sPC0
+        .align 16
+        .type	__svml_satan_data_internal,@object
+        .size	__svml_satan_data_internal,.-__svml_satan_data_internal
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core-sse.S
new file mode 100644
index 0000000000..1652a8f5c6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core-sse.S
@@ -0,0 +1,20 @@ 
+/* SSE version of vectorized atanf, vector length is 8.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN8v_atanf _ZGVdN8v_atanf_sse_wrapper
+#include "../svml_s_atanf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core.c
new file mode 100644
index 0000000000..733d8c3bc3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core.c
@@ -0,0 +1,28 @@ 
+/* Multiple versions of vectorized atanf, vector length is 8.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN8v_atanf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8v_atanf, __GI__ZGVdN8v_atanf,
+	       __redirect__ZGVdN8v_atanf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core_avx2.S
new file mode 100644
index 0000000000..e333f979c4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core_avx2.S
@@ -0,0 +1,148 @@ 
+/* Function atanf vectorized with AVX2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ *      For    0.0    <= x <=  7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x)
+ *      For  7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x)
+ *      For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x)
+ *      For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x)
+ *      For 39.0/16.0 <= x <=    inf   : atan(x) = atan(inf) + atan(s), where s=-1.0/x
+ *      Where atan(s) ~= s+s^3*Poly11(s^2) on interval |s|<7.0/16.0.
+ *
+ */
+
+/* Offsets for data table __svml_satan_data_internal
+ */
+#define _sSIGN_MASK                   	0
+#define _sABS_MASK                    	32
+#define _sONE                         	64
+#define _sPIO2                        	96
+#define _sPC8                         	128
+#define _sPC7                         	160
+#define _sPC6                         	192
+#define _sPC5                         	224
+#define _sPC4                         	256
+#define _sPC3                         	288
+#define _sPC2                         	320
+#define _sPC1                         	352
+#define _sPC0                         	384
+
+#include <sysdep.h>
+
+        .text
+	.section .text.avx2,"ax",@progbits
+ENTRY(_ZGVdN8v_atanf_avx2)
+/*
+ * atanf for the eight packed floats in %ymm0; the result is
+ * returned in %ymm0.  Preload 1.0, the sign-bit mask and the
+ * leading coefficient (PC7) of the second polynomial chain.
+ */
+        vmovups   _sONE+__svml_satan_data_internal(%rip), %ymm2
+        vmovups   __svml_satan_data_internal(%rip), %ymm7
+        vmovups   _sPC7+__svml_satan_data_internal(%rip), %ymm13
+
+/*
+ * Argument reduction with vminps/vmaxps:
+ * 1) If x>1,      then r=-1/x, PIO2=Pi/2
+ * 2) If -1<=x<=1, then r=x,    PIO2=0
+ * 3) If x<-1,     then r=-1/x, PIO2=-Pi/2
+ * The magnitude is min(|x|,1)/max(|x|,1); signs come from masks.
+ */
+        vandps    _sABS_MASK+__svml_satan_data_internal(%rip), %ymm0, %ymm3  /* ymm3 = |x| */
+        vmaxps    %ymm3, %ymm2, %ymm5  /* ymm5 = max(1.0, |x|) */
+        vminps    %ymm3, %ymm2, %ymm4  /* ymm4 = min(1.0, |x|) */
+        vcmple_oqps %ymm2, %ymm3, %ymm6  /* ymm6 = lane mask, all ones where |x| <= 1.0 */
+        vdivps    %ymm5, %ymm4, %ymm11  /* ymm11 = min/max, i.e. |x| or 1/|x| */
+        vandps    %ymm7, %ymm0, %ymm9  /* ymm9 = sign bit of x */
+        vandnps   %ymm7, %ymm6, %ymm8  /* ymm8 = sign-flip bit, set only where |x| > 1.0 */
+        vxorps    %ymm9, %ymm8, %ymm10  /* combine with the sign of x */
+        vxorps    %ymm11, %ymm10, %ymm15  /* ymm15 = r (x, or -1/x where |x| > 1.0) */
+
+/* Polynomial in r^2, evaluated as two FMA chains over r^4. */
+        vmulps    %ymm15, %ymm15, %ymm14  /* ymm14 = r^2 */
+        vmovups   _sPC8+__svml_satan_data_internal(%rip), %ymm0
+        vmulps    %ymm14, %ymm14, %ymm12  /* ymm12 = r^4 */
+        vfmadd213ps _sPC6+__svml_satan_data_internal(%rip), %ymm12, %ymm0  /* chain A: ymm0 = ymm0*r^4 + PC6 */
+        vfmadd213ps _sPC5+__svml_satan_data_internal(%rip), %ymm12, %ymm13  /* chain B: ymm13 = ymm13*r^4 + PC5 */
+        vfmadd213ps _sPC4+__svml_satan_data_internal(%rip), %ymm12, %ymm0
+        vfmadd213ps _sPC3+__svml_satan_data_internal(%rip), %ymm12, %ymm13
+        vfmadd213ps _sPC2+__svml_satan_data_internal(%rip), %ymm12, %ymm0
+        vfmadd213ps _sPC1+__svml_satan_data_internal(%rip), %ymm12, %ymm13
+        vfmadd213ps %ymm13, %ymm14, %ymm0  /* merge: ymm0 = chain A * r^2 + chain B */
+        vfmadd213ps _sPC0+__svml_satan_data_internal(%rip), %ymm14, %ymm0  /* ymm0 = ymm0*r^2 + PC0 */
+        vandnps   _sPIO2+__svml_satan_data_internal(%rip), %ymm6, %ymm1  /* Pi/2 where |x| > 1.0, else 0 */
+        vxorps    %ymm9, %ymm1, %ymm1  /* give the Pi/2 term the sign of x */
+
+/* Reconstruction: r * polynomial + signed Pi/2 term, left in ymm0. */
+        vfmadd213ps %ymm1, %ymm15, %ymm0
+        ret
+
+END(_ZGVdN8v_atanf_avx2)
+
+        .section .rodata, "a"
+        .align 32
+
+#ifdef __svml_satan_data_internal_typedef
+typedef unsigned int VUINT32;
+typedef struct {
+        __declspec(align(32)) VUINT32 _sSIGN_MASK[8][1];
+        __declspec(align(32)) VUINT32 _sABS_MASK[8][1];
+        __declspec(align(32)) VUINT32 _sONE[8][1];
+        __declspec(align(32)) VUINT32 _sPIO2[8][1];
+        __declspec(align(32)) VUINT32 _sPC8[8][1];
+        __declspec(align(32)) VUINT32 _sPC7[8][1];
+        __declspec(align(32)) VUINT32 _sPC6[8][1];
+        __declspec(align(32)) VUINT32 _sPC5[8][1];
+        __declspec(align(32)) VUINT32 _sPC4[8][1];
+        __declspec(align(32)) VUINT32 _sPC3[8][1];
+        __declspec(align(32)) VUINT32 _sPC2[8][1];
+        __declspec(align(32)) VUINT32 _sPC1[8][1];
+        __declspec(align(32)) VUINT32 _sPC0[8][1];
+} __svml_satan_data_internal;
+#endif
+__svml_satan_data_internal:
+        .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 //_sSIGN_MASK
+        .align 32
+        .long 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF //_sABS_MASK
+        .align 32
+        .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 //_sONE
+        .align 32
+        .long 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB //_sPIO2
+        .align 32
+        .long 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0 //_sPC8
+        .align 32
+        .long 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631 //_sPC7
+        .align 32
+        .long 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384 //_sPC6
+        .align 32
+        .long 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629 //_sPC5
+        .align 32
+        .long 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474 //_sPC4
+        .align 32
+        .long 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8 //_sPC3
+        .align 32
+        .long 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F //_sPC2
+        .align 32
+        .long 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49 //_sPC1
+        .align 32
+        .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 //_sPC0
+        .align 32
+        .type	__svml_satan_data_internal,@object
+        .size	__svml_satan_data_internal,.-__svml_satan_data_internal
diff --git a/sysdeps/x86_64/fpu/svml_d_atan2_core.S b/sysdeps/x86_64/fpu/svml_d_atan2_core.S
new file mode 100644
index 0000000000..e86d5b7047
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_atan2_core.S
@@ -0,0 +1,29 @@ 
+/* Function atan vectorized with SSE2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVbN2v_atan)
+WRAPPER_IMPL_SSE2 atan
+END (_ZGVbN2v_atan)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVbN2v_atan)
+#endif
diff --git a/sysdeps/x86_64/fpu/svml_d_atan4_core.S b/sysdeps/x86_64/fpu/svml_d_atan4_core.S
new file mode 100644
index 0000000000..eb11fd2f17
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_atan4_core.S
@@ -0,0 +1,29 @@ 
+/* Function atan vectorized with AVX2, wrapper version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVdN4v_atan)
+WRAPPER_IMPL_AVX _ZGVbN2v_atan
+END (_ZGVdN4v_atan)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVdN4v_atan)
+#endif
diff --git a/sysdeps/x86_64/fpu/svml_d_atan4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_atan4_core_avx.S
new file mode 100644
index 0000000000..b83a4be33d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_atan4_core_avx.S
@@ -0,0 +1,25 @@ 
+/* Function atan vectorized in AVX ISA as wrapper to SSE4 ISA version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVcN4v_atan)
+WRAPPER_IMPL_AVX _ZGVbN2v_atan
+END (_ZGVcN4v_atan)
diff --git a/sysdeps/x86_64/fpu/svml_d_atan8_core.S b/sysdeps/x86_64/fpu/svml_d_atan8_core.S
new file mode 100644
index 0000000000..9685a32bdc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_atan8_core.S
@@ -0,0 +1,25 @@ 
+/* Function atan vectorized with AVX-512, wrapper to AVX2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVeN8v_atan)
+WRAPPER_IMPL_AVX512 _ZGVdN4v_atan
+END (_ZGVeN8v_atan)
diff --git a/sysdeps/x86_64/fpu/svml_s_atanf16_core.S b/sysdeps/x86_64/fpu/svml_s_atanf16_core.S
new file mode 100644
index 0000000000..f82d2422ae
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_s_atanf16_core.S
@@ -0,0 +1,25 @@ 
+/* Function atanf vectorized with AVX-512. Wrapper to AVX2 version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVeN16v_atanf)
+WRAPPER_IMPL_AVX512 _ZGVdN8v_atanf
+END (_ZGVeN16v_atanf)
diff --git a/sysdeps/x86_64/fpu/svml_s_atanf4_core.S b/sysdeps/x86_64/fpu/svml_s_atanf4_core.S
new file mode 100644
index 0000000000..6b8c4d9624
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_s_atanf4_core.S
@@ -0,0 +1,29 @@ 
+/* Function atanf vectorized with SSE2, wrapper version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVbN4v_atanf)
+WRAPPER_IMPL_SSE2 atanf
+END (_ZGVbN4v_atanf)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVbN4v_atanf)
+#endif
diff --git a/sysdeps/x86_64/fpu/svml_s_atanf8_core.S b/sysdeps/x86_64/fpu/svml_s_atanf8_core.S
new file mode 100644
index 0000000000..315681f6c0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_s_atanf8_core.S
@@ -0,0 +1,29 @@ 
+/* Function atanf vectorized with AVX2, wrapper version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVdN8v_atanf)
+WRAPPER_IMPL_AVX _ZGVbN4v_atanf
+END (_ZGVdN8v_atanf)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVdN8v_atanf)
+#endif
diff --git a/sysdeps/x86_64/fpu/svml_s_atanf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_atanf8_core_avx.S
new file mode 100644
index 0000000000..b9cd502186
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_s_atanf8_core_avx.S
@@ -0,0 +1,25 @@ 
+/* Function atanf vectorized in AVX ISA as wrapper to SSE4 ISA version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+        .text
+ENTRY (_ZGVcN8v_atanf)
+WRAPPER_IMPL_AVX _ZGVbN4v_atanf
+END (_ZGVcN8v_atanf)
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-atan-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-atan-avx.c
new file mode 100644
index 0000000000..0f7176a20b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-atan-avx.c
@@ -0,0 +1 @@ 
+#include "test-double-libmvec-atan.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-atan-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-atan-avx2.c
new file mode 100644
index 0000000000..0f7176a20b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-atan-avx2.c
@@ -0,0 +1 @@ 
+#include "test-double-libmvec-atan.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-atan-avx512f.c b/sysdeps/x86_64/fpu/test-double-libmvec-atan-avx512f.c
new file mode 100644
index 0000000000..0f7176a20b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-atan-avx512f.c
@@ -0,0 +1 @@ 
+#include "test-double-libmvec-atan.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-atan.c b/sysdeps/x86_64/fpu/test-double-libmvec-atan.c
new file mode 100644
index 0000000000..982687b169
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-atan.c
@@ -0,0 +1,3 @@ 
+#define LIBMVEC_TYPE double
+#define LIBMVEC_FUNC atan
+#include "test-vector-abi-arg1.h"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
index 0abc7d2021..467c913990 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
@@ -28,6 +28,7 @@  VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVbN2v_log)
 VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVbN2v_exp)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVbN2vv_pow)
 VECTOR_WRAPPER (WRAPPER_NAME (acos), _ZGVbN2v_acos)
+VECTOR_WRAPPER (WRAPPER_NAME (atan), _ZGVbN2v_atan)
 
 #define VEC_INT_TYPE __m128i
 
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
index dda093b914..b72a7de84e 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
@@ -31,6 +31,7 @@  VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVdN4v_log)
 VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVdN4v_exp)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVdN4vv_pow)
 VECTOR_WRAPPER (WRAPPER_NAME (acos), _ZGVdN4v_acos)
+VECTOR_WRAPPER (WRAPPER_NAME (atan), _ZGVdN4v_atan)
 
 #ifndef __ILP32__
 # define VEC_INT_TYPE __m256i
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
index f3230463bb..d2434df21e 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
@@ -28,6 +28,7 @@  VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVcN4v_log)
 VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVcN4v_exp)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVcN4vv_pow)
 VECTOR_WRAPPER (WRAPPER_NAME (acos), _ZGVcN4v_acos)
+VECTOR_WRAPPER (WRAPPER_NAME (atan), _ZGVcN4v_atan)
 
 #define VEC_INT_TYPE __m128i
 
diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
index cf9f52faf0..f7aaf8159e 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
@@ -28,6 +28,7 @@  VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVeN8v_log)
 VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVeN8v_exp)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVeN8vv_pow)
 VECTOR_WRAPPER (WRAPPER_NAME (acos), _ZGVeN8v_acos)
+VECTOR_WRAPPER (WRAPPER_NAME (atan), _ZGVeN8v_atan)
 
 #ifndef __ILP32__
 # define VEC_INT_TYPE __m512i
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx.c
new file mode 100644
index 0000000000..9251c65f8a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx.c
@@ -0,0 +1 @@ 
+#include "test-float-libmvec-atanf.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx2.c
new file mode 100644
index 0000000000..9251c65f8a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx2.c
@@ -0,0 +1 @@ 
+#include "test-float-libmvec-atanf.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx512f.c b/sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx512f.c
new file mode 100644
index 0000000000..9251c65f8a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx512f.c
@@ -0,0 +1 @@ 
+#include "test-float-libmvec-atanf.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-atanf.c b/sysdeps/x86_64/fpu/test-float-libmvec-atanf.c
new file mode 100644
index 0000000000..2a8ab87e86
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-atanf.c
@@ -0,0 +1,3 @@ 
+#define LIBMVEC_TYPE float
+#define LIBMVEC_FUNC atanf
+#include "test-vector-abi-arg1.h"
diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
index abbd3ed870..af769c56fa 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
@@ -28,6 +28,7 @@  VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVeN16v_logf)
 VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVeN16v_expf)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVeN16vv_powf)
 VECTOR_WRAPPER (WRAPPER_NAME (acosf), _ZGVeN16v_acosf)
+VECTOR_WRAPPER (WRAPPER_NAME (atanf), _ZGVeN16v_atanf)
 
 #define VEC_INT_TYPE __m512i
 
diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
index 8a24027952..76e61d2f1e 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
@@ -28,6 +28,7 @@  VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVbN4v_logf)
 VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVbN4v_expf)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVbN4vv_powf)
 VECTOR_WRAPPER (WRAPPER_NAME (acosf), _ZGVbN4v_acosf)
+VECTOR_WRAPPER (WRAPPER_NAME (atanf), _ZGVbN4v_atanf)
 
 #define VEC_INT_TYPE __m128i
 
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
index aff0442606..5e27eaaf29 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
@@ -31,6 +31,7 @@  VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVdN8v_logf)
 VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVdN8v_expf)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVdN8vv_powf)
 VECTOR_WRAPPER (WRAPPER_NAME (acosf), _ZGVdN8v_acosf)
+VECTOR_WRAPPER (WRAPPER_NAME (atanf), _ZGVdN8v_atanf)
 
 /* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf.  */
 #undef VECTOR_WRAPPER_fFF
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
index 913584d111..28daf79aa9 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
@@ -28,6 +28,7 @@  VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVcN8v_logf)
 VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVcN8v_expf)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVcN8vv_powf)
 VECTOR_WRAPPER (WRAPPER_NAME (acosf), _ZGVcN8v_acosf)
+VECTOR_WRAPPER (WRAPPER_NAME (atanf), _ZGVcN8v_atanf)
 
 #define VEC_INT_TYPE __m128i