@@ -476,7 +476,7 @@ link-libc = $(link-libc-rpath-link)
$(link-libc-before-gnulib) $(gnulib)
link-libc-tests = $(link-libc-tests-rpath-link) \
$(link-libc-before-gnulib) $(gnulib-tests)
# This is how to find at build-time things that will be installed there.
-rpath-dirs = math elf dlfcn nss nis rt resolv crypt
+rpath-dirs = math elf dlfcn nss nis rt resolv crypt mathvec
rpath-link = \
$(common-objdir):$(subst $(empty) ,:,$(patsubst
../$(subdir),.,$(rpath-dirs:%=$(common-objpfx)%)))
else
@@ -1018,7 +1018,7 @@ all-subdirs = csu assert ctype locale intl
catgets math setjmp signal \
stdlib stdio-common libio malloc string wcsmbs time dirent \
grp pwd posix io termios resource misc socket sysvipc gmon \
gnulib iconv iconvdata wctype manual shadow gshadow po argp \
- crypt localedata timezone rt conform debug \
+ crypt localedata timezone rt conform debug mathvec \
$(add-on-subdirs) dlfcn elf
ifndef avoid-generated
@@ -93,6 +93,7 @@ use-nscd = @use_nscd@
build-hardcoded-path-in-tests= @hardcoded_path_in_tests@
build-pt-chown = @build_pt_chown@
enable-lock-elision = @enable_lock_elision@
+build-mathvect = @build_mathvec@
# Build tools.
CC = @CC@
@@ -353,6 +353,17 @@ if test "$build_pt_chown" = yes; then
AC_DEFINE(HAVE_PT_CHOWN)
fi
+# Decide whether to build the vector math library (mathvec).  An explicit
+# --enable-mathvec or --disable-mathvec always wins.  Otherwise the default
+# is yes only for an x86_64 host.  The host CPU is used rather than the
+# output of a hard-coded `gcc -v', so the default stays correct when
+# cross-compiling or when the compiler in use is not the `gcc' in PATH.
+# NOTE(review): relies on AC_CANONICAL_HOST having set host_cpu earlier in
+# this file -- confirm.
+AC_ARG_ENABLE([mathvec],
+	      [AS_HELP_STRING([--enable-mathvec],
+			      [Enable building and installing mathvec
+			       @<:@default=yes on x86_64 host, else default=no@:>@])],
+	      [build_mathvec=$enableval],
+	      [if test "$host_cpu" = x86_64; then
+		 build_mathvec=yes
+	       else
+		 build_mathvec=no
+	       fi])
+AC_SUBST(build_mathvec)
+
# We keep the original values in `$config_*' and never modify them, so we
# can write them unchanged into config.make. Everything else uses
# $machine, $vendor, and $os, and changes them whenever convenient.
@@ -87,7 +87,7 @@ if ($opt_h) {
$ulps_file = $opt_u if ($opt_u);
$output_dir = $opt_o if ($opt_o);
-$input = "libm-test.inc";
+$input = "${srcdir}libm-test.inc";
$auto_input = "${srcdir}auto-libm-test-out";
$output = "${output_dir}libm-test.c";
@@ -706,13 +706,15 @@ test_single_errno (const char *test_name, int errno_value,
static void
test_errno (const char *test_name, int errno_value, int exceptions)
{
- ++noErrnoTests;
- if (exceptions & ERRNO_UNCHANGED)
- test_single_errno (test_name, errno_value, 0, "unchanged");
- if (exceptions & ERRNO_EDOM)
- test_single_errno (test_name, errno_value, EDOM, "EDOM");
- if (exceptions & ERRNO_ERANGE)
- test_single_errno (test_name, errno_value, ERANGE, "ERANGE");
+ /* When TEST_MATHVEC is defined every errno check below is compiled out
+    and this function does nothing.  NOTE(review): presumably because the
+    vector functions are not expected to set errno -- confirm.  */
+#ifndef TEST_MATHVEC
+ ++noErrnoTests;
+ if (exceptions & ERRNO_UNCHANGED)
+ test_single_errno (test_name, errno_value, 0, "unchanged");
+ if (exceptions & ERRNO_EDOM)
+ test_single_errno (test_name, errno_value, EDOM, "EDOM");
+ if (exceptions & ERRNO_ERANGE)
+ test_single_errno (test_name, errno_value, ERANGE, "ERANGE");
+#endif
}
/* Returns the number of ulps that GIVEN is away from EXPECTED. */
@@ -1734,6 +1736,20 @@ struct test_fFF_11_data
} \
while (0);
+/* Run the tests for FUNC once, in round-to-nearest mode only
+   (FE_TONEAREST is the rounding mode handed to LOOP_MACRO).  */
+#define TN_RM_TEST(FUNC, EXACT, ARRAY, LOOP_MACRO, END_MACRO, ...)	\
+  do									\
+    {									\
+      START (FUNC, EXACT);						\
+      LOOP_MACRO (FUNC, ARRAY, FE_TONEAREST, ## __VA_ARGS__);		\
+      END_MACRO;							\
+    }									\
+  while (0);
+
/* This is to prevent messages from the SVID libm emulation. */
int
matherr (struct exception *x __attribute__ ((unused)))
@@ -6258,7 +6274,11 @@ static const struct test_f_f_data cos_test_data[] =
static void
cos_test (void)
{
+#ifndef TEST_MATHVEC
ALL_RM_TEST (cos, 0, cos_test_data, RUN_TEST_LOOP_f_f, END);
+#else
+ /* Vector build: feed the same cos_test_data to vector_cos instead of
+    cos, in round-to-nearest mode only (TN_RM_TEST).  */
+ TN_RM_TEST (vector_cos, 0, cos_test_data, RUN_TEST_LOOP_f_f, END);
+#endif
}
@@ -9824,6 +9844,7 @@ main (int argc, char **argv)
initialize ();
printf (TEST_MSG);
+#ifndef TEST_MATHVEC
check_ulp ();
/* Keep the tests a wee bit ordered (according to ISO C99). */
@@ -9960,6 +9981,11 @@ main (int argc, char **argv)
y0_test ();
y1_test ();
yn_test ();
+#else
+ /* Vector trigonometric functions: */
+ cos_test ();
+
+#endif
if (output_ulps)
fclose (ulps_file);
new file mode 100644
@@ -0,0 +1,63 @@
+# Copyright (C) 2014 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+# Makefile for the vector math library.
+
+subdir := mathvec
+
+include ../Makeconfig
+
+# libmvec is built only when configure enabled it; build-mathvect is the
+# make-level name that receives the @build_mathvec@ substitution.
+ifeq ($(build-mathvect),yes)
+extra-libs := libmvec
+extra-libs-others = $(extra-libs)
+endif
+
+# Objects that make up libmvec.  libmvec-support is not assigned in this
+# file.  NOTE(review): presumably provided by a sysdeps Makefile fragment --
+# confirm.
+libmvec-routines = $(strip $(libmvec-support))
+
+# libmvec.so is relinked whenever libm.so changes.
+$(objpfx)libmvec.so: $(common-objpfx)math/libm.so
+
+# Rules for the test suite.
+ifeq ($(build-mathvect),yes)
+ifneq (no,$(PERL))
+libmvec-tests = test-vec-double
+libmvec-tests.o = $(addsuffix .o,$(libmvec-tests))
+tests = $(libmvec-tests)
+
+# Files written by gen-libm-test.pl.  They land in math's object directory,
+# so these names already carry the full $(common-objpfx)math/ prefix and
+# must not be prefixed again.
+libmvec-tests-generated = $(common-objpfx)math/libm-test-ulps.h \
+                          $(common-objpfx)math/libm-test.c
+# Only the stamp is listed here, by bare name.  The already-prefixed paths
+# above do not fit next to a bare name in the same list.
+# NOTE(review): assumes `generated' entries are taken relative to this
+# subdirectory's object directory and that math cleans its own generated
+# files -- confirm against Makerules.
+generated += libmvec-test.stmp
+
+# This is needed for dependencies
+before-compile += $(common-objpfx)math/libm-test.c
+ulps-file = $(firstword $(wildcard $(sysdirs:%=%/libm-test-ulps)))
+
+# The generated files are up to date once the stamp is.
+$(libmvec-tests-generated): $(objpfx)libmvec-test.stmp
+
+# $(ulps-file) must stay the first prerequisite: the recipe hands it to the
+# generator as $<.  If no libm-test-ulps file is found, $< silently becomes
+# ../math/libm-test.inc instead.
+$(objpfx)libmvec-test.stmp: $(ulps-file) ../math/libm-test.inc \
+                            ../math/gen-libm-test.pl \
+                            ../math/auto-libm-test-out
+	$(make-target-directory)
+	$(PERL) ../math/gen-libm-test.pl -u $< -o "$(common-objpfx)math/"
+	@echo > $@
+
+$(objpfx)test-vec-double.o: $(objpfx)libmvec-test.stmp
+endif
+endif
+
+# -Wno-unused-function: with TEST_MATHVEC defined, libm-test.inc compiles
+# out the errno checks and most of main, which can leave static helpers
+# unused.  -mavx2 is needed for the __m256d code in test-vec-double.c.
+CFLAGS-test-vec-double.c = -fno-inline -ffloat-store -fno-builtin \
+                           -frounding-math -mavx2 -Wno-unused-function
+
+# NOTE(review): the libraries are added through a linker-flags variable;
+# presumably this is what links the tests against libm and libmvec --
+# confirm against Rules.
+rtld-tests-LDFLAGS += $(common-objpfx)math/libm.so $(objpfx)libmvec.so
+
+include ../Rules
new file mode 100644
@@ -0,0 +1,58 @@
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Parameters for the generic libm test source included at the end of
+   this file: exercise the `double' variants.  */
+#define FUNC(function) function
+#define FLOAT double
+/* Banner printed by the test driver's main.  */
+#define TEST_MSG "testing double vector math (without inline functions)\n"
+#define MATHCONST(x) x
+/* Select the `double' alternative from the per-type argument list.  The
+   definition has to be one logical line, hence the continuation; split
+   without it the macro expands to nothing and leaves a stray token.  */
+#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) \
+  Cdouble
+/* NOTE(review): presumably printf conversion letters used by the test
+   driver when printing values -- confirm.  */
+#define PRINTF_EXPR "e"
+#define PRINTF_XEXPR "a"
+#define PRINTF_NEXPR "f"
+#define TEST_DOUBLE 1
+
+/* Make sure the math inlines are disabled, matching TEST_MSG.  */
+#ifndef __NO_MATH_INLINES
+# define __NO_MATH_INLINES
+#endif
+
+#include <immintrin.h>
+
+extern __m256d _ZGVdN4v_cos(__m256d);
+
+/* Scalar wrapper around the 4-lane vector cosine so the scalar test
+   tables can drive it.  X is broadcast to every lane; lane 0 of the result
+   is returned.  If any other lane compares unequal to lane 0, the return
+   value is deliberately offset by 0.1 so that the mismatch shows up as a
+   test failure.  */
+double vector_cos(double x)
+{
+  const __m256d input = _mm256_set1_pd(x);
+  __m256d result = _ZGVdN4v_cos(input);
+  const double *lane = (const double *) &result;
+  int idx = 1;
+
+  while (idx < 4)
+    {
+      if (lane[0] != lane[idx])
+        return lane[0] + 0.1;
+      idx++;
+    }
+
+  return lane[0];
+}
+
+/* TEST_MATHVEC switches the shared test source into vector mode: the
+   errno checks are compiled out, cos_test runs vector_cos through
+   TN_RM_TEST, and main runs only cos_test.  */
+#define TEST_MATHVEC
+/* NOTE(review): presumably turns off the floating-point exception checks
+   for the double tests -- confirm against libm-test.inc.  */
+#define EXCEPTION_TESTS_double 0
+
+/* The test driver itself; libm-test.c is the file gen-libm-test.pl
+   writes.  */
+#include "../math/libm-test.c"
b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -905,6 +905,9 @@ idouble: 1
ildouble: 2
ldouble: 2
+Function: "vector_cos":
+double: 1
+
Function: "cosh":
double: 1
float: 1
b/sysdeps/x86_64/fpu/svml_d_cos_data.S
new file mode 100644
@@ -0,0 +1,492 @@
+/* Data for vectorized cos.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+ .section .rodata, "a"
+
+ .align 64
+ .globl __gnu_svml_dcos_data
+
+/* Data table for vector implementations of function cos.
+ * The table may contain polynomial, reduction, lookup
+ * coefficients and other constants obtained through different
+ * methods of research and experimental work.
+ */
+__gnu_svml_dcos_data:
+
+/* General constants:
+ * lAbsMask
+ */
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+
+/* lRangeVal */
+ .long 0x00000000
+ .long 0x41600000
+ .long 0x00000000
+ .long 0x41600000
+ .long 0x00000000
+ .long 0x41600000
+ .long 0x00000000
+ .long 0x41600000
+ .long 0x00000000
+ .long 0x41600000
+ .long 0x00000000
+ .long 0x41600000
+ .long 0x00000000
+ .long 0x41600000
+ .long 0x00000000
+ .long 0x41600000
+
+/* HalfPI */
+ .long 0x54442d18
+ .long 0x3ff921fb
+ .long 0x54442d18
+ .long 0x3ff921fb
+ .long 0x54442d18
+ .long 0x3ff921fb
+ .long 0x54442d18
+ .long 0x3ff921fb
+ .long 0x54442d18
+ .long 0x3ff921fb
+ .long 0x54442d18
+ .long 0x3ff921fb
+ .long 0x54442d18
+ .long 0x3ff921fb
+ .long 0x54442d18
+ .long 0x3ff921fb
+
+/* InvPI */
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+
+/* RShifter */
+ .long 0x00000000
+ .long 0x43380000
+ .long 0x00000000
+ .long 0x43380000
+ .long 0x00000000
+ .long 0x43380000
+ .long 0x00000000
+ .long 0x43380000
+ .long 0x00000000
+ .long 0x43380000
+ .long 0x00000000
+ .long 0x43380000
+ .long 0x00000000
+ .long 0x43380000
+ .long 0x00000000
+ .long 0x43380000
+
+/* OneHalf */
+ .long 0x00000000
+ .long 0x3fe00000
+ .long 0x00000000
+ .long 0x3fe00000
+ .long 0x00000000
+ .long 0x3fe00000
+ .long 0x00000000
+ .long 0x3fe00000
+ .long 0x00000000
+ .long 0x3fe00000
+ .long 0x00000000
+ .long 0x3fe00000
+ .long 0x00000000
+ .long 0x3fe00000
+ .long 0x00000000
+ .long 0x3fe00000
+
+/* Range reduction PI-based constants:
+ * PI1
+ */
+ .long 0x40000000
+ .long 0x400921fb
+ .long 0x40000000
+ .long 0x400921fb
+ .long 0x40000000
+ .long 0x400921fb
+ .long 0x40000000
+ .long 0x400921fb
+ .long 0x40000000
+ .long 0x400921fb
+ .long 0x40000000
+ .long 0x400921fb
+ .long 0x40000000
+ .long 0x400921fb
+ .long 0x40000000
+ .long 0x400921fb
+
+/* PI2 */
+ .long 0x00000000
+ .long 0x3e84442d
+ .long 0x00000000
+ .long 0x3e84442d
+ .long 0x00000000
+ .long 0x3e84442d
+ .long 0x00000000
+ .long 0x3e84442d
+ .long 0x00000000
+ .long 0x3e84442d
+ .long 0x00000000
+ .long 0x3e84442d
+ .long 0x00000000
+ .long 0x3e84442d
+ .long 0x00000000
+ .long 0x3e84442d
+
+/* PI3 */
+ .long 0x80000000
+ .long 0x3d084698
+ .long 0x80000000
+ .long 0x3d084698
+ .long 0x80000000
+ .long 0x3d084698
+ .long 0x80000000
+ .long 0x3d084698
+ .long 0x80000000
+ .long 0x3d084698
+ .long 0x80000000
+ .long 0x3d084698
+ .long 0x80000000
+ .long 0x3d084698
+ .long 0x80000000
+ .long 0x3d084698
+
+/* PI4 */
+ .long 0x701b839a
+ .long 0x3b88cc51
+ .long 0x701b839a
+ .long 0x3b88cc51
+ .long 0x701b839a
+ .long 0x3b88cc51
+ .long 0x701b839a
+ .long 0x3b88cc51
+ .long 0x701b839a
+ .long 0x3b88cc51
+ .long 0x701b839a
+ .long 0x3b88cc51
+ .long 0x701b839a
+ .long 0x3b88cc51
+ .long 0x701b839a
+ .long 0x3b88cc51
+
+/* Range reduction PI-based constants if FMA available:
+ * PI1_FMA
+ */
+ .long 0x54442d18
+ .long 0x400921fb
+ .long 0x54442d18
+ .long 0x400921fb
+ .long 0x54442d18
+ .long 0x400921fb
+ .long 0x54442d18
+ .long 0x400921fb
+ .long 0x54442d18
+ .long 0x400921fb
+ .long 0x54442d18
+ .long 0x400921fb
+ .long 0x54442d18
+ .long 0x400921fb
+ .long 0x54442d18
+ .long 0x400921fb
+
+/* PI2_FMA */
+ .long 0x33145c06
+ .long 0x3ca1a626
+ .long 0x33145c06
+ .long 0x3ca1a626
+ .long 0x33145c06
+ .long 0x3ca1a626
+ .long 0x33145c06
+ .long 0x3ca1a626
+ .long 0x33145c06
+ .long 0x3ca1a626
+ .long 0x33145c06
+ .long 0x3ca1a626
+ .long 0x33145c06
+ .long 0x3ca1a626
+ .long 0x33145c06
+ .long 0x3ca1a626
+
+/* PI3_FMA */
+ .long 0x29024e09
+ .long 0x395c1cd1
+ .long 0x29024e09
+ .long 0x395c1cd1
+ .long 0x29024e09
+ .long 0x395c1cd1
+ .long 0x29024e09
+ .long 0x395c1cd1
+ .long 0x29024e09
+ .long 0x395c1cd1
+ .long 0x29024e09
+ .long 0x395c1cd1
+ .long 0x29024e09
+ .long 0x395c1cd1
+ .long 0x29024e09
+ .long 0x395c1cd1
+
+/* Polynomial coefficients (relative error 2^(-52.115)):
+ * C1
+ */
+ .long 0x555554a7
+ .long 0xbfc55555
+ .long 0x555554a7
+ .long 0xbfc55555
+ .long 0x555554a7
+ .long 0xbfc55555
+ .long 0x555554a7
+ .long 0xbfc55555
+ .long 0x555554a7
+ .long 0xbfc55555
+ .long 0x555554a7
+ .long 0xbfc55555
+ .long 0x555554a7
+ .long 0xbfc55555
+ .long 0x555554a7
+ .long 0xbfc55555
+
+/* C2 */
+ .long 0x1110a4a8
+ .long 0x3f811111
+ .long 0x1110a4a8
+ .long 0x3f811111
+ .long 0x1110a4a8
+ .long 0x3f811111
+ .long 0x1110a4a8
+ .long 0x3f811111
+ .long 0x1110a4a8
+ .long 0x3f811111
+ .long 0x1110a4a8
+ .long 0x3f811111
+ .long 0x1110a4a8
+ .long 0x3f811111
+ .long 0x1110a4a8
+ .long 0x3f811111
+
+/* C3 */
+ .long 0x19a5b86d
+ .long 0xbf2a01a0
+ .long 0x19a5b86d
+ .long 0xbf2a01a0
+ .long 0x19a5b86d
+ .long 0xbf2a01a0
+ .long 0x19a5b86d
+ .long 0xbf2a01a0
+ .long 0x19a5b86d
+ .long 0xbf2a01a0
+ .long 0x19a5b86d
+ .long 0xbf2a01a0
+ .long 0x19a5b86d
+ .long 0xbf2a01a0
+ .long 0x19a5b86d
+ .long 0xbf2a01a0
+
+/* C4 */
+ .long 0x8030fea0
+ .long 0x3ec71de3
+ .long 0x8030fea0
+ .long 0x3ec71de3
+ .long 0x8030fea0
+ .long 0x3ec71de3
+ .long 0x8030fea0
+ .long 0x3ec71de3
+ .long 0x8030fea0
+ .long 0x3ec71de3
+ .long 0x8030fea0
+ .long 0x3ec71de3
+ .long 0x8030fea0
+ .long 0x3ec71de3
+ .long 0x8030fea0
+ .long 0x3ec71de3
+
+/* C5 */
+ .long 0x46002231
+ .long 0xbe5ae635
+ .long 0x46002231
+ .long 0xbe5ae635
+ .long 0x46002231
+ .long 0xbe5ae635
+ .long 0x46002231
+ .long 0xbe5ae635
+ .long 0x46002231
+ .long 0xbe5ae635
+ .long 0x46002231
+ .long 0xbe5ae635
+ .long 0x46002231
+ .long 0xbe5ae635
+ .long 0x46002231
+ .long 0xbe5ae635
+
+/* C6 */
+ .long 0x57a2f220
+ .long 0x3de60e68
+ .long 0x57a2f220
+ .long 0x3de60e68
+ .long 0x57a2f220
+ .long 0x3de60e68
+ .long 0x57a2f220
+ .long 0x3de60e68
+ .long 0x57a2f220
+ .long 0x3de60e68
+ .long 0x57a2f220
+ .long 0x3de60e68
+ .long 0x57a2f220
+ .long 0x3de60e68
+ .long 0x57a2f220
+ .long 0x3de60e68
+
+/* C7 */
+ .long 0x0811aac8
+ .long 0xbd69f0d6
+ .long 0x0811aac8
+ .long 0xbd69f0d6
+ .long 0x0811aac8
+ .long 0xbd69f0d6
+ .long 0x0811aac8
+ .long 0xbd69f0d6
+ .long 0x0811aac8
+ .long 0xbd69f0d6
+ .long 0x0811aac8
+ .long 0xbd69f0d6
+ .long 0x0811aac8
+ .long 0xbd69f0d6
+ .long 0x0811aac8
+ .long 0xbd69f0d6
+
+/* Additional constants:
+ * AbsMask
+ */
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+
+/* InvPI */
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+ .long 0x6dc9c883
+ .long 0x3fd45f30
+
+/* RShifter_la */
+ .long 0x00000000
+ .long 0x43300000
+ .long 0x00000000
+ .long 0x43300000
+ .long 0x00000000
+ .long 0x43300000
+ .long 0x00000000
+ .long 0x43300000
+ .long 0x00000000
+ .long 0x43300000
+ .long 0x00000000
+ .long 0x43300000
+ .long 0x00000000
+ .long 0x43300000
+ .long 0x00000000
+ .long 0x43300000
+
+/* RShifter_la - 1 ulp (0x432fffffffffffff, the double just below RShifter_la) */
+ .long 0xffffffff
+ .long 0x432fffff
+ .long 0xffffffff
+ .long 0x432fffff
+ .long 0xffffffff
+ .long 0x432fffff
+ .long 0xffffffff
+ .long 0x432fffff
+ .long 0xffffffff
+ .long 0x432fffff
+ .long 0xffffffff
+ .long 0x432fffff
+ .long 0xffffffff
+ .long 0x432fffff
+ .long 0xffffffff
+ .long 0x432fffff
+
+/* RSXmax_la */
+ .long 0x007ffffe
+ .long 0x43300000
+ .long 0x007ffffe
+ .long 0x43300000
+ .long 0x007ffffe
+ .long 0x43300000
+ .long 0x007ffffe
+ .long 0x43300000
+ .long 0x007ffffe
+ .long 0x43300000
+ .long 0x007ffffe
+ .long 0x43300000
+ .long 0x007ffffe
+ .long 0x43300000
+ .long 0x007ffffe
+ .long 0x43300000
+ .type __gnu_svml_dcos_data,@object
+ .size __gnu_svml_dcos_data,.-__gnu_svml_dcos_data