@@ -36,6 +36,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
strcoll
+
string-bench-all := $(string-bench)
# We have to generate locales
@@ -50,7 +51,10 @@ stdlib-bench := strtod
stdio-common-bench := sprintf
-benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common-bench)
+math-benchset := math-inlines
+
+benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common-bench) \
+ $(math-benchset)
CFLAGS-bench-ffs.c += -fno-builtin
CFLAGS-bench-ffsll.c += -fno-builtin
@@ -58,6 +62,7 @@ CFLAGS-bench-ffsll.c += -fno-builtin
bench-malloc := malloc-thread
$(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
+$(addprefix $(objpfx)bench-,$(math-benchset)): $(libm)
$(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library)
$(objpfx)bench-malloc-thread: $(shared-thread-library)
new file mode 100644
@@ -0,0 +1,353 @@
+/* Measure math inline functions.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SIZE 1024
+#define TEST_MAIN
+#define TEST_NAME "math-inlines"
+#define TEST_FUNCTION test_main ()
+#include "bench-timing.h"
+#include "json-lib.h"
+
+#include <stdlib.h>
+#include <math.h>
+#include <stdint.h>
+
+
+#define BOOLTEST(func) \
+int func ## _t (volatile double *p, size_t n, size_t iters) \
+{ /* Sweep P[0..N-1] ITERS times; add 5 whenever FUNC (2 * P[k]) holds.  */ \
+  int pass, k, hits = 0; \
+  for (pass = 0; pass < iters; pass++) \
+    for (k = 0; k < n; k++) \
+      { \
+        double doubled = p[k] * 2.0; /* one volatile load per element */ \
+        if (func (doubled)) hits += 5; \
+      } \
+  return hits; \
+}
+
+#define VALUETEST(func) \
+int \
+func ## _t (volatile double *p, size_t n, size_t iters) \
+{ /* Unlike BOOLTEST, sum the value FUNC returns so all of it is used.  */ \
+  int i, j; \
+  int res = 0; \
+  for (j = 0; j < iters; j++) \
+    for (i = 0; i < n; i++) \
+      { double tmp = p[i] * 2.0; /* one volatile load per element */ \
+        res += func (tmp); } \
+  return res; \
+}
+
+typedef union
+{
+ double value; /* the number viewed as a double */
+ uint64_t word; /* the same storage viewed as a 64-bit unsigned integer */
+} ieee_double_shape_type;
+
+#define EXTRACT_WORDS64(i,d) \
+do { \
+ ieee_double_shape_type gh_u; \
+ gh_u.value = (d); /* store D as a double ... */ \
+ (i) = gh_u.word; /* ... and read the same bytes back into I as an integer */ \
+} while (0)
+
+/* Explicit inlines similar to math_private.h versions. */
+
+extern __always_inline int
+__isnan_inl (double d)
+{ /* NaN test done purely on the integer bit pattern of D.  */
+ uint64_t di;
+ EXTRACT_WORDS64 (di, d);
+ return (di & 0x7fffffffffffffffull) > 0x7ff0000000000000ull; /* sign bit masked off; above the Inf encoding => NaN */
+}
+
+extern __always_inline int
+__isnan_builtin (double d)
+{ /* Function-shaped wrapper around the compiler builtin, for use with BOOLTEST.  */
+ return __builtin_isnan (d);
+}
+
+extern __always_inline int
+__isinf_inl (double x)
+{ /* Infinity test on the bit pattern; the nonzero result is taken from the high word.  */
+ uint64_t ix;
+ EXTRACT_WORDS64 (ix,x);
+ if ((ix << 1) != 0xffe0000000000000ull) /* the shift discards the sign bit; only +-Inf matches */
+ return 0;
+ return (int)(ix >> 32); /* upper 32 bits of the pattern, sign bit included */
+}
+
+extern __always_inline int
+__isinf_ns (double d)
+{ /* Infinity test yielding only 0 or 1 ("ns" presumably means "no sign" -- confirm).  */
+ uint64_t di;
+ EXTRACT_WORDS64 (di, d);
+ return (di & 0x7fffffffffffffffull) == 0x7ff0000000000000ull; /* sign bit masked off, so +Inf and -Inf both match */
+}
+
+extern __always_inline int
+__isinf_ns_builtin (double d)
+{ /* Function-shaped wrapper around __builtin_isinf, for use with BOOLTEST.  */
+ return __builtin_isinf (d);
+}
+
+extern __always_inline int
+__isinf_builtin (double d)
+{ /* Wraps __builtin_isinf_sign (not __builtin_isinf), for use with BOOLTEST.  */
+ return __builtin_isinf_sign (d); /* GCC documents this builtin as giving -1 / +1 for -Inf / +Inf */
+}
+
+
+extern __always_inline int
+__finite_inl (double d)
+{ /* Finiteness test done on the integer bit pattern of D.  */
+ uint64_t di;
+ EXTRACT_WORDS64 (di, d);
+ return (di & 0x7fffffffffffffffull) < 0x7ff0000000000000ull; /* strictly below the Inf encoding => neither Inf nor NaN */
+}
+
+extern __always_inline int
+__isfinite_builtin (double d)
+{ /* Function-shaped wrapper around __builtin_isfinite, for use with BOOLTEST.  */
+ return __builtin_isfinite (d);
+}
+
+
+/* Explicit inline similar to existing math.h implementation. */
+
+#define __isnormal_inl(X) (__fpclassify (X) == FP_NORMAL) /* names __fpclassify directly */
+#define __isnormal_inl2(X) (fpclassify (X) == FP_NORMAL) /* goes through fpclassify as provided by <math.h> */
+
+extern __always_inline int
+__isnormal_builtin (double d)
+{ /* Function-shaped wrapper around __builtin_isnormal, for use with BOOLTEST.  */
+ return __builtin_isnormal (d);
+}
+
+/* Test fpclassify with use of only 2 of the 5 results. */
+
+extern __always_inline int
+__fpclassify_test1 (double d)
+{ /* True for NaN or infinity, decided from a single fpclassify result.  */
+ int cl = fpclassify (d);
+ return cl == FP_NAN || cl == FP_INFINITE; /* only these two of the five classes are examined */
+}
+
+extern __always_inline int
+__fpclassify_test2 (double d)
+{ /* Same predicate as __fpclassify_test1, built from two builtins instead.  */
+ return __builtin_isnan (d) || __builtin_isinf (d);
+}
+
+double __attribute ((noinline))
+kernel_standard (double x, double y, int z)
+{ /* Cheap out-of-line callee used on the special-case paths of remainder1 and remainder2.  */
+ return x * y + z;
+}
+
+double __attribute ((noinline))
+remainder2 (double x, double y)
+{ /* remainder () preceded by argument checks written with compiler builtins.  */
+ if (((__builtin_expect (y == 0.0, 0) && !__builtin_isnan (x)) /* zero divisor while x is not NaN */
+ || (__builtin_expect (__builtin_isinf (x), 0) && !__builtin_isnan (y)))) /* infinite x while y is not NaN */
+ return kernel_standard (x, y, 10);
+
+ return remainder (x, y);
+}
+
+double __attribute ((noinline))
+remainder1 (double x, double y)
+{ /* remainder () preceded by the same checks as remainder2, written with the bit-pattern inlines.  */
+ if (((__builtin_expect (y == 0.0, 0) && !__isnan_inl (x)) /* zero divisor while x is not NaN */
+ || (__builtin_expect (__isinf_ns (x), 0) && !__isnan_inl (y)))) /* infinite x while y is not NaN */
+ return kernel_standard (x, y, 10);
+
+ return remainder (x, y);
+}
+
+volatile double rem1 = 2.5; /* divisor for the remainder tests; volatile, so it is re-read on every use */
+
+extern __always_inline int
+remainder_test1 (double d)
+{ /* Adapter giving remainder1 the one-argument, int-returning shape BOOLTEST calls.  */
+ return remainder1 (d, rem1); /* double converted to int; NOTE(review): undefined in C for the Inf/NaN results the inf/nan array produces -- confirm acceptable */
+}
+
+extern __always_inline int
+remainder_test2 (double d)
+{ /* Adapter giving remainder2 the one-argument, int-returning shape BOOLTEST calls.  */
+ return remainder2 (d, rem1); /* double converted to int; NOTE(review): undefined in C for Inf/NaN results -- confirm acceptable */
+}
+
+/* Create test functions for each possibility. */
+
+BOOLTEST (__isnan) /* NaN predicates; each line defines a <name>_t loop */
+BOOLTEST (__isnan_inl)
+BOOLTEST (__isnan_builtin)
+BOOLTEST (isnan)
+
+BOOLTEST (__isinf) /* infinity predicates */
+BOOLTEST (__isinf_inl)
+BOOLTEST (__isinf_ns)
+BOOLTEST (__isinf_ns_builtin)
+BOOLTEST (__isinf_builtin)
+BOOLTEST (isinf)
+
+BOOLTEST (__finite) /* finiteness predicates */
+BOOLTEST (__finite_inl)
+BOOLTEST (__isfinite_builtin)
+BOOLTEST (isfinite)
+
+BOOLTEST (__isnormal_inl) /* normal-number predicates */
+BOOLTEST (__isnormal_inl2)
+BOOLTEST (__isnormal_builtin)
+BOOLTEST (isnormal)
+
+BOOLTEST (__fpclassify_test1) /* fpclassify reduced to a NaN-or-Inf predicate */
+BOOLTEST (__fpclassify_test2)
+VALUETEST (__fpclassify) /* the classification functions themselves go through VALUETEST */
+VALUETEST (fpclassify)
+
+BOOLTEST (remainder_test1) /* remainder wrappers with the two styles of argument check */
+BOOLTEST (remainder_test2)
+
+typedef int (*proto_t) (volatile double *p, size_t n, size_t iters); /* signature of every generated <name>_t loop */
+
+typedef struct
+{
+ const char *name; /* label used as the JSON object name for this benchmark */
+ proto_t fn; /* the benchmark loop to call */
+} impl_t;
+
+#define IMPL(name) { #name, name } /* stringified identifier for .name, the identifier itself for .fn */
+
+impl_t test_list[] = /* every benchmark loop run by test_main, in output order */
+{
+ IMPL (__isnan_t),
+ IMPL (__isnan_inl_t),
+ IMPL (__isnan_builtin_t),
+ IMPL (isnan_t),
+
+ IMPL (__isinf_t),
+ IMPL (__isinf_inl_t),
+ IMPL (__isinf_ns_t),
+ IMPL (__isinf_ns_builtin_t),
+ IMPL (__isinf_builtin_t),
+ IMPL (isinf_t),
+
+ IMPL (__finite_t),
+ IMPL (__finite_inl_t),
+ IMPL (__isfinite_builtin_t),
+ IMPL (isfinite_t),
+
+ IMPL (__isnormal_inl_t),
+ IMPL (__isnormal_inl2_t),
+ IMPL (__isnormal_builtin_t),
+ IMPL (isnormal_t),
+
+ IMPL (__fpclassify_test1_t),
+ IMPL (__fpclassify_test2_t),
+ IMPL (__fpclassify_t),
+ IMPL (fpclassify_t),
+
+ IMPL (remainder_test1_t),
+ IMPL (remainder_test2_t)
+};
+
+static void
+do_one_test (json_ctx_t *json_ctx, proto_t test_fn, volatile double *arr,
+             size_t len, const char *testname)
+{ /* Time one call of TEST_FN over ARR[0..LEN-1]; report it as JSON object TESTNAME.  */
+  size_t iters = 500; /* number of passes TEST_FN makes over ARR */
+  timing_t start, stop, cur;
+
+  json_attr_object_begin (json_ctx, testname);
+
+  TIMING_NOW (start);
+  test_fn (arr, len, iters); /* result deliberately unused; only the elapsed time matters */
+  TIMING_NOW (stop);
+  TIMING_DIFF (cur, start, stop);
+
+  json_attr_double (json_ctx, "duration", cur);
+  json_attr_double (json_ctx, "iterations", iters);
+  json_attr_double (json_ctx, "mean", (double) cur / iters); /* floating-point division, so an integer timing_t cannot truncate the mean */
+  json_attr_object_end (json_ctx);
+}
+
+volatile unsigned int dontoptimize = 0;
+
+void
+startup (void)
+{
+ /* This loop should cause the CPU to switch to its maximal frequency.
+ This makes subsequent measurements more accurate. We need a side effect
+ to prevent the loop from being deleted by the compiler.
+ This should be enough to cause the CPU to speed up and it is simpler than
+ running the loop for a constant time. This is used when the user does not
+ have root access to set a constant frequency. */
+ for (int k = 0; k < 100000000; k++)
+ dontoptimize += 23 * dontoptimize + 2;
+}
+
+static volatile double arr1[SIZE]; /* mostly ordinary values; filled in test_main */
+static volatile double arr2[SIZE]; /* only Inf and NaN; filled in test_main */
+
+int
+test_main (void)
+{
+ json_ctx_t json_ctx;
+ size_t i;
+
+ startup (); /* warm-up loop, run before any timing */
+
+ json_init (&json_ctx, 2, stdout);
+ json_attr_object_begin (&json_ctx, "math-inlines");
+
+ /* Create 2 test arrays, one with 10% zeroes, 10% negative values,
+ 79% positive values and 1% infinity/NaN. The other contains
+ 50% inf, 50% NaN. */
+
+ for (i = 0; i < SIZE; i++)
+ {
+ int x = rand () & 255; /* 0..255; rand () is not seeded in this file */
+ arr1[i] = (x < 25) ? 0.0 : ((x < 50) ? -1 : 100);
+ if (x == 255) arr1[i] = __builtin_inf ();
+ if (x == 254) arr1[i] = __builtin_nan ("0");
+ arr2[i] = (x < 128) ? __builtin_inf () : __builtin_nan ("0");
+ }
+
+ for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++) /* first pass: every test on the Inf/NaN array */
+ {
+ json_attr_object_begin (&json_ctx, test_list[i].name);
+ do_one_test (&json_ctx, test_list[i].fn, arr2, SIZE, "inf/nan");
+ json_attr_object_end (&json_ctx);
+ }
+
+ for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++) /* second pass: every test on the mostly-normal array */
+ {
+ json_attr_object_begin (&json_ctx, test_list[i].name); /* NOTE(review): same names as the first pass, so each occurs twice under "math-inlines" -- confirm consumers cope */
+ do_one_test (&json_ctx, test_list[i].fn, arr1, SIZE, "normal");
+ json_attr_object_end (&json_ctx);
+ }
+
+ json_attr_object_end (&json_ctx);
+ return 0;
+}
+
+#include "../test-skeleton.c"