@@ -258,20 +258,6 @@ Unless explicitly mentioned otherwise, a precision of 1 implies 24 bits of
precision in the mantissa of the multiple precision number. Hence, a precision
level of 32 implies 768 bits of precision in the mantissa.
-@deftp Probe slowexp_p6 (double @var{$arg1}, double @var{$arg2})
-This probe is triggered when the @code{exp} function is called with an
-input that results in multiple precision computation with precision
-6. Argument @var{$arg1} is the input value and @var{$arg2} is the
-computed output.
-@end deftp
-
-@deftp Probe slowexp_p32 (double @var{$arg1}, double @var{$arg2})
-This probe is triggered when the @code{exp} function is called with an
-input that results in multiple precision computation with precision
-32. Argument @var{$arg1} is the input value and @var{$arg2} is the
-computed output.
-@end deftp
-
@deftp Probe slowpow_p10 (double @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4})
This probe is triggered when the @code{pow} function is called with
inputs that result in multiple precision computation with precision
@@ -114,7 +114,7 @@ type-ldouble-yes := ldouble
# double support
type-double-suffix :=
type-double-routines := branred doasin dosincos halfulp mpa mpatan2 \
- mpatan mpexp mplog mpsqrt mptan sincos32 slowexp \
+ mpatan mpexp mplog mpsqrt mptan sincos32 \
slowpow sincostab k_rem_pio2
# float support
@@ -561,8 +561,10 @@ math-CPPFLAGS += -D__NO_MATH_INLINES -D__LIBC_INTERNAL_MATH_INLINES
ifneq ($(long-double-fcts),yes)
# The `double' and `long double' types are the same on this machine.
# We won't compile the `long double' code at all. Tell the `double' code
-# to define aliases for the `FUNCl' names.
-math-CPPFLAGS += -DNO_LONG_DOUBLE
+# to define aliases for the `FUNCl' names. To avoid type conflicts in
+# defining those aliases, tell <math.h> to declare the `FUNCl' names with
+# `double' instead of `long double'.
+math-CPPFLAGS += -DNO_LONG_DOUBLE -D_Mlong_double_=double
endif
# These files quiet sNaNs in a way that is optimized away without
@@ -262,7 +262,6 @@ extern double __sin32 (double __x, double __res, double __res1);
extern double __cos32 (double __x, double __res, double __res1);
extern double __mpsin (double __x, double __dx, bool __range_reduce);
extern double __mpcos (double __x, double __dx, bool __range_reduce);
-extern double __slowexp (double __x);
extern double __slowpow (double __x, double __y, double __z);
extern void __docos (double __x, double __dx, double __v[]);
@@ -1,3 +1,4 @@
+/* EXP function - Compute double precision exponential */
/*
* IBM Accurate Mathematical Library
* written by International Business Machines Corp.
@@ -23,7 +24,7 @@
/* exp1 */
/* */
/* FILES NEEDED:dla.h endian.h mpa.h mydefs.h uexp.h */
-/* mpa.c mpexp.x slowexp.c */
+/* mpa.c mpexp.x */
/* */
/* An ultimate exp routine. Given an IEEE double machine number x */
/* it computes the correctly rounded (to nearest) value of e^x */
@@ -32,207 +33,238 @@
/* */
/***************************************************************************/
+/* IBM exp(x) replaced by following exp(x) in 2017. IBM exp1(x,xx) remains. */
+/*
+ exp(x)
+ Hybrid algorithm of Peter Tang's Table driven method (for large
+ arguments) and an accurate table (for small arguments).
+ Written by K.C. Ng, November 1988.
+ Method (large arguments):
+ 1. Argument Reduction: given the input x, find r and integer k
+ and j such that
+ x = (k+j/32)*(ln2) + r, |r| <= (1/64)*ln2
+
+ 2. exp(x) = 2^k * (2^(j/32) + 2^(j/32)*expm1(r))
+ a. expm1(r) is approximated by a polynomial:
+ expm1(r) ~ r + t1*r^2 + t2*r^3 + ... + t5*r^6
+ Here t1 = 1/2 exactly.
+ b. 2^(j/32) is represented to twice double precision
+ as TBL[2j]+TBL[2j+1].
+
+ Note: If divide were fast enough, we could use another approximation
+ in 2.a:
+ expm1(r) ~ (2r)/(2-R), R = r - r^2*(t1 + t2*r^2)
+ (for the same t1 and t2 as above)
+
+ Special cases:
+ exp(INF) is INF, exp(NaN) is NaN;
+ exp(-INF)= 0;
+ for finite argument, only exp(0)=1 is exact.
+
+ Accuracy:
+ According to an error analysis, the error is always less than
+ an ulp (unit in the last place). The largest errors observed
+ are less than 0.55 ulp for normal results and less than 0.75 ulp
+ for subnormal results.
+
+ Misc. info.
+ For IEEE double
+ if x > 7.09782712893383973096e+02 then exp(x) overflow
+ if x < -7.45133219101941108420e+02 then exp(x) underflow
+ */
+
#include <math.h>
+#include <math-svid-compat.h>
+#include <math_private.h>
+#include <errno.h>
#include "endian.h"
#include "uexp.h"
+#include "uexp.tbl"
#include "mydefs.h"
#include "MathLib.h"
-#include "uexp.tbl"
-#include <math_private.h>
#include <fenv.h>
#include <float.h>
-#ifndef SECTION
-# define SECTION
-#endif
+extern double __ieee754_exp (double);
+
+#include "eexp.tbl"
+
+static const double
+ half = 0.5,
+ one = 1.0;
-double __slowexp (double);
-/* An ultimate exp routine. Given an IEEE double machine number x it computes
- the correctly rounded (to nearest) value of e^x. */
double
-SECTION
-__ieee754_exp (double x)
+__ieee754_exp (double x_arg)
{
- double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
- mynumber junk1, junk2, binexp = {{0, 0}};
- int4 i, j, m, n, ex;
+ double z, t;
double retval;
-
+ int hx, ix, k, j, m;
+ int fe_val;
+ union
{
- SET_RESTORE_ROUND (FE_TONEAREST);
-
- junk1.x = x;
- m = junk1.i[HIGH_HALF];
- n = m & hugeint;
-
- if (n > smallint && n < bigint)
- {
- y = x * log2e.x + three51.x;
- bexp = y - three51.x; /* multiply the result by 2**bexp */
-
- junk1.x = y;
-
- eps = bexp * ln_two2.x; /* x = bexp*ln(2) + t - eps */
- t = x - bexp * ln_two1.x;
-
- y = t + three33.x;
- base = y - three33.x; /* t rounded to a multiple of 2**-18 */
- junk2.x = y;
- del = (t - base) - eps; /* x = bexp*ln(2) + base + del */
- eps = del + del * del * (p3.x * del + p2.x);
-
- binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
-
- i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
- j = (junk2.i[LOW_HALF] & 511) << 1;
-
- al = coar.x[i] * fine.x[j];
- bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
- + coar.x[i + 1] * fine.x[j + 1]);
-
- rem = (bet + bet * eps) + al * eps;
- res = al + rem;
- cor = (al - res) + rem;
- if (res == (res + cor * err_0))
- {
- retval = res * binexp.x;
- goto ret;
+ int i_part[2];
+ double x;
+ } xx;
+ union
+ {
+ int y_part[2];
+ double y;
+ } yy;
+ xx.x = x_arg;
+
+ ix = xx.i_part[HIGH_HALF];
+ hx = ix & ~0x80000000;
+
+ if (hx < 0x3ff0a2b2)
+ { /* |x| < 3/2 ln 2 */
+ if (hx < 0x3f862e42)
+ { /* |x| < 1/64 ln 2 */
+ if (hx < 0x3ed00000)
+ { /* |x| < 2^-18 */
+ if (hx < 0x3e300000)
+ {
+ retval = one + xx.x;
+ return (retval);
+ }
+ retval = one + xx.x * (one + half * xx.x);
+ return (retval);
+ }
+ /*
+ Use FE_TONEAREST rounding mode for computing yy.y
+ Avoid set/reset of rounding mode if already in FE_TONEAREST mode
+ */
+ fe_val = get_rounding_mode ();
+ if (fe_val == FE_TONEAREST) {
+ t = xx.x * xx.x;
+ yy.y = xx.x + (t * (half + xx.x * t2) +
+ (t * t) * (t3 + xx.x * t4 + t * t5));
+ retval = one + yy.y;
+ } else {
+ libc_fesetround (FE_TONEAREST);
+ t = xx.x * xx.x;
+ yy.y = xx.x + (t * (half + xx.x * t2) +
+ (t * t) * (t3 + xx.x * t4 + t * t5));
+ retval = one + yy.y;
+ libc_fesetround (fe_val);
}
- else
- {
- retval = __slowexp (x);
- goto ret;
- } /*if error is over bound */
- }
+ return (retval);
+ }
- if (n <= smallint)
- {
- retval = 1.0;
- goto ret;
+ /* find the multiple of 2^-6 nearest x */
+ k = hx >> 20;
+ j = (0x00100000 | (hx & 0x000fffff)) >> (0x40c - k);
+ j = (j - 1) & ~1;
+ if (ix < 0)
+ j += 134;
+ /*
+ Use FE_TONEAREST rounding mode for computing yy.y
+ Avoid set/reset of rounding mode if already in FE_TONEAREST mode
+ */
+ fe_val = get_rounding_mode ();
+ if (fe_val == FE_TONEAREST) {
+ z = xx.x - TBL2[j];
+ t = z * z;
+ yy.y = z + (t * (half + (z * t2)) +
+ (t * t) * (t3 + z * t4 + t * t5));
+ retval = TBL2[j + 1] + TBL2[j + 1] * yy.y;
+ } else {
+ libc_fesetround (FE_TONEAREST);
+ z = xx.x - TBL2[j];
+ t = z * z;
+ yy.y = z + (t * (half + (z * t2)) +
+ (t * t) * (t3 + z * t4 + t * t5));
+ retval = TBL2[j + 1] + TBL2[j + 1] * yy.y;
+ libc_fesetround (fe_val);
}
+ return (retval);
+ }
- if (n >= badint)
- {
- if (n > infint)
- {
- retval = x + x;
- goto ret;
- } /* x is NaN */
- if (n < infint)
- {
- if (x > 0)
- goto ret_huge;
- else
- goto ret_tiny;
- }
- /* x is finite, cause either overflow or underflow */
- if (junk1.i[LOW_HALF] != 0)
- {
- retval = x + x;
- goto ret;
- } /* x is NaN */
- retval = (x > 0) ? inf.x : zero; /* |x| = inf; return either inf or 0 */
- goto ret;
- }
+ if (hx >= 0x40862e42)
+ { /* x is large, infinite, or nan */
+ if (hx >= 0x7ff00000)
+ {
+ if (ix == 0xfff00000 && xx.i_part[LOW_HALF] == 0)
+ return (zero); /* exp(-inf) = 0 */
+ return (xx.x * xx.x); /* exp(nan/inf) is nan or inf */
+ }
+ if (xx.x > threshold1)
+ { /* set overflow error condition */
+ retval = hhuge * hhuge;
+ return retval;
+ }
+ if (-xx.x > threshold2)
+ { /* set underflow error condition */
+ double force_underflow = tiny * tiny;
+ math_force_eval (force_underflow);
+ retval = zero;
+ return retval;
+ }
+ }
- y = x * log2e.x + three51.x;
- bexp = y - three51.x;
- junk1.x = y;
- eps = bexp * ln_two2.x;
- t = x - bexp * ln_two1.x;
- y = t + three33.x;
- base = y - three33.x;
- junk2.x = y;
- del = (t - base) - eps;
- eps = del + del * del * (p3.x * del + p2.x);
- i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
- j = (junk2.i[LOW_HALF] & 511) << 1;
- al = coar.x[i] * fine.x[j];
- bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
- + coar.x[i + 1] * fine.x[j + 1]);
- rem = (bet + bet * eps) + al * eps;
- res = al + rem;
- cor = (al - res) + rem;
- if (m >> 31)
- {
- ex = junk1.i[LOW_HALF];
- if (res < 1.0)
- {
- res += res;
- cor += cor;
- ex -= 1;
- }
- if (ex >= -1022)
- {
- binexp.i[HIGH_HALF] = (1023 + ex) << 20;
- if (res == (res + cor * err_0))
- {
- retval = res * binexp.x;
- goto ret;
- }
- else
- {
- retval = __slowexp (x);
- goto check_uflow_ret;
- } /*if error is over bound */
- }
- ex = -(1022 + ex);
- binexp.i[HIGH_HALF] = (1023 - ex) << 20;
- res *= binexp.x;
- cor *= binexp.x;
- eps = 1.0000000001 + err_0 * binexp.x;
- t = 1.0 + res;
- y = ((1.0 - t) + res) + cor;
- res = t + y;
- cor = (t - res) + y;
- if (res == (res + eps * cor))
- {
- binexp.i[HIGH_HALF] = 0x00100000;
- retval = (res - 1.0) * binexp.x;
- goto check_uflow_ret;
- }
- else
- {
- retval = __slowexp (x);
- goto check_uflow_ret;
- } /* if error is over bound */
- check_uflow_ret:
- if (retval < DBL_MIN)
- {
- double force_underflow = tiny * tiny;
- math_force_eval (force_underflow);
- }
- if (retval == 0)
- goto ret_tiny;
- goto ret;
- }
+ /*
+ Use FE_TONEAREST rounding mode for computing yy.y
+ Avoid set/reset of rounding mode if already in FE_TONEAREST mode
+ */
+ fe_val = get_rounding_mode ();
+ if (fe_val == FE_TONEAREST) {
+ t = invln2_32 * xx.x;
+ if (ix < 0)
+ t -= half;
else
- {
- binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
- if (res == (res + cor * err_0))
- retval = res * binexp.x * t256.x;
- else
- retval = __slowexp (x);
- if (isinf (retval))
- goto ret_huge;
- else
- goto ret;
- }
+ t += half;
+ k = (int) t;
+ j = (k & 0x1f) << 1;
+ m = k >> 5;
+ z = (xx.x - k * ln2_32hi) - k * ln2_32lo;
+
+ /* z is now in primary range */
+ t = z * z;
+ yy.y = z + (t * (half + z * t2) +
+ (t * t) * (t3 + z * t4 + t * t5));
+ yy.y = TBL[j] + (TBL[j + 1] + TBL[j] * yy.y);
+ } else {
+ libc_fesetround (FE_TONEAREST);
+ t = invln2_32 * xx.x;
+ if (ix < 0)
+ t -= half;
+ else
+ t += half;
+ k = (int) t;
+ j = (k & 0x1f) << 1;
+ m = k >> 5;
+ z = (xx.x - k * ln2_32hi) - k * ln2_32lo;
+
+ /* z is now in primary range */
+ t = z * z;
+ yy.y = z + (t * (half + z * t2) +
+ (t * t) * (t3 + z * t4 + t * t5));
+ yy.y = TBL[j] + (TBL[j + 1] + TBL[j] * yy.y);
+ libc_fesetround (fe_val);
}
-ret:
- return retval;
- ret_huge:
- return hhuge * hhuge;
-
- ret_tiny:
- return tiny * tiny;
+ if (m < -1021)
+ {
+ yy.y_part[HIGH_HALF] += (m + 54) << 20;
+ retval = twom54 * yy.y;
+ if (retval < DBL_MIN)
+ {
+ double force_underflow = tiny * tiny;
+ math_force_eval (force_underflow);
+ }
+ return retval;
+ }
+ yy.y_part[HIGH_HALF] += m << 20;
+ return (yy.y);
}
#ifndef __ieee754_exp
strong_alias (__ieee754_exp, __exp_finite)
#endif
+#ifndef SECTION
+# define SECTION
+#endif
+
/* Compute e^(x+xx). The routine also receives bound of error of previous
calculation. If after computing exp the error exceeds the allowed bounds,
the routine returns a non-positive number. Otherwise it returns the
@@ -25,7 +25,7 @@
/* log1 */
/* checkint */
/* FILES NEEDED: dla.h endian.h mpa.h mydefs.h */
-/* halfulp.c mpexp.c mplog.c slowexp.c slowpow.c mpa.c */
+/* halfulp.c mpexp.c mplog.c slowpow.c mpa.c */
/* uexp.c upow.c */
/* root.tbl uexp.tbl upow.tbl */
/* An ultimate power routine. Given two IEEE double machine numbers y,x */
new file mode 100644
@@ -0,0 +1,215 @@
+/* EXP function tables - for use in ocmputing double precisoin exponential
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+static const double TBL[64] = {
+ 0x1.0000000000000p+0, 0x0.0000000000000p+0,
+ 0x1.059b0d3158574p+0, 0x1.d73e2a475b465p-55,
+ 0x1.0b5586cf9890fp+0, 0x1.8a62e4adc610ap-54,
+ 0x1.11301d0125b51p+0, -0x1.6c51039449b3ap-54,
+ 0x1.172b83c7d517bp+0, -0x1.19041b9d78a76p-55,
+ 0x1.1d4873168b9aap+0, 0x1.e016e00a2643cp-54,
+ 0x1.2387a6e756238p+0, 0x1.9b07eb6c70573p-54,
+ 0x1.29e9df51fdee1p+0, 0x1.612e8afad1255p-55,
+ 0x1.306fe0a31b715p+0, 0x1.6f46ad23182e4p-55,
+ 0x1.371a7373aa9cbp+0, -0x1.63aeabf42eae2p-54,
+ 0x1.3dea64c123422p+0, 0x1.ada0911f09ebcp-55,
+ 0x1.44e086061892dp+0, 0x1.89b7a04ef80d0p-59,
+ 0x1.4bfdad5362a27p+0, 0x1.d4397afec42e2p-56,
+ 0x1.5342b569d4f82p+0, -0x1.07abe1db13cacp-55,
+ 0x1.5ab07dd485429p+0, 0x1.6324c054647adp-54,
+ 0x1.6247eb03a5585p+0, -0x1.383c17e40b497p-54,
+ 0x1.6a09e667f3bcdp+0, -0x1.bdd3413b26456p-54,
+ 0x1.71f75e8ec5f74p+0, -0x1.16e4786887a99p-55,
+ 0x1.7a11473eb0187p+0, -0x1.41577ee04992fp-55,
+ 0x1.82589994cce13p+0, -0x1.d4c1dd41532d8p-54,
+ 0x1.8ace5422aa0dbp+0, 0x1.6e9f156864b27p-54,
+ 0x1.93737b0cdc5e5p+0, -0x1.75fc781b57ebcp-57,
+ 0x1.9c49182a3f090p+0, 0x1.c7c46b071f2bep-56,
+ 0x1.a5503b23e255dp+0, -0x1.d2f6edb8d41e1p-54,
+ 0x1.ae89f995ad3adp+0, 0x1.7a1cd345dcc81p-54,
+ 0x1.b7f76f2fb5e47p+0, -0x1.5584f7e54ac3bp-56,
+ 0x1.c199bdd85529cp+0, 0x1.11065895048ddp-55,
+ 0x1.cb720dcef9069p+0, 0x1.503cbd1e949dbp-56,
+ 0x1.d5818dcfba487p+0, 0x1.2ed02d75b3706p-55,
+ 0x1.dfc97337b9b5fp+0, -0x1.1a5cd4f184b5cp-54,
+ 0x1.ea4afa2a490dap+0, -0x1.e9c23179c2893p-54,
+ 0x1.f50765b6e4540p+0, 0x1.9d3e12dd8a18bp-54};
+/*
+ For i = 0, ..., 66,
+ TBL2[2*i] is a double precision number near (i+1)*2^-6, and
+ TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less
+ than 2^-60.
+
+ For i = 67, ..., 133,
+ TBL2[2*i] is a double precision number near -(i+1)*2^-6, and
+ TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less
+ than 2^-60.
+*/
+
+static const double TBL2[268] = {
+ 0x1.ffffffffffc82p-7, 0x1.04080ab55de32p+0,
+ 0x1.fffffffffffdbp-6, 0x1.08205601127ecp+0,
+ 0x1.80000000000a0p-5, 0x1.0c49236829e91p+0,
+ 0x1.fffffffffff79p-5, 0x1.1082b577d34e9p+0,
+ 0x1.3fffffffffffcp-4, 0x1.14cd4fc989cd6p+0,
+ 0x1.8000000000060p-4, 0x1.192937074e0d4p+0,
+ 0x1.c000000000061p-4, 0x1.1d96b0eff0e80p+0,
+ 0x1.fffffffffffd6p-4, 0x1.2216045b6f5cap+0,
+ 0x1.1ffffffffff58p-3, 0x1.26a7793f6014cp+0,
+ 0x1.3ffffffffff75p-3, 0x1.2b4b58b372c65p+0,
+ 0x1.5ffffffffff00p-3, 0x1.3001ecf601ad1p+0,
+ 0x1.8000000000020p-3, 0x1.34cb8170b583ap+0,
+ 0x1.9ffffffffa629p-3, 0x1.39a862bd3b344p+0,
+ 0x1.c00000000000fp-3, 0x1.3e98deaa11dcep+0,
+ 0x1.e00000000007fp-3, 0x1.439d443f5f16dp+0,
+ 0x1.0000000000072p-2, 0x1.48b5e3c3e81abp+0,
+ 0x1.0fffffffffecap-2, 0x1.4de30ec211dfbp+0,
+ 0x1.1ffffffffff8fp-2, 0x1.5325180cfacd2p+0,
+ 0x1.300000000003bp-2, 0x1.587c53c5a7b04p+0,
+ 0x1.4000000000034p-2, 0x1.5de9176046007p+0,
+ 0x1.4ffffffffff89p-2, 0x1.636bb9a98322fp+0,
+ 0x1.5ffffffffffe7p-2, 0x1.690492cbf942ap+0,
+ 0x1.6ffffffffff78p-2, 0x1.6eb3fc55b1e45p+0,
+ 0x1.7ffffffffff65p-2, 0x1.747a513dbef32p+0,
+ 0x1.8ffffffffffd5p-2, 0x1.7a57ede9ea22ep+0,
+ 0x1.9ffffffffff6ep-2, 0x1.804d30347b50fp+0,
+ 0x1.affffffffffc3p-2, 0x1.865a7772164aep+0,
+ 0x1.c000000000053p-2, 0x1.8c802477b0030p+0,
+ 0x1.d00000000004dp-2, 0x1.92be99a09bf1ep+0,
+ 0x1.e000000000096p-2, 0x1.99163ad4b1e08p+0,
+ 0x1.efffffffffefap-2, 0x1.9f876d8e8c4fcp+0,
+ 0x1.fffffffffffd0p-2, 0x1.a61298e1e0688p+0,
+ 0x1.0800000000002p-1, 0x1.acb82581eee56p+0,
+ 0x1.100000000001fp-1, 0x1.b3787dc80f979p+0,
+ 0x1.17ffffffffff8p-1, 0x1.ba540dba56e4fp+0,
+ 0x1.1fffffffffffap-1, 0x1.c14b431256441p+0,
+ 0x1.27fffffffffc4p-1, 0x1.c85e8d43f7c9bp+0,
+ 0x1.2fffffffffffdp-1, 0x1.cf8e5d84758a6p+0,
+ 0x1.380000000001fp-1, 0x1.d6db26d16cd84p+0,
+ 0x1.3ffffffffffd8p-1, 0x1.de455df80e39bp+0,
+ 0x1.4800000000052p-1, 0x1.e5cd799c6a59cp+0,
+ 0x1.4ffffffffffc8p-1, 0x1.ed73f240dc10cp+0,
+ 0x1.5800000000013p-1, 0x1.f539424d90f71p+0,
+ 0x1.5ffffffffffbcp-1, 0x1.fd1de6182f885p+0,
+ 0x1.680000000002dp-1, 0x1.02912df5ce741p+1,
+ 0x1.7000000000040p-1, 0x1.06a39207f0a2ap+1,
+ 0x1.780000000004fp-1, 0x1.0ac660691652ap+1,
+ 0x1.7ffffffffff6fp-1, 0x1.0ef9db467dcabp+1,
+ 0x1.87fffffffffe5p-1, 0x1.133e45d82e943p+1,
+ 0x1.9000000000035p-1, 0x1.1793e4652cc6dp+1,
+ 0x1.97fffffffffb3p-1, 0x1.1bfafc47bda48p+1,
+ 0x1.a000000000000p-1, 0x1.2073d3f1bd518p+1,
+ 0x1.a80000000004ap-1, 0x1.24feb2f105ce2p+1,
+ 0x1.affffffffffedp-1, 0x1.299be1f3e7f11p+1,
+ 0x1.b7ffffffffffbp-1, 0x1.2e4baacdb6611p+1,
+ 0x1.c00000000001dp-1, 0x1.330e587b62b39p+1,
+ 0x1.c800000000079p-1, 0x1.37e437282d538p+1,
+ 0x1.cffffffffff51p-1, 0x1.3ccd943268248p+1,
+ 0x1.d7fffffffff74p-1, 0x1.41cabe304cadcp+1,
+ 0x1.e000000000011p-1, 0x1.46dc04f4e5343p+1,
+ 0x1.e80000000001ep-1, 0x1.4c01b9950a124p+1,
+ 0x1.effffffffff9ep-1, 0x1.513c2e6c73196p+1,
+ 0x1.f7fffffffffedp-1, 0x1.568bb722dd586p+1,
+ 0x1.0000000000034p+0, 0x1.5bf0a8b1457b0p+1,
+ 0x1.03fffffffffe2p+0, 0x1.616b5967376dfp+1,
+ 0x1.07fffffffff4bp+0, 0x1.66fc20f0337a9p+1,
+ 0x1.0bffffffffffdp+0, 0x1.6ca35859290f5p+1,
+ -0x1.fffffffffffe4p-7, 0x1.f80feabfeefa5p-1,
+ -0x1.ffffffffffb0bp-6, 0x1.f03f56a88b5fep-1,
+ -0x1.7ffffffffffa7p-5, 0x1.e88dc6afecfc5p-1,
+ -0x1.ffffffffffea8p-5, 0x1.e0fabfbc702b8p-1,
+ -0x1.3ffffffffffb3p-4, 0x1.d985c89d041acp-1,
+ -0x1.7ffffffffffe3p-4, 0x1.d22e6a0197c06p-1,
+ -0x1.bffffffffff9ap-4, 0x1.caf42e73a4c89p-1,
+ -0x1.fffffffffff98p-4, 0x1.c3d6a24ed822dp-1,
+ -0x1.1ffffffffffe9p-3, 0x1.bcd553b9d7b67p-1,
+ -0x1.3ffffffffffe0p-3, 0x1.b5efd29f24c2dp-1,
+ -0x1.5fffffffff553p-3, 0x1.af25b0a61a9f4p-1,
+ -0x1.7ffffffffff8bp-3, 0x1.a876812c08794p-1,
+ -0x1.9fffffffffe51p-3, 0x1.a1e1d93d68828p-1,
+ -0x1.bffffffffff6ep-3, 0x1.9b674f8f2f3f5p-1,
+ -0x1.dffffffffff7fp-3, 0x1.95067c7837a0cp-1,
+ -0x1.fffffffffff7ap-3, 0x1.8ebef9eac8225p-1,
+ -0x1.0fffffffffffep-2, 0x1.8890636e31f55p-1,
+ -0x1.1ffffffffff41p-2, 0x1.827a56188975ep-1,
+ -0x1.2ffffffffffbap-2, 0x1.7c7c708877656p-1,
+ -0x1.3fffffffffff8p-2, 0x1.769652df22f81p-1,
+ -0x1.4ffffffffff90p-2, 0x1.70c79eba33c2fp-1,
+ -0x1.5ffffffffffdbp-2, 0x1.6b0ff72deb8aap-1,
+ -0x1.6ffffffffff9ap-2, 0x1.656f00bf5798ep-1,
+ -0x1.7ffffffffff9fp-2, 0x1.5fe4615e98eb0p-1,
+ -0x1.8ffffffffffeep-2, 0x1.5a6fc061433cep-1,
+ -0x1.9fffffffffc4ap-2, 0x1.5510c67cd26cdp-1,
+ -0x1.affffffffff30p-2, 0x1.4fc71dc13566bp-1,
+ -0x1.bfffffffffff0p-2, 0x1.4a9271936fd0ep-1,
+ -0x1.cfffffffffff3p-2, 0x1.45726ea84fb8cp-1,
+ -0x1.dfffffffffff3p-2, 0x1.4066c2ff3912bp-1,
+ -0x1.effffffffff80p-2, 0x1.3b6f1ddd05ab9p-1,
+ -0x1.fffffffffffdfp-2, 0x1.368b2fc6f9614p-1,
+ -0x1.0800000000000p-1, 0x1.31baaa7dca843p-1,
+ -0x1.0ffffffffffa4p-1, 0x1.2cfd40f8bdce4p-1,
+ -0x1.17fffffffff0ap-1, 0x1.2852a760d5ce7p-1,
+ -0x1.2000000000000p-1, 0x1.23ba930c1568bp-1,
+ -0x1.27fffffffffbbp-1, 0x1.1f34ba78d568dp-1,
+ -0x1.2fffffffffe32p-1, 0x1.1ac0d5492c1dbp-1,
+ -0x1.37ffffffff042p-1, 0x1.165e9c3e67ef2p-1,
+ -0x1.3ffffffffff77p-1, 0x1.120dc93499431p-1,
+ -0x1.47fffffffff6bp-1, 0x1.0dce171e34ecep-1,
+ -0x1.4fffffffffff1p-1, 0x1.099f41ffbe588p-1,
+ -0x1.57ffffffffe02p-1, 0x1.058106eb8a7aep-1,
+ -0x1.5ffffffffffe5p-1, 0x1.017323fd9002ep-1,
+ -0x1.67fffffffffb0p-1, 0x1.faeab0ae9386cp-2,
+ -0x1.6ffffffffffb2p-1, 0x1.f30ec837503d7p-2,
+ -0x1.77fffffffff7fp-1, 0x1.eb5210d627133p-2,
+ -0x1.7ffffffffffe8p-1, 0x1.e3b40ebefcd95p-2,
+ -0x1.87fffffffffc8p-1, 0x1.dc3448110dae2p-2,
+ -0x1.8fffffffffb30p-1, 0x1.d4d244cf4ef06p-2,
+ -0x1.97fffffffffefp-1, 0x1.cd8d8ed8ee395p-2,
+ -0x1.9ffffffffffa7p-1, 0x1.c665b1e1f1e5cp-2,
+ -0x1.a7fffffffffdcp-1, 0x1.bf5a3b6bf18d6p-2,
+ -0x1.affffffffff95p-1, 0x1.b86ababeef93bp-2,
+ -0x1.b7fffffffffcbp-1, 0x1.b196c0e24d256p-2,
+ -0x1.bffffffffff32p-1, 0x1.aadde095dadf7p-2,
+ -0x1.c7fffffffff6ap-1, 0x1.a43fae4b047c9p-2,
+ -0x1.cffffffffffb6p-1, 0x1.9dbbc01e182a4p-2,
+ -0x1.d7fffffffffcap-1, 0x1.9751adcfa81ecp-2,
+ -0x1.dffffffffffcdp-1, 0x1.910110be0699ep-2,
+ -0x1.e7ffffffffffbp-1, 0x1.8ac983dedbc69p-2,
+ -0x1.effffffffff88p-1, 0x1.84aaa3b8d51a9p-2,
+ -0x1.f7fffffffffbbp-1, 0x1.7ea40e5d6d92ep-2,
+ -0x1.fffffffffffdbp-1, 0x1.78b56362cef53p-2,
+ -0x1.03fffffffff00p+0, 0x1.72de43ddcb1f2p-2,
+ -0x1.07ffffffffe6fp+0, 0x1.6d1e525bed085p-2,
+ -0x1.0bfffffffffd6p+0, 0x1.677532dda1c57p-2};
+
+static const double
+/* Following three values used to scale x to primary range */
+ invln2_32 = 0x1.71547652b82fep+5, /* 4.61662413084468283841e+01 */
+ ln2_32hi = 0x1.62e42fee00000p-6, /* 2.16608493865351192653e-02 */
+ ln2_32lo = 0x1.a39ef35793c76p-38, /* 5.96317165397058656257e-12 */
+/* t2-t5 terms used for polynomial computation */
+ t2 = 0x1.5555555548f7cp-3, /* 1.6666666666526086527e-1 */
+ t3 = 0x1.5555555545d4ep-5, /* 4.1666666666226079285e-2 */
+ t4 = 0x1.11115b7aa905ep-7, /* 8.3333679843421958056e-3 */
+ t5 = 0x1.6c1728d739765p-10, /* 1.3888949086377719040e-3 */
+/* maximum value for x to not overflow */
+ threshold1 = 0x1.62e42fefa39efp+9, /* 7.09782712893383973096e+02 */
+/* maximum value for -x to not underflow */
+ threshold2 = 0x1.74910d52d3051p+9, /* 7.45133219101941108420e+02 */
+/* scaling factor used when result near zero*/
+ twom54 = 0x1.0000000000000p-54; /* 5.55111512312578270212e-17 */
deleted file mode 100644
@@ -1,86 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2017 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/**************************************************************************/
-/* MODULE_NAME:slowexp.c */
-/* */
-/* FUNCTION:slowexp */
-/* */
-/* FILES NEEDED:mpa.h */
-/* mpa.c mpexp.c */
-/* */
-/*Converting from double precision to Multi-precision and calculating */
-/* e^x */
-/**************************************************************************/
-#include <math_private.h>
-
-#include <stap-probe.h>
-
-#ifndef USE_LONG_DOUBLE_FOR_MP
-# include "mpa.h"
-void __mpexp (mp_no *x, mp_no *y, int p);
-#endif
-
-#ifndef SECTION
-# define SECTION
-#endif
-
-/*Converting from double precision to Multi-precision and calculating e^x */
-double
-SECTION
-__slowexp (double x)
-{
-#ifndef USE_LONG_DOUBLE_FOR_MP
- double w, z, res, eps = 3.0e-26;
- int p;
- mp_no mpx, mpy, mpz, mpw, mpeps, mpcor;
-
- /* Use the multiple precision __MPEXP function to compute the exponential
- First at 144 bits and if it is not accurate enough, at 768 bits. */
- p = 6;
- __dbl_mp (x, &mpx, p);
- __mpexp (&mpx, &mpy, p);
- __dbl_mp (eps, &mpeps, p);
- __mul (&mpeps, &mpy, &mpcor, p);
- __add (&mpy, &mpcor, &mpw, p);
- __sub (&mpy, &mpcor, &mpz, p);
- __mp_dbl (&mpw, &w, p);
- __mp_dbl (&mpz, &z, p);
- if (w == z)
- {
- /* Track how often we get to the slow exp code plus
- its input/output values. */
- LIBC_PROBE (slowexp_p6, 2, &x, &w);
- return w;
- }
- else
- {
- p = 32;
- __dbl_mp (x, &mpx, p);
- __mpexp (&mpx, &mpy, p);
- __mp_dbl (&mpy, &res, p);
-
- /* Track how often we get to the uber-slow exp code plus
- its input/output values. */
- LIBC_PROBE (slowexp_p32, 2, &x, &res);
- return res;
- }
-#else
- return (double) __ieee754_expl((long double)x);
-#endif
-}
@@ -3,5 +3,4 @@
ifeq ($(subdir),math)
CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops
CPPFLAGS-slowpow.c += -DUSE_LONG_DOUBLE_FOR_MP=1
-CPPFLAGS-slowexp.c += -DUSE_LONG_DOUBLE_FOR_MP=1
endif
@@ -10,7 +10,7 @@ libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \
libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \
e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \
- mplog-fma mpa-fma slowexp-fma slowpow-fma \
+ mplog-fma mpa-fma slowpow-fma \
sincos32-fma doasin-fma dosincos-fma \
halfulp-fma mpexp-fma \
mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma
@@ -32,7 +32,6 @@ CFLAGS-mpsqrt-fma.c = -mfma -mavx2
CFLAGS-mptan-fma.c = -mfma -mavx2
CFLAGS-s_atan-fma.c = -mfma -mavx2
CFLAGS-sincos32-fma.c = -mfma -mavx2
-CFLAGS-slowexp-fma.c = -mfma -mavx2
CFLAGS-slowpow-fma.c = -mfma -mavx2
CFLAGS-s_sin-fma.c = -mfma -mavx2
CFLAGS-s_tan-fma.c = -mfma -mavx2
@@ -48,7 +47,7 @@ CFLAGS-e_powf-fma.c = -mfma -mavx2
libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
- mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \
+ mplog-fma4 mpa-fma4 slowpow-fma4 \
sincos32-fma4 doasin-fma4 dosincos-fma4 \
halfulp-fma4 mpexp-fma4 \
mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4
@@ -70,14 +69,13 @@ CFLAGS-mpsqrt-fma4.c = -mfma4
CFLAGS-mptan-fma4.c = -mfma4
CFLAGS-s_atan-fma4.c = -mfma4
CFLAGS-sincos32-fma4.c = -mfma4
-CFLAGS-slowexp-fma4.c = -mfma4
CFLAGS-slowpow-fma4.c = -mfma4
CFLAGS-s_sin-fma4.c = -mfma4
CFLAGS-s_tan-fma4.c = -mfma4
libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
e_atan2-avx s_sin-avx s_tan-avx \
- mplog-avx mpa-avx slowexp-avx \
+ mplog-avx mpa-avx \
mpexp-avx
CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
@@ -88,7 +86,6 @@ CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
-CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
endif
@@ -1,6 +1,5 @@
#define __ieee754_exp __ieee754_exp_avx
#define __exp1 __exp1_avx
-#define __slowexp __slowexp_avx
#define SECTION __attribute__ ((section (".text.avx")))
#include <sysdeps/ieee754/dbl-64/e_exp.c>
@@ -1,6 +1,5 @@
#define __ieee754_exp __ieee754_exp_fma
#define __exp1 __exp1_fma
-#define __slowexp __slowexp_fma
#define SECTION __attribute__ ((section (".text.fma")))
#include <sysdeps/ieee754/dbl-64/e_exp.c>
@@ -1,6 +1,5 @@
#define __ieee754_exp __ieee754_exp_fma4
#define __exp1 __exp1_fma4
-#define __slowexp __slowexp_fma4
#define SECTION __attribute__ ((section (".text.fma4")))
#include <sysdeps/ieee754/dbl-64/e_exp.c>
deleted file mode 100644
@@ -1,9 +0,0 @@
-#define __slowexp __slowexp_avx
-#define __add __add_avx
-#define __dbl_mp __dbl_mp_avx
-#define __mpexp __mpexp_avx
-#define __mul __mul_avx
-#define __sub __sub_avx
-#define SECTION __attribute__ ((section (".text.avx")))
-
-#include <sysdeps/ieee754/dbl-64/slowexp.c>
deleted file mode 100644
@@ -1,9 +0,0 @@
-#define __slowexp __slowexp_fma
-#define __add __add_fma
-#define __dbl_mp __dbl_mp_fma
-#define __mpexp __mpexp_fma
-#define __mul __mul_fma
-#define __sub __sub_fma
-#define SECTION __attribute__ ((section (".text.fma")))
-
-#include <sysdeps/ieee754/dbl-64/slowexp.c>
deleted file mode 100644
@@ -1,9 +0,0 @@
-#define __slowexp __slowexp_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __mpexp __mpexp_fma4
-#define __mul __mul_fma4
-#define __sub __sub_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/slowexp.c>