@@ -8225,8 +8225,9 @@ y0 0x1p-100
y0 0x1p-110
y0 0x1p-600
y0 0x1p-10000
-# the next value generates larger error bounds on x86_64 (binary32)
+# the next values generate large error bounds on x86_64 (binary32)
y0 0xd.3432bp-4
+y0 0x1.3d4e56p+6
y0 min
y0 min_subnorm
@@ -820,6 +820,31 @@ y0 0xd.3432bp-4
= y0 tonearest ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44p-8 : inexact-ok
= y0 towardzero ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44p-8 : inexact-ok
= y0 upward ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44p-8 : inexact-ok
+y0 0x1.3d4e56p+6
+= y0 downward binary32 0x4.f53958p+4 : 0x1.b42862p-16 : inexact-ok
+= y0 tonearest binary32 0x4.f53958p+4 : 0x1.b42864p-16 : inexact-ok
+= y0 towardzero binary32 0x4.f53958p+4 : 0x1.b42862p-16 : inexact-ok
+= y0 upward binary32 0x4.f53958p+4 : 0x1.b42864p-16 : inexact-ok
+= y0 downward binary64 0x4.f53958p+4 : 0x1.b428630651a17p-16 : inexact-ok
+= y0 tonearest binary64 0x4.f53958p+4 : 0x1.b428630651a17p-16 : inexact-ok
+= y0 towardzero binary64 0x4.f53958p+4 : 0x1.b428630651a17p-16 : inexact-ok
+= y0 upward binary64 0x4.f53958p+4 : 0x1.b428630651a18p-16 : inexact-ok
+= y0 downward intel96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 tonearest intel96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 towardzero intel96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 upward intel96 0x4.f53958p+4 : 0x1.b428630651a175aep-16 : inexact-ok
+= y0 downward m68k96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 tonearest m68k96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 towardzero m68k96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 upward m68k96 0x4.f53958p+4 : 0x1.b428630651a175aep-16 : inexact-ok
+= y0 downward binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eeap-16 : inexact-ok
+= y0 tonearest binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eebp-16 : inexact-ok
+= y0 towardzero binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eeap-16 : inexact-ok
+= y0 upward binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eebp-16 : inexact-ok
+= y0 downward ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104e8p-16 : inexact-ok
+= y0 tonearest ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104fp-16 : inexact-ok
+= y0 towardzero ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104e8p-16 : inexact-ok
+= y0 upward ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104fp-16 : inexact-ok
y0 min
= y0 downward binary32 0x4p-128 : -0x3.7ac89cp+4 : inexact-ok
= y0 tonearest binary32 0x4p-128 : -0x3.7ac89cp+4 : inexact-ok
@@ -112,6 +112,335 @@ v02 = 7.6006865129e-05, /* 0x389f65e0 */
v03 = 2.5915085189e-07, /* 0x348b216c */
v04 = 4.4111031494e-10; /* 0x2ff280c2 */
+#define FIRST_ZERO_Y0 0.893576f
+
+#define SMALL_SIZE 64
+
+/* The following table contains successive zeros of y0 and degree-3 polynomial
+ approximations of y0 around these zeros: Py[0] for the first zero (0.893576),
+ Py[1] for the second one (3.957678), and so one. Each line contains:
+ {x0, xmid, x1, p0, p1, p2, p3}
+ where [x0,x1] is the interval around the zero, xmid is the binary32 number closest
+ to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation polynomial.
+ Each polynomial was generated using Remez' algorithm on the interval [x0,x1]
+ around the corresponding zero where the error is larger than 9 ulps for the
+ default code below.
+ Degree 3 is enough to get an error less than 4 ulps.
+*/
+static float Py[SMALL_SIZE][7] = {
+ /* for the first root we use a degree-4 polynomial since degree 3 is not enough,
+ where the coefficient of degree 4 is hard-coded in y0f_near_root() */
+{0xd.bd613p-4,0xe.4c176p-4,0xe.e0897p-4,0x3.274468p-28,0xe.121b7p-4,-0x7.df8eap-4,0x3.88cc2p-4/*,-0x3.9ce95p-4*/},
+{0x3.f2af8cp+0,0x3.f52a68p+0,0x3.fa1fa4p+0,0xa.f1f83p-28,-0x6.70d098p-4,0xd.04dc4p-8,0xe.f2a5p-8},
+{0x7.1464ap+0,0x7.16077p+0,0x7.191dap+0,-0x5.e2a51p-28,0x4.cd3328p-4,-0x5.6bbb5p-8,-0xc.48cfbp-8},
+{0xa.37ec2p+0,0xa.38ebap+0,0xa.3ad94p+0,-0x1.4b0aeep-24,-0x3.fec6b8p-4,0x3.206ccp-8,0xa.72401p-8},
+{0xd.5bd7dp+0,0xd.5c70ep+0,0xd.5d94ap+0,-0x8.179d7p-28,0x3.7e6544p-4,-0x2.178554p-8,-0x9.35f5bp-8},
+{0x1.07f9aap+4,0x1.0803c8p+4,0x1.08170cp+4,-0x2.5b8078p-24,-0x3.24b868p-4,0x1.86265ep-8,0x8.51de2p-8},
+{0x1.3a3d44p+4,0x1.3a42cep+4,0x1.3a4d8ap+4,0x1.cd304ap-28,0x2.e189ecp-4,-0x1.2c673ap-8,-0x7.a4726p-8},
+{0x1.6c7d5ep+4,0x1.6c833p+4,0x1.6c99fp+4,-0x6.c63f1p-28,-0x2.acc9a8p-4,0xf.077a1p-12,0x7.1aba98p-8},
+{0x1.9ebec4p+4,0x1.9ec47p+4,0x1.9ed016p+4,0x1.e53838p-24,0x2.81f2p-4,-0xc.61ccp-12,-0x6.aaa99p-8},
+{0x1.d1008ep+4,0x1.d10644p+4,0x1.d10cf2p+4,0x1.636feep-24,-0x2.5e40dcp-4,0xa.6dfp-12,0x6.4cd5a8p-8},
+{0x2.0344f8p+4,0x2.034884p+4,0x2.034d4p+4,-0x4.04e1bp-28,0x2.3febd8p-4,-0x8.f0ff9p-12,-0x5.fcd088p-8},
+{0x2.358778p+4,0x2.358b1p+4,0x2.359224p+4,0x3.6063d8p-24,-0x2.25baacp-4,0x7.c6a088p-12,0x5.b78ff8p-8},
+{0x2.67ca1p+4,0x2.67cdd8p+4,0x2.67d434p+4,-0x3.f39ebcp-24,0x2.0ed04cp-4,-0x6.d7eaf8p-12,-0x5.7aeaa8p-8},
+{0x2.9a0d0cp+4,0x2.9a10dp+4,0x2.9a1828p+4,-0x4.ea23p-28,-0x1.fa8b4p-4,0x6.158438p-12,0x5.45324p-8},
+{0x2.cc51ecp+4,0x2.cc53e8p+4,0x2.cc580cp+4,-0x3.5df0c8p-24,0x1.e8727ep-4,-0x5.7460d8p-12,-0x5.1536p-8},
+{0x2.fe94f8p+4,0x2.fe972p+4,0x2.fe9b18p+4,0x1.1ef09ep-24,-0x1.d8293ap-4,0x4.ed6058p-12,0x4.e9fcc8p-8},
+{0x3.30d8b8p+4,0x3.30da7p+4,0x3.30debcp+4,0x1.b70cecp-24,0x1.c967p-4,-0x4.7ad838p-12,-0x4.c2c9d8p-8},
+{0x3.631b94p+4,0x3.631ddp+4,0x3.632244p+4,0x1.abaadcp-24,-0x1.bbf246p-4,0x4.187ba8p-12,0x4.9f09f8p-8},
+{0x3.955f7cp+4,0x3.956144p+4,0x3.9565fcp+4,0x1.63989ep-24,0x1.af9cb4p-4,-0x3.c397f8p-12,-0x4.7e4038p-8},
+{0x3.c7a2bp+4,0x3.c7a4c4p+4,0x3.c7a878p+4,-0x1.68a8ecp-24,-0x1.a4407ep-4,0x3.797fdcp-12,0x4.600d3p-8},
+{0x3.f9e62cp+4,0x3.f9e85p+4,0x3.f9ea7p+4,0x1.e1bb5p-24,0x1.99be74p-4,-0x3.38739cp-12,-0x4.441c5p-8},
+{0x4.2c2a1p+4,0x4.2c2be8p+4,0x4.2c2e7p+4,-0x5.5bbcfp-24,-0x1.8ffc9ap-4,0x2.ff0f5cp-12,0x4.2a266p-8},
+{0x4.5e6d98p+4,0x4.5e6f8p+4,0x4.5e71c8p+4,-0x4.9e34a8p-24,0x1.86e51cp-4,-0x2.cba284p-12,-0x4.11f568p-8},
+{0x4.90b17p+4,0x4.90b328p+4,0x4.90b5ap+4,0x1.966706p-24,-0x1.7e657p-4,0x2.9e0d44p-12,0x3.fb56a4p-8},
+{0x4.c2f59p+4,0x4.c2f6d8p+4,0x4.c2fac8p+4,0x3.4b3b68p-24,0x1.766dc2p-4,-0x2.752fbp-12,-0x3.e61fcp-8},
+{0x4.f53968p+4,0x4.f53a88p+4,0x4.f53cb8p+4,0x6.a99008p-28,-0x1.6ef07ep-4,0x2.501294p-12,0x3.d230ep-8},
+{0x5.277dp+4,0x5.277e4p+4,0x5.278108p+4,-0x7.a9fa6p-32,0x1.67e1dap-4,-0x2.2e9388p-12,-0x3.bf663cp-8},
+{0x5.59c0e8p+4,0x5.59c2p+4,0x5.59c398p+4,-0x5.026808p-24,-0x1.613798p-4,0x2.104558p-12,0x3.ada76cp-8},
+{0x5.8c0498p+4,0x5.8c05cp+4,0x5.8c0898p+4,0x4.46aa2p-24,0x1.5ae8c2p-4,-0x1.f474eep-12,-0x3.9cda1cp-8},
+{0x5.be48dp+4,0x5.be498p+4,0x5.be4aap+4,0x1.5cfccp-24,-0x1.54ed76p-4,0x1.dad812p-12,0x3.8cec8p-8},
+{0x5.f08c08p+4,0x5.f08d48p+4,0x5.f08ecp+4,-0xb.4dc4cp-28,0x1.4f3ebcp-4,-0x1.c38338p-12,-0x3.7dc9dp-8},
+{0x6.22d05p+4,0x6.22d11p+4,0x6.22d428p+4,0x3.f5343p-24,-0x1.49d668p-4,0x1.ade97cp-12,0x3.6f610cp-8},
+{0x6.55137p+4,0x6.5514ep+4,0x6.551638p+4,-0x6.e6f32p-28,0x1.44aefap-4,-0x1.9a2d3ep-12,-0x3.61a778p-8},
+{0x6.8757e8p+4,0x6.8758bp+4,0x6.8759c8p+4,0x1.f359c2p-28,-0x1.3fc386p-4,0x1.87d25cp-12,0x3.548be4p-8},
+{0x6.b99bp+4,0x6.b99c8p+4,0x6.b99e2p+4,-0x2.9de748p-24,0x1.3b0fa4p-4,-0x1.76b5aap-12,-0x3.48048p-8},
+{0x6.ebdfb8p+4,0x6.ebe058p+4,0x6.ebe1bp+4,-0x2.24ccc8p-24,-0x1.368f5cp-4,0x1.67061p-12,0x3.3c0608p-8},
+{0x7.1e2368p+4,0x7.1e243p+4,0x7.1e25bp+4,0x4.7dcea8p-24,0x1.323f16p-4,-0x1.5858c4p-12,-0x3.3087acp-8},
+{0x7.50673p+4,0x7.506808p+4,0x7.5069ap+4,-0x4.b538p-24,-0x1.2e1b98p-4,0x1.4a9624p-12,0x3.258078p-8},
+{0x7.82ab38p+4,0x7.82abep+4,0x7.82ad78p+4,0x3.09dc4cp-24,0x1.2a21ecp-4,-0x1.3da94p-12,-0x3.1ae88cp-8},
+{0x7.b4eeep+4,0x7.b4efb8p+4,0x7.b4f158p+4,0x4.d5a58p-28,-0x1.264f66p-4,0x1.317f8cp-12,0x3.10b8fcp-8},
+{0x7.e732cp+4,0x7.e73398p+4,0x7.e73548p+4,0x3.f4c44cp-24,0x1.22a192p-4,-0x1.265128p-12,-0x3.06eb08p-8},
+{0x8.1976bp+4,0x8.19777p+4,0x8.19783p+4,0x2.4beae8p-24,-0x1.1f1634p-4,0x1.1b7d2ap-12,0x2.fd7934p-8},
+{0x8.4bbbp+4,0x8.4bbb5p+4,0x8.4bbcep+4,-0xd.a581ep-28,0x1.1bab3cp-4,-0x1.1186d6p-12,-0x2.f45cep-8},
+{0x8.7dfe8p+4,0x8.7dff3p+4,0x8.7dffbp+4,0xa.7c0bdp-28,-0x1.185eccp-4,0x1.0819c2p-12,0x2.eb92ccp-8},
+{0x8.b042bp+4,0x8.b0431p+4,0x8.b043dp+4,-0x1.9452ecp-24,0x1.152f26p-4,-0xf.f2b59p-16,-0x2.e314bp-8},
+{0x8.e2868p+4,0x8.e286fp+4,0x8.e287cp+4,0x3.83b7b8p-24,-0x1.121ab2p-4,0xf.6b21p-16,0x2.dadf34p-8},
+{0x9.14ca8p+4,0x9.14cadp+4,0x9.14cbbp+4,-0x6.5ca3a8p-24,0x1.0f1ff2p-4,-0xe.ea544p-16,-0x2.d2ee28p-8},
+{0x9.470e6p+4,0x9.470ecp+4,0x9.470fp+4,-0x6.bb61ap-24,-0x1.0c3d8ap-4,0xe.7833fp-16,0x2.cb3e2cp-8},
+{0x9.79524p+4,0x9.7952ap+4,0x9.79539p+4,0x2.2438p-24,0x1.097236p-4,-0xe.03747p-16,-0x2.c3cb48p-8},
+{0x9.ab95cp+4,0x9.ab968p+4,0x9.ab971p+4,0x3.1e0054p-24,-0x1.06bcc8p-4,0xd.94272p-16,0x2.bc9328p-8},
+{0x9.ddd9fp+4,0x9.ddda7p+4,0x9.dddb5p+4,0x7.46c908p-24,0x1.041c28p-4,-0xd.320e5p-16,-0x2.b5920cp-8},
+{0xa.101d7p+4,0xa.101e5p+4,0xa.101f3p+4,-0xb.4f158p-28,-0x1.018f52p-4,0xc.cc7dfp-16,0x2.aec5f4p-8},
+{0xa.4261cp+4,0xa.42623p+4,0xa.4262bp+4,-0x6.5a89c8p-24,0xf.f155p-8,-0xc.6b5cbp-16,-0x2.a82be4p-8},
+{0xa.74a5cp+4,0xa.74a62p+4,0xa.74a6ap+4,-0x1.ef16c8p-24,-0xf.cad3fp-8,0xc.16478p-16,0x2.a1c1ap-8},
+{0xa.a6e9bp+4,0xa.a6eap+4,0xa.a6ea9p+4,-0x6.1e7b68p-24,0xf.a564cp-8,-0xb.bd1eap-16,-0x2.9b84f8p-8},
+{0xa.d92d7p+4,0xa.d92dfp+4,0xa.d92eap+4,-0xf.8c858p-28,-0xf.80faep-8,0xb.6f5dbp-16,0x2.9573dcp-8},
+{0xb.0b71ap+4,0xb.0b71ep+4,0xb.0b727p+4,0x7.75d858p-24,0xf.5d8abp-8,-0xb.24e88p-16,-0x2.8f8c4cp-8},
+{0xb.3db58p+4,0xb.3db5cp+4,0xb.3db68p+4,0x1.d78632p-24,-0xf.3b096p-8,0xa.d5ef1p-16,0x2.89cc8p-8},
+{0xb.6ff96p+4,0xb.6ff9bp+4,0xb.6ffaap+4,0x3.b24794p-24,0xf.196c7p-8,-0xa.918e2p-16,-0x2.8432cp-8},
+{0xb.a23d1p+4,0xb.a23d9p+4,0xb.a23e5p+4,0x6.39cbc8p-24,-0xe.f8aa5p-8,0xa.486fap-16,0x2.7ebd9p-8},
+{0xb.d481p+4,0xb.d4818p+4,0xb.d481cp+4,-0x1.820e3ap-24,0xe.d8b9dp-8,-0xa.0973fp-16,-0x2.796b4cp-8},
+{0xc.06c4fp+4,0xc.06c57p+4,0xc.06c5fp+4,-0x2.c7e038p-24,-0xe.b9925p-8,0x9.cce94p-16,0x2.743a5cp-8},
+{0xc.3908ep+4,0xc.39096p+4,0xc.390a2p+4,0x6.ab31c8p-24,0xe.9b2bep-8,-0x9.92ad2p-16,-0x2.6f2994p-8},
+{0xc.6b4cdp+4,0xc.6b4d4p+4,0xc.6b4d8p+4,0x4.4ef25p-24,-0xe.7d7ecp-8,0x9.5360fp-16,0x2.6a37dp-8},
+};
+
+/* argument reduction mod 2pi: T[i] ~ 2^i mod (2*pi) */
+static double T[128] = {
+0x1p+0,
+0x2p+0,
+-0x2.487ed5110b462p+0,
+0x1.b7812aeef4b9fp+0,
+-0x2.d97c7f3321d24p+0,
+0x9.585d6aac7a1a8p-4,
+0x1.2b0bad558f435p+0,
+0x2.56175aab1e86ap+0,
+-0x1.9c501fbace38dp+0,
+0x3.0fde959b6ed46p+0,
+-0x2.8c1a9da2d9d3cp-4,
+-0x5.18353b45b3a78p-4,
+-0xa.306a768b674fp-4,
+-0x1.460d4ed16ce9ep+0,
+-0x2.8c1a9da2d9d3cp+0,
+0x1.304999cb579e8p+0,
+0x2.60933396af3dp+0,
+-0x1.87586de3accc3p+0,
+-0x3.0eb0dbc759986p+0,
+0x2.b1d1d8258155p-4,
+0x5.63a3b04b02aap-4,
+0xa.c74760960554p-4,
+0x1.58e8ec12c0aa8p+0,
+0x2.b1d1d8258155p+0,
+-0xe.4db24c6089bf8p-4,
+-0x1.c9b6498c1137fp+0,
+0x2.b51241f8e8d64p+0,
+-0xd.e5a511f3999bp-4,
+-0x1.bcb4a23e73336p+0,
+0x2.cf15909424df4p+0,
+-0xa.a53b3e8c1877p-4,
+-0x1.54a767d1830eep+0,
+-0x2.a94ecfa3061dcp+0,
+0xf.5e135caff0a78p-4,
+0x1.ebc26b95fe14fp+0,
+-0x2.70f9fde50f1c4p+0,
+0x1.668ad946ed0dap+0,
+0x2.cd15b28dda1b4p+0,
+-0xa.e536ff5570fbp-4,
+-0x1.5ca6dfeaae1f6p+0,
+-0x2.b94dbfd55c3ecp+0,
+0xd.5e3556652c89p-4,
+0x1.abc6aacca5912p+0,
+-0x2.f0f17f77c023ep+0,
+0x6.69bd6218afe64p-4,
+0xc.d37ac4315fcc8p-4,
+0x1.9a6f58862bf99p+0,
+-0x3.13a02404b352ep+0,
+0x2.13e8d07a4a046p-4,
+0x4.27d1a0f49408cp-4,
+0x8.4fa341e928118p-4,
+0x1.09f4683d25023p+0,
+0x2.13e8d07a4a046p+0,
+-0x2.20ad341c773d4p+0,
+0x2.07246cd81ccb8p+0,
+-0x2.3a35fb60d1af2p+0,
+0x1.d412de4f67e7ep+0,
+-0x2.a05918723b764p+0,
+0x1.07cca42c94599p+0,
+0x2.0f99485928b32p+0,
+-0x2.294c445eb9dfep+0,
+0x1.f5e64c5397865p+0,
+-0x2.5cb23c69dc396p+0,
+0x1.8f1a5c3d52d34p+0,
+0x3.1e34b87aa5a68p+0,
+-0xc.15641bbff90bp-8,
+-0x1.82ac8377ff216p-4,
+-0x3.055906effe42cp-4,
+-0x6.0ab20ddffc858p-4,
+-0xc.15641bbff90bp-4,
+-0x1.82ac8377ff216p+0,
+-0x3.055906effe42cp+0,
+0x3.dccc7310ec09ap-4,
+0x7.b998e621d8134p-4,
+0xf.7331cc43b0268p-4,
+0x1.ee6639887604dp+0,
+-0x2.6bb262001f3c6p+0,
+0x1.711a1110cccd4p+0,
+0x2.e2342221999a8p+0,
+-0x8.41690cdd81128p-4,
+-0x1.082d219bb0225p+0,
+-0x2.105a43376044ap+0,
+0x2.27ca4ea24abcep+0,
+-0x1.f8ea37cc75cc6p+0,
+0x2.56aa65781fad6p+0,
+-0x1.9b2a0a20cbeb6p+0,
+0x3.122ac0cf736f6p+0,
+-0x2.4295372246764p-4,
+-0x4.852a6e448cec8p-4,
+-0x9.0a54dc8919d9p-4,
+-0x1.214a9b91233b2p+0,
+-0x2.4295372246764p+0,
+0x1.c35466cc7e59ap+0,
+-0x2.c1d607780e92ep+0,
+0xc.4d2c620ee206p-4,
+0x1.89a58c41dc40cp+0,
+0x3.134b1883b8818p+0,
+-0x2.1e8a4099a4324p-4,
+-0x4.3d14813348648p-4,
+-0x8.7a29026690c9p-4,
+-0x1.0f45204cd2192p+0,
+-0x2.1e8a4099a4324p+0,
+0x2.0b6a53ddc2e18p+0,
+-0x2.31aa2d558583p+0,
+0x1.e52a7a6600402p+0,
+-0x2.7e29e0450ac5cp+0,
+0x1.4c2b1486f5ba8p+0,
+0x2.9856290deb75p+0,
+-0x1.17d282f5345bfp+0,
+-0x2.2fa505ea68b7ep+0,
+0x1.e934c93c39d64p+0,
+-0x2.761542989799ap+0,
+0x1.5c544fdfdc12fp+0,
+0x2.b8a89fbfb825ep+0,
+-0xd.72d95919afa58p-4,
+-0x1.ae5b2b2335f4bp+0,
+0x2.ebc87eca9f5cap+0,
+-0x7.0edd77bcc8ccp-4,
+-0xe.1dbaef799198p-4,
+-0x1.c3b75def3233p+0,
+0x2.c1101932a6e02p+0,
+-0xc.65ea2abbd85fp-4,
+-0x1.8cbd45577b0bep+0,
+-0x3.197a8aaef617cp+0,
+0x1.589bfb31f1687p-4,
+0x2.b137f663e2d0ep-4,
+0x5.626fecc7c5a1cp-4,
+0xa.c4dfd98f8b438p-4,
+};
+
+/* return h such that x - pi/4 mod (2*pi) ~ h */
+static double
+reduce_mod_twopi (double x)
+{
+ double sign = 1, h;
+ if (x < 0)
+ {
+ x = -x;
+ sign = -1;
+ }
+ h = 0; /* invariant is x+h mod (2*pi) */
+ while (x >= 4)
+ {
+ int i = ilogb (x);
+ x -= ldexp (1.0f, i);
+ h += T[i];
+ }
+ /* add the remainder x */
+ h += x;
+ /* subtract pi/4 */
+ double piover4 = 0xc.90fdaa22168cp-4;
+ h -= piover4;
+ /* reduce mod 2*pi */
+ double twopi = 0x6.487ed5110b46p+0;
+ while (fabs (h) > twopi * 0.5)
+ {
+ if (h > 0)
+ h -= twopi;
+ else
+ h += twopi;
+ }
+ return sign * h;
+}
+
+/* formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
+ y0(x) ~ sqrt(2/(pi*x))*beta0(x)*sin(x-pi/4-alpha0(x))
+ where beta0(x) = 1 - 16/x^2 + 53/(512*x^4)
+ and alpha0(x) = 1/(8*x) - 25/(384*x^3) */
+static float
+y0f_asympt (float x)
+{
+ /* the following code fails to give an error <= 9 ulps in only two cases,
+ for which we tabulate the correctly-rounded result */
+ if (x == 0x1.bfad96p+7f)
+ return -0x7.f32bdp-32f;
+ if (x == 0x1.2e2a42p+17f)
+ return 0x1.a48974p-40f;
+ double y = 1.0 / (double) x;
+ double y2 = y * y;
+ double beta0 = 1.0f + y2 * (-0x1p-4f + 0x1.a8p-4 * y2);
+ double alpha0 = y * (0x2p-4 - 0x1.0aaaaap-4 * y2);
+ double h;
+ h = reduce_mod_twopi ((double) x);
+ /* subtract alpha0 */
+ h -= alpha0;
+ /* now reduce mod pi/2 */
+ double piover2 = 0x1.921fb54442d18p+0;
+ int n = 0;
+ while (fabs (h) > piover2 / 2)
+ {
+ if (h > 0)
+ {
+ h -= piover2;
+ n ++;
+ }
+ else
+ {
+ h += piover2;
+ n --;
+ }
+ }
+ /* now x - pi/4 - alpha0 = h + n*pi/2 mod (2*pi) */
+ float xr = (float) h;
+ n = n & 3;
+ float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
+ float t = cst / sqrtf (x) * (float) beta0;
+ if (n == 0)
+ return t * sinf (xr);
+ else if (n == 2) /* sin(x+pi) = -sin(x) */
+ return -t * sinf (xr);
+ else if (n == 1) /* sin(x+pi/2) = cos(x) */
+ return t * cosf (xr);
+ else /* sin(x+3pi/2) = -cos(x) */
+ return -t * cosf (xr);
+}
+
+/* Special code for x near a root of y0.
+ z is the value computed by the generic code.
+ For small x, we use a polynomial approximating y0 around its root.
+ For large x, we use an asymptotic formula (y0f_asympt). */
+static float
+y0f_near_root (float x, float z)
+{
+ float index_f;
+ int index;
+ index_f = roundf ((x - FIRST_ZERO_Y0) / (float) M_PI);
+ if (index_f >= SMALL_SIZE)
+ return y0f_asympt (x);
+ index = (int) index_f;
+ float *p = Py[index];
+ float x0 = p[0];
+ float x1 = p[2];
+ /* if we are not in the interval [x0,x1] around xmid, we return the value z */
+ if (! (x0 <= x && x <= x1))
+ return z;
+ float xmid = p[1];
+ float y = x - xmid;
+ /* for degree 0 we use a degree-4 polynomial, where the coefficient of degree 4
+ is hard-coded here */
+ float p6 = (index > 0) ? p[6] : p[6] + y * -0x3.9ce95p-4;
+ return p[3] + y * (p[4] + y * (p[5] + y * p6));
+}
+
float
__ieee754_y0f(float x)
{
@@ -124,7 +453,8 @@ __ieee754_y0f(float x)
if(ix>=0x7f800000) return one/(x+x*x);
if(ix==0) return -1/zero; /* -inf and divide by zero exception. */
if(hx<0) return zero/(zero*x);
- if(ix >= 0x40000000) { /* |x| >= 2.0 */
+ if(ix >= 0x40000000 || (0x3f5bd613 <= ix && ix <= 0x3f6e0897)) {
+ /* |x| >= 2.0 or 0x1.b7ac26p-1 <= |x| <= 0x1.dc112ep-1 (around 1st zero) */
/* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
* where x0 = x-pi/4
* Better formula:
@@ -143,17 +473,23 @@ __ieee754_y0f(float x)
* j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
* y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
*/
- if(ix<0x7f000000) { /* make sure x+x not overflow */
- z = -__cosf(x+x);
- if ((s*c)<zero) cc = z/ss;
- else ss = z/cc;
- }
- if(ix>0x5c000000) z = (invsqrtpi*ss)/sqrtf(x);
- else {
- u = pzerof(x); v = qzerof(x);
- z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
- }
- return z;
+ if (x >= 0x7f000000) /* x >= 2^127: use asymptotic expansion */
+ return y0f_asympt (x);
+ /* now we are sure that x+x cannot overflow */
+ z = -__cosf(x+x);
+ if ((s*c)<zero) cc = z/ss;
+ else ss = z/cc;
+ if (ix <= 0x5c000000)
+ {
+ u = pzerof(x); v = qzerof(x);
+ ss = u*ss+v*cc;
+ }
+ z = (invsqrtpi*ss)/sqrtf(x);
+ float magic = 0x1.8p+20; /* 2^20 + 2^19 */
+ if (magic + ss != magic) /* most likely */
+ return z;
+ else /* |cc| <= 2^-4 */
+ return y0f_near_root (x, z);
}
if(ix<=0x39800000) { /* x < 2**-13 */
return(u00 + tpi*__ieee754_logf(x));
@@ -165,7 +501,7 @@ __ieee754_y0f(float x)
}
libm_alias_finite (__ieee754_y0f, __y0f)
-/* The asymptotic expansions of pzero is
+/* The asymptotic expansion of pzero is
* 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x.
* For x >= 2, We approximate pzero by
* pzero(x) = 1 + (R/S)
@@ -257,7 +593,7 @@ pzerof(float x)
}
-/* For x >= 8, the asymptotic expansions of qzero is
+/* For x >= 8, the asymptotic expansion of qzero is
* -1/8 s + 75/1024 s^3 - ..., where s = 1/x.
* We approximate pzero by
* qzero(x) = s*(-1.25 + (R/S))
@@ -1748,10 +1748,10 @@ float128: 4
ldouble: 5
Function: "y0":
-double: 3
-float: 8
-float128: 3
-ldouble: 1
+double: 8
+float: 9
+float128: 4
+ldouble: 2
Function: "y0_downward":
double: 3
@@ -1760,15 +1760,15 @@ float128: 4
ldouble: 5
Function: "y0_towardzero":
-double: 3
+double: 5
float: 3
float128: 3
-ldouble: 6
+ldouble: 7
Function: "y0_upward":
double: 3
float: 6
-float128: 3
+float128: 7
ldouble: 5
Function: "y1":