diff mbox series

Fix the inaccuracy of j1f (BZ 14470) and y1f (BZ 14472)

Message ID mwk0s55ubb.fsf@tomate.loria.fr
State New
Headers show
Series Fix the inaccuracy of j1f (BZ 14470) and y1f (BZ 14472) | expand

Commit Message

Paul Zimmermann Jan. 22, 2021, 7:01 a.m. UTC
For both j1f and y1f, the largest error for all binary32 inputs is now less
then 9.5 ulps with respect to the infinite precision result, i.e., less than
9 ulps after rounding, which is the largest error allowed.  (This is for
rounding to nearest; for other rounding modes, the new code should not behave
worse than the old one.)

The new code is enabled only when there is a cancellation at the very end of
the j1f/y1f computation, or for very large inputs, thus should not give any
visible slowdown on average.  Two different algorithms are used:

* around the first 64 zeros of j1/y1, approximation polynomials of degree 3
  or 4 are used, computed using the Sollya tool (https://www.sollya.org/)

* for large inputs, an asymptotic formula from [1] is used

[1] Fast and Accurate Bessel Function Computation,
John Harrison, Proceedings of Arith 19, 2009.

The largest error is now obtained for the following inputs respectively:

libm wrong by up to 9.49e+00 ulp(s) for x=0x1.0fbe5ep+7
j1f     gives -0x1.8b5cd2p-15
mpfr_j1 gives -0x1.8b5ccp-15

libm wrong by up to 9.49e+00 ulp(s) for x=0x1.405122p+5
y1f     gives -0x1.a25b7cp-11
mpfr_y1 gives -0x1.a25b6ap-11
---
 sysdeps/ieee754/flt-32/e_j1f.c | 582 +++++++++++++++++++++++++++++++--
 1 file changed, 553 insertions(+), 29 deletions(-)

Comments

Florian Weimer Jan. 22, 2021, 9:50 a.m. UTC | #1
* Paul Zimmermann:

> +static float Pj[SMALL_SIZE][7] = {

This should be const (below as well for T).  Maybe GCC is able to make
the data constant via optimization, but it's best not to depend on it.

Thanks,
Florian
Paul Zimmermann Jan. 22, 2021, 10:18 a.m. UTC | #2
> > +static float Pj[SMALL_SIZE][7] = {
> 
> This should be const (below as well for T).  Maybe GCC is able to make
> the data constant via optimization, but it's best not to depend on it.

thank you very much Florian, I'll submit a v2 (and similarly for j0f/y0f,
where "const" was missing too).

Paul
diff mbox series

Patch

diff --git a/sysdeps/ieee754/flt-32/e_j1f.c b/sysdeps/ieee754/flt-32/e_j1f.c
index ac5bb76828..6c787ca194 100644
--- a/sysdeps/ieee754/flt-32/e_j1f.c
+++ b/sysdeps/ieee754/flt-32/e_j1f.c
@@ -40,7 +40,350 @@  s03  =  1.1771846857e-06, /* 0x359dffc2 */
 s04  =  5.0463624390e-09, /* 0x31ad6446 */
 s05  =  1.2354227016e-11; /* 0x2d59567e */
 
-static const float zero    = 0.0;
+static const float zero = 0.0;
+
+#define FIRST_ZERO_J1 3.831705f /* First positive zero of j1. */
+
+#define SMALL_SIZE 64
+
+/* The following table contains successive zeros of j1 and degree-3 polynomial
+   approximations of j1 around these zeros: Pj[0] for the first positive zero
+   (3.831705), Pj[1] for the second one (7.015586), and so on.  Each line contains:
+              {x0, xmid, x1, p0, p1, p2, p3}
+   where [x0,x1] is the interval around the zero, xmid is the binary32 number closest
+   to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation polynomial.
+   Each polynomial was generated using Remez' algorithm on the interval [x0,x1]
+   around the corresponding zero where the error is larger than 9 ulps for the
+   default code below.  Degree 3 is enough to get an error less than 4 ulps,
+   except around the first zero.
+*/
+static float Pj[SMALL_SIZE][7] = {
+  /* For index 0, we use a degree-4 polynomial generated by Sollya, with the
+     coefficient of degree 4 hard-coded in j1f_near_root(). */
+  { 0x3.c14204p+0, 0x3.d4eabp+0, 0x3.dee2b4p+0, -0x8.4f069p-28, -0x6.71b3d8p-4, 0xd.744a3p-8, 0xd.acd9bp-8 },
+  { 0x6.f6d308p+0, 0x7.03fd8p+0, 0x7.09038p+0, 0xe.c2858p-28, 0x4.cd464p-4, -0x5.79cec8p-8, -0xc.12b1ep-8 },
+  { 0xa.24741p+0, 0xa.2c687p+0, 0xa.30667p+0, -0x1.e7acecp-24, -0x3.feca8cp-4, 0x3.2442b4p-8, 0xa.5c1e2p-8 },
+  { 0xd.4e4cp+0, 0xd.52dd8p+0, 0xd.55249p+0, 0x1.698158p-24, 0x3.7e666cp-4, -0x2.1906ap-8, -0x9.2a415p-8 },
+  { 0x1.073be4p+4, 0x1.0787b4p+4, 0x1.07aep+4, -0x1.f5f658p-24, -0x3.24b8ep-4, 0x1.86dd04p-8, 0x8.4b4f4p-8 },
+  { 0x1.39ae36p+4, 0x1.39da8ep+4, 0x1.39f064p+4, -0x1.4e744p-24, 0x2.e18a24p-4, -0x1.2cca26p-8, -0x7.9ff6cp-8 },
+  { 0x1.6bfdeep+4, 0x1.6c294ep+4, 0x1.6c40f2p+4, 0xa.3fb7fp-28, -0x2.acc9c4p-4, 0xf.0b205p-12, 0x7.17e4e8p-8 },
+  { 0x1.9e4ab2p+4, 0x1.9e757p+4, 0x1.9e8222p+4, -0x2.29f6f4p-24, 0x2.81f21p-4, -0xc.640cap-12, -0x6.a8aa2p-8 },
+  { 0x1.d0a4ep+4, 0x1.d0bfdp+4, 0x1.d0cd3cp+4, -0x1.b5d196p-24, -0x2.5e40e4p-4, 0xa.6f9dp-12, 0x6.4b1ad8p-8 },
+  { 0x2.02ef28p+4, 0x2.0308ecp+4, 0x2.0317p+4, -0x4.0e001p-24, 0x2.3febep-4, -0x8.f1faep-12, -0x5.fb7998p-8 },
+  { 0x2.35425cp+4, 0x2.35512p+4, 0x2.355f1p+4, 0x3.b26f2p-24, -0x2.25babp-4, 0x7.c76918p-12, 0x5.b66ddp-8 },
+  { 0x2.677bbcp+4, 0x2.6798a4p+4, 0x2.67a5c8p+4, -0xf.c8cdap-28, 0x2.0ed05p-4, -0x6.d899ep-12, -0x5.7a1fc8p-8 },
+  { 0x2.99cf8p+4, 0x2.99dfap+4, 0x2.99efa8p+4, -0x3.9940e4p-24, -0x1.fa8b4p-4, 0x6.1610ap-12, 0x5.447178p-8 },
+  { 0x2.cc07dp+4, 0x2.cc262cp+4, 0x2.cc2e54p+4, 0x9.da15dp-28, 0x1.e8727ep-4, -0x5.74db18p-12, -0x5.14b9dp-8 },
+  { 0x2.fe5d78p+4, 0x2.fe6c64p+4, 0x2.fe74ep+4, -0x3.39b218p-24, -0x1.d8293ap-4, 0x4.edc938p-12, 0x4.e97d68p-8 },
+  { 0x3.30a318p+4, 0x3.30b25p+4, 0x3.30bb28p+4, -0x3.7b5108p-24, 0x1.c96702p-4, -0x4.7ae6c8p-12, -0x4.c25f08p-8 },
+  { 0x3.62e5dp+4, 0x3.62f808p+4, 0x3.62ff0cp+4, -0x1.91e43ep-24, -0x1.bbf246p-4, 0x4.18c358p-12, 0x4.9eb48p-8 },
+  { 0x3.952ab4p+4, 0x3.953d8cp+4, 0x3.954334p+4, 0x1.28453cp-24, 0x1.af9cb4p-4, -0x3.c3bbe4p-12, -0x4.7dfa3p-8 },
+  { 0x3.c77928p+4, 0x3.c782e8p+4, 0x3.c787f4p+4, -0x2.e7fef4p-24, -0x1.a4407ep-4, 0x3.79aaecp-12, 0x4.5fc5e8p-8 },
+  { 0x3.f9be2cp+4, 0x3.f9c81cp+4, 0x3.f9cd08p+4, -0x3.23b2fcp-24, 0x1.99be76p-4, -0x3.386584p-12, -0x4.43dc7p-8 },
+  { 0x4.2c034p+4, 0x4.2c0d38p+4, 0x4.2c14c8p+4, -0xd.19e93p-28, -0x1.8ffc9cp-4, 0x2.ff00f8p-12, 0x4.29ec1p-8 },
+  { 0x4.5e4d1p+4, 0x4.5e5238p+4, 0x4.5e5688p+4, 0x1.c2ac48p-24, 0x1.86e51cp-4, -0x2.cbe84p-12, -0x4.11bfd8p-8 },
+  { 0x4.9090e8p+4, 0x4.90972p+4, 0x4.909b88p+4, -0xd.31027p-28, -0x1.7e656ep-4, 0x2.9e309cp-12, 0x3.fb27ep-8 },
+  { 0x4.c2d6ap+4, 0x4.c2dbf8p+4, 0x4.c2e138p+4, 0x5.b5e53p-24, 0x1.766dc2p-4, -0x2.7550d4p-12, -0x3.e5f5ecp-8 },
+  { 0x4.f51b3p+4, 0x4.f520b8p+4, 0x4.f52578p+4, -0x1.340a8ap-24, -0x1.6ef07ep-4, 0x2.502b88p-12, 0x3.d20a7p-8 },
+  { 0x5.275a2p+4, 0x5.276568p+4, 0x5.276af8p+4, -0x3.ba66p-24, 0x1.67e1dcp-4, -0x2.2e807p-12, -0x3.bf474p-8 },
+  { 0x5.59a458p+4, 0x5.59aa1p+4, 0x5.59afp+4, 0x6.47ca5p-28, -0x1.61379ap-4, 0x2.102354p-12, 0x3.ad8764p-8 },
+  { 0x5.8be4p+4, 0x5.8beea8p+4, 0x5.8bf27p+4, -0x2.12affp-24, 0x1.5ae8c4p-4, -0x1.f44a4ep-12, -0x3.9cc18p-8 },
+  { 0x5.be2868p+4, 0x5.be3338p+4, 0x5.be38dp+4, -0x7.4853ap-28, -0x1.54ed76p-4, 0x1.daedc8p-12, 0x3.8cd45p-8 },
+  { 0x5.f0721p+4, 0x5.f077b8p+4, 0x5.f07acp+4, -0x4.f0a998p-24, 0x1.4f3ebcp-4, -0x1.c367c2p-12, -0x3.7db22p-8 },
+  { 0x6.22b718p+4, 0x6.22bc38p+4, 0x6.22bf1p+4, -0x1.80c246p-24, -0x1.49d668p-4, 0x1.ae1adcp-12, 0x3.6f4c3cp-8 },
+  { 0x6.54f5cp+4, 0x6.5500a8p+4, 0x6.550298p+4, -0x2.22aff8p-24, 0x1.44aefap-4, -0x1.9a24dp-12, -0x3.61967p-8 },
+  { 0x6.874078p+4, 0x6.874518p+4, 0x6.874808p+4, -0x3.aad6d4p-24, -0x1.3fc386p-4, 0x1.87f54p-12, 0x3.5478ep-8 },
+  { 0x6.b983bp+4, 0x6.b98978p+4, 0x6.b98c88p+4, -0x2.010be4p-24, 0x1.3b0fa4p-4, -0x1.76beb4p-12, -0x3.47f348p-8 },
+  { 0x6.ebc8dp+4, 0x6.ebcdd8p+4, 0x6.ebd108p+4, -0xd.4fd17p-32, -0x1.368f5cp-4, 0x1.66f912p-12, 0x3.3bf5fp-8 },
+  { 0x7.1e0b98p+4, 0x7.1e123p+4, 0x7.1e1458p+4, -0xa.d662dp-28, 0x1.323f18p-4, -0x1.5832d6p-12, -0x3.3079d4p-8 },
+  { 0x7.505138p+4, 0x7.50568p+4, 0x7.505848p+4, 0x4.9f217p-24, -0x1.2e1b9ap-4, 0x1.4a4e9cp-12, 0x3.25733cp-8 },
+  { 0x7.82983p+4, 0x7.829adp+4, 0x7.829e2p+4, -0x2.d167p-24, 0x1.2a21eep-4, -0x1.3d7d36p-12, -0x3.1ada9p-8 },
+  { 0x7.b4dbb8p+4, 0x7.b4df2p+4, 0x7.b4e0fp+4, -0x4.15a83p-24, -0x1.264f66p-4, 0x1.31a534p-12, 0x3.10acb4p-8 },
+  { 0x7.e71dcp+4, 0x7.e7236p+4, 0x7.e726bp+4, -0x2.a5bbbp-24, 0x1.22a192p-4, -0x1.261ebp-12, -0x3.06dfd4p-8 },
+  { 0x8.19643p+4, 0x8.1967ap+4, 0x8.196b3p+4, 0x4.e828bp-24, -0x1.1f1634p-4, 0x1.1b6a7p-12, 0x2.fd6d78p-8 },
+  { 0x8.4ba86p+4, 0x8.4babep+4, 0x8.4bad9p+4, -0x3.28a3bcp-24, 0x1.1bab3ep-4, -0x1.11766p-12, -0x2.f452f8p-8 },
+  { 0x8.7dee6p+4, 0x8.7df02p+4, 0x8.7df1fp+4, -0x2.2f667p-24, -0x1.185eccp-4, 0x1.08324ep-12, 0x2.eb8854p-8 },
+  { 0x8.b031cp+4, 0x8.b0345p+4, 0x8.b0362p+4, -0x6.9097dp-24, 0x1.152f28p-4, -0xf.f0528p-16, -0x2.e30b48p-8 },
+  { 0x8.e274dp+4, 0x8.e2789p+4, 0x8.e27a5p+4, -0x5.1b408p-24, -0x1.121abp-4, 0xf.6f895p-16, 0x2.dad6acp-8 },
+  { 0x9.14b55p+4, 0x9.14bccp+4, 0x9.14be5p+4, 0x2.70d0dp-24, 0x1.0f1ffp-4, -0xe.eed25p-16, -0x2.d2e74cp-8 },
+  { 0x9.46fd2p+4, 0x9.4700fp+4, 0x9.4702ap+4, -0x2.7c176p-24, -0x1.0c3d8ap-4, 0xe.7629dp-16, 0x2.cb3674p-8 },
+  { 0x9.79415p+4, 0x9.79452p+4, 0x9.7946fp+4, 0x4.fd6368p-24, 0x1.097236p-4, -0xe.04f4fp-16, -0x2.c3c42p-8 },
+  { 0x9.ab858p+4, 0x9.ab894p+4, 0x9.ab8a8p+4, 0x6.b05f68p-24, -0x1.06bccap-4, 0xd.9270ap-16, 0x2.bc8c5p-8 },
+  { 0x9.ddc99p+4, 0x9.ddcd7p+4, 0x9.ddcf5p+4, 0x4.0d622p-28, 0x1.041c28p-4, -0xd.2e9ccp-16, -0x2.b58b94p-8 },
+  { 0xa.100dbp+4, 0xa.10119p+4, 0xa.10138p+4, 0x7.0d98cp-24, -0x1.018f52p-4, 0xc.c8acfp-16, 0x2.aebfbp-8 },
+  { 0xa.4253cp+4, 0xa.4255cp+4, 0xa.4256cp+4, 0x3.93d10cp-24, 0xf.f154fp-8, -0xc.7062ep-16, -0x2.a825bp-8 },
+  { 0xa.74964p+4, 0xa.7499ep+4, 0xa.749b4p+4, 0xd.88185p-32, -0xf.cad3fp-8, 0xc.15576p-16, 0x2.a1bc0cp-8 },
+  { 0xa.a6dc2p+4, 0xa.a6dep+4, 0xa.a6dfap+4, -0x1.fe6b92p-24, 0xf.a564cp-8, -0xb.bf3bep-16, -0x2.9b7f34p-8 },
+  { 0xa.d91e2p+4, 0xa.d9222p+4, 0xa.d9243p+4, 0x2.6137f4p-24, -0xf.80faep-8, 0xb.6dbd2p-16, 0x2.956ebcp-8 },
+  { 0xb.0b644p+4, 0xb.0b664p+4, 0xb.0b685p+4, -0x1.55468p-24, 0xf.5d8acp-8, -0xb.208ebp-16, -0x2.8f86fcp-8 },
+  { 0xb.3da7cp+4, 0xb.3daa6p+4, 0xb.3dac4p+4, -0x1.08c6bep-24, -0xf.3b096p-8, 0xa.d76a2p-16, 0x2.89c7ap-8 },
+  { 0xb.6fec7p+4, 0xb.6fee8p+4, 0xb.6fefdp+4, 0x4.9ebfa8p-24, 0xf.196c7p-8, -0xa.920eap-16, -0x2.842e2p-8 },
+  { 0xb.a230dp+4, 0xb.a2329p+4, 0xb.a2349p+4, 0x5.a4017p-24, -0xe.f8aa5p-8, 0xa.48c44p-16, 0x2.7eb8d8p-8 },
+  { 0xb.d474fp+4, 0xb.d476bp+4, 0xb.d4776p+4, 0x3.bcb2a8p-28, 0xe.d8b9dp-8, -0xa.0a5cp-16, -0x2.7966fp-8 },
+  { 0xc.06b8ap+4, 0xc.06badp+4, 0xc.06bc5p+4, -0x7.1091fp-24, -0xe.b9925p-8, 0x9.cf163p-16, 0x2.743628p-8 },
+  { 0xc.38facp+4, 0xc.38feep+4, 0xc.3900dp+4, 0x2.ca1cf4p-28, 0xe.9b2bep-8, -0x9.8f762p-16, -0x2.6f25e4p-8 },
+  { 0xc.6b41bp+4, 0xc.6b42fp+4, 0xc.6b441p+4, 0x5.aa8908p-24, -0xe.7d7ecp-8, 0x9.52baep-16, 0x2.6a33ccp-8 },
+  { 0xc.9d84ep+4, 0xc.9d871p+4, 0xc.9d887p+4, 0x3.d9cd9cp-24, 0xe.6083ap-8, -0x9.1feafp-16, -0x2.655fd8p-8 },
+};
+
+/* Argument reduction mod 2pi: T[i] ~ 2^i mod (2*pi). */
+static double T[128] = {
+  0x1p+0,
+  0x2p+0,
+  -0x2.487ed5110b462p+0,
+  0x1.b7812aeef4b9fp+0,
+  -0x2.d97c7f3321d24p+0,
+  0x9.585d6aac7a1a8p-4,
+  0x1.2b0bad558f435p+0,
+  0x2.56175aab1e86ap+0,
+  -0x1.9c501fbace38dp+0,
+  0x3.0fde959b6ed46p+0,
+  -0x2.8c1a9da2d9d3cp-4,
+  -0x5.18353b45b3a78p-4,
+  -0xa.306a768b674fp-4,
+  -0x1.460d4ed16ce9ep+0,
+  -0x2.8c1a9da2d9d3cp+0,
+  0x1.304999cb579e8p+0,
+  0x2.60933396af3dp+0,
+  -0x1.87586de3accc3p+0,
+  -0x3.0eb0dbc759986p+0,
+  0x2.b1d1d8258155p-4,
+  0x5.63a3b04b02aap-4,
+  0xa.c74760960554p-4,
+  0x1.58e8ec12c0aa8p+0,
+  0x2.b1d1d8258155p+0,
+  -0xe.4db24c6089bf8p-4,
+  -0x1.c9b6498c1137fp+0,
+  0x2.b51241f8e8d64p+0,
+  -0xd.e5a511f3999bp-4,
+  -0x1.bcb4a23e73336p+0,
+  0x2.cf15909424df4p+0,
+  -0xa.a53b3e8c1877p-4,
+  -0x1.54a767d1830eep+0,
+  -0x2.a94ecfa3061dcp+0,
+  0xf.5e135caff0a78p-4,
+  0x1.ebc26b95fe14fp+0,
+  -0x2.70f9fde50f1c4p+0,
+  0x1.668ad946ed0dap+0,
+  0x2.cd15b28dda1b4p+0,
+  -0xa.e536ff5570fbp-4,
+  -0x1.5ca6dfeaae1f6p+0,
+  -0x2.b94dbfd55c3ecp+0,
+  0xd.5e3556652c89p-4,
+  0x1.abc6aacca5912p+0,
+  -0x2.f0f17f77c023ep+0,
+  0x6.69bd6218afe64p-4,
+  0xc.d37ac4315fcc8p-4,
+  0x1.9a6f58862bf99p+0,
+  -0x3.13a02404b352ep+0,
+  0x2.13e8d07a4a046p-4,
+  0x4.27d1a0f49408cp-4,
+  0x8.4fa341e928118p-4,
+  0x1.09f4683d25023p+0,
+  0x2.13e8d07a4a046p+0,
+  -0x2.20ad341c773d4p+0,
+  0x2.07246cd81ccb8p+0,
+  -0x2.3a35fb60d1af2p+0,
+  0x1.d412de4f67e7ep+0,
+  -0x2.a05918723b764p+0,
+  0x1.07cca42c94599p+0,
+  0x2.0f99485928b32p+0,
+  -0x2.294c445eb9dfep+0,
+  0x1.f5e64c5397865p+0,
+  -0x2.5cb23c69dc396p+0,
+  0x1.8f1a5c3d52d34p+0,
+  0x3.1e34b87aa5a68p+0,
+  -0xc.15641bbff90bp-8,
+  -0x1.82ac8377ff216p-4,
+  -0x3.055906effe42cp-4,
+  -0x6.0ab20ddffc858p-4,
+  -0xc.15641bbff90bp-4,
+  -0x1.82ac8377ff216p+0,
+  -0x3.055906effe42cp+0,
+  0x3.dccc7310ec09ap-4,
+  0x7.b998e621d8134p-4,
+  0xf.7331cc43b0268p-4,
+  0x1.ee6639887604dp+0,
+  -0x2.6bb262001f3c6p+0,
+  0x1.711a1110cccd4p+0,
+  0x2.e2342221999a8p+0,
+  -0x8.41690cdd81128p-4,
+  -0x1.082d219bb0225p+0,
+  -0x2.105a43376044ap+0,
+  0x2.27ca4ea24abcep+0,
+  -0x1.f8ea37cc75cc6p+0,
+  0x2.56aa65781fad6p+0,
+  -0x1.9b2a0a20cbeb6p+0,
+  0x3.122ac0cf736f6p+0,
+  -0x2.4295372246764p-4,
+  -0x4.852a6e448cec8p-4,
+  -0x9.0a54dc8919d9p-4,
+  -0x1.214a9b91233b2p+0,
+  -0x2.4295372246764p+0,
+  0x1.c35466cc7e59ap+0,
+  -0x2.c1d607780e92ep+0,
+  0xc.4d2c620ee206p-4,
+  0x1.89a58c41dc40cp+0,
+  0x3.134b1883b8818p+0,
+  -0x2.1e8a4099a4324p-4,
+  -0x4.3d14813348648p-4,
+  -0x8.7a29026690c9p-4,
+  -0x1.0f45204cd2192p+0,
+  -0x2.1e8a4099a4324p+0,
+  0x2.0b6a53ddc2e18p+0,
+  -0x2.31aa2d558583p+0,
+  0x1.e52a7a6600402p+0,
+  -0x2.7e29e0450ac5cp+0,
+  0x1.4c2b1486f5ba8p+0,
+  0x2.9856290deb75p+0,
+  -0x1.17d282f5345bfp+0,
+  -0x2.2fa505ea68b7ep+0,
+  0x1.e934c93c39d64p+0,
+  -0x2.761542989799ap+0,
+  0x1.5c544fdfdc12fp+0,
+  0x2.b8a89fbfb825ep+0,
+  -0xd.72d95919afa58p-4,
+  -0x1.ae5b2b2335f4bp+0,
+  0x2.ebc87eca9f5cap+0,
+  -0x7.0edd77bcc8ccp-4,
+  -0xe.1dbaef799198p-4,
+  -0x1.c3b75def3233p+0,
+  0x2.c1101932a6e02p+0,
+  -0xc.65ea2abbd85fp-4,
+  -0x1.8cbd45577b0bep+0,
+  -0x3.197a8aaef617cp+0,
+  0x1.589bfb31f1687p-4,
+  0x2.b137f663e2d0ep-4,
+  0x5.626fecc7c5a1cp-4,
+  0xa.c4dfd98f8b438p-4
+};
+
+/* Return h such that x - pi/4 mod (2*pi) ~ h. */
+static double
+reduce_mod_twopi (double x)
+{
+  double sign = 1, h;
+  if (x < 0)
+    {
+      x = -x;
+      sign = -1;
+    }
+  h = 0; /* Invariant is x+h mod (2*pi). */
+  while (x >= 4)
+    {
+      int i = ilogb (x);
+      x -= ldexp (1.0f, i);
+      h += T[i];
+    }
+  /* Add the remainder x. */
+  h += x;
+  /* Subtract pi/4. */
+  double piover4 = 0xc.90fdaa22168cp-4;
+  h -= piover4;
+  /* Reduce mod 2*pi. */
+  double twopi = 0x6.487ed5110b46p+0;
+  while (fabs (h) > twopi * 0.5)
+    {
+      if (h > 0)
+        h -= twopi;
+      else
+        h += twopi;
+    }
+  return sign * h;
+}
+
+/* Reduce h mod (pi/2), and add k to n if k*pi/2 was subtracted from h. */
+static double
+reduce_mod_piover2 (double h, int *n)
+{
+  double piover2 = 0x1.921fb54442d18p+0;
+  while (fabs (h) > piover2 / 2)
+    {
+      if (h > 0)
+        {
+          h -= piover2;
+          (*n) ++;
+        }
+      else
+        {
+          h += piover2;
+          (*n) --;
+        }
+    }
+  return h;
+}
+
+/* Formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
+   j1f(x) ~ sqrt(2/(pi*x))*beta1(x)*cos(x-3pi/4-alpha1(x))
+   where beta1(x) = 1 + 3/(16*x^2) - 99/(512*x^4)
+   and alpha1(x) = -3/(8*x) + 21/(128*x^3) - 1899/(5120*x^5). */
+static float
+j1f_asympt (float x)
+{
+  float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
+  if (x < 0)
+    {
+      x = -x;
+      cst = -cst;
+    }
+  double y = 1.0 / (double) x;
+  double y2 = y * y;
+  double beta1 = 1.0f + y2 * (0x3p-4 - 0x3.18p-4 * y2);
+  double alpha1 = y * (-0x6p-4 + y2 * (0x2.ap-4 - 0x5.ef33333333334p-4 * y2));
+  double h;
+  h = reduce_mod_twopi ((double) x);
+  int n = -1; /* Accounts for -pi/2 (pi/4 was subtracted in reduce_mod_twopi). */
+  /* Now reduce mod pi/2. */
+  h = reduce_mod_piover2 (h, &n);
+  /* Subtract alpha1. */
+  h -= alpha1;
+  /* Reduce again mod pi/2 if needed. */
+  h = reduce_mod_piover2 (h, &n);
+  /* Now x - 3pi/4 - alpha1 = h + n*pi/2 mod (2*pi). */
+  float xr = (float) h;
+  n = n & 3;
+  float t = cst / sqrtf (x) * (float) beta1;
+  if (n == 0)
+    return t * cosf (xr);
+  else if (n == 2) /* cos(x+pi) = -cos(x) */
+    return -t * cosf (xr);
+  else if (n == 1) /* cos(x+pi/2) = -sin(x) */
+    return -t * sinf (xr);
+  else             /* cos(x+3pi/2) = sin(x) */
+    return t * sinf (xr);
+}
+
+/* Special code for x near a root of j1.
+   z is the value computed by the generic code.
+   For small x, we use a polynomial approximating j1 around its root.
+   For large x, we use an asymptotic formula (j1f_asympt). */
+static float
+j1f_near_root (float x, float z)
+{
+  float index_f, sign = 1.0f;
+  int index;
+
+  if (x < 0)
+    {
+      x = -x;
+      sign = -1.0f;
+    }
+  index_f = roundf ((x - FIRST_ZERO_J1) / (float) M_PI);
+  /* SMALL_SIZE should be at least 21 */
+  if (index_f >= SMALL_SIZE)
+    return sign * j1f_asympt (x);
+  index = (int) index_f;
+  float *p = Pj[index];
+  float x0 = p[0];
+  float x1 = p[2];
+  /* If we are not in the interval [x0,x1] around xmid, we return the value z. */
+  if (! (x0 <= x && x <= x1))
+    return z;
+  float xmid = p[1];
+  float y = x - xmid;
+  float p6 = (index > 0) ? p[6] : p[6] + y * -0x1.3e52fp-8;
+  return sign * (p[3] + y * (p[4] + y * (p[5] + y * p6)));
+}
 
 float
 __ieee754_j1f(float x)
@@ -56,22 +399,29 @@  __ieee754_j1f(float x)
 		__sincosf (y, &s, &c);
 		ss = -s-c;
 		cc = s-c;
-		if(ix<0x7f000000) {  /* make sure y+y not overflow */
-		    z = __cosf(y+y);
-		    if ((s*c)>zero) cc = z/ss;
-		    else	    ss = z/cc;
-		}
+                if (ix >= 0x7f000000) /* x >= 2^127: use asymptotic expansion. */
+                  return j1f_asympt (x);
+                /* Now we are sure that x+x cannot overflow. */
+                z = __cosf(y+y);
+                if ((s*c)>zero) cc = z/ss;
+                else	        ss = z/cc;
 	/*
 	 * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
 	 * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
 	 */
-		if(ix>0x5c000000) z = (invsqrtpi*cc)/sqrtf(y);
-		else {
+		if (ix <= 0x5c000000)
+                  {
 		    u = ponef(y); v = qonef(y);
-		    z = invsqrtpi*(u*cc-v*ss)/sqrtf(y);
-		}
-		if(hx<0) return -z;
-		else	 return  z;
+		    cc = u*cc-v*ss;
+                  }
+                z = (invsqrtpi * cc) / sqrtf(y);
+                /* Adjust sign of z. */
+                z = (hx < 0) ? -z : z;
+                float magic = 0x1.8p+21; /* 2^21 + 2^20 */
+                if (magic + cc != magic) /* Most likely. */
+                  return z;
+                else /* |cc| <= 2^-3 */
+                  return j1f_near_root (x, z);
 	}
 	if(__builtin_expect(ix<0x32000000, 0)) {	/* |x|<2**-27 */
 	    if(huge+x>one) {		/* inexact if x!=0 necessary */
@@ -105,6 +455,160 @@  static const float V0[5] = {
   1.6655924903e-11, /* 0x2d9281cf */
 };
 
+#define FIRST_ZERO_Y1 2.197141f /* First zero of y1. */
+
+/* The following table contains successive zeros of y1 and degree-3 polynomial
+   approximations of y1 around these zeros: Py[0] for the first positive zero
+   (2.197141), Py[1] for the second one (5.429681), and so on.  Each line contains:
+              {x0, xmid, x1, p0, p1, p2, p3}
+   where [x0,x1] is the interval around the zero, xmid is the binary32 number closest
+   to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation polynomial.
+   Each polynomial was generated using Remez' algorithm on the interval [x0,x1]
+   around the corresponding zero where the error is larger than 9 ulps for the
+   default code below.  Degree 3 is enough (except around the first root).
+*/
+static float Py[SMALL_SIZE][7] = {
+  /* For index 0, we use a degree-4 polynomial generated by Sollya, with the
+     coefficient of degree 4 hard-coded in y1f_near_root(). */
+  { 0x2.148768p+0, 0x2.3277dcp+0, 0x2.420bb8p+0, 0xb.96749p-28, 0x8.55242p-4, -0x1.e56a7ap-4, -0x8.6888p-8 },
+  { 0x5.625a8p+0, 0x5.6dff9p+0, 0x5.73e2b8p+0, 0x1.3c7822p-24, -0x5.71f17p-4, 0x8.05b18p-8, 0xd.16c1dp-8 },
+  { 0x8.914dp+0, 0x8.9893dp+0, 0x8.9c38cp+0, -0x1.f3ad8ep-24, 0x4.57e658p-4, -0x4.0ac6f8p-8, -0xb.21063p-8 },
+  { 0xb.b73efp+0, 0xb.bfc8ap+0, 0xb.c3535p+0, -0xd.5608fp-28, -0x3.b829d4p-4, 0x2.88544cp-8, 0x9.b7be2p-8 },
+  { 0xe.e13dfp+0, 0xe.e5becp+0, 0xe.e819p+0, -0xe.a7c04p-28, 0x3.4e0458p-4, -0x1.c64ed8p-8, -0x8.b2bd2p-8 },
+  { 0x1.20874p+4, 0x1.20b1c6p+4, 0x1.20c71p+4, 0x1.c2626p-24, -0x3.00f03cp-4, 0x1.54ec7cp-8, 0x7.f02b88p-8 },
+  { 0x1.52d934p+4, 0x1.530254p+4, 0x1.531962p+4, -0x1.9503ecp-24, 0x2.c5b29cp-4, -0x1.0bf4eep-8, -0x7.584198p-8 },
+  { 0x1.851f6ep+4, 0x1.854fa4p+4, 0x1.85675ap+4, -0x2.8d40fcp-24, -0x2.96547p-4, 0xd.9c4d1p-12, 0x6.ddacdp-8 },
+  { 0x1.b7808ep+4, 0x1.b79acep+4, 0x1.b7b034p+4, -0x2.36df5cp-24, 0x2.6f55ap-4, -0xb.57dcfp-12, -0x6.77afap-8 },
+  { 0x1.e9c916p+4, 0x1.e9e48p+4, 0x1.e9f24p+4, 0xd.e2eb7p-28, -0x2.4e8104p-4, 0x9.a4938p-12, 0x6.21cdbp-8 },
+  { 0x2.1c1454p+4, 0x2.1c2d2p+4, 0x2.1c3b24p+4, -0x2.3070f4p-24, 0x2.325e4cp-4, -0x8.5410cp-12, -0x5.d7d08p-8 },
+  { 0x2.4e663p+4, 0x2.4e74f8p+4, 0x2.4e83f8p+4, -0x3.525508p-24, -0x2.19e7dcp-4, 0x7.49d36p-12, 0x5.974078p-8 },
+  { 0x2.80ac64p+4, 0x2.80bc3p+4, 0x2.80caep+4, -0xe.6e158p-28, 0x2.046174p-4, -0x6.727d9p-12, -0x5.5e727p-8 },
+  { 0x2.b2e3b8p+4, 0x2.b302fp+4, 0x2.b30b24p+4, 0x1.e72698p-24, -0x1.f13fb2p-4, 0x5.c1ad88p-12, 0x5.2c075p-8 },
+  { 0x2.e5389cp+4, 0x2.e5495p+4, 0x2.e551b4p+4, -0x1.5bed9cp-24, 0x1.e018dcp-4, -0x5.2e5a3p-12, -0x4.fe8628p-8 },
+  { 0x3.177e9cp+4, 0x3.178f64p+4, 0x3.17982p+4, -0x3.6b654cp-24, -0x1.d09b2p-4, 0x4.b22ddp-12, 0x4.d57a68p-8 },
+  { 0x3.49cc2cp+4, 0x3.49d534p+4, 0x3.49dc9p+4, 0x1.6f11bp-24, 0x1.c28612p-4, -0x4.481288p-12, -0x4.b01a8p-8 },
+  { 0x3.7c117cp+4, 0x3.7c1adp+4, 0x3.7c211p+4, -0x2.0bc074p-24, -0x1.b5a622p-4, 0x3.ecc594p-12, 0x4.8df3fp-8 },
+  { 0x3.ae4e54p+4, 0x3.ae603cp+4, 0x3.ae64fp+4, -0x2.87dd4p-24, 0x1.a9d184p-4, -0x3.9d52dcp-12, -0x4.6e98p-8 },
+  { 0x3.e09334p+4, 0x3.e0a588p+4, 0x3.e0ad78p+4, -0x2.fb964p-24, -0x1.9ee5eep-4, 0x3.58195cp-12, 0x4.51938p-8 },
+  { 0x4.12e15p+4, 0x4.12eabp+4, 0x4.12f0fp+4, 0x2.cf5adp-24, 0x1.94c6f4p-4, -0x3.1af534p-12, -0x4.36a7e8p-8 },
+  { 0x4.451e78p+4, 0x4.452fb8p+4, 0x4.4534d8p+4, 0x3.6766fp-24, -0x1.8b5cccp-4, 0x2.e49344p-12, 0x4.1daa98p-8 },
+  { 0x4.776a3p+4, 0x4.7774bp+4, 0x4.7779e8p+4, 0x3.501424p-24, 0x1.829356p-4, -0x2.b47be8p-12, -0x4.0647d8p-8 },
+  { 0x4.a9afp+4, 0x4.a9b99p+4, 0x4.a9bea8p+4, -0x5.c05808p-24, -0x1.7a597ep-4, 0x2.894a64p-12, 0x3.f067e8p-8 },
+  { 0x4.dbf358p+4, 0x4.dbfe58p+4, 0x4.dc03d8p+4, 0x7.1e1478p-28, 0x1.72a09ap-4, -0x2.622e7cp-12, -0x3.dbdd9cp-8 },
+  { 0x5.0e3d88p+4, 0x5.0e431p+4, 0x5.0e4788p+4, 0x3.e36e6cp-24, -0x1.6b5c06p-4, 0x2.3ed8dcp-12, 0x3.c8847p-8 },
+  { 0x5.4082bp+4, 0x5.4087cp+4, 0x5.408d4p+4, 0x1.3f9e5ap-24, 0x1.6480c4p-4, -0x2.1f1138p-12, -0x3.b644f8p-8 },
+  { 0x5.72c1fp+4, 0x5.72cc6p+4, 0x5.72d19p+4, -0x2.39e41cp-24, -0x1.5e0544p-4, 0x2.020254p-12, 0x3.a5087p-8 },
+  { 0x5.a50598p+4, 0x5.a510fp+4, 0x5.a5165p+4, -0x2.912f84p-24, 0x1.57e12p-4, -0x1.e7472cp-12, -0x3.94b05p-8 },
+  { 0x5.d749fp+4, 0x5.d75578p+4, 0x5.d758dp+4, 0x3.d5b00cp-24, -0x1.520ceep-4, 0x1.cedf4cp-12, 0x3.852d54p-8 },
+  { 0x6.09959p+4, 0x6.0999f8p+4, 0x6.099dp+4, -0x3.1726ecp-24, 0x1.4c8222p-4, -0x1.b87e64p-12, -0x3.766828p-8 },
+  { 0x6.3bd32p+4, 0x6.3bde7p+4, 0x6.3be22p+4, 0x1.949e22p-24, -0x1.473ae6p-4, 0x1.a3e3b6p-12, 0x3.685db8p-8 },
+  { 0x6.6e1cap+4, 0x6.6e22ep+4, 0x6.6e25bp+4, -0x5.5553bp-28, 0x1.42320ap-4, -0x1.90d756p-12, -0x3.5af384p-8 },
+  { 0x6.a060fp+4, 0x6.a06748p+4, 0x6.a06a68p+4, 0x3.2df7ecp-28, -0x1.3d62e4p-4, 0x1.7f295cp-12, 0x3.4e24ccp-8 },
+  { 0x6.d2a87p+4, 0x6.d2aba8p+4, 0x6.d2aeep+4, -0x1.e13fcep-24, 0x1.38c948p-4, -0x1.6eb032p-12, -0x3.41e3p-8 },
+  { 0x7.04ea5p+4, 0x7.04f008p+4, 0x7.04f2cp+4, -0x3.ad9974p-24, -0x1.34616ep-4, 0x1.5f94dap-12, 0x3.36286cp-8 },
+  { 0x7.372dd8p+4, 0x7.373458p+4, 0x7.37379p+4, -0x3.6977e8p-24, 0x1.3027fp-4, -0x1.511ca2p-12, -0x3.2ae7d8p-8 },
+  { 0x7.6973c8p+4, 0x7.6978a8p+4, 0x7.697bcp+4, 0x4.654cbp-24, -0x1.2c19b6p-4, 0x1.43c536p-12, 0x3.201998p-8 },
+  { 0x7.9bb73p+4, 0x7.9bbcf8p+4, 0x7.9bc028p+4, 0x8.825c8p-32, 0x1.2833eep-4, -0x1.377382p-12, -0x3.15b7e4p-8 },
+  { 0x7.cdfe18p+4, 0x7.ce014p+4, 0x7.ce04a8p+4, -0x2.0d11d8p-28, -0x1.24740ap-4, 0x1.2bc672p-12, 0x3.0bb998p-8 },
+  { 0x8.00421p+4, 0x8.00458p+4, 0x8.00484p+4, -0x4.4e495p-24, 0x1.20d7b6p-4, -0x1.20ab76p-12, -0x3.021b64p-8 },
+  { 0x8.3282ep+4, 0x8.3289cp+4, 0x8.328cp+4, 0x4.81c1c8p-24, -0x1.1d5ccap-4, 0x1.165974p-12, 0x2.f8d71cp-8 },
+  { 0x8.64c77p+4, 0x8.64cep+4, 0x8.64d14p+4, -0xe.99386p-28, 0x1.1a015p-4, -0x1.0cbf48p-12, -0x2.efe49cp-8 },
+  { 0x8.97109p+4, 0x8.97124p+4, 0x8.97148p+4, -0x6.16f1c8p-24, -0x1.16c37ap-4, 0x1.03cdaep-12, 0x2.e7402cp-8 },
+  { 0x8.c9537p+4, 0x8.c9567p+4, 0x8.c9581p+4, -0x1.129336p-24, 0x1.13a19ep-4, -0xf.aed15p-16, -0x2.dee83p-8 },
+  { 0x8.fb938p+4, 0x8.fb9aap+4, 0x8.fb9c5p+4, 0x5.19c09p-24, -0x1.109a32p-4, 0xf.29df7p-16, 0x2.d6d728p-8 },
+  { 0x9.2ddcp+4, 0x9.2ddedp+4, 0x9.2de09p+4, -0x6.497dp-24, 0x1.0dabc8p-4, -0xe.ad4f9p-16, -0x2.cf0624p-8 },
+  { 0x9.602p+4, 0x9.6023p+4, 0x9.6025p+4, 0x4.e4f338p-24, -0x1.0ad512p-4, 0xe.387eep-16, 0x2.c77588p-8 },
+  { 0x9.92658p+4, 0x9.92673p+4, 0x9.9269p+4, -0x1.287c58p-24, 0x1.0814d4p-4, -0xd.cad8fp-16, -0x2.c02074p-8 },
+  { 0x9.c4a7bp+4, 0x9.c4ab6p+4, 0x9.c4acdp+4, -0x4.b594ep-24, -0x1.0569fp-4, 0xd.63d72p-16, 0x2.b9053p-8 },
+  { 0x9.f6eccp+4, 0x9.f6ef8p+4, 0x9.f6f15p+4, -0x3.a8f164p-24, 0x1.02d354p-4, -0xc.fae8ap-16, -0x2.b21efp-8 },
+  { 0xa.2931cp+4, 0xa.2933bp+4, 0xa.2935ap+4, -0x6.12505p-24, -0x1.005004p-4, 0xc.9fdebp-16, 0x2.ab6c2cp-8 },
+  { 0xa.5b74p+4, 0xa.5b77dp+4, 0xa.5b79ap+4, 0x1.8acf4ep-24, 0xf.ddf16p-8, -0xc.4239ap-16, -0x2.a4eb2p-8 },
+  { 0xa.8dba1p+4, 0xa.8dbbfp+4, 0xa.8dbd9p+4, 0x1.39cf86p-24, -0xf.b7faep-8, 0xb.e9b1p-16, 0x2.9e97dp-8 },
+  { 0xa.bffe2p+4, 0xa.c0001p+4, 0xa.c001ep+4, -0x2.5f8de8p-24, 0xf.930fep-8, -0xb.95eddp-16, -0x2.98716cp-8 },
+  { 0xa.f2422p+4, 0xa.f2443p+4, 0xa.f2464p+4, 0x2.073d64p-24, -0xf.6f245p-8, 0xb.46a06p-16, 0x2.92759p-8 },
+  { 0xb.24846p+4, 0xb.24885p+4, 0xb.24895p+4, -0x4.ed26ep-28, 0xf.4c2cep-8, -0xa.fb7f6p-16, -0x2.8ca308p-8 },
+  { 0xb.56c8fp+4, 0xb.56cc7p+4, 0xb.56cep+4, -0x2.ae5204p-24, -0xf.2a1efp-8, 0xa.b4472p-16, 0x2.86f67cp-8 },
+  { 0xb.890e7p+4, 0xb.89109p+4, 0xb.8911cp+4, 0x6.d72168p-24, 0xf.08f09p-8, -0xa.70b97p-16, -0x2.816f28p-8 },
+  { 0xb.bb506p+4, 0xb.bb54ap+4, 0xb.bb566p+4, 0x2.d3f294p-24, -0xe.e8986p-8, 0xa.2928cp-16, 0x2.7c0c0cp-8 },
+  { 0xb.ed974p+4, 0xb.ed98cp+4, 0xb.ed9adp+4, 0x3.88c0fp-24, 0xe.c90d7p-8, -0x9.ec57dp-16, -0x2.76ca28p-8 },
+  { 0xc.1fdafp+4, 0xc.1fdcdp+4, 0xc.1fdfp+4, 0x3.d94d34p-24, -0xe.aa478p-8, 0x9.ab3c5p-16, 0x2.71a9cp-8 },
+  { 0xc.521f7p+4, 0xc.5220fp+4, 0xc.5223p+4, 0x4.66b7ep-24, 0xe.8c3e9p-8, -0x9.74619p-16, -0x2.6ca8b8p-8 },
+  { 0xc.8463p+4, 0xc.8465p+4, 0xc.8466bp+4, 0xf.f7ac9p-28, -0xe.6eeb6p-8, 0x9.3901ap-16, 0x2.67c628p-8 },
+};
+
+/* Formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
+   y1f(x) ~ sqrt(2/(pi*x))*beta1(x)*sin(x-3pi/4-alpha1(x))
+   where beta1(x) = 1 + 3/(16*x^2) - 99/(512*x^4)
+   and alpha1(x) = -3/(8*x) + 21/(128*x^3) - 1899/(5120*x^5). */
+static float
+y1f_asympt (float x)
+{
+  float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
+  double y = 1.0 / (double) x;
+  double y2 = y * y;
+  double beta1 = 1.0f + y2 * (0x3p-4 - 0x3.18p-4 * y2);
+  double alpha1 = y * (-0x6p-4 + y2 * (0x2.ap-4 - 0x5.ef33333333334p-4 * y2));
+  double h;
+  h = reduce_mod_twopi ((double) x);
+  int n = -1; /* Accounts for -pi/2 (pi/4 was subtracted in reduce_mod_twopi). */
+  /* Now reduce mod pi/2. */
+  h = reduce_mod_piover2 (h, &n);
+  /* Subtract alpha1. */
+  h -= alpha1;
+  /* Reduce again mod pi/2 if needed. */
+  h = reduce_mod_piover2 (h, &n);
+  /* Now x - 3pi/4 - alpha1 = h + n*pi/2 mod (2*pi). */
+  float xr = (float) h;
+  n = n & 3;
+  float t = cst / sqrtf (x) * (float) beta1;
+  if (n == 0)
+    return t * sinf (xr);
+  else if (n == 2) /* sin(x+pi) = -sin(x) */
+    return -t * sinf (xr);
+  else if (n == 1) /* sin(x+pi/2) = cos(x) */
+    return t * cosf (xr);
+  else             /* sin(x+3pi/2) = -cos(x) */
+    return -t * cosf (xr);
+}
+
+/* Special code for x near a root of y1.
+   z is the value computed by the generic code.
+   For small x, we use a polynomial approximating y1 around its root.
+   For large x, we use an asymptotic formula (y1f_asympt). */
+static float
+y1f_near_root (float x, float z)
+{
+  float index_f;
+  int index;
+
+  index_f = roundf ((x - FIRST_ZERO_Y1) / (float) M_PI);
+  /* SMALL_SIZE should be at least 18 */
+  if (index_f >= SMALL_SIZE)
+    return y1f_asympt (x);
+  index = (int) index_f;
+  float *p = Py[index];
+  float x0 = p[0];
+  float x1 = p[2];
+  /* If we are not in the interval [x0,x1] around xmid, we return the value z. */
+  if (! (x0 <= x && x <= x1))
+    return z;
+  float xmid = p[1];
+  float y = x - xmid;
+  float p6 = (index > 0) ? p[6] : p[6] + y * -0x1.83758ep-8f;
+  return p[3] + y * (p[4] + y * (p[5] + y * p6));
+}
+
+/* Special code for 0x1.f7f7acp+0 <= x < 2, because the code for
+   2^-25 <= x < 2 yields an error > 9 ulps in a few cases. */
+static float
+y1f_below_two (float x)
+{
+  float xmid = 0x1.fbfbd6p+0;
+  float y = x - xmid;
+  float p[3] = { -0x1.dabdeep-4, 0x9.1290ap-4, -0x1.98468cp-4 };
+  return p[0] + y * (p[1] + y * p[2]);
+}
+
 float
 __ieee754_y1f(float x)
 {
@@ -123,11 +627,12 @@  __ieee754_y1f(float x)
 		__sincosf (x, &s, &c);
 		ss = -s-c;
 		cc = s-c;
-		if(ix<0x7f000000) {  /* make sure x+x not overflow */
-		    z = __cosf(x+x);
-		    if ((s*c)>zero) cc = z/ss;
-		    else            ss = z/cc;
-		}
+                if (ix >= 0x7f000000) /* x >= 2^127: use asymptotic expansion. */
+                  return y1f_asympt (x);
+                /* Now we are sure that x+x cannot overflow. */
+                z = __cosf(x+x);
+                if ((s*c)>zero) cc = z/ss;
+                else            ss = z/cc;
 	/* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
 	 * where x0 = x-3pi/4
 	 *      Better formula:
@@ -139,12 +644,17 @@  __ieee754_y1f(float x)
 	 *              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
 	 * to compute the worse one.
 	 */
-		if(ix>0x5c000000) z = (invsqrtpi*ss)/sqrtf(x);
-		else {
-		    u = ponef(x); v = qonef(x);
-		    z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
-		}
-		return z;
+                if (ix <= 0x5c000000)
+                  {
+                    u = ponef(x); v = qonef(x);
+                    ss = u*ss+v*cc;
+                  }
+                z = (invsqrtpi * ss) / sqrtf(x);
+                float magic = 0x1.8p+22; /* 2^22 + 2^21 */
+                if (magic + ss != magic) /* Most likely. */
+                  return z;
+                else /* |cc| <= 2^-2 */
+                  return y1f_near_root (x, z);
 	}
 	if(__builtin_expect(ix<=0x33000000, 0)) {    /* x < 2**-25 */
 	    z = -tpi / x;
@@ -152,6 +662,9 @@  __ieee754_y1f(float x)
 		__set_errno (ERANGE);
 	    return z;
 	}
+        /* Now 2**-25 <= x < 2. */
+        if (ix >= 0x3ffbfbd6) /* 0x1.f7f7acp+0 <= x < 2 */
+          return y1f_below_two (x);
 	z = x*x;
 	u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
 	v = one+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4]))));
@@ -159,7 +672,7 @@  __ieee754_y1f(float x)
 }
 libm_alias_finite (__ieee754_y1f, __y1f)
 
-/* For x >= 8, the asymptotic expansions of pone is
+/* For x >= 8, the asymptotic expansion of pone is
  *	1 + 15/128 s^2 - 4725/2^15 s^4 - ...,	where s = 1/x.
  * We approximate pone by
  *	pone(x) = 1 + (R/S)
@@ -252,8 +765,19 @@  ponef(float x)
 	return one+ r/s;
 }
 
+/* FIXME: the comment below is wrong.  It was most likely copied from
+   dbl-64/e_j1f.c, but not updated. Largest errors are the following (rounded up):
+   * for qr8/qs8: largest known value of | qone(x)/s -0.375-R/S | is 2^(-35.78)
+     (for x=0x8p+0)
+   * for qr5/qs5: largest value of | qone(x)/s -0.375-R/S | is 2^(-34.28)
+     (for x=0x7.b8e2cp+0)
+   * for qr3/qs3: largest value of | qone(x)/s -0.375-R/S | is 2^(-32.18)
+     (for x=0x3.4956bcp+0)
+   * for qr2/qs2: largest value of | qone(x)/s -0.375-R/S | is 2^(-33.10)
+     (for x=0x2.da487cp+0)
+*/
 
-/* For x >= 8, the asymptotic expansions of qone is
+/* For x >= 8, the asymptotic expansion of qone is
  *	3/8 s - 105/1024 s^3 - ..., where s = 1/x.
  * We approximate pone by
  *	qone(x) = s*(0.375 + (R/S))
@@ -340,10 +864,10 @@  qonef(float x)
 	GET_FLOAT_WORD(ix,x);
 	ix &= 0x7fffffff;
 	/* ix >= 0x40000000 for all calls to this function.  */
-	if(ix>=0x40200000)     {p = qr8; q= qs8;}
-	else if(ix>=0x40f71c58){p = qr5; q= qs5;}
-	else if(ix>=0x4036db68){p = qr3; q= qs3;}
-	else {p = qr2; q= qs2;}
+	if(ix>=0x41000000)     {p = qr8; q= qs8;} /* x >= 8 */
+	else if(ix>=0x40f71c58){p = qr5; q= qs5;} /* x >= 7.722209930e+00 */
+	else if(ix>=0x4036db68){p = qr3; q= qs3;} /* x >= 2.857141495e+00 */
+	else {p = qr2; q= qs2;}                   /* x >= 2 */
 	z = one/(x*x);
 	r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
 	s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));