Message ID | Pine.LNX.4.64.1405082300140.12485@digraph.polyomino.org.uk |
---|---|
State | New |
Headers | show |
On Thu, May 8, 2014 at 4:01 PM, Joseph S. Myers <joseph@codesourcery.com> wrote: > This patch fixes bug 16064, i386 fenv_t not including SSE state, using > the technique suggested there of storing the state in the existing > __eip field of fenv_t to avoid needing to increase the size of fenv_t > and add new symbol versions. The included testcase, which previously > failed for i386 (but passed for x86_64), illustrates how the previous > state was buggy. > > This patch causes the SSE state to be included *to the extent it is on > x86_64*. Where some state should logically be included but isn't for > x86_64 (see bug 16068), this patch does not cause it to be included > for i386 either. The idea is that any patch fixing that bug should > fix it for both x86_64 and i386 at once. > > Tested i386 and x86_64. (I haven't tested the case of a CPU without > SSE2 disabling the test.) > > 2014-05-08 Joseph Myers <joseph@codesourcery.com> > > [BZ #16064] > * sysdeps/i386/fpu/fegetenv.c: Include <unistd.h>, <ldsodefs.h> > and <dl-procinfo.h>. > (__fegetenv): Save SSE state in envp->__eip if supported. > * sysdeps/i386/fpu/feholdexcpt.c (feholdexcept): Save SSE state in > envp->__eip if supported. > * sysdeps/i386/fpu/fesetenv.c: Include <unistd.h>, <ldsodefs.h> > and <dl-procinfo.h>. > (__fesetenv): Do not use envp->__eip to set eip in floating-point > environment. Set SSE state if supported. > * sysdeps/x86/fpu/Makefile [$(subdir) = math] (tests): Add > test-fenv-sse. > [$(subdir) = math] (CFLAGS-test-fenv-sse.c): Add -msse2 > -mfpmath=sse. > * sysdeps/x86/fpu/test-fenv-sse.c: New file. > > diff --git a/sysdeps/i386/fpu/fegetenv.c b/sysdeps/i386/fpu/fegetenv.c > index 8dbdb57..8c45b6b 100644 > --- a/sysdeps/i386/fpu/fegetenv.c > +++ b/sysdeps/i386/fpu/fegetenv.c > @@ -18,6 +18,9 @@ > <http://www.gnu.org/licenses/>. */ > > #include <fenv.h> > +#include <unistd.h> > +#include <ldsodefs.h> > +#include <dl-procinfo.h> > > int > __fegetenv (fenv_t *envp) > @@ -28,6 +31,9 @@ __fegetenv (fenv_t *envp) > would block all exceptions. */ > __asm__ ("fldenv %0" : : "m" (*envp)); > > + if ((GLRO(dl_hwcap) & HWCAP_I386_XMM) != 0) > + __asm__ ("stmxcsr %0" : "=m" (envp->__eip)); > + > /* Success. */ > return 0; > } > diff --git a/sysdeps/i386/fpu/feholdexcpt.c b/sysdeps/i386/fpu/feholdexcpt.c > index d475ca8..dc9d703 100644 > --- a/sysdeps/i386/fpu/feholdexcpt.c > +++ b/sysdeps/i386/fpu/feholdexcpt.c > @@ -35,10 +35,10 @@ feholdexcept (fenv_t *envp) > unsigned int xwork; > > /* Get the current control word. */ > - __asm__ ("stmxcsr %0" : "=m" (*&xwork)); > + __asm__ ("stmxcsr %0" : "=m" (envp->__eip)); > > /* Set all exceptions to non-stop and clear them. */ > - xwork = (xwork | 0x1f80) & ~0x3f; > + xwork = (envp->__eip | 0x1f80) & ~0x3f; > > __asm__ ("ldmxcsr %0" : : "m" (*&xwork)); > } > diff --git a/sysdeps/i386/fpu/fesetenv.c b/sysdeps/i386/fpu/fesetenv.c > index 95b2f0a..3eb0de0 100644 > --- a/sysdeps/i386/fpu/fesetenv.c > +++ b/sysdeps/i386/fpu/fesetenv.c > @@ -19,6 +19,9 @@ > > #include <fenv.h> > #include <assert.h> > +#include <unistd.h> > +#include <ldsodefs.h> > +#include <dl-procinfo.h> > > > int > @@ -63,7 +66,7 @@ __fesetenv (const fenv_t *envp) > & (FE_ALL_EXCEPT | FE_TOWARDZERO)); > temp.__status_word &= ~FE_ALL_EXCEPT; > temp.__status_word |= envp->__status_word & FE_ALL_EXCEPT; > - temp.__eip = envp->__eip; > + temp.__eip = 0; > temp.__cs_selector = envp->__cs_selector; > temp.__opcode = envp->__opcode; Should we also set __cs_selector/__opcode/__data_offset/__data_selector to 0?
On Thu, 8 May 2014, H.J. Lu wrote: > > @@ -63,7 +66,7 @@ __fesetenv (const fenv_t *envp) > > & (FE_ALL_EXCEPT | FE_TOWARDZERO)); > > temp.__status_word &= ~FE_ALL_EXCEPT; > > temp.__status_word |= envp->__status_word & FE_ALL_EXCEPT; > > - temp.__eip = envp->__eip; > > + temp.__eip = 0; > > temp.__cs_selector = envp->__cs_selector; > > temp.__opcode = envp->__opcode; > > Should we also set __cs_selector/__opcode/__data_offset/__data_selector > to 0? Not as part of this patch (the change there was simply to avoid setting __eip to a value that now represents MXCSR and so is completely logically unrelated to __eip). I don't think any of those fields you list are particularly meaningfully part of the C floating-point environment, so they could reasonably all be set to 0 - but any such change should be done in sync for the i386 and x86_64 implementations. The point of this patch is to fix an actual bug, visible to ordinary C code, arising from a case where the i386 implementation misses functionality present in the x86_64 version. If in future we need space in fenv_t for something else (e.g. if libdfp is merged into glibc and gains BID support, so that the software rounding mode for decimal floating point needs including in fenv_t) I don't think there will be difficulty finding space for it.
On Fri, May 9, 2014 at 5:17 AM, Joseph S. Myers <joseph@codesourcery.com> wrote: > On Thu, 8 May 2014, H.J. Lu wrote: > >> > @@ -63,7 +66,7 @@ __fesetenv (const fenv_t *envp) >> > & (FE_ALL_EXCEPT | FE_TOWARDZERO)); >> > temp.__status_word &= ~FE_ALL_EXCEPT; >> > temp.__status_word |= envp->__status_word & FE_ALL_EXCEPT; >> > - temp.__eip = envp->__eip; >> > + temp.__eip = 0; >> > temp.__cs_selector = envp->__cs_selector; >> > temp.__opcode = envp->__opcode; >> >> Should we also set __cs_selector/__opcode/__data_offset/__data_selector >> to 0? > > Not as part of this patch (the change there was simply to avoid setting > __eip to a value that now represents MXCSR and so is completely logically > unrelated to __eip). > > I don't think any of those fields you list are particularly meaningfully > part of the C floating-point environment, so they could reasonably all be > set to 0 - but any such change should be done in sync for the i386 and > x86_64 implementations. The point of this patch is to fix an actual bug, > visible to ordinary C code, arising from a case where the i386 > implementation misses functionality present in the x86_64 version. > There are 4 unused short int fields in fenv_t. Can we use 2 of them for mxcsr instead of re-using __eip?
On Fri, 9 May 2014, H.J. Lu wrote: > There are 4 unused short int fields in fenv_t. Can we use 2 of them > for mxcsr instead of re-using __eip? You could, but I don't think it's worth the extra complexity. __eip is already reused for this purpose in the fenv_private code.
On Fri, May 9, 2014 at 8:49 AM, Joseph S. Myers <joseph@codesourcery.com> wrote: > On Fri, 9 May 2014, H.J. Lu wrote: > >> There are 4 unused short int fields in fenv_t. Can we use 2 of them >> for mxcsr instead of re-using __eip? > > You could, but I don't think it's worth the extra complexity. __eip is > already reused for this purpose in the fenv_private code. > If I read fenv_private correctly, we don't load x87 fenv_t with __eip set to mxcsr. Am I correct?
On Fri, 9 May 2014, H.J. Lu wrote: > On Fri, May 9, 2014 at 8:49 AM, Joseph S. Myers <joseph@codesourcery.com> wrote: > > On Fri, 9 May 2014, H.J. Lu wrote: > > > >> There are 4 unused short int fields in fenv_t. Can we use 2 of them > >> for mxcsr instead of re-using __eip? > > > > You could, but I don't think it's worth the extra complexity. __eip is > > already reused for this purpose in the fenv_private code. > > > > If I read fenv_private correctly, we don't load x87 fenv_t with > __eip set to mxcsr. Am I correct? That may be the case (and this patch avoids loading it in the <fenv.h> functions - it just loads 0 there in fesetenv rather than treating eip as a meaningful part of the x87 environment at C level).
On Fri, May 9, 2014 at 9:06 AM, Joseph S. Myers <joseph@codesourcery.com> wrote: > On Fri, 9 May 2014, H.J. Lu wrote: > >> On Fri, May 9, 2014 at 8:49 AM, Joseph S. Myers <joseph@codesourcery.com> wrote: >> > On Fri, 9 May 2014, H.J. Lu wrote: >> > >> >> There are 4 unused short int fields in fenv_t. Can we use 2 of them >> >> for mxcsr instead of re-using __eip? >> > >> > You could, but I don't think it's worth the extra complexity. __eip is >> > already reused for this purpose in the fenv_private code. >> > >> >> If I read fenv_private correctly, we don't load x87 fenv_t with >> __eip set to mxcsr. Am I correct? > > That may be the case (and this patch avoids loading it in the <fenv.h> > functions - it just loads 0 there in fesetenv rather than treating eip as > a meaningful part of the x87 environment at C level). > When loading load x87 fenv_t, we should either not to change __eip or set it to 0 together with other fields, similar to other code paths in __fesetenv, to avoid any potential issues.
diff --git a/sysdeps/i386/fpu/fegetenv.c b/sysdeps/i386/fpu/fegetenv.c index 8dbdb57..8c45b6b 100644 --- a/sysdeps/i386/fpu/fegetenv.c +++ b/sysdeps/i386/fpu/fegetenv.c @@ -18,6 +18,9 @@ <http://www.gnu.org/licenses/>. */ #include <fenv.h> +#include <unistd.h> +#include <ldsodefs.h> +#include <dl-procinfo.h> int __fegetenv (fenv_t *envp) @@ -28,6 +31,9 @@ __fegetenv (fenv_t *envp) would block all exceptions. */ __asm__ ("fldenv %0" : : "m" (*envp)); + if ((GLRO(dl_hwcap) & HWCAP_I386_XMM) != 0) + __asm__ ("stmxcsr %0" : "=m" (envp->__eip)); + /* Success. */ return 0; } diff --git a/sysdeps/i386/fpu/feholdexcpt.c b/sysdeps/i386/fpu/feholdexcpt.c index d475ca8..dc9d703 100644 --- a/sysdeps/i386/fpu/feholdexcpt.c +++ b/sysdeps/i386/fpu/feholdexcpt.c @@ -35,10 +35,10 @@ feholdexcept (fenv_t *envp) unsigned int xwork; /* Get the current control word. */ - __asm__ ("stmxcsr %0" : "=m" (*&xwork)); + __asm__ ("stmxcsr %0" : "=m" (envp->__eip)); /* Set all exceptions to non-stop and clear them. */ - xwork = (xwork | 0x1f80) & ~0x3f; + xwork = (envp->__eip | 0x1f80) & ~0x3f; __asm__ ("ldmxcsr %0" : : "m" (*&xwork)); } diff --git a/sysdeps/i386/fpu/fesetenv.c b/sysdeps/i386/fpu/fesetenv.c index 95b2f0a..3eb0de0 100644 --- a/sysdeps/i386/fpu/fesetenv.c +++ b/sysdeps/i386/fpu/fesetenv.c @@ -19,6 +19,9 @@ #include <fenv.h> #include <assert.h> +#include <unistd.h> +#include <ldsodefs.h> +#include <dl-procinfo.h> int @@ -63,7 +66,7 @@ __fesetenv (const fenv_t *envp) & (FE_ALL_EXCEPT | FE_TOWARDZERO)); temp.__status_word &= ~FE_ALL_EXCEPT; temp.__status_word |= envp->__status_word & FE_ALL_EXCEPT; - temp.__eip = envp->__eip; + temp.__eip = 0; temp.__cs_selector = envp->__cs_selector; temp.__opcode = envp->__opcode; temp.__data_offset = envp->__data_offset; @@ -72,6 +75,33 @@ __fesetenv (const fenv_t *envp) __asm__ ("fldenv %0" : : "m" (temp)); + if ((GLRO(dl_hwcap) & HWCAP_I386_XMM) != 0) + { + unsigned int mxcsr; + __asm__ ("stmxcsr %0" : "=m" (mxcsr)); + + if (envp == FE_DFL_ENV) + { + /* Set mask for SSE MXCSR. */ + mxcsr |= (FE_ALL_EXCEPT << 7); + /* Set rounding to FE_TONEAREST. */ + mxcsr &= ~0x6000; + mxcsr |= (FE_TONEAREST << 3); + } + else if (envp == FE_NOMASK_ENV) + { + /* Do not mask exceptions. */ + mxcsr &= ~(FE_ALL_EXCEPT << 7); + /* Set rounding to FE_TONEAREST. */ + mxcsr &= ~0x6000; + mxcsr |= (FE_TONEAREST << 3); + } + else + mxcsr = envp->__eip; + + __asm__ ("ldmxcsr %0" : : "m" (mxcsr)); + } + /* Success. */ return 0; } diff --git a/sysdeps/x86/fpu/Makefile b/sysdeps/x86/fpu/Makefile index 8054380..9cb7bb2 100644 --- a/sysdeps/x86/fpu/Makefile +++ b/sysdeps/x86/fpu/Makefile @@ -1,3 +1,5 @@ ifeq ($(subdir),math) libm-support += powl_helper +tests += test-fenv-sse +CFLAGS-test-fenv-sse.c += -msse2 -mfpmath=sse endif diff --git a/sysdeps/x86/fpu/test-fenv-sse.c b/sysdeps/x86/fpu/test-fenv-sse.c new file mode 100644 index 0000000..c81eb16 --- /dev/null +++ b/sysdeps/x86/fpu/test-fenv-sse.c @@ -0,0 +1,138 @@ +/* Test floating-point environment includes SSE state (bug 16064). + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> +#include <fenv.h> +#include <float.h> +#include <stdbool.h> +#include <stdio.h> + +static bool +have_sse2 (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + return false; + + return (edx & bit_SSE2) != 0; +} + +static __attribute__ ((noinline)) int +sse_tests (void) +{ + int ret = 0; + fenv_t base_env; + if (fegetenv (&base_env) != 0) + { + puts ("fegetenv (&base_env) failed"); + return 1; + } + if (fesetround (FE_UPWARD) != 0) + { + puts ("fesetround (FE_UPWARD) failed"); + return 1; + } + if (fesetenv (&base_env) != 0) + { + puts ("fesetenv (&base_env) failed"); + return 1; + } + volatile float a = 1.0f, b = FLT_MIN, c; + c = a + b; + if (c != 1.0f) + { + puts ("fesetenv did not restore rounding mode"); + ret = 1; + } + if (fesetround (FE_DOWNWARD) != 0) + { + puts ("fesetround (FE_DOWNWARD) failed"); + return 1; + } + if (feupdateenv (&base_env) != 0) + { + puts ("feupdateenv (&base_env) failed"); + return 1; + } + volatile float d = -FLT_MIN, e; + e = a + d; + if (e != 1.0f) + { + puts ("feupdateenv did not restore rounding mode"); + ret = 1; + } + if (fesetround (FE_UPWARD) != 0) + { + puts ("fesetround (FE_UPWARD) failed"); + return 1; + } + fenv_t upward_env; + if (feholdexcept (&upward_env) != 0) + { + puts ("feholdexcept (&upward_env) failed"); + return 1; + } + if (fesetround (FE_DOWNWARD) != 0) + { + puts ("fesetround (FE_DOWNWARD) failed"); + return 1; + } + if (fesetenv (&upward_env) != 0) + { + puts ("fesetenv (&upward_env) failed"); + return 1; + } + e = a + d; + if (e != 1.0f) + { + puts ("fesetenv did not restore rounding mode from feholdexcept"); + ret = 1; + } + if (fesetround (FE_UPWARD) != 0) + { + puts ("fesetround (FE_UPWARD) failed"); + return 1; + } + if (fesetenv (FE_DFL_ENV) != 0) + { + puts ("fesetenv (FE_DFL_ENV) failed"); + return 1; + } + c = a + b; + if (c != 1.0f) + { + puts ("fesetenv (FE_DFL_ENV) did not restore rounding mode"); + ret = 1; + } + return ret; +} + +static int +do_test (void) +{ + if (!have_sse2 ()) + { + puts ("CPU does not support SSE2, cannot test"); + return 0; + } + return sse_tests (); +} + +#define TEST_FUNCTION do_test () +#include <test-skeleton.c>