Message ID | 20240403121150.1018799-6-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | Improve rounding to integer function for C23 | expand |
On Wed, Apr 3, 2024 at 5:12 AM Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > > It is not allowed anymore on ISO C23. > > Checked on x86_64-linux-gnu and i686-linux-gnu. > --- > sysdeps/i386/fpu/s_ceill.S | 39 ------------------------------------ > sysdeps/x86/fpu/s_ceill.c | 38 +++++++++++++++++++++++++++++++++++ > sysdeps/x86_64/fpu/s_ceill.S | 34 ------------------------------- > 3 files changed, 38 insertions(+), 73 deletions(-) > delete mode 100644 sysdeps/i386/fpu/s_ceill.S > create mode 100644 sysdeps/x86/fpu/s_ceill.c > delete mode 100644 sysdeps/x86_64/fpu/s_ceill.S > > diff --git a/sysdeps/i386/fpu/s_ceill.S b/sysdeps/i386/fpu/s_ceill.S > deleted file mode 100644 > index a551fce7f9..0000000000 > --- a/sysdeps/i386/fpu/s_ceill.S > +++ /dev/null > @@ -1,39 +0,0 @@ > -/* > - * Public domain. > - */ > - > -#include <libm-alias-ldouble.h> > -#include <machine/asm.h> > - > -RCSID("$NetBSD: $") > - > -ENTRY(__ceill) > - fldt 4(%esp) > - subl $32,%esp > - cfi_adjust_cfa_offset (32) > - > - fnstenv 4(%esp) /* store fpu environment */ > - > - /* We use here %edx although only the low 1 bits are defined. > - But none of the operations should care and they are faster > - than the 16 bit operations. */ > - movl $0x0800,%edx /* round towards +oo */ > - orl 4(%esp),%edx > - andl $0xfbff,%edx > - movl %edx,(%esp) > - fldcw (%esp) /* load modified control word */ > - > - frndint /* round */ > - > - /* Preserve "invalid" exceptions from sNaN input. */ > - fnstsw > - andl $0x1, %eax > - orl %eax, 8(%esp) > - > - fldenv 4(%esp) /* restore original environment */ > - > - addl $32,%esp > - cfi_adjust_cfa_offset (-32) > - ret > -END (__ceill) > -libm_alias_ldouble (__ceil, ceil) > diff --git a/sysdeps/x86/fpu/s_ceill.c b/sysdeps/x86/fpu/s_ceill.c > new file mode 100644 > index 0000000000..6ccc9d84d4 > --- /dev/null > +++ b/sysdeps/x86/fpu/s_ceill.c > @@ -0,0 +1,38 @@ > +/* Return smallest integral value not less than argument. x86 version. 
> + Copyright (C) 2024 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define NO_MATH_REDIRECT > +#include <math.h> > +#include <fenv_private.h> > +#include <libm-alias-ldouble.h> > + > +long double > +__ceill (long double x) > +{ > + fenv_t fenv; > + long double r; > + > + libc_feholdexcept_setround_387 (&fenv, FE_UPWARD); > + asm volatile ("frndint" : "=t" (r) : "0" (x)); > + /* Preserve "invalid" exceptions from sNaN input. */ > + fenv.__status_word |= libc_fetestexcept_387 (FE_INVALID); > + libc_fesetenv_387 (&fenv); > + Since all these newly added functions are almost identical, should we use a template instead of repeating the similar code sequence? Something like TYPE FUNC (TYPE x) { fenv_t fenv; TYPE r; libc_feholdexcept_setround_387 (&fenv, FE_OPTION); asm volatile ("frndint" : "=t" (r) : "0" (x)); /* Preserve "invalid" exceptions from sNaN input. */ fenv.__status_word |= libc_fetestexcept_387 (FE_INVALID); libc_fesetenv_387 (&fenv); return r; } > + return r; > +} > +libm_alias_ldouble (__ceil, ceil) > diff --git a/sysdeps/x86_64/fpu/s_ceill.S b/sysdeps/x86_64/fpu/s_ceill.S > deleted file mode 100644 > index 16dbecd56d..0000000000 > --- a/sysdeps/x86_64/fpu/s_ceill.S > +++ /dev/null > @@ -1,34 +0,0 @@ > -/* > - * Public domain. 
> - */ > - > -#include <libm-alias-ldouble.h> > -#include <machine/asm.h> > - > - > -ENTRY(__ceill) > - fldt 8(%rsp) > - > - fnstenv -28(%rsp) /* store fpu environment */ > - > - /* We use here %edx although only the low 1 bits are defined. > - But none of the operations should care and they are faster > - than the 16 bit operations. */ > - movl $0x0800,%edx /* round towards +oo */ > - orl -28(%rsp),%edx > - andl $0xfbff,%edx > - movl %edx,-32(%rsp) > - fldcw -32(%rsp) /* load modified control word */ > - > - frndint /* round */ > - > - /* Preserve "invalid" exceptions from sNaN input. */ > - fnstsw > - andl $0x1, %eax > - orl %eax, -24(%rsp) > - > - fldenv -28(%rsp) /* restore original environment */ > - > - ret > -END (__ceill) > -libm_alias_ldouble (__ceil, ceil) > -- > 2.34.1 > -- H.J.
On 03/04/24 10:04, H.J. Lu wrote: > On Wed, Apr 3, 2024 at 5:12 AM Adhemerval Zanella > <adhemerval.zanella@linaro.org> wrote: >> >> It is not allowed anymore on ISO C23. >> >> Checked on x86_64-linux-gnu and i686-linux-gnu. >> --- >> sysdeps/i386/fpu/s_ceill.S | 39 ------------------------------------ >> sysdeps/x86/fpu/s_ceill.c | 38 +++++++++++++++++++++++++++++++++++ >> sysdeps/x86_64/fpu/s_ceill.S | 34 ------------------------------- >> 3 files changed, 38 insertions(+), 73 deletions(-) >> delete mode 100644 sysdeps/i386/fpu/s_ceill.S >> create mode 100644 sysdeps/x86/fpu/s_ceill.c >> delete mode 100644 sysdeps/x86_64/fpu/s_ceill.S >> >> diff --git a/sysdeps/i386/fpu/s_ceill.S b/sysdeps/i386/fpu/s_ceill.S >> deleted file mode 100644 >> index a551fce7f9..0000000000 >> --- a/sysdeps/i386/fpu/s_ceill.S >> +++ /dev/null >> @@ -1,39 +0,0 @@ >> -/* >> - * Public domain. >> - */ >> - >> -#include <libm-alias-ldouble.h> >> -#include <machine/asm.h> >> - >> -RCSID("$NetBSD: $") >> - >> -ENTRY(__ceill) >> - fldt 4(%esp) >> - subl $32,%esp >> - cfi_adjust_cfa_offset (32) >> - >> - fnstenv 4(%esp) /* store fpu environment */ >> - >> - /* We use here %edx although only the low 1 bits are defined. >> - But none of the operations should care and they are faster >> - than the 16 bit operations. */ >> - movl $0x0800,%edx /* round towards +oo */ >> - orl 4(%esp),%edx >> - andl $0xfbff,%edx >> - movl %edx,(%esp) >> - fldcw (%esp) /* load modified control word */ >> - >> - frndint /* round */ >> - >> - /* Preserve "invalid" exceptions from sNaN input. 
*/ >> - fnstsw >> - andl $0x1, %eax >> - orl %eax, 8(%esp) >> - >> - fldenv 4(%esp) /* restore original environment */ >> - >> - addl $32,%esp >> - cfi_adjust_cfa_offset (-32) >> - ret >> -END (__ceill) >> -libm_alias_ldouble (__ceil, ceil) >> diff --git a/sysdeps/x86/fpu/s_ceill.c b/sysdeps/x86/fpu/s_ceill.c >> new file mode 100644 >> index 0000000000..6ccc9d84d4 >> --- /dev/null >> +++ b/sysdeps/x86/fpu/s_ceill.c >> @@ -0,0 +1,38 @@ >> +/* Return smallest integral value not less than argument. x86 version. >> + Copyright (C) 2024 Free Software Foundation, Inc. >> + This file is part of the GNU C Library. >> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. >> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#define NO_MATH_REDIRECT >> +#include <math.h> >> +#include <fenv_private.h> >> +#include <libm-alias-ldouble.h> >> + >> +long double >> +__ceill (long double x) >> +{ >> + fenv_t fenv; >> + long double r; >> + >> + libc_feholdexcept_setround_387 (&fenv, FE_UPWARD); >> + asm volatile ("frndint" : "=t" (r) : "0" (x)); >> + /* Preserve "invalid" exceptions from sNaN input. */ >> + fenv.__status_word |= libc_fetestexcept_387 (FE_INVALID); >> + libc_fesetenv_387 (&fenv); >> + > > Since all these newly added functions are almost identical, > should we use a template instead of repeating the similar > code sequence? 
Something like > > TYPE > FUNC (TYPE x) > { > fenv_t fenv; > TYPE r; > > libc_feholdexcept_setround_387 (&fenv, FE_OPTION); > asm volatile ("frndint" : "=t" (r) : "0" (x)); > /* Preserve "invalid" exceptions from sNaN input. */ > fenv.__status_word |= libc_fetestexcept_387 (FE_INVALID); > libc_fesetenv_387 (&fenv); > > return r; > > } Sounds reasonable, I will update the patch.
On Wed, Apr 3, 2024 at 6:15 AM Adhemerval Zanella Netto <adhemerval.zanella@linaro.org> wrote: > > > > On 03/04/24 10:04, H.J. Lu wrote: > > On Wed, Apr 3, 2024 at 5:12 AM Adhemerval Zanella > > <adhemerval.zanella@linaro.org> wrote: > >> > >> It is not allowed anymore on ISO C23. > >> > >> Checked on x86_64-linux-gnu and i686-linux-gnu. > >> --- > >> sysdeps/i386/fpu/s_ceill.S | 39 ------------------------------------ > >> sysdeps/x86/fpu/s_ceill.c | 38 +++++++++++++++++++++++++++++++++++ > >> sysdeps/x86_64/fpu/s_ceill.S | 34 ------------------------------- > >> 3 files changed, 38 insertions(+), 73 deletions(-) > >> delete mode 100644 sysdeps/i386/fpu/s_ceill.S > >> create mode 100644 sysdeps/x86/fpu/s_ceill.c > >> delete mode 100644 sysdeps/x86_64/fpu/s_ceill.S > >> > >> diff --git a/sysdeps/i386/fpu/s_ceill.S b/sysdeps/i386/fpu/s_ceill.S > >> deleted file mode 100644 > >> index a551fce7f9..0000000000 > >> --- a/sysdeps/i386/fpu/s_ceill.S > >> +++ /dev/null > >> @@ -1,39 +0,0 @@ > >> -/* > >> - * Public domain. > >> - */ > >> - > >> -#include <libm-alias-ldouble.h> > >> -#include <machine/asm.h> > >> - > >> -RCSID("$NetBSD: $") > >> - > >> -ENTRY(__ceill) > >> - fldt 4(%esp) > >> - subl $32,%esp > >> - cfi_adjust_cfa_offset (32) > >> - > >> - fnstenv 4(%esp) /* store fpu environment */ > >> - > >> - /* We use here %edx although only the low 1 bits are defined. > >> - But none of the operations should care and they are faster > >> - than the 16 bit operations. */ > >> - movl $0x0800,%edx /* round towards +oo */ > >> - orl 4(%esp),%edx > >> - andl $0xfbff,%edx > >> - movl %edx,(%esp) > >> - fldcw (%esp) /* load modified control word */ > >> - > >> - frndint /* round */ > >> - > >> - /* Preserve "invalid" exceptions from sNaN input. 
*/ > >> - fnstsw > >> - andl $0x1, %eax > >> - orl %eax, 8(%esp) > >> - > >> - fldenv 4(%esp) /* restore original environment */ > >> - > >> - addl $32,%esp > >> - cfi_adjust_cfa_offset (-32) > >> - ret > >> -END (__ceill) > >> -libm_alias_ldouble (__ceil, ceil) > >> diff --git a/sysdeps/x86/fpu/s_ceill.c b/sysdeps/x86/fpu/s_ceill.c > >> new file mode 100644 > >> index 0000000000..6ccc9d84d4 > >> --- /dev/null > >> +++ b/sysdeps/x86/fpu/s_ceill.c > >> @@ -0,0 +1,38 @@ > >> +/* Return smallest integral value not less than argument. x86 version. > >> + Copyright (C) 2024 Free Software Foundation, Inc. > >> + This file is part of the GNU C Library. > >> + > >> + The GNU C Library is free software; you can redistribute it and/or > >> + modify it under the terms of the GNU Lesser General Public > >> + License as published by the Free Software Foundation; either > >> + version 2.1 of the License, or (at your option) any later version. > >> + > >> + The GNU C Library is distributed in the hope that it will be useful, > >> + but WITHOUT ANY WARRANTY; without even the implied warranty of > >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > >> + Lesser General Public License for more details. > >> + > >> + You should have received a copy of the GNU Lesser General Public > >> + License along with the GNU C Library; if not, see > >> + <https://www.gnu.org/licenses/>. */ > >> + > >> +#define NO_MATH_REDIRECT > >> +#include <math.h> > >> +#include <fenv_private.h> > >> +#include <libm-alias-ldouble.h> > >> + > >> +long double > >> +__ceill (long double x) > >> +{ > >> + fenv_t fenv; > >> + long double r; > >> + > >> + libc_feholdexcept_setround_387 (&fenv, FE_UPWARD); > >> + asm volatile ("frndint" : "=t" (r) : "0" (x)); > >> + /* Preserve "invalid" exceptions from sNaN input. 
*/ > >> + fenv.__status_word |= libc_fetestexcept_387 (FE_INVALID); > >> + libc_fesetenv_387 (&fenv); > >> + > > > > Since all these newly added functions are almost identical, > > should we use a template instead of repeating the similar > > code sequence? Something like > > > > TYPE > > FUNC (TYPE x) > > { > > fenv_t fenv; > > TYPE r; > > > > libc_feholdexcept_setround_387 (&fenv, FE_OPTION); > > asm volatile ("frndint" : "=t" (r) : "0" (x)); > > /* Preserve "invalid" exceptions from sNaN input. */ > > fenv.__status_word |= libc_fetestexcept_387 (FE_INVALID); > > libc_fesetenv_387 (&fenv); > > > > return r; > > > > } > > Sounds reasonable, I will update the patch. Please include "387" or something similar in the template filename since it should only be used with the 387 implementation. Thanks.
diff --git a/sysdeps/i386/fpu/s_ceill.S b/sysdeps/i386/fpu/s_ceill.S deleted file mode 100644 index a551fce7f9..0000000000 --- a/sysdeps/i386/fpu/s_ceill.S +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Public domain. - */ - -#include <libm-alias-ldouble.h> -#include <machine/asm.h> - -RCSID("$NetBSD: $") - -ENTRY(__ceill) - fldt 4(%esp) - subl $32,%esp - cfi_adjust_cfa_offset (32) - - fnstenv 4(%esp) /* store fpu environment */ - - /* We use here %edx although only the low 1 bits are defined. - But none of the operations should care and they are faster - than the 16 bit operations. */ - movl $0x0800,%edx /* round towards +oo */ - orl 4(%esp),%edx - andl $0xfbff,%edx - movl %edx,(%esp) - fldcw (%esp) /* load modified control word */ - - frndint /* round */ - - /* Preserve "invalid" exceptions from sNaN input. */ - fnstsw - andl $0x1, %eax - orl %eax, 8(%esp) - - fldenv 4(%esp) /* restore original environment */ - - addl $32,%esp - cfi_adjust_cfa_offset (-32) - ret -END (__ceill) -libm_alias_ldouble (__ceil, ceil) diff --git a/sysdeps/x86/fpu/s_ceill.c b/sysdeps/x86/fpu/s_ceill.c new file mode 100644 index 0000000000..6ccc9d84d4 --- /dev/null +++ b/sysdeps/x86/fpu/s_ceill.c @@ -0,0 +1,38 @@ +/* Return smallest integral value not less than argument. x86 version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define NO_MATH_REDIRECT +#include <math.h> +#include <fenv_private.h> +#include <libm-alias-ldouble.h> + +long double +__ceill (long double x) +{ + fenv_t fenv; + long double r; + + libc_feholdexcept_setround_387 (&fenv, FE_UPWARD); + asm volatile ("frndint" : "=t" (r) : "0" (x)); + /* Preserve "invalid" exceptions from sNaN input. */ + fenv.__status_word |= libc_fetestexcept_387 (FE_INVALID); + libc_fesetenv_387 (&fenv); + + return r; +} +libm_alias_ldouble (__ceil, ceil) diff --git a/sysdeps/x86_64/fpu/s_ceill.S b/sysdeps/x86_64/fpu/s_ceill.S deleted file mode 100644 index 16dbecd56d..0000000000 --- a/sysdeps/x86_64/fpu/s_ceill.S +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Public domain. - */ - -#include <libm-alias-ldouble.h> -#include <machine/asm.h> - - -ENTRY(__ceill) - fldt 8(%rsp) - - fnstenv -28(%rsp) /* store fpu environment */ - - /* We use here %edx although only the low 1 bits are defined. - But none of the operations should care and they are faster - than the 16 bit operations. */ - movl $0x0800,%edx /* round towards +oo */ - orl -28(%rsp),%edx - andl $0xfbff,%edx - movl %edx,-32(%rsp) - fldcw -32(%rsp) /* load modified control word */ - - frndint /* round */ - - /* Preserve "invalid" exceptions from sNaN input. */ - fnstsw - andl $0x1, %eax - orl %eax, -24(%rsp) - - fldenv -28(%rsp) /* restore original environment */ - - ret -END (__ceill) -libm_alias_ldouble (__ceil, ceil)