Message ID | 1567967373-27052-1-git-send-email-pc@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | [powerpc] libc_feupdateenv_test: optimize FPSCR access | expand |
On 9/8/19 1:29 PM, Paul A. Clarke wrote: > From: "Paul A. Clarke" <pc@us.ibm.com> > > ROUND_TO_ODD and a couple of other places use libc_feupdateenv_test to > restore the rounding mode and exception enables, preserve exception flags, > and test whether given exception(s) were generated. > > If the exception flags haven't changed, then it is sufficient and a bit > more efficient to just restore the rounding mode and enables, rather than > writing the full Floating-Point Status and Control Register (FPSCR). > > 2019-09-08 Paul A. Clarke <pc@us.ibm.com> > > * sysdeps/powerpc/fpu/fenv_libc.h (FPSCR_EXCEPTIONS_MASK): New. > * sysdeps/powerpc/fpu/fenv_private.h (__libc_femergeenv_ppc): Optimize > to write FPSCR control only, if exceptions have not changed. > --- > sysdeps/powerpc/fpu/fenv_libc.h | 4 ++++ > sysdeps/powerpc/fpu/fenv_private.h | 16 ++++++++++++++-- > 2 files changed, 18 insertions(+), 2 deletions(-) > > diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h > index b703c8d..0aad897 100644 > --- a/sysdeps/powerpc/fpu/fenv_libc.h > +++ b/sysdeps/powerpc/fpu/fenv_libc.h > @@ -204,6 +204,10 @@ enum { > (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK) > #define FPSCR_BASIC_EXCEPTIONS_MASK \ > (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK) > +#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \ > + FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \ > + FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \ > + FPSCR_VXCVI_MASK) OK > #define FPSCR_FPRF_MASK \ > (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \ > FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK) > diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h > index 5ebe6cd..af72560 100644 > --- a/sysdeps/powerpc/fpu/fenv_private.h > +++ b/sysdeps/powerpc/fpu/fenv_private.h > @@ -116,8 +116,20 @@ __libc_femergeenv_ppc (const fenv_t *envp, unsigned long long 
old_mask, > if ((old.l & _FPU_ALL_TRAPS) != 0 && (new.l & _FPU_ALL_TRAPS) == 0) > (void) __fe_mask_env (); > > - /* Atomically enable and raise (if appropriate) exceptions set in `new'. */ > - fesetenv_register (new.fenv); > + /* If requesting to keep status, replace control, and merge exceptions, > + and exceptions haven't changed, we can just set new control instead > + of the whole FPSCR. */ > + if ((old_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK)) > + == (FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK) && > + (new_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK)) > + == (FPSCR_CONTROL_MASK|FPSCR_EXCEPTIONS_MASK) && I think the _FPU_MASK_* macros should be rewritten in the same style as the first part of this patch. I had a hard time digesting this condition, but I think it is OK. > + (old.l & FPSCR_EXCEPTIONS_MASK) == (new.l & FPSCR_EXCEPTIONS_MASK)) > + { > + fesetenv_mode (new.fenv); > + } > + else > + /* Atomically enable and raise (if appropriate) exceptions set in `new'. */ > + fesetenv_register (new.fenv); OK. > > return old.l; > } > LGTM. thanks.
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h index b703c8d..0aad897 100644 --- a/sysdeps/powerpc/fpu/fenv_libc.h +++ b/sysdeps/powerpc/fpu/fenv_libc.h @@ -204,6 +204,10 @@ enum { (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK) #define FPSCR_BASIC_EXCEPTIONS_MASK \ (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK) +#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \ + FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \ + FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \ + FPSCR_VXCVI_MASK) #define FPSCR_FPRF_MASK \ (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \ FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK) diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h index 5ebe6cd..af72560 100644 --- a/sysdeps/powerpc/fpu/fenv_private.h +++ b/sysdeps/powerpc/fpu/fenv_private.h @@ -116,8 +116,20 @@ __libc_femergeenv_ppc (const fenv_t *envp, unsigned long long old_mask, if ((old.l & _FPU_ALL_TRAPS) != 0 && (new.l & _FPU_ALL_TRAPS) == 0) (void) __fe_mask_env (); - /* Atomically enable and raise (if appropriate) exceptions set in `new'. */ - fesetenv_register (new.fenv); + /* If requesting to keep status, replace control, and merge exceptions, + and exceptions haven't changed, we can just set new control instead + of the whole FPSCR. */ + if ((old_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK)) + == (FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK) && + (new_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK)) + == (FPSCR_CONTROL_MASK|FPSCR_EXCEPTIONS_MASK) && + (old.l & FPSCR_EXCEPTIONS_MASK) == (new.l & FPSCR_EXCEPTIONS_MASK)) + { + fesetenv_mode (new.fenv); + } + else + /* Atomically enable and raise (if appropriate) exceptions set in `new'. */ + fesetenv_register (new.fenv); return old.l; }
From: "Paul A. Clarke" <pc@us.ibm.com> ROUND_TO_ODD and a couple of other places use libc_feupdateenv_test to restore the rounding mode and exception enables, preserve exception flags, and test whether given exception(s) were generated. If the exception flags haven't changed, then it is sufficient and a bit more efficient to just restore the rounding mode and enables, rather than writing the full Floating-Point Status and Control Register (FPSCR). 2019-09-08 Paul A. Clarke <pc@us.ibm.com> * sysdeps/powerpc/fpu/fenv_libc.h (FPSCR_EXCEPTIONS_MASK): New. * sysdeps/powerpc/fpu/fenv_private.h (__libc_femergeenv_ppc): Optimize to write FPSCR control only, if exceptions have not changed. --- sysdeps/powerpc/fpu/fenv_libc.h | 4 ++++ sysdeps/powerpc/fpu/fenv_private.h | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-)