Message ID | 1564802553-6645-1-git-send-email-pc@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | [powerpc] fe{en,dis}ableexcept optimize bit translations | expand |
On 03/08/2019 00:22, Paul A. Clarke wrote: > From: "Paul A. Clarke" <pc@us.ibm.com> > > The exceptions passed to fe{en,dis}ableexcept() are defined in the ABI > as a bitmask, a combination of FE_INVALID, FE_OVERFLOW, etc. > Within the functions, these bits must be translated to/from the corresponding > enable bits in the Floating Point Status Control Register (FPSCR). > This translation is currently done bit-by-bit. The compiler generates > a series of conditional bit operations. Nicely, the "FE" exception > bits are all a uniform offset from the FPSCR enable bits, so the bit-by-bit > operation can instead be performed by a shift with appropriate masking. > > 2019-08-02 Paul A. Clarke <pc@us.ibm.com> > > * sysdeps/powerpc/fpu/fenv_libc.h: Define FPSCR bitmasks. > (fenv_reg_to_exceptions): Replace bitwise operations with mask-shift. > (fenv_exceptions_to_reg): New. > * sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Replace bitwise > operation with call to fenv_exceptions_to_reg(). > * sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Likewise. > > This patch is a prerequisite for the two patches I sent over the past two days: > - [powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses > - [powerpc] SET_RESTORE_ROUND improvements > Apologies for sending these out-of-order. I forgot about this one during the > freeze window. LGTM with a suggestion below. 
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> > > --- > sysdeps/powerpc/fpu/fedisblxcpt.c | 11 +----- > sysdeps/powerpc/fpu/feenablxcpt.c | 11 +----- > sysdeps/powerpc/fpu/fenv_libc.h | 72 ++++++++++++++++++++++++++++++++------- > 3 files changed, 61 insertions(+), 33 deletions(-) > > diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c > index 2872b1b..5cc8799 100644 > --- a/sysdeps/powerpc/fpu/fedisblxcpt.c > +++ b/sysdeps/powerpc/fpu/fedisblxcpt.c > @@ -33,16 +33,7 @@ fedisableexcept (int excepts) > excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID; > > /* Sets the new exception mask. */ > - if (excepts & FE_INEXACT) > - fe.l &= ~(1 << (31 - FPSCR_XE)); > - if (excepts & FE_DIVBYZERO) > - fe.l &= ~(1 << (31 - FPSCR_ZE)); > - if (excepts & FE_UNDERFLOW) > - fe.l &= ~(1 << (31 - FPSCR_UE)); > - if (excepts & FE_OVERFLOW) > - fe.l &= ~(1 << (31 - FPSCR_OE)); > - if (excepts & FE_INVALID) > - fe.l &= ~(1 << (31 - FPSCR_VE)); > + fe.l &= ~ fenv_exceptions_to_reg (excepts); > > if (fe.l != curr.l) > fesetenv_register (fe.fenv); Ok. > diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c > index dbaffdc..3b64398 100644 > --- a/sysdeps/powerpc/fpu/feenablxcpt.c > +++ b/sysdeps/powerpc/fpu/feenablxcpt.c > @@ -33,16 +33,7 @@ feenableexcept (int excepts) > excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID; > > /* Sets the new exception mask. */ > - if (excepts & FE_INEXACT) > - fe.l |= (1 << (31 - FPSCR_XE)); > - if (excepts & FE_DIVBYZERO) > - fe.l |= (1 << (31 - FPSCR_ZE)); > - if (excepts & FE_UNDERFLOW) > - fe.l |= (1 << (31 - FPSCR_UE)); > - if (excepts & FE_OVERFLOW) > - fe.l |= (1 << (31 - FPSCR_OE)); > - if (excepts & FE_INVALID) > - fe.l |= (1 << (31 - FPSCR_VE)); > + fe.l |= fenv_exceptions_to_reg (excepts); > > if (fe.l != curr.l) > fesetenv_register (fe.fenv); Ok. 
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h > index 9861f18..853239f 100644 > --- a/sysdeps/powerpc/fpu/fenv_libc.h > +++ b/sysdeps/powerpc/fpu/fenv_libc.h > @@ -131,57 +131,103 @@ __fesetround_inline_nocheck (const int round) > /* Definitions of all the FPSCR bit numbers */ > enum { > FPSCR_FX = 0, /* exception summary */ > +#define FPSCR_FX_MASK (1 << (31 - FPSCR_FX)) > FPSCR_FEX, /* enabled exception summary */ > +#define FPSCR_FEX_MASK (1 << (31 - FPSCR_FEX)) > FPSCR_VX, /* invalid operation summary */ > +#define FPSCR_VX_MASK (1 << (31 - FPSCR_VX)) > FPSCR_OX, /* overflow */ > +#define FPSCR_OX_MASK (1 << (31 - FPSCR_OX)) > FPSCR_UX, /* underflow */ > +#define FPSCR_UX_MASK (1 << (31 - FPSCR_UX)) > FPSCR_ZX, /* zero divide */ > +#define FPSCR_ZX_MASK (1 << (31 - FPSCR_ZX)) > FPSCR_XX, /* inexact */ > +#define FPSCR_XX_MASK (1 << (31 - FPSCR_XX)) > FPSCR_VXSNAN, /* invalid operation for sNaN */ > +#define FPSCR_VXSNAN_MASK (1 << (31 - FPSCR_VXSNAN)) > FPSCR_VXISI, /* invalid operation for Inf-Inf */ > +#define FPSCR_VXISI_MASK (1 << (31 - FPSCR_VXISI)) > FPSCR_VXIDI, /* invalid operation for Inf/Inf */ > +#define FPSCR_VXIDI_MASK (1 << (31 - FPSCR_VXIDI)) > FPSCR_VXZDZ, /* invalid operation for 0/0 */ > +#define FPSCR_VXZDZ_MASK (1 << (31 - FPSCR_VXZDZ)) > FPSCR_VXIMZ, /* invalid operation for Inf*0 */ > +#define FPSCR_VXIMZ_MASK (1 << (31 - FPSCR_VXIMZ)) > FPSCR_VXVC, /* invalid operation for invalid compare */ > +#define FPSCR_VXVC_MASK (1 << (31 - FPSCR_VXVC)) > FPSCR_FR, /* fraction rounded [fraction was incremented by round] */ > +#define FPSCR_FR_MASK (1 << (31 - FPSCR_FR)) > FPSCR_FI, /* fraction inexact */ > +#define FPSCR_FI_MASK (1 << (31 - FPSCR_FI)) > FPSCR_FPRF_C, /* result class descriptor */ > +#define FPSCR_FPRF_C_MASK (1 << (31 - FPSCR_FPRF_C)) > FPSCR_FPRF_FL, /* result less than (usually, less than 0) */ > +#define FPSCR_FPRF_FL_MASK (1 << (31 - FPSCR_FPRF_FL)) > FPSCR_FPRF_FG, /* result greater than */ > 
+#define FPSCR_FPRF_FG_MASK (1 << (31 - FPSCR_FPRF_FG)) > FPSCR_FPRF_FE, /* result equal to */ > +#define FPSCR_FPRF_FE_MASK (1 << (31 - FPSCR_FPRF_FE)) > FPSCR_FPRF_FU, /* result unordered */ > +#define FPSCR_FPRF_FU_MASK (1 << (31 - FPSCR_FPRF_FU)) > FPSCR_20, /* reserved */ > FPSCR_VXSOFT, /* invalid operation set by software */ > +#define FPSCR_VXSOFT_MASK (1 << (31 - FPSCR_VXSOFT)) > FPSCR_VXSQRT, /* invalid operation for square root */ > +#define FPSCR_VXSQRT_MASK (1 << (31 - FPSCR_VXSQRT)) > FPSCR_VXCVI, /* invalid operation for invalid integer convert */ > +#define FPSCR_VXCVI_MASK (1 << (31 - FPSCR_VXCVI)) > FPSCR_VE, /* invalid operation exception enable */ > +#define FPSCR_VE_MASK (1 << (31 - FPSCR_VE)) > FPSCR_OE, /* overflow exception enable */ > +#define FPSCR_OE_MASK (1 << (31 - FPSCR_OE)) > FPSCR_UE, /* underflow exception enable */ > +#define FPSCR_UE_MASK (1 << (31 - FPSCR_UE)) > FPSCR_ZE, /* zero divide exception enable */ > +#define FPSCR_ZE_MASK (1 << (31 - FPSCR_ZE)) > FPSCR_XE, /* inexact exception enable */ > +#define FPSCR_XE_MASK (1 << (31 - FPSCR_XE)) > #ifdef _ARCH_PWR6 > FPSCR_29, /* Reserved in ISA 2.05 */ > +#define FPSCR_NI_MASK (1 << (31 - FPSCR_29)) > #else > - FPSCR_NI /* non-IEEE mode (typically, no denormalised numbers) */ > + FPSCR_NI, /* non-IEEE mode (typically, no denormalised numbers) */ > +#define FPSCR_NI_MASK (1 << (31 - FPSCR_NI)) > #endif /* _ARCH_PWR6 */ > /* the remaining two least-significant bits keep the rounding mode */ > + FPSCR_RN_hi, > +#define FPSCR_RN_hi_MASK (1 << (31 - FPSCR_RN_hi)) > + FPSCR_RN_lo > +#define FPSCR_RN_lo_MASK (1 << (31 - FPSCR_RN_lo)) > }; > > +#define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK) > +#define FPSCR_ENABLES_MASK \ > + (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK) > +#define FPSCR_BASIC_EXCEPTIONS_MASK \ > + (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK) > + > +#define FPSCR_CONTROL_MASK 
(FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK) It is ok, but one suggestion I have is it would be simple to just define a macro to create the mask and use it. Like: #define FPSCR_MASK(bit) (1 << (31 - (bit))) #define FPSCR_RN_MASK (FPSCR_MASK (FPSCR_RN_hi) | FPSCR_MASK (FPSCR_RN_lo)) #define FPSCR_ENABLES_MASK \ (FPSCR_MASK(FPSCR_VE) | FPSCR_MASK (FPSCR_OE) | FPSCR_MASK (FPSCR_UE) \ | FPSCR_MASK (FPSCR_ZE) | FPSCR_MASK (FPSCR_XE)) #define FPSCR_BASIC_EXCEPTIONS_MASK \ (FPSCR_MASK (FPSCR_VX) | FPSCR_MASK (FPSCR_OX) | FPSCR_MASK (FPSCR_UX) \ | FPSCR_MASK (FPSCR_ZX) | FPSCR_MASK (FPSCR_XX)) > + > +/* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits > + in the FPSCR, albeit shifted to different but corresponding locations. > + Similarly, the exception indicator bits in the FPSCR correspond one-to-one > + with the exception enable bits. It is thus possible to map the FENV(1) > + exceptions directly to the FPSCR enables with a simple mask and shift, > + and vice versa. */ > +#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22 > + > static inline int > fenv_reg_to_exceptions (unsigned long long l) > { > - int result = 0; > - if (l & (1 << (31 - FPSCR_XE))) > - result |= FE_INEXACT; > - if (l & (1 << (31 - FPSCR_ZE))) > - result |= FE_DIVBYZERO; > - if (l & (1 << (31 - FPSCR_UE))) > - result |= FE_UNDERFLOW; > - if (l & (1 << (31 - FPSCR_OE))) > - result |= FE_OVERFLOW; > - if (l & (1 << (31 - FPSCR_VE))) > - result |= FE_INVALID; > - return result; > + return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT; > +} > + > +static inline unsigned long long > +fenv_exceptions_to_reg (int excepts) > +{ > + return (unsigned long long) > + (excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT; > } > > #ifdef _ARCH_PWR6 > Ok.
On 8/28/19 7:58 AM, Adhemerval Zanella wrote: > On 03/08/2019 00:22, Paul A. Clarke wrote: >> The exceptions passed to fe{en,dis}ableexcept() are defined in the ABI >> as a bitmask, a combination of FE_INVALID, FE_OVERFLOW, etc. >> Within the functions, these bits must be translated to/from the corresponding >> enable bits in the Floating Point Status Control Register (FPSCR). >> This translation is currently done bit-by-bit. The compiler generates >> a series of conditional bit operations. Nicely, the "FE" exception >> bits are all a uniform offset from the FPSCR enable bits, so the bit-by-bit >> operation can instead be performed by a shift with appropriate masking. >> >> 2019-08-02 Paul A. Clarke <pc@us.ibm.com> >> >> * sysdeps/powerpc/fpu/fenv_libc.h: Define FPSCR bitmasks. >> (fenv_reg_to_exceptions): Replace bitwise operations with mask-shift. >> (fenv_exceptions_to_reg): New. >> * sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Replace bitwise >> operation with call to fenv_exceptions_to_reg(). >> * sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Likewise. >> >> This patch is a prerequisite for the two patches I sent over the past two days: >> - [powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses >> - [powerpc] SET_RESTORE_ROUND improvements >> Apologies for sending these out-of-order. I forgot about this one during the >> freeze window. > > LGTM with a suggestion below. 
> > Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> >> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h >> index 9861f18..853239f 100644 >> --- a/sysdeps/powerpc/fpu/fenv_libc.h >> +++ b/sysdeps/powerpc/fpu/fenv_libc.h >> @@ -131,57 +131,103 @@ __fesetround_inline_nocheck (const int round) >> /* Definitions of all the FPSCR bit numbers */ >> enum { >> FPSCR_FX = 0, /* exception summary */ >> +#define FPSCR_FX_MASK (1 << (31 - FPSCR_FX)) >> FPSCR_FEX, /* enabled exception summary */ >> +#define FPSCR_FEX_MASK (1 << (31 - FPSCR_FEX)) [...] >> +#define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK) >> +#define FPSCR_ENABLES_MASK \ >> + (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK) >> +#define FPSCR_BASIC_EXCEPTIONS_MASK \ >> + (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK) >> + >> +#define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK) > It is ok, but one suggestion I have is it would be simple to just define a > macro to create the mask and use it. Like: > > #define FPSCR_MASK(bit) (1 << (31 - (bit))) > > #define FPSCR_RN_MASK (FPSCR_MASK (FPSCR_RN_hi) | FPSCR_MASK (FPSCR_RN_lo)) > #define FPSCR_ENABLES_MASK \ > (FPSCR_MASK(FPSCR_VE) | FPSCR_MASK (FPSCR_OE) | FPSCR_MASK (FPSCR_UE) \ > | FPSCR_MASK (FPSCR_ZE) | FPSCR_MASK (FPSCR_XE)) > #define FPSCR_BASIC_EXCEPTIONS_MASK \ > (FPSCR_MASK (FPSCR_VX) | FPSCR_MASK (FPSCR_OX) | FPSCR_MASK (FPSCR_UX) \ > | FPSCR_MASK (FPSCR_ZX) | FPSCR_MASK (FPSCR_XX)) Thanks very much for the review, Adhemerval! Apologies are due, as I forgot to include your "Reviewed-by" in the commit. :-/ This is the essence of what I checked in. I just used the newly suggested FPSCR_MASK for all of the bits, and the more complex masks stayed the same. 
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h index 9861f18..9956136 100644 --- a/sysdeps/powerpc/fpu/fenv_libc.h +++ b/sysdeps/powerpc/fpu/fenv_libc.h @@ -128,60 +128,108 @@ __fesetround_inline_nocheck (const int round) asm volatile ("mtfsfi 7,%0" : : "i" (round)); } +#define FPSCR_MASK(bit) (1 << (31 - (bit))) + /* Definitions of all the FPSCR bit numbers */ enum { FPSCR_FX = 0, /* exception summary */ +#define FPSCR_FX_MASK (FPSCR_MASK (FPSCR_FX)) FPSCR_FEX, /* enabled exception summary */ +#define FPSCR_FEX_MASK (FPSCR_MASK (FPSCR_FEX)) [...] +#define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK) +#define FPSCR_ENABLES_MASK \ + (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK) +#define FPSCR_BASIC_EXCEPTIONS_MASK \ + (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK) + +#define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK) PC
diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c index 2872b1b..5cc8799 100644 --- a/sysdeps/powerpc/fpu/fedisblxcpt.c +++ b/sysdeps/powerpc/fpu/fedisblxcpt.c @@ -33,16 +33,7 @@ fedisableexcept (int excepts) excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID; /* Sets the new exception mask. */ - if (excepts & FE_INEXACT) - fe.l &= ~(1 << (31 - FPSCR_XE)); - if (excepts & FE_DIVBYZERO) - fe.l &= ~(1 << (31 - FPSCR_ZE)); - if (excepts & FE_UNDERFLOW) - fe.l &= ~(1 << (31 - FPSCR_UE)); - if (excepts & FE_OVERFLOW) - fe.l &= ~(1 << (31 - FPSCR_OE)); - if (excepts & FE_INVALID) - fe.l &= ~(1 << (31 - FPSCR_VE)); + fe.l &= ~ fenv_exceptions_to_reg (excepts); if (fe.l != curr.l) fesetenv_register (fe.fenv); diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c index dbaffdc..3b64398 100644 --- a/sysdeps/powerpc/fpu/feenablxcpt.c +++ b/sysdeps/powerpc/fpu/feenablxcpt.c @@ -33,16 +33,7 @@ feenableexcept (int excepts) excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID; /* Sets the new exception mask. 
*/ - if (excepts & FE_INEXACT) - fe.l |= (1 << (31 - FPSCR_XE)); - if (excepts & FE_DIVBYZERO) - fe.l |= (1 << (31 - FPSCR_ZE)); - if (excepts & FE_UNDERFLOW) - fe.l |= (1 << (31 - FPSCR_UE)); - if (excepts & FE_OVERFLOW) - fe.l |= (1 << (31 - FPSCR_OE)); - if (excepts & FE_INVALID) - fe.l |= (1 << (31 - FPSCR_VE)); + fe.l |= fenv_exceptions_to_reg (excepts); if (fe.l != curr.l) fesetenv_register (fe.fenv); diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h index 9861f18..853239f 100644 --- a/sysdeps/powerpc/fpu/fenv_libc.h +++ b/sysdeps/powerpc/fpu/fenv_libc.h @@ -131,57 +131,103 @@ __fesetround_inline_nocheck (const int round) /* Definitions of all the FPSCR bit numbers */ enum { FPSCR_FX = 0, /* exception summary */ +#define FPSCR_FX_MASK (1 << (31 - FPSCR_FX)) FPSCR_FEX, /* enabled exception summary */ +#define FPSCR_FEX_MASK (1 << (31 - FPSCR_FEX)) FPSCR_VX, /* invalid operation summary */ +#define FPSCR_VX_MASK (1 << (31 - FPSCR_VX)) FPSCR_OX, /* overflow */ +#define FPSCR_OX_MASK (1 << (31 - FPSCR_OX)) FPSCR_UX, /* underflow */ +#define FPSCR_UX_MASK (1 << (31 - FPSCR_UX)) FPSCR_ZX, /* zero divide */ +#define FPSCR_ZX_MASK (1 << (31 - FPSCR_ZX)) FPSCR_XX, /* inexact */ +#define FPSCR_XX_MASK (1 << (31 - FPSCR_XX)) FPSCR_VXSNAN, /* invalid operation for sNaN */ +#define FPSCR_VXSNAN_MASK (1 << (31 - FPSCR_VXSNAN)) FPSCR_VXISI, /* invalid operation for Inf-Inf */ +#define FPSCR_VXISI_MASK (1 << (31 - FPSCR_VXISI)) FPSCR_VXIDI, /* invalid operation for Inf/Inf */ +#define FPSCR_VXIDI_MASK (1 << (31 - FPSCR_VXIDI)) FPSCR_VXZDZ, /* invalid operation for 0/0 */ +#define FPSCR_VXZDZ_MASK (1 << (31 - FPSCR_VXZDZ)) FPSCR_VXIMZ, /* invalid operation for Inf*0 */ +#define FPSCR_VXIMZ_MASK (1 << (31 - FPSCR_VXIMZ)) FPSCR_VXVC, /* invalid operation for invalid compare */ +#define FPSCR_VXVC_MASK (1 << (31 - FPSCR_VXVC)) FPSCR_FR, /* fraction rounded [fraction was incremented by round] */ +#define FPSCR_FR_MASK (1 << (31 - FPSCR_FR)) FPSCR_FI, 
/* fraction inexact */ +#define FPSCR_FI_MASK (1 << (31 - FPSCR_FI)) FPSCR_FPRF_C, /* result class descriptor */ +#define FPSCR_FPRF_C_MASK (1 << (31 - FPSCR_FPRF_C)) FPSCR_FPRF_FL, /* result less than (usually, less than 0) */ +#define FPSCR_FPRF_FL_MASK (1 << (31 - FPSCR_FPRF_FL)) FPSCR_FPRF_FG, /* result greater than */ +#define FPSCR_FPRF_FG_MASK (1 << (31 - FPSCR_FPRF_FG)) FPSCR_FPRF_FE, /* result equal to */ +#define FPSCR_FPRF_FE_MASK (1 << (31 - FPSCR_FPRF_FE)) FPSCR_FPRF_FU, /* result unordered */ +#define FPSCR_FPRF_FU_MASK (1 << (31 - FPSCR_FPRF_FU)) FPSCR_20, /* reserved */ FPSCR_VXSOFT, /* invalid operation set by software */ +#define FPSCR_VXSOFT_MASK (1 << (31 - FPSCR_VXSOFT)) FPSCR_VXSQRT, /* invalid operation for square root */ +#define FPSCR_VXSQRT_MASK (1 << (31 - FPSCR_VXSQRT)) FPSCR_VXCVI, /* invalid operation for invalid integer convert */ +#define FPSCR_VXCVI_MASK (1 << (31 - FPSCR_VXCVI)) FPSCR_VE, /* invalid operation exception enable */ +#define FPSCR_VE_MASK (1 << (31 - FPSCR_VE)) FPSCR_OE, /* overflow exception enable */ +#define FPSCR_OE_MASK (1 << (31 - FPSCR_OE)) FPSCR_UE, /* underflow exception enable */ +#define FPSCR_UE_MASK (1 << (31 - FPSCR_UE)) FPSCR_ZE, /* zero divide exception enable */ +#define FPSCR_ZE_MASK (1 << (31 - FPSCR_ZE)) FPSCR_XE, /* inexact exception enable */ +#define FPSCR_XE_MASK (1 << (31 - FPSCR_XE)) #ifdef _ARCH_PWR6 FPSCR_29, /* Reserved in ISA 2.05 */ +#define FPSCR_NI_MASK (1 << (31 - FPSCR_29)) #else - FPSCR_NI /* non-IEEE mode (typically, no denormalised numbers) */ + FPSCR_NI, /* non-IEEE mode (typically, no denormalised numbers) */ +#define FPSCR_NI_MASK (1 << (31 - FPSCR_NI)) #endif /* _ARCH_PWR6 */ /* the remaining two least-significant bits keep the rounding mode */ + FPSCR_RN_hi, +#define FPSCR_RN_hi_MASK (1 << (31 - FPSCR_RN_hi)) + FPSCR_RN_lo +#define FPSCR_RN_lo_MASK (1 << (31 - FPSCR_RN_lo)) }; +#define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK) +#define FPSCR_ENABLES_MASK \ + 
(FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK) +#define FPSCR_BASIC_EXCEPTIONS_MASK \ + (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK) + +#define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK) + +/* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits + in the FPSCR, albeit shifted to different but corresponding locations. + Similarly, the exception indicator bits in the FPSCR correspond one-to-one + with the exception enable bits. It is thus possible to map the FENV(1) + exceptions directly to the FPSCR enables with a simple mask and shift, + and vice versa. */ +#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22 + static inline int fenv_reg_to_exceptions (unsigned long long l) { - int result = 0; - if (l & (1 << (31 - FPSCR_XE))) - result |= FE_INEXACT; - if (l & (1 << (31 - FPSCR_ZE))) - result |= FE_DIVBYZERO; - if (l & (1 << (31 - FPSCR_UE))) - result |= FE_UNDERFLOW; - if (l & (1 << (31 - FPSCR_OE))) - result |= FE_OVERFLOW; - if (l & (1 << (31 - FPSCR_VE))) - result |= FE_INVALID; - return result; + return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT; +} + +static inline unsigned long long +fenv_exceptions_to_reg (int excepts) +{ + return (unsigned long long) + (excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT; } #ifdef _ARCH_PWR6
From: "Paul A. Clarke" <pc@us.ibm.com> The exceptions passed to fe{en,dis}ableexcept() are defined in the ABI as a bitmask, a combination of FE_INVALID, FE_OVERFLOW, etc. Within the functions, these bits must be translated to/from the corresponding enable bits in the Floating Point Status Control Register (FPSCR). This translation is currently done bit-by-bit. The compiler generates a series of conditional bit operations. Nicely, the "FE" exception bits are all a uniform offset from the FPSCR enable bits, so the bit-by-bit operation can instead be performed by a shift with appropriate masking. 2019-08-02 Paul A. Clarke <pc@us.ibm.com> * sysdeps/powerpc/fpu/fenv_libc.h: Define FPSCR bitmasks. (fenv_reg_to_exceptions): Replace bitwise operations with mask-shift. (fenv_exceptions_to_reg): New. * sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Replace bitwise operation with call to fenv_exceptions_to_reg(). * sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Likewise. This patch is a prerequisite for the two patches I sent over the past two days: - [powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses - [powerpc] SET_RESTORE_ROUND improvements Apologies for sending these out-of-order. I forgot about this one during the freeze window. --- sysdeps/powerpc/fpu/fedisblxcpt.c | 11 +----- sysdeps/powerpc/fpu/feenablxcpt.c | 11 +----- sysdeps/powerpc/fpu/fenv_libc.h | 72 ++++++++++++++++++++++++++++++++------- 3 files changed, 61 insertions(+), 33 deletions(-)