Message ID | 535ECAA9.4080800@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On Mon, Apr 28, 2014 at 06:39:53PM -0300, Adhemerval Zanella wrote: > This patch optimizes the FPSCR update on exception and rounding change > functions by just updating its value if new value if different from > current one. It also optimizes fedisableexcept and feenableexcept by > removing an unecessary FPSCR read. > > Tested on powerpc32, powerpc64, and powerpc64le. > > --- > > 2014-04-28 Adhemerval Zanella <azanella@linux.vnet.ibm.com> > > * sysdeps/powerpc/fpu/fclrexcpt.c (__feclearexcept): Do not update > FPSCR if value do not change. > * sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Likewise. > * sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Likewise. > * sysdeps/powerpc/fpu/feholdexcpt.c (feholdexcept): Likewise. > * sysdeps/powerpc/fpu/fesetenv.c (__fesetenv): Likewise. > * sysdeps/powerpc/fpu/fsetexcptflg.c (__fesetexceptflag): Likewise. > * sysdeps/powerpc/fpu/fenv_libc.h (fenv_reg_to_exceptions): New helper > function. > > diff --git a/sysdeps/powerpc/fpu/fclrexcpt.c b/sysdeps/powerpc/fpu/fclrexcpt.c > index cda2810..4607f62 100644 > --- a/sysdeps/powerpc/fpu/fclrexcpt.c > +++ b/sysdeps/powerpc/fpu/fclrexcpt.c > @@ -22,17 +22,18 @@ > int > __feclearexcept (int excepts) > { > - fenv_union_t u; > + fenv_union_t u, n; > > /* Get the current state. */ > u.fenv = fegetenv_register (); > > /* Clear the relevant bits. */ > - u.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID) > + n.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID) > | (excepts & FPSCR_STICKY_BITS)); > > /* Put the new state in effect. */ > - fesetenv_register (u.fenv); > + if (u.l != n.l) > + fesetenv_register (n.fenv); > > /* Success. */ > return 0; > diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c > index 5883e09..94c01ab 100644 > --- a/sysdeps/powerpc/fpu/fedisblxcpt.c > +++ b/sysdeps/powerpc/fpu/fedisblxcpt.c > @@ -22,15 +22,17 @@ > int > fedisableexcept (int excepts) > { > - fenv_union_t fe; > - int result, new; > + fenv_union_t fe, curr; > + int result = 0, new; result doesn't need to be initialized. > > - result = __fegetexcept (); > + /* Get current exception mask to return. */ > + fe.fenv = curr.fenv = fegetenv_register (); > + result = fenv_reg_to_exceptions (fe.l); > > if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID) > excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID; > > - fe.fenv = fegetenv_register (); > + /* Sets the new exception mask. */ > if (excepts & FE_INEXACT) > fe.l &= ~(1 << (31 - FPSCR_XE)); > if (excepts & FE_DIVBYZERO) > @@ -41,7 +43,9 @@ fedisableexcept (int excepts) > fe.l &= ~(1 << (31 - FPSCR_OE)); > if (excepts & FE_INVALID) > fe.l &= ~(1 << (31 - FPSCR_VE)); > - fesetenv_register (fe.fenv); > + > + if (fe.l != curr.l) > + fesetenv_register (fe.fenv); > > new = __fegetexcept (); > if (new == 0 && result != 0) > diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c > index 8ad0f97..01a68cf 100644 > --- a/sysdeps/powerpc/fpu/feenablxcpt.c > +++ b/sysdeps/powerpc/fpu/feenablxcpt.c > @@ -22,15 +22,17 @@ > int > feenableexcept (int excepts) > { > - fenv_union_t fe; > - int result, new; > + fenv_union_t fe, curr; > + int result = 0, new; > Likewise. > - result = __fegetexcept (); > + /* Get current exception mask to return. */ > + fe.fenv = curr.fenv = fegetenv_register (); > + result = fenv_reg_to_exceptions (fe.l); > > if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID) > excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID; > > - fe.fenv = fegetenv_register (); > + /* Sets the new exception mask. */ > if (excepts & FE_INEXACT) > fe.l |= (1 << (31 - FPSCR_XE)); > if (excepts & FE_DIVBYZERO) > @@ -41,7 +43,9 @@ feenableexcept (int excepts) > fe.l |= (1 << (31 - FPSCR_OE)); > if (excepts & FE_INVALID) > fe.l |= (1 << (31 - FPSCR_VE)); > - fesetenv_register (fe.fenv); > + > + if (fe.l != curr.l) > + fesetenv_register (fe.fenv); > > new = __fegetexcept (); > if (new != 0 && result == 0) > diff --git a/sysdeps/powerpc/fpu/feholdexcpt.c b/sysdeps/powerpc/fpu/feholdexcpt.c > index 1375a2f..764dd38 100644 > --- a/sysdeps/powerpc/fpu/feholdexcpt.c > +++ b/sysdeps/powerpc/fpu/feholdexcpt.c > @@ -32,6 +32,9 @@ feholdexcept (fenv_t *envp) > flag. */ > new.l = old.l & 0xffffffff00000007LL; > > + if (new.l == old.l) > + return 0; > + > /* If the old env had any enabled exceptions, then mask SIGFPE in the > MSR FE0/FE1 bits. This may allow the FPU to run faster because it > always takes the default action and can not generate SIGFPE. */ > diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h > index 28e4d1f..c1df5ce 100644 > --- a/sysdeps/powerpc/fpu/fenv_libc.h > +++ b/sysdeps/powerpc/fpu/fenv_libc.h > @@ -146,6 +146,23 @@ enum { > /* the remaining two least-significant bits keep the rounding mode */ > }; > > +static inline int > +fenv_reg_to_exceptions (unsigned long long l) > +{ > + int result = 0; > + if (l & (1 << (31 - FPSCR_XE))) > + result |= FE_INEXACT; > + if (l & (1 << (31 - FPSCR_ZE))) > + result |= FE_DIVBYZERO; > + if (l & (1 << (31 - FPSCR_UE))) > + result |= FE_UNDERFLOW; > + if (l & (1 << (31 - FPSCR_OE))) > + result |= FE_OVERFLOW; > + if (l & (1 << (31 - FPSCR_VE))) > + result |= FE_INVALID; > + return result; > +} > + > #ifdef _ARCH_PWR6 > /* Not supported in ISA 2.05. Provided for source compat only. */ > # define FPSCR_NI 29 > diff --git a/sysdeps/powerpc/fpu/fesetenv.c b/sysdeps/powerpc/fpu/fesetenv.c > index fa99ddb..138bde0 100644 > --- a/sysdeps/powerpc/fpu/fesetenv.c > +++ b/sysdeps/powerpc/fpu/fesetenv.c > @@ -29,6 +29,8 @@ __fesetenv (const fenv_t *envp) > /* get the currently set exceptions. */ > new.fenv = *envp; > old.fenv = fegetenv_register (); > + if (old.l == new.l) > + return 0; > > /* If the old env has no enabled exceptions and the new env has any enabled > exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the > diff --git a/sysdeps/powerpc/fpu/fsetexcptflg.c b/sysdeps/powerpc/fpu/fsetexcptflg.c > index 95193c1..cee6682 100644 > --- a/sysdeps/powerpc/fpu/fsetexcptflg.c > +++ b/sysdeps/powerpc/fpu/fsetexcptflg.c > @@ -21,7 +21,7 @@ > int > __fesetexceptflag (const fexcept_t *flagp, int excepts) > { > - fenv_union_t u; > + fenv_union_t u, n; > fexcept_t flag; > > /* Get the current state. */ > @@ -31,7 +31,7 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts) > flag = *flagp & excepts; > > /* Replace the exception status */ > - u.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts)) > + n.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts)) > | (flag & FPSCR_STICKY_BITS) > | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) > & FE_INVALID_SOFTWARE)); > @@ -39,7 +39,8 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts) > /* Store the new status word (along with the rest of the environment). > This may cause floating-point exceptions if the restored state > requests it. */ > - fesetenv_register (u.fenv); > + if (n.l != u.l) > + fesetenv_register (u.fenv); > > /* Deal with FE_INVALID_SOFTWARE not being implemented on some chips. */ > if (flag & FE_INVALID) > Looks good to me with those changes. Siddhesh
On 29-04-2014 01:07, Siddhesh Poyarekar wrote: > On Mon, Apr 28, 2014 at 06:39:53PM -0300, Adhemerval Zanella wrote: >> This patch optimizes the FPSCR update on exception and rounding change >> functions by just updating its value if new value if different from >> current one. It also optimizes fedisableexcept and feenableexcept by >> removing an unecessary FPSCR read. >> >> Tested on powerpc32, powerpc64, and powerpc64le. > Looks good to me with those changes. > > Siddhesh Thanks for the review, I have pushed as 18f2945ae9216cfcd53a162080a73e3d719de9e6 (I screw up and ending add dc041bd4dbaf0f22b962647e0d0a1b8090d0e679 as well because forgot to rebase...).
diff --git a/sysdeps/powerpc/fpu/fclrexcpt.c b/sysdeps/powerpc/fpu/fclrexcpt.c index cda2810..4607f62 100644 --- a/sysdeps/powerpc/fpu/fclrexcpt.c +++ b/sysdeps/powerpc/fpu/fclrexcpt.c @@ -22,17 +22,18 @@ int __feclearexcept (int excepts) { - fenv_union_t u; + fenv_union_t u, n; /* Get the current state. */ u.fenv = fegetenv_register (); /* Clear the relevant bits. */ - u.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID) + n.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID) | (excepts & FPSCR_STICKY_BITS)); /* Put the new state in effect. */ - fesetenv_register (u.fenv); + if (u.l != n.l) + fesetenv_register (n.fenv); /* Success. */ return 0; diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c index 5883e09..94c01ab 100644 --- a/sysdeps/powerpc/fpu/fedisblxcpt.c +++ b/sysdeps/powerpc/fpu/fedisblxcpt.c @@ -22,15 +22,17 @@ int fedisableexcept (int excepts) { - fenv_union_t fe; - int result, new; + fenv_union_t fe, curr; + int result = 0, new; - result = __fegetexcept (); + /* Get current exception mask to return. */ + fe.fenv = curr.fenv = fegetenv_register (); + result = fenv_reg_to_exceptions (fe.l); if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID) excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID; - fe.fenv = fegetenv_register (); + /* Sets the new exception mask. */ if (excepts & FE_INEXACT) fe.l &= ~(1 << (31 - FPSCR_XE)); if (excepts & FE_DIVBYZERO) @@ -41,7 +43,9 @@ fedisableexcept (int excepts) fe.l &= ~(1 << (31 - FPSCR_OE)); if (excepts & FE_INVALID) fe.l &= ~(1 << (31 - FPSCR_VE)); - fesetenv_register (fe.fenv); + + if (fe.l != curr.l) + fesetenv_register (fe.fenv); new = __fegetexcept (); if (new == 0 && result != 0) diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c index 8ad0f97..01a68cf 100644 --- a/sysdeps/powerpc/fpu/feenablxcpt.c +++ b/sysdeps/powerpc/fpu/feenablxcpt.c @@ -22,15 +22,17 @@ int feenableexcept (int excepts) { - fenv_union_t fe; - int result, new; + fenv_union_t fe, curr; + int result = 0, new; - result = __fegetexcept (); + /* Get current exception mask to return. */ + fe.fenv = curr.fenv = fegetenv_register (); + result = fenv_reg_to_exceptions (fe.l); if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID) excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID; - fe.fenv = fegetenv_register (); + /* Sets the new exception mask. */ if (excepts & FE_INEXACT) fe.l |= (1 << (31 - FPSCR_XE)); if (excepts & FE_DIVBYZERO) @@ -41,7 +43,9 @@ feenableexcept (int excepts) fe.l |= (1 << (31 - FPSCR_OE)); if (excepts & FE_INVALID) fe.l |= (1 << (31 - FPSCR_VE)); - fesetenv_register (fe.fenv); + + if (fe.l != curr.l) + fesetenv_register (fe.fenv); new = __fegetexcept (); if (new != 0 && result == 0) diff --git a/sysdeps/powerpc/fpu/feholdexcpt.c b/sysdeps/powerpc/fpu/feholdexcpt.c index 1375a2f..764dd38 100644 --- a/sysdeps/powerpc/fpu/feholdexcpt.c +++ b/sysdeps/powerpc/fpu/feholdexcpt.c @@ -32,6 +32,9 @@ feholdexcept (fenv_t *envp) flag. */ new.l = old.l & 0xffffffff00000007LL; + if (new.l == old.l) + return 0; + /* If the old env had any enabled exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the FPU to run faster because it always takes the default action and can not generate SIGFPE. */ diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h index 28e4d1f..c1df5ce 100644 --- a/sysdeps/powerpc/fpu/fenv_libc.h +++ b/sysdeps/powerpc/fpu/fenv_libc.h @@ -146,6 +146,23 @@ enum { /* the remaining two least-significant bits keep the rounding mode */ }; +static inline int +fenv_reg_to_exceptions (unsigned long long l) +{ + int result = 0; + if (l & (1 << (31 - FPSCR_XE))) + result |= FE_INEXACT; + if (l & (1 << (31 - FPSCR_ZE))) + result |= FE_DIVBYZERO; + if (l & (1 << (31 - FPSCR_UE))) + result |= FE_UNDERFLOW; + if (l & (1 << (31 - FPSCR_OE))) + result |= FE_OVERFLOW; + if (l & (1 << (31 - FPSCR_VE))) + result |= FE_INVALID; + return result; +} + #ifdef _ARCH_PWR6 /* Not supported in ISA 2.05. Provided for source compat only. */ # define FPSCR_NI 29 diff --git a/sysdeps/powerpc/fpu/fesetenv.c b/sysdeps/powerpc/fpu/fesetenv.c index fa99ddb..138bde0 100644 --- a/sysdeps/powerpc/fpu/fesetenv.c +++ b/sysdeps/powerpc/fpu/fesetenv.c @@ -29,6 +29,8 @@ __fesetenv (const fenv_t *envp) /* get the currently set exceptions. */ new.fenv = *envp; old.fenv = fegetenv_register (); + if (old.l == new.l) + return 0; /* If the old env has no enabled exceptions and the new env has any enabled exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the diff --git a/sysdeps/powerpc/fpu/fsetexcptflg.c b/sysdeps/powerpc/fpu/fsetexcptflg.c index 95193c1..cee6682 100644 --- a/sysdeps/powerpc/fpu/fsetexcptflg.c +++ b/sysdeps/powerpc/fpu/fsetexcptflg.c @@ -21,7 +21,7 @@ int __fesetexceptflag (const fexcept_t *flagp, int excepts) { - fenv_union_t u; + fenv_union_t u, n; fexcept_t flag; /* Get the current state. */ @@ -31,7 +31,7 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts) flag = *flagp & excepts; /* Replace the exception status */ - u.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts)) + n.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts)) | (flag & FPSCR_STICKY_BITS) | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) & FE_INVALID_SOFTWARE)); @@ -39,7 +39,8 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts) /* Store the new status word (along with the rest of the environment). This may cause floating-point exceptions if the restored state requests it. */ - fesetenv_register (u.fenv); + if (n.l != u.l) + fesetenv_register (u.fenv); /* Deal with FE_INVALID_SOFTWARE not being implemented on some chips. */ if (flag & FE_INVALID)