Message ID | 001001cf6ad2$06603b00$1320b100$@com |
---|---|
State | New |
Headers | show |
On Thu, May 08, 2014 at 04:27:29PM +0100, Wilco wrote: > Hi Marcus, > > Since there have been no further comments, could you check this in? I had suggested a couple of changes in addition to my comment. Siddhesh > > Wilco > > ChangeLog: > 2014-05-08 Wilco <wdijkstr@arm.com> > > * sysdeps/generic/math_private.h: Add default HAVE_RM_CTX > implementation. New function (libc_feholdsetround_noex_ctx). > > > -----Original Message----- > From: Wilco [mailto:wdijkstr@arm.com] > Sent: 15 April 2014 14:35 > To: 'libc-alpha@sourceware.org' > Subject: [PATCH] Add generic HAVE_RM_CTX implementation > > Hi, > > This patch adds a generic implementation of HAVE_RM_CTX using standard fenv calls. As a result math > functions using SET_RESTORE_ROUND* macros do not suffer from a large slowdown on targets which do > not implement optimized libc_fe*_ctx inline functions. Most of the libc_fe* inline functions are now > unused and could be removed in the future (there are a few math functions left which use a mixture > of standard fenv calls and libc_fe* inline functions - they could be updated to use > SET_RESTORE_ROUND or improved to avoid expensive fenv manipulations across just a few FP > instructions). > > libc_feholdsetround*_noex_ctx is added to enable better optimization of SET_RESTORE_ROUND_NOEX* > implementations. > > Performance measurements on ARM and x86 of sin() show significant gains over the current default, > fairly close to a highly optimized fenv_private: > > ARM x86 > no fenv_private : 100% 100% > generic HAVE_RM_CTX : 250% 350% > fenv_private (CTX) : 250% 450% > > Wilco > > ChangeLog: > 2014-04-15 Wilco <wdijkstr@arm.com> > > * sysdeps/generic/math_private.h: Add generic HAVE_RM_CTX > implementation. New function (libc_feholdsetround_noex_ctx). 
> > --- > sysdeps/generic/math_private.h | 116 ++++++++++++++++++++++++++++++++-------- > 1 file changed, 93 insertions(+), 23 deletions(-) > > diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h > index 9b881a3..fade483 100644 > --- a/sysdeps/generic/math_private.h > +++ b/sysdeps/generic/math_private.h > @@ -20,6 +20,7 @@ > #include <stdint.h> > #include <sys/types.h> > #include <fenv.h> > +#include <get-rounding-mode.h> > > /* The original fdlibm code used statements like: > n0 = ((*(int*)&one)>>29)^1; * index of high word * > @@ -557,6 +558,16 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) > block is different from the current state. This saves a lot of time when > the floating point unit is much slower than the fixed point units. */ > > +# ifndef libc_feholdsetround_noex_ctx > +# define libc_feholdsetround_noex_ctx libc_feholdsetround_ctx > +# endif > +# ifndef libc_feholdsetround_noexf_ctx > +# define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx > +# endif > +# ifndef libc_feholdsetround_noexl_ctx > +# define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx > +# endif > + > # ifndef libc_feresetround_noex_ctx > # define libc_feresetround_noex_ctx libc_fesetenv_ctx > # endif > @@ -567,24 +578,80 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) > # define libc_feresetround_noexl_ctx libc_fesetenvl_ctx > # endif > > -# ifndef libc_feholdsetround_53bit_ctx > -# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx > -# endif > +#else > > -# ifndef libc_feresetround_53bit_ctx > -# define libc_feresetround_53bit_ctx libc_feresetround_ctx > -# endif > +/* Default implementation using standard fenv functions. > + Avoid unnecessary rounding mode changes by first checking the > + current rounding mode. Note the use of __glibc_unlikely is > + important for performance. 
*/ > > -# define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \ > - struct rm_ctx ctx __attribute__((cleanup(CLEANUPFUNC ## _ctx))); \ > - ROUNDFUNC ## _ctx (&ctx, (RM)) > -#else > -# define SET_RESTORE_ROUND_GENERIC(RM, ROUNDFUNC, CLEANUPFUNC) \ > - fenv_t __libc_save_rm __attribute__((cleanup(CLEANUPFUNC))); \ > - ROUNDFUNC (&__libc_save_rm, (RM)) > +static __always_inline void > +libc_feholdsetround_ctx (struct rm_ctx *ctx, int round) > +{ > + ctx->updated_status = false; > + > + /* Update rounding mode only if different. */ > + if (__glibc_unlikely (round != get_rounding_mode ())) > + { > + ctx->updated_status = true; > + fegetenv (&ctx->env); > + fesetround (round); > + } > +} > + > +static __always_inline void > +libc_feresetround_ctx (struct rm_ctx *ctx) > +{ > + /* Restore the rounding mode if updated. */ > + if (__glibc_unlikely (ctx->updated_status)) > + feupdateenv (&ctx->env); > +} > + > +static __always_inline void > +libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round) > +{ > + /* Save exception flags and rounding mode. */ > + fegetenv (&ctx->env); > + > + /* Update rounding mode only if different. */ > + if (__glibc_unlikely (round != get_rounding_mode ())) > + fesetround (round); > +} > + > +static __always_inline void > +libc_feresetround_noex_ctx (struct rm_ctx *ctx) > +{ > + /* Restore exception flags and rounding mode. 
*/ > + fesetenv (&ctx->env); > +} > + > +# define libc_feholdsetroundf_ctx libc_feholdsetround_ctx > +# define libc_feholdsetroundl_ctx libc_feholdsetround_ctx > +# define libc_feresetroundf_ctx libc_feresetround_ctx > +# define libc_feresetroundl_ctx libc_feresetround_ctx > + > +# define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ctx > +# define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ctx > +# define libc_feresetround_noexf_ctx libc_feresetround_noex_ctx > +# define libc_feresetround_noexl_ctx libc_feresetround_noex_ctx > + > +#endif > + > +#ifndef libc_feholdsetround_53bit_ctx > +# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx > #endif > +#ifndef libc_feresetround_53bit_ctx > +# define libc_feresetround_53bit_ctx libc_feresetround_ctx > +#endif > + > +#define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \ > + struct rm_ctx ctx __attribute__((cleanup (CLEANUPFUNC ## _ctx))); \ > + ROUNDFUNC ## _ctx (&ctx, (RM)) > > -/* Save and restore the rounding mode within a lexical block. */ > +/* Set the rounding mode within a lexical block. Restore the rounding mode to > + the value at the start of the block. The exception mode must be preserved. > + Exceptions raised within the block must be set in the exception flags. > + Non-stop mode may be enabled inside the block. */ > > #define SET_RESTORE_ROUND(RM) \ > SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround) > @@ -593,15 +660,18 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) > #define SET_RESTORE_ROUNDL(RM) \ > SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetroundl) > > -/* Save and restore the rounding mode within a lexical block, and also > - the set of exceptions raised within the block may be discarded. 
*/ > - > -#define SET_RESTORE_ROUND_NOEX(RM) \ > - SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround_noex) > -#define SET_RESTORE_ROUND_NOEXF(RM) \ > - SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetround_noexf) > -#define SET_RESTORE_ROUND_NOEXL(RM) \ > - SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetround_noexl) > +/* Set the rounding mode within a lexical block. Restore the rounding mode to > + the value at the start of the block. The exception mode must be preserved. > + Exceptions raised within the block must be discarded, and exception flags > + are restored to the value at the start of the block. > + Non-stop mode may be enabled inside the block. */ > + > +#define SET_RESTORE_ROUND_NOEX(RM) SET_RESTORE_ROUND_GENERIC (RM, \ > + libc_feholdsetround_noex, libc_feresetround_noex) > +#define SET_RESTORE_ROUND_NOEXF(RM) SET_RESTORE_ROUND_GENERIC (RM, \ > + libc_feholdsetround_noexf, libc_feresetround_noexf) > +#define SET_RESTORE_ROUND_NOEXL(RM) SET_RESTORE_ROUND_GENERIC (RM, \ > + libc_feholdsetround_noexl, libc_feresetround_noexl) > > /* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits. */ > #define SET_RESTORE_ROUND_53BIT(RM) \ > -- > 1.7.9.5 >
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h index 9b881a3..fade483 100644 --- a/sysdeps/generic/math_private.h +++ b/sysdeps/generic/math_private.h @@ -20,6 +20,7 @@ #include <stdint.h> #include <sys/types.h> #include <fenv.h> +#include <get-rounding-mode.h> /* The original fdlibm code used statements like: n0 = ((*(int*)&one)>>29)^1; * index of high word * @@ -557,6 +558,16 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) block is different from the current state. This saves a lot of time when the floating point unit is much slower than the fixed point units. */ +# ifndef libc_feholdsetround_noex_ctx +# define libc_feholdsetround_noex_ctx libc_feholdsetround_ctx +# endif +# ifndef libc_feholdsetround_noexf_ctx +# define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx +# endif +# ifndef libc_feholdsetround_noexl_ctx +# define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx +# endif + # ifndef libc_feresetround_noex_ctx # define libc_feresetround_noex_ctx libc_fesetenv_ctx # endif @@ -567,24 +578,80 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) # define libc_feresetround_noexl_ctx libc_fesetenvl_ctx # endif -# ifndef libc_feholdsetround_53bit_ctx -# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx -# endif +#else -# ifndef libc_feresetround_53bit_ctx -# define libc_feresetround_53bit_ctx libc_feresetround_ctx -# endif +/* Default implementation using standard fenv functions. + Avoid unnecessary rounding mode changes by first checking the + current rounding mode. Note the use of __glibc_unlikely is + important for performance. 
*/ -# define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \ - struct rm_ctx ctx __attribute__((cleanup(CLEANUPFUNC ## _ctx))); \ - ROUNDFUNC ## _ctx (&ctx, (RM)) -#else -# define SET_RESTORE_ROUND_GENERIC(RM, ROUNDFUNC, CLEANUPFUNC) \ - fenv_t __libc_save_rm __attribute__((cleanup(CLEANUPFUNC))); \ - ROUNDFUNC (&__libc_save_rm, (RM)) +static __always_inline void +libc_feholdsetround_ctx (struct rm_ctx *ctx, int round) +{ + ctx->updated_status = false; + + /* Update rounding mode only if different. */ + if (__glibc_unlikely (round != get_rounding_mode ())) + { + ctx->updated_status = true; + fegetenv (&ctx->env); + fesetround (round); + } +} + +static __always_inline void +libc_feresetround_ctx (struct rm_ctx *ctx) +{ + /* Restore the rounding mode if updated. */ + if (__glibc_unlikely (ctx->updated_status)) + feupdateenv (&ctx->env); +} + +static __always_inline void +libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round) +{ + /* Save exception flags and rounding mode. */ + fegetenv (&ctx->env); + + /* Update rounding mode only if different. */ + if (__glibc_unlikely (round != get_rounding_mode ())) + fesetround (round); +} + +static __always_inline void +libc_feresetround_noex_ctx (struct rm_ctx *ctx) +{ + /* Restore exception flags and rounding mode. 
*/ + fesetenv (&ctx->env); +} + +# define libc_feholdsetroundf_ctx libc_feholdsetround_ctx +# define libc_feholdsetroundl_ctx libc_feholdsetround_ctx +# define libc_feresetroundf_ctx libc_feresetround_ctx +# define libc_feresetroundl_ctx libc_feresetround_ctx + +# define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ctx +# define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ctx +# define libc_feresetround_noexf_ctx libc_feresetround_noex_ctx +# define libc_feresetround_noexl_ctx libc_feresetround_noex_ctx + +#endif + +#ifndef libc_feholdsetround_53bit_ctx +# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx #endif +#ifndef libc_feresetround_53bit_ctx +# define libc_feresetround_53bit_ctx libc_feresetround_ctx +#endif + +#define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \ + struct rm_ctx ctx __attribute__((cleanup (CLEANUPFUNC ## _ctx))); \ + ROUNDFUNC ## _ctx (&ctx, (RM)) -/* Save and restore the rounding mode within a lexical block. */ +/* Set the rounding mode within a lexical block. Restore the rounding mode to + the value at the start of the block. The exception mode must be preserved. + Exceptions raised within the block must be set in the exception flags. + Non-stop mode may be enabled inside the block. */ #define SET_RESTORE_ROUND(RM) \ SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround) @@ -593,15 +660,18 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) #define SET_RESTORE_ROUNDL(RM) \ SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetroundl) -/* Save and restore the rounding mode within a lexical block, and also - the set of exceptions raised within the block may be discarded. 
*/ - -#define SET_RESTORE_ROUND_NOEX(RM) \ - SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround_noex) -#define SET_RESTORE_ROUND_NOEXF(RM) \ - SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetround_noexf) -#define SET_RESTORE_ROUND_NOEXL(RM) \ - SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetround_noexl) +/* Set the rounding mode within a lexical block. Restore the rounding mode to + the value at the start of the block. The exception mode must be preserved. + Exceptions raised within the block must be discarded, and exception flags + are restored to the value at the start of the block. + Non-stop mode may be enabled inside the block. */ + +#define SET_RESTORE_ROUND_NOEX(RM) SET_RESTORE_ROUND_GENERIC (RM, \ + libc_feholdsetround_noex, libc_feresetround_noex) +#define SET_RESTORE_ROUND_NOEXF(RM) SET_RESTORE_ROUND_GENERIC (RM, \ + libc_feholdsetround_noexf, libc_feresetround_noexf) +#define SET_RESTORE_ROUND_NOEXL(RM) SET_RESTORE_ROUND_GENERIC (RM, \ + libc_feholdsetround_noexl, libc_feresetround_noexl) /* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits. */ #define SET_RESTORE_ROUND_53BIT(RM) \
Hi Marcus,

Since there have been no further comments, could you check this in?

Wilco

ChangeLog:

2014-05-08 Wilco <wdijkstr@arm.com>

    * sysdeps/generic/math_private.h: Add default HAVE_RM_CTX
    implementation. New function (libc_feholdsetround_noex_ctx).

-----Original Message-----
From: Wilco [mailto:wdijkstr@arm.com]
Sent: 15 April 2014 14:35
To: 'libc-alpha@sourceware.org'
Subject: [PATCH] Add generic HAVE_RM_CTX implementation

Hi,

This patch adds a generic implementation of HAVE_RM_CTX using standard fenv calls. As a result math functions using SET_RESTORE_ROUND* macros do not suffer from a large slowdown on targets which do not implement optimized libc_fe*_ctx inline functions. Most of the libc_fe* inline functions are now unused and could be removed in the future (there are a few math functions left which use a mixture of standard fenv calls and libc_fe* inline functions - they could be updated to use SET_RESTORE_ROUND or improved to avoid expensive fenv manipulations across just a few FP instructions).

libc_feholdsetround*_noex_ctx is added to enable better optimization of SET_RESTORE_ROUND_NOEX* implementations.

Performance measurements on ARM and x86 of sin() show significant gains over the current default, fairly close to a highly optimized fenv_private:

                          ARM     x86
    no fenv_private     : 100%    100%
    generic HAVE_RM_CTX : 250%    350%
    fenv_private (CTX)  : 250%    450%

Wilco

ChangeLog:

2014-04-15 Wilco <wdijkstr@arm.com>

    * sysdeps/generic/math_private.h: Add generic HAVE_RM_CTX
    implementation. New function (libc_feholdsetround_noex_ctx).

---
 sysdeps/generic/math_private.h | 116 ++++++++++++++++++++++++++++++++--------
 1 file changed, 93 insertions(+), 23 deletions(-)