@@ -20,6 +20,7 @@
#include <stdint.h>
#include <sys/types.h>
#include <fenv.h>
+#include <get-rounding-mode.h>
/* The original fdlibm code used statements like:
n0 = ((*(int*)&one)>>29)^1; * index of high word *
@@ -557,6 +558,16 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
block is different from the current state. This saves a lot of time when
the floating point unit is much slower than the fixed point units. */
+/* Unless already defined, each _noex hold-and-set-round name maps onto
+   the corresponding plain hold-and-set-round name. */
+# ifndef libc_feholdsetround_noex_ctx
+# define libc_feholdsetround_noex_ctx libc_feholdsetround_ctx
+# endif
+# ifndef libc_feholdsetround_noexf_ctx
+# define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx
+# endif
+# ifndef libc_feholdsetround_noexl_ctx
+# define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx
+# endif
+
# ifndef libc_feresetround_noex_ctx
# define libc_feresetround_noex_ctx libc_fesetenv_ctx
# endif
@@ -567,24 +578,80 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
# define libc_feresetround_noexl_ctx libc_fesetenvl_ctx
# endif
-# ifndef libc_feholdsetround_53bit_ctx
-# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx
-# endif
+#else
-# ifndef libc_feresetround_53bit_ctx
-# define libc_feresetround_53bit_ctx libc_feresetround_ctx
-# endif
+/* Default implementation using standard fenv functions.
+ Avoid unnecessary rounding mode changes by first checking the
+ current rounding mode. Note the use of __glibc_unlikely is
+ important for performance. */
-# define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \
- struct rm_ctx ctx __attribute__((cleanup(CLEANUPFUNC ## _ctx))); \
- ROUNDFUNC ## _ctx (&ctx, (RM))
-#else
-# define SET_RESTORE_ROUND_GENERIC(RM, ROUNDFUNC, CLEANUPFUNC) \
- fenv_t __libc_save_rm __attribute__((cleanup(CLEANUPFUNC))); \
- ROUNDFUNC (&__libc_save_rm, (RM))
+static __always_inline void
+libc_feholdsetround_ctx (struct rm_ctx *ctx, int round)
+{
+ ctx->updated_status = false;
+
+ /* Update rounding mode only if different. When ROUND differs from the
+ current mode, the current environment is saved in CTX->env before
+ ROUND is installed, and CTX->updated_status is set so that
+ libc_feresetround_ctx knows there is something to restore. When
+ the modes already match, CTX->env is left untouched and
+ CTX->updated_status stays false. */
+ if (__glibc_unlikely (round != get_rounding_mode ()))
+ {
+ ctx->updated_status = true;
+ fegetenv (&ctx->env);
+ fesetround (round);
+ }
+}
+
+static __always_inline void
+libc_feresetround_ctx (struct rm_ctx *ctx)
+{
+ /* Restore the rounding mode if updated. CTX->env is only valid when
+ CTX->updated_status is true, so nothing is done otherwise.
+ feupdateenv installs the saved environment and then re-raises the
+ exceptions that were raised at the time of the call, so exceptions
+ raised after CTX->env was saved remain visible to the caller. */
+ if (__glibc_unlikely (ctx->updated_status))
+ feupdateenv (&ctx->env);
+}
+
+static __always_inline void
+libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round)
+{
+ /* Save exception flags and rounding mode. Unlike
+ libc_feholdsetround_ctx the save is unconditional, because
+ libc_feresetround_noex_ctx always restores from CTX->env;
+ CTX->updated_status is neither written nor read on this path. */
+ fegetenv (&ctx->env);
+
+ /* Update rounding mode only if different, to avoid a needless
+ fesetround call when ROUND is already in effect. */
+ if (__glibc_unlikely (round != get_rounding_mode ()))
+ fesetround (round);
+}
+
+static __always_inline void
+libc_feresetround_noex_ctx (struct rm_ctx *ctx)
+{
+ /* Restore exception flags and rounding mode. fesetenv installs the
+ environment saved in CTX->env without raising any exception, so
+ exception flags set after CTX->env was saved are discarded. */
+ fesetenv (&ctx->env);
+}
+
+# define libc_feholdsetroundf_ctx libc_feholdsetround_ctx
+# define libc_feholdsetroundl_ctx libc_feholdsetround_ctx
+# define libc_feresetroundf_ctx libc_feresetround_ctx
+# define libc_feresetroundl_ctx libc_feresetround_ctx
+/* Every f- and l-suffixed name in this table, plain and _noex alike, is
+   an alias for the unsuffixed function defined above. */
+# define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ctx
+# define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ctx
+# define libc_feresetround_noexf_ctx libc_feresetround_noex_ctx
+# define libc_feresetround_noexl_ctx libc_feresetround_noex_ctx
+
+#endif
+/* Unless already defined, the _53bit hold and restore names fall back to
+   the plain libc_feholdsetround_ctx and libc_feresetround_ctx. */
+#ifndef libc_feholdsetround_53bit_ctx
+# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx
#endif
+#ifndef libc_feresetround_53bit_ctx
+# define libc_feresetround_53bit_ctx libc_feresetround_ctx
+#endif
+/* Declare a struct rm_ctx named ctx whose cleanup attribute names
+   CLEANUPFUNC_ctx, then call ROUNDFUNC_ctx (&ctx, RM). The cleanup
+   attribute makes the compiler call CLEANUPFUNC_ctx (&ctx) when ctx goes
+   out of scope. The variable name ctx is fixed, so the macro can be
+   used at most once per scope and the enclosing code must not declare
+   its own ctx. */
+#define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \
+ struct rm_ctx ctx __attribute__((cleanup (CLEANUPFUNC ## _ctx))); \
+ ROUNDFUNC ## _ctx (&ctx, (RM))
-/* Save and restore the rounding mode within a lexical block. */
+/* Set the rounding mode within a lexical block. Restore the rounding mode to
+ the value at the start of the block. The exception mode must be preserved.
+ Exceptions raised within the block must be set in the exception flags.
+ Non-stop mode may be enabled inside the block. */
#define SET_RESTORE_ROUND(RM) \
SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround)
@@ -593,15 +660,18 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
#define SET_RESTORE_ROUNDL(RM) \
SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetroundl)
-/* Save and restore the rounding mode within a lexical block, and also
- the set of exceptions raised within the block may be discarded. */
-
-#define SET_RESTORE_ROUND_NOEX(RM) \
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround_noex)
-#define SET_RESTORE_ROUND_NOEXF(RM) \
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetround_noexf)
-#define SET_RESTORE_ROUND_NOEXL(RM) \
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetround_noexl)
+/* Set the rounding mode within a lexical block. Restore the rounding mode to
+ the value at the start of the block. The exception mode must be preserved.
+ Exceptions raised within the block must be discarded, and exception flags
+ are restored to the value at the start of the block.
+ Non-stop mode may be enabled inside the block.
+ Each macro below pairs a _noex hold function with the _noex restore
+ function of the same suffix. */
+
+#define SET_RESTORE_ROUND_NOEX(RM) SET_RESTORE_ROUND_GENERIC (RM, \
+ libc_feholdsetround_noex, libc_feresetround_noex)
+#define SET_RESTORE_ROUND_NOEXF(RM) SET_RESTORE_ROUND_GENERIC (RM, \
+ libc_feholdsetround_noexf, libc_feresetround_noexf)
+#define SET_RESTORE_ROUND_NOEXL(RM) SET_RESTORE_ROUND_GENERIC (RM, \
+ libc_feholdsetround_noexl, libc_feresetround_noexl)
/* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits. */
#define SET_RESTORE_ROUND_53BIT(RM) \
Ping

-----Original Message-----
From: Wilco [mailto:wdijkstr@arm.com]
Sent: 15 April 2014 14:35
To: 'libc-alpha@sourceware.org'
Subject: [PATCH] Add generic HAVE_RM_CTX implementation

Hi,

This patch adds a generic implementation of HAVE_RM_CTX using standard fenv calls. As a result, math functions using the SET_RESTORE_ROUND* macros do not suffer from a large slowdown on targets which do not implement optimized libc_fe*_ctx inline functions. Most of the libc_fe* inline functions are now unused and could be removed in the future (there are a few math functions left which use a mixture of standard fenv calls and libc_fe* inline functions - they could be updated to use SET_RESTORE_ROUND or improved to avoid expensive fenv manipulations across just a few FP instructions).

libc_feholdsetround_noex*_ctx is added to enable better optimization of SET_RESTORE_ROUND_NOEX* implementations.

Performance measurements on ARM and x86 of sin() show significant gains over the current default, fairly close to a highly optimized fenv_private:

                          ARM     x86
no fenv_private     :     100%    100%
generic HAVE_RM_CTX :     250%    350%
fenv_private (CTX)  :     250%    450%

Wilco

ChangeLog:

2014-04-15  Wilco  <wdijkstr@arm.com>

	* sysdeps/generic/math_private.h: Add generic HAVE_RM_CTX
	implementation. New function (libc_feholdsetround_noex_ctx).

---
 sysdeps/generic/math_private.h | 116 ++++++++++++++++++++++++++++++++--------
 1 file changed, 93 insertions(+), 23 deletions(-)