Message ID | 1471976565-3576-6-git-send-email-siddhesh@sourceware.org |
---|---|
State | New |
Headers | show |
Ping! On Tuesday 23 August 2016 11:52 PM, Siddhesh Poyarekar wrote: > The support functions for sin and cos have a lot of identical > functionality, so inlining them gives a pretty decent jump in > functionality: ~19% in the sincos function. On SPEC2006 this > translates to about 2.1% in the tonto test. > > * sysdeps/ieee754/dbl-64/s_sin.c (do_cos): Mark as inline. > (do_cos_slow): Likewise. > (do_sin): Likewise. > (do_sin_slow): Likewise. > (slow): Likewise. > (slow1): Likewise. > (slow2): Likewise. > (sloww): Likewise. > (sloww1): Likewise. > (sloww2): Likewise. > (bsloww): Likewise. > (bsloww1): Likewise. > (bsloww2): Likewise. > (cslow2): Likewise. > --- > sysdeps/ieee754/dbl-64/s_sin.c | 52 +++++++++++++++++++++++------------------- > 1 file changed, 28 insertions(+), 24 deletions(-) > > diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c > index 82f9345..c20ef4d 100644 > --- a/sysdeps/ieee754/dbl-64/s_sin.c > +++ b/sysdeps/ieee754/dbl-64/s_sin.c > @@ -145,7 +145,8 @@ static double cslow2 (double x); > of the number by combining the sin and cos of X (as computed by a variation > of the Taylor series) with the values looked up from the sin/cos table to > get the result in RES and a correction value in COR. */ > -static double > +static inline double > +__always_inline > do_cos (double x, double dx, double *corp) > { > mynumber u; > @@ -170,7 +171,8 @@ do_cos (double x, double dx, double *corp) > > /* A more precise variant of DO_COS. EPS is the adjustment to the correction > COR. */ > -static double > +static inline double > +__always_inline > do_cos_slow (double x, double dx, double eps, double *corp) > { > mynumber u; > @@ -205,7 +207,8 @@ do_cos_slow (double x, double dx, double eps, double *corp) > the number by combining the sin and cos of X (as computed by a variation of > the Taylor series) with the values looked up from the sin/cos table to get > the result in RES and a correction value in COR. 
*/ > -static double > +static inline double > +__always_inline > do_sin (double x, double dx, double *corp) > { > mynumber u; > @@ -229,7 +232,8 @@ do_sin (double x, double dx, double *corp) > > /* A more precise variant of DO_SIN. EPS is the adjustment to the correction > COR. */ > -static double > +static inline double > +__always_inline > do_sin_slow (double x, double dx, double eps, double *corp) > { > mynumber u; > @@ -615,8 +619,8 @@ __cos (double x) > /* precision and if still doesn't accurate enough by mpsin or dubsin */ > /************************************************************************/ > > -static double > -SECTION > +static inline double > +__always_inline > slow (double x) > { > double res, cor, w[2]; > @@ -636,8 +640,8 @@ slow (double x) > /* and if result still doesn't accurate enough by mpsin or dubsin */ > /*******************************************************************************/ > > -static double > -SECTION > +static inline double > +__always_inline > slow1 (double x) > { > double w[2], cor, res; > @@ -657,8 +661,8 @@ slow1 (double x) > /* Routine compute sin(x) for 0.855469 <|x|<2.426265 by __sincostab.tbl */ > /* and if result still doesn't accurate enough by mpsin or dubsin */ > /**************************************************************************/ > -static double > -SECTION > +static inline double > +__always_inline > slow2 (double x) > { > double w[2], y, y1, y2, cor, res; > @@ -686,8 +690,8 @@ slow2 (double x) > /* result.And if result not accurate enough routine calls mpsin1 or dubsin */ > /***************************************************************************/ > > -static double > -SECTION > +static inline double > +__always_inline > sloww (double x, double dx, double orig, int k) > { > double y, t, res, cor, w[2], a, da, xn; > @@ -747,8 +751,8 @@ sloww (double x, double dx, double orig, int k) > /* accurate enough routine calls mpsin1 or dubsin */ > 
/***************************************************************************/ > > -static double > -SECTION > +static inline double > +__always_inline > sloww1 (double x, double dx, double orig, int k) > { > double w[2], cor, res; > @@ -777,8 +781,8 @@ sloww1 (double x, double dx, double orig, int k) > /* accurate enough routine calls mpsin1 or dubsin */ > /***************************************************************************/ > > -static double > -SECTION > +static inline double > +__always_inline > sloww2 (double x, double dx, double orig, int n) > { > double w[2], cor, res; > @@ -808,8 +812,8 @@ sloww2 (double x, double dx, double orig, int n) > /* result.And if result not accurate enough routine calls other routines */ > /***************************************************************************/ > > -static double > -SECTION > +static inline double > +__always_inline > bsloww (double x, double dx, double orig, int n) > { > double res, cor, w[2], a, da; > @@ -837,8 +841,8 @@ bsloww (double x, double dx, double orig, int n) > /* And if result not accurate enough routine calls other routines */ > /***************************************************************************/ > > -static double > -SECTION > +static inline double > +__always_inline > bsloww1 (double x, double dx, double orig, int n) > { > double w[2], cor, res; > @@ -865,8 +869,8 @@ bsloww1 (double x, double dx, double orig, int n) > /* And if result not accurate enough routine calls other routines */ > /***************************************************************************/ > > -static double > -SECTION > +static inline double > +__always_inline > bsloww2 (double x, double dx, double orig, int n) > { > double w[2], cor, res; > @@ -891,8 +895,8 @@ bsloww2 (double x, double dx, double orig, int n) > /* precision and if still doesn't accurate enough by mpcos or docos */ > /************************************************************************/ > > -static double > -SECTION > +static 
inline double > +__always_inline > cslow2 (double x) > { > double w[2], cor, res; >
On Aug 23 2016, Siddhesh Poyarekar <siddhesh@sourceware.org> wrote: > The support functions for sin and cos have a lot of identical > functionality, so inlining them gives a pretty decent jump in > functionality: ~19% in the sincos function. On SPEC2006 this What is the metric of functionality? > translates to about 2.1% in the tonto test. What does "tonto test" mean? Andreas.
On Tue, Aug 30, 2016 at 8:52 AM, Andreas Schwab <schwab@suse.de> wrote: > On Aug 23 2016, Siddhesh Poyarekar <siddhesh@sourceware.org> wrote: > >> The support functions for sin and cos have a lot of identical >> functionality, so inlining them gives a pretty decent jump in >> functionality: ~19% in the sincos function. On SPEC2006 this > > What is the metric of functionality? > >> translates to about 2.1% in the tonto test. > > What does "tonto test" mean? https://www.spec.org/cpu2006/Docs/465.tonto.html > > Andreas. > > -- > Andreas Schwab, SUSE Labs, schwab@suse.de > GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 > "And now for something completely different."
On Tuesday 30 August 2016 01:22 PM, Andreas Schwab wrote: >> The support functions for sin and cos have a lot of identical >> functionality, so inlining them gives a pretty decent jump in >> functionality: ~19% in the sincos function. On SPEC2006 this > What is the metric of functionality? Sorry, that was a typo; it should read "a pretty decent jump in performance" in the sincos function microbenchmark in benchtests. >> translates to about 2.1% in the tonto test. > What does "tonto test" mean? The tonto test is part of the SPEC CPU2006 benchmark suite, and it spends a little under half of its execution time in sincos and its child functions. Siddhesh
On Tue, 23 Aug 2016, Siddhesh Poyarekar wrote: > The support functions for sin and cos have a lot of identical > functionality, so inlining them gives a pretty decent jump in > functionality: ~19% in the sincos function. On SPEC2006 this > translates to about 2.1% in the tonto test. > > * sysdeps/ieee754/dbl-64/s_sin.c (do_cos): Mark as inline. > (do_cos_slow): Likewise. > (do_sin): Likewise. > (do_sin_slow): Likewise. > (slow): Likewise. > (slow1): Likewise. > (slow2): Likewise. > (sloww): Likewise. > (sloww1): Likewise. > (sloww2): Likewise. > (bsloww): Likewise. > (bsloww1): Likewise. > (bsloww2): Likewise. > (cslow2): Likewise. OK.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c index 82f9345..c20ef4d 100644 --- a/sysdeps/ieee754/dbl-64/s_sin.c +++ b/sysdeps/ieee754/dbl-64/s_sin.c @@ -145,7 +145,8 @@ static double cslow2 (double x); of the number by combining the sin and cos of X (as computed by a variation of the Taylor series) with the values looked up from the sin/cos table to get the result in RES and a correction value in COR. */ -static double +static inline double +__always_inline do_cos (double x, double dx, double *corp) { mynumber u; @@ -170,7 +171,8 @@ do_cos (double x, double dx, double *corp) /* A more precise variant of DO_COS. EPS is the adjustment to the correction COR. */ -static double +static inline double +__always_inline do_cos_slow (double x, double dx, double eps, double *corp) { mynumber u; @@ -205,7 +207,8 @@ do_cos_slow (double x, double dx, double eps, double *corp) the number by combining the sin and cos of X (as computed by a variation of the Taylor series) with the values looked up from the sin/cos table to get the result in RES and a correction value in COR. */ -static double +static inline double +__always_inline do_sin (double x, double dx, double *corp) { mynumber u; @@ -229,7 +232,8 @@ do_sin (double x, double dx, double *corp) /* A more precise variant of DO_SIN. EPS is the adjustment to the correction COR. 
*/ -static double +static inline double +__always_inline do_sin_slow (double x, double dx, double eps, double *corp) { mynumber u; @@ -615,8 +619,8 @@ __cos (double x) /* precision and if still doesn't accurate enough by mpsin or dubsin */ /************************************************************************/ -static double -SECTION +static inline double +__always_inline slow (double x) { double res, cor, w[2]; @@ -636,8 +640,8 @@ slow (double x) /* and if result still doesn't accurate enough by mpsin or dubsin */ /*******************************************************************************/ -static double -SECTION +static inline double +__always_inline slow1 (double x) { double w[2], cor, res; @@ -657,8 +661,8 @@ slow1 (double x) /* Routine compute sin(x) for 0.855469 <|x|<2.426265 by __sincostab.tbl */ /* and if result still doesn't accurate enough by mpsin or dubsin */ /**************************************************************************/ -static double -SECTION +static inline double +__always_inline slow2 (double x) { double w[2], y, y1, y2, cor, res; @@ -686,8 +690,8 @@ slow2 (double x) /* result.And if result not accurate enough routine calls mpsin1 or dubsin */ /***************************************************************************/ -static double -SECTION +static inline double +__always_inline sloww (double x, double dx, double orig, int k) { double y, t, res, cor, w[2], a, da, xn; @@ -747,8 +751,8 @@ sloww (double x, double dx, double orig, int k) /* accurate enough routine calls mpsin1 or dubsin */ /***************************************************************************/ -static double -SECTION +static inline double +__always_inline sloww1 (double x, double dx, double orig, int k) { double w[2], cor, res; @@ -777,8 +781,8 @@ sloww1 (double x, double dx, double orig, int k) /* accurate enough routine calls mpsin1 or dubsin */ /***************************************************************************/ -static double -SECTION +static 
inline double +__always_inline sloww2 (double x, double dx, double orig, int n) { double w[2], cor, res; @@ -808,8 +812,8 @@ sloww2 (double x, double dx, double orig, int n) /* result.And if result not accurate enough routine calls other routines */ /***************************************************************************/ -static double -SECTION +static inline double +__always_inline bsloww (double x, double dx, double orig, int n) { double res, cor, w[2], a, da; @@ -837,8 +841,8 @@ bsloww (double x, double dx, double orig, int n) /* And if result not accurate enough routine calls other routines */ /***************************************************************************/ -static double -SECTION +static inline double +__always_inline bsloww1 (double x, double dx, double orig, int n) { double w[2], cor, res; @@ -865,8 +869,8 @@ bsloww1 (double x, double dx, double orig, int n) /* And if result not accurate enough routine calls other routines */ /***************************************************************************/ -static double -SECTION +static inline double +__always_inline bsloww2 (double x, double dx, double orig, int n) { double w[2], cor, res; @@ -891,8 +895,8 @@ bsloww2 (double x, double dx, double orig, int n) /* precision and if still doesn't accurate enough by mpcos or docos */ /************************************************************************/ -static double -SECTION +static inline double +__always_inline cslow2 (double x) { double w[2], cor, res;