Message ID | 1495819159.15163.170.camel@brimstone.rchland.ibm.com |
---|---|
State | New |
Headers | show |
On Fri, May 26, 2017 at 7:19 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > Hi, > > Add support for early expansion of vector absolute built-ins. > > Bootstraps currently running (p7,p8le,p8be). > > OK for trunk? What's the documented behavior for vec_abs with respect to an argument of value INT_MIN? Richard. > Thanks, > -Will > > > [gcc] > > 2017-05-26 Will Schmidt <will_schmidt@vnet.ibm.com> > > * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling > for early expansion of vector absolute builtins. > > [gcc/testsuite] > > 2017-05-15 Will Schmidt <will_schmidt@vnet.ibm.com> > > * gcc.target/powerpc/fold-vec-abs-char.c: New. > * gcc.target/powerpc/fold-vec-abs-floatdouble.c: New. > * gcc.target/powerpc/fold-vec-abs-int.c: New. > * gcc.target/powerpc/fold-vec-abs-longlong.c: New. > * gcc.target/powerpc/fold-vec-abs-short.c: New. > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index dac673c..104a052 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -17333,6 +17333,21 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > gsi_replace (gsi, g, true); > return true; > } > + /* flavors of vec_abs. 
*/ > + case ALTIVEC_BUILTIN_ABS_V16QI: > + case ALTIVEC_BUILTIN_ABS_V8HI: > + case ALTIVEC_BUILTIN_ABS_V4SI: > + case ALTIVEC_BUILTIN_ABS_V4SF: > + case P8V_BUILTIN_ABS_V2DI: > + case VSX_BUILTIN_XVABSDP: > + { > + arg0 = gimple_call_arg (stmt, 0); > + lhs = gimple_call_lhs (stmt); > + gimple *g = gimple_build_assign (lhs, ABS_EXPR, arg0); > + gimple_set_location (g, gimple_location (stmt)); > + gsi_replace (gsi, g, true); > + return true; > + } > default: > break; > } > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-char.c > new file mode 100644 > index 0000000..239c919 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-char.c > @@ -0,0 +1,18 @@ > +/* Verify that overloaded built-ins for vec_abs with char > + inputs produce the right results. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2" } */ > + > +#include <altivec.h> > + > +vector signed char > +test2 (vector signed char x) > +{ > + return vec_abs (x); > +} > + > +/* { dg-final { scan-assembler-times "vspltisw|vxor" 1 } } */ > +/* { dg-final { scan-assembler-times "vsububm" 1 } } */ > +/* { dg-final { scan-assembler-times "vmaxsb" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-floatdouble.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-floatdouble.c > new file mode 100644 > index 0000000..1a08618 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-floatdouble.c > @@ -0,0 +1,23 @@ > +/* Verify that overloaded built-ins for vec_abs with float and > + double inputs for VSX produce the right results. 
*/ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-options "-mvsx -O2" } */ > + > +#include <altivec.h> > + > +vector float > +test1 (vector float x) > +{ > + return vec_abs (x); > +} > + > +vector double > +test2 (vector double x) > +{ > + return vec_abs (x); > +} > + > +/* { dg-final { scan-assembler-times "xvabssp" 1 } } */ > +/* { dg-final { scan-assembler-times "xvabsdp" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-int.c > new file mode 100644 > index 0000000..caf8861 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-int.c > @@ -0,0 +1,18 @@ > +/* Verify that overloaded built-ins for vec_abs with int > + inputs produce the right results. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2 " } */ > + > +#include <altivec.h> > + > +vector signed int > +test1 (vector signed int x) > +{ > + return vec_abs (x); > +} > + > +/* { dg-final { scan-assembler-times "vspltisw|vxor" 1 } } */ > +/* { dg-final { scan-assembler-times "vsubuwm" 1 } } */ > +/* { dg-final { scan-assembler-times "vmaxsw" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-longlong.c > new file mode 100644 > index 0000000..5b59d19 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-longlong.c > @@ -0,0 +1,18 @@ > +/* Verify that overloaded built-ins for vec_abs with long long > + inputs produce the right results. 
*/ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_p8vector_ok } */ > +/* { dg-options "-mpower8-vector -O2" } */ > + > +#include <altivec.h> > + > +vector signed long long > +test3 (vector signed long long x) > +{ > + return vec_abs (x); > +} > + > +/* { dg-final { scan-assembler-times "vspltisw|vxor" 1 } } */ > +/* { dg-final { scan-assembler-times "vsubudm" 1 } } */ > +/* { dg-final { scan-assembler-times "vmaxsd" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-short.c > new file mode 100644 > index 0000000..d312000 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-short.c > @@ -0,0 +1,18 @@ > +/* Verify that overloaded built-ins for vec_abs with short > + inputs produce the right results. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2" } */ > + > +#include <altivec.h> > + > +vector signed short > +test3 (vector signed short x) > +{ > + return vec_abs (x); > +} > + > +/* { dg-final { scan-assembler-times "vspltisw|vxor" 1 } } */ > +/* { dg-final { scan-assembler-times "vsubuhm" 1 } } */ > +/* { dg-final { scan-assembler-times "vmaxsh" 1 } } */ > >
On Mon, May 29, 2017 at 10:32:18AM +0200, Richard Biener wrote: > On Fri, May 26, 2017 at 7:19 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > > Add support for early expansion of vector absolute built-ins. > > > > Bootstraps currently running (p7,p8le,p8be). > > > > OK for trunk? > > What's the documented behavior for vec_abs with respect to an argument > of value INT_MIN? The documentation says: "For integer vectors, the arithmetic is modular." http://openpowerfoundation.org/wp-content/uploads/resources/leabi-prd/ (appendix A; the PDF is easier to read). Segher
On May 29, 2017 12:24:44 PM GMT+02:00, Segher Boessenkool <segher@kernel.crashing.org> wrote: >On Mon, May 29, 2017 at 10:32:18AM +0200, Richard Biener wrote: >> On Fri, May 26, 2017 at 7:19 PM, Will Schmidt ><will_schmidt@vnet.ibm.com> wrote: >> > Add support for early expansion of vector absolute built-ins. >> > >> > Bootstraps currently running (p7,p8le,p8be). >> > >> > OK for trunk? >> >> What's the documented behavior for vec_abs with respect to an >argument >> of value INT_MIN? > >The documentation says: > > "For integer vectors, the arithmetic is modular." This means that folding as ABS_EXPR is not safe for !TYPE_OVERFLOW_WRAPS integral vector types. Richard. >http://openpowerfoundation.org/wp-content/uploads/resources/leabi-prd/ >(appendix A; the PDF is easier to read). > > >Segher
On Mon, May 29, 2017 at 01:35:22PM +0200, Richard Biener wrote: > >> What's the documented behavior for vec_abs with respect to an > >argument > >> of value INT_MIN? > > > >The documentation says: > > > > "For integer vectors, the arithmetic is modular." > > This means that folding as ABS_EXPR is not safe for !TYPE_OVERFLOW_WRAPS > Integral vector types. Is it still fine if TYPE_OVERFLOW_UNDEFINED? So essentially always except with -ftrapv? Segher
On Mon, May 29, 2017 at 2:21 PM, Segher Boessenkool <segher@kernel.crashing.org> wrote: > On Mon, May 29, 2017 at 01:35:22PM +0200, Richard Biener wrote: >> >> What's the documented behavior for vec_abs with respect to an >> >argument >> >> of value INT_MIN? >> > >> >The documentation says: >> > >> > "For integer vectors, the arithmetic is modular." >> >> This means that folding as ABS_EXPR is not safe for !TYPE_OVERFLOW_WRAPS >> Integral vector types. > > Is it still fine if TYPE_OVERFLOW_UNDEFINED? So essentially always > except with -ftrapv? The docs say it needs to wrap so the correct check is TYPE_OVERFLOW_WRAPS. It's not fine with TYPE_OVERFLOW_UNDEFINED as we will conclude the result can never be INT_MIN while the spec says it can. Richard. > > > Segher
On Tue, 2017-05-30 at 09:00 +0200, Richard Biener wrote: > On Mon, May 29, 2017 at 2:21 PM, Segher Boessenkool > <segher@kernel.crashing.org> wrote: > > On Mon, May 29, 2017 at 01:35:22PM +0200, Richard Biener wrote: > >> >> What's the documented behavior for vec_abs with respect to an > >> >argument > >> >> of value INT_MIN? > >> > > >> >The documentation says: > >> > > >> > "For integer vectors, the arithmetic is modular." > >> > >> This means that folding as ABS_EXPR is not safe for !TYPE_OVERFLOW_WRAPS > >> Integral vector types. > > > > Is it still fine if TYPE_OVERFLOW_UNDEFINED? So essentially always > > except with -ftrapv? > > The docs say it needs to wrap so the correct check is TYPE_OVERFLOW_WRAPS. > It's not fine with TYPE_OVERFLOW_UNDEFINED as we will conclude the result > can never be INT_MIN while the spec says it can. Ok, thanks for the review. So it looks like I should bail with something like: ... case VSX_BUILTIN_XVABSDP: { arg0 = gimple_call_arg (stmt, 0); lhs = gimple_call_lhs (stmt); if (TYPE_OVERFLOW_WRAPS(TREE_TYPE(arg1)) return false; ... How can I test this scenario? At a glance, a testcase snippet doesn't appear to error out. Am I quietly losing an overflow indicator? vector signed int test1_min (vector signed int x) { vector signed int y = {INT_MIN,INT_MIN,INT_MIN,INT_MIN}; return vec_abs (y); } generates gimple code: y = { -2147483648, -2147483648, -2147483648, -2147483648 }; D.2579 = __builtin_altivec_abs_v4si (y); or after folding: y = { -2147483648, -2147483648, -2147483648, -2147483648 }; D.2579 = ABS_EXPR <y>; > > Richard. > > > > > > > Segher >
On Wed, May 31, 2017 at 3:56 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > On Tue, 2017-05-30 at 09:00 +0200, Richard Biener wrote: >> On Mon, May 29, 2017 at 2:21 PM, Segher Boessenkool >> <segher@kernel.crashing.org> wrote: >> > On Mon, May 29, 2017 at 01:35:22PM +0200, Richard Biener wrote: >> >> >> What's the documented behavior for vec_abs with respect to an >> >> >argument >> >> >> of value INT_MIN? >> >> > >> >> >The documentation says: >> >> > >> >> > "For integer vectors, the arithmetic is modular." >> >> >> >> This means that folding as ABS_EXPR is not safe for !TYPE_OVERFLOW_WRAPS >> >> Integral vector types. >> > >> > Is it still fine if TYPE_OVERFLOW_UNDEFINED? So essentially always >> > except with -ftrapv? >> >> The docs say it needs to wrap so the correct check is TYPE_OVERFLOW_WRAPS. >> It's not fine with TYPE_OVERFLOW_UNDEFINED as we will conclude the result >> can never be INT_MIN while the spec says it can. > > Ok, thanks for the review. > > So it looks like I should bail with something like: > ... > case VSX_BUILTIN_XVABSDP: > { > arg0 = gimple_call_arg (stmt, 0); > lhs = gimple_call_lhs (stmt); > if (TYPE_OVERFLOW_WRAPS(TREE_TYPE(arg1)) > return false; No, you want if (! TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg1))) return false; that will likely render the transform useless unless -fwrapv is given. What we miss in the middle-end is an ABSU_EXPR that computes the unsigned result of the absolute value (of the signed operand). That's always well-defined. So you'd then lower to y = { -2147483648, -2147483648, -2147483648, -2147483648 }; D.1234 = ABSU_EXPR <y>; D.2579 = VIEW_CONVERT <D.1234>; RTL expansion of ABSU_EXPR can re-use RTL abs since there's nothing undefined on RTL. Richard. > ... > > How can I test this scenario? At a glance, a testcase snippet doesn't > appear to error out. Am I quietly losing an overflow indicator? 
> > vector signed int > test1_min (vector signed int x) > { > vector signed int y = {INT_MIN,INT_MIN,INT_MIN,INT_MIN}; > return vec_abs (y); > } > > generates gimple code: > y = { -2147483648, -2147483648, -2147483648, -2147483648 }; > D.2579 = __builtin_altivec_abs_v4si (y); > or after folding: > y = { -2147483648, -2147483648, -2147483648, -2147483648 }; > D.2579 = ABS_EXPR <y>; > > > > >> >> Richard. >> >> > >> > >> > Segher >> > >
On Wed, May 31, 2017 at 3:02 PM, Richard Biener <richard.guenther@gmail.com> wrote: > On Wed, May 31, 2017 at 3:56 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: >> On Tue, 2017-05-30 at 09:00 +0200, Richard Biener wrote: >>> On Mon, May 29, 2017 at 2:21 PM, Segher Boessenkool >>> <segher@kernel.crashing.org> wrote: >>> > On Mon, May 29, 2017 at 01:35:22PM +0200, Richard Biener wrote: >>> >> >> What's the documented behavior for vec_abs with respect to an >>> >> >argument >>> >> >> of value INT_MIN? >>> >> > >>> >> >The documentation says: >>> >> > >>> >> > "For integer vectors, the arithmetic is modular." >>> >> >>> >> This means that folding as ABS_EXPR is not safe for !TYPE_OVERFLOW_WRAPS >>> >> Integral vector types. >>> > >>> > Is it still fine if TYPE_OVERFLOW_UNDEFINED? So essentially always >>> > except with -ftrapv? >>> >>> The docs say it needs to wrap so the correct check is TYPE_OVERFLOW_WRAPS. >>> It's not fine with TYPE_OVERFLOW_UNDEFINED as we will conclude the result >>> can never be INT_MIN while the spec says it can. >> >> Ok, thanks for the review. >> >> So it looks like I should bail with something like: >> ... >> case VSX_BUILTIN_XVABSDP: >> { >> arg0 = gimple_call_arg (stmt, 0); >> lhs = gimple_call_lhs (stmt); >> if (TYPE_OVERFLOW_WRAPS(TREE_TYPE(arg1)) >> return false; > > No, you want > > if (! TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg1))) > return false; > > that will likely render the transform useless unless -fwrapv is given. > > What we miss in the middle-end is a ABSU_EXPR that computes the > unsigned result of the absolute value (of the signed operand). That's > always well-defined. So you'd then lower to > > y = { -2147483648, -2147483648, -2147483648, -2147483648 }; > D.1234 = ABSU_EXPR <y>; > D.2579 = VIEW_CONVERT <D.1234>; > > RTL expansion of ABSU_EXPR can re-use RTL abs since there's > nothing undefined on RTL. There is a PR for this in BZ, though I can't find it in a quick search ... 
We can use this on arm and aarch64 as well IIRC. regards Ramana > > Richard. > >> ... >> >> How can I test this scenario? At a glance, a testcase snippet doesn't >> appear to error out. Am I quietly losing an overflow indicator? >> >> vector signed int >> test1_min (vector signed int x) >> { >> vector signed int y = {INT_MIN,INT_MIN,INT_MIN,INT_MIN}; >> return vec_abs (y); >> } >> >> generates gimple code: >> y = { -2147483648, -2147483648, -2147483648, -2147483648 }; >> D.2579 = __builtin_altivec_abs_v4si (y); >> or after folding: >> y = { -2147483648, -2147483648, -2147483648, -2147483648 }; >> D.2579 = ABS_EXPR <y>; >> >> >> >> >>> >>> Richard. >>> >>> > >>> > >>> > Segher >>> >> >>
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index dac673c..104a052 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -17333,6 +17333,21 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) gsi_replace (gsi, g, true); return true; } + /* flavors of vec_abs. */ + case ALTIVEC_BUILTIN_ABS_V16QI: + case ALTIVEC_BUILTIN_ABS_V8HI: + case ALTIVEC_BUILTIN_ABS_V4SI: + case ALTIVEC_BUILTIN_ABS_V4SF: + case P8V_BUILTIN_ABS_V2DI: + case VSX_BUILTIN_XVABSDP: + { + arg0 = gimple_call_arg (stmt, 0); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, ABS_EXPR, arg0); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } default: break; } diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-char.c new file mode 100644 index 0000000..239c919 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-char.c @@ -0,0 +1,18 @@ +/* Verify that overloaded built-ins for vec_abs with char + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include <altivec.h> + +vector signed char +test2 (vector signed char x) +{ + return vec_abs (x); +} + +/* { dg-final { scan-assembler-times "vspltisw|vxor" 1 } } */ +/* { dg-final { scan-assembler-times "vsububm" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxsb" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-floatdouble.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-floatdouble.c new file mode 100644 index 0000000..1a08618 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-floatdouble.c @@ -0,0 +1,23 @@ +/* Verify that overloaded built-ins for vec_abs with float and + double inputs for VSX produce the right results. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-mvsx -O2" } */ + +#include <altivec.h> + +vector float +test1 (vector float x) +{ + return vec_abs (x); +} + +vector double +test2 (vector double x) +{ + return vec_abs (x); +} + +/* { dg-final { scan-assembler-times "xvabssp" 1 } } */ +/* { dg-final { scan-assembler-times "xvabsdp" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-int.c new file mode 100644 index 0000000..caf8861 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-int.c @@ -0,0 +1,18 @@ +/* Verify that overloaded built-ins for vec_abs with int + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2 " } */ + +#include <altivec.h> + +vector signed int +test1 (vector signed int x) +{ + return vec_abs (x); +} + +/* { dg-final { scan-assembler-times "vspltisw|vxor" 1 } } */ +/* { dg-final { scan-assembler-times "vsubuwm" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxsw" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-longlong.c new file mode 100644 index 0000000..5b59d19 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-longlong.c @@ -0,0 +1,18 @@ +/* Verify that overloaded built-ins for vec_abs with long long + inputs produce the right results. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mpower8-vector -O2" } */ + +#include <altivec.h> + +vector signed long long +test3 (vector signed long long x) +{ + return vec_abs (x); +} + +/* { dg-final { scan-assembler-times "vspltisw|vxor" 1 } } */ +/* { dg-final { scan-assembler-times "vsubudm" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxsd" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-short.c new file mode 100644 index 0000000..d312000 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-abs-short.c @@ -0,0 +1,18 @@ +/* Verify that overloaded built-ins for vec_abs with short + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include <altivec.h> + +vector signed short +test3 (vector signed short x) +{ + return vec_abs (x); +} + +/* { dg-final { scan-assembler-times "vspltisw|vxor" 1 } } */ +/* { dg-final { scan-assembler-times "vsubuhm" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxsh" 1 } } */