Message ID | b4399abc-88af-bf65-b8d0-74e865b995ea@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [v3,rs6000] Add V1TI into vector comparison expand [PR103316] | expand |
On Mon, 2022-03-21 at 09:51 +0800, HAO CHEN GUI wrote: > Hi, > This patch adds V1TI mode into a new mode iterator used in vector > comparison expands. Without the patch, the comparisons between two vector > __int128 are converted to scalar comparisons with branches. The code is > suboptimal. The patch fixes the issue. Now all comparisons between two > vector __int128 generate new P10 comparison instructions. Also the > related built-ins generate the same instructions after gimple folding. > So they're added back to the list. > Hi, Thanks for reworking the description; this clears up my uncertainty. :-) A few spots where spaces should be added after periods. No need to re-post for just that. Patch content otherwise seems OK to me, though I defer to others for any subtleties with actual VEC_IC-related changes. Thanks, -Will > Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > 2022-03-16 Haochen Gui <guihaoc@linux.ibm.com> > > gcc/ > PR target/103316 > * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable > gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET, > RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT, > RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI. > * config/rs6000/vector.md (VEC_IC): Define. Add support for new Power10 > V1TI instructions. > (vec_cmp<mode><mode>): Set mode iterator to VEC_IC. > (vec_cmpu<mode><mode>): Likewise. > > gcc/testsuite/ > PR target/103316 > * gcc.target/powerpc/pr103316.c: New. > * gcc.target/powerpc/fold-vec-cmp-int128.c: New cases for vector > __int128. 
> > patch.diff > diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc > index 5d34c1bcfc9..fac7f43f438 100644 > --- a/gcc/config/rs6000/rs6000-builtin.cc > +++ b/gcc/config/rs6000/rs6000-builtin.cc > @@ -1994,16 +1994,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_VCMPEQUH: > case RS6000_BIF_VCMPEQUW: > case RS6000_BIF_VCMPEQUD: > - /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple > - folding produces worse code for 128-bit compares. */ > + case RS6000_BIF_VCMPEQUT: > fold_compare_helper (gsi, EQ_EXPR, stmt); > return true; > > case RS6000_BIF_VCMPNEB: > case RS6000_BIF_VCMPNEH: > case RS6000_BIF_VCMPNEW: > - /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple > - folding produces worse code for 128-bit compares. */ > + case RS6000_BIF_VCMPNET: > fold_compare_helper (gsi, NE_EXPR, stmt); > return true; > > @@ -2015,9 +2013,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_CMPGE_U4SI: > case RS6000_BIF_CMPGE_2DI: > case RS6000_BIF_CMPGE_U2DI: > - /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI > - for now, because gimple folding produces worse code for 128-bit > - compares. */ > + case RS6000_BIF_CMPGE_1TI: > + case RS6000_BIF_CMPGE_U1TI: > fold_compare_helper (gsi, GE_EXPR, stmt); > return true; > > @@ -2029,9 +2026,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_VCMPGTUW: > case RS6000_BIF_VCMPGTUD: > case RS6000_BIF_VCMPGTSD: > - /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST > - for now, because gimple folding produces worse code for 128-bit > - compares. 
*/ > + case RS6000_BIF_VCMPGTUT: > + case RS6000_BIF_VCMPGTST: > fold_compare_helper (gsi, GT_EXPR, stmt); > return true; > > @@ -2043,9 +2039,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_CMPLE_U4SI: > case RS6000_BIF_CMPLE_2DI: > case RS6000_BIF_CMPLE_U2DI: > - /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI > - for now, because gimple folding produces worse code for 128-bit > - compares. */ > + case RS6000_BIF_CMPLE_1TI: > + case RS6000_BIF_CMPLE_U1TI: > fold_compare_helper (gsi, LE_EXPR, stmt); > return true; > > diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md > index b87a742cca8..d88869cc8d0 100644 > --- a/gcc/config/rs6000/vector.md > +++ b/gcc/config/rs6000/vector.md > @@ -26,6 +26,9 @@ > ;; Vector int modes > (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) > > +;; Vector int modes for comparison > +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")]) > + > ;; 128-bit int modes > (define_mode_iterator VEC_TI [V1TI TI]) > > @@ -533,10 +536,10 @@ (define_expand "vcond_mask_<mode><VEC_int>" > > ;; For signed integer vectors comparison. > (define_expand "vec_cmp<mode><mode>" > - [(set (match_operand:VEC_I 0 "vint_operand") > + [(set (match_operand:VEC_IC 0 "vint_operand") > (match_operator 1 "signed_or_equality_comparison_operator" > - [(match_operand:VEC_I 2 "vint_operand") > - (match_operand:VEC_I 3 "vint_operand")]))] > + [(match_operand:VEC_IC 2 "vint_operand") > + (match_operand:VEC_IC 3 "vint_operand")]))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > { > enum rtx_code code = GET_CODE (operands[1]); > @@ -573,10 +576,10 @@ (define_expand "vec_cmp<mode><mode>" > > ;; For unsigned integer vectors comparison. 
> (define_expand "vec_cmpu<mode><mode>" > - [(set (match_operand:VEC_I 0 "vint_operand") > + [(set (match_operand:VEC_IC 0 "vint_operand") > (match_operator 1 "unsigned_or_equality_comparison_operator" > - [(match_operand:VEC_I 2 "vint_operand") > - (match_operand:VEC_I 3 "vint_operand")]))] > + [(match_operand:VEC_IC 2 "vint_operand") > + (match_operand:VEC_IC 3 "vint_operand")]))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > { > enum rtx_code code = GET_CODE (operands[1]); > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c > new file mode 100644 > index 00000000000..1a4db0f45d4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c > @@ -0,0 +1,86 @@ > +/* Verify that overloaded built-ins for vec_cmp with __int128 > + inputs produce the right code. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +#include <altivec.h> > + > +vector bool __int128 > +test3_eq (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmpeq (x, y); > +} > + > +vector bool __int128 > +test6_eq (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmpeq (x, y); > +} > + > +vector bool __int128 > +test3_ge (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmpge (x, y); > +} > + > +vector bool __int128 > +test6_ge (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmpge (x, y); > +} > + > +vector bool __int128 > +test3_gt (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmpgt (x, y); > +} > + > +vector bool __int128 > +test6_gt (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmpgt (x, y); > +} > + > +vector bool __int128 > +test3_le (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmple (x, y); > +} > + > +vector bool __int128 > 
+test6_le (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmple (x, y); > +} > + > +vector bool __int128 > +test3_lt (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmplt (x, y); > +} > + > +vector bool __int128 > +test6_lt (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmplt (x, y); > +} > + > +vector bool __int128 > +test3_ne (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmpne (x, y); > +} > + > +vector bool __int128 > +test6_ne (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmpne (x, y); > +} > + > +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */ > +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */ > +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */ > +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c > new file mode 100644 > index 00000000000..02f7dc5ca1b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c > @@ -0,0 +1,80 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +vector bool __int128 > +test_eq (vector signed __int128 a, vector signed __int128 b) > +{ > + return a == b; > +} > + > +vector bool __int128 > +test_ne (vector signed __int128 a, vector signed __int128 b) > +{ > + return a != b; > +} > + > +vector bool __int128 > +test_gt (vector signed __int128 a, vector signed __int128 b) > +{ > + return a > b; > +} > + > +vector bool __int128 > +test_ge (vector signed __int128 a, vector signed __int128 b) > +{ > + return a >= b; > +} > + > +vector bool __int128 > +test_lt (vector signed __int128 a, vector signed __int128 b) > +{ > + return a < b; > +} > + > +vector bool __int128 > +test_le (vector signed __int128 a, vector signed __int128 b) > +{ > + return a <= b; > +} > + > 
+vector bool __int128 > +testu_eq (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a == b; > +} > + > +vector bool __int128 > +testu_ne (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a != b; > +} > + > +vector bool __int128 > +testu_gt (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a > b; > +} > + > +vector bool __int128 > +testu_ge (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a >= b; > +} > + > +vector bool __int128 > +testu_lt (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a < b; > +} > + > +vector bool __int128 > +testu_le (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a <= b; > +} > + > +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */ > +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */ > +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */ > +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 5d34c1bcfc9..fac7f43f438 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -1994,16 +1994,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_VCMPEQUH: case RS6000_BIF_VCMPEQUW: case RS6000_BIF_VCMPEQUD: - /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple - folding produces worse code for 128-bit compares. */ + case RS6000_BIF_VCMPEQUT: fold_compare_helper (gsi, EQ_EXPR, stmt); return true; case RS6000_BIF_VCMPNEB: case RS6000_BIF_VCMPNEH: case RS6000_BIF_VCMPNEW: - /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple - folding produces worse code for 128-bit compares. */ + case RS6000_BIF_VCMPNET: fold_compare_helper (gsi, NE_EXPR, stmt); return true; @@ -2015,9 +2013,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_CMPGE_U4SI: case RS6000_BIF_CMPGE_2DI: case RS6000_BIF_CMPGE_U2DI: - /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI - for now, because gimple folding produces worse code for 128-bit - compares. */ + case RS6000_BIF_CMPGE_1TI: + case RS6000_BIF_CMPGE_U1TI: fold_compare_helper (gsi, GE_EXPR, stmt); return true; @@ -2029,9 +2026,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_VCMPGTUW: case RS6000_BIF_VCMPGTUD: case RS6000_BIF_VCMPGTSD: - /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST - for now, because gimple folding produces worse code for 128-bit - compares. 
*/ + case RS6000_BIF_VCMPGTUT: + case RS6000_BIF_VCMPGTST: fold_compare_helper (gsi, GT_EXPR, stmt); return true; @@ -2043,9 +2039,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_CMPLE_U4SI: case RS6000_BIF_CMPLE_2DI: case RS6000_BIF_CMPLE_U2DI: - /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI - for now, because gimple folding produces worse code for 128-bit - compares. */ + case RS6000_BIF_CMPLE_1TI: + case RS6000_BIF_CMPLE_U1TI: fold_compare_helper (gsi, LE_EXPR, stmt); return true; diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index b87a742cca8..d88869cc8d0 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -26,6 +26,9 @@ ;; Vector int modes (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) +;; Vector int modes for comparison +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")]) + ;; 128-bit int modes (define_mode_iterator VEC_TI [V1TI TI]) @@ -533,10 +536,10 @@ (define_expand "vcond_mask_<mode><VEC_int>" ;; For signed integer vectors comparison. (define_expand "vec_cmp<mode><mode>" - [(set (match_operand:VEC_I 0 "vint_operand") + [(set (match_operand:VEC_IC 0 "vint_operand") (match_operator 1 "signed_or_equality_comparison_operator" - [(match_operand:VEC_I 2 "vint_operand") - (match_operand:VEC_I 3 "vint_operand")]))] + [(match_operand:VEC_IC 2 "vint_operand") + (match_operand:VEC_IC 3 "vint_operand")]))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { enum rtx_code code = GET_CODE (operands[1]); @@ -573,10 +576,10 @@ (define_expand "vec_cmp<mode><mode>" ;; For unsigned integer vectors comparison. 
(define_expand "vec_cmpu<mode><mode>" - [(set (match_operand:VEC_I 0 "vint_operand") + [(set (match_operand:VEC_IC 0 "vint_operand") (match_operator 1 "unsigned_or_equality_comparison_operator" - [(match_operand:VEC_I 2 "vint_operand") - (match_operand:VEC_I 3 "vint_operand")]))] + [(match_operand:VEC_IC 2 "vint_operand") + (match_operand:VEC_IC 3 "vint_operand")]))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { enum rtx_code code = GET_CODE (operands[1]); diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c new file mode 100644 index 00000000000..1a4db0f45d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c @@ -0,0 +1,86 @@ +/* Verify that overloaded built-ins for vec_cmp with __int128 + inputs produce the right code. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <altivec.h> + +vector bool __int128 +test3_eq (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpeq (x, y); +} + +vector bool __int128 +test6_eq (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpeq (x, y); +} + +vector bool __int128 +test3_ge (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpge (x, y); +} + +vector bool __int128 +test6_ge (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpge (x, y); +} + +vector bool __int128 +test3_gt (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpgt (x, y); +} + +vector bool __int128 +test6_gt (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpgt (x, y); +} + +vector bool __int128 +test3_le (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmple (x, y); +} + +vector bool __int128 +test6_le (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmple (x, y); +} + +vector bool __int128 +test3_lt (vector 
signed __int128 x, vector signed __int128 y) +{ + return vec_cmplt (x, y); +} + +vector bool __int128 +test6_lt (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmplt (x, y); +} + +vector bool __int128 +test3_ne (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpne (x, y); +} + +vector bool __int128 +test6_ne (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpne (x, y); +} + +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */ +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c new file mode 100644 index 00000000000..02f7dc5ca1b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c @@ -0,0 +1,80 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +vector bool __int128 +test_eq (vector signed __int128 a, vector signed __int128 b) +{ + return a == b; +} + +vector bool __int128 +test_ne (vector signed __int128 a, vector signed __int128 b) +{ + return a != b; +} + +vector bool __int128 +test_gt (vector signed __int128 a, vector signed __int128 b) +{ + return a > b; +} + +vector bool __int128 +test_ge (vector signed __int128 a, vector signed __int128 b) +{ + return a >= b; +} + +vector bool __int128 +test_lt (vector signed __int128 a, vector signed __int128 b) +{ + return a < b; +} + +vector bool __int128 +test_le (vector signed __int128 a, vector signed __int128 b) +{ + return a <= b; +} + +vector bool __int128 +testu_eq (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a == b; +} + +vector bool __int128 +testu_ne (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a != b; +} + +vector bool __int128 +testu_gt (vector unsigned __int128 a, vector unsigned 
__int128 b) +{ + return a > b; +} + +vector bool __int128 +testu_ge (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a >= b; +} + +vector bool __int128 +testu_lt (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a < b; +} + +vector bool __int128 +testu_le (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a <= b; +} + +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */ +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */