Message ID | d18c65ba-212f-0bcc-6114-f8df7c1ba02e@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [v5,rs6000] Add V1TI into vector comparison expand [PR103316] | expand |
Hi Haochen, on 2022/5/31 09:52, HAO CHEN GUI wrote: > Hi, > This patch adds V1TI mode into a new mode iterator used in vector > comparison shift and rotation expands. Without the patch, the comparisons > between two vector __int128 are converted to scalar comparisons and > code is suboptimal. The patch fixes the issue. Now all comparisons > between two vector __int128 generates P10 new comparison instructions. > Also the relative built-ins generate the same instructions after gimple > folding. So they're added back to the folding list. > > This patch also merges some vector comparison shift and rotation expands > for V1T1 and other vector integer modes as they have the similar patterns. > The expands for V1TI only are removed. > > Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > OK, thanks! BR, Kewen > ChangeLog > 2022-05-24 Haochen Gui <guihaoc@linux.ibm.com> > > gcc/ > PR target/103316 > * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable > gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET, > RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT, > RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI. > * config/rs6000/vector.md (VEC_IC): New mode iterator. Add support > for new Power10 V1TI instructions. > (vec_cmp<mode><mode>): Set mode iterator to VEC_IC. > (vec_cmpu<mode><mode>): Likewise. > (vector_nlt<mode>): Set mode iterator to VEC_IC. > (vector_nltv1ti): Remove. > (vector_gtu<mode>): Set mode iterator to VEC_IC. > (vector_gtuv1ti): Remove. > (vector_nltu<mode>): Set mode iterator to VEC_IC. > (vector_nltuv1ti): Remove. > (vector_geu<mode>): Set mode iterator to VEC_IC. > (vector_ngt<mode>): Likewise. > (vector_ngtv1ti): Remove. > (vector_ngtu<mode>): Set mode iterator to VEC_IC. > (vector_ngtuv1ti): Remove. > (vector_gtu_<mode>_p): Set mode iterator to VEC_IC. > (vector_gtu_v1ti_p): Remove. > (vrotl<mode>3): Set mode iterator to VEC_IC. Emit insns for V1TI. > (vrotlv1ti3): Remove. > (vashr<mode>3): Set mode iterator to VEC_IC. Emit insns for V1TI. > (vashrv1ti3): Remove. > > gcc/testsuite/ > PR target/103316 > * gcc.target/powerpc/pr103316.c: New. > * gcc.target/powerpc/fold-vec-cmp-int128.c: New. > > > patch.diff > diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc > index e925ba9fad9..b67f4e066a8 100644 > --- a/gcc/config/rs6000/rs6000-builtin.cc > +++ b/gcc/config/rs6000/rs6000-builtin.cc > @@ -2000,16 +2000,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_VCMPEQUH: > case RS6000_BIF_VCMPEQUW: > case RS6000_BIF_VCMPEQUD: > - /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple > - folding produces worse code for 128-bit compares. */ > + case RS6000_BIF_VCMPEQUT: > fold_compare_helper (gsi, EQ_EXPR, stmt); > return true; > > case RS6000_BIF_VCMPNEB: > case RS6000_BIF_VCMPNEH: > case RS6000_BIF_VCMPNEW: > - /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple > - folding produces worse code for 128-bit compares. */ > + case RS6000_BIF_VCMPNET: > fold_compare_helper (gsi, NE_EXPR, stmt); > return true; > > @@ -2021,9 +2019,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_CMPGE_U4SI: > case RS6000_BIF_CMPGE_2DI: > case RS6000_BIF_CMPGE_U2DI: > - /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI > - for now, because gimple folding produces worse code for 128-bit > - compares. */ > + case RS6000_BIF_CMPGE_1TI: > + case RS6000_BIF_CMPGE_U1TI: > fold_compare_helper (gsi, GE_EXPR, stmt); > return true; > > @@ -2035,9 +2032,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_VCMPGTUW: > case RS6000_BIF_VCMPGTUD: > case RS6000_BIF_VCMPGTSD: > - /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST > - for now, because gimple folding produces worse code for 128-bit > - compares. */ > + case RS6000_BIF_VCMPGTUT: > + case RS6000_BIF_VCMPGTST: > fold_compare_helper (gsi, GT_EXPR, stmt); > return true; > > @@ -2049,9 +2045,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > case RS6000_BIF_CMPLE_U4SI: > case RS6000_BIF_CMPLE_2DI: > case RS6000_BIF_CMPLE_U2DI: > - /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI > - for now, because gimple folding produces worse code for 128-bit > - compares. */ > + case RS6000_BIF_CMPLE_1TI: > + case RS6000_BIF_CMPLE_U1TI: > fold_compare_helper (gsi, LE_EXPR, stmt); > return true; > > diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md > index 4d0797c48f8..a0d33d2f604 100644 > --- a/gcc/config/rs6000/vector.md > +++ b/gcc/config/rs6000/vector.md > @@ -26,6 +26,9 @@ > ;; Vector int modes > (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) > > +;; Vector int modes for comparison, shift and rotation > +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")]) > + > ;; 128-bit int modes > (define_mode_iterator VEC_TI [V1TI TI]) > > @@ -533,10 +536,10 @@ (define_expand "vcond_mask_<mode><VEC_int>" > > ;; For signed integer vectors comparison. > (define_expand "vec_cmp<mode><mode>" > - [(set (match_operand:VEC_I 0 "vint_operand") > + [(set (match_operand:VEC_IC 0 "vint_operand") > (match_operator 1 "signed_or_equality_comparison_operator" > - [(match_operand:VEC_I 2 "vint_operand") > - (match_operand:VEC_I 3 "vint_operand")]))] > + [(match_operand:VEC_IC 2 "vint_operand") > + (match_operand:VEC_IC 3 "vint_operand")]))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > { > enum rtx_code code = GET_CODE (operands[1]); > @@ -573,10 +576,10 @@ (define_expand "vec_cmp<mode><mode>" > > ;; For unsigned integer vectors comparison. > (define_expand "vec_cmpu<mode><mode>" > - [(set (match_operand:VEC_I 0 "vint_operand") > + [(set (match_operand:VEC_IC 0 "vint_operand") > (match_operator 1 "unsigned_or_equality_comparison_operator" > - [(match_operand:VEC_I 2 "vint_operand") > - (match_operand:VEC_I 3 "vint_operand")]))] > + [(match_operand:VEC_IC 2 "vint_operand") > + (match_operand:VEC_IC 3 "vint_operand")]))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > { > enum rtx_code code = GET_CODE (operands[1]); > @@ -690,116 +693,65 @@ (define_expand "vector_gt<mode>" > > ; >= for integer vectors: swap operands and apply not-greater-than > (define_expand "vector_nlt<mode>" > - [(set (match_operand:VEC_I 3 "vlogical_operand") > - (gt:VEC_I (match_operand:VEC_I 2 "vlogical_operand") > - (match_operand:VEC_I 1 "vlogical_operand"))) > - (set (match_operand:VEC_I 0 "vlogical_operand") > - (not:VEC_I (match_dup 3)))] > + [(set (match_operand:VEC_IC 3 "vlogical_operand") > + (gt:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand") > + (match_operand:VEC_IC 1 "vlogical_operand"))) > + (set (match_operand:VEC_IC 0 "vlogical_operand") > + (not:VEC_IC (match_dup 3)))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > { > operands[3] = gen_reg_rtx_and_attrs (operands[0]); > }) > > -(define_expand "vector_nltv1ti" > - [(set (match_operand:V1TI 3 "vlogical_operand") > - (gt:V1TI (match_operand:V1TI 2 "vlogical_operand") > - (match_operand:V1TI 1 "vlogical_operand"))) > - (set (match_operand:V1TI 0 "vlogical_operand") > - (not:V1TI (match_dup 3)))] > - "TARGET_POWER10" > -{ > - operands[3] = gen_reg_rtx_and_attrs (operands[0]); > -}) > - > (define_expand "vector_gtu<mode>" > - [(set (match_operand:VEC_I 0 "vint_operand") > - (gtu:VEC_I (match_operand:VEC_I 1 "vint_operand") > - (match_operand:VEC_I 2 "vint_operand")))] > + [(set (match_operand:VEC_IC 0 "vint_operand") > + (gtu:VEC_IC (match_operand:VEC_IC 1 "vint_operand") > + (match_operand:VEC_IC 2 "vint_operand")))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > "") > > -(define_expand "vector_gtuv1ti" > - [(set (match_operand:V1TI 0 "altivec_register_operand") > - (gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand") > - (match_operand:V1TI 2 "altivec_register_operand")))] > - "TARGET_POWER10" > - "") > - > ; >= for integer vectors: swap operands and apply not-greater-than > (define_expand "vector_nltu<mode>" > - [(set (match_operand:VEC_I 3 "vlogical_operand") > - (gtu:VEC_I (match_operand:VEC_I 2 "vlogical_operand") > - (match_operand:VEC_I 1 "vlogical_operand"))) > - (set (match_operand:VEC_I 0 "vlogical_operand") > - (not:VEC_I (match_dup 3)))] > + [(set (match_operand:VEC_IC 3 "vlogical_operand") > + (gtu:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand") > + (match_operand:VEC_IC 1 "vlogical_operand"))) > + (set (match_operand:VEC_IC 0 "vlogical_operand") > + (not:VEC_IC (match_dup 3)))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > { > operands[3] = gen_reg_rtx_and_attrs (operands[0]); > }) > > -(define_expand "vector_nltuv1ti" > - [(set (match_operand:V1TI 3 "vlogical_operand") > - (gtu:V1TI (match_operand:V1TI 2 "vlogical_operand") > - (match_operand:V1TI 1 "vlogical_operand"))) > - (set (match_operand:V1TI 0 "vlogical_operand") > - (not:V1TI (match_dup 3)))] > - "TARGET_POWER10" > -{ > - operands[3] = gen_reg_rtx_and_attrs (operands[0]); > -}) > - > (define_expand "vector_geu<mode>" > - [(set (match_operand:VEC_I 0 "vint_operand") > - (geu:VEC_I (match_operand:VEC_I 1 "vint_operand") > - (match_operand:VEC_I 2 "vint_operand")))] > + [(set (match_operand:VEC_IC 0 "vint_operand") > + (geu:VEC_IC (match_operand:VEC_IC 1 "vint_operand") > + (match_operand:VEC_IC 2 "vint_operand")))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > "") > > ; <= for integer vectors: apply not-greater-than > (define_expand "vector_ngt<mode>" > - [(set (match_operand:VEC_I 3 "vlogical_operand") > - (gt:VEC_I (match_operand:VEC_I 1 "vlogical_operand") > - (match_operand:VEC_I 2 "vlogical_operand"))) > - (set (match_operand:VEC_I 0 "vlogical_operand") > - (not:VEC_I (match_dup 3)))] > + [(set (match_operand:VEC_IC 3 "vlogical_operand") > + (gt:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand") > + (match_operand:VEC_IC 2 "vlogical_operand"))) > + (set (match_operand:VEC_IC 0 "vlogical_operand") > + (not:VEC_IC (match_dup 3)))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > { > operands[3] = gen_reg_rtx_and_attrs (operands[0]); > }) > > -(define_expand "vector_ngtv1ti" > - [(set (match_operand:V1TI 3 "vlogical_operand") > - (gt:V1TI (match_operand:V1TI 1 "vlogical_operand") > - (match_operand:V1TI 2 "vlogical_operand"))) > - (set (match_operand:V1TI 0 "vlogical_operand") > - (not:V1TI (match_dup 3)))] > - "TARGET_POWER10" > -{ > - operands[3] = gen_reg_rtx_and_attrs (operands[0]); > -}) > - > (define_expand "vector_ngtu<mode>" > - [(set (match_operand:VEC_I 3 "vlogical_operand") > - (gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand") > - (match_operand:VEC_I 2 "vlogical_operand"))) > - (set (match_operand:VEC_I 0 "vlogical_operand") > - (not:VEC_I (match_dup 3)))] > + [(set (match_operand:VEC_IC 3 "vlogical_operand") > + (gtu:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand") > + (match_operand:VEC_IC 2 "vlogical_operand"))) > + (set (match_operand:VEC_IC 0 "vlogical_operand") > + (not:VEC_IC (match_dup 3)))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > { > operands[3] = gen_reg_rtx_and_attrs (operands[0]); > }) > > -(define_expand "vector_ngtuv1ti" > - [(set (match_operand:V1TI 3 "vlogical_operand") > - (gtu:V1TI (match_operand:V1TI 1 "vlogical_operand") > - (match_operand:V1TI 2 "vlogical_operand"))) > - (set (match_operand:V1TI 0 "vlogical_operand") > - (not:V1TI (match_dup 3)))] > - "TARGET_POWER10" > -{ > - operands[3] = gen_reg_rtx_and_attrs (operands[0]); > -}) > - > ; There are 14 possible vector FP comparison operators, gt and eq of them have > ; been expanded above, so just support 12 remaining operators here. > > @@ -1189,27 +1141,15 @@ (define_expand "vector_ge_<mode>_p" > (define_expand "vector_gtu_<mode>_p" > [(parallel > [(set (reg:CC CR6_REGNO) > - (unspec:CC [(gtu:CC (match_operand:VEC_I 1 "vint_operand") > - (match_operand:VEC_I 2 "vint_operand"))] > + (unspec:CC [(gtu:CC (match_operand:VEC_IC 1 "vint_operand") > + (match_operand:VEC_IC 2 "vint_operand"))] > UNSPEC_PREDICATE)) > - (set (match_operand:VEC_I 0 "vlogical_operand") > - (gtu:VEC_I (match_dup 1) > - (match_dup 2)))])] > + (set (match_operand:VEC_IC 0 "vlogical_operand") > + (gtu:VEC_IC (match_dup 1) > + (match_dup 2)))])] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > "") > > -(define_expand "vector_gtu_v1ti_p" > - [(parallel > - [(set (reg:CC CR6_REGNO) > - (unspec:CC [(gtu:CC (match_operand:V1TI 1 "altivec_register_operand") > - (match_operand:V1TI 2 "altivec_register_operand"))] > - UNSPEC_PREDICATE)) > - (set (match_operand:V1TI 0 "altivec_register_operand") > - (gtu:V1TI (match_dup 1) > - (match_dup 2)))])] > - "TARGET_POWER10" > - "") > - > ;; AltiVec/VSX predicates. > > ;; This expansion is triggered during expansion of predicate built-in > @@ -1582,25 +1522,21 @@ (define_expand "vec_shr_<mode>" > > ;; Expanders for rotate each element in a vector > (define_expand "vrotl<mode>3" > - [(set (match_operand:VEC_I 0 "vint_operand") > - (rotate:VEC_I (match_operand:VEC_I 1 "vint_operand") > - (match_operand:VEC_I 2 "vint_operand")))] > + [(set (match_operand:VEC_IC 0 "vint_operand") > + (rotate:VEC_IC (match_operand:VEC_IC 1 "vint_operand") > + (match_operand:VEC_IC 2 "vint_operand")))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > - "") > - > -(define_expand "vrotlv1ti3" > - [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") > - (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") > - (match_operand:V1TI 2 "vsx_register_operand" "v")))] > - "TARGET_POWER10" > { > - /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ > - rtx tmp = gen_reg_rtx (V1TImode); > + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ > + if (<MODE>mode == V1TImode) > + { > + rtx tmp = gen_reg_rtx (V1TImode); > > - emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); > - emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp)); > - DONE; > -}) > + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); > + emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp)); > + DONE; > + } > + }) > > ;; Expanders for rotatert to make use of vrotl > (define_expand "vrotr<mode>3" > @@ -1663,25 +1599,20 @@ (define_expand "vlshr<mode>3" > > ;; Expanders for arithmetic shift right on each vector element > (define_expand "vashr<mode>3" > - [(set (match_operand:VEC_I 0 "vint_operand") > - (ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand") > - (match_operand:VEC_I 2 "vint_operand")))] > + [(set (match_operand:VEC_IC 0 "vint_operand") > + (ashiftrt:VEC_IC (match_operand:VEC_IC 1 "vint_operand") > + (match_operand:VEC_IC 2 "vint_operand")))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > - "") > - > -;; No immediate version of this 128-bit instruction > -(define_expand "vashrv1ti3" > - [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") > - (ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") > - (match_operand:V1TI 2 "vsx_register_operand" "v")))] > - "TARGET_POWER10" > { > - /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */ > - rtx tmp = gen_reg_rtx (V1TImode); > + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ > + if (<MODE>mode == V1TImode) > + { > + rtx tmp = gen_reg_rtx (V1TImode); > > - emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); > - emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp)); > - DONE; > + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); > + emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp)); > + DONE; > + } > }) > > > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c > new file mode 100644 > index 00000000000..fe667b9cb50 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c > @@ -0,0 +1,87 @@ > +/* Verify that overloaded built-ins for vec_cmp with __int128 > + inputs produce the right code. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +#include <altivec.h> > + > +vector bool __int128 > +test_eq (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmpeq (x, y); > +} > + > +vector bool __int128 > +testu_eq (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmpeq (x, y); > +} > + > +vector bool __int128 > +test_ge (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmpge (x, y); > +} > + > +vector bool __int128 > +testu_ge (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmpge (x, y); > +} > + > +vector bool __int128 > +test_gt (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmpgt (x, y); > +} > + > +vector bool __int128 > +testu_gt (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmpgt (x, y); > +} > + > +vector bool __int128 > +test_le (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmple (x, y); > +} > + > +vector bool __int128 > +testu_le (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmple (x, y); > +} > + > +vector bool __int128 > +test_lt (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmplt (x, y); > +} > + > +vector bool __int128 > +testu_lt (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmplt (x, y); > +} > + > +vector bool __int128 > +test_ne (vector signed __int128 x, vector signed __int128 y) > +{ > + return vec_cmpne (x, y); > +} > + > +vector bool __int128 > +testu_ne (vector unsigned __int128 x, vector unsigned __int128 y) > +{ > + return vec_cmpne (x, y); > +} > + > +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */ > +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */ > +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */ > +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c > new file mode 100644 > index 00000000000..9f1ba791cc5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c > @@ -0,0 +1,81 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +vector bool __int128 > +test_eq (vector signed __int128 a, vector signed __int128 b) > +{ > + return a == b; > +} > + > +vector bool __int128 > +test_ne (vector signed __int128 a, vector signed __int128 b) > +{ > + return a != b; > +} > + > +vector bool __int128 > +test_gt (vector signed __int128 a, vector signed __int128 b) > +{ > + return a > b; > +} > + > +vector bool __int128 > +test_ge (vector signed __int128 a, vector signed __int128 b) > +{ > + return a >= b; > +} > + > +vector bool __int128 > +test_lt (vector signed __int128 a, vector signed __int128 b) > +{ > + return a < b; > +} > + > +vector bool __int128 > +test_le (vector signed __int128 a, vector signed __int128 b) > +{ > + return a <= b; > +} > + > +vector bool __int128 > +testu_eq (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a == b; > +} > + > +vector bool __int128 > +testu_ne (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a != b; > +} > + > +vector bool __int128 > +testu_gt (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a > b; > +} > + > +vector bool __int128 > +testu_ge (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a >= b; > +} > + > +vector bool __int128 > +testu_lt (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a < b; > +} > + > +vector bool __int128 > +testu_le (vector unsigned __int128 a, vector unsigned __int128 b) > +{ > + return a <= b; > +} > + > +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */ > +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */ > +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */ > +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */ >
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index e925ba9fad9..b67f4e066a8 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -2000,16 +2000,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_VCMPEQUH: case RS6000_BIF_VCMPEQUW: case RS6000_BIF_VCMPEQUD: - /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple - folding produces worse code for 128-bit compares. */ + case RS6000_BIF_VCMPEQUT: fold_compare_helper (gsi, EQ_EXPR, stmt); return true; case RS6000_BIF_VCMPNEB: case RS6000_BIF_VCMPNEH: case RS6000_BIF_VCMPNEW: - /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple - folding produces worse code for 128-bit compares. */ + case RS6000_BIF_VCMPNET: fold_compare_helper (gsi, NE_EXPR, stmt); return true; @@ -2021,9 +2019,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_CMPGE_U4SI: case RS6000_BIF_CMPGE_2DI: case RS6000_BIF_CMPGE_U2DI: - /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI - for now, because gimple folding produces worse code for 128-bit - compares. */ + case RS6000_BIF_CMPGE_1TI: + case RS6000_BIF_CMPGE_U1TI: fold_compare_helper (gsi, GE_EXPR, stmt); return true; @@ -2035,9 +2032,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_VCMPGTUW: case RS6000_BIF_VCMPGTUD: case RS6000_BIF_VCMPGTSD: - /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST - for now, because gimple folding produces worse code for 128-bit - compares. */ + case RS6000_BIF_VCMPGTUT: + case RS6000_BIF_VCMPGTST: fold_compare_helper (gsi, GT_EXPR, stmt); return true; @@ -2049,9 +2045,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_CMPLE_U4SI: case RS6000_BIF_CMPLE_2DI: case RS6000_BIF_CMPLE_U2DI: - /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI - for now, because gimple folding produces worse code for 128-bit - compares. */ + case RS6000_BIF_CMPLE_1TI: + case RS6000_BIF_CMPLE_U1TI: fold_compare_helper (gsi, LE_EXPR, stmt); return true; diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 4d0797c48f8..a0d33d2f604 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -26,6 +26,9 @@ ;; Vector int modes (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) +;; Vector int modes for comparison, shift and rotation +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")]) + ;; 128-bit int modes (define_mode_iterator VEC_TI [V1TI TI]) @@ -533,10 +536,10 @@ (define_expand "vcond_mask_<mode><VEC_int>" ;; For signed integer vectors comparison. (define_expand "vec_cmp<mode><mode>" - [(set (match_operand:VEC_I 0 "vint_operand") + [(set (match_operand:VEC_IC 0 "vint_operand") (match_operator 1 "signed_or_equality_comparison_operator" - [(match_operand:VEC_I 2 "vint_operand") - (match_operand:VEC_I 3 "vint_operand")]))] + [(match_operand:VEC_IC 2 "vint_operand") + (match_operand:VEC_IC 3 "vint_operand")]))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { enum rtx_code code = GET_CODE (operands[1]); @@ -573,10 +576,10 @@ (define_expand "vec_cmp<mode><mode>" ;; For unsigned integer vectors comparison. (define_expand "vec_cmpu<mode><mode>" - [(set (match_operand:VEC_I 0 "vint_operand") + [(set (match_operand:VEC_IC 0 "vint_operand") (match_operator 1 "unsigned_or_equality_comparison_operator" - [(match_operand:VEC_I 2 "vint_operand") - (match_operand:VEC_I 3 "vint_operand")]))] + [(match_operand:VEC_IC 2 "vint_operand") + (match_operand:VEC_IC 3 "vint_operand")]))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { enum rtx_code code = GET_CODE (operands[1]); @@ -690,116 +693,65 @@ (define_expand "vector_gt<mode>" ; >= for integer vectors: swap operands and apply not-greater-than (define_expand "vector_nlt<mode>" - [(set (match_operand:VEC_I 3 "vlogical_operand") - (gt:VEC_I (match_operand:VEC_I 2 "vlogical_operand") - (match_operand:VEC_I 1 "vlogical_operand"))) - (set (match_operand:VEC_I 0 "vlogical_operand") - (not:VEC_I (match_dup 3)))] + [(set (match_operand:VEC_IC 3 "vlogical_operand") + (gt:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand") + (match_operand:VEC_IC 1 "vlogical_operand"))) + (set (match_operand:VEC_IC 0 "vlogical_operand") + (not:VEC_IC (match_dup 3)))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_expand "vector_nltv1ti" - [(set (match_operand:V1TI 3 "vlogical_operand") - (gt:V1TI (match_operand:V1TI 2 "vlogical_operand") - (match_operand:V1TI 1 "vlogical_operand"))) - (set (match_operand:V1TI 0 "vlogical_operand") - (not:V1TI (match_dup 3)))] - "TARGET_POWER10" -{ - operands[3] = gen_reg_rtx_and_attrs (operands[0]); -}) - (define_expand "vector_gtu<mode>" - [(set (match_operand:VEC_I 0 "vint_operand") - (gtu:VEC_I (match_operand:VEC_I 1 "vint_operand") - (match_operand:VEC_I 2 "vint_operand")))] + [(set (match_operand:VEC_IC 0 "vint_operand") + (gtu:VEC_IC (match_operand:VEC_IC 1 "vint_operand") + (match_operand:VEC_IC 2 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") -(define_expand "vector_gtuv1ti" - [(set (match_operand:V1TI 0 "altivec_register_operand") - (gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand") - (match_operand:V1TI 2 "altivec_register_operand")))] - "TARGET_POWER10" - "") - ; >= for integer vectors: swap operands and apply not-greater-than (define_expand "vector_nltu<mode>" - [(set (match_operand:VEC_I 3 "vlogical_operand") - (gtu:VEC_I (match_operand:VEC_I 2 "vlogical_operand") - (match_operand:VEC_I 1 "vlogical_operand"))) - (set (match_operand:VEC_I 0 "vlogical_operand") - (not:VEC_I (match_dup 3)))] + [(set (match_operand:VEC_IC 3 "vlogical_operand") + (gtu:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand") + (match_operand:VEC_IC 1 "vlogical_operand"))) + (set (match_operand:VEC_IC 0 "vlogical_operand") + (not:VEC_IC (match_dup 3)))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_expand "vector_nltuv1ti" - [(set (match_operand:V1TI 3 "vlogical_operand") - (gtu:V1TI (match_operand:V1TI 2 "vlogical_operand") - (match_operand:V1TI 1 "vlogical_operand"))) - (set (match_operand:V1TI 0 "vlogical_operand") - (not:V1TI (match_dup 3)))] - "TARGET_POWER10" -{ - operands[3] = gen_reg_rtx_and_attrs (operands[0]); -}) - (define_expand "vector_geu<mode>" - [(set (match_operand:VEC_I 0 "vint_operand") - (geu:VEC_I (match_operand:VEC_I 1 "vint_operand") - (match_operand:VEC_I 2 "vint_operand")))] + [(set (match_operand:VEC_IC 0 "vint_operand") + (geu:VEC_IC (match_operand:VEC_IC 1 "vint_operand") + (match_operand:VEC_IC 2 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ; <= for integer vectors: apply not-greater-than (define_expand "vector_ngt<mode>" - [(set (match_operand:VEC_I 3 "vlogical_operand") - (gt:VEC_I (match_operand:VEC_I 1 "vlogical_operand") - (match_operand:VEC_I 2 "vlogical_operand"))) - (set (match_operand:VEC_I 0 "vlogical_operand") - (not:VEC_I (match_dup 3)))] + [(set (match_operand:VEC_IC 3 "vlogical_operand") + (gt:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand") + (match_operand:VEC_IC 2 "vlogical_operand"))) + (set (match_operand:VEC_IC 0 "vlogical_operand") + (not:VEC_IC (match_dup 3)))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_expand "vector_ngtv1ti" - [(set (match_operand:V1TI 3 "vlogical_operand") - (gt:V1TI (match_operand:V1TI 1 "vlogical_operand") - (match_operand:V1TI 2 "vlogical_operand"))) - (set (match_operand:V1TI 0 "vlogical_operand") - (not:V1TI (match_dup 3)))] - "TARGET_POWER10" -{ - operands[3] = gen_reg_rtx_and_attrs (operands[0]); -}) - (define_expand "vector_ngtu<mode>" - [(set (match_operand:VEC_I 3 "vlogical_operand") - (gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand") - (match_operand:VEC_I 2 "vlogical_operand"))) - (set (match_operand:VEC_I 0 "vlogical_operand") - (not:VEC_I (match_dup 3)))] + [(set (match_operand:VEC_IC 3 "vlogical_operand") + (gtu:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand") + (match_operand:VEC_IC 2 "vlogical_operand"))) + (set (match_operand:VEC_IC 0 "vlogical_operand") + (not:VEC_IC (match_dup 3)))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_expand "vector_ngtuv1ti" - [(set (match_operand:V1TI 3 "vlogical_operand") - (gtu:V1TI (match_operand:V1TI 1 "vlogical_operand") - (match_operand:V1TI 2 "vlogical_operand"))) - (set (match_operand:V1TI 0 "vlogical_operand") - (not:V1TI (match_dup 3)))] - "TARGET_POWER10" -{ - operands[3] = gen_reg_rtx_and_attrs (operands[0]); -}) - ; There are 14 possible vector FP comparison operators, gt and eq of them have ; been expanded above, so just support 12 remaining operators here. @@ -1189,27 +1141,15 @@ (define_expand "vector_ge_<mode>_p" (define_expand "vector_gtu_<mode>_p" [(parallel [(set (reg:CC CR6_REGNO) - (unspec:CC [(gtu:CC (match_operand:VEC_I 1 "vint_operand") - (match_operand:VEC_I 2 "vint_operand"))] + (unspec:CC [(gtu:CC (match_operand:VEC_IC 1 "vint_operand") + (match_operand:VEC_IC 2 "vint_operand"))] UNSPEC_PREDICATE)) - (set (match_operand:VEC_I 0 "vlogical_operand") - (gtu:VEC_I (match_dup 1) - (match_dup 2)))])] + (set (match_operand:VEC_IC 0 "vlogical_operand") + (gtu:VEC_IC (match_dup 1) + (match_dup 2)))])] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") -(define_expand "vector_gtu_v1ti_p" - [(parallel - [(set (reg:CC CR6_REGNO) - (unspec:CC [(gtu:CC (match_operand:V1TI 1 "altivec_register_operand") - (match_operand:V1TI 2 "altivec_register_operand"))] - UNSPEC_PREDICATE)) - (set (match_operand:V1TI 0 "altivec_register_operand") - (gtu:V1TI (match_dup 1) - (match_dup 2)))])] - "TARGET_POWER10" - "") - ;; AltiVec/VSX predicates. ;; This expansion is triggered during expansion of predicate built-in @@ -1582,25 +1522,21 @@ (define_expand "vec_shr_<mode>" ;; Expanders for rotate each element in a vector (define_expand "vrotl<mode>3" - [(set (match_operand:VEC_I 0 "vint_operand") - (rotate:VEC_I (match_operand:VEC_I 1 "vint_operand") - (match_operand:VEC_I 2 "vint_operand")))] + [(set (match_operand:VEC_IC 0 "vint_operand") + (rotate:VEC_IC (match_operand:VEC_IC 1 "vint_operand") + (match_operand:VEC_IC 2 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") - -(define_expand "vrotlv1ti3" - [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") - (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") - (match_operand:V1TI 2 "vsx_register_operand" "v")))] - "TARGET_POWER10" { - /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ - rtx tmp = gen_reg_rtx (V1TImode); + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ + if (<MODE>mode == V1TImode) + { + rtx tmp = gen_reg_rtx (V1TImode); - emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); - emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp)); - DONE; -}) + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp)); + DONE; + } + }) ;; Expanders for rotatert to make use of vrotl (define_expand "vrotr<mode>3" @@ -1663,25 +1599,20 @@ (define_expand "vlshr<mode>3" ;; Expanders for arithmetic shift right on each vector element (define_expand "vashr<mode>3" - [(set (match_operand:VEC_I 0 "vint_operand") - (ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand") - (match_operand:VEC_I 2 "vint_operand")))] + [(set (match_operand:VEC_IC 0 "vint_operand") + (ashiftrt:VEC_IC (match_operand:VEC_IC 1 "vint_operand") + (match_operand:VEC_IC 2 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") - -;; No immediate version of this 128-bit instruction -(define_expand "vashrv1ti3" - [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") - (ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") - (match_operand:V1TI 2 "vsx_register_operand" "v")))] - "TARGET_POWER10" { - /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */ - rtx tmp = gen_reg_rtx (V1TImode); + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ + if (<MODE>mode == V1TImode) + { + rtx tmp = gen_reg_rtx (V1TImode); - emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); - emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp)); - DONE; + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp)); + DONE; + } }) diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c new file mode 100644 index 00000000000..fe667b9cb50 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c @@ -0,0 +1,87 @@ +/* Verify that overloaded built-ins for vec_cmp with __int128 + inputs produce the right code. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-require-effective-target int128 } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <altivec.h> + +vector bool __int128 +test_eq (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpeq (x, y); +} + +vector bool __int128 +testu_eq (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpeq (x, y); +} + +vector bool __int128 +test_ge (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpge (x, y); +} + +vector bool __int128 +testu_ge (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpge (x, y); +} + +vector bool __int128 +test_gt (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpgt (x, y); +} + +vector bool __int128 +testu_gt (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpgt (x, y); +} + +vector bool __int128 +test_le (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmple (x, y); +} + +vector bool __int128 +testu_le (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmple (x, y); +} + +vector bool __int128 +test_lt (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmplt (x, y); +} + +vector bool __int128 +testu_lt (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmplt (x, y); +} + +vector bool __int128 +test_ne (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpne (x, y); +} + +vector bool __int128 +testu_ne (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpne (x, y); +} + +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */ +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c new file mode 100644 index 00000000000..9f1ba791cc5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c @@ -0,0 +1,81 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-require-effective-target int128 } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +vector bool __int128 +test_eq (vector signed __int128 a, vector signed __int128 b) +{ + return a == b; +} + +vector bool __int128 +test_ne (vector signed __int128 a, vector signed __int128 b) +{ + return a != b; +} + +vector bool __int128 +test_gt (vector signed __int128 a, vector signed __int128 b) +{ + return a > b; +} + +vector bool __int128 +test_ge (vector signed __int128 a, vector signed __int128 b) +{ + return a >= b; +} + +vector bool __int128 +test_lt (vector signed __int128 a, vector signed __int128 b) +{ + return a < b; +} + +vector bool __int128 +test_le (vector signed __int128 a, vector signed __int128 b) +{ + return a <= b; +} + +vector bool __int128 +testu_eq (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a == b; +} + +vector bool __int128 +testu_ne (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a != b; +} + +vector bool __int128 +testu_gt (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a > b; +} + +vector bool __int128 +testu_ge (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a >= b; +} + +vector bool __int128 +testu_lt (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a < b; +} + +vector bool __int128 +testu_le (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a <= b; +} + +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */ +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */