Message ID | 0eacc4b3-8de4-b373-2f92-bf91bf1f2b14@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [rs6000] Fix PR92132 | expand |
Fixed one place without consistent mode. Bootstrapped and regress testing passed on powerpc64le-linux. Thanks! Kewen --- gcc/ChangeLog 2019-10-25 Kewen Lin <linkw@gcc.gnu.org> PR target/92132 * config/rs6000/rs6000.md (one_cmpl<mode>3_internal): Expose name. * config/rs6000/vector.md (fpcmpun): New code_iterator. (vcond_mask_<mode><mode>): New expand. (vcond_mask_<mode><VEC_int>): Likewise. (vec_cmp<mode><mode>): Likewise. (vec_cmpu<mode><mode>): Likewise. (vec_cmp<mode><VEC_int>): Likewise. (vector_{ungt,unge,unlt,unle}<mode>): Likewise. (vector_uneq<mode>): Expose name. (vector_ltgt<mode>): Likewise. (vector_unordered<mode>): Likewise. (vector_ordered<mode>): Likewise. gcc/testsuite/ChangeLog 2019-10-25 Kewen Lin <linkw@gcc.gnu.org> PR target/92132 * gcc.target/powerpc/pr92132-fp-1.c: New test. * gcc.target/powerpc/pr92132-fp-2.c: New test. * gcc.target/powerpc/pr92132-int-1.c: New test. * gcc.target/powerpc/pr92132-int-2.c: New test. diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d0cca1e..2a68548 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6800,7 +6800,7 @@ (const_string "16")))]) ;; 128-bit one's complement -(define_insn_and_split "*one_cmpl<mode>3_internal" +(define_insn_and_split "one_cmpl<mode>3_internal" [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 886cbad..0ef64eb 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -107,6 +107,8 @@ (smin "smin") (smax "smax")]) +(define_code_iterator fpcmpun [ungt unge unlt unle]) + ;; Vector move instructions. Little-endian VSX loads and stores require ;; special handling to circumvent "element endianness." @@ -493,6 +495,241 @@ FAIL; }) +;; To support vector condition vectorization, define vcond_mask and vec_cmp. 
+ +;; Same mode for condition true/false values and predicate operand. +(define_expand "vcond_mask_<mode><mode>" + [(match_operand:VEC_I 0 "vint_operand") + (match_operand:VEC_I 1 "vint_operand") + (match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], + operands[3])); + DONE; +}) + +;; Condition true/false values are float but predicate operand is of +;; type integer vector with same element size. +(define_expand "vcond_mask_<mode><VEC_int>" + [(match_operand:VEC_F 0 "vfloat_operand") + (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand") + (match_operand:<VEC_INT> 3 "vint_operand")] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], + gen_lowpart (<MODE>mode, operands[3]))); + DONE; +}) + +;; For signed integer vectors comparison. 
+(define_expand "vec_cmp<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (match_operator 1 "comparison_operator" + [(match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx tmp = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + break; + case GE: + emit_insn ( + gen_vector_nlt<mode> (operands[0], operands[2], operands[3], tmp)); + break; + case GT: + emit_insn (gen_vector_gt<mode> (operands[0], operands[2], operands[3])); + break; + case LE: + emit_insn ( + gen_vector_ngt<mode> (operands[0], operands[2], operands[3], tmp)); + break; + case LT: + emit_insn (gen_vector_gt<mode> (operands[0], operands[3], operands[2])); + break; + case GEU: + emit_insn ( + gen_vector_nltu<mode> (operands[0], operands[2], operands[3], tmp)); + break; + case GTU: + emit_insn (gen_vector_gtu<mode> (operands[0], operands[2], operands[3])); + break; + case LEU: + emit_insn ( + gen_vector_ngtu<mode> (operands[0], operands[2], operands[3], tmp)); + break; + case LTU: + emit_insn (gen_vector_gtu<mode> (operands[0], operands[3], operands[2])); + break; + default: + gcc_unreachable (); + break; + } + DONE; +}) + +;; For unsigned integer vectors comparison. +(define_expand "vec_cmpu<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (match_operator 1 "comparison_operator" + [(match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +;; For float point vectors comparison. 
+(define_expand "vec_cmp<mode><VEC_int>" + [(set (match_operand:<VEC_INT> 0 "vint_operand") + (match_operator 1 "comparison_operator" + [(match_operand:VEC_F 2 "vfloat_operand") + (match_operand:VEC_F 3 "vfloat_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx res = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_eq<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + emit_insn (gen_one_cmpl<VEC_int>2 (operands[0], operands[0])); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case GE: + emit_insn (gen_vector_ge<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case GT: + emit_insn (gen_vector_gt<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case LE: + emit_insn (gen_vector_ge<mode> (res, operands[3], operands[2])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case LT: + emit_insn (gen_vector_gt<mode> (res, operands[3], operands[2])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case LTGT: + emit_insn (gen_vector_ltgt<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNORDERED: + emit_insn (gen_vector_unordered<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case ORDERED: + emit_insn (gen_vector_ordered<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNEQ: + emit_insn 
(gen_vector_uneq<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNGE: + emit_insn (gen_vector_unge<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNGT: + emit_insn (gen_vector_ungt<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNLE: + emit_insn (gen_vector_unle<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNLT: + emit_insn (gen_vector_unlt<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + + default: + gcc_unreachable (); + } + DONE; +}) + +;; For below vector_UN<cc><mode>: +;; op3 = (op1 >= op1) # !isNaN (op1) +;; op4 = (op2 >= op2) # !isNaN (op2) +;; op5 = !(op3 & op4) # isNaN (op1) || isNaN (op2) +;; op3 = op3 & op1 # isNaN (op1)? 0.0 : op1 +;; op4 = op4 & op2 # isNaN (op2)? 
0.0 : op2 +;; op0 = op3 <cc> op4 # normal cmp if no NaNs +;; op0 = op5 | op0 # UNORDERED | normal cmp + +(define_expand "vector_<code><mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand") + (fpcmpun:VEC_F (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + rtx op3 = gen_reg_rtx (<MODE>mode); + rtx op4 = gen_reg_rtx (<MODE>mode); + rtx op5 = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_vector_ge<mode> (op3, operands[1], operands[1])); + emit_insn (gen_vector_ge<mode> (op4, operands[2], operands[2])); + emit_insn (gen_and<mode>3 (op5, op3, op4)); + emit_insn (gen_one_cmpl<mode>3_internal (op5, op5)); + emit_insn (gen_and<mode>3 (op3, op3, operands[1])); + emit_insn (gen_and<mode>3 (op4, op4, operands[2])); + + switch (<CODE>) + { + case UNLT: + std::swap (op3, op4); + /* Fall through. */ + case UNGT: + emit_insn (gen_vector_gt<mode> (operands[0], op3, op4)); + break; + case UNLE: + std::swap (op3, op4); + /* Fall through. 
*/ + case UNGE: + emit_insn (gen_vector_ge<mode> (operands[0], op3, op4)); + break; + default: + gcc_unreachable (); + } + + emit_insn (gen_ior<mode>3 (operands[0], op5, operands[0])); + DONE; +}) + + (define_expand "vector_eq<mode>" [(set (match_operand:VEC_C 0 "vlogical_operand") (eq:VEC_C (match_operand:VEC_C 1 "vlogical_operand") @@ -575,7 +812,7 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_insn_and_split "*vector_uneq<mode>" +(define_insn_and_split "vector_uneq<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (uneq:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -596,7 +833,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_ltgt<mode>" +(define_insn_and_split "vector_ltgt<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (ltgt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -617,7 +854,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_ordered<mode>" +(define_insn_and_split "vector_ordered<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (ordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -638,7 +875,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_unordered<mode>" +(define_insn_and_split "vector_unordered<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (unordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c new file mode 100644 index 0000000..1023e8c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c @@ -0,0 +1,297 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mvsx -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction 
vectorization, where comparison operands are of + double type and condition true/false values are integer type. Cover all + float point comparison codes. */ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE double + +#define LTGT(a, b) (__builtin_islessgreater ((a), (b))) +#define UNORD(a, b) (__builtin_isunordered ((a), (b))) +#define ORD(a, b) (!__builtin_isunordered ((a), (b))) +#define UNEQ(a, b) (!__builtin_islessgreater ((a), (b))) +#define UNGT(a, b) (!__builtin_islessequal ((a), (b))) +#define UNGE(a, b) (!__builtin_isless ((a), (b))) +#define UNLT(a, b) (!__builtin_isgreaterequal ((a), (b))) +#define UNLE(a, b) (!__builtin_isgreater ((a), (b))) + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ltgt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (LTGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ord 
(FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (ORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_uneq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNEQ (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ungt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGE (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unlt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unle (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLE (a[i], min_v)) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, NAN, 25, 26, 27}; + + FP_TYPE a3[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + FP_TYPE a4[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 6, 7}; + + FP_TYPE a5[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 10, 10}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + 
abort (); + + ret = test_gt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ge (a3, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + ret = test_ltgt (a3, 10); + if (ret != 26) + abort (); + + ret = test_ltgt (a5, 10); + if (ret != 23) + abort (); + + ret = test_unord (a5, 10); + if (ret != 24) + abort (); + + ret = test_ord (a5, 10); + if (ret != 26) + abort (); + + ret = test_uneq (a1, 10); + if (ret != 19) + abort (); + + ret = test_uneq (a4, 10); + if (ret != 24) + abort (); + + ret = test_ungt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unge (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unlt (a1, 10); + if (ret != 18) + abort (); + + ret = test_unlt (a2, 10); + if (ret != 23) + abort (); + + ret = test_unle (a1, 10); + if (ret != 19) + abort (); + + ret = test_unle (a2, 10); + if (ret != 23) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c new file mode 100644 index 0000000..db7b9ad --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c @@ -0,0 +1,297 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vmx_hw } */ +/* { dg-options "-O2 -ftree-vectorize -maltivec -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + float type and condition true/false values are integer type. Cover all + float point comparison codes. 
*/ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE float + +#define LTGT(a, b) (__builtin_islessgreater ((a), (b))) +#define UNORD(a, b) (__builtin_isunordered ((a), (b))) +#define ORD(a, b) (!__builtin_isunordered ((a), (b))) +#define UNEQ(a, b) (!__builtin_islessgreater ((a), (b))) +#define UNGT(a, b) (!__builtin_islessequal ((a), (b))) +#define UNGE(a, b) (!__builtin_isless ((a), (b))) +#define UNLT(a, b) (!__builtin_isgreaterequal ((a), (b))) +#define UNLE(a, b) (!__builtin_isgreater ((a), (b))) + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ltgt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (LTGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (ORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ 
((noinline)) int +test_unord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_uneq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNEQ (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ungt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGE (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unlt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unle (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLE (a[i], min_v)) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, NAN, 25, 26, 27}; + + FP_TYPE a3[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + FP_TYPE a4[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 6, 7}; + + FP_TYPE a5[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 10, 10}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ge (a3, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); 
+ if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + ret = test_ltgt (a3, 10); + if (ret != 26) + abort (); + + ret = test_ltgt (a5, 10); + if (ret != 23) + abort (); + + ret = test_unord (a5, 10); + if (ret != 24) + abort (); + + ret = test_ord (a5, 10); + if (ret != 26) + abort (); + + ret = test_uneq (a1, 10); + if (ret != 19) + abort (); + + ret = test_uneq (a4, 10); + if (ret != 24) + abort (); + + ret = test_ungt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unge (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unlt (a1, 10); + if (ret != 18) + abort (); + + ret = test_unlt (a2, 10); + if (ret != 23) + abort (); + + ret = test_unle (a1, 10); + if (ret != 19) + abort (); + + ret = test_unle (a2, 10); + if (ret != 23) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c b/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c new file mode 100644 index 0000000..a786811 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mdejagnu-cpu=power8 -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + signed long long type and condition true/false values are integer type. 
*/ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE signed long long + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a2, 10); + if (ret != 19) + abort (); + + ret = test_ge (a2, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c new file mode 100644 index 0000000..dd3c030 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mdejagnu-cpu=power8 -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + unsigned long long type and condition true/false values are integer type. */ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE unsigned long long + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + ret 
= test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a2, 10); + if (ret != 19) + abort (); + + ret = test_ge (a2, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */
On Tue, Oct 29, 2019 at 01:16:53PM +0800, Kewen.Lin wrote: > (vcond_mask_<mode><mode>): New expand. Say for which mode please? Like (vcond_mask_<mode><mode> for VEC_I and VEC_I): New expand. > (vcond_mask_<mode><VEC_int>): Likewise. "for VEC_I and VEC_F", here, but the actual names in the pattern are for vector modes of same-size integer elements. Maybe it is clear enough like this, dunno. > (vector_{ungt,unge,unlt,unle}<mode>): Likewise. Never use wildcards (or shell expansions) in the "what changed" part of a changelog, because people try to search for that. > ;; 128-bit one's complement > -(define_insn_and_split "*one_cmpl<mode>3_internal" > +(define_insn_and_split "one_cmpl<mode>3_internal" Instead, rename it to "one_cmpl<mode>3" and delete the define_expand that serves no function? > +(define_code_iterator fpcmpun [ungt unge unlt unle]) Why these four? Should there be more? Should this be added to some existing iterator? It's not all comparisons including unordered, there are uneq, unordered itself, and ne as well. > +;; Same mode for condition true/false values and predicate operand. > +(define_expand "vcond_mask_<mode><mode>" > + [(match_operand:VEC_I 0 "vint_operand") > + (match_operand:VEC_I 1 "vint_operand") > + (match_operand:VEC_I 2 "vint_operand") > + (match_operand:VEC_I 3 "vint_operand")] > + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > +{ > + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], > + operands[3])); > + DONE; > +}) So is this exactly the same as vsel/xxsel? > +;; For signed integer vectors comparison. 
> +(define_expand "vec_cmp<mode><mode>" > + case GEU: > + emit_insn ( > + gen_vector_nltu<mode> (operands[0], operands[2], operands[3], tmp)); > + break; > + case GTU: > + emit_insn (gen_vector_gtu<mode> (operands[0], operands[2], operands[3])); > + break; > + case LEU: > + emit_insn ( > + gen_vector_ngtu<mode> (operands[0], operands[2], operands[3], tmp)); > + break; > + case LTU: > + emit_insn (gen_vector_gtu<mode> (operands[0], operands[3], operands[2])); > + break; You shouldn't allow those for signed comparisons, that will only hide problems. You can do all the rest with some iterator / code attribute? Or two cases, one for the codes that need ops 2 and 3 swapped, one for the rest? > +;; For unsigned integer vectors comparison. > +(define_expand "vec_cmpu<mode><mode>" > + [(set (match_operand:VEC_I 0 "vint_operand") > + (match_operator 1 "comparison_operator" > + [(match_operand:VEC_I 2 "vint_operand") > + (match_operand:VEC_I 3 "vint_operand")]))] > + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > +{ > + emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1], > + operands[2], operands[3])); > + DONE; > +}) unsigned_comparison_operator? Why *are* there separate vec_cmp and vec_cmpu patterns, in the first place? Segher
Hi Segher, Thanks for the comments! on 2019/11/2 上午7:17, Segher Boessenkool wrote: > On Tue, Oct 29, 2019 at 01:16:53PM +0800, Kewen.Lin wrote: >> (vcond_mask_<mode><mode>): New expand. > > Say for which mode please? Like > (vcond_mask_<mode><mode> for VEC_I and VEC_I): New expand. > Fixed as below. >> (vcond_mask_<mode><VEC_int>): Likewise. > > "for VEC_I and VEC_F", here, but the actual names in the pattern are for > vector modes of same-size integer elements. Maybe it is clear enough like > this, dunno. Changed to "for VEC_F", described as "New expand for float vector modes and same-size integer vector modes". > >> (vector_{ungt,unge,unlt,unle}<mode>): Likewise. > > Never use wildcards (or shell expansions) in the "what changed" part of a > changelog, because people try to search for that. Thanks for the explanation, fixed. > >> ;; 128-bit one's complement >> -(define_insn_and_split "*one_cmpl<mode>3_internal" >> +(define_insn_and_split "one_cmpl<mode>3_internal" > > Instead, rename it to "one_cmpl<mode>3" and delete the define_expand that > serves no function? Renamed. Sorry, which "define_expand" do you mean here? I thought it was the existing one_cmpl<mode>3, but I could not find it. > >> +(define_code_iterator fpcmpun [ungt unge unlt unle]) > > Why these four? Should there be more? Should this be added to some > existing iterator? For floating-point comparison operators on vector types, rs6000 currently supports eq, gt, ge, *ltgt, *unordered, *ordered, *uneq (* for unnamed). We can leverage gt, ge, eq for lt, le, ne, which leaves these four. I originally wanted to merge them into the existing unordered or uneq, but I found it's hard to share their existing patterns.
For example, the uneq looks like: [(set (match_dup 3) (gt:VEC_F (match_dup 1) (match_dup 2))) (set (match_dup 4) (gt:VEC_F (match_dup 2) (match_dup 1))) (set (match_dup 0) (and:VEC_F (not:VEC_F (match_dup 3)) (not:VEC_F (match_dup 4))))] While ungt looks like: [(set (match_dup 3) (ge:VEC_F (match_dup 1) (match_dup 2))) (set (match_dup 4) (ge:VEC_F (match_dup 2) (match_dup 1))) (set (match_dup 3) (ior:VEC_F (not:VEC_F (match_dup 3)) (not:VEC_F (match_dup 4)))) (set (match_dup 4) (gt:VEC_F (match_dup 1) (match_dup 2))) (set (match_dup 3) (ior:VEC_F (match_dup 3) (match_dup 4)))] > > It's not all comparisons including unordered, there are uneq, unordered > itself, and ne as well. Right, it does not cover all of them; it is just a list of the comparison operators that still lack support. > >> +;; Same mode for condition true/false values and predicate operand. >> +(define_expand "vcond_mask_<mode><mode>" >> + [(match_operand:VEC_I 0 "vint_operand") >> + (match_operand:VEC_I 1 "vint_operand") >> + (match_operand:VEC_I 2 "vint_operand") >> + (match_operand:VEC_I 3 "vint_operand")] >> + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" >> +{ >> + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], >> + operands[3])); >> + DONE; >> +}) > > So is this exactly the same as vsel/xxsel? Yes, it is expanded into an if_then_else with an ne comparison against zero, which can match their patterns. > >> +;; For signed integer vectors comparison. >> +(define_expand "vec_cmp<mode><mode>" > >> + case GEU: >> + emit_insn ( >> + gen_vector_nltu<mode> (operands[0], operands[2], operands[3], tmp)); >> + break; >> + case GTU: >> + emit_insn (gen_vector_gtu<mode> (operands[0], operands[2], operands[3])); >> + break; >> + case LEU: >> + emit_insn ( >> + gen_vector_ngtu<mode> (operands[0], operands[2], operands[3], tmp)); >> + break; >> + case LTU: >> + emit_insn (gen_vector_gtu<mode> (operands[0], operands[3], operands[2])); >> + break; > > You shouldn't allow those for signed comparisons, that will only hide > problems.
OK, moved into vec_cmpu*. > > You can do all the rest with some iterator / code attribute? Or two cases, > one for the codes that need ops 2 and 3 swapped, one for the rest? > Sorry, I tried to use code attributes here but failed. I think the reason is the pattern name doesn't have <code>. I can only get the code from operand 1, then have to use "switch case"? I can change it with one more define_expand, but is that what we wanted? It looks we still need "case"s. define_expand "vec_cmp<mode><mode>" ... {... enum rtx_code code = GET_CODE (operands[1]); switch (code) case GT: ... gen_vec_cmp<mode><mode>gt ... } define_expand "vec_cmp<mode><mode><code>" ... gen_vector_<code_name><mode> >> +;; For unsigned integer vectors comparison. >> +(define_expand "vec_cmpu<mode><mode>" >> + [(set (match_operand:VEC_I 0 "vint_operand") >> + (match_operator 1 "comparison_operator" >> + [(match_operand:VEC_I 2 "vint_operand") >> + (match_operand:VEC_I 3 "vint_operand")]))] >> + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" >> +{ >> + emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1], >> + operands[2], operands[3])); >> + DONE; >> +}) > > unsigned_comparison_operator? Good point, fixed. > > Why *are* there separate vec_cmp and vec_cmpu patterns, in the first place? > If I understood the question correctly, you were asking why not have one unique pattern for them? I noticed some vectorization related SPNs have separate signed and unsigned patterns, I guess it's due to that sign matters for some vector instructions, some platform may only support some of them, using sign for fine grain queries and checks? Updated patch attached by addressing above comments. BR, Kewen ------------------ gcc/ChangeLog 2019-11-05 Kewen Lin <linkw@gcc.gnu.org> PR target/92132 * config/rs6000/rs6000.md (one_cmpl<mode>3_internal): Rename to one_cmpl<mode>3 and expose. * config/rs6000/predicates.md (signed_or_equality_comparison_operator): New predicate. 
* config/rs6000/vector.md (fpcmpun_gtelte): New code_iterator. (vcond_mask_<mode><mode> for VEC_I and VEC_I): New expand. (vec_cmp<mode><mode> for VEC_I and VEC_I): Likewise. (vec_cmpu<mode><mode> for VEC_I and VEC_I): Likewise. (vcond_mask_<mode><VEC_int> for VEC_F): New expand for float vector modes and same-size integer vector modes. (vec_cmp<mode><VEC_int> for VEC_F): Likewise. (vector_<code><mode> for fpcmpun_gtelte): New expand. (vector_uneq<mode>): Expose name. (vector_ltgt<mode>): Likewise. (vector_unordered<mode>): Likewise. (vector_ordered<mode>): Likewise. gcc/testsuite/ChangeLog 2019-11-05 Kewen Lin <linkw@gcc.gnu.org> PR target/92132 * gcc.target/powerpc/pr92132-fp-1.c: New test. * gcc.target/powerpc/pr92132-fp-2.c: New test. * gcc.target/powerpc/pr92132-int-1.c: New test. * gcc.target/powerpc/pr92132-int-2.c: New test. diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 345d9c3..5665174 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1140,6 +1140,11 @@ (define_predicate "signed_comparison_operator" (match_code "lt,gt,le,ge")) +;; Return 1 if OP is a signed comparison or an equality operator. +(define_predicate "signed_or_equality_comparison_operator" + (ior (match_operand 0 "equality_operator") + (match_operand 0 "signed_comparison_operator"))) + ;; Return 1 if OP is a comparison operation that is valid for an SCC insn -- ;; it must be a positive comparison. 
(define_predicate "scc_comparison_operator" diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d0cca1e..e3429d7 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6800,7 +6800,7 @@ (const_string "16")))]) ;; 128-bit one's complement -(define_insn_and_split "*one_cmpl<mode>3_internal" +(define_insn_and_split "one_cmpl<mode>3" [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 886cbad..7111b43 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -107,6 +107,8 @@ (smin "smin") (smax "smax")]) +(define_code_iterator fpcmpun_gtelte [ungt unge unlt unle]) + ;; Vector move instructions. Little-endian VSX loads and stores require ;; special handling to circumvent "element endianness." @@ -493,6 +495,217 @@ FAIL; }) +;; To support vector condition vectorization, define vcond_mask and vec_cmp. + +;; Same mode for condition true/false values and predicate operand. +(define_expand "vcond_mask_<mode><mode>" + [(match_operand:VEC_I 0 "vint_operand") + (match_operand:VEC_I 1 "vint_operand") + (match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], + operands[3])); + DONE; +}) + +;; Condition true/false values are float but predicate operand is of +;; type integer vector with same element size. 
+(define_expand "vcond_mask_<mode><VEC_int>" + [(match_operand:VEC_F 0 "vfloat_operand") + (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand") + (match_operand:<VEC_INT> 3 "vint_operand")] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], + gen_lowpart (<MODE>mode, operands[3]))); + DONE; +}) + +;; For signed integer vectors comparison. +(define_expand "vec_cmp<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (match_operator 1 "signed_or_equality_comparison_operator" + [(match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx tmp = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + break; + case GE: + emit_insn (gen_vector_nlt<mode> (operands[0],operands[2], operands[3], + tmp)); + break; + case GT: + emit_insn (gen_vector_gt<mode> (operands[0], operands[2], operands[3])); + break; + case LE: + emit_insn (gen_vector_ngt<mode> (operands[0], operands[2], operands[3], + tmp)); + break; + case LT: + emit_insn (gen_vector_gt<mode> (operands[0], operands[3], operands[2])); + break; + default: + gcc_unreachable (); + break; + } + DONE; +}) + +;; For unsigned integer vectors comparison. 
+(define_expand "vec_cmpu<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (match_operator 1 "unsigned_comparison_operator" + [(match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx tmp = gen_reg_rtx (<MODE>mode); + switch (code) + { + case GEU: + emit_insn (gen_vector_nltu<mode> (operands[0], operands[2], operands[3], + tmp)); + break; + case GTU: + emit_insn (gen_vector_gtu<mode> (operands[0], operands[2], operands[3])); + break; + case LEU: + emit_insn (gen_vector_ngtu<mode> (operands[0], operands[2], operands[3], + tmp)); + break; + case LTU: + emit_insn (gen_vector_gtu<mode> (operands[0], operands[3], operands[2])); + break; + default: + gcc_unreachable (); + break; + } + DONE; +}) + +;; For float point vectors comparison. +(define_expand "vec_cmp<mode><VEC_int>" + [(set (match_operand:<VEC_INT> 0 "vint_operand") + (match_operator 1 "comparison_operator" + [(match_operand:VEC_F 2 "vfloat_operand") + (match_operand:VEC_F 3 "vfloat_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx res = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_eq<mode> (res, operands[2], operands[3])); + emit_insn (gen_one_cmpl<mode>3 (res, res)); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (res, operands[2], operands[3])); + break; + case GE: + emit_insn (gen_vector_ge<mode> (res, operands[2], operands[3])); + break; + case GT: + emit_insn (gen_vector_gt<mode> (res, operands[2], operands[3])); + break; + case LE: + emit_insn (gen_vector_ge<mode> (res, operands[3], operands[2])); + break; + case LT: + emit_insn (gen_vector_gt<mode> (res, operands[3], operands[2])); + break; + case LTGT: + emit_insn (gen_vector_ltgt<mode> (res, operands[2], operands[3])); + break; + case UNORDERED: + emit_insn (gen_vector_unordered<mode> (res, operands[2], 
operands[3])); + break; + case ORDERED: + emit_insn (gen_vector_ordered<mode> (res, operands[2], operands[3])); + break; + case UNEQ: + emit_insn (gen_vector_uneq<mode> (res, operands[2], operands[3])); + break; + case UNGE: + emit_insn (gen_vector_unge<mode> (res, operands[2], operands[3])); + break; + case UNGT: + emit_insn (gen_vector_ungt<mode> (res, operands[2], operands[3])); + break; + case UNLE: + emit_insn (gen_vector_unle<mode> (res, operands[2], operands[3])); + break; + case UNLT: + emit_insn (gen_vector_unlt<mode> (res, operands[2], operands[3])); + break; + + default: + gcc_unreachable (); + } + + emit_insn (gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + DONE; +}) + +;; For below vector_UN<cc><mode>: +;; op3 = (op1 >= op2) # !isNaN (op1) +;; op4 = (op2 >= op1) # !isNaN (op2) +;; op3 = !(op3 & op4) # isNaN (op1) | isNaN (op2) +;; op4 = op1 <cc> op2 # normal cmp +;; op0 = op3 | op04 # UNORDERED result | normal cmp result + +(define_expand "vector_<code><mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand") + (fpcmpun_gtelte:VEC_F (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + rtx op3 = gen_reg_rtx (<MODE>mode); + rtx op4 = gen_reg_rtx (<MODE>mode); + + /* Refer to vector_unordered. */ + emit_insn (gen_vector_ge<mode> (op3, operands[1], operands[2])); + emit_insn (gen_vector_ge<mode> (op4, operands[2], operands[1])); + emit_insn (gen_and<mode>3 (op3, op3, op4)); + emit_insn (gen_one_cmpl<mode>3 (op3, op3)); + + switch (<CODE>) + { + case UNLT: + std::swap (operands[1], operands[2]); + /* Fall through. */ + case UNGT: + emit_insn (gen_vector_gt<mode> (op4, operands[1], operands[2])); + break; + case UNLE: + std::swap (operands[1], operands[2]); + /* Fall through. 
*/ + case UNGE: + emit_insn (gen_vector_ge<mode> (op4, operands[1], operands[2])); + break; + default: + gcc_unreachable (); + } + + emit_insn (gen_ior<mode>3 (operands[0], op3, op4)); + DONE; +}) + + (define_expand "vector_eq<mode>" [(set (match_operand:VEC_C 0 "vlogical_operand") (eq:VEC_C (match_operand:VEC_C 1 "vlogical_operand") @@ -575,7 +788,7 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_insn_and_split "*vector_uneq<mode>" +(define_insn_and_split "vector_uneq<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (uneq:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -596,7 +809,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_ltgt<mode>" +(define_insn_and_split "vector_ltgt<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (ltgt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -617,7 +830,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_ordered<mode>" +(define_insn_and_split "vector_ordered<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (ordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -638,7 +851,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_unordered<mode>" +(define_insn_and_split "vector_unordered<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (unordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c new file mode 100644 index 0000000..1023e8c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c @@ -0,0 +1,297 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mvsx -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction 
vectorization, where comparison operands are of + double type and condition true/false values are integer type. Cover all + float point comparison codes. */ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE double + +#define LTGT(a, b) (__builtin_islessgreater ((a), (b))) +#define UNORD(a, b) (__builtin_isunordered ((a), (b))) +#define ORD(a, b) (!__builtin_isunordered ((a), (b))) +#define UNEQ(a, b) (!__builtin_islessgreater ((a), (b))) +#define UNGT(a, b) (!__builtin_islessequal ((a), (b))) +#define UNGE(a, b) (!__builtin_isless ((a), (b))) +#define UNLT(a, b) (!__builtin_isgreaterequal ((a), (b))) +#define UNLE(a, b) (!__builtin_isgreater ((a), (b))) + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ltgt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (LTGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ord 
(FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (ORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_uneq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNEQ (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ungt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGE (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unlt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unle (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLE (a[i], min_v)) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, NAN, 25, 26, 27}; + + FP_TYPE a3[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + FP_TYPE a4[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 6, 7}; + + FP_TYPE a5[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 10, 10}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + 
abort (); + + ret = test_gt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ge (a3, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + ret = test_ltgt (a3, 10); + if (ret != 26) + abort (); + + ret = test_ltgt (a5, 10); + if (ret != 23) + abort (); + + ret = test_unord (a5, 10); + if (ret != 24) + abort (); + + ret = test_ord (a5, 10); + if (ret != 26) + abort (); + + ret = test_uneq (a1, 10); + if (ret != 19) + abort (); + + ret = test_uneq (a4, 10); + if (ret != 24) + abort (); + + ret = test_ungt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unge (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unlt (a1, 10); + if (ret != 18) + abort (); + + ret = test_unlt (a2, 10); + if (ret != 23) + abort (); + + ret = test_unle (a1, 10); + if (ret != 19) + abort (); + + ret = test_unle (a2, 10); + if (ret != 23) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c new file mode 100644 index 0000000..db7b9ad --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c @@ -0,0 +1,297 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vmx_hw } */ +/* { dg-options "-O2 -ftree-vectorize -maltivec -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + float type and condition true/false values are integer type. Cover all + float point comparison codes. 
*/ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE float + +#define LTGT(a, b) (__builtin_islessgreater ((a), (b))) +#define UNORD(a, b) (__builtin_isunordered ((a), (b))) +#define ORD(a, b) (!__builtin_isunordered ((a), (b))) +#define UNEQ(a, b) (!__builtin_islessgreater ((a), (b))) +#define UNGT(a, b) (!__builtin_islessequal ((a), (b))) +#define UNGE(a, b) (!__builtin_isless ((a), (b))) +#define UNLT(a, b) (!__builtin_isgreaterequal ((a), (b))) +#define UNLE(a, b) (!__builtin_isgreater ((a), (b))) + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ltgt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (LTGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (ORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ 
((noinline)) int +test_unord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_uneq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNEQ (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ungt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGE (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unlt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unle (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLE (a[i], min_v)) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, NAN, 25, 26, 27}; + + FP_TYPE a3[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + FP_TYPE a4[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 6, 7}; + + FP_TYPE a5[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 10, 10}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ge (a3, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); 
+ if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + ret = test_ltgt (a3, 10); + if (ret != 26) + abort (); + + ret = test_ltgt (a5, 10); + if (ret != 23) + abort (); + + ret = test_unord (a5, 10); + if (ret != 24) + abort (); + + ret = test_ord (a5, 10); + if (ret != 26) + abort (); + + ret = test_uneq (a1, 10); + if (ret != 19) + abort (); + + ret = test_uneq (a4, 10); + if (ret != 24) + abort (); + + ret = test_ungt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unge (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unlt (a1, 10); + if (ret != 18) + abort (); + + ret = test_unlt (a2, 10); + if (ret != 23) + abort (); + + ret = test_unle (a1, 10); + if (ret != 19) + abort (); + + ret = test_unle (a2, 10); + if (ret != 23) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c b/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c new file mode 100644 index 0000000..a786811 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mdejagnu-cpu=power8 -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + signed long long type and condition true/false values are integer type. 
*/ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE signed long long + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a2, 10); + if (ret != 19) + abort (); + + ret = test_ge (a2, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c new file mode 100644 index 0000000..dd3c030 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mdejagnu-cpu=power8 -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + unsigned long long type and condition true/false values are integer type. */ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE unsigned long long + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + ret 
= test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a2, 10); + if (ret != 19) + abort (); + + ret = test_ge (a2, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */
Hi Ke Wen, On Tue, Nov 05, 2019 at 04:35:05PM +0800, Kewen.Lin wrote: > >> ;; 128-bit one's complement > >> -(define_insn_and_split "*one_cmpl<mode>3_internal" > >> +(define_insn_and_split "one_cmpl<mode>3_internal" > > > > Instead, rename it to "one_cmpl<mode>3" and delete the define_expand that > > serves no function? > > Renamed. Sorry, what's the "define_expand" specified here. I thought it's > for existing one_cmpl<mode>3 but I didn't find it. The expander named "one_cmpl<mode>3": Erm. 2, not 3 :-) (define_expand "one_cmpl<mode>2" [(set (match_operand:BOOL_128 0 "vlogical_operand") (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")))] "" "") while the define_insn is (define_insn_and_split "*one_cmpl<mode>3_internal" [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] "" { etc., so you can just delete the expand and rename the insn to the proper name (one_cmpl<mode>2). It sometimes is useful to have an expand like this if there are multiple insns that could implement this, but that is not the case here. > >> +(define_code_iterator fpcmpun [ungt unge unlt unle]) > > > > Why these four? Should there be more? Should this be added to some > > existing iterator? > > For floating point comparison operator and vector type, currently rs6000 > supports eq, gt, ge, *ltgt, *unordered, *ordered, *uneq (* for unnamed). > We can leverage gt, ge, eq for lt, le, ne, then these four left. There are four conditions for FP: lt/gt/eq/un. For every comparison, exactly one of the four is true. If not HONOR_NANS for this mode you never have un, so it is one of lt/gt/eq then, just like with integers. If we have HONOR_NANS(mode) (or !flag_finite_math_only), there are 14 possible combinations to test for (testing for any of the four or none of the four is easy ;-) ) Four test just if lt, gt, eq, or un is set. 
Another four test if one of the flags is *not* set, or said differently, if one of three flags is set: ordered, ne, unle, unge. The remaining six test two flags each: ltgt, le, unlt, ge, ungt, uneq. > I originally wanted to merge them into the existing unordered or uneq, but > I found it's hard to share their existing patterns. For example, the uneq > looks like: > > [(set (match_dup 3) > (gt:VEC_F (match_dup 1) > (match_dup 2))) > (set (match_dup 4) > (gt:VEC_F (match_dup 2) > (match_dup 1))) > (set (match_dup 0) > (and:VEC_F (not:VEC_F (match_dup 3)) > (not:VEC_F (match_dup 4))))] Or ge/ge/eqv, etc. -- there are multiple options. > While ungt looks like: > > [(set (match_dup 3) > (ge:VEC_F (match_dup 1) > (match_dup 2))) > (set (match_dup 4) > (ge:VEC_F (match_dup 2) > (match_dup 1))) > (set (match_dup 3) > (ior:VEC_F (not:VEC_F (match_dup 3)) > (not:VEC_F (match_dup 4)))) > (set (match_dup 4) > (gt:VEC_F (match_dup 1) > (match_dup 2))) > (set (match_dup 3) > (ior:VEC_F (match_dup 3) > (match_dup 4)))] (set (match_dup 3) (ge:VEC_F (match_dup 2) (match_dup 1))) (set (match_dup 0) (not:VEC_F (match_dup 3))) should be enough? So we have only gt/ge/eq. I think the following are optimal (not tested!): lt(a,b) = gt(b,a) gt(a,b) = gt(a,b) eq(a,b) = eq(a,b) un(a,b) = ~(ge(a,b) | ge(b,a)) ltgt(a,b) = ge(a,b) ^ ge(b,a) le(a,b) = ge(b,a) unlt(a,b) = ~ge(a,b) ge(a,b) = ge(a,b) ungt(a,b) = ~ge(b,a) uneq(a,b) = ~(ge(a,b) ^ ge(b,a)) ord(a,b) = ge(a,b) | ge(b,a) ne(a,b) = ~eq(a,b) unle(a,b) = ~gt(a,b) unge(a,b) = ~gt(b,a) This is quite regular :-) 5 are done with one cmp; 5 are done with a cmp and an inversion; 4 are done with two compares and one xor/eqv/or/nor.
Half are pretty simple: lt(a,b) = gt(b,a) gt(a,b) = gt(a,b) eq(a,b) = eq(a,b) le(a,b) = ge(b,a) ge(a,b) = ge(a,b) ltgt(a,b) = ge(a,b) ^ ge(b,a) ord(a,b) = ge(a,b) | ge(b,a) The other half are the negations of those: unge(a,b) = ~gt(b,a) unle(a,b) = ~gt(a,b) ne(a,b) = ~eq(a,b) ungt(a,b) = ~ge(b,a) unlt(a,b) = ~ge(a,b) uneq(a,b) = ~(ge(a,b) ^ ge(b,a)) un(a,b) = ~(ge(a,b) | ge(b,a)) And please remember to test everything with -ffast-math :-) That is, when flag_finite_math_only is set. You cannot get unordered results, then, making the optimal sequences different in some cases (and changing what "ne" means!) 8 codes, ordered: never lt gt ltgt eq le ge ordered 8 codes, unordered: unordered unlt ungt ne uneq unle unge always 8 codes, fast-math: never lt gt ne eq le ge always 8 codes, non-fp: never lt gt ne eq le ge always > >> +;; Same mode for condition true/false values and predicate operand. > >> +(define_expand "vcond_mask_<mode><mode>" > >> + [(match_operand:VEC_I 0 "vint_operand") > >> + (match_operand:VEC_I 1 "vint_operand") > >> + (match_operand:VEC_I 2 "vint_operand") > >> + (match_operand:VEC_I 3 "vint_operand")] > >> + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > >> +{ > >> + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], > >> + operands[3])); > >> + DONE; > >> +}) > > > > So is this exactly the same as vsel/xxsel? > > Yes, expanded into if_then_else and ne against zero, can match their patterns. Ah, so vector_select is not the canonical name. > > You shouldn't allow those for signed comparisons, that will only hide > > problems. > > OK, moved into vec_cmpu*. > > Why *are* there separate vec_cmp and vec_cmpu patterns, in the first place? > > If I understood the question correctly, you were asking why not have one > unique pattern for them? Yes, it is redundant, the comparison code already says if it is an unsigned comparison.
So this is a question about the generic patterns, not your implementation of them :-) And if it is *one* pattern then handling LTU etc. makes perfect sense. > I noticed some vectorization related SPNs have > separate signed and unsigned patterns, I guess it's due to that sign matters > for some vector instructions, some platform may only support some of them, > using sign for fine grain queries and checks? I think it is because one particular implementation has different machine insns for both, the one this interface was implemented for first. > Updated patch attached by addressing above comments. I'll review it later, sorry. Segher
Hi Segher, on 2019/11/7 上午7:49, Segher Boessenkool wrote: > > The expander named "one_cmpl<mode>3": > > Erm. 2, not 3 :-) > > (define_expand "one_cmpl<mode>2" > [(set (match_operand:BOOL_128 0 "vlogical_operand") > (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")))] > "" > "") > > while the define_insn is > > (define_insn_and_split "*one_cmpl<mode>3_internal" > [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") > (not:BOOL_128 > (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] > "" > { > Ah, sorry, I didn't notice that the insn is named one_cmpl<mode>**3** while it actually implements the one_cmpl<mode>**2** expand; I was a bit surprised. Done. Thanks for pointing that out. > etc., so you can just delete the expand and rename the insn to the proper > name (one_cmpl<mode>2). It sometimes is useful to have an expand like > this if there are multiple insns that could implement this, but that is > not the case here. > OK, an example like vector_select? :) >>>> +(define_code_iterator fpcmpun [ungt unge unlt unle]) >>> >>> Why these four? Should there be more? Should this be added to some >>> existing iterator? >> >> For floating point comparison operator and vector type, currently rs6000 >> supports eq, gt, ge, *ltgt, *unordered, *ordered, *uneq (* for unnamed). >> We can leverage gt, ge, eq for lt, le, ne, then these four left. > > There are four conditions for FP: lt/gt/eq/un. For every comparison, > exactly one of the four is true. If not HONOR_NANS for this mode you > never have un, so it is one of lt/gt/eq then, just like with integers. > > If we have HONOR_NANS(mode) (or !flag_finite_math_only), there are 14 > possible combinations to test for (testing for any of the four or none > of the four is easy ;-) ) > > Four test just if lt, gt, eq, or un is set. Another four test if one of > the flags is *not* set, or said differently, if one of three flags is set: > ordered, ne, unle, unge. The remaining six test two flags each: ltgt, le, > unlt, ge, ungt, uneq.
Yes, for these 14, the current rs6000 support status is: ge: vector_ge -> define_expand -> match vsx/altivec insn gt: vector_gt -> define_expand -> match vsx/altivec insn eq: vector_eq -> define_expand -> match vsx/altivec insn ltgt: *vector_ltgt -> define_insn_and_split ord: *vector_ordered -> define_insn_and_split unord: *vector_unordered -> define_insn_and_split uneq: *vector_uneq -> define_insn_and_split ne: no RTL pattern. lt: Likewise. le: Likewise. unge: Likewise. ungt: Likewise. unle: Likewise. unlt: Likewise. Since I thought the un{ge,gt,le,lt} were a bit more complicated than ne/lt/le (a wrong assumption, actually), I added specific define_expands for them. Following your simpler example below, I've added the RTL patterns with define_expand for the missing ne, lt, le, unge, ungt, unle, unlt. I no longer use an iterator, since without further refactoring only a few of them (2 in each pair) can be shared through iterators, and they would need to check <code> to decide whether to swap or not. Maybe a subsequent uniform refactoring patch is needed to do that? > >> I originally wanted to merge them into the existing unordered or uneq, but >> I found it's hard to share their existing patterns. For example, the uneq >> looks like: >> >> [(set (match_dup 3) >> (gt:VEC_F (match_dup 1) >> (match_dup 2))) >> (set (match_dup 4) >> (gt:VEC_F (match_dup 2) >> (match_dup 1))) >> (set (match_dup 0) >> (and:VEC_F (not:VEC_F (match_dup 3)) >> (not:VEC_F (match_dup 4))))] > > Or ge/ge/eqv, etc. -- there are multiple options.
> >> While ungt looks like: >> >> [(set (match_dup 3) >> (ge:VEC_F (match_dup 1) >> (match_dup 2))) >> (set (match_dup 4) >> (ge:VEC_F (match_dup 2) >> (match_dup 1))) >> (set (match_dup 3) >> (ior:VEC_F (not:VEC_F (match_dup 3)) >> (not:VEC_F (match_dup 4)))) >> (set (match_dup 4) >> (gt:VEC_F (match_dup 1) >> (match_dup 2))) >> (set (match_dup 3) >> (ior:VEC_F (match_dup 3) >> (match_dup 4)))] > > (set (match_dup 3) > (ge:VEC_F (match_dup 2) > (match_dup 1))) > (set (match_dup 0) > (not:VEC_F (match_dup 3))) > > should be enough? > Nice! I was stuck on computing unordered first. :( > > So we have only gt/ge/eq. > > I think the following are optimal (not tested!): > > lt(a,b) = gt(b,a) yes, this is what I used for that operator. > gt(a,b) = gt(a,b) > eq(a,b) = eq(a,b) > un(a,b) = ~(ge(a,b) | ge(b,a)) > existing code uses (~ge(a,b) & ~ge(b,a)) but should be the same. > ltgt(a,b) = ge(a,b) ^ ge(b,a) existing code uses gt(a,b) | gt(b,a) but should be the same. > le(a,b) = ge(b,a) > unlt(a,b) = ~ge(a,b) > ge(a,b) = ge(a,b) > ungt(a,b) = ~ge(b,a) > uneq(a,b) = ~(ge(a,b) ^ ge(b,a)) > existing code uses ~gt(a,b) & ~gt(b,a) but should be the same. > ord(a,b) = ge(a,b) | ge(b,a) > ne(a,b) = ~eq(a,b) > unle(a,b) = ~gt(a,b) > unge(a,b) = ~gt(b,a) > > This is quite regular :-) 5 are done with one cmp; 5 are done with a cmp > and an inversion; 4 are done with two compares and one xor/eqv/or/nor. > > > Half are pretty simple: > > lt(a,b) = gt(b,a) > gt(a,b) = gt(a,b) > eq(a,b) = eq(a,b) > le(a,b) = ge(b,a) > ge(a,b) = ge(a,b) > > ltgt(a,b) = ge(a,b) ^ ge(b,a) > ord(a,b) = ge(a,b) | ge(b,a) > > The other half are the negations of those: > > unge(a,b) = ~gt(b,a) > unle(a,b) = ~gt(a,b) > ne(a,b) = ~eq(a,b) > ungt(a,b) = ~ge(b,a) > unlt(a,b) = ~ge(a,b) > > uneq(a,b) = ~(ge(a,b) ^ ge(b,a)) > un(a,b) = ~(ge(a,b) | ge(b,a)) > Awesome! Do you suggest refactoring them? :) > > And please remember to test everything with -ffast-math :-) That is, when > flag_finite_math_only is set.
You cannot get unordered results, then, > making the optimal sequences different in some cases (and changing what > "ne" means!) Thanks for the reminder! For the RTL patterns, I think we won't get any un* related operators with -ffast-math, so the un* expansion part should be fine? > > 8 codes, ordered: never lt gt ltgt eq le ge ordered > 8 codes, unordered: unordered unlt ungt ne uneq unle unge always > 8 codes, fast-math: never lt gt ne eq le ge always > 8 codes, non-fp: never lt gt ne eq le ge always > > Sorry, I don't quite follow this table. What are the column heads? >>>> +;; Same mode for condition true/false values and predicate operand. >>>> +(define_expand "vcond_mask_<mode><mode>" >>>> + [(match_operand:VEC_I 0 "vint_operand") >>>> + (match_operand:VEC_I 1 "vint_operand") >>>> + (match_operand:VEC_I 2 "vint_operand") >>>> + (match_operand:VEC_I 3 "vint_operand")] >>>> + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" >>>> +{ >>>> + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], >>>> + operands[3])); >>>> + DONE; >>>> +}) >>> >>> So is this exactly the same as vsel/xxsel? >> >> Yes, expanded into if_then_else and ne against zero, can match their patterns. > > Ah, so vector_select is not the canonical name. OK. > >>> You shouldn't allow those for signed comparisons, that will only hide >>> problems. >> >> OK, moved into vec_cmpu*. > >>> Why *are* there separate vec_cmp and vec_cmpu patterns, in the first place? >> >> If I understood the question correctly, you were asking why not have one >> unique pattern for them? > > Yes, it is redundant, the comparison code already says if it is an > unsigned comparison. So this is a question about the generic patterns, not > your implementation of them :-) > > And if it is *one* pattern then handling LTU etc. makes perfect sense. > Fully agree, but they are separate for now.
:) >> I noticed some vectorization related SPNs have >> separate signed and unsigned patterns, I guess it's due to that sign matters >> for some vector instructions, some platform may only support some of them, >> using sign for fine grain queries and checks? > > I think it is because one particular implementation has different machine > insns for both, the one this interface was implemented for first. > Good point, but it would be strange if there are different machine instructions for eq/ne (only these two can be shared between signed and unsigned). >> Updated patch attached by addressing above comments. > > I'll review it later, sorry. > Thanks again! I've updated a new version as some comments, you can review this one to save your time. :) BR, Kewen ------------- gcc/ChangeLog 2019-11-07 Kewen Lin <linkw@gcc.gnu.org> PR target/92132 * config/rs6000/predicates.md (signed_or_equality_comparison_operator): New predicate. (unsigned_or_equality_comparison_operator): Likewise. * config/rs6000/rs6000.md (one_cmpl<mode>2): Remove expand. (one_cmpl<mode>3_internal): Rename to one_cmpl<mode>2. * config/rs6000/vector.md (vcond_mask_<mode><mode> for VEC_I and VEC_I): New expand. (vec_cmp<mode><mode> for VEC_I and VEC_I): Likewise. (vec_cmpu<mode><mode> for VEC_I and VEC_I): Likewise. (vcond_mask_<mode><VEC_int> for VEC_F): New expand for float vector modes and same-size integer vector modes. (vec_cmp<mode><VEC_int> for VEC_F): Likewise. (vector_lt<mode> for VEC_F): New expand. (vector_le<mode> for VEC_F): Likewise. (vector_ne<mode> for VEC_F): Likewise. (vector_unge<mode> for VEC_F): Likewise. (vector_ungt<mode> for VEC_F): Likewise. (vector_unle<mode> for VEC_F): Likewise. (vector_unlt<mode> for VEC_F): Likewise. (vector_uneq<mode>): Expose name. (vector_ltgt<mode>): Likewise. (vector_unordered<mode>): Likewise. (vector_ordered<mode>): Likewise. gcc/testsuite/ChangeLog 2019-11-07 Kewen Lin <linkw@gcc.gnu.org> PR target/92132 * gcc.target/powerpc/pr92132-fp-1.c: New test. 
* gcc.target/powerpc/pr92132-fp-2.c: New test. * gcc.target/powerpc/pr92132-int-1.c: New test. * gcc.target/powerpc/pr92132-int-2.c: New test. diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 345d9c3..1eecc42 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1140,6 +1140,16 @@ (define_predicate "signed_comparison_operator" (match_code "lt,gt,le,ge")) +;; Return 1 if OP is a signed comparison or an equality operator. +(define_predicate "signed_or_equality_comparison_operator" + (ior (match_operand 0 "equality_operator") + (match_operand 0 "signed_comparison_operator"))) + +;; Return 1 if OP is an unsigned comparison or an equality operator. +(define_predicate "unsigned_or_equality_comparison_operator" + (ior (match_operand 0 "equality_operator") + (match_operand 0 "unsigned_comparison_operator"))) + ;; Return 1 if OP is a comparison operation that is valid for an SCC insn -- ;; it must be a positive comparison. (define_predicate "scc_comparison_operator" diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d0cca1e..68eb40d 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6500,12 +6500,6 @@ "" "") -(define_expand "one_cmpl<mode>2" - [(set (match_operand:BOOL_128 0 "vlogical_operand") - (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")))] - "" - "") - (define_expand "nor<mode>3" [(set (match_operand:BOOL_128 0 "vlogical_operand") (and:BOOL_128 @@ -6800,7 +6794,7 @@ (const_string "16")))]) ;; 128-bit one's complement -(define_insn_and_split "*one_cmpl<mode>3_internal" +(define_insn_and_split "one_cmpl<mode>2" [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 886cbad..b132037 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ 
-493,6 +493,260 @@ FAIL; }) +;; To support vector condition vectorization, define vcond_mask and vec_cmp. + +;; Same mode for condition true/false values and predicate operand. +(define_expand "vcond_mask_<mode><mode>" + [(match_operand:VEC_I 0 "vint_operand") + (match_operand:VEC_I 1 "vint_operand") + (match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], + operands[3])); + DONE; +}) + +;; Condition true/false values are float but predicate operand is of +;; type integer vector with same element size. +(define_expand "vcond_mask_<mode><VEC_int>" + [(match_operand:VEC_F 0 "vfloat_operand") + (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand") + (match_operand:<VEC_INT> 3 "vint_operand")] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], + gen_lowpart (<MODE>mode, operands[3]))); + DONE; +}) + +;; For signed integer vectors comparison. 
+(define_expand "vec_cmp<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (match_operator 1 "signed_or_equality_comparison_operator" + [(match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx tmp = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + break; + case GE: + emit_insn (gen_vector_nlt<mode> (operands[0],operands[2], operands[3], + tmp)); + break; + case GT: + emit_insn (gen_vector_gt<mode> (operands[0], operands[2], operands[3])); + break; + case LE: + emit_insn (gen_vector_ngt<mode> (operands[0], operands[2], operands[3], + tmp)); + break; + case LT: + emit_insn (gen_vector_gt<mode> (operands[0], operands[3], operands[2])); + break; + default: + gcc_unreachable (); + break; + } + DONE; +}) + +;; For unsigned integer vectors comparison. 
+(define_expand "vec_cmpu<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (match_operator 1 "unsigned_or_equality_comparison_operator" + [(match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx tmp = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + break; + case GEU: + emit_insn (gen_vector_nltu<mode> (operands[0], operands[2], operands[3], + tmp)); + break; + case GTU: + emit_insn (gen_vector_gtu<mode> (operands[0], operands[2], operands[3])); + break; + case LEU: + emit_insn (gen_vector_ngtu<mode> (operands[0], operands[2], operands[3], + tmp)); + break; + case LTU: + emit_insn (gen_vector_gtu<mode> (operands[0], operands[3], operands[2])); + break; + default: + gcc_unreachable (); + break; + } + DONE; +}) + +;; For float point vectors comparison. 
+(define_expand "vec_cmp<mode><VEC_int>" + [(set (match_operand:<VEC_INT> 0 "vint_operand") + (match_operator 1 "comparison_operator" + [(match_operand:VEC_F 2 "vfloat_operand") + (match_operand:VEC_F 3 "vfloat_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx res = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_ne<mode> (res, operands[2], operands[3])); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (res, operands[2], operands[3])); + break; + case GE: + emit_insn (gen_vector_ge<mode> (res, operands[2], operands[3])); + break; + case GT: + emit_insn (gen_vector_gt<mode> (res, operands[2], operands[3])); + break; + case LE: + emit_insn (gen_vector_le<mode> (res, operands[2], operands[3])); + break; + case LT: + emit_insn (gen_vector_lt<mode> (res, operands[2], operands[3])); + break; + case LTGT: + emit_insn (gen_vector_ltgt<mode> (res, operands[2], operands[3])); + break; + case UNORDERED: + emit_insn (gen_vector_unordered<mode> (res, operands[2], operands[3])); + break; + case ORDERED: + emit_insn (gen_vector_ordered<mode> (res, operands[2], operands[3])); + break; + case UNEQ: + emit_insn (gen_vector_uneq<mode> (res, operands[2], operands[3])); + break; + case UNGE: + emit_insn (gen_vector_unge<mode> (res, operands[2], operands[3])); + break; + case UNGT: + emit_insn (gen_vector_ungt<mode> (res, operands[2], operands[3])); + break; + case UNLE: + emit_insn (gen_vector_unle<mode> (res, operands[2], operands[3])); + break; + case UNLT: + emit_insn (gen_vector_unlt<mode> (res, operands[2], operands[3])); + break; + + default: + gcc_unreachable (); + } + + emit_insn (gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + DONE; +}) + +; lt(a,b) = gt(b,a) +(define_expand "vector_lt<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand") + (lt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] + 
"VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_gt<mode> (operands[0], operands[2], operands[1])); + DONE; +}) + +; le(a,b) = ge(b,a) +(define_expand "vector_le<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand") + (le:VEC_F (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_ge<mode> (operands[0], operands[2], operands[1])); + DONE; +}) + +; ne(a,b) = ~eq(a,b) +(define_expand "vector_ne<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand") + (ne:VEC_F (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_eq<mode> (operands[0], operands[1], operands[2])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + DONE; +}) + +; unge(a,b) = ~gt(b,a) +(define_expand "vector_unge<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand") + (unge:VEC_F (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_gt<mode> (operands[0], operands[2], operands[1])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + DONE; +}) + +; ungt(a,b) = ~ge(b,a) +(define_expand "vector_ungt<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand") + (ungt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_ge<mode> (operands[0], operands[2], operands[1])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + DONE; +}) + +; unle(a,b) = ~gt(a,b) +(define_expand "vector_unle<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand") + (unle:VEC_F (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_gt<mode> 
(operands[0], operands[1], operands[2])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + DONE; +}) + +; unlt(a,b) = ~ge(a,b) +(define_expand "vector_unlt<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand") + (unlt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_ge<mode> (operands[0], operands[1], operands[2])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + DONE; +}) + (define_expand "vector_eq<mode>" [(set (match_operand:VEC_C 0 "vlogical_operand") (eq:VEC_C (match_operand:VEC_C 1 "vlogical_operand") @@ -575,7 +829,7 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_insn_and_split "*vector_uneq<mode>" +(define_insn_and_split "vector_uneq<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (uneq:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -596,7 +850,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_ltgt<mode>" +(define_insn_and_split "vector_ltgt<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (ltgt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -617,7 +871,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_ordered<mode>" +(define_insn_and_split "vector_ordered<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (ordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -638,7 +892,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_unordered<mode>" +(define_insn_and_split "vector_unordered<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (unordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c new 
file mode 100644 index 0000000..1023e8c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c @@ -0,0 +1,297 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mvsx -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + double type and condition true/false values are integer type. Cover all + float point comparison codes. */ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE double + +#define LTGT(a, b) (__builtin_islessgreater ((a), (b))) +#define UNORD(a, b) (__builtin_isunordered ((a), (b))) +#define ORD(a, b) (!__builtin_isunordered ((a), (b))) +#define UNEQ(a, b) (!__builtin_islessgreater ((a), (b))) +#define UNGT(a, b) (!__builtin_islessequal ((a), (b))) +#define UNGE(a, b) (!__builtin_isless ((a), (b))) +#define UNLT(a, b) (!__builtin_isgreaterequal ((a), (b))) +#define UNLE(a, b) (!__builtin_isgreater ((a), (b))) + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int 
last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ltgt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (LTGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (ORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_uneq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNEQ (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ungt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGE (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unlt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unle (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLE (a[i], min_v)) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, NAN, 25, 26, 27}; + + FP_TYPE a3[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + FP_TYPE a4[N] = {21, 22, 23, 24, 
25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 6, 7}; + + FP_TYPE a5[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 10, 10}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ge (a3, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + ret = test_ltgt (a3, 10); + if (ret != 26) + abort (); + + ret = test_ltgt (a5, 10); + if (ret != 23) + abort (); + + ret = test_unord (a5, 10); + if (ret != 24) + abort (); + + ret = test_ord (a5, 10); + if (ret != 26) + abort (); + + ret = test_uneq (a1, 10); + if (ret != 19) + abort (); + + ret = test_uneq (a4, 10); + if (ret != 24) + abort (); + + ret = test_ungt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unge (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unlt (a1, 10); + if (ret != 18) + abort (); + + ret = test_unlt (a2, 10); + if (ret != 23) + abort (); + + ret = test_unle (a1, 10); + if (ret != 19) + abort (); + + ret = test_unle (a2, 10); + if (ret != 23) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c new file mode 100644 index 0000000..db7b9ad --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c @@ -0,0 +1,297 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vmx_hw } */ +/* { dg-options "-O2 -ftree-vectorize -maltivec -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + float type and condition 
true/false values are integer type. Cover all + float point comparison codes. */ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE float + +#define LTGT(a, b) (__builtin_islessgreater ((a), (b))) +#define UNORD(a, b) (__builtin_isunordered ((a), (b))) +#define ORD(a, b) (!__builtin_isunordered ((a), (b))) +#define UNEQ(a, b) (!__builtin_islessgreater ((a), (b))) +#define UNGT(a, b) (!__builtin_islessequal ((a), (b))) +#define UNGE(a, b) (!__builtin_isless ((a), (b))) +#define UNLT(a, b) (!__builtin_isgreaterequal ((a), (b))) +#define UNLE(a, b) (!__builtin_isgreater ((a), (b))) + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ltgt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (LTGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) 
+ if (ORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_uneq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNEQ (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ungt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGE (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unlt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unle (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLE (a[i], min_v)) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, NAN, 25, 26, 27}; + + FP_TYPE a3[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + FP_TYPE a4[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 6, 7}; + + FP_TYPE a5[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 10, 10}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a3, 10); + if (ret != 19) + abort (); + + ret = 
test_ge (a3, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + ret = test_ltgt (a3, 10); + if (ret != 26) + abort (); + + ret = test_ltgt (a5, 10); + if (ret != 23) + abort (); + + ret = test_unord (a5, 10); + if (ret != 24) + abort (); + + ret = test_ord (a5, 10); + if (ret != 26) + abort (); + + ret = test_uneq (a1, 10); + if (ret != 19) + abort (); + + ret = test_uneq (a4, 10); + if (ret != 24) + abort (); + + ret = test_ungt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unge (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unlt (a1, 10); + if (ret != 18) + abort (); + + ret = test_unlt (a2, 10); + if (ret != 23) + abort (); + + ret = test_unle (a1, 10); + if (ret != 19) + abort (); + + ret = test_unle (a2, 10); + if (ret != 23) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c b/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c new file mode 100644 index 0000000..a786811 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mdejagnu-cpu=power8 -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + signed long long type and condition true/false values are integer type. 
*/ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE signed long long + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a2, 10); + if (ret != 19) + abort (); + + ret = test_ge (a2, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c new file mode 100644 index 0000000..dd3c030 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mdejagnu-cpu=power8 -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + unsigned long long type and condition true/false values are integer type. */ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE unsigned long long + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + ret 
= test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a2, 10); + if (ret != 19) + abort (); + + ret = test_ge (a2, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */
Hi! On Thu, Nov 07, 2019 at 06:17:53PM +0800, Kewen.Lin wrote: > on 2019/11/7 上午7:49, Segher Boessenkool wrote: > > The expander named "one_cmpl<mode>3": > > > > Erm. 2, not 3 :-) > Ah, sorry I didn't notice we have one cmpl<mode>**3** but actually for one > cmpl<mode>**2** expand, a bit surprised. Done. Thanks for pointing that out. Yeah, I suddenly couldn't find it myself either. Real head-scratcher :-) > > etc., so you can just delete the expand and rename the insn to the proper > > name (one_cmpl<mode>2). It sometimes is useful to have an expand like > > this if there are multiple insns that could implement this, but that is > > not the case here. > > OK, example like vector_select? :) Sure, like that. There are many examples where you are required to have just one define_expand, it is called by name after all, but you want to have different define_insns (for different cpus, say). > > So we have only gt/ge/eq. > > > > I think the following are optimal (not tested!): > > > > lt(a,b) = gt(b,a) > yes, this is what I used for that operator. > > > gt(a,b) = gt(a,b) > > eq(a,b) = eq(a,b) > > un(a,b) = ~(ge(a,b) | ge(b,a)) > > > > existing code uses (~ge(a,b) & ~ge(b,a)) > but should be the same. Yup, it's just ge/ge/nor, whatever way you write it :-) (RTL requires you write the expression in your form, with all the NOTs "pushed in"). > > ltgt(a,b) = ge(a,b) ^ ge(b,a) > > existing code uses gt(a,b) | gt(b,a) > but should be the same. Yup, computes exactly the same, and exactly the same execution speeds. Your form might be slightly easier to optimise with (it has no XOR).
> > Half are pretty simple: > > > > lt(a,b) = gt(b,a) > > gt(a,b) = gt(a,b) > > eq(a,b) = eq(a,b) > > le(a,b) = ge(b,a) > > ge(a,b) = ge(a,b) > > > > ltgt(a,b) = ge(a,b) ^ ge(b,a) > > ord(a,b) = ge(a,b) | ge(b,a) > > > > The other half are the negations of those: > > > > unge(a,b) = ~gt(b,a) > > unle(a,b) = ~gt(a,b) > > ne(a,b) = ~eq(a,b) > > ungt(a,b) = ~ge(b,a) > > unlt(a,b) = ~ge(a,b) > > > > uneq(a,b) = ~(ge(a,b) ^ ge(b,a)) > > un(a,b) = ~(ge(a,b) | ge(b,a)) > > Awesome! Do you suggest refactoring on them? :) I'd do the first five in one pattern (which then swaps two ops and the condition in the lt and le case), and the other five in another pattern. And the rest in two or four patterns? Just try it out, see what works well. It helps to do a bunch together in one pattern, but if that then turns into special cases for everything, more might be lost than gained. > > And please remember to test everything with -ffast-math :-) That is, when > > flag_finite_math_only is set. You cannot get unordered results, then, > > making the optimal sequences different in some cases (and changing what > > "ne" means!) > > Thanks for the reminder! On RTL pattern, I think we won't get any un* > related operators with -ffast-math, so that part on un* expansion > would be fine? Yeah, but look what you should do for "ne" :-) > > 8 codes, ordered: never lt gt ltgt eq le ge ordered > > 8 codes, unordered: unordered unlt ungt ne uneq unle unge always > > 8 codes, fast-math: never lt gt ne eq le ge always > > 8 codes, non-fp: never lt gt ne eq le ge always > > Sorry, I don't quite follow this table. What are the column heads? The first row is the eight possible fp conditions that are not always true if unordered is set; the second row is those that *are* always true if it is set. The other two rows (which are the same) are just the eight conditions that do not test unordered at all.
The tricky one is "ne": for FP *with* NaNs, "ne" means "less than, or greater than, or unordered", while without NaNs (i.e. -ffast-math) it means "less than, or greater than". You could write the column heads as --/--/-- lt/--/-- --/gt/-- lt/gt/-- --/--/eq lt/--/eq --/gt/eq lt/gt/eq if that helps? Just the eight combinations of the first three flags. > > Yes, it is redundant, the comparison code already says if it is an > > unsigned comparison. So this is a question about the generic patterns, not > > your implementation of them :-) > > > > And if it is *one* pattern then handling LTU etc. makes perfect sense. > > Fully agree, but it separates for now. :) Sure :-) > Thanks again! I've updated a new version as some comments, you can review this > one to save your time. :) > +;; Return 1 if OP is a signed comparison or an equality operator. > +(define_predicate "signed_or_equality_comparison_operator" > + (ior (match_operand 0 "equality_operator") > + (match_operand 0 "signed_comparison_operator"))) > + > +;; Return 1 if OP is an unsigned comparison or an equality operator. > +(define_predicate "unsigned_or_equality_comparison_operator" > + (ior (match_operand 0 "equality_operator") > + (match_operand 0 "unsigned_comparison_operator"))) Hrm. Unpleasant. > +(define_expand "vcond_mask_<mode><mode>" > + [(match_operand:VEC_I 0 "vint_operand") > + (match_operand:VEC_I 1 "vint_operand") > + (match_operand:VEC_I 2 "vint_operand") > + (match_operand:VEC_I 3 "vint_operand")] > + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > +{ > + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], > + operands[3])); Yeah, with the args swapped, good point. Details details ;-) > +;; For signed integer vectors comparison.
> +(define_expand "vec_cmp<mode><mode>" > + [(set (match_operand:VEC_I 0 "vint_operand") > + (match_operator 1 "signed_or_equality_comparison_operator" > + [(match_operand:VEC_I 2 "vint_operand") > + (match_operand:VEC_I 3 "vint_operand")]))] > + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > +{ > + enum rtx_code code = GET_CODE (operands[1]); > + rtx tmp = gen_reg_rtx (<MODE>mode); > + switch (code) > + { > + case NE: > + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); > + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); > + break; > + case EQ: > + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); > + break; > + case GE: > + emit_insn (gen_vector_nlt<mode> (operands[0],operands[2], operands[3], > + tmp)); > + break; > + case GT: > + emit_insn (gen_vector_gt<mode> (operands[0], operands[2], operands[3])); > + break; > + case LE: > + emit_insn (gen_vector_ngt<mode> (operands[0], operands[2], operands[3], > + tmp)); > + break; > + case LT: > + emit_insn (gen_vector_gt<mode> (operands[0], operands[3], operands[2])); > + break; > + default: > + gcc_unreachable (); > + break; > + } > + DONE; > +}) I would think this can be done easier, but it is alright for now, it can be touched up later if we want. > +;; For float point vectors comparison. > +(define_expand "vec_cmp<mode><VEC_int>" This, too. > + [(set (match_operand:<VEC_INT> 0 "vint_operand") > + (match_operator 1 "comparison_operator" If you make an iterator for this instead, it is simpler code (you can then use <code> to do all these cases in one statement). But that can be done later. Okay for trunk. Thanks! Segher
Hi Segher, on 2019/11/8 上午8:07, Segher Boessenkool wrote: > Hi! > >>> Half are pretty simple: >>> >>> lt(a,b) = gt(b,a) >>> gt(a,b) = gt(a,b) >>> eq(a,b) = eq(a,b) >>> le(a,b) = ge(b,a) >>> ge(a,b) = ge(a,b) >>> >>> ltgt(a,b) = ge(a,b) ^ ge(b,a) >>> ord(a,b) = ge(a,b) | ge(b,a) >>> >>> The other half are the negations of those: >>> >>> unge(a,b) = ~gt(b,a) >>> unle(a,b) = ~gt(a,b) >>> ne(a,b) = ~eq(a,b) >>> ungt(a,b) = ~ge(b,a) >>> unlt(a,b) = ~ge(a,b) >>> >>> uneq(a,b) = ~(ge(a,b) ^ ge(b,a)) >>> un(a,b) = ~(ge(a,b) | ge(b,a)) >> >> Awesome! Do you suggest refactoring on them? :) > > I'd do the first five in one pattern (which then swaps two ops and the > condition in the lt and le case), and the other five in another pattern. > And the rest in two or four patterns? Just try it out, see what works > well. It helps to do a bunch together in one pattern, but if that then > turns into special cases for everything, more might be lost than gained. > Got it, I'll make a refactoring patch for this part later. > >>> 8 codes, ordered: never lt gt ltgt eq le ge ordered >>> 8 codes, unordered: unordered unlt ungt ne uneq unle unge always >>> 8 codes, fast-math: never lt gt ne eq le ge always >>> 8 codes, non-fp: never lt gt ne eq le ge always >> >> Sorry, I don't quite follow this table. What are the column heads? > > The first row is the eight possible fp conditions that are not always > true if unordered is set; the second row is those that *are* always true > if it is set. The other two rows (which are the same) are just the eight > conditions that do not test unordered at all. > > The tricky one is "ne": for FP *with* NaNs, "ne" means "less than, or > greater than, or unordered", while without NaNs (i.e. -ffast-math) it > means "less than, or greater than". > > You could write the column heads as > --/--/-- lt/--/-- --/gt/-- lt/gt/-- --/--/eq lt/--/eq --/gt/eq lt/gt/eq > if that helps? Just the eight combinations of the first three flags. > Thanks a lot for the explanation.
It's helpful! >> +;; For signed integer vectors comparison. >> +(define_expand "vec_cmp<mode><mode>" >> + [(set (match_operand:VEC_I 0 "vint_operand") >> + (match_operator 1 "signed_or_equality_comparison_operator" >> + [(match_operand:VEC_I 2 "vint_operand") >> + (match_operand:VEC_I 3 "vint_operand")]))] >> + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" >> +{ >> + enum rtx_code code = GET_CODE (operands[1]); >> + rtx tmp = gen_reg_rtx (<MODE>mode); >> + switch (code) >> + { >> + case NE: >> + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); >> + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); >> + break; >> + case EQ: >> + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); >> + break; >> + case GE: >> + emit_insn (gen_vector_nlt<mode> (operands[0],operands[2], operands[3], >> + tmp)); >> + break; >> + case GT: >> + emit_insn (gen_vector_gt<mode> (operands[0], operands[2], operands[3])); >> + break; >> + case LE: >> + emit_insn (gen_vector_ngt<mode> (operands[0], operands[2], operands[3], >> + tmp)); >> + break; >> + case LT: >> + emit_insn (gen_vector_gt<mode> (operands[0], operands[3], operands[2])); >> + break; >> + default: >> + gcc_unreachable (); >> + break; >> + } >> + DONE; >> +}) > > I would think this can be done easier, but it is alright for now, it can > be touched up later if we want. > >> +;; For float point vectors comparison. >> +(define_expand "vec_cmp<mode><VEC_int>" > > This, too. > >> + [(set (match_operand:<VEC_INT> 0 "vint_operand") >> + (match_operator 1 "comparison_operator" > > If you make an iterator for this instead, it is simpler code (you can then > use <code> to do all these cases in one statement). If my understanding is correct and based on some tries before, I think we have to leave these **CASEs** there (at least at the 1st level define_expand for vec_cmp*), since vec_cmp* doesn't have <code> field in the pattern name. The code can be only extracted from operator 1. 
I tried to add one dummy operand to hold <code> but it's impractical. Sorry, I may miss something here, I'm happy to make a subsequent patch to uniform these cases if there is a good way to run a code iterator on them. > > But that can be done later. Okay for trunk. Thanks! > Many thanks for your time! BR, Kewen
Hi! On Fri, Nov 08, 2019 at 10:38:13AM +0800, Kewen.Lin wrote: > >> + [(set (match_operand:<VEC_INT> 0 "vint_operand") > >> + (match_operator 1 "comparison_operator" > > > > If you make an iterator for this instead, it is simpler code (you can then > > use <code> to do all these cases in one statement). > > If my understanding is correct and based on some tries before, I think we > have to leave these **CASEs** there (at least at the 1st level define_expand > for vec_cmp*), since vec_cmp* doesn't have <code> field in the pattern name. > The code can be only extracted from operator 1. I tried to add one dummy > operand to hold <code> but it's impractical. > > Sorry, I may miss something here, I'm happy to make a subsequent patch to > uniform these cases if there is a good way to run a code iterator on them. Instead of [(set (match_operand:VEC_I 0 "vint_operand") (match_operator 1 "signed_or_equality_comparison_operator" [(match_operand:VEC_I 2 "vint_operand") (match_operand:VEC_I 3 "vint_operand")]))] you can do [(set (match_operand:VEC_I 0 "vint_operand") (some_iter:VEC_I (match_operand:VEC_I 1 "vint_operand") (match_operand:VEC_I 2 "vint_operand")))] with some_iter some code_iterator, (note you need to renumber), and in the body you can then just use <code> (or <CODE>, or some other code_attribute). code_iterator is more flexible than match_operator, in most ways. Segher
Hi Segher, on 2019/11/9 上午1:36, Segher Boessenkool wrote: > Hi! > > On Fri, Nov 08, 2019 at 10:38:13AM +0800, Kewen.Lin wrote: >>>> + [(set (match_operand:<VEC_INT> 0 "vint_operand") >>>> + (match_operator 1 "comparison_operator" >>> >>> If you make an iterator for this instead, it is simpler code (you can then >>> use <code> to do all these cases in one statement). >> >> If my understanding is correct and based on some tries before, I think we >> have to leave these **CASEs** there (at least at the 1st level define_expand >> for vec_cmp*), since vec_cmp* doesn't have <code> field in the pattern name. >> The code can be only extracted from operator 1. I tried to add one dummy >> operand to hold <code> but it's impractical. >> >> Sorry, I may miss something here, I'm happy to make a subsequent patch to >> uniform these cases if there is a good way to run a code iterator on them. > > Instead of > > [(set (match_operand:VEC_I 0 "vint_operand") > (match_operator 1 "signed_or_equality_comparison_operator" > [(match_operand:VEC_I 2 "vint_operand") > (match_operand:VEC_I 3 "vint_operand")]))] > > you can do > > [(set (match_operand:VEC_I 0 "vint_operand") > (some_iter:VEC_I (match_operand:VEC_I 1 "vint_operand") > (match_operand:VEC_I 2 "vint_operand")))] > Thanks for your example. But I'm afraid that it doesn't work for these patterns. 
I tried it with simple code below: ; For testing (define_code_iterator some_iter [eq gt]) (define_expand "vec_cmp<mode><mode>" [(set (match_operand:VEC_I 0 "vint_operand") (some_iter:VEC_I (match_operand:VEC_I 2 "vint_operand") (match_operand:VEC_I 3 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { emit_insn (gen_vector_<code><mode> (operands[0], operands[2], operands[3])); DONE; }) Error messages were emitted: /home/linkw/gcc/gcc-git-fix/gcc/config/rs6000/vector.md:531:1: duplicate definition of 'vec_cmpv16qiv16qi' /home/linkw/gcc/gcc-git-fix/gcc/config/rs6000/vector.md:531:1: duplicate definition of 'vec_cmpv8hiv8hi' /home/linkw/gcc/gcc-git-fix/gcc/config/rs6000/vector.md:531:1: duplicate definition of 'vec_cmpv4siv4si' /home/linkw/gcc/gcc-git-fix/gcc/config/rs6000/vector.md:531:1: duplicate definition of 'vec_cmpv2div2di' It's expected, since the pattern here is vec_cmp<mode><mode> rather than vec_cmp<mode><mode><code>, your example would work perfectly for the latter. Btw, in that pattern, the comparison operator is passed in operand 1. BR, Kewen > with some_iter some code_iterator, (note you need to renumber), and in the > body you can then just use <code> (or <CODE>, or some other code_attribute). > > code_iterator is more flexible than match_operator, in most ways. > > > Segher >
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d0cca1e..2a68548 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6800,7 +6800,7 @@ (const_string "16")))]) ;; 128-bit one's complement -(define_insn_and_split "*one_cmpl<mode>3_internal" +(define_insn_and_split "one_cmpl<mode>3_internal" [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 886cbad..64c3c60 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -107,6 +107,8 @@ (smin "smin") (smax "smax")]) +(define_code_iterator fpcmpun [ungt unge unlt unle]) + ;; Vector move instructions. Little-endian VSX loads and stores require ;; special handling to circumvent "element endianness." @@ -493,6 +495,241 @@ FAIL; }) +;; To support vector condition vectorization, define vcond_mask and vec_cmp. + +;; Same mode for condition true/false values and predicate operand. +(define_expand "vcond_mask_<mode><mode>" + [(match_operand:VEC_I 0 "vint_operand") + (match_operand:VEC_I 1 "vint_operand") + (match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], + operands[3])); + DONE; +}) + +;; Condition true/false values are float but predicate operand is of +;; type integer vector with same element size. +(define_expand "vcond_mask_<mode><VEC_int>" + [(match_operand:VEC_F 0 "vfloat_operand") + (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand") + (match_operand:<VEC_INT> 3 "vint_operand")] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], + operands[3])); + DONE; +}) + +;; For signed integer vectors comparison. 
+(define_expand "vec_cmp<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (match_operator 1 "comparison_operator" + [(match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx tmp = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + break; + case GE: + emit_insn ( + gen_vector_nlt<mode> (operands[0], operands[2], operands[3], tmp)); + break; + case GT: + emit_insn (gen_vector_gt<mode> (operands[0], operands[2], operands[3])); + break; + case LE: + emit_insn ( + gen_vector_ngt<mode> (operands[0], operands[2], operands[3], tmp)); + break; + case LT: + emit_insn (gen_vector_gt<mode> (operands[0], operands[3], operands[2])); + break; + case GEU: + emit_insn ( + gen_vector_nltu<mode> (operands[0], operands[2], operands[3], tmp)); + break; + case GTU: + emit_insn (gen_vector_gtu<mode> (operands[0], operands[2], operands[3])); + break; + case LEU: + emit_insn ( + gen_vector_ngtu<mode> (operands[0], operands[2], operands[3], tmp)); + break; + case LTU: + emit_insn (gen_vector_gtu<mode> (operands[0], operands[3], operands[2])); + break; + default: + gcc_unreachable (); + break; + } + DONE; +}) + +;; For unsigned integer vectors comparison. +(define_expand "vec_cmpu<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (match_operator 1 "comparison_operator" + [(match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +;; For float point vectors comparison. 
+(define_expand "vec_cmp<mode><VEC_int>" + [(set (match_operand:<VEC_INT> 0 "vint_operand") + (match_operator 1 "comparison_operator" + [(match_operand:VEC_F 2 "vfloat_operand") + (match_operand:VEC_F 3 "vfloat_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx res = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_eq<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + emit_insn (gen_one_cmpl<VEC_int>2 (operands[0], operands[0])); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case GE: + emit_insn (gen_vector_ge<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case GT: + emit_insn (gen_vector_gt<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case LE: + emit_insn (gen_vector_ge<mode> (res, operands[3], operands[2])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case LT: + emit_insn (gen_vector_gt<mode> (res, operands[3], operands[2])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case LTGT: + emit_insn (gen_vector_ltgt<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNORDERED: + emit_insn (gen_vector_unordered<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case ORDERED: + emit_insn (gen_vector_ordered<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNEQ: + emit_insn 
(gen_vector_uneq<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNGE: + emit_insn (gen_vector_unge<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNGT: + emit_insn (gen_vector_ungt<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNLE: + emit_insn (gen_vector_unle<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + case UNLT: + emit_insn (gen_vector_unlt<mode> (res, operands[2], operands[3])); + emit_insn ( + gen_move_insn (operands[0], gen_lowpart (<VEC_INT>mode, res))); + break; + + default: + gcc_unreachable (); + } + DONE; +}) + +;; For below vector_UN<cc><mode>: +;; op3 = (op1 >= op1) # !isNaN (op1) +;; op4 = (op2 >= op2) # !isNaN (op2) +;; op5 = !(op3 & op4) # isNaN (op1) || isNaN (op2) +;; op3 = op3 & op1 # isNaN (op1)? 0.0 : op1 +;; op4 = op4 & op2 # isNaN (op2)? 
0.0 : op2 +;; op0 = op3 <cc> op4 # normal cmp if no NaNs +;; op0 = op5 | op0 # UNORDERED | normal cmp + +(define_expand "vector_<code><mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand") + (fpcmpun:VEC_F (match_operand:VEC_F 1 "vfloat_operand") + (match_operand:VEC_F 2 "vfloat_operand")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + rtx op3 = gen_reg_rtx (<MODE>mode); + rtx op4 = gen_reg_rtx (<MODE>mode); + rtx op5 = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_vector_ge<mode> (op3, operands[1], operands[1])); + emit_insn (gen_vector_ge<mode> (op4, operands[2], operands[2])); + emit_insn (gen_and<mode>3 (op5, op3, op4)); + emit_insn (gen_one_cmpl<mode>3_internal (op5, op5)); + emit_insn (gen_and<mode>3 (op3, op3, operands[1])); + emit_insn (gen_and<mode>3 (op4, op4, operands[2])); + + switch (<CODE>) + { + case UNLT: + std::swap (op3, op4); + /* Fall through. */ + case UNGT: + emit_insn (gen_vector_gt<mode> (operands[0], op3, op4)); + break; + case UNLE: + std::swap (op3, op4); + /* Fall through. 
*/ + case UNGE: + emit_insn (gen_vector_ge<mode> (operands[0], op3, op4)); + break; + default: + gcc_unreachable (); + } + + emit_insn (gen_ior<mode>3 (operands[0], op5, operands[0])); + DONE; +}) + + (define_expand "vector_eq<mode>" [(set (match_operand:VEC_C 0 "vlogical_operand") (eq:VEC_C (match_operand:VEC_C 1 "vlogical_operand") @@ -575,7 +812,7 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_insn_and_split "*vector_uneq<mode>" +(define_insn_and_split "vector_uneq<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (uneq:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -596,7 +833,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_ltgt<mode>" +(define_insn_and_split "vector_ltgt<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (ltgt:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -617,7 +854,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_ordered<mode>" +(define_insn_and_split "vector_ordered<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (ordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] @@ -638,7 +875,7 @@ operands[4] = gen_reg_rtx (<MODE>mode); }) -(define_insn_and_split "*vector_unordered<mode>" +(define_insn_and_split "vector_unordered<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand") (unordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand") (match_operand:VEC_F 2 "vfloat_operand")))] diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c new file mode 100644 index 0000000..1023e8c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-1.c @@ -0,0 +1,297 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mvsx -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction 
vectorization, where comparison operands are of + double type and condition true/false values are integer type. Cover all + float point comparison codes. */ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE double + +#define LTGT(a, b) (__builtin_islessgreater ((a), (b))) +#define UNORD(a, b) (__builtin_isunordered ((a), (b))) +#define ORD(a, b) (!__builtin_isunordered ((a), (b))) +#define UNEQ(a, b) (!__builtin_islessgreater ((a), (b))) +#define UNGT(a, b) (!__builtin_islessequal ((a), (b))) +#define UNGE(a, b) (!__builtin_isless ((a), (b))) +#define UNLT(a, b) (!__builtin_isgreaterequal ((a), (b))) +#define UNLE(a, b) (!__builtin_isgreater ((a), (b))) + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ltgt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (LTGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ord 
(FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (ORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_uneq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNEQ (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ungt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGE (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unlt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unle (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLE (a[i], min_v)) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, NAN, 25, 26, 27}; + + FP_TYPE a3[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + FP_TYPE a4[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 6, 7}; + + FP_TYPE a5[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 10, 10}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + 
abort (); + + ret = test_gt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ge (a3, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + ret = test_ltgt (a3, 10); + if (ret != 26) + abort (); + + ret = test_ltgt (a5, 10); + if (ret != 23) + abort (); + + ret = test_unord (a5, 10); + if (ret != 24) + abort (); + + ret = test_ord (a5, 10); + if (ret != 26) + abort (); + + ret = test_uneq (a1, 10); + if (ret != 19) + abort (); + + ret = test_uneq (a4, 10); + if (ret != 24) + abort (); + + ret = test_ungt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unge (a3, 10); + if (ret != 19) + abort (); + + ret = test_unge (a4, 10); + if (ret != 24) + abort (); + + ret = test_unlt (a1, 10); + if (ret != 18) + abort (); + + ret = test_unlt (a2, 10); + if (ret != 23) + abort (); + + ret = test_unle (a1, 10); + if (ret != 19) + abort (); + + ret = test_unle (a2, 10); + if (ret != 23) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c new file mode 100644 index 0000000..db7b9ad --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-fp-2.c @@ -0,0 +1,297 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vmx_hw } */ +/* { dg-options "-O2 -ftree-vectorize -maltivec -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + float type and condition true/false values are integer type. Cover all + float point comparison codes. 
*/ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE float + +#define LTGT(a, b) (__builtin_islessgreater ((a), (b))) +#define UNORD(a, b) (__builtin_isunordered ((a), (b))) +#define ORD(a, b) (!__builtin_isunordered ((a), (b))) +#define UNEQ(a, b) (!__builtin_islessgreater ((a), (b))) +#define UNGT(a, b) (!__builtin_islessequal ((a), (b))) +#define UNGE(a, b) (!__builtin_isless ((a), (b))) +#define UNLT(a, b) (!__builtin_isgreaterequal ((a), (b))) +#define UNLE(a, b) (!__builtin_isgreater ((a), (b))) + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ltgt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (LTGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (ORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ 
((noinline)) int +test_unord (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNORD (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_uneq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNEQ (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ungt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNGE (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unlt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLT (a[i], min_v)) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_unle (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (UNLE (a[i], min_v)) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, NAN, 25, 26, 27}; + + FP_TYPE a3[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + FP_TYPE a4[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 6, 7}; + + FP_TYPE a5[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, NAN, 10, 10}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ge (a3, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); 
+ if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + ret = test_ltgt (a3, 10); + if (ret != 26) + abort (); + + ret = test_ltgt (a5, 10); + if (ret != 23) + abort (); + + ret = test_unord (a5, 10); + if (ret != 24) + abort (); + + ret = test_ord (a5, 10); + if (ret != 26) + abort (); + + ret = test_uneq (a1, 10); + if (ret != 19) + abort (); + + ret = test_uneq (a4, 10); + if (ret != 24) + abort (); + + ret = test_ungt (a3, 10); + if (ret != 19) + abort (); + + ret = test_ungt (a4, 10); + if (ret != 24) + abort (); + + ret = test_unge (a3, 10); + if (ret != 19) + abort (); + + ret = test_unge (a4, 10); + if (ret != 24) + abort (); + + ret = test_unlt (a1, 10); + if (ret != 18) + abort (); + + ret = test_unlt (a2, 10); + if (ret != 23) + abort (); + + ret = test_unle (a1, 10); + if (ret != 19) + abort (); + + ret = test_unle (a2, 10); + if (ret != 23) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 14 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c b/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c new file mode 100644 index 0000000..a786811 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-int-1.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mdejagnu-cpu=power8 -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + signed long long type and condition true/false values are integer type. 
*/ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE signed long long + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a2, 10); + if (ret != 19) + abort (); + + ret = test_ge (a2, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c new file mode 100644 index 0000000..dd3c030 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92132-int-2.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mdejagnu-cpu=power8 -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + unsigned long long type and condition true/false values are integer type. */ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define FP_TYPE unsigned long long + +__attribute__ ((noinline)) int +test_eq (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (FP_TYPE *a, FP_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + FP_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + FP_TYPE a2[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + ret 
= test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a2, 10); + if (ret != 19) + abort (); + + ret = test_ge (a2, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */