| Message ID | 20240830101605.1400739-1-pan2.li@intel.com |
|---|---|
| State | New |
| Series | [v1] Vect: Support form 1 of vector signed integer .SAT_ADD |
Kindly ping.

Pan

-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com>
Sent: Friday, August 30, 2024 6:16 PM
To: gcc-patches@gcc.gnu.org
Cc: richard.guenther@gmail.com; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Li, Pan2 <pan2.li@intel.com>
Subject: [PATCH v1] Vect: Support form 1 of vector signed integer .SAT_ADD

From: Pan Li <pan2.li@intel.com>

This patch would like to support the vector signed ssadd pattern
for the RISC-V backend.  Aka

Form 1:
  #define DEF_VEC_SAT_S_ADD_FMT_1(T, UT, MIN, MAX)             \
  void __attribute__((noinline))                               \
  vec_sat_s_add_##T##_fmt_1 (T *out, T *x, T *y, unsigned n)   \
  {                                                            \
    for (unsigned i = 0; i < n; i++)                           \
      {                                                        \
        T sum = (UT)x[i] + (UT)y[i];                           \
        out[i] = (x[i] ^ y[i]) < 0                             \
          ? sum                                                \
          : (sum ^ x[i]) >= 0                                  \
            ? sum                                              \
            : x[i] < 0 ? MIN : MAX;                            \
      }                                                        \
  }

DEF_VEC_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

If the backend implements the vector mode of ssadd, we will see an IR
diff similar to the below.

Before this patch:
 108 │   _114 = .SELECT_VL (ivtmp_112, POLY_INT_CST [2, 2]);
 109 │   ivtmp_77 = _114 * 8;
 110 │   vect__4.9_80 = .MASK_LEN_LOAD (vectp_x.7_78, 64B, { -1, ... }, _114, 0);
 111 │   vect__5.10_81 = VIEW_CONVERT_EXPR<vector([2,2]) long unsigned int>(vect__4.9_80);
 112 │   vect__7.13_85 = .MASK_LEN_LOAD (vectp_y.11_83, 64B, { -1, ... }, _114, 0);
 113 │   vect__8.14_86 = VIEW_CONVERT_EXPR<vector([2,2]) long unsigned int>(vect__7.13_85);
 114 │   vect__9.15_87 = vect__5.10_81 + vect__8.14_86;
 115 │   vect_sum_20.16_88 = VIEW_CONVERT_EXPR<vector([2,2]) long int>(vect__9.15_87);
 116 │   vect__10.17_89 = vect__4.9_80 ^ vect__7.13_85;
 117 │   vect__11.18_90 = vect__4.9_80 ^ vect_sum_20.16_88;
 118 │   mask__46.19_92 = vect__10.17_89 >= { 0, ... };
 119 │   _36 = vect__4.9_80 >> 63;
 120 │   mask__44.26_104 = vect__11.18_90 < { 0, ... };
 121 │   mask__43.27_105 = mask__46.19_92 & mask__44.26_104;
 122 │   _115 = .COND_XOR (mask__43.27_105, _36, { 9223372036854775807, ... }, vect_sum_20.16_88);
 123 │   .MASK_LEN_STORE (vectp_out.29_108, 64B, { -1, ... }, _114, 0, _115);
 124 │   vectp_x.7_79 = vectp_x.7_78 + ivtmp_77;
 125 │   vectp_y.11_84 = vectp_y.11_83 + ivtmp_77;
 126 │   vectp_out.29_109 = vectp_out.29_108 + ivtmp_77;
 127 │   ivtmp_113 = ivtmp_112 - _114;

After this patch:
  94 │   # vectp_x.7_82 = PHI <vectp_x.7_83(6), x_18(D)(5)>
  95 │   # vectp_y.10_86 = PHI <vectp_y.10_87(6), y_19(D)(5)>
  96 │   # vectp_out.14_91 = PHI <vectp_out.14_92(6), out_21(D)(5)>
  97 │   # ivtmp_95 = PHI <ivtmp_96(6), _94(5)>
  98 │   _97 = .SELECT_VL (ivtmp_95, POLY_INT_CST [2, 2]);
  99 │   ivtmp_81 = _97 * 8;
 100 │   vect__4.9_84 = .MASK_LEN_LOAD (vectp_x.7_82, 64B, { -1, ... }, _97, 0);
 101 │   vect__7.12_88 = .MASK_LEN_LOAD (vectp_y.10_86, 64B, { -1, ... }, _97, 0);
 102 │   vect_patt_40.13_89 = .SAT_ADD (vect__4.9_84, vect__7.12_88);
 103 │   .MASK_LEN_STORE (vectp_out.14_91, 64B, { -1, ... }, _97, 0, vect_patt_40.13_89);
 104 │   vectp_x.7_83 = vectp_x.7_82 + ivtmp_81;
 105 │   vectp_y.10_87 = vectp_y.10_86 + ivtmp_81;
 106 │   vectp_out.14_92 = vectp_out.14_91 + ivtmp_81;
 107 │   ivtmp_96 = ivtmp_95 - _97;

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

	* match.pd: Add case 2 for the signed .SAT_ADD consumed by
	vect pattern.
	* tree-vect-patterns.cc (gimple_signed_integer_sat_add): Add new
	matching func decl for signed .SAT_ADD.
	(vect_recog_sat_add_pattern): Add signed .SAT_ADD pattern match.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/match.pd              | 17 +++++++++++++++++
 gcc/tree-vect-patterns.cc |  5 ++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index be211535a49..578c9dd5b77 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3207,6 +3207,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))

+/* Signed saturation add, case 2:
+   T sum = (T)((UT)X + (UT)Y)
+   SAT_S_ADD = (X ^ Y) < 0 && (X ^ sum) >= 0 ? (-(T)(X < 0) ^ MAX) : sum;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (bit_and:c (lt (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0)
+                                                          (nop_convert @1))))
+                       integer_zerop)
+                   (ge (bit_xor:c @0 @1) integer_zerop))
+        (bit_xor:c (nop_convert (negate (nop_convert (convert
+                                                      (lt @0 integer_zerop)))))
+                   max_value)
+        @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
 /* Unsigned saturation sub, case 1 (branch with gt):
    SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 3162250bbdd..8504dc06760 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4497,6 +4497,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));

+extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
+
 static gimple *
 vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
                                      internal_fn fn, tree *type_out,
@@ -4557,7 +4559,8 @@ vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
   tree ops[2];
   tree lhs = gimple_assign_lhs (last_stmt);

-  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL))
+  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
+      || gimple_signed_integer_sat_add (lhs, ops, NULL))
     {
       if (TREE_CODE (ops[1]) == INTEGER_CST)
         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
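[Editor's note: the sketch below is a minimal, self-contained scalar rendering of the form-1 idiom from the macro above, checked exhaustively for int8_t against a widened reference. It is illustrative only and not part of the patch; the function names sat_s_add_i8_form1 and sat_s_add_i8_ref are made up here.]

```c
/* Scalar sketch of the form-1 signed saturating add (illustrative only).  */
#include <stdint.h>
#include <stdio.h>

static int8_t
sat_s_add_i8_form1 (int8_t x, int8_t y)
{
  /* Wrapping add via the unsigned type, as in the patch's macro; the
     conversion back to int8_t wraps modulo 2^8 on GCC.  */
  int8_t sum = (uint8_t) x + (uint8_t) y;
  return (x ^ y) < 0            /* Different signs: overflow impossible.  */
         ? sum
         : (sum ^ x) >= 0       /* Sum kept x's sign: no overflow.  */
           ? sum
           : x < 0 ? INT8_MIN : INT8_MAX;  /* Saturate toward x's sign.  */
}

static int8_t
sat_s_add_i8_ref (int8_t x, int8_t y)
{
  int sum = (int) x + (int) y;  /* Exact in int, then clamp.  */
  if (sum > INT8_MAX)
    return INT8_MAX;
  if (sum < INT8_MIN)
    return INT8_MIN;
  return (int8_t) sum;
}

int
main (void)
{
  for (int x = INT8_MIN; x <= INT8_MAX; x++)
    for (int y = INT8_MIN; y <= INT8_MAX; y++)
      if (sat_s_add_i8_form1 ((int8_t) x, (int8_t) y)
          != sat_s_add_i8_ref ((int8_t) x, (int8_t) y))
        {
          printf ("mismatch at x=%d y=%d\n", x, y);
          return 1;
        }
  printf ("all 65536 int8_t pairs match\n");
  return 0;
}
```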
On Fri, Aug 30, 2024 at 12:16 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the vector signed ssadd pattern
> for the RISC-V backend.  Aka
>
> Form 1:
>   #define DEF_VEC_SAT_S_ADD_FMT_1(T, UT, MIN, MAX)             \
>   void __attribute__((noinline))                               \
>   vec_sat_s_add_##T##_fmt_1 (T *out, T *x, T *y, unsigned n)   \
>   {                                                            \
>     for (unsigned i = 0; i < n; i++)                           \
>       {                                                        \
>         T sum = (UT)x[i] + (UT)y[i];                           \
>         out[i] = (x[i] ^ y[i]) < 0                             \
>           ? sum                                                \
>           : (sum ^ x[i]) >= 0                                  \
>             ? sum                                              \
>             : x[i] < 0 ? MIN : MAX;                            \
>       }                                                        \
>   }
>
> DEF_VEC_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)
>
> If the backend implements the vector mode of ssadd, we will see an IR
> diff similar to the below.
>
> Before this patch:
>  108 │   _114 = .SELECT_VL (ivtmp_112, POLY_INT_CST [2, 2]);
>  109 │   ivtmp_77 = _114 * 8;
>  110 │   vect__4.9_80 = .MASK_LEN_LOAD (vectp_x.7_78, 64B, { -1, ... }, _114, 0);
>  111 │   vect__5.10_81 = VIEW_CONVERT_EXPR<vector([2,2]) long unsigned int>(vect__4.9_80);
>  112 │   vect__7.13_85 = .MASK_LEN_LOAD (vectp_y.11_83, 64B, { -1, ... }, _114, 0);
>  113 │   vect__8.14_86 = VIEW_CONVERT_EXPR<vector([2,2]) long unsigned int>(vect__7.13_85);
>  114 │   vect__9.15_87 = vect__5.10_81 + vect__8.14_86;
>  115 │   vect_sum_20.16_88 = VIEW_CONVERT_EXPR<vector([2,2]) long int>(vect__9.15_87);
>  116 │   vect__10.17_89 = vect__4.9_80 ^ vect__7.13_85;
>  117 │   vect__11.18_90 = vect__4.9_80 ^ vect_sum_20.16_88;
>  118 │   mask__46.19_92 = vect__10.17_89 >= { 0, ... };
>  119 │   _36 = vect__4.9_80 >> 63;
>  120 │   mask__44.26_104 = vect__11.18_90 < { 0, ... };
>  121 │   mask__43.27_105 = mask__46.19_92 & mask__44.26_104;
>  122 │   _115 = .COND_XOR (mask__43.27_105, _36, { 9223372036854775807, ... }, vect_sum_20.16_88);
>  123 │   .MASK_LEN_STORE (vectp_out.29_108, 64B, { -1, ... }, _114, 0, _115);
>  124 │   vectp_x.7_79 = vectp_x.7_78 + ivtmp_77;
>  125 │   vectp_y.11_84 = vectp_y.11_83 + ivtmp_77;
>  126 │   vectp_out.29_109 = vectp_out.29_108 + ivtmp_77;
>  127 │   ivtmp_113 = ivtmp_112 - _114;
>
> After this patch:
>   94 │   # vectp_x.7_82 = PHI <vectp_x.7_83(6), x_18(D)(5)>
>   95 │   # vectp_y.10_86 = PHI <vectp_y.10_87(6), y_19(D)(5)>
>   96 │   # vectp_out.14_91 = PHI <vectp_out.14_92(6), out_21(D)(5)>
>   97 │   # ivtmp_95 = PHI <ivtmp_96(6), _94(5)>
>   98 │   _97 = .SELECT_VL (ivtmp_95, POLY_INT_CST [2, 2]);
>   99 │   ivtmp_81 = _97 * 8;
>  100 │   vect__4.9_84 = .MASK_LEN_LOAD (vectp_x.7_82, 64B, { -1, ... }, _97, 0);
>  101 │   vect__7.12_88 = .MASK_LEN_LOAD (vectp_y.10_86, 64B, { -1, ... }, _97, 0);
>  102 │   vect_patt_40.13_89 = .SAT_ADD (vect__4.9_84, vect__7.12_88);
>  103 │   .MASK_LEN_STORE (vectp_out.14_91, 64B, { -1, ... }, _97, 0, vect_patt_40.13_89);
>  104 │   vectp_x.7_83 = vectp_x.7_82 + ivtmp_81;
>  105 │   vectp_y.10_87 = vectp_y.10_86 + ivtmp_81;
>  106 │   vectp_out.14_92 = vectp_out.14_91 + ivtmp_81;
>  107 │   ivtmp_96 = ivtmp_95 - _97;
>
> The below test suites are passed for this patch:
> 1. The rv64gcv fully regression tests.
> 2. The x86 bootstrap tests.
> 3. The x86 fully regression tests.
>
> gcc/ChangeLog:
>
>	* match.pd: Add case 2 for the signed .SAT_ADD consumed by
>	vect pattern.
>	* tree-vect-patterns.cc (gimple_signed_integer_sat_add): Add new
>	matching func decl for signed .SAT_ADD.
>	(vect_recog_sat_add_pattern): Add signed .SAT_ADD pattern match.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd              | 17 +++++++++++++++++
>  gcc/tree-vect-patterns.cc |  5 ++++-
>  2 files changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index be211535a49..578c9dd5b77 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3207,6 +3207,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* Signed saturation add, case 2:
> +   T sum = (T)((UT)X + (UT)Y)
> +   SAT_S_ADD = (X ^ Y) < 0 && (X ^ sum) >= 0 ? (-(T)(X < 0) ^ MAX) : sum;
> +
> +   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
> +(match (signed_integer_sat_add @0 @1)
> + (cond^ (bit_and:c (lt (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0)
> +                                                          (nop_convert @1))))
> +                       integer_zerop)
> +                   (ge (bit_xor:c @0 @1) integer_zerop))

You should only need one :c on either of the bit_xor above.

> +        (bit_xor:c (nop_convert (negate (nop_convert (convert
> +                                                      (lt @0 integer_zerop)))))
> +                   max_value)
> +        @2)
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))

And the types_match looks superfluous because they appear in bit_xor
together.

Otherwise OK.

Richard.

> +
>  /* Unsigned saturation sub, case 1 (branch with gt):
>     SAT_U_SUB = X > Y ? X - Y : 0  */
>  (match (unsigned_integer_sat_sub @0 @1)
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 3162250bbdd..8504dc06760 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -4497,6 +4497,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
> +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> +
>  static gimple *
>  vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
>                                       internal_fn fn, tree *type_out,
> @@ -4557,7 +4559,8 @@ vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
>    tree ops[2];
>    tree lhs = gimple_assign_lhs (last_stmt);
>
> -  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL))
> +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> +      || gimple_signed_integer_sat_add (lhs, ops, NULL))
>      {
>        if (TREE_CODE (ops[1]) == INTEGER_CST)
>          ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
> --
> 2.43.0
>
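[Editor's note: on the replacement value matched by the new case-2 pattern — in two's complement, (-(T)(X < 0)) ^ MAX evaluates to MIN when X is negative and to MAX otherwise, which is the _36 = x >> 63 / .COND_XOR sequence visible in the "Before this patch" dump. The tiny C sketch below is illustrative only, not part of the patch, and simply demonstrates that identity for int64_t.]

```c
/* Demonstrate the branchless bound selection (-(T)(X < 0)) ^ MAX for int64_t
   (illustrative only).  */
#include <stdint.h>
#include <stdio.h>

static int64_t
sat_bound_from_sign (int64_t x)
{
  /* x < 0  ->  -(int64_t)1 = all ones  ->  all ones ^ INT64_MAX = INT64_MIN
     x >= 0 ->  -(int64_t)0 = 0         ->  0 ^ INT64_MAX        = INT64_MAX  */
  return (-(int64_t) (x < 0)) ^ INT64_MAX;
}

int
main (void)
{
  printf ("%lld\n", (long long) sat_bound_from_sign (-7));  /* prints INT64_MIN */
  printf ("%lld\n", (long long) sat_bound_from_sign (7));   /* prints INT64_MAX */
  return 0;
}
```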