Message ID | 20240920105729.1058948-1-pan2.li@intel.com |
---|---|
State | New |
Headers | show |
Series | [v1,1/2] Match: Support form 2 for vector signed integer .SAT_ADD | expand |
```text
On Fri, Sep 20, 2024 at 12:58 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the form 2 of the vector signed
> integer .SAT_ADD. Aka below example:
>
> Form 2:
>   #define DEF_VEC_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \
>   void __attribute__((noinline)) \
>   vec_sat_s_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
>   { \
>     unsigned i; \
>     for (i = 0; i < limit; i++) \
>       { \
>         T x = op_1[i]; \
>         T y = op_2[i]; \
>         T sum = (UT)x + (UT)y; \
>         if ((x ^ y) < 0 || (sum ^ x) >= 0) \
>           out[i] = sum; \
>         else \
>           out[i] = x < 0 ? MIN : MAX; \
>       } \
>   }
>
> DEF_VEC_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)
>
> Before this patch:
>  104 │   loop_len_79 = MIN_EXPR <ivtmp.51_53, POLY_INT_CST [16, 16]>;
>  105 │   _50 = &MEM <vector([16,16]) signed char> [(int8_t *)vectp_op_1.9_77];
>  106 │   vect_x_18.11_80 = .MASK_LEN_LOAD (_50, 8B, { -1, ... }, loop_len_79, 0);
>  107 │   _70 = vect_x_18.11_80 >> 7;
>  108 │   vect_x.12_81 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_x_18.11_80);
>  109 │   _26 = (void *) ivtmp.47_20;
>  110 │   _27 = &MEM <vector([16,16]) signed char> [(int8_t *)_26];
>  111 │   vect_y_20.15_84 = .MASK_LEN_LOAD (_27, 8B, { -1, ... }, loop_len_79, 0);
>  112 │   vect__7.21_90 = vect_x_18.11_80 ^ vect_y_20.15_84;
>  113 │   mask__50.23_92 = vect__7.21_90 >= { 0, ... };
>  114 │   vect_y.16_85 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_y_20.15_84);
>  115 │   vect__6.17_86 = vect_x.12_81 + vect_y.16_85;
>  116 │   vect_sum_21.18_87 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(vect__6.17_86);
>  117 │   vect__8.19_88 = vect_x_18.11_80 ^ vect_sum_21.18_87;
>  118 │   mask__45.20_89 = vect__8.19_88 < { 0, ... };
>  119 │   mask__44.24_93 = mask__45.20_89 & mask__50.23_92;
>  120 │   _40 = .COND_XOR (mask__44.24_93, _70, { 127, ... }, vect_sum_21.18_87);
>  121 │   _60 = (void *) ivtmp.49_6;
>  122 │   _61 = &MEM <vector([16,16]) signed char> [(int8_t *)_60];
>  123 │   .MASK_LEN_STORE (_61, 8B, { -1, ... }, loop_len_79, 0, _40);
>  124 │   vectp_op_1.9_78 = vectp_op_1.9_77 + POLY_INT_CST [16, 16];
>  125 │   ivtmp.47_4 = ivtmp.47_20 + POLY_INT_CST [16, 16];
>  126 │   ivtmp.49_21 = ivtmp.49_6 + POLY_INT_CST [16, 16];
>  127 │   ivtmp.51_98 = ivtmp.51_53;
>  128 │   ivtmp.51_8 = ivtmp.51_53 + POLY_INT_CST [18446744073709551600, 18446744073709551600];
>
> After this patch:
>   88 │   _103 = .SELECT_VL (ivtmp_101, POLY_INT_CST [16, 16]);
>   89 │   vect_x_18.11_90 = .MASK_LEN_LOAD (vectp_op_1.9_88, 8B, { -1, ... }, _103, 0);
>   90 │   vect_y_20.14_94 = .MASK_LEN_LOAD (vectp_op_2.12_92, 8B, { -1, ... }, _103, 0);
>   91 │   vect_patt_49.15_95 = .SAT_ADD (vect_x_18.11_90, vect_y_20.14_94);
>   92 │   .MASK_LEN_STORE (vectp_out.16_97, 8B, { -1, ... }, _103, 0, vect_patt_49.15_95);
>   93 │   vectp_op_1.9_89 = vectp_op_1.9_88 + _103;
>   94 │   vectp_op_2.12_93 = vectp_op_2.12_92 + _103;
>   95 │   vectp_out.16_98 = vectp_out.16_97 + _103;
>   96 │   ivtmp_102 = ivtmp_101 - _103;
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.

OK.

Thanks,
Richard.

> gcc/ChangeLog:
>
>         * match.pd: Add the case 3 for signed .SAT_ADD matching.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index fdb59ff0d44..940292d0d49 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3251,6 +3251,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* Signed saturation add, case 5:
> +   T sum = (T)((UT)X + (UT)Y);
> +   SAT_S_ADD = (X ^ sum) < 0 & ~((X ^ Y) < 0) ? (-(T)(X < 0) ^ MAX) : sum;
> +
> +   The T and UT are type pair like T=int8_t, UT=uint8_t. */
> +(match (signed_integer_sat_add @0 @1)
> + (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0)
> +                                                        (nop_convert @1))))
> +                       integer_zerop)
> +                   (bit_not (lt (bit_xor:c @0 @1) integer_zerop)))
> +        (bit_xor:c (nop_convert (negate (nop_convert (convert
> +                                                      (lt @0 integer_zerop)))))
> +                   max_value)
> +        @2)
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
> +
>  /* Unsigned saturation sub, case 1 (branch with gt):
>     SAT_U_SUB = X > Y ? X - Y : 0 */
>  (match (unsigned_integer_sat_sub @0 @1)
> --
> 2.43.0
>
```
```diff
diff --git a/gcc/match.pd b/gcc/match.pd
index fdb59ff0d44..940292d0d49 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3251,6 +3251,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* Signed saturation add, case 5:
+   T sum = (T)((UT)X + (UT)Y);
+   SAT_S_ADD = (X ^ sum) < 0 & ~((X ^ Y) < 0) ? (-(T)(X < 0) ^ MAX) : sum;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t. */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0)
+                                                        (nop_convert @1))))
+                       integer_zerop)
+                   (bit_not (lt (bit_xor:c @0 @1) integer_zerop)))
+        (bit_xor:c (nop_convert (negate (nop_convert (convert
+                                                      (lt @0 integer_zerop)))))
+                   max_value)
+        @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
+
 /* Unsigned saturation sub, case 1 (branch with gt):
    SAT_U_SUB = X > Y ? X - Y : 0 */
 (match (unsigned_integer_sat_sub @0 @1)
```