Message ID | 20241008083315.190027-1-pan2.li@intel.com |
---|---|
State | New |
Headers | show |
Series | [v1,1/4] Match: Support form 1 for scalar signed integer SAT_TRUNC | expand |
LGTM <pan2.li@intel.com> 於 2024年10月8日 週二 16:33 寫道: > From: Pan Li <pan2.li@intel.com> > > This patch would like to implement the sstrunc for scalar signed > integer. > > Form 1: > #define DEF_SAT_S_TRUNC_FMT_1(WT, NT, NT_MIN, NT_MAX) \ > NT __attribute__((noinline)) \ > sat_s_trunc_##WT##_to_##NT##_fmt_1 (WT x) \ > { \ > NT trunc = (NT)x; \ > return (WT)NT_MIN <= x && x <= (WT)NT_MAX \ > ? trunc \ > : x < 0 ? NT_MIN : NT_MAX; \ > } > > DEF_SAT_S_TRUNC_FMT_1(int64_t, int32_t, INT32_MIN, INT32_MAX) > > Before this patch: > 10 │ sat_s_trunc_int64_t_to_int32_t_fmt_1: > 11 │ li a5,1 > 12 │ slli a5,a5,31 > 13 │ li a4,-1 > 14 │ add a5,a0,a5 > 15 │ srli a4,a4,32 > 16 │ bgtu a5,a4,.L2 > 17 │ sext.w a0,a0 > 18 │ ret > 19 │ .L2: > 20 │ srai a5,a0,63 > 21 │ li a0,-2147483648 > 22 │ xor a0,a0,a5 > 23 │ not a0,a0 > 24 │ ret > > After this patch: > 10 │ sat_s_trunc_int64_t_to_int32_t_fmt_1: > 11 │ li a5,-2147483648 > 12 │ xori a3,a5,-1 > 13 │ slt a4,a0,a3 > 14 │ slt a5,a5,a0 > 15 │ and a5,a4,a5 > 16 │ srai a4,a0,63 > 17 │ xor a4,a4,a3 > 18 │ addi a3,a5,-1 > 19 │ neg a5,a5 > 20 │ and a4,a4,a3 > 21 │ and a0,a0,a5 > 22 │ or a0,a0,a4 > 23 │ sext.w a0,a0 > 24 │ ret > > The below test suites are passed for this patch. > * The rv64gcv fully regression test. > > gcc/ChangeLog: > > * config/riscv/riscv-protos.h (riscv_expand_sstrunc): Add new > func decl to expand SAT_TRUNC. > * config/riscv/riscv.cc (riscv_expand_sstrunc): Add new func > impl to expand SAT_TRUNC. > * config/riscv/riscv.md (sstrunc<mode><anyi_double_truncated>2): > Add new pattern for double truncation. > (sstrunc<mode><anyi_quad_truncated>2): Ditto but for quad. > (sstrunc<mode><anyi_oct_truncated>2): Ditto but for oct. 
> > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/config/riscv/riscv-protos.h | 1 + > gcc/config/riscv/riscv.cc | 61 +++++++++++++++++++++++++++++++++ > gcc/config/riscv/riscv.md | 30 ++++++++++++++++ > 3 files changed, 92 insertions(+) > > diff --git a/gcc/config/riscv/riscv-protos.h > b/gcc/config/riscv/riscv-protos.h > index 3d8775e582d..1e6d10a1402 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -138,6 +138,7 @@ extern void riscv_expand_ssadd (rtx, rtx, rtx); > extern void riscv_expand_ussub (rtx, rtx, rtx); > extern void riscv_expand_sssub (rtx, rtx, rtx); > extern void riscv_expand_ustrunc (rtx, rtx); > +extern void riscv_expand_sstrunc (rtx, rtx); > > #ifdef RTX_CODE > extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool > *invert_ptr = 0); > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index 8708a7b42c6..57f2554d491 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -12438,6 +12438,67 @@ riscv_expand_ustrunc (rtx dest, rtx src) > emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); > } > > +/* Implement the signed saturation truncation for int mode. > + > + b = SAT_TRUNC (a); > + => > + 1. lt = a < max > + 2. gt = min < a > + 3. mask = lt & gt > + 4. trunc_mask = -mask > + 5. sat_mask = mask - 1 > + 6. lt = a < 0 > + 7. neg = -lt > + 8. sat = neg ^ max > + 9. trunc = src & trunc_mask > + 10. sat = sat & sat_mask > + 11. 
dest = trunc | sat */ > + > +void > +riscv_expand_sstrunc (rtx dest, rtx src) > +{ > + machine_mode mode = GET_MODE (dest); > + unsigned narrow_prec = GET_MODE_PRECISION (mode).to_constant (); > + HOST_WIDE_INT narrow_max = ((int64_t)1 << (narrow_prec - 1)) - 1; // 127 > + HOST_WIDE_INT narrow_min = -narrow_max - 1; // -128 > + > + rtx xmode_narrow_max = gen_reg_rtx (Xmode); > + rtx xmode_narrow_min = gen_reg_rtx (Xmode); > + rtx xmode_lt = gen_reg_rtx (Xmode); > + rtx xmode_gt = gen_reg_rtx (Xmode); > + rtx xmode_src = gen_lowpart (Xmode, src); > + rtx xmode_dest = gen_reg_rtx (Xmode); > + rtx xmode_mask = gen_reg_rtx (Xmode); > + rtx xmode_sat = gen_reg_rtx (Xmode); > + rtx xmode_trunc = gen_reg_rtx (Xmode); > + rtx xmode_sat_mask = gen_reg_rtx (Xmode); > + rtx xmode_trunc_mask = gen_reg_rtx (Xmode); > + > + /* Step-1: lt = src < max, gt = min < src, mask = lt & gt */ > + emit_move_insn (xmode_narrow_min, gen_int_mode (narrow_min, Xmode)); > + emit_move_insn (xmode_narrow_max, gen_int_mode (narrow_max, Xmode)); > + riscv_emit_binary (LT, xmode_lt, xmode_src, xmode_narrow_max); > + riscv_emit_binary (LT, xmode_gt, xmode_narrow_min, xmode_src); > + riscv_emit_binary (AND, xmode_mask, xmode_lt, xmode_gt); > + > + /* Step-2: sat_mask = mask - 1, trunc_mask = ~mask */ > + riscv_emit_binary (PLUS, xmode_sat_mask, xmode_mask, CONSTM1_RTX > (Xmode)); > + riscv_emit_unary (NEG, xmode_trunc_mask, xmode_mask); > + > + /* Step-3: lt = src < 0, lt = -lt, sat = lt ^ narrow_max */ > + riscv_emit_binary (LT, xmode_lt, xmode_src, CONST0_RTX (Xmode)); > + riscv_emit_unary (NEG, xmode_lt, xmode_lt); > + riscv_emit_binary (XOR, xmode_sat, xmode_lt, xmode_narrow_max); > + > + /* Step-4: xmode_dest = (src & trunc_mask) | (sat & sat_mask) */ > + riscv_emit_binary (AND, xmode_trunc, xmode_src, xmode_trunc_mask); > + riscv_emit_binary (AND, xmode_sat, xmode_sat, xmode_sat_mask); > + riscv_emit_binary (IOR, xmode_dest, xmode_trunc, xmode_sat); > + > + /* Step-5: dest = xmode_dest */ > + 
emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); > +} > + > /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return TFmode for > TI_LONG_DOUBLE_TYPE which is for long double type, go with the > default one for the others. */ > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md > index 067c2415db1..688c07df46c 100644 > --- a/gcc/config/riscv/riscv.md > +++ b/gcc/config/riscv/riscv.md > @@ -4413,6 +4413,16 @@ (define_expand > "ustrunc<mode><anyi_double_truncated>2" > } > ) > > +(define_expand "sstrunc<mode><anyi_double_truncated>2" > + [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand") > + (match_operand:ANYI_DOUBLE_TRUNC 1 "register_operand")] > + "" > + { > + riscv_expand_sstrunc (operands[0], operands[1]); > + DONE; > + } > +) > + > (define_expand "ustrunc<mode><anyi_quad_truncated>2" > [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand") > (match_operand:ANYI_QUAD_TRUNC 1 "register_operand")] > @@ -4423,6 +4433,16 @@ (define_expand "ustrunc<mode><anyi_quad_truncated>2" > } > ) > > +(define_expand "sstrunc<mode><anyi_quad_truncated>2" > + [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand") > + (match_operand:ANYI_QUAD_TRUNC 1 "register_operand")] > + "" > + { > + riscv_expand_sstrunc (operands[0], operands[1]); > + DONE; > + } > +) > + > (define_expand "ustrunc<mode><anyi_oct_truncated>2" > [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand") > (match_operand:ANYI_OCT_TRUNC 1 "register_operand")] > @@ -4433,6 +4453,16 @@ (define_expand "ustrunc<mode><anyi_oct_truncated>2" > } > ) > > +(define_expand "sstrunc<mode><anyi_oct_truncated>2" > + [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand") > + (match_operand:ANYI_OCT_TRUNC 1 "register_operand")] > + "" > + { > + riscv_expand_sstrunc (operands[0], operands[1]); > + DONE; > + } > +) > + > ;; These are forms of (x << C1) + C2, potentially canonicalized from > ;; ((x + C2') << C1. 
Depending on the cost to load C2 vs C2' we may > ;; want to go ahead and recognize this form as C2 may be cheaper to > -- > 2.43.0 > >
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 3d8775e582d..1e6d10a1402 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -138,6 +138,7 @@ extern void riscv_expand_ssadd (rtx, rtx, rtx); extern void riscv_expand_ussub (rtx, rtx, rtx); extern void riscv_expand_sssub (rtx, rtx, rtx); extern void riscv_expand_ustrunc (rtx, rtx); +extern void riscv_expand_sstrunc (rtx, rtx); #ifdef RTX_CODE extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 8708a7b42c6..57f2554d491 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -12438,6 +12438,67 @@ riscv_expand_ustrunc (rtx dest, rtx src) emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); } +/* Implement the signed saturation truncation for int mode. + + b = SAT_TRUNC (a); + => + 1. lt = a < max + 2. gt = min < a + 3. mask = lt & gt + 4. trunc_mask = -mask + 5. sat_mask = mask - 1 + 6. lt = a < 0 + 7. neg = -lt + 8. sat = neg ^ max + 9. trunc = src & trunc_mask + 10. sat = sat & sat_mask + 11. 
dest = trunc | sat */ + +void +riscv_expand_sstrunc (rtx dest, rtx src) +{ + machine_mode mode = GET_MODE (dest); + unsigned narrow_prec = GET_MODE_PRECISION (mode).to_constant (); + HOST_WIDE_INT narrow_max = ((int64_t)1 << (narrow_prec - 1)) - 1; // 127 + HOST_WIDE_INT narrow_min = -narrow_max - 1; // -128 + + rtx xmode_narrow_max = gen_reg_rtx (Xmode); + rtx xmode_narrow_min = gen_reg_rtx (Xmode); + rtx xmode_lt = gen_reg_rtx (Xmode); + rtx xmode_gt = gen_reg_rtx (Xmode); + rtx xmode_src = gen_lowpart (Xmode, src); + rtx xmode_dest = gen_reg_rtx (Xmode); + rtx xmode_mask = gen_reg_rtx (Xmode); + rtx xmode_sat = gen_reg_rtx (Xmode); + rtx xmode_trunc = gen_reg_rtx (Xmode); + rtx xmode_sat_mask = gen_reg_rtx (Xmode); + rtx xmode_trunc_mask = gen_reg_rtx (Xmode); + + /* Step-1: lt = src < max, gt = min < src, mask = lt & gt */ + emit_move_insn (xmode_narrow_min, gen_int_mode (narrow_min, Xmode)); + emit_move_insn (xmode_narrow_max, gen_int_mode (narrow_max, Xmode)); + riscv_emit_binary (LT, xmode_lt, xmode_src, xmode_narrow_max); + riscv_emit_binary (LT, xmode_gt, xmode_narrow_min, xmode_src); + riscv_emit_binary (AND, xmode_mask, xmode_lt, xmode_gt); + + /* Step-2: sat_mask = mask - 1, trunc_mask = -mask */ + riscv_emit_binary (PLUS, xmode_sat_mask, xmode_mask, CONSTM1_RTX (Xmode)); + riscv_emit_unary (NEG, xmode_trunc_mask, xmode_mask); + + /* Step-3: lt = src < 0, lt = -lt, sat = lt ^ narrow_max */ + riscv_emit_binary (LT, xmode_lt, xmode_src, CONST0_RTX (Xmode)); + riscv_emit_unary (NEG, xmode_lt, xmode_lt); + riscv_emit_binary (XOR, xmode_sat, xmode_lt, xmode_narrow_max); + + /* Step-4: xmode_dest = (src & trunc_mask) | (sat & sat_mask) */ + riscv_emit_binary (AND, xmode_trunc, xmode_src, xmode_trunc_mask); + riscv_emit_binary (AND, xmode_sat, xmode_sat, xmode_sat_mask); + riscv_emit_binary (IOR, xmode_dest, xmode_trunc, xmode_sat); + + /* Step-5: dest = xmode_dest */ + emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); +} + /* Implement
TARGET_C_MODE_FOR_FLOATING_TYPE. Return TFmode for TI_LONG_DOUBLE_TYPE which is for long double type, go with the default one for the others. */ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 067c2415db1..688c07df46c 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4413,6 +4413,16 @@ (define_expand "ustrunc<mode><anyi_double_truncated>2" } ) +(define_expand "sstrunc<mode><anyi_double_truncated>2" + [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand") + (match_operand:ANYI_DOUBLE_TRUNC 1 "register_operand")] + "" + { + riscv_expand_sstrunc (operands[0], operands[1]); + DONE; + } +) + (define_expand "ustrunc<mode><anyi_quad_truncated>2" [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand") (match_operand:ANYI_QUAD_TRUNC 1 "register_operand")] @@ -4423,6 +4433,16 @@ (define_expand "ustrunc<mode><anyi_quad_truncated>2" } ) +(define_expand "sstrunc<mode><anyi_quad_truncated>2" + [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand") + (match_operand:ANYI_QUAD_TRUNC 1 "register_operand")] + "" + { + riscv_expand_sstrunc (operands[0], operands[1]); + DONE; + } +) + (define_expand "ustrunc<mode><anyi_oct_truncated>2" [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand") (match_operand:ANYI_OCT_TRUNC 1 "register_operand")] @@ -4433,6 +4453,16 @@ (define_expand "ustrunc<mode><anyi_oct_truncated>2" } ) +(define_expand "sstrunc<mode><anyi_oct_truncated>2" + [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand") + (match_operand:ANYI_OCT_TRUNC 1 "register_operand")] + "" + { + riscv_expand_sstrunc (operands[0], operands[1]); + DONE; + } +) + ;; These are forms of (x << C1) + C2, potentially canonicalized from ;; ((x + C2') << C1. Depending on the cost to load C2 vs C2' we may ;; want to go ahead and recognize this form as C2 may be cheaper to