@@ -11894,19 +11894,56 @@ riscv_get_raw_result_mode (int regno)
return default_get_reg_raw_mode (regno);
}
-/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
- The rtx x will be zero extended to Xmode if the mode is HI/QImode, and
- the new zero extended Xmode rtx will be returned.
- Or the gen_lowpart rtx of Xmode will be returned. */
+/* Generate a REG rtx of Xmode from the given rtx and mode.
+   The rtx x can be a REG (in QI/HI/SI/DImode) or a const_int.
+   The machine_mode mode is the original mode from the define pattern.
+
+   If x is a REG already in Xmode, it will be returned directly.
+
+   If x is a REG in a narrower mode, it will be zero extended into a new REG
+   of Xmode, which is then returned.
+
+   If x is a const_int, a new REG rtx of Xmode will be created to hold its
+   value and then returned.
+
+   According to the gccint doc, constants generated for modes with fewer bits
+   than HOST_WIDE_INT must be sign extended to full width.  Thus there are
+   two cases here; take QImode as an example.
+
+   For .SAT_SUB (127, y) in QImode we have (const_int 127), and a simple move
+   from the const_int to the new REG rtx is good enough here.
+
+   For .SAT_SUB (254, y) in QImode we have (const_int -2) after define_expand,
+   aka 0xfffffffffffffffe in Xmode on RV64, while what we actually need is
+   0xfe in Xmode.  So we need to clean up the highest 56 bits of the new REG
+   rtx moved from the (const_int -2).
+
+   Then the underlying expansion can perform the code generation based on the
+   REG rtx of Xmode, instead of handling these cases in each expand function.  */
static rtx
riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
{
+ rtx xmode_reg = gen_reg_rtx (Xmode);
+
+ if (!CONST_INT_P (x))
+ {
+ if (mode == Xmode)
+ return x;
+
+ riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
+ return xmode_reg;
+ }
+
if (mode == Xmode)
- return x;
+ emit_move_insn (xmode_reg, x);
+ else
+ {
+ rtx reg_x = gen_reg_rtx (mode);
- rtx xmode_reg = gen_reg_rtx (Xmode);
- riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
+ emit_move_insn (reg_x, x);
+ riscv_emit_unary (ZERO_EXTEND, xmode_reg, reg_x);
+ }
return xmode_reg;
}
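
As an aside (not part of the patch), the sign- vs. zero-extension arithmetic
described in the comment above can be checked with a small standalone C
sketch; the two printed values are the 0xfffffffffffffffe and 0xfe the
comment refers to for the QImode constant 254 on a 64-bit host:

  /* Standalone illustration only, not GCC internals.  */
  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    int8_t qi = (int8_t) 254;                    /* stored as (const_int -2) */
    uint64_t sign_ext = (uint64_t) (int64_t) qi; /* 0xfffffffffffffffe       */
    uint64_t zero_ext = (uint8_t) qi;            /* 0xfe, what Xmode needs   */
    printf ("%016" PRIx64 " %016" PRIx64 "\n", sign_ext, zero_ext);
    return 0;
  }
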
@@ -11959,50 +11996,6 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
-/* Generate a REG rtx of Xmode from the given rtx and mode.
- The rtx x can be REG (QI/HI/SI/DI) or const_int.
- The machine_mode mode is the original mode from define pattern.
-
- If rtx is REG, the gen_lowpart of Xmode will be returned.
-
- If rtx is const_int, a new REG rtx will be created to hold the value of
- const_int and then returned.
-
- According to the gccint doc, the constants generated for modes with fewer
- bits than in HOST_WIDE_INT must be sign extended to full width. Thus there
- will be two cases here, take QImode as example.
-
- For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple
- mov from const_int to the new REG rtx is good enough here.
-
- For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand.
- Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode
- of RV64. So we need to cleanup the highest 56 bits of the new REG rtx moved
- from the (const_int -2).
-
- Then the underlying expanding can perform the code generation based on
- the REG rtx of Xmode, instead of taking care of these in expand func. */
-
-static rtx
-riscv_gen_unsigned_xmode_reg (rtx x, machine_mode mode)
-{
- if (!CONST_INT_P (x))
- return gen_lowpart (Xmode, x);
-
- rtx xmode_x = gen_reg_rtx (Xmode);
-
- if (mode == Xmode)
- emit_move_insn (xmode_x, x);
- else
- {
- rtx reg_x = gen_reg_rtx (mode);
- emit_move_insn (reg_x, x);
- riscv_emit_unary (ZERO_EXTEND, xmode_x, reg_x);
- }
-
- return xmode_x;
-}
-
/* Implements the unsigned saturation sub standard name ussub for int mode.
z = SAT_SUB(x, y).
@@ -12016,8 +12009,8 @@ void
riscv_expand_ussub (rtx dest, rtx x, rtx y)
{
machine_mode mode = GET_MODE (dest);
- rtx xmode_x = riscv_gen_unsigned_xmode_reg (x, mode);
- rtx xmode_y = riscv_gen_unsigned_xmode_reg (y, mode);
+ rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
+ rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode);
rtx xmode_lt = gen_reg_rtx (Xmode);
rtx xmode_minus = gen_reg_rtx (Xmode);
rtx xmode_dest = gen_reg_rtx (Xmode);
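
For reference, a minimal C sketch (an illustration written for this note, not
code from the patch; the helper name sat_sub_u32 is mine) of the branchless
unsigned saturating subtraction that riscv_expand_ussub open-codes, mirroring
the sub/sltu/addi -1 sequences the adjusted tests below check for:

  #include <stdint.h>

  /* mask is all-ones when x >= y and zero otherwise, so the result is
     x - y when there is no underflow and 0 when there is.  */
  static uint32_t
  sat_sub_u32 (uint32_t x, uint32_t y)
  {
    uint32_t minus = x - y; /* sub  */
    uint32_t lt = x < y;    /* sltu */
    uint32_t mask = lt - 1; /* addi ..., -1 */
    return minus & mask;    /* mask the difference */
  }
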
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_3:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_4:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_5:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_6:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_7:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_1:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_8:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_9:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_10:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_11:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_12:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_2:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** slli\s+a1,\s*a1,\s*32
+** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
@@ -7,6 +7,8 @@
/*
** sat_u_sub_imm255_uint32_t_fmt_3:
** li\s+[atx][0-9]+,\s*255
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1
@@ -9,6 +9,8 @@
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*31
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1
@@ -9,6 +9,8 @@
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-4
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1
@@ -6,6 +6,8 @@
/*
** sat_u_sub_imm255_uint32_t_fmt_4:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** addi\s+[atx][0-9]+,\s*a0,\s*-255
** sltiu\s+a0,\s*[atx][0-9]+,\s*255
** addi\s+a0,\s*a0,\s*-1
@@ -6,6 +6,8 @@
/*
** sat_u_sub_imm2147483648_uint32_t_fmt_2:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*31
** sub\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
@@ -6,6 +6,8 @@
/*
** sat_u_sub_imm68719476732_uint32_t_fmt_2:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-4
@@ -7,6 +7,8 @@
/*
** sat_u_sub_imm255_uint32_t_fmt_1:
** li\s+[atx][0-9]+,\s*255
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1
@@ -8,6 +8,8 @@
** sat_u_sub_imm2147483648_uint32_t_fmt_1:
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*31
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1
@@ -9,6 +9,8 @@
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-4
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1
@@ -6,6 +6,8 @@
/*
** sat_u_sub_imm255_uint32_t_fmt_2:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** addi\s+[atx][0-9]+,\s*a0,\s*-255
** sltiu\s+a0,\s*[atx][0-9]+,\s*255
** addi\s+a0,\s*a0,\s*-1
@@ -6,6 +6,8 @@
/*
** sat_u_sub_imm2147483648_uint32_t_fmt_2:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*31
** sub\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
@@ -6,6 +6,8 @@
/*
** sat_u_sub_imm68719476732_uint32_t_fmt_2:
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-4