@@ -626,6 +626,7 @@ extern bool riscv_expand_strcmp (rtx, rtx, rtx, rtx, rtx);
extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
+extern bool extract_base_offset_in_addr (rtx, rtx *, rtx *);
/* Routines implemented in thead.cc. */
extern bool th_mempair_operands_p (rtx[4], bool, machine_mode);
extern void th_mempair_order_operands (rtx[4], bool, machine_mode);
extern void th_mempair_prepare_save_restore_operands (rtx[4], bool,
@@ -249,6 +249,21 @@ struct riscv_integer_op {
The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI. */
#define RISCV_MAX_INTEGER_OPS 8
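+
+/* Bitmasks describing pairs of adjacent instructions which the core
+ can fuse into a single macro-op.  A tuning structure enables a fusion
+ by setting the corresponding bits in its fusible_ops field, e.g.
+ RISCV_FUSE_ZEXTW | RISCV_FUSE_LUI_ADDI. */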
+enum riscv_fusion_pairs
+{
+ RISCV_FUSE_NOTHING = 0,
+ RISCV_FUSE_ZEXTW = (1 << 0),
+ RISCV_FUSE_ZEXTH = (1 << 1),
+ RISCV_FUSE_ZEXTWS = (1 << 2),
+ RISCV_FUSE_LDINDEXED = (1 << 3),
+ RISCV_FUSE_LUI_ADDI = (1 << 4),
+ RISCV_FUSE_AUIPC_ADDI = (1 << 5),
+ RISCV_FUSE_LUI_LD = (1 << 6),
+ RISCV_FUSE_AUIPC_LD = (1 << 7),
+ RISCV_FUSE_LDPREINCREMENT = (1 << 8),
+ RISCV_FUSE_ALIGNED_STD = (1 << 9),
+};
+
/* Costs of various operations on the different architectures. */
struct riscv_tune_param
@@ -264,6 +279,7 @@ struct riscv_tune_param
unsigned short fmv_cost;
bool slow_unaligned_access;
bool use_divmod_expansion;
+ unsigned int fusible_ops;
};
@@ -344,6 +360,7 @@ static const struct riscv_tune_param rocket_tune_info = {
8, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
};
/* Costs to use when optimizing for Sifive 7 Series. */
@@ -359,6 +376,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
8, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
};
/* Costs to use when optimizing for T-HEAD c906. */
@@ -373,7 +391,8 @@ static const struct riscv_tune_param thead_c906_tune_info = {
5, /* memory_cost */
8, /* fmv_cost */
false, /* slow_unaligned_access */
- false /* use_divmod_expansion */
+ false, /* use_divmod_expansion */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
};
/* Costs to use when optimizing for a generic ooo profile. */
@@ -389,6 +408,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
4, /* fmv_cost */
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
};
/* Costs to use when optimizing for size. */
@@ -404,6 +424,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
8, /* fmv_cost */
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
};
static bool riscv_avoid_shrink_wrapping_separate ();
@@ -7794,6 +7815,259 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
return more - 1;
}
+/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if the target
+ supports instruction fusion of some sort. */
+
+static bool
+riscv_macro_fusion_p (void)
+{
+ return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
+}
+
+/* Return true iff the instruction fusion described by OP is enabled. */
+
+static bool
+riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
+{
+ return tune_param->fusible_ops & op;
+}
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
+ should be kept together during scheduling. */
+
+static bool
+riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+{
+ rtx prev_set = single_set (prev);
+ rtx curr_set = single_set (curr);
+ /* PREV and CURR are simple SET insns, i.e. no flag setting or branching. */
+ bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+
+ if (!riscv_macro_fusion_p ())
+ return false;
+
+ if (simple_sets_p
+ && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW)
+ || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)))
+ {
+ /* We are trying to match the following:
+ prev (slli) == (set (reg:DI rD)
+ (ashift:DI (reg:DI rS) (const_int 32)))
+ curr (srli) == (set (reg:DI rD)
+ (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
+ with <shift> being either 32 for FUSE_ZEXTW, or
+ less than 32 for FUSE_ZEXTWS. */
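+ /* In assembly terms, FUSE_ZEXTW is the canonical zero-extension idiom
+ "slli rD,rS,32; srli rD,rD,32", while FUSE_ZEXTWS pairs the same
+ slli with a smaller right shift. */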
+
+ if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
+ && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
+ && REG_P (SET_DEST (prev_set))
+ && REG_P (SET_DEST (curr_set))
+ && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (curr_set))
+ && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
+ && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+ && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
+ && ((INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
+ && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW))
+ || (INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
+ && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS))))
+ return true;
+ }
+
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
+ {
+ /* We are trying to match the following:
+ prev (slli) == (set (reg:DI rD)
+ (ashift:DI (reg:DI rS) (const_int 48)))
+ curr (srli) == (set (reg:DI rD)
+ (lshiftrt:DI (reg:DI rD) (const_int 48))) */
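+ /* In assembly terms this is the zext.h idiom
+ "slli rD,rS,48; srli rD,rD,48". */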
+
+ if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
+ && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
+ && REG_P (SET_DEST (prev_set))
+ && REG_P (SET_DEST (curr_set))
+ && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (curr_set))
+ && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
+ && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+ && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
+ && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
+ return true;
+ }
+
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
+ {
+ /* We are trying to match the following:
+ prev (add) == (set (reg:DI rD)
+ (plus:DI (reg:DI rS1) (reg:DI rS2)))
+ curr (ld) == (set (reg:DI rD)
+ (mem:DI (reg:DI rD))) */
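+ /* i.e. an "add rD,rS1,rS2" followed by "ld rD,0(rD)". */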
+
+ if (MEM_P (SET_SRC (curr_set))
+ && REG_P (XEXP (SET_SRC (curr_set), 0))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
+ && GET_CODE (SET_SRC (prev_set)) == PLUS
+ && REG_P (XEXP (SET_SRC (prev_set), 0))
+ && REG_P (XEXP (SET_SRC (prev_set), 1)))
+ return true;
+
+ /* We are trying to match the following:
+ prev (add) == (set (reg:DI rD)
+ (plus:DI (reg:DI rS1) (reg:DI rS2)))
+ curr (lw) == (set (reg:DI rD)
+ (any_extend:DI (mem:SUBX (reg:DI rD)))) */
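+ /* i.e. an "add rD,rS1,rS2" followed by an extending load such as
+ "lw rD,0(rD)". */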
+
+ if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
+ || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
+ && MEM_P (XEXP (SET_SRC (curr_set), 0))
+ && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
+ && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
+ == REGNO (SET_DEST (prev_set))
+ && GET_CODE (SET_SRC (prev_set)) == PLUS
+ && REG_P (XEXP (SET_SRC (prev_set), 0))
+ && REG_P (XEXP (SET_SRC (prev_set), 1)))
+ return true;
+ }
+
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT))
+ {
+ /* We are trying to match the following:
+ prev (add) == (set (reg:DI rS)
+ (plus:DI (reg:DI rS) (const_int)))
+ curr (ld) == (set (reg:DI rD)
+ (mem:DI (reg:DI rS))) */
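+ /* i.e. an "addi rS,rS,imm" followed by "ld rD,0(rS)". */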
+
+ if (MEM_P (SET_SRC (curr_set))
+ && REG_P (XEXP (SET_SRC (curr_set), 0))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
+ && GET_CODE (SET_SRC (prev_set)) == PLUS
+ && REG_P (XEXP (SET_SRC (prev_set), 0))
+ && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)))
+ return true;
+ }
+
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
+ {
+ /* We are trying to match the following:
+ prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
+ curr (addi) == (set (reg:DI rD)
+ (plus:DI (reg:DI rD) (const_int IMM12))) */
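+ /* i.e. a "lui rD,imm20" followed by "addi rD,rD,imm12", the usual
+ sequence for materializing a 32-bit constant or absolute address. */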
+
+ if ((GET_CODE (SET_SRC (curr_set)) == LO_SUM
+ || (GET_CODE (SET_SRC (curr_set)) == PLUS
+ && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+ && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))))
+ && (GET_CODE (SET_SRC (prev_set)) == HIGH
+ || (CONST_INT_P (SET_SRC (prev_set))
+ && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))))
+ return true;
+ }
+
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
+ {
+ /* We are trying to match the following:
+ prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
+ curr (addi) == (set (reg:DI rD)
+ (plus:DI (reg:DI rD) (const_int IMM12)))
+ and
+ prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
+ curr (addi) == (set (reg:DI rD)
+ (lo_sum:DI (reg:DI rD) (const_int IMM12))) */
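+ /* i.e. an "auipc rD,imm20" followed by "addi rD,rD,imm12", the usual
+ sequence for forming a PC-relative address. */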
+
+ if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
+ && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
+ && (GET_CODE (SET_SRC (curr_set)) == LO_SUM
+ || (GET_CODE (SET_SRC (curr_set)) == PLUS
+ && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+ && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))))
+ return true;
+ }
+
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
+ {
+ /* We are trying to match the following:
+ prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
+ curr (ld) == (set (reg:DI rD)
+ (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
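+ /* i.e. a "lui rD,%hi(sym)" followed by "ld rD,%lo(sym)(rD)". */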
+
+ if (CONST_INT_P (SET_SRC (prev_set))
+ && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
+ && MEM_P (SET_SRC (curr_set))
+ && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
+ return true;
+
+ if (GET_CODE (SET_SRC (prev_set)) == HIGH
+ && MEM_P (SET_SRC (curr_set))
+ && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM
+ && REGNO (SET_DEST (prev_set))
+ == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
+ return true;
+
+ if (GET_CODE (SET_SRC (prev_set)) == HIGH
+ && (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
+ || GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)
+ && MEM_P (XEXP (SET_SRC (curr_set), 0))
+ && GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM
+ && REGNO (SET_DEST (prev_set))
+ == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0)))
+ return true;
+ }
+
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
+ {
+ /* We are trying to match the following:
+ prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
+ curr (ld) == (set (reg:DI rD)
+ (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
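+ /* i.e. an "auipc rD,imm20" followed by "ld rD,imm12(rD)". */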
+
+ if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
+ && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
+ && MEM_P (SET_SRC (curr_set))
+ && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
+ return true;
+ }
+
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD))
+ {
+ /* We are trying to match the following:
+ prev (sd) == (set (mem (plus (reg sp|fp) (const_int)))
+ (reg rS1))
+ curr (sd) == (set (mem (plus (reg sp|fp) (const_int)))
+ (reg rS2)) */
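+ /* i.e. two "sd" instructions filling the two halves of a 16-byte
+ aligned block, such as "sd rS1,0(sp)" followed by "sd rS2,8(sp)". */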
+
+ if (MEM_P (SET_DEST (prev_set))
+ && MEM_P (SET_DEST (curr_set))
+ /* We can probably relax this condition. The documentation is a bit
+ unclear about sub-word cases. So we just model DImode for now. */
+ && GET_MODE (SET_DEST (curr_set)) == DImode
+ && GET_MODE (SET_DEST (prev_set)) == DImode)
+ {
+ rtx base_prev, base_curr, offset_prev, offset_curr;
+
+ extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev);
+ extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr);
+
+ /* The two stores must be contained within opposite halves of the same
+ 16 byte aligned block of memory. We know that the stack pointer and
+ the frame pointer have suitable alignment. So we just need to check
+ the offsets of the two stores for suitable alignment.
+
+ Originally the thought was to check MEM_ALIGN, but that was reporting
+ incorrect alignments, even for SP/FP accesses, so we gave up on that
+ approach. */
+ if (base_prev != NULL_RTX
+ && base_curr != NULL_RTX
+ && REG_P (base_prev)
+ && REG_P (base_curr)
+ && REGNO (base_prev) == REGNO (base_curr)
+ && (REGNO (base_prev) == STACK_POINTER_REGNUM
+ || REGNO (base_prev) == HARD_FRAME_POINTER_REGNUM)
+ && ((INTVAL (offset_prev) == INTVAL (offset_curr) + 8
+ && (INTVAL (offset_curr) % 16) == 0)
+ || ((INTVAL (offset_curr) == INTVAL (offset_prev) + 8)
+ && (INTVAL (offset_prev) % 16) == 0)))
+ return true;
+ }
+ }
+
+ return false;
+}
+
/* Adjust the cost/latency of instructions for scheduling.
For now this is just used to change the latency of vector instructions
according to their LMUL. We assume that an insn with LMUL == 8 requires
@@ -9780,6 +10054,40 @@ riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops,
return default_preferred_else_value (ifn, vectype, nops, ops);
}
+/* If MEM is in the form of "base+offset", extract the two parts of the
+ address and store them in BASE and OFFSET; otherwise clear BASE and
+ OFFSET and return false.  For example, a MEM whose address is
+ (plus (reg sp) (const_int 16)) yields BASE == (reg sp) and
+ OFFSET == (const_int 16). */
+
+bool
+extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
+{
+ rtx addr;
+
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ if (REG_P (addr))
+ {
+ *base = addr;
+ *offset = const0_rtx;
+ return true;
+ }
+
+ if (GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
+ {
+ *base = XEXP (addr, 0);
+ *offset = XEXP (addr, 1);
+ return true;
+ }
+
+ *base = NULL_RTX;
+ *offset = NULL_RTX;
+
+ return false;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -9802,6 +10110,10 @@ riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops,
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue
@@ -36,40 +36,6 @@
#include "regs.h"
#include "riscv-protos.h"
-/* If MEM is in the form of "base+offset", extract the two parts
- of address and set to BASE and OFFSET, otherwise return false
- after clearing BASE and OFFSET. */
-
-static bool
-extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
-{
- rtx addr;
-
- gcc_assert (MEM_P (mem));
-
- addr = XEXP (mem, 0);
-
- if (REG_P (addr))
- {
- *base = addr;
- *offset = const0_rtx;
- return true;
- }
-
- if (GET_CODE (addr) == PLUS
- && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
- {
- *base = XEXP (addr, 0);
- *offset = XEXP (addr, 1);
- return true;
- }
-
- *base = NULL_RTX;
- *offset = NULL_RTX;
-
- return false;
-}
-
/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR
and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */