===================================================================
@@ -63,6 +63,112 @@
extern void rx_split_cbranch (machine_mode, enum rtx_code,
rtx, rtx, rtx);
extern machine_mode rx_select_cc_mode (enum rtx_code, rtx, rtx);
+
+extern bool rx_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno);
+extern void rx_copy_reg_dead_or_unused_notes (rtx reg, const rtx_insn* src,
+ rtx_insn* dst);
+
+extern bool rx_fuse_in_memory_bitop (rtx* operands, rtx_insn* curr_insn,
+ rtx (*gen_insn)(rtx, rtx));
+
+/* Result value of rx_find_set_of_reg. */
+struct set_of_reg
+{
+  /* The insn where rx_find_set_of_reg stopped looking.
+ Can be NULL_RTX if the end of the insn list was reached. */
+ rtx_insn* insn;
+
+ /* The set rtx of the specified reg if found, NULL_RTX otherwise. */
+ const_rtx set_rtx;
+
+ /* The set source rtx of the specified reg if found, NULL_RTX otherwise.
+ Usually, this is the most interesting return value. */
+ rtx set_src;
+};
+
+/* FIXME: Copy-pasta from SH. Move to rtl.h.
+   Given a reg rtx and a start insn, try to find the insn that sets
+   the specified reg by using the specified insn stepping function,
+   such as 'prev_nonnote_nondebug_insn_bb'. When the insn is found,
+   try to extract the rtx of the reg set. If IGNORE_REG_REG_COPIES
+   is true, reg-reg copies are skipped and the search is continued
+   for the copy source reg. */
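+
+/* A minimal usage sketch (illustrative only; mirrors the use in
+   rx_fuse_in_memory_bitop):
+
+     set_of_reg def = rx_find_set_of_reg (reg, insn,
+                                          prev_nonnote_nondebug_insn_bb);
+     if (def.set_src != NULL_RTX && MEM_P (def.set_src))
+       ... REG was last set by a memory load in insn DEF.INSN ...  */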
+template <typename F> inline set_of_reg
+rx_find_set_of_reg (rtx reg, rtx_insn* insn, F stepfunc,
+ bool ignore_reg_reg_copies = false)
+{
+ set_of_reg result;
+ result.insn = insn;
+ result.set_rtx = NULL_RTX;
+ result.set_src = NULL_RTX;
+
+ if (!REG_P (reg) || insn == NULL_RTX)
+ return result;
+
+ for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i))
+ {
+ if (BARRIER_P (i))
+ break;
+ if (!INSN_P (i) || DEBUG_INSN_P (i))
+ continue;
+ if (reg_set_p (reg, i))
+ {
+ if (CALL_P (i))
+ break;
+
+ result.insn = i;
+ result.set_rtx = set_of (reg, i);
+
+ if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
+ break;
+
+ result.set_src = XEXP (result.set_rtx, 1);
+
+ if (ignore_reg_reg_copies && REG_P (result.set_src))
+ {
+ reg = result.set_src;
+ continue;
+ }
+ if (ignore_reg_reg_copies && SUBREG_P (result.set_src)
+ && REG_P (SUBREG_REG (result.set_src)))
+ {
+ reg = SUBREG_REG (result.set_src);
+ continue;
+ }
+
+ break;
+ }
+ }
+
+ /* If the searched reg is found inside a (mem (post_inc:SI (reg))), set_of
+ will return NULL and set_rtx will be NULL.
+ In this case report a 'not found'. result.insn will always be non-null
+ at this point, so no need to check it. */
+ if (result.set_src != NULL && result.set_rtx == NULL)
+ result.set_src = NULL;
+
+ return result;
+}
+
+/* FIXME: Move to rtl.h.
+   Given a reg rtx and a start insn, use the insn stepping function
+   STEPFUNC (e.g. 'next_nonnote_nondebug_insn_bb') to find the next insn
+   that uses or mentions the specified reg. Returns NULL if no such insn
+   is found before a barrier or the end of the insn list. */
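+
+/* For example (illustrative only):
+
+     rtx_insn* use = rx_find_use_of_reg (reg, insn,
+                                         next_nonnote_nondebug_insn_bb);
+     if (use != NULL)
+       ... USE is the first following insn that mentions REG ...  */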
+template <typename F> inline rtx_insn*
+rx_find_use_of_reg (rtx reg, rtx_insn* insn, F stepfunc)
+{
+ if (!REG_P (reg) || insn == NULL_RTX)
+ return NULL;
+
+ for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i))
+ {
+ if (BARRIER_P (i))
+ break;
+ if (!INSN_P (i) || DEBUG_INSN_P (i))
+ continue;
+ if (reg_overlap_mentioned_p (reg, PATTERN (i))
+ || (CALL_P (i) && find_reg_fusage (i, USE, reg)))
+ return i;
+ }
+
+ return NULL;
+}
+
#endif
#endif /* GCC_RX_PROTOS_H */
===================================================================
@@ -3439,6 +3439,88 @@
emit_insn (gen_mvtc (GEN_INT (CTRLREG_PSW), m_prev_psw_reg));
}
+/* Return true if the reg with number REGNO dies or is unused after insn I,
+   according to the attached REG_DEAD / REG_UNUSED notes. */
+bool
+rx_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
+{
+ return find_regno_note (i, REG_DEAD, regno) != NULL
+ || find_regno_note (i, REG_UNUSED, regno) != NULL;
+}
+
+/* Copy the REG_DEAD and REG_UNUSED notes for register REG, if present,
+   from insn SRC to insn DST. */
+void
+rx_copy_reg_dead_or_unused_notes (rtx reg, const rtx_insn* src, rtx_insn* dst)
+{
+ int regno = REGNO (SUBREG_P (reg) ? SUBREG_REG (reg) : reg);
+
+ if (rtx note = find_regno_note (src, REG_DEAD, regno))
+ add_shallow_copy_of_reg_note (dst, note);
+
+ if (rtx note = find_regno_note (src, REG_UNUSED, regno))
+ add_shallow_copy_of_reg_note (dst, note);
+}
+
+/* Try to fuse the current bit-operation insn with the surrounding memory
+   load and store into a single *_in_memory insn generated by GEN_INSN.
+   On success, delete the original load and store insns and return true.
+   Return false if the insns can't be fused safely. */
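+
+/* Illustrative sketch of the transformation (modes and details omitted):
+
+     (set (reg tmp) (mem addr))          <- load found via rx_find_set_of_reg
+     (set (reg dst) (bitop (reg tmp)))   <- curr_insn
+     (set (mem addr) (reg dst))          <- store found via rx_find_use_of_reg
+
+   becomes a single in-memory bitop
+
+     (set (mem addr) (bitop (mem addr)))  */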
+bool
+rx_fuse_in_memory_bitop (rtx* operands, rtx_insn* curr_insn,
+ rtx (*gen_insn)(rtx, rtx))
+{
+ rtx op2_reg = SUBREG_P (operands[2]) ? SUBREG_REG (operands[2]) : operands[2];
+
+ set_of_reg op2_def = rx_find_set_of_reg (op2_reg, curr_insn,
+ prev_nonnote_nondebug_insn_bb);
+ if (op2_def.set_src == NULL_RTX
+ || !MEM_P (op2_def.set_src)
+ || GET_MODE (op2_def.set_src) != QImode
+ || !rx_is_restricted_memory_address (XEXP (op2_def.set_src, 0),
+ GET_MODE (op2_def.set_src))
+ || reg_used_between_p (operands[2], op2_def.insn, curr_insn)
+      || !rx_reg_dead_or_unused_after_insn (curr_insn, REGNO (op2_reg)))
+ return false;
+
+ /* The register operand originates from a memory load and the memory load
+ could be fused with the bitop insn.
+ Look for the following memory store with the same memory operand. */
+ rtx mem = op2_def.set_src;
+
+  /* If the memory address is an auto-mod address, the load and the store
+     would not access the same location, so the insns can't be fused.
+     Checking the RTX class also covers PRE_MODIFY and POST_MODIFY. */
+  if (GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
+    return false;
+
+ rtx_insn* op0_use = rx_find_use_of_reg (operands[0], curr_insn,
+ next_nonnote_nondebug_insn_bb);
+  if (op0_use == NULL
+      || !(GET_CODE (PATTERN (op0_use)) == SET
+	   && RX_REG_P (SET_SRC (PATTERN (op0_use)))
+	   && reg_overlap_mentioned_p (operands[0],
+				       SET_SRC (PATTERN (op0_use)))
+	   && rtx_equal_p (mem, SET_DEST (PATTERN (op0_use))))
+ || !rx_reg_dead_or_unused_after_insn (op0_use, REGNO (operands[0]))
+ || reg_set_between_p (operands[2], curr_insn, op0_use))
+ return false;
+
+  /* Fusing the load-modify-store sequence changes the position of the load
+     and the store relative to other memory accesses between them.  If there
+     is a volatile mem or a call between the load and the store, it's not
+     safe to change that ordering, so don't fuse the insns in that case. */
+ for (rtx_insn* i = next_nonnote_nondebug_insn_bb (op2_def.insn);
+ i != NULL && i != op0_use;
+ i = next_nonnote_nondebug_insn_bb (i))
+ if (volatile_insn_p (PATTERN (i)) || CALL_P (i))
+ return false;
+
+ emit_insn (gen_insn (mem, operands[1]));
+ set_insn_deleted (op2_def.insn);
+ set_insn_deleted (op0_use);
+ return true;
+}
+
/* Implement TARGET_HARD_REGNO_NREGS. */
static unsigned int
===================================================================
@@ -1094,7 +1094,7 @@
DONE;
})
-(define_insn "andsi3"
+(define_insn_and_split "andsi3"
[(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
(and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
(match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
@@ -1110,6 +1110,21 @@
and\t%1, %0
and\t%2, %1, %0
and\t%Q2, %0"
+ "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2])
+ && pow2p_hwi (~UINTVAL (operands[2]))"
+ [(const_int 0)]
+{
+ /* For negated single bit constants use the bclr insn for smaller code. */
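+  /* E.g. "x & ~(1 << 4)", i.e. "x & 0xFFFFFFEF", becomes "bclr #4, Rx". */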
+
+ if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG))
+ FAIL;
+
+ rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn,
+ emit_insn (gen_bitclr (operands[0],
+ GEN_INT (exact_log2 (~UINTVAL (operands[2]))),
+ operands[1])));
+ DONE;
+}
[(set_attr "timings" "11,11,11,11,11,11,11,11,33")
(set_attr "length" "2,2,3,4,5,6,2,5,5")]
)
@@ -1383,7 +1398,7 @@
[(set_attr "length" "2,3")]
)
-(define_insn "iorsi3"
+(define_insn_and_split "iorsi3"
[(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
(ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
(match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
@@ -1399,6 +1414,21 @@
or\t%1, %0
or\t%2, %1, %0
or\t%Q2, %0"
+ "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2])
+ && pow2p_hwi (UINTVAL (operands[2]))"
+ [(const_int 0)]
+{
+ /* For single bit constants use the bset insn for smaller code. */
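+  /* E.g. "x | (1 << 4)", i.e. "x | 0x10", becomes "bset #4, Rx". */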
+
+ if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG))
+ FAIL;
+
+ rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn,
+ emit_insn (gen_bitset (operands[0],
+ GEN_INT (exact_log2 (UINTVAL (operands[2]))),
+ operands[1])));
+ DONE;
+}
[(set_attr "timings" "11,11,11,11,11,11,11,11,33")
(set_attr "length" "2,2,3,4,5,6,2,3,5")]
)
@@ -1704,7 +1734,7 @@
DONE;
})
-(define_insn "xorsi3"
+(define_insn_and_split "xorsi3"
[(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
(xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
(match_operand:SI 2 "rx_source_operand"
@@ -1712,6 +1742,21 @@
(clobber (reg:CC CC_REG))]
""
"xor\t%Q2, %0"
+ "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2])
+ && pow2p_hwi (UINTVAL (operands[2]))"
+ [(const_int 0)]
+{
+ /* For single bit constants use the bnot insn for smaller code. */
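+  /* E.g. "x ^ (1 << 4)", i.e. "x ^ 0x10", becomes "bnot #4, Rx". */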
+
+ if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG))
+ FAIL;
+
+ rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn,
+ emit_insn (gen_bitinvert (operands[0],
+ GEN_INT (exact_log2 (UINTVAL (operands[2]))),
+ operands[1])));
+ DONE;
+}
[(set_attr "timings" "11,11,11,11,11,33")
(set_attr "length" "3,4,5,6,7,6")]
)
@@ -1960,19 +2005,16 @@
;; Bit manipulation instructions.
-;; ??? The *_in_memory patterns will not be matched without further help.
-;; At one time we had the insv expander generate them, but I suspect that
-;; in general we get better performance by exposing the register load to
-;; the optimizers.
-;;
-;; An alternate solution would be to re-organize these patterns such
-;; that allow both register and memory operands. This would allow the
-;; register allocator to spill and not load the register operand. This
-;; would be possible only for operations for which we have a constant
-;; bit offset, so that we can adjust the address by ofs/8 and replace
-;; the offset in the insn by ofs%8.
+;; The *_in_memory patterns will not be matched automatically, not even with
+;; combiner bridge patterns.  Especially when the memory operands have a
+;; displacement, the resulting patterns get too complex to match.
+;; Instead we manually look around the matched insn to see if there is a
+;; preceding memory load and a following memory store of the modified register
+;; which can be fused into the single *_in_memory insn.
+;; Do that before register allocation, as it can eliminate one temporary
+;; register that would otherwise need to be allocated.
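+;;
+;; For example (illustrative; actual register numbers depend on allocation),
+;; a sequence like
+;;
+;;     mov.B   2[r1], r5
+;;     bset    #4, r5
+;;     mov.B   r5, 2[r1]
+;;
+;; is fused into the single insn
+;;
+;;     bset    #4, 2[r1].B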
-(define_insn "*bitset"
+(define_insn_and_split "bitset"
[(set (match_operand:SI 0 "register_operand" "=r")
(ior:SI (ashift:SI (const_int 1)
(match_operand:SI 1 "rx_shift_operand" "ri"))
@@ -1979,10 +2021,18 @@
(match_operand:SI 2 "register_operand" "0")))]
""
"bset\t%1, %0"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitset_in_memory))
+ DONE;
+ else
+ FAIL;
+}
[(set_attr "length" "3")]
)
-(define_insn "*bitset_in_memory"
+(define_insn "bitset_in_memory"
[(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
(ior:QI (ashift:QI (const_int 1)
(match_operand:QI 1 "nonmemory_operand" "ri"))
@@ -1989,11 +2039,11 @@
(match_dup 0)))]
""
"bset\t%1, %0.B"
- [(set_attr "length" "3")
+ [(set_attr "length" "5")
(set_attr "timings" "33")]
)
-(define_insn "*bitinvert"
+(define_insn_and_split "bitinvert"
[(set (match_operand:SI 0 "register_operand" "=r")
(xor:SI (ashift:SI (const_int 1)
(match_operand:SI 1 "rx_shift_operand" "ri"))
@@ -2000,10 +2050,18 @@
(match_operand:SI 2 "register_operand" "0")))]
""
"bnot\t%1, %0"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitinvert_in_memory))
+ DONE;
+ else
+ FAIL;
+}
[(set_attr "length" "3")]
)
-(define_insn "*bitinvert_in_memory"
+(define_insn "bitinvert_in_memory"
[(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
(xor:QI (ashift:QI (const_int 1)
(match_operand:QI 1 "nonmemory_operand" "ri"))
@@ -2014,7 +2072,7 @@
(set_attr "timings" "33")]
)
-(define_insn "*bitclr"
+(define_insn_and_split "bitclr"
[(set (match_operand:SI 0 "register_operand" "=r")
(and:SI (not:SI
(ashift:SI
@@ -2023,10 +2081,18 @@
(match_operand:SI 2 "register_operand" "0")))]
""
"bclr\t%1, %0"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitclr_in_memory))
+ DONE;
+ else
+ FAIL;
+}
[(set_attr "length" "3")]
)
-(define_insn "*bitclr_in_memory"
+(define_insn "bitclr_in_memory"
[(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
(and:QI (not:QI
(ashift:QI
@@ -2035,7 +2101,7 @@
(match_dup 0)))]
""
"bclr\t%1, %0.B"
- [(set_attr "length" "3")
+ [(set_attr "length" "5")
(set_attr "timings" "33")]
)
===================================================================
@@ -0,0 +1,77 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+/* { dg-final { scan-assembler-times "bclr" 6 } } */
+/* { dg-final { scan-assembler-times "bset" 6 } } */
+/* { dg-final { scan-assembler-times "bnot" 6 } } */
+
+void
+test_0 (char* x, unsigned int y)
+{
+ /* Expect 4x bclr here. */
+ x[0] &= 0xFE;
+ x[1] = y & ~(1 << 1);
+ x[2] &= 0xFE;
+ x[65000] &= 0xFE;
+}
+
+unsigned int
+test_1 (unsigned int x, unsigned int y)
+{
+ /* Expect 1x bclr here. */
+ return x & ~(1 << y);
+}
+
+unsigned int
+test_2 (unsigned int x)
+{
+ /* Expect 1x bclr here. */
+ return x & ~(1 << 1);
+}
+
+void
+test_3 (char* x, unsigned int y)
+{
+ /* Expect 4x bset here. */
+ x[0] |= 0x10;
+ x[1] = y | (1 << 1);
+ x[2] |= 0x10;
+ x[65000] |= 0x10;
+}
+
+unsigned int
+test_4 (unsigned int x, unsigned int y)
+{
+ /* Expect 1x bset here. */
+ return x | (1 << y);
+}
+
+unsigned int
+test_5 (unsigned int x)
+{
+ /* Expect 1x bset here. */
+ return x | (1 << 8);
+}
+
+void
+test_6 (char* x, unsigned int y)
+{
+ /* Expect 4x bnot here. */
+ x[0] ^= 0x10;
+ x[1] = y ^ (1 << 1);
+ x[2] ^= 0x10;
+ x[65000] ^= 0x10;
+}
+
+unsigned int
+test_7 (unsigned int x, unsigned int y)
+{
+ /* Expect 1x bnot here. */
+ return x ^ (1 << y);
+}
+
+unsigned int
+test_8 (unsigned int x)
+{
+ /* Expect 1x bnot here. */
+ return x ^ (1 << 8);
+}