2014-03-20 Christian Bruel <christian.bruel@st.com>
* config/sh/sh.md (setmemqi): New expand pattern.
* config/sh/sh.h (CLEAR_RATIO): Define.
* config/sh/sh-mem.cc (sh_expand_setmem): Define.
* config/sh/sh-protos.h (sh_expand_setmem): Declare.
2014-01-20 Christian Bruel <christian.bruel@st.com>
* gcc.target/sh/memset.c: New test.
===================================================================
@@ -608,3 +608,106 @@ sh_expand_strlen (rtx *operands)
return true;
}
+
+/* Emit code to perform a memset.
+ Returns without emitting any loop if OPERANDS[1] is not a CONST_INT.
+ OPERANDS[0] is the destination.
+ OPERANDS[1] is the size;
+ OPERANDS[2] is the value to store.
+ OPERANDS[3] is the alignment. */
+void
+sh_expand_setmem (rtx *operands)
+{
+ rtx L_loop_byte = gen_label_rtx (); /* head of the byte-store loop */
+ rtx L_loop_word = gen_label_rtx (); /* head of the word-store loop */
+ rtx L_return = gen_label_rtx (); /* common exit after either loop */
+ rtx jump;
+ rtx dest = copy_rtx (operands[0]);
+ rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0)); /* advanced after every store */
+ rtx val = force_reg (SImode, operands[2]);
+ int align = INTVAL (operands[3]);
+ int count = 0;
+ rtx len = force_reg (SImode, operands[1]); /* run-time byte counter */
+ /* NOTE(review): operands are forced into registers before the check below. */
+ if (! CONST_INT_P (operands[1]))
+ return; /* size not known at expand time: emit no loop */
+
+ count = INTVAL (operands[1]);
+ /* Word path: constant fill of 0 or -1 and more than 8 bytes to set. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
+ {
+ rtx lenw = gen_reg_rtx (SImode);
+ /* NOTE(review): confirm tstsi_t operand order and branch sense in sh.md. */
+ if (align < 4)
+ {
+ emit_insn (gen_tstsi_t (GEN_INT (3), dest_addr));
+ jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ }
+
+ /* Word count: lenw = len >> 2. */
+ emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
+ /* Access DEST in SImode through dest_addr from here on. */
+ dest = adjust_automodify_address (dest, SImode, dest_addr, 0);
+
+ /* Start of the word loop. */
+ emit_label (L_loop_word);
+ /* Decrement the word counter; the branch at the loop bottom uses this test. */
+ if (TARGET_SH2)
+ emit_insn (gen_dect (lenw, lenw));
+ else
+ {
+ emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1))); /* without dect: decrement ... */
+ emit_insn (gen_tstsi_t (lenw, lenw)); /* ... then test explicitly */
+ }
+ /* Store one word, then advance the address by the SImode size. */
+ emit_move_insn (dest, val);
+ emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
+ GET_MODE_SIZE (SImode)));
+
+ /* Branch back to L_loop_word; annotated with prob_likely. */
+ jump = emit_jump_insn (gen_branch_false (L_loop_word));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ count = count % 4; /* bytes left over after the word stores */
+
+ dest = adjust_address (dest, QImode, 0);
+
+ val = gen_lowpart (QImode, val);
+ /* Emit the 0-3 leftover byte stores inline, one per expand-time iteration. */
+ while (count--)
+ {
+ emit_move_insn (dest, val);
+ emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
+ GET_MODE_SIZE (QImode)));
+ }
+ /* Jump over the byte loop to the common exit. */
+ jump = emit_jump_insn (gen_jump_compact (L_return));
+ emit_barrier_after (jump);
+ }
+ /* Byte path: reached when the word path is not taken, or via the alignment branch. */
+ dest = adjust_automodify_address (dest, QImode, dest_addr, 0);
+
+ /* Start of the byte loop. */
+ emit_label (L_loop_byte);
+ /* Decrement the byte counter in len (same dect / add + tst choice as the word loop). */
+ if (TARGET_SH2)
+ emit_insn (gen_dect (len, len));
+ else
+ {
+ emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
+ emit_insn (gen_tstsi_t (len, len));
+ }
+ /* Store the low part of val as one byte, then advance by the QImode size. */
+ val = gen_lowpart (QImode, val);
+ emit_move_insn (dest, val);
+ emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
+ GET_MODE_SIZE (QImode)));
+ /* Branch back to L_loop_byte; annotated with prob_likely. */
+ jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ /* Common exit; the word path jumps here. */
+ emit_label (L_return);
+
+ return;
+}
===================================================================
@@ -119,6 +119,7 @@ extern void prepare_move_operands (rtx[], enum mac
extern bool sh_expand_cmpstr (rtx *);
extern bool sh_expand_cmpnstr (rtx *);
extern bool sh_expand_strlen (rtx *);
+extern void sh_expand_setmem (rtx *); /* memset expander called by the setmemqi pattern */
extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode,
enum rtx_code comparison);
extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int);
===================================================================
@@ -1594,6 +1594,11 @@ struct sh_args {
#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN)
+/* If a memory clear move would take CLEAR_RATIO or more simple
+ move-instruction pairs, we will do a setmem instead. */
+/* The threshold is 15 when SPEED is nonzero and 3 otherwise. */
+#define CLEAR_RATIO(speed) ((speed) ? 15 : 3)
+
/* Macros to check register numbers against specific register classes. */
/* These assume that REGNO is a hard or pseudo reg number.
===================================================================
@@ -12089,6 +12089,20 @@ label:
FAIL;
})
+(define_expand "setmemqi"
+ [(parallel [(set (match_operand:BLK 0 "memory_operand")
+ (match_operand 2 "const_int_operand"))
+ (use (match_operand:QI 1 "const_int_operand"))
+ (use (match_operand:QI 3 "const_int_operand"))])]
+ "TARGET_SH1 && optimize"
+ { /* Expand the memset inline through sh_expand_setmem unless optimizing for size. */
+ if (optimize_insn_for_size_p ())
+ FAIL; /* size-optimized insn: decline this expansion */
+ /* sh_expand_setmem returns void, so DONE is reported unconditionally after it. */
+ sh_expand_setmem (operands);
+ DONE;
+ })
+
;; -------------------------------------------------------------------------
;; Floating point instructions.
===================================================================
@@ -0,0 +1,13 @@
+/* Check that the __builtin_memset function is inlined when
+ optimizing for speed. */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
+/* { dg-final { scan-assembler-not "jmp" } } */
+
+void
+test00(char *dstb)
+{
+ __builtin_memset (dstb, 0, 15); /* constant fill value 0, constant length of 15 bytes */
+}
+