2013-10-17 Christian Bruel <christian.bruel@st.com>
* gcc/config/sh/sh-mem.c (sh4_expand_cmpstr): New function.
* gcc/config/sh/sh-protos.h (sh4_expand_cmpstr): Declare.
* gcc/config/sh/sh.md (cmpstrsi, cmpstr_t): New patterns.
(rotlhi3_8): Rename.
@@ -31,9 +31,6 @@
;; ??? The MAC.W and MAC.L instructions are not supported. There is no
;; way to generate them.
-;; ??? The cmp/str instruction is not supported. Perhaps it can be used
-;; for a str* inline function.
-
;; BSR is not generated by the compiler proper, but when relaxing, it
;; generates .uses pseudo-ops that allow linker relaxation to create
;; BSR. This is actually implemented in bfd/{coff,elf32}-sh.c
@@ -4037,7 +4034,7 @@
DONE;
})
-(define_insn "*rotlhi3_8"
+(define_insn "rotlhi3_8"
[(set (match_operand:HI 0 "arith_reg_dest" "=r")
(rotate:HI (match_operand:HI 1 "arith_reg_operand" "r")
(const_int 8)))]
@@ -11912,6 +11909,41 @@
"jsr @%0%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
+
+;; byte compare pattern
+;; temp = a ^ b;
+;; !((temp & 0xF000) && (temp & 0x0F00) && (temp & 0x00F0) && (temp & 0x000F))
+(define_insn "cmpstr_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (and:SI
+ (and:SI
+ (and:SI
+ (zero_extract:SI (xor:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_operand" "r"))
+ (const_int 8) (const_int 0))
+ (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1))
+ (const_int 8) (const_int 8)))
+ (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1))
+ (const_int 8) (const_int 16)))
+ (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1))
+ (const_int 8) (const_int 24))) (const_int 0)))]
+ "TARGET_SH1"
+ "cmp/str %0,%1"
+ [(set_attr "type" "mt_group")])
+
+(define_expand "cmpstrsi"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (compare:SI (match_operand:BLK 1 "memory_operand" "")
+ (match_operand:BLK 2 "memory_operand" "")))
+ (use (match_operand 3 "immediate_operand" ""))]
+ "TARGET_SH1"
+ "
+{
+ if (! optimize_insn_for_size_p () && sh4_expand_cmpstr(operands))
+ DONE;
+ else FAIL;
+}")
+
;; -------------------------------------------------------------------------
;; Floating point instructions.
@@ -23,6 +23,7 @@
#include "tm.h"
#include "expr.h"
#include "tm_p.h"
+#include "basic-block.h"
/* Like force_operand, but guarantees that VALUE ends up in TARGET. */
static void
@@ -174,3 +175,130 @@
return false;
}
+
+/* Emit code to perform a strcmp.
+
+ OPERANDS[0] is the destination.
+ OPERANDS[1] is the first string.
+ OPERANDS[2] is the second string.
+ OPERANDS[3] is the align. */
+bool
+sh4_expand_cmpstr (rtx *operands)
+{
+ rtx s1 = copy_rtx (operands[1]);
+ rtx s2 = copy_rtx (operands[2]);
+ rtx s1_addr = copy_addr_to_reg (XEXP (s1, 0));
+ rtx s2_addr = copy_addr_to_reg (XEXP (s2, 0));
+ rtx tmp0 = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (SImode);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ rtx tmp3 = gen_reg_rtx (SImode);
+
+ rtx L_return = gen_label_rtx ();
+ rtx L_loop_byte = gen_label_rtx ();
+ rtx L_end_loop_byte = gen_label_rtx ();
+ rtx L_loop_long = gen_label_rtx ();
+ rtx L_end_loop_long = gen_label_rtx ();
+
+ rtx jump, addr1, addr2;
+ int prob_unlikely = REG_BR_PROB_BASE / 10;
+ int prob_likely = REG_BR_PROB_BASE / 4;
+
+ emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
+ emit_move_insn (tmp0, GEN_INT (3));
+
+ emit_insn (gen_tstsi_t (tmp0, tmp1));
+
+ emit_move_insn (tmp0, const0_rtx);
+
+ jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+
+ addr1 = adjust_automodify_address (s1, SImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (s2, SImode, s2_addr, 0);
+
+ /* tmp2 is aligned, OK to load. */
+ emit_move_insn (tmp3, addr2);
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
+
+ /*start long loop. */
+ emit_label (L_loop_long);
+
+ emit_move_insn (tmp2, tmp3);
+
+ /* tmp1 is aligned, OK to load. */
+ emit_move_insn (tmp1, addr1);
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));
+
+ /* Is there a 0 byte ? */
+ emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));
+
+ emit_insn (gen_cmpstr_t (tmp0, tmp3));
+ jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
+
+ /* tmp2 is aligned, OK to load. */
+ emit_move_insn (tmp3, addr2);
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
+
+ jump = emit_jump_insn (gen_branch_true (L_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ /* end loop. */
+
+ /* Fallthu, check if one of the word is greater. */
+ if (TARGET_LITTLE_ENDIAN)
+ {
+ rtx low_1 = gen_lowpart (HImode, tmp1);
+ rtx low_2 = gen_lowpart (HImode, tmp2);
+
+ emit_insn (gen_rotlhi3_8 (low_1, low_1));
+ emit_insn (gen_rotlhi3_8 (low_2, low_2));
+ emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
+ emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
+ emit_insn (gen_rotlhi3_8 (low_1, low_1));
+ emit_insn (gen_rotlhi3_8 (low_2, low_2));
+ }
+
+ jump = emit_jump_insn (gen_jump_compact (L_return));
+ emit_barrier_after (jump);
+
+ /* start byte loop. */
+ addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0);
+
+ emit_label (L_end_loop_long);
+
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
+
+ emit_label (L_loop_byte);
+
+ emit_insn (gen_extendqisi2 (tmp2, addr2));
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
+
+ emit_insn (gen_extendqisi2 (tmp1, addr1));
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
+
+ emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
+ jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
+ emit_jump_insn (gen_branch_true (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ /* end loop. */
+
+ emit_label (L_end_loop_byte);
+
+ emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
+ emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
+
+ emit_label (L_return);
+
+ emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
+
+ return true;
+}
+
@@ -116,6 +116,7 @@
extern void output_pic_addr_const (FILE *, rtx);
extern bool expand_block_move (rtx *);
extern void prepare_move_operands (rtx[], enum machine_mode mode);
+extern bool sh4_expand_cmpstr (rtx *);
extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode,
enum rtx_code comparison);
extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int);