===================================================================
@@ -49,6 +49,7 @@
;; Sbw: QImode address with 12 bit displacement
;; Snd: address without displacement
;; Sdd: address with displacement
+;; Sra: simple register address
;; W: vector
;; Z: zero in any mode
;;
@@ -307,3 +308,8 @@
(match_test "GET_MODE (op) == QImode")
(match_test "satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1))")))
+(define_memory_constraint "Sra"
+ "A memory reference that uses a simple register addressing."
+ (and (match_test "MEM_P (op)")
+ (match_test "REG_P (XEXP (op, 0))")))
+
===================================================================
@@ -4842,6 +4842,88 @@
"extu.b %1,%0"
[(set_attr "type" "arith")])
+;; SH2A supports two zero extending load instructions: movu.b and movu.w.
+;; They could also be used for simple memory addresses like @Rn by setting
+;; the displacement value to zero. However, doing so too early results in
+;; missed opportunities for other optimizations such as post-inc or index
+;; addressing loads.
+;; Although the 'zero_extend_movu_operand' predicate does not allow simple
+;; register addresses (an address without a displacement, index, post-inc),
+;; zero-displacement addresses might be generated during reload, which are
+;; simplified to simple register addresses in turn. Thus, we have to
+;; provide the Sdd and Sra alternatives in the patterns.
+(define_insn "*zero_extendqisi2_disp_mem"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (zero_extend:SI
+ (match_operand:QI 1 "zero_extend_movu_operand" "Sdd,Sra")))]
+ "TARGET_SH2A"
+ "@
+ movu.b %1,%0
+ movu.b @(0,%t1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn "*zero_extendhisi2_disp_mem"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (zero_extend:SI
+ (match_operand:HI 1 "zero_extend_movu_operand" "Sdd,Sra")))]
+ "TARGET_SH2A"
+ "@
+ movu.w %1,%0
+ movu.w @(0,%t1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+;; Convert the zero extending loads in sequences such as:
+;; movu.b @(1,r5),r0 movu.w @(2,r5),r0
+;; mov.b r0,@(1,r4) mov.b r0,@(1,r4)
+;;
+;; back to sign extending loads like:
+;; mov.b @(1,r5),r0 mov.w @(2,r5),r0
+;; mov.b r0,@(1,r4) mov.b r0,@(1,r4)
+;;
+;; if the extension type is irrelevant. The sign extending mov.{b|w} insn
+;; is only 2 bytes in size if the displacement is {K04|K05}.
+;; If the displacement is greater it doesn't matter, so we convert anyway.
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (zero_extend:SI (match_operand 1 "displacement_mem_operand" "")))
+ (set (match_operand 2 "general_operand" "")
+ (match_operand 3 "arith_reg_operand" ""))]
+ "TARGET_SH2A
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && peep2_reg_dead_p (2, operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[2]))
+ <= GET_MODE_SIZE (GET_MODE (operands[1]))"
+ [(set (match_dup 0) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 2) (match_dup 3))])
+
+;; Fold sequences such as
+;; mov.b @r3,r7
+;; extu.b r7,r7
+;; into
+;; movu.b @(0,r3),r7
+;; This does not reduce the code size but the number of instructions is
+;; halved, which results in faster code.
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (sign_extend:SI (match_operand 1 "simple_mem_operand" "")))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (zero_extend:SI (match_operand 3 "arith_reg_operand" "")))]
+ "TARGET_SH2A
+ && GET_MODE (operands[1]) == GET_MODE (operands[3])
+ && (GET_MODE (operands[1]) == QImode || GET_MODE (operands[1]) == HImode)
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && (REGNO (operands[2]) == REGNO (operands[0])
+ || peep2_reg_dead_p (2, operands[0]))"
+ [(set (match_dup 2) (zero_extend:SI (match_dup 4)))]
+{
+ operands[4]
+ = replace_equiv_address (operands[1],
+ gen_rtx_PLUS (SImode, XEXP (operands[1], 0),
+ const0_rtx));
+})
+
;; -------------------------------------------------------------------------
;; Sign extension instructions
;; -------------------------------------------------------------------------
===================================================================
@@ -368,12 +368,33 @@
: nonimmediate_operand) (op, mode);
})
+;; Returns 1 if OP is a memory reference with a simple register address.
+(define_predicate "simple_mem_operand"
+ (and (match_code "mem")
+ (match_test "arith_reg_operand (XEXP (op, 0), SImode)")))
+
+;; Returns 1 if OP is a memory reference with a valid displacement address.
+(define_predicate "displacement_mem_operand"
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == PLUS")
+ (match_test "arith_reg_operand (XEXP (XEXP (op, 0), 0), SImode)")
+ (match_test "sh_legitimate_index_p (GET_MODE (op),
+ XEXP (XEXP (op, 0), 1),
+ TARGET_SH2A, true)")))
+
+;; Returns 1 if the operand can be used in an SH2A movu.{b|w} insn.
+(define_predicate "zero_extend_movu_operand"
+ (and (match_operand 0 "displacement_mem_operand")
+ (match_test "GET_MODE (op) == QImode || GET_MODE (op) == HImode")))
+
;; Returns 1 if the operand can be used in a zero_extend.
(define_predicate "zero_extend_operand"
(ior (and (match_test "TARGET_SHMEDIA")
(match_operand 0 "general_extend_operand"))
(and (match_test "! TARGET_SHMEDIA")
- (match_operand 0 "arith_reg_operand"))))
+ (match_operand 0 "arith_reg_operand"))
+ (and (match_test "TARGET_SH2A")
+ (match_operand 0 "zero_extend_movu_operand"))))
;; Returns 1 if OP can be source of a simple move operation. Same as
;; general_operand, but a LABEL_REF is valid, PRE_DEC is invalid as
===================================================================
@@ -0,0 +1,100 @@
+/* Check that on SH2A the 4 byte movu.b and movu.w displacement insns are
+ generated. This has to be checked with -O2 because some of the patterns
+ rely on peepholes. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "*" } { "-m2a*" } } */
+/* { dg-final { scan-assembler-times "movu.b" 4 } } */
+/* { dg-final { scan-assembler-times "movu.w" 3 } } */
+
+int
+test_00 (unsigned char* x)
+{
+ /* 1x movu.b */
+ return x[0];
+}
+
+int
+test_01 (unsigned short* x)
+{
+ /* 1x movu.w */
+ return x[0];
+}
+
+int
+test_02 (unsigned char* x)
+{
+ /* 1x movu.b */
+ return x[1];
+}
+
+int
+test_03 (unsigned char* x)
+{
+ /* 1x movu.b */
+ return x[32];
+}
+
+int
+test_04 (unsigned char* x)
+{
+ /* 1x movu.b */
+ return x[9000];
+}
+
+int
+test_05 (unsigned short* x)
+{
+ /* 1x movu.w */
+ return x[9000];
+}
+
+int
+test_06 (unsigned char* x, int i)
+{
+ /* No movu.b expected here. Should use mov.b @(r0,r4) + extu.b instead. */
+ return x[i];
+}
+
+int
+test_07 (unsigned short* x, int i)
+{
+ /* No movu.w expected here. Should use mov.w @(r0,r4) + extu.w instead. */
+ return x[i];
+}
+
+int
+test_08 (unsigned char* x, int c)
+{
+ /* No movu.b expected here. Should use post-inc addressing instead. */
+ int s = 0;
+ int i;
+ for (i = 0; i < c; ++i)
+ s += x[i];
+ return s;
+}
+
+void
+test_09 (unsigned char* x, unsigned char* y)
+{
+ /* No movu.b expected here, since the zero-extension is irrelevant. */
+ x[1] = y[1];
+ x[2] = y[2];
+}
+
+void
+test_10 (unsigned char* x, unsigned short* y)
+{
+ /* No movu.w expected here, since the zero-extension is irrelevant. */
+ x[1] = y[1];
+ x[2] = y[2];
+}
+
+int
+test_11 (unsigned char* x, unsigned short* y)
+{
+ /* 1x movu.w */
+ int yy = y[1];
+ x[1] = yy;
+ return yy;
+}