===================================================================
@@ -418,6 +418,30 @@
return general_operand (op, mode);
})
+;; Accepts what general_movsrc_operand accepts, except QImode and HImode MEMs
+;; (also inside a SUBREG) whose address is reg + const_int displacement.
+(define_predicate "movsrc_no_disp_mem_operand"
+ (match_code "subreg,reg,const_int,const_double,mem,symbol_ref,label_ref,const,const_vector")
+{
+ if (!general_movsrc_operand (op, mode))
+ return 0;
+
+ if ((mode == QImode || mode == HImode)
+ && mode == GET_MODE (op)
+ && (MEM_P (op)
+ || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))))
+ {
+ rtx x = XEXP ((MEM_P (op) ? op : SUBREG_REG (op)), 0); /* Address of the MEM.  */
+
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && CONST_INT_P (XEXP (x, 1)))
+ return 0; /* reg + const_int displacement address: reject.  */
+ }
+
+ return 1;
+})
+
;; Returns 1 if OP can be a destination of a move. Same as
;; general_operand, but no preinc allowed.
===================================================================
@@ -3137,6 +3137,11 @@
sh_address_cost (rtx X,
bool speed ATTRIBUTE_UNUSED)
{
+ /* SH2A supports 4 byte displacement mov insns with higher offsets.
+ Consider those as more expensive than 2 byte insns. */
+ if (DISP_ADDR_P (X) && GET_MODE (X) == QImode)
+ return DISP_ADDR_OFFSET (X) < 16 ? 0 : 1;
+
return (GET_CODE (X) == PLUS
&& ! CONSTANT_P (XEXP (X, 1))
&& ! TARGET_SHMEDIA ? 1 : 0);
@@ -9606,11 +9611,13 @@
if (TARGET_SH2A)
{
- if (GET_MODE_SIZE (mode) == 1
- && (unsigned) INTVAL (op) < 4096)
+ if (mode == QImode && (unsigned) INTVAL (op) < 4096)
return true;
}
+ if (mode == QImode && (unsigned) INTVAL (op) < 16)
+ return true;
+
if ((GET_MODE_SIZE (mode) == 4
&& (unsigned) INTVAL (op) < 64
&& !(INTVAL (op) & 3)
@@ -9816,6 +9823,25 @@
}
}
+ /* This could be generalized for SImode, HImode, QImode displacement
+ addressing. */
+ if (mode == QImode && GET_CODE (x) == PLUS
+ && BASE_REGISTER_RTX_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
+ {
+ rtx index_rtx = XEXP (x, 1);
+ HOST_WIDE_INT offset = INTVAL (index_rtx);
+ HOST_WIDE_INT offset_base = offset & ~15;
+
+ if (offset - offset_base <= 16)
+ {
+ rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
+ GEN_INT (offset_base), NULL_RTX, 0,
+ OPTAB_LIB_WIDEN);
+
+ return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
+ }
+ }
+
return x;
}
@@ -11444,8 +11470,13 @@
{
/* We want to enable the use of SUBREGs as a means to
VEC_SELECT a single element of a vector. */
+
+ /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
+ This can be problematic when SFmode vector subregs need to be accessed
+ on the stack with displacement addressing, as it happens with -O0.
+ Thus we disallow the mode change for -O0. */
if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
- return (reg_classes_intersect_p (GENERAL_REGS, rclass));
+ return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
{
@@ -11460,7 +11491,7 @@
return reg_classes_intersect_p (DF_HI_REGS, rclass);
}
}
- return 0;
+ return false;
}
/* Return true if registers in machine mode MODE will likely be
@@ -12472,6 +12503,25 @@
if (rclass == FPUL_REGS && true_regnum (x) == -1)
return GENERAL_REGS;
+ /* Force mov.b displacement addressing insn to use R0 as the other operand.
+ On SH2A could also just leave it alone here, which would result in a
+ 4 byte move insn being generated instead. However, for this to work
+ the insns must have the appropriate alternatives. */
+ if (mode == QImode && rclass != R0_REGS
+ && DISP_ADDR_P (x) && DISP_ADDR_OFFSET (x) < 16)
+ return R0_REGS;
+
+ /* When reload is trying to address a QImode or HImode subreg on the stack,
+ force any subreg byte into R0_REGS, as this is going to become a
+ displacement address.
+ We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
+ is on the stack, the memref to it might already require a displacement
+ and that has to be added to the final address. At this point we don't
+ know the cumulative displacement so we assume the worst case. */
+ if ((mode == QImode || mode == HImode) && rclass != R0_REGS
+ && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
+ return R0_REGS;
+
return NO_REGS;
}
===================================================================
@@ -1210,12 +1210,24 @@
((HOST_BITS_PER_WIDE_INT >= 64 && (VALUE) == (HOST_WIDE_INT) 0xffffffff) \
|| (HOST_BITS_PER_WIDE_INT >= 64 && (VALUE) == (HOST_WIDE_INT) -1 << 32))
+#define CONST_OK_FOR_K04(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 15) /* Unsigned 4-bit range, 0..15.  */
+
#define CONST_OK_FOR_K08(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \
&& ((HOST_WIDE_INT)(VALUE)) <= 255)
+#define CONST_OK_FOR_K12(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 4095) /* Unsigned 12-bit range, 0..4095.  */
+
#define ZERO_EXTRACT_ANDMASK(EXTRACT_SZ_RTX, EXTRACT_POS_RTX)\
(((1 << INTVAL (EXTRACT_SZ_RTX)) - 1) << INTVAL (EXTRACT_POS_RTX))
+#define DISP_ADDR_P(X) (MEM_P (X) && GET_CODE (XEXP (X, 0)) == PLUS \
+ && REG_P (XEXP (XEXP (X, 0), 0)) \
+ && CONST_INT_P (XEXP (XEXP (X, 0), 1))) /* MEM addressed as reg + const_int.  */
+/* The const_int displacement of X.  X is not checked; test DISP_ADDR_P first.  */
+#define DISP_ADDR_OFFSET(X) (INTVAL (XEXP (XEXP (X, 0), 1)))
+
#if 0
#define SECONDARY_INOUT_RELOAD_CLASS(CLASS,MODE,X,ELSE) \
((((REGCLASS_HAS_FP_REG (CLASS) \
===================================================================
@@ -123,6 +123,7 @@
(match_test "ival >= -134217728 && ival <= 134217727")
(match_test "(ival & 255) == 0")
(match_test "TARGET_SH2A")))
+
(define_constraint "J16"
"0xffffffff00000000 or 0x00000000ffffffff."
(and (match_code "const_int")
@@ -133,6 +134,11 @@
(and (match_code "const_int")
(match_test "ival >= 0 && ival <= 7")))
+(define_constraint "K04"
+ "An unsigned 4-bit constant, as used in mov.b displacement addressing."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 15"))) ; Same range as CONST_OK_FOR_K04.
+
(define_constraint "K08"
"An unsigned 8-bit constant, as used in and, or, etc."
(and (match_code "const_int")
@@ -266,3 +272,11 @@
(match_test "GET_CODE (XEXP (op, 0)) == PLUS")
(match_test "REG_P (XEXP (XEXP (op, 0), 0))")
(match_test "satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1))")))
+;; DISP_ADDR_P is false for every non-MEM rtx, so the MEM test must be explicit.
+(define_memory_constraint "Snd"
+ "A memory reference that excludes displacement addressing."
+ (and (match_code "mem") (match_test "! DISP_ADDR_P (op)")))
+;; No separate MEM test is needed here: DISP_ADDR_P itself checks MEM_P.
+(define_memory_constraint "Sdd"
+ "A memory reference that uses displacement addressing."
+ (match_test "DISP_ADDR_P (op)"))
===================================================================
@@ -4871,25 +4871,57 @@
}")
(define_expand "extendqisi2"
- [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
- (sign_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (sign_extend:SI (match_operand:QI 1 "general_extend_operand" "")))]
""
"")
-(define_insn "*extendqisi2_compact"
+(define_insn "*extendqisi2_compact_reg"
[(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
- (sign_extend:SI (match_operand:QI 1 "general_movsrc_operand" "r,m")))]
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "r,t")))]
"TARGET_SH1"
"@
exts.b %1,%0
- mov.b %1,%0"
- [(set_attr "type" "arith,load")
- (set_attr_alternative "length"
- [(const_int 2)
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))])])
+ movt %0"
+ [(set_attr "type" "arith,arith")])
+;; Non-SH2A sign-extending mov.b load: K04 displacement into "z", or zero offset.
+;; FIXME: Fold non-SH2A and SH2A alternatives with "enabled" attribute, see movqi insns.
+(define_insn "*extendqisi2_compact_mem_disp"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,r")
+ (sign_extend:SI
+ (mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r")
+ (match_operand:SI 2 "const_int_operand" "K04,N")))))]
+ "TARGET_SH1 && ! TARGET_SH2A && CONST_OK_FOR_K04 (INTVAL (operands[2]))"
+ "@
+ mov.b @(%O2,%1),%0
+ mov.b @%1,%0"
+ [(set_attr "type" "load")])
+;; SH2A variant: a third alternative takes K12 displacements with a 4 byte insn.
+(define_insn "*extendqisi2_compact_mem_disp"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,r,r")
+ (sign_extend:SI
+ (mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r,r")
+ (match_operand:SI 2 "const_int_operand" "K04,N,K12")))))]
+ "TARGET_SH2A
+ && (CONST_OK_FOR_K04 (INTVAL (operands[2]))
+ || (CONST_OK_FOR_K12 (INTVAL (operands[2]))))"
+ "@
+ mov.b @(%O2,%1),%0
+ mov.b @%1,%0
+ mov.b @(%O2,%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "2,2,4")])
+
+;; This handles all QImode addressing modes other than displacement
+;; addressing, which the mem_disp insns above match.
+(define_insn "*extendqisi2_compact_snd"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (sign_extend:SI (match_operand:QI 1 "movsrc_no_disp_mem_operand" "Snd")))]
+ "TARGET_SH1"
+ "mov.b %1,%0"
+ [(set_attr "type" "load")])
+
(define_insn "*extendqisi2_media"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(sign_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))]
@@ -4919,19 +4951,18 @@
subreg_lowpart_offset (SImode, GET_MODE (op1)));
}")
-(define_insn "extendqihi2"
- [(set (match_operand:HI 0 "arith_reg_dest" "=r,r")
- (sign_extend:HI (match_operand:QI 1 "general_movsrc_operand" "r,m")))]
+(define_expand "extendqihi2"
+ [(set (match_operand:HI 0 "arith_reg_dest" "")
+ (sign_extend:HI (match_operand:QI 1 "arith_reg_operand" "")))]
+ ""
+ "")
+
+(define_insn "*extendqihi2_compact_reg"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+ (sign_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))]
"TARGET_SH1"
- "@
- exts.b %1,%0
- mov.b %1,%0"
- [(set_attr "type" "arith,load")
- (set_attr_alternative "length"
- [(const_int 2)
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))])])
+ "exts.b %1,%0"
+ [(set_attr "type" "arith")])
/* It would seem useful to combine the truncXi patterns into the movXi
patterns, but unary operators are ignored when matching constraints,
@@ -5475,33 +5506,90 @@
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
-(define_insn "movqi_i"
- [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m,r,r,l")
- (match_operand:QI 1 "general_movsrc_operand" "r,i,m,r,t,l,r"))]
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "{ if (prepare_move_operands (operands, QImode)) DONE; }") ; DONE when prepare_move_operands returns nonzero.
+
+;; If movqi_reg_reg is specified as an alternative of movqi, movqi will be
+;; selected to copy QImode regs. If one of them happens to be allocated
+;; on the stack, reload will stick to movqi insn and generate wrong
+;; displacement addressing because of the generic m alternatives.
+;; With movqi_reg_reg being specified before movqi it will be initially
+;; picked to load/store regs. If the regs are on the stack reload will
+;; try other insns and not stick to movqi_reg_reg.
+(define_insn "*movqi_reg_reg"
+ [(set (match_operand:QI 0 "arith_reg_dest" "=r,r")
+ (match_operand:QI 1 "register_operand" "r,t"))]
+ "TARGET_SH1"
+ "@
+ mov %1,%0
+ movt %0"
+ [(set_attr "type" "move,arith")])
+
+;; FIXME: The non-SH2A and SH2A variants should be combined by adding the
+;; "enabled" attribute, as is done in other targets.
+(define_insn "*movqi_store_mem_disp04"
+ [(set (mem:QI (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r,r")
+ (match_operand:SI 1 "const_int_operand" "K04,N")))
+ (match_operand:QI 2 "arith_reg_operand" "z,r"))]
+ "TARGET_SH1 && CONST_OK_FOR_K04 (INTVAL (operands[1]))"
+ "@
+ mov.b %2,@(%O1,%0)
+ mov.b %2,@%0"
+ [(set_attr "type" "store")])
+;; SH2A only: 4 byte mov.b store with a K12 displacement from any "r" source.
+(define_insn "*movqi_store_mem_disp12"
+ [(set (mem:QI (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+ (match_operand:SI 1 "const_int_operand" "K12")))
+ (match_operand:QI 2 "arith_reg_operand" "r"))]
+ "TARGET_SH2A && CONST_OK_FOR_K12 (INTVAL (operands[1]))"
+ "mov.b %2,@(%O1,%0)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+;; Non-SH2A QImode load: a K04 displacement needs a "z" dest, zero offset any "r".
+(define_insn "*movqi_load_mem_disp"
+ [(set (match_operand:QI 0 "arith_reg_dest" "=z,r")
+ (mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r")
+ (match_operand:SI 2 "const_int_operand" "K04,N"))))]
+ "TARGET_SH1 && ! TARGET_SH2A && CONST_OK_FOR_K04 (INTVAL (operands[2]))"
+ "@
+ mov.b @(%O2,%1),%0
+ mov.b @%1,%0"
+ [(set_attr "type" "load")])
+;; SH2A QImode load: as the non-SH2A insn plus a 4 byte K12 alternative.
+(define_insn "*movqi_load_mem_disp"
+ [(set (match_operand:QI 0 "arith_reg_dest" "=z,r,r")
+ (mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r,r")
+ (match_operand:SI 2 "const_int_operand" "K04,N,K12"))))]
+ "TARGET_SH2A
+ && (CONST_OK_FOR_K04 (INTVAL (operands[2]))
+ || CONST_OK_FOR_K12 (INTVAL (operands[2])))"
+ "@
+ mov.b @(%O2,%1),%0
+ mov.b @%1,%0
+ mov.b @(%O2,%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "2,2,4")])
+
+;; The m constraints basically allow any kind of addresses to be used with any
+;; source/target register as the other operand. This is not true for
+;; displacement addressing modes on anything but SH2A. That's why the
+;; specialized load/store insns are specified above.
+(define_insn "*movqi"
+ [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,l")
+ (match_operand:QI 1 "general_movsrc_operand" "i,m,r,l,r"))]
"TARGET_SH1
&& (arith_reg_operand (operands[0], QImode)
|| arith_reg_operand (operands[1], QImode))"
"@
mov %1,%0
- mov %1,%0
mov.b %1,%0
mov.b %1,%0
- movt %0
sts %1,%0
lds %1,%0"
- [(set_attr "type" "move,movi8,load,store,arith,prget,prset")
- (set_attr_alternative "length"
- [(const_int 2)
- (const_int 2)
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
- (const_int 2)
- (const_int 2)
- (const_int 2)])])
+ [(set_attr "type" "movi8,load,store,prget,prset")])
(define_insn "*movqi_media"
[(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m")
@@ -5520,12 +5608,6 @@
(const_string "user")]
(const_string "ignore")))])
-(define_expand "movqi"
- [(set (match_operand:QI 0 "general_operand" "")
- (match_operand:QI 1 "general_operand" ""))]
- ""
- "{ if (prepare_move_operands (operands, QImode)) DONE; }")
-
(define_expand "reload_inqi"
[(set (match_operand:SI 2 "" "=&r")
(match_operand:QI 1 "inqhi_operand" ""))
@@ -7036,14 +7118,6 @@
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]
"")
-
-(define_split
- [(set (match_operand:SI 0 "register_operand" "")
- (match_operand:SI 1 "memory_operand" ""))
- (clobber (reg:SI R0_REG))]
- "TARGET_SH1"
- [(set (match_dup 0) (match_dup 1))]
- "")
;; ------------------------------------------------------------------------
;; Define the real conditional branch instructions.
@@ -11684,6 +11758,78 @@
"TARGET_SH2"
"dt %0")
+;; The following peepholes fold load sequences for which reload was not
+;; able to generate a displacement addressing move insn.
+;; This can happen when reload has to transform a move insn
+;; without displacement into one with displacement. Or when reload can't
+;; fit a displacement into the insn's constraints. In the latter case, the
+;; load destination reg remains at r0, which reload compensates by inserting
+;; another mov insn.
+
+;; Fold sequence:
+;; mov #54,r0
+;; mov.b @(r0,r15),r0
+;; mov r0,r3
+;; into:
+;; mov.b @(54,r15),r3
+;; Only valid if the constant reg is not the index reg and is unused afterwards.
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (sign_extend:SI
+ (mem:QI (plus:SI (match_dup 0)
+ (match_operand:SI 3 "arith_reg_operand" "")))))
+ (set (match_operand:QI 4 "arith_reg_dest" "")
+ (match_operand:QI 5 "arith_reg_operand" ""))]
+ "TARGET_SH2A && CONST_OK_FOR_K12 (INTVAL (operands[1]))
+ && REGNO (operands[2]) == REGNO (operands[5]) && peep2_reg_dead_p (3, operands[5])
+ && ! reg_overlap_mentioned_p (operands[0], operands[3])
+ && (REGNO (operands[0]) == REGNO (operands[4]) || peep2_reg_dead_p (3, operands[0]))"
+ [(set (match_dup 4) (mem:QI (plus:SI (match_dup 3) (match_dup 1))))]
+ "")
+
+;; Fold sequence:
+;; mov #54,r0
+;; mov.b @(r0,r15),r1
+;; into:
+;; mov.b @(54,r15),r1
+;; Only valid if the constant reg is not also the index reg (operand 3).
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (sign_extend:SI
+ (mem:QI (plus:SI (match_dup 0)
+ (match_operand:SI 3 "arith_reg_operand" "")))))]
+ "TARGET_SH2A && CONST_OK_FOR_K12 (INTVAL (operands[1]))
+ && ! reg_overlap_mentioned_p (operands[0], operands[3])
+ && (peep2_reg_dead_p (2, operands[0])
+ || REGNO (operands[0]) == REGNO (operands[2]))"
+ [(set (match_dup 2)
+ (sign_extend:SI (mem:QI (plus:SI (match_dup 3) (match_dup 1)))))]
+ "")
+
+;; Fold sequence:
+;; mov.b @(r0,r15),r0
+;; mov r0,r3
+;; into:
+;; mov.b @(r0,r15),r3
+;; Requires that the extended reg (operand 0) is dead after its QImode copy.
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (sign_extend:SI
+ (mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")))))
+ (set (match_operand:QI 3 "arith_reg_dest" "")
+ (match_operand:QI 4 "arith_reg_operand" ""))]
+ "TARGET_SH1
+ && REGNO (operands[0]) == REGNO (operands[4])
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 3)
+ (mem:QI (plus:SI (match_dup 1) (match_dup 2))))]
+ "")
+
;; These convert sequences such as `mov #k,r0; add r15,r0; mov.l @r0,rn'
;; to `mov #k,r0; mov.l @(r0,r15),rn'. These sequences are generated by
;; reload when the constant is too large for a reg+offset address.