Index: gcc/config/i386/i386.md
===================================================================
@@ -5799,8 +5799,7 @@
switch (get_attr_type (insn))
{
case TYPE_LEA:
- operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
- return "lea{<imodesuffix>}\t{%a2, %0|%0, %a2}";
+ return "#";
case TYPE_INCDEC:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
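
Returning "#" instead of an output template tells final that this insn
cannot be emitted directly and must go through a splitter; the
post-reload splitters further down (the ix86_lea_for_add_ok hunks) then
choose between the LEA and ADD forms once the surrounding insns are
visible.  The intended effect on generated code, as an assumed test
case rather than anything taken from the patch itself:

/* assumed-example.c: compile with a patched x86-64 gcc,
   e.g. gcc -O2 -S assumed-example.c  */
unsigned int
f (unsigned int a, unsigned int b)
{
  /* A three-operand register add.  With no address-generation use of
     the result nearby, the splitter is expected to prefer
     "movl %edi, %eax; addl %esi, %eax" over "leal (%rdi,%rsi), %eax"
     on most processors.  */
  return a + b;
}
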
@@ -5813,11 +5812,13 @@
}
default:
- /* This alternative was added for TARGET_OPT_AGU to use add as
- much as possible. But add is also faster than lea for
- !TARGET_OPT_AGU. */
+ /* For most processors, ADD is faster than LEA. This alternative
+ was added to use ADD as much as possible. */
if (which_alternative == 2)
- return "add{<imodesuffix>}\t{%1, %0|%0, %1}";
+ {
+ rtx tmp;
+ tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+ }
gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
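
The new alternative here ties operands[2] rather than operands[1] to
the output, so before falling through to the common ADD path the
template exchanges the two sources; PLUS is commutative, so the result
is unchanged and the gcc_assert below still holds.  The two machine
forms being chosen between look like this (a minimal stand-alone
sketch assuming an x86-64 GCC, not part of the patch):

#include <stdio.h>

/* ADD is destructive (the destination doubles as a source) and writes
   FLAGS; LEA is three-operand and leaves FLAGS untouched.  */
static unsigned int
add_form (unsigned int a, unsigned int b)
{
  __asm__ ("add %1, %0" : "+r" (a) : "r" (b));   /* a += b */
  return a;
}

static unsigned int
lea_form (unsigned long a, unsigned long b)
{
  unsigned int dest;
  __asm__ ("lea (%1,%2), %0" : "=r" (dest) : "r" (a), "r" (b));
  return dest;
}

int
main (void)
{
  printf ("%u %u\n", add_form (20, 22), lea_form (20, 22));
  return 0;
}

Either form computes the sum; the point of the patch is that on most
cores the ADD form is cheaper when the destination already overlaps a
source, and the extra "0"-tied alternatives make that case available
to reload more often.
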
@@ -5847,18 +5848,17 @@
;; patterns constructed from addsi_1 to match.
(define_insn "*addsi_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
- (match_operand:SI 2 "general_operand" "g,li"))))
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
+ (match_operand:SI 2 "general_operand" "g,0,li"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
{
switch (get_attr_type (insn))
{
case TYPE_LEA:
- operands[2] = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0);
- return "lea{l}\t{%a2, %k0|%k0, %a2}";
+ return "#";
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
@@ -5870,6 +5870,14 @@
}
default:
+ /* For most processors, ADD is faster than LEA. This alternative
+ was added to use ADD as much as possible. */
+ if (which_alternative == 1)
+ {
+ rtx tmp;
+ tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+ }
+
if (x86_maybe_negate_const_int (&operands[2], SImode))
return "sub{l}\t{%2, %k0|%k0, %2}";
@@ -5877,7 +5885,7 @@
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "1")
+ (cond [(eq_attr "alternative" "2")
(const_string "lea")
(match_operand:SI 2 "incdec_operand" "")
(const_string "incdec")
@@ -5932,9 +5940,9 @@
;; for PII, and in fact harmful because of partial register stalls.
(define_insn "*addhi_1_lea"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
- (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r")
- (match_operand:HI 2 "general_operand" "rn,rm,ln")))
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,r")
+ (match_operand:HI 2 "general_operand" "rmn,rn,0,ln")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL
&& ix86_binary_operator_ok (PLUS, HImode, operands)"
@@ -5955,6 +5963,14 @@
}
default:
+ /* For most processors, ADD is faster than LEA. This alternative
+ was added to use ADD as much as possible. */
+ if (which_alternative == 2)
+ {
+ rtx tmp;
+ tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+ }
+
gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (x86_maybe_negate_const_int (&operands[2], HImode))
return "sub{w}\t{%2, %0|%0, %2}";
@@ -5963,18 +5979,20 @@
}
}
[(set (attr "type")
- (if_then_else (eq_attr "alternative" "2")
- (const_string "lea")
- (if_then_else (match_operand:HI 2 "incdec_operand" "")
- (const_string "incdec")
- (const_string "alu"))))
+ (cond [(eq_attr "alternative" "3")
+ (const_string "lea")
+ (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
(set (attr "length_immediate")
(if_then_else
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "HI,HI,SI")])
+ (set_attr "mode" "HI,HI,HI,SI")])
+;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*addqi_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
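
The "%%% partial reg stall" notes refer to the P6-family hazard: a
write to a byte or word register followed by a read of the containing
32-bit register stalls until the partial write retires.  The widened
(mode "SI") alternatives avoid this by performing the add on the full
register.  A hypothetical stand-alone illustration of the problem
sequence, assuming an x86-64 GCC:

#include <stdio.h>

static unsigned int
byte_add (unsigned int x)
{
  /* "addb" writes only the low 8 bits of x's register; returning x
     then reads all 32 bits -- exactly the partial-register pattern
     the comments warn about.  The widened alternatives emit "addl"
     instead, writing the whole register.  */
  __asm__ ("addb $1, %b0" : "+q" (x));
  return x;
}

int
main (void)
{
  printf ("%u\n", byte_add (41));   /* prints 42 */
  return 0;
}
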
@@ -6020,16 +6038,17 @@
(const_string "*")))
(set_attr "mode" "QI,QI,SI")])
-;; %%% Potential partial reg stall on alternative 2. What to do?
+;; %%% Potential partial reg stall on alternatives 3 and 4. What to do?
(define_insn "*addqi_1_lea"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r")
- (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r")
- (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,q,r,r,r")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,r")
+ (match_operand:QI 2 "general_operand" "qmn,qn,0,rn,0,ln")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL
&& ix86_binary_operator_ok (PLUS, QImode, operands)"
{
- int widen = (which_alternative == 2);
+ int widen = (which_alternative == 3 || which_alternative == 4);
+
switch (get_attr_type (insn))
{
case TYPE_LEA:
@@ -6046,6 +6065,14 @@
}
default:
+ /* For most processors, ADD is faster than LEA. These alternatives
+ were added to use ADD as much as possible. */
+ if (which_alternative == 2 || which_alternative == 4)
+ {
+ rtx tmp;
+ tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+ }
+
gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (x86_maybe_negate_const_int (&operands[2], QImode))
{
@@ -6061,17 +6088,18 @@
}
}
[(set (attr "type")
- (if_then_else (eq_attr "alternative" "3")
- (const_string "lea")
- (if_then_else (match_operand:QI 2 "incdec_operand" "")
- (const_string "incdec")
- (const_string "alu"))))
+ (cond [(eq_attr "alternative" "5")
+ (const_string "lea")
+ (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
(set (attr "length_immediate")
(if_then_else
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "QI,QI,SI,SI")])
+ (set_attr "mode" "QI,QI,QI,SI,SI,SI")])
(define_insn "*addqi_1_slp"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
@@ -6658,7 +6686,7 @@
(match_operand:DI 2 "x86_64_nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && reload_completed
- && ix86_lea_for_add_ok (PLUS, insn, operands)"
+ && ix86_lea_for_add_ok (insn, operands)"
[(set (match_dup 0)
(plus:DI (match_dup 1)
(match_dup 2)))]
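
This splitter is the other half of the "#" templates above: when
ix86_lea_for_add_ok approves, the flag-clobbering add is rewritten as a
bare (set ... (plus ...)), which only the LEA pattern can match;
otherwise the insn keeps its clobber and is emitted as ADD/INC/DEC/SUB.
A hedged model of the decision, with hypothetical names (the real test
is ix86_lea_for_add_ok in i386.c below):

#include <stdbool.h>

static bool
use_lea_p (unsigned regno0, unsigned regno1, unsigned regno2,
           bool result_feeds_agu_soon)
{
  /* a = b + c with a distinct from both b and c cannot be expressed
     as a two-operand ADD, so the LEA form is the only single-insn
     choice.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;
  /* Otherwise prefer ADD unless the result is consumed by address
     generation shortly, where staying on the AGU pays off.  */
  return result_feeds_agu_soon;
}
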
@@ -6670,7 +6698,7 @@
(plus (match_operand 1 "register_operand" "")
(match_operand 2 "nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
- "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)"
+ "reload_completed && ix86_lea_for_add_ok (insn, operands)"
[(const_int 0)]
{
rtx pat;
Index: gcc/config/i386/i386-protos.h
===================================================================
@@ -86,7 +86,7 @@ extern void ix86_fixup_binary_operands_n
extern void ix86_expand_binary_operator (enum rtx_code,
enum machine_mode, rtx[]);
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
-extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]);
+extern bool ix86_lea_for_add_ok (rtx, rtx[]);
extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high);
extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
Index: gcc/config/i386/i386.c
===================================================================
@@ -14808,8 +14808,7 @@ distance_agu_use (unsigned int regno0, r
used soon, LEA is better and otherwise ADD is better. */
bool
-ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
- rtx insn, rtx operands[])
+ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
unsigned int regno0 = true_regnum (operands[0]);
unsigned int regno1 = true_regnum (operands[1]);
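
The comment fragment above ("used soon, LEA is better and otherwise ADD
is better") summarizes the distance heuristic: distance_agu_use scans
forward from the insn for a use of the result in address generation.  A
minimal sketch of such a windowed test, with an assumed threshold
(LEA_SEARCH_THRESHOLD here is illustrative, not necessarily GCC's
value):

#define LEA_SEARCH_THRESHOLD 8   /* assumed lookahead window */

/* DISTANCE is the number of insns until the first address-generation
   use of the result, or -1 if none was found in the window.  */
static int
lea_better_p (int distance)
{
  return distance >= 0 && distance < LEA_SEARCH_THRESHOLD;
}
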