===================================================================
@@ -413,7 +413,8 @@ should_replace_address (rtx old_rtx, rtx
eliminating the most insns without additional costs, and it
is the same that cse.c used to do. */
if (gain == 0)
- gain = rtx_cost (new_rtx, SET, speed) - rtx_cost (old_rtx, SET, speed);
+ gain = (rtx_cost2 (new_rtx, SET, NULL_RTX, speed)
+ - rtx_cost2 (old_rtx, SET, NULL_RTX, speed));
return (gain > 0);
}
@@ -962,7 +963,7 @@ try_fwprop_subst (df_ref use, rtx *loc,
multiple sets. If so, assume the cost of the new instruction is
not greater than the old one. */
if (set)
- old_cost = rtx_cost (SET_SRC (set), SET, speed);
+ old_cost = rtx_cost2 (SET_SRC (set), SET, SET_DEST (set), speed);
if (dump_file)
{
fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
@@ -983,7 +984,7 @@ try_fwprop_subst (df_ref use, rtx *loc,
else if (DF_REF_TYPE (use) == DF_REF_REG_USE
&& set
- && rtx_cost (SET_SRC (set), SET, speed) > old_cost)
+ && rtx_cost2 (SET_SRC (set), SET, SET_DEST (set), speed) > old_cost)
{
if (dump_file)
fprintf (dump_file, "Changes to insn %d not profitable\n",
===================================================================
@@ -6373,14 +6373,17 @@ Define this macro if a non-short-circuit
@code{BRANCH_COST} is greater than or equal to the value 2.
@end defmac
-@deftypefn {Target Hook} bool TARGET_RTX_COSTS (rtx @var{x}, int @var{code}, int @var{outer_code}, int *@var{total}, bool @var{speed})
+@deftypefn {Target Hook} bool TARGET_RTX_COSTS2 (rtx @var{x}, int @var{code}, int @var{outer_code}, rtx @var{set_lhs}, int *@var{total}, bool @var{speed})
This target hook describes the relative costs of RTL expressions.
The cost may depend on the precise form of the expression, which is
available for examination in @var{x}, and the rtx code of the expression
in which it is contained, found in @var{outer_code}. @var{code} is the
expression code---redundant, since it can be obtained with
-@code{GET_CODE (@var{x})}.
+@code{GET_CODE (@var{x})}.  @var{set_lhs} is either @code{NULL_RTX} or
+the @code{SET_DEST} of the @code{SET} whose cost is being computed.
+In the latter case, @var{x} is known to occur somewhere in the
+@code{SET_SRC} of that @code{SET}.
In implementing this hook, you can use the construct
@code{COSTS_N_INSNS (@var{n})} to specify a cost equal to @var{n} fast
@@ -6400,10 +6403,15 @@ The hook returns true when all subexpres
processed, and false when @code{rtx_cost} should recurse.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_RTX_COSTS (rtx @var{x}, int @var{code}, int @var{outer_code}, int *@var{total}, bool @var{speed})
+This target hook is an old form of @code{TARGET_RTX_COSTS2}, which omits
+the @var{set_lhs} argument. It should not be used in new code.
+@end deftypefn
+
@deftypefn {Target Hook} int TARGET_ADDRESS_COST (rtx @var{address}, bool @var{speed})
This hook computes the cost of an addressing mode that contains
@var{address}. If not defined, the cost is computed from
-the @var{address} expression and the @code{TARGET_RTX_COST} hook.
+the @var{address} expression and the @code{TARGET_RTX_COSTS2} hook.
For most CISC machines, the default cost is a good approximation of the
true cost of the addressing mode. However, on RISC machines, all
===================================================================
@@ -6373,14 +6373,17 @@ Define this macro if a non-short-circuit
@code{BRANCH_COST} is greater than or equal to the value 2.
@end defmac
-@hook TARGET_RTX_COSTS
+@hook TARGET_RTX_COSTS2
This target hook describes the relative costs of RTL expressions.
The cost may depend on the precise form of the expression, which is
available for examination in @var{x}, and the rtx code of the expression
in which it is contained, found in @var{outer_code}. @var{code} is the
expression code---redundant, since it can be obtained with
-@code{GET_CODE (@var{x})}.
+@code{GET_CODE (@var{x})}.  @var{set_lhs} is either @code{NULL_RTX} or
+the @code{SET_DEST} of the @code{SET} whose cost is being computed.
+In the latter case, @var{x} is known to occur somewhere in the
+@code{SET_SRC} of that @code{SET}.
In implementing this hook, you can use the construct
@code{COSTS_N_INSNS (@var{n})} to specify a cost equal to @var{n} fast
@@ -6400,10 +6403,15 @@ The hook returns true when all subexpres
processed, and false when @code{rtx_cost} should recurse.
@end deftypefn
+@hook TARGET_RTX_COSTS
+This target hook is an old form of @code{TARGET_RTX_COSTS2}, which omits
+the @var{set_lhs} argument. It should not be used in new code.
+@end deftypefn
+
@hook TARGET_ADDRESS_COST
This hook computes the cost of an addressing mode that contains
@var{address}. If not defined, the cost is computed from
-the @var{address} expression and the @code{TARGET_RTX_COST} hook.
+the @var{address} expression and the @code{TARGET_RTX_COSTS2} hook.
For most CISC machines, the default cost is a good approximation of the
true cost of the addressing mode. However, on RISC machines, all
===================================================================
@@ -1198,6 +1198,13 @@ default_register_move_cost (enum machine
}
+/* Default implementation of the rtx_costs2 target hook: ignore SET_LHS
+ and defer to the older rtx_costs hook, passing the remaining arguments
+ through unchanged. */
bool
+default_rtx_costs2 (rtx x, int code, int outer_code,
+ rtx set_lhs ATTRIBUTE_UNUSED, int *total, bool speed)
+{
+ return targetm.rtx_costs (x, code, outer_code, total, speed);
+}
+
+bool
default_profile_before_prologue (void)
{
#ifdef PROFILE_BEFORE_PROLOGUE
===================================================================
@@ -146,6 +146,7 @@ extern bool default_addr_space_subset_p
extern rtx default_addr_space_convert (rtx, tree, tree);
extern unsigned int default_case_values_threshold (void);
extern bool default_have_conditional_execution (void);
+extern bool default_rtx_costs2 (rtx, int, int, rtx, int *, bool);
extern int default_memory_move_cost (enum machine_mode, reg_class_t, bool);
extern int default_register_move_cost (enum machine_mode, reg_class_t,
reg_class_t);
===================================================================
@@ -1408,10 +1408,20 @@ DEFHOOK
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
- scanned. In either case, *TOTAL contains the cost result. */
+ scanned. In either case, *TOTAL contains the cost result.
+ If SET_LHS is nonnull, it means we're computing the costs of the
+ right-hand-side expression of a SET whose destination is SET_LHS. */
/* Note that CODE and OUTER_CODE ought to be RTX_CODE, but that's
not necessarily defined at this point. */
DEFHOOK
+(rtx_costs2,
+ "",
+ bool, (rtx x, int code, int outer_code, rtx set_lhs, int *total, bool speed),
+ default_rtx_costs2)
+
+/* The old form of rtx_costs2, without the SET_LHS argument. This should not
+ be used in new code. */
+DEFHOOK
(rtx_costs,
"",
bool, (rtx x, int code, int outer_code, int *total, bool speed),
===================================================================
@@ -1364,7 +1364,8 @@ avoid_expensive_constant (enum machine_m
if (mode != VOIDmode
&& optimize
&& CONSTANT_P (x)
- && rtx_cost (x, binoptab->code, speed) > rtx_cost (x, SET, speed))
+ && (rtx_cost2 (x, binoptab->code, NULL_RTX, speed)
+ > rtx_cost2 (x, SET, NULL_RTX, speed)))
{
if (CONST_INT_P (x))
{
@@ -3997,12 +3998,12 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx
/* If we are optimizing, force expensive constants into a register. */
if (CONSTANT_P (x) && optimize
- && (rtx_cost (x, COMPARE, optimize_insn_for_speed_p ())
+ && (rtx_cost2 (x, COMPARE, NULL_RTX, optimize_insn_for_speed_p ())
> COSTS_N_INSNS (1)))
x = force_reg (mode, x);
if (CONSTANT_P (y) && optimize
- && (rtx_cost (y, COMPARE, optimize_insn_for_speed_p ())
+ && (rtx_cost2 (y, COMPARE, NULL_RTX, optimize_insn_for_speed_p ())
> COSTS_N_INSNS (1)))
y = force_reg (mode, y);
===================================================================
@@ -231,7 +231,7 @@ reload_cse_simplify_set (rtx set, rtx in
{
int did_change = 0;
int dreg;
- rtx src;
+ rtx src, dest;
enum reg_class dclass;
int old_cost;
cselib_val *val;
@@ -241,7 +241,8 @@ reload_cse_simplify_set (rtx set, rtx in
#endif
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
- dreg = true_regnum (SET_DEST (set));
+ dest = SET_DEST (set);
+ dreg = true_regnum (dest);
if (dreg < 0)
return 0;
@@ -274,7 +275,7 @@ reload_cse_simplify_set (rtx set, rtx in
old_cost = register_move_cost (GET_MODE (src),
REGNO_REG_CLASS (REGNO (src)), dclass);
else
- old_cost = rtx_cost (src, SET, speed);
+ old_cost = rtx_cost2 (src, SET, dest, speed);
for (l = val->locs; l; l = l->next)
{
@@ -309,7 +310,7 @@ reload_cse_simplify_set (rtx set, rtx in
this_rtx = GEN_INT (this_val);
}
#endif
- this_cost = rtx_cost (this_rtx, SET, speed);
+ this_cost = rtx_cost2 (this_rtx, SET, dest, speed);
}
else if (REG_P (this_rtx))
{
@@ -317,7 +318,7 @@ reload_cse_simplify_set (rtx set, rtx in
if (extend_op != UNKNOWN)
{
this_rtx = gen_rtx_fmt_e (extend_op, word_mode, this_rtx);
- this_cost = rtx_cost (this_rtx, SET, speed);
+ this_cost = rtx_cost2 (this_rtx, SET, dest, speed);
}
else
#endif
@@ -373,11 +374,11 @@ reload_cse_simplify_set (rtx set, rtx in
static int
reload_cse_simplify_operands (rtx insn, rtx testreg)
{
+ bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
int i, j;
/* For each operand, all registers that are equivalent to it. */
HARD_REG_SET equiv_regs[MAX_RECOG_OPERANDS];
-
const char *constraints[MAX_RECOG_OPERANDS];
/* Vector recording how bad an alternative is. */
@@ -392,6 +393,9 @@ reload_cse_simplify_operands (rtx insn,
/* Array of alternatives, sorted in order of decreasing desirability. */
int *alternative_order;
+ rtx set;
+ struct full_rtx_costs oldcst, newcst;
+
extract_insn (insn);
if (recog_data.n_alternatives == 0 || recog_data.n_operands == 0)
@@ -407,6 +411,12 @@ reload_cse_simplify_operands (rtx insn,
memset (alternative_reject, 0, recog_data.n_alternatives * sizeof (int));
memset (alternative_nregs, 0, recog_data.n_alternatives * sizeof (int));
+ init_costs_to_zero (&oldcst);
+ init_costs_to_zero (&newcst);
+ set = single_set (insn);
+ if (set)
+ get_full_rtx_cost (SET_SRC (set), SET, SET_DEST (set), &oldcst);
+
/* For each operand, find out which regs are equivalent. */
for (i = 0; i < recog_data.n_operands; i++)
{
@@ -570,16 +580,12 @@ reload_cse_simplify_operands (rtx insn,
case ',': case '\0':
/* See if REGNO fits this alternative, and set it up as the
replacement register if we don't have one for this
- alternative yet and the operand being replaced is not
- a cheap CONST_INT. */
+ alternative yet. If the operand is a CONST_INT, do this only
+ if we are able to calculate costs. */
if (op_alt_regno[i][j] == -1
&& recog_data.alternative_enabled_p[j]
&& reg_fits_class_p (testreg, rclass, 0, mode)
- && (!CONST_INT_P (recog_data.operand[i])
- || (rtx_cost (recog_data.operand[i], SET,
- optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)))
- > rtx_cost (testreg, SET,
- optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn))))))
+ && (!CONST_INT_P (recog_data.operand[i]) || set))
{
alternative_nregs[j]++;
op_alt_regno[i][j] = regno;
@@ -657,6 +663,15 @@ reload_cse_simplify_operands (rtx insn,
gen_rtx_REG (mode, op_alt_regno[op][j]), 1);
}
+ if (set)
+ {
+ get_full_rtx_cost (SET_SRC (set), SET, SET_DEST (set), &newcst);
+ /* OLDCST was computed before the operands were replaced. Keep the
+ replacements only if they make the SET strictly cheaper than it
+ was; otherwise undo the whole change group. */
+ if (!costs_lt_p (&newcst, &oldcst, speed))
+ {
+ cancel_changes (0);
+ return 0;
+ }
+ }
return apply_change_group ();
}
@@ -913,12 +928,13 @@ try_replace_in_use (struct reg_use *use,
&& CONSTANT_P (XEXP (SET_SRC (new_set), 1)))
{
rtx new_src;
- int old_cost = rtx_cost (SET_SRC (new_set), SET, speed);
+ int old_cost = rtx_cost2 (SET_SRC (new_set), SET, SET_DEST (new_set),
+ speed);
gcc_assert (rtx_equal_p (XEXP (SET_SRC (new_set), 0), reg));
new_src = simplify_replace_rtx (SET_SRC (new_set), reg, src);
- if (rtx_cost (new_src, SET, speed) <= old_cost
+ if (rtx_cost2 (new_src, SET, SET_DEST (new_set), speed) <= old_cost
&& validate_change (use_insn, &SET_SRC (new_set),
new_src, 0))
return true;
@@ -1645,39 +1661,43 @@ move2add_use_add2_insn (rtx reg, rtx sym
if (INTVAL (off) == reg_offset [regno])
changed = validate_change (insn, &SET_SRC (pat), reg, 0);
}
- else if (rtx_cost (new_src, PLUS, speed) < rtx_cost (src, SET, speed)
- && have_add2_insn (reg, new_src))
+ else
{
+ struct full_rtx_costs oldcst, newcst;
rtx tem = gen_rtx_PLUS (GET_MODE (reg), reg, new_src);
- changed = validate_change (insn, &SET_SRC (pat), tem, 0);
- }
- else if (sym == NULL_RTX && GET_MODE (reg) != BImode)
- {
- enum machine_mode narrow_mode;
- for (narrow_mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
- narrow_mode != VOIDmode
- && narrow_mode != GET_MODE (reg);
- narrow_mode = GET_MODE_WIDER_MODE (narrow_mode))
+
+ get_full_rtx_cost (src, SET, SET_DEST (pat), &oldcst);
+ get_full_rtx_cost (tem, SET, SET_DEST (pat), &newcst);
+
+ if (costs_lt_p (&newcst, &oldcst, speed)
+ && have_add2_insn (reg, new_src))
+ changed = validate_change (insn, &SET_SRC (pat), tem, 0);
+ else if (sym == NULL_RTX && GET_MODE (reg) != BImode)
{
- if (have_insn_for (STRICT_LOW_PART, narrow_mode)
- && ((reg_offset[regno]
- & ~GET_MODE_MASK (narrow_mode))
- == (INTVAL (off)
- & ~GET_MODE_MASK (narrow_mode))))
+ enum machine_mode narrow_mode;
+ for (narrow_mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
+ narrow_mode != VOIDmode
+ && narrow_mode != GET_MODE (reg);
+ narrow_mode = GET_MODE_WIDER_MODE (narrow_mode))
{
- rtx narrow_reg = gen_rtx_REG (narrow_mode,
- REGNO (reg));
- rtx narrow_src = gen_int_mode (INTVAL (off),
- narrow_mode);
- rtx new_set =
- gen_rtx_SET (VOIDmode,
- gen_rtx_STRICT_LOW_PART (VOIDmode,
- narrow_reg),
- narrow_src);
- changed = validate_change (insn, &PATTERN (insn),
- new_set, 0);
- if (changed)
- break;
+ if (have_insn_for (STRICT_LOW_PART, narrow_mode)
+ && ((reg_offset[regno] & ~GET_MODE_MASK (narrow_mode))
+ == (INTVAL (off) & ~GET_MODE_MASK (narrow_mode))))
+ {
+ rtx narrow_reg = gen_rtx_REG (narrow_mode,
+ REGNO (reg));
+ rtx narrow_src = gen_int_mode (INTVAL (off),
+ narrow_mode);
+ rtx new_set
+ = gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode,
+ narrow_reg),
+ narrow_src);
+ changed = validate_change (insn, &PATTERN (insn),
+ new_set, 0);
+ if (changed)
+ break;
+ }
}
}
}
@@ -1705,11 +1725,16 @@ move2add_use_add3_insn (rtx reg, rtx sym
rtx pat = PATTERN (insn);
rtx src = SET_SRC (pat);
int regno = REGNO (reg);
- int min_cost = INT_MAX;
int min_regno = 0;
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
int i;
bool changed = false;
+ struct full_rtx_costs oldcst, newcst, mincst;
+ rtx plus_expr;
+
+ init_costs_to_max (&mincst);
+
+ plus_expr = gen_rtx_PLUS (GET_MODE (reg), reg, const0_rtx);
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if (reg_set_luid[i] > move2add_last_label_luid
@@ -1728,22 +1753,26 @@ move2add_use_add3_insn (rtx reg, rtx sym
no-op moves. */
if (new_src == const0_rtx)
{
- min_cost = 0;
+ init_costs_to_zero (&mincst);
min_regno = i;
break;
}
else
{
- int cost = rtx_cost (new_src, PLUS, speed);
- if (cost < min_cost)
+ XEXP (plus_expr, 1) = new_src;
+ get_full_rtx_cost (plus_expr, SET, SET_DEST (pat), &newcst);
+
+ if (costs_lt_p (&newcst, &mincst, speed))
{
- min_cost = cost;
+ mincst = newcst;
min_regno = i;
}
}
}
- if (min_cost < rtx_cost (src, SET, speed))
+ get_full_rtx_cost (src, SET, SET_DEST (pat), &oldcst);
+
+ if (costs_lt_p (&mincst, &oldcst, speed))
{
rtx tem;
@@ -1879,18 +1908,23 @@ reload_cse_move2add (rtx first)
/* See above why we create (set (reg) (reg)) here. */
success
= validate_change (next, &SET_SRC (set), reg, 0);
- else if ((rtx_cost (new_src, PLUS, speed)
- < COSTS_N_INSNS (1) + rtx_cost (src3, SET, speed))
- && have_add2_insn (reg, new_src))
+ else
{
- rtx newpat = gen_rtx_SET (VOIDmode,
- reg,
- gen_rtx_PLUS (GET_MODE (reg),
- reg,
- new_src));
- success
- = validate_change (next, &PATTERN (next),
- newpat, 0);
+ struct full_rtx_costs oldcst, newcst;
+ rtx tem = gen_rtx_PLUS (GET_MODE (reg), reg, new_src);
+
+ get_full_rtx_cost (src3, SET, reg, &oldcst);
+ get_full_rtx_cost (tem, SET, reg, &newcst);
+ costs_add_n_insns (&oldcst, 1);
+
+ if (costs_lt_p (&newcst, &oldcst, speed)
+ && have_add2_insn (reg, new_src))
+ {
+ rtx newpat = gen_rtx_SET (VOIDmode, reg, tem);
+ success
+ = validate_change (next, &PATTERN (next),
+ newpat, 0);
+ }
}
if (success)
delete_insn (insn);
===================================================================
@@ -3534,7 +3534,7 @@ label_is_jump_target_p (const_rtx label,
be returned. */
int
-rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED, bool speed)
+rtx_cost2 (rtx x, enum rtx_code outer_code, rtx set_lhs, bool speed)
{
int i, j;
enum rtx_code code;
@@ -3582,24 +3582,36 @@ rtx_cost (rtx x, enum rtx_code outer_cod
break;
default:
- if (targetm.rtx_costs (x, code, outer_code, &total, speed))
+ if (targetm.rtx_costs2 (x, code, outer_code, set_lhs, &total, speed))
return total;
break;
}
+ if (code == SET)
+ set_lhs = SET_DEST (x);
+
/* Sum the costs of the sub-rtx's, plus cost of this operation,
which is already in total. */
fmt = GET_RTX_FORMAT (code);
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
if (fmt[i] == 'e')
- total += rtx_cost (XEXP (x, i), code, speed);
+ total += rtx_cost2 (XEXP (x, i), code, set_lhs, speed);
else if (fmt[i] == 'E')
for (j = 0; j < XVECLEN (x, i); j++)
- total += rtx_cost (XVECEXP (x, i, j), code, speed);
+ total += rtx_cost2 (XVECEXP (x, i, j), code, set_lhs, speed);
return total;
}
+
+/* Fill in *C with the cost of X, which appears inside an expression with
+ code OUTER: C->speed is the result of rtx_cost2 with SPEED true and
+ C->size the result with SPEED false. SET_LHS is passed through to
+ rtx_cost2 unchanged. */
+void
+get_full_rtx_cost (rtx x, enum rtx_code outer, rtx set_lhs,
+ struct full_rtx_costs *c)
+{
+ c->speed = rtx_cost2 (x, outer, set_lhs, true);
+ c->size = rtx_cost2 (x, outer, set_lhs, false);
+}
+
/* Return cost of address expression X.
Expect that X is properly formed address reference.
@@ -3625,7 +3637,7 @@ address_cost (rtx x, enum machine_mode m
int
default_address_cost (rtx x, bool speed)
{
- return rtx_cost (x, MEM, speed);
+ return rtx_cost2 (x, MEM, NULL_RTX, speed);
}
@@ -4653,7 +4665,7 @@ insn_rtx_cost (rtx pat, bool speed)
else
return 0;
- cost = rtx_cost (SET_SRC (set), SET, speed);
+ cost = rtx_cost2 (SET_SRC (set), SET, SET_DEST (set), speed);
return cost > 0 ? cost : COSTS_N_INSNS (1);
}
===================================================================
@@ -314,7 +314,7 @@ seq_cost (const_rtx seq, bool speed)
{
set = single_set (seq);
if (set)
- cost += rtx_cost (set, SET, speed);
+ /* The whole SET is being costed, so there is no enclosing SET_LHS to
+ pass; rtx_cost2 picks up SET_DEST itself when it recurses. */
+ cost += rtx_cost2 (set, SET, NULL_RTX, speed);
else
cost++;
}
===================================================================
@@ -484,9 +484,9 @@ attempt_change (rtx new_addr, rtx inc_re
PUT_MODE (mem_tmp, mode);
XEXP (mem_tmp, 0) = new_addr;
- old_cost = (rtx_cost (mem, SET, speed)
- + rtx_cost (PATTERN (inc_insn.insn), SET, speed));
- new_cost = rtx_cost (mem_tmp, SET, speed);
+ old_cost = (rtx_cost2 (mem, SET, NULL_RTX, speed)
+ + rtx_cost2 (PATTERN (inc_insn.insn), SET, NULL_RTX, speed));
+ new_cost = rtx_cost2 (mem_tmp, SET, NULL_RTX, speed);
/* The first item of business is to see if this is profitable. */
if (old_cost < new_cost)
===================================================================
===================================================================
===================================================================
===================================================================
===================================================================
@@ -143,6 +143,7 @@ static GTY(()) rtx shift_test;
static bool
prefer_and_bit_test (enum machine_mode mode, int bitnum)
{
+ bool speed = optimize_insn_for_speed_p ();
if (and_test == 0)
{
/* Set up rtxes for the two variations. Use NULL as a placeholder
@@ -167,8 +168,8 @@ prefer_and_bit_test (enum machine_mode m
mode);
XEXP (XEXP (shift_test, 0), 1) = GEN_INT (bitnum);
- return (rtx_cost (and_test, IF_THEN_ELSE, optimize_insn_for_speed_p ())
- <= rtx_cost (shift_test, IF_THEN_ELSE, optimize_insn_for_speed_p ()));
+ return (rtx_cost2 (and_test, IF_THEN_ELSE, NULL_RTX, speed)
+ <= rtx_cost2 (shift_test, IF_THEN_ELSE, NULL_RTX, speed));
}
/* Subroutine of do_jump, dealing with exploded comparisons of the type
===================================================================
@@ -2726,7 +2726,7 @@ seq_cost (rtx seq, bool speed)
{
set = single_set (seq);
if (set)
- cost += rtx_cost (set, SET,speed);
+ /* The whole SET is being costed, so there is no enclosing SET_LHS to
+ pass; rtx_cost2 picks up SET_DEST itself when it recurses. */
+ cost += rtx_cost2 (set, SET, NULL_RTX, speed);
else
cost++;
}
===================================================================
@@ -468,7 +468,7 @@ struct table_elt
/* Compute cost of X, as stored in the `cost' field of a table_elt. Fixed
hard registers and pointers into the frame are the cheapest with a cost
of 0. Next come pseudos with a cost of one and other hard registers with
- a cost of 2. Aside from these special cases, call `rtx_cost'. */
+ a cost of 2. Aside from these special cases, call `rtx_cost2'. */
#define CHEAP_REGNO(N) \
(REGNO_PTR_FRAME_P(N) \
@@ -764,7 +764,7 @@ notreg_cost (rtx x, enum rtx_code outer)
&& TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (x)),
GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x)))))
? 0
- : rtx_cost (x, outer, optimize_this_for_speed_p) * 2);
+ : rtx_cost2 (x, outer, NULL_RTX, optimize_this_for_speed_p) * 2);
}
===================================================================
@@ -2027,7 +2027,8 @@ noce_try_sign_mask (struct noce_if_info
&& (if_info->insn_b == NULL_RTX
|| BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb));
if (!(t_unconditional
- || (rtx_cost (t, SET, optimize_bb_for_speed_p (if_info->test_bb))
+ || (rtx_cost2 (t, SET, NULL_RTX,
+ optimize_bb_for_speed_p (if_info->test_bb))
< COSTS_N_INSNS (2))))
return FALSE;
===================================================================
@@ -3451,9 +3451,9 @@ compress_float_constant (rtx x, rtx y)
REAL_VALUE_FROM_CONST_DOUBLE (r, y);
if (LEGITIMATE_CONSTANT_P (y))
- oldcost = rtx_cost (y, SET, speed);
+ oldcost = rtx_cost2 (y, SET, NULL_RTX, speed);
else
- oldcost = rtx_cost (force_const_mem (dstmode, y), SET, speed);
+ oldcost = rtx_cost2 (force_const_mem (dstmode, y), SET, NULL_RTX, speed);
for (srcmode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (orig_srcmode));
srcmode != orig_srcmode;
@@ -3480,7 +3480,8 @@ compress_float_constant (rtx x, rtx y)
if (! (*insn_data[ic].operand[1].predicate) (trunc_y, srcmode))
continue;
/* This is valid, but may not be cheaper than the original. */
- newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET, speed);
+ newcost = rtx_cost2 (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET,
+ NULL_RTX, speed);
if (oldcost < newcost)
continue;
}
@@ -3488,7 +3489,8 @@ compress_float_constant (rtx x, rtx y)
{
trunc_y = force_const_mem (srcmode, trunc_y);
/* This is valid, but may not be cheaper than the original. */
- newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET, speed);
+ newcost = rtx_cost2 (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET,
+ NULL_RTX, speed);
if (oldcost < newcost)
continue;
trunc_y = validize_mem (trunc_y);
===================================================================
@@ -1705,7 +1705,8 @@ find_shift_sequence (int access_size,
byte = subreg_lowpart_offset (read_mode, new_mode);
ret = simplify_subreg (read_mode, ret, new_mode, byte);
if (ret && CONSTANT_P (ret)
- && rtx_cost (ret, SET, speed) <= COSTS_N_INSNS (1))
+ && (rtx_cost2 (ret, SET, NULL_RTX, speed)
+ <= COSTS_N_INSNS (1)))
return ret;
}
}
===================================================================
@@ -826,7 +826,7 @@ want_to_gcse_p (rtx x, int *max_distance
gcc_assert (!optimize_function_for_speed_p (cfun)
&& optimize_function_for_size_p (cfun));
- cost = rtx_cost (x, SET, 0);
+ cost = rtx_cost2 (x, SET, NULL_RTX, 0);
if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
{
===================================================================
@@ -713,8 +713,9 @@ precompute_register_parameters (int num_
|| (GET_CODE (args[i].value) == SUBREG
&& REG_P (SUBREG_REG (args[i].value)))))
&& args[i].mode != BLKmode
- && rtx_cost (args[i].value, SET, optimize_insn_for_speed_p ())
- > COSTS_N_INSNS (1)
+ && (rtx_cost2 (args[i].value, SET, NULL_RTX,
+ optimize_insn_for_speed_p ())
+ > COSTS_N_INSNS (1))
&& ((*reg_parm_seen
&& targetm.small_register_classes_for_mode_p (args[i].mode))
|| optimize))
===================================================================
@@ -190,7 +190,7 @@ init_expmed (void)
for (speed = 0; speed < 2; speed++)
{
crtl->maybe_hot_insn_p = speed;
- zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
+ zero_cost[speed] = rtx_cost2 (const0_rtx, SET, NULL_RTX, speed);
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
@@ -211,15 +211,15 @@ init_expmed (void)
PUT_MODE (&all.shift_sub0, mode);
PUT_MODE (&all.shift_sub1, mode);
- add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
- neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
- mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
- sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
- udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
+ add_cost[speed][mode] = rtx_cost2 (&all.plus, SET, NULL_RTX, speed);
+ neg_cost[speed][mode] = rtx_cost2 (&all.neg, SET, NULL_RTX, speed);
+ mul_cost[speed][mode] = rtx_cost2 (&all.mult, SET, NULL_RTX, speed);
+ sdiv_cost[speed][mode] = rtx_cost2 (&all.sdiv, SET, NULL_RTX, speed);
+ udiv_cost[speed][mode] = rtx_cost2 (&all.udiv, SET, NULL_RTX, speed);
- sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
+ sdiv_pow2_cheap[speed][mode] = (rtx_cost2 (&all.sdiv_32, SET, NULL_RTX, speed)
<= 2 * add_cost[speed][mode]);
- smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
+ smod_pow2_cheap[speed][mode] = (rtx_cost2 (&all.smod_32, SET, NULL_RTX, speed)
<= 4 * add_cost[speed][mode]);
wider_mode = GET_MODE_WIDER_MODE (mode);
@@ -231,9 +231,9 @@ init_expmed (void)
XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
mul_widen_cost[speed][wider_mode]
- = rtx_cost (&all.wide_mult, SET, speed);
+ = rtx_cost2 (&all.wide_mult, SET, NULL_RTX, speed);
mul_highpart_cost[speed][mode]
- = rtx_cost (&all.wide_trunc, SET, speed);
+ = rtx_cost2 (&all.wide_trunc, SET, NULL_RTX, speed);
}
shift_cost[speed][mode][0] = 0;
@@ -246,10 +246,10 @@ init_expmed (void)
XEXP (&all.shift, 1) = cint[m];
XEXP (&all.shift_mult, 1) = pow2[m];
- shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
- shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
- shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
- shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
+ shift_cost[speed][mode][m] = rtx_cost2 (&all.shift, SET, NULL_RTX, speed);
+ shiftadd_cost[speed][mode][m] = rtx_cost2 (&all.shift_add, SET, NULL_RTX, speed);
+ shiftsub0_cost[speed][mode][m] = rtx_cost2 (&all.shift_sub0, SET, NULL_RTX, speed);
+ shiftsub1_cost[speed][mode][m] = rtx_cost2 (&all.shift_sub1, SET, NULL_RTX, speed);
}
}
}
@@ -3065,7 +3065,7 @@ expand_mult (enum machine_mode mode, rtx
result is interpreted as an unsigned coefficient.
Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult. */
- max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
+ max_cost = rtx_cost2 (gen_rtx_MULT (mode, fake_reg, op1), SET, NULL_RTX, speed)
- neg_cost[speed][mode];
if (max_cost > 0
&& choose_mult_variant (mode, -INTVAL (op1), &algorithm,
@@ -3111,7 +3111,7 @@ expand_mult (enum machine_mode mode, rtx
/* Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult. */
- max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
+ max_cost = rtx_cost2 (gen_rtx_MULT (mode, fake_reg, op1), SET, NULL_RTX, speed);
if (choose_mult_variant (mode, coeff, &algorithm, &variant,
max_cost))
return expand_mult_const (mode, op0, coeff, target,
@@ -3598,7 +3598,8 @@ expand_smod_pow2 (enum machine_mode mode
temp = gen_rtx_LSHIFTRT (mode, result, shift);
if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
- || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
+ || (rtx_cost2 (temp, SET, NULL_RTX, optimize_insn_for_speed_p ())
+ > COSTS_N_INSNS (2)))
{
temp = expand_binop (mode, xor_optab, op0, signmask,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
@@ -5447,8 +5448,8 @@ emit_store_flag (rtx target, enum rtx_co
/* For the reverse comparison, use either an addition or a XOR. */
if (want_add
- && rtx_cost (GEN_INT (normalizep), PLUS,
- optimize_insn_for_speed_p ()) == 0)
+ && rtx_cost2 (GEN_INT (normalizep), PLUS, NULL_RTX,
+ optimize_insn_for_speed_p ()) == 0)
{
tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
STORE_FLAG_VALUE, target_mode);
@@ -5458,8 +5459,8 @@ emit_store_flag (rtx target, enum rtx_co
target, 0, OPTAB_WIDEN);
}
else if (!want_add
- && rtx_cost (trueval, XOR,
- optimize_insn_for_speed_p ()) == 0)
+ && rtx_cost2 (trueval, XOR, NULL_RTX,
+ optimize_insn_for_speed_p ()) == 0)
{
tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
normalizep, target_mode);
@@ -5551,8 +5552,8 @@ emit_store_flag (rtx target, enum rtx_co
/* Again, for the reverse comparison, use either an addition or a XOR. */
if (want_add
- && rtx_cost (GEN_INT (normalizep), PLUS,
- optimize_insn_for_speed_p ()) == 0)
+ && rtx_cost2 (GEN_INT (normalizep), PLUS, NULL_RTX,
+ optimize_insn_for_speed_p ()) == 0)
{
tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
STORE_FLAG_VALUE, target_mode);
@@ -5561,8 +5562,8 @@ emit_store_flag (rtx target, enum rtx_co
GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
}
else if (!want_add
- && rtx_cost (trueval, XOR,
- optimize_insn_for_speed_p ()) == 0)
+ && rtx_cost2 (trueval, XOR, NULL_RTX,
+ optimize_insn_for_speed_p ()) == 0)
{
tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
normalizep, target_mode);
===================================================================
@@ -1829,8 +1829,9 @@ simplify_binary_operation_1 (enum rtx_co
coeff = immed_double_int_const (val, mode);
tem = simplify_gen_binary (MULT, mode, lhs, coeff);
- return rtx_cost (tem, SET, speed) <= rtx_cost (orig, SET, speed)
- ? tem : 0;
+ return (rtx_cost2 (tem, SET, NULL_RTX, speed)
+ <= rtx_cost2 (orig, SET, NULL_RTX, speed)
+ ? tem : 0);
}
}
@@ -2010,8 +2011,9 @@ simplify_binary_operation_1 (enum rtx_co
coeff = immed_double_int_const (val, mode);
tem = simplify_gen_binary (MULT, mode, lhs, coeff);
- return rtx_cost (tem, SET, speed) <= rtx_cost (orig, SET, speed)
- ? tem : 0;
+ return (rtx_cost2 (tem, SET, NULL_RTX, speed)
+ <= rtx_cost2 (orig, SET, NULL_RTX, speed)
+ ? tem : 0);
}
}
===================================================================
@@ -704,7 +704,7 @@ create_new_invariant (struct def *def, r
the loop. Otherwise we save only cost of the computation. */
if (def)
{
- inv->cost = rtx_cost (set, SET, speed);
+ inv->cost = rtx_cost2 (set, SET, NULL_RTX, speed);
/* ??? Try to determine cheapness of address computation. Unfortunately
the address cost is only a relative measure, we can't really compare
it with any absolute number, but only with other address costs.
@@ -719,7 +719,7 @@ create_new_invariant (struct def *def, r
}
else
{
- inv->cost = rtx_cost (SET_SRC (set), SET, speed);
+ inv->cost = rtx_cost2 (SET_SRC (set), SET, SET_DEST (set), speed);
inv->cheap_address = false;
}
===================================================================
@@ -1119,9 +1119,64 @@ rhs_regno (const_rtx x)
not to use an rtx with this cost under any circumstances. */
#define MAX_COST INT_MAX
+/* A structure to hold all available cost information about an rtl
+ expression: one cost for each setting of the SPEED argument of
+ rtx_cost2. */
+struct full_rtx_costs
+{
+ int speed; /* Cost computed with SPEED true. */
+ int size; /* Cost computed with SPEED false. */
+};
+
+/* Set both the speed and the size cost in C to MAX_COST. */
+static inline void
+init_costs_to_max (struct full_rtx_costs *c)
+{
+ c->speed = MAX_COST;
+ c->size = MAX_COST;
+}
+
+/* Set both the speed and the size cost in C to zero. */
+static inline void
+init_costs_to_zero (struct full_rtx_costs *c)
+{
+ c->speed = 0;
+ c->size = 0;
+}
+
+/* Return true if the costs in A are strictly lower than those in B.
+ If SPEED is true, the speed costs are compared first and the size
+ costs only break ties; otherwise the size costs are compared first
+ and the speed costs break ties. Neither A nor B is modified. */
+static inline bool
+costs_lt_p (const struct full_rtx_costs *a, const struct full_rtx_costs *b,
+ bool speed)
+{
+ if (speed)
+ return (a->speed < b->speed
+ || (a->speed == b->speed && a->size < b->size));
+ else
+ return (a->size < b->size
+ || (a->size == b->size && a->speed < b->speed));
+}
+
+/* Return true if the costs in A are lower than or equal to those in B.
+ If SPEED is true, the speed costs are compared first and the size
+ costs only break ties; otherwise the size costs are compared first
+ and the speed costs break ties. Neither A nor B is modified. */
+static inline bool
+costs_le_p (const struct full_rtx_costs *a, const struct full_rtx_costs *b,
+ bool speed)
+{
+ if (speed)
+ return (a->speed < b->speed
+ || (a->speed == b->speed && a->size <= b->size));
+ else
+ return (a->size < b->size
+ || (a->size == b->size && a->speed <= b->speed));
+}
+
+/* Add COSTS_N_INSNS (N) to both the speed and the size cost in C.
+ NOTE(review): there is no overflow check, so this presumably must not
+ be applied to costs set by init_costs_to_max (MAX_COST is INT_MAX). */
+static inline void
+costs_add_n_insns (struct full_rtx_costs *c, int n)
+{
+ c->speed += COSTS_N_INSNS (n);
+ c->size += COSTS_N_INSNS (n);
+}
+
extern void init_rtlanal (void);
-extern int rtx_cost (rtx, enum rtx_code, bool);
+extern int rtx_cost2 (rtx, enum rtx_code, rtx, bool);
+#define rtx_cost(a,b,c) rtx_cost2 (a, b, NULL_RTX, c)
extern int address_cost (rtx, enum machine_mode, addr_space_t, bool);
+extern void get_full_rtx_cost (rtx, enum rtx_code, rtx, struct full_rtx_costs *);
extern unsigned int subreg_lsb (const_rtx);
extern unsigned int subreg_lsb_1 (enum machine_mode, enum machine_mode,
unsigned int);
===================================================================
@@ -6351,11 +6351,11 @@ expand_compound_operation (rtx x)
rtx temp2 = expand_compound_operation (temp);
/* Make sure this is a profitable operation. */
- if (rtx_cost (x, SET, optimize_this_for_speed_p)
- > rtx_cost (temp2, SET, optimize_this_for_speed_p))
+ if (rtx_cost2 (x, SET, NULL_RTX, optimize_this_for_speed_p)
+ > rtx_cost2 (temp2, SET, NULL_RTX, optimize_this_for_speed_p))
return temp2;
- else if (rtx_cost (x, SET, optimize_this_for_speed_p)
- > rtx_cost (temp, SET, optimize_this_for_speed_p))
+ else if (rtx_cost2 (x, SET, NULL_RTX, optimize_this_for_speed_p)
+ > rtx_cost2 (temp, SET, NULL_RTX, optimize_this_for_speed_p))
return temp;
else
return x;
@@ -6782,8 +6782,8 @@ make_extraction (enum machine_mode mode,
/* Prefer ZERO_EXTENSION, since it gives more information to
backends. */
- if (rtx_cost (temp, SET, optimize_this_for_speed_p)
- <= rtx_cost (temp1, SET, optimize_this_for_speed_p))
+ if (rtx_cost2 (temp, SET, NULL_RTX, optimize_this_for_speed_p)
+ <= rtx_cost2 (temp1, SET, NULL_RTX, optimize_this_for_speed_p))
return temp;
return temp1;
}
@@ -6984,8 +6984,8 @@ make_extraction (enum machine_mode mode,
/* Prefer ZERO_EXTENSION, since it gives more information to
backends. */
- if (rtx_cost (temp1, SET, optimize_this_for_speed_p)
- < rtx_cost (temp, SET, optimize_this_for_speed_p))
+ if (rtx_cost2 (temp1, SET, NULL_RTX, optimize_this_for_speed_p)
+ < rtx_cost2 (temp, SET, NULL_RTX, optimize_this_for_speed_p))
temp = temp1;
}
pos_rtx = temp;
@@ -7666,8 +7666,8 @@ force_to_mode (rtx x, enum machine_mode
y = simplify_gen_binary (AND, GET_MODE (x),
XEXP (x, 0), GEN_INT (cval));
- if (rtx_cost (y, SET, optimize_this_for_speed_p)
- < rtx_cost (x, SET, optimize_this_for_speed_p))
+ if (rtx_cost2 (y, SET, NULL_RTX, optimize_this_for_speed_p)
+ < rtx_cost2 (x, SET, NULL_RTX, optimize_this_for_speed_p))
x = y;
}
@@ -8825,8 +8825,8 @@ distribute_and_simplify_rtx (rtx x, int
tmp = apply_distributive_law (simplify_gen_binary (inner_code, mode,
new_op0, new_op1));
if (GET_CODE (tmp) != outer_code
- && rtx_cost (tmp, SET, optimize_this_for_speed_p)
- < rtx_cost (x, SET, optimize_this_for_speed_p))
+ && (rtx_cost2 (tmp, SET, NULL_RTX, optimize_this_for_speed_p)
+ < rtx_cost2 (x, SET, NULL_RTX, optimize_this_for_speed_p)))
return tmp;
return NULL_RTX;
===================================================================
@@ -26470,7 +26470,8 @@ ix86_modes_tieable_p (enum machine_mode
scanned. In either case, *TOTAL contains the cost result. */
static bool
-ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
+ix86_rtx_costs2 (rtx x, int code, int outer_code_i, rtx set_lhs, int *total,
+ bool speed)
{
enum rtx_code outer_code = (enum rtx_code) outer_code_i;
enum machine_mode mode = GET_MODE (x);
@@ -26642,7 +26643,8 @@ ix86_rtx_costs (rtx x, int code, int out
*total = (cost->mult_init[MODE_INDEX (mode)]
+ nbits * cost->mult_bit
- + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
+ + rtx_cost2 (op0, outer_code, set_lhs, speed)
+ + rtx_cost2 (op1, outer_code, set_lhs, speed));
return true;
}
@@ -26676,10 +26678,11 @@ ix86_rtx_costs (rtx x, int code, int out
if (val == 2 || val == 4 || val == 8)
{
*total = cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
- *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
- outer_code, speed);
- *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 1), outer_code,
+ set_lhs, speed);
+ *total += rtx_cost2 (XEXP (XEXP (XEXP (x, 0), 0), 0),
+ outer_code, set_lhs, speed);
+ *total += rtx_cost2 (XEXP (x, 1), outer_code, set_lhs, speed);
return true;
}
}
@@ -26690,17 +26693,20 @@ ix86_rtx_costs (rtx x, int code, int out
if (val == 2 || val == 4 || val == 8)
{
*total = cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
- *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 0), outer_code,
+ set_lhs, speed);
+ *total += rtx_cost2 (XEXP (x, 1), outer_code, set_lhs, speed);
return true;
}
}
else if (GET_CODE (XEXP (x, 0)) == PLUS)
{
*total = cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
- *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 0), outer_code,
+ set_lhs, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 1), outer_code, set_lhs,
+ speed);
+ *total += rtx_cost2 (XEXP (x, 1), outer_code, set_lhs, speed);
return true;
}
}
@@ -26732,9 +26738,9 @@ ix86_rtx_costs (rtx x, int code, int out
if (!TARGET_64BIT && mode == DImode)
{
*total = (cost->add * 2
- + (rtx_cost (XEXP (x, 0), outer_code, speed)
+ + (rtx_cost2 (XEXP (x, 0), outer_code, set_lhs, speed)
<< (GET_MODE (XEXP (x, 0)) != DImode))
- + (rtx_cost (XEXP (x, 1), outer_code, speed)
+ + (rtx_cost2 (XEXP (x, 1), outer_code, set_lhs, speed)
<< (GET_MODE (XEXP (x, 1)) != DImode)));
return true;
}
@@ -26776,8 +26782,9 @@ ix86_rtx_costs (rtx x, int code, int out
/* This kind of construct is implemented using test[bwl].
Treat it as if we had an AND. */
*total = (cost->add
- + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
- + rtx_cost (const1_rtx, outer_code, speed));
+ + rtx_cost2 (XEXP (XEXP (x, 0), 0), outer_code, set_lhs,
+ speed)
+ + rtx_cost2 (const1_rtx, outer_code, set_lhs, speed));
return true;
}
return false;
@@ -31468,8 +31475,8 @@ ix86_enum_va_list (int idx, const char *
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
-#undef TARGET_RTX_COSTS
-#define TARGET_RTX_COSTS ix86_rtx_costs
+#undef TARGET_RTX_COSTS2
+#define TARGET_RTX_COSTS2 ix86_rtx_costs2
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost
===================================================================
@@ -143,13 +143,13 @@ static void arm_internal_label (FILE *,
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
tree);
static bool arm_have_conditional_execution (void);
-static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
-static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
-static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
-static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
-static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
-static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
-static bool arm_rtx_costs (rtx, int, int, int *, bool);
+static bool arm_rtx_costs_1 (rtx, enum rtx_code, rtx, int *, bool);
+static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, rtx, int *);
+static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, rtx, int *, bool);
+static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, rtx, int *, bool);
+static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, rtx, int *, bool);
+static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, rtx, int *, bool);
+static bool arm_rtx_costs2 (rtx, int, int, rtx, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
@@ -346,8 +346,8 @@ static const struct attribute_spec arm_a
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
-#undef TARGET_RTX_COSTS
-#define TARGET_RTX_COSTS arm_rtx_costs
+#undef TARGET_RTX_COSTS2
+#define TARGET_RTX_COSTS2 arm_rtx_costs2
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
@@ -6305,7 +6305,8 @@ arm_cannot_force_const_mem (rtx x)
(GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
static inline int
-thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
+thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer,
+ rtx set_lhs)
{
enum machine_mode mode = GET_MODE (x);
int total;
@@ -6361,11 +6362,12 @@ thumb1_rtx_costs (rtx x, enum rtx_code c
else if (outer == AND)
{
int i;
- /* This duplicates the tests in the andsi3 expander. */
+ /* This duplicates the tests in the andsi3 expander. Using shifts
+ takes one extra insn beyond the cost of the AND. */
for (i = 9; i <= 31; i++)
if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
- return COSTS_N_INSNS (2);
+ return COSTS_N_INSNS (1);
}
else if (outer == ASHIFT || outer == ASHIFTRT
|| outer == LSHIFTRT)
@@ -6410,7 +6412,8 @@ thumb1_rtx_costs (rtx x, enum rtx_code c
case SIGN_EXTEND:
case ZERO_EXTEND:
total = mode == DImode ? COSTS_N_INSNS (1) : 0;
- total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
+ total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code,
+ set_lhs);
if (mode == SImode)
return total;
@@ -6428,7 +6431,7 @@ thumb1_rtx_costs (rtx x, enum rtx_code c
}
static inline bool
-arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
+arm_rtx_costs_1 (rtx x, enum rtx_code outer, rtx set_lhs, int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
enum rtx_code subcode;
@@ -6460,7 +6463,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if (GET_CODE (XEXP (x, 1)) == REG)
*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
- *total = rtx_cost (XEXP (x, 1), code, speed);
+ *total = rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
/* Fall through */
case ROTATERT:
@@ -6472,7 +6475,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
/* Fall through */
case ASHIFT: case LSHIFTRT: case ASHIFTRT:
- *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
if (mode == DImode)
{
*total += COSTS_N_INSNS (3);
@@ -6495,14 +6498,14 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if (GET_CODE (XEXP (x, 0)) == CONST_INT
&& const_ok_for_arm (INTVAL (XEXP (x, 0))))
{
- *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
return true;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& const_ok_for_arm (INTVAL (XEXP (x, 1))))
{
- *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
return true;
}
@@ -6519,14 +6522,14 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
&& arm_const_double_rtx (XEXP (x, 0)))
{
- *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
return true;
}
if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
&& arm_const_double_rtx (XEXP (x, 1)))
{
- *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
return true;
}
@@ -6540,7 +6543,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if (GET_CODE (XEXP (x, 0)) == CONST_INT
&& const_ok_for_arm (INTVAL (XEXP (x, 0))))
{
- *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
return true;
}
@@ -6549,8 +6552,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
|| subcode == LSHIFTRT
|| subcode == ROTATE || subcode == ROTATERT)
{
- *total += rtx_cost (XEXP (x, 0), code, speed);
- *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 1), 0), subcode, set_lhs, speed);
return true;
}
@@ -6558,23 +6561,24 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if (GET_CODE (XEXP (x, 0)) == MULT
&& power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
{
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
- *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 0), code, set_lhs, speed);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
return true;
}
if (subcode == MULT
&& power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
{
- *total += rtx_cost (XEXP (x, 0), code, speed);
- *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 1), 0), subcode, set_lhs, speed);
return true;
}
if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
|| GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
{
- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ *total = COSTS_N_INSNS (1) + rtx_cost2 (XEXP (x, 0), code, set_lhs,
+ speed);
if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
&& REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
*total += COSTS_N_INSNS (1);
@@ -6590,9 +6594,9 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
*total = COSTS_N_INSNS (1);
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
- speed);
- *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
+ NULL_RTX, speed);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
return true;
}
@@ -6616,7 +6620,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
&& arm_const_double_rtx (XEXP (x, 1)))
{
- *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
return true;
}
@@ -6630,7 +6634,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
|| GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
{
- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
+ *total = COSTS_N_INSNS (1) + rtx_cost2 (XEXP (x, 1), code, set_lhs,
+ speed);
if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
&& REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
*total += COSTS_N_INSNS (1);
@@ -6657,7 +6662,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& const_ok_for_op (INTVAL (XEXP (x, 1)), code))
{
- *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
return true;
}
@@ -6668,7 +6673,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& const_ok_for_op (INTVAL (XEXP (x, 1)), code))
{
- *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
return true;
}
subcode = GET_CODE (XEXP (x, 0));
@@ -6676,16 +6681,16 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
|| subcode == LSHIFTRT
|| subcode == ROTATE || subcode == ROTATERT)
{
- *total += rtx_cost (XEXP (x, 1), code, speed);
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 0), subcode, set_lhs, speed);
return true;
}
if (subcode == MULT
&& power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
{
- *total += rtx_cost (XEXP (x, 1), code, speed);
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 0), subcode, set_lhs, speed);
return true;
}
@@ -6711,7 +6716,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
&& (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
{
- *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
+ *total = rtx_cost2 (XEXP (XEXP (x, 0), 0), LSHIFTRT,
+ set_lhs, speed);
return true;
}
*total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
@@ -6743,11 +6749,13 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
|| (subcode == MULT
&& power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
{
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 0), subcode,
+ set_lhs, speed);
/* Register shifts cost an extra cycle. */
if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
- *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
- subcode, speed);
+ *total += COSTS_N_INSNS (1) + rtx_cost2 (XEXP (XEXP (x, 0), 1),
+ subcode, set_lhs,
+ speed);
return true;
}
}
@@ -6768,14 +6776,15 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
&& GET_CODE (XEXP (operand, 0)) == REG
&& REGNO (XEXP (operand, 0)) == CC_REGNUM))
*total += COSTS_N_INSNS (1);
- *total += (rtx_cost (XEXP (x, 1), code, speed)
- + rtx_cost (XEXP (x, 2), code, speed));
+ *total += (rtx_cost2 (XEXP (x, 1), code, set_lhs, speed)
+ + rtx_cost2 (XEXP (x, 2), code, set_lhs, speed));
return true;
case NE:
if (mode == SImode && XEXP (x, 1) == const0_rtx)
{
- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ *total = COSTS_N_INSNS (2) + rtx_cost2 (XEXP (x, 0), code,
+ set_lhs, speed);
return true;
}
goto scc_insn;
@@ -6784,7 +6793,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
&& mode == SImode && XEXP (x, 1) == const0_rtx)
{
- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ *total = COSTS_N_INSNS (2) + rtx_cost2 (XEXP (x, 0), code,
+ set_lhs, speed);
return true;
}
goto scc_insn;
@@ -6793,7 +6803,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
&& mode == SImode && XEXP (x, 1) == const0_rtx)
{
- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ *total = COSTS_N_INSNS (1) + rtx_cost2 (XEXP (x, 0), code,
+ set_lhs, speed);
return true;
}
goto scc_insn;
@@ -6834,7 +6845,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& const_ok_for_op (INTVAL (XEXP (x, 1)), code))
{
- *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
return true;
}
@@ -6843,16 +6854,16 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
|| subcode == LSHIFTRT
|| subcode == ROTATE || subcode == ROTATERT)
{
- *total += rtx_cost (XEXP (x, 1), code, speed);
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 0), subcode, set_lhs, speed);
return true;
}
if (subcode == MULT
&& power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
{
- *total += rtx_cost (XEXP (x, 1), code, speed);
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 0), subcode, set_lhs, speed);
return true;
}
@@ -6862,10 +6873,11 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
case UMAX:
case SMIN:
case SMAX:
- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ *total = COSTS_N_INSNS (2) + rtx_cost2 (XEXP (x, 0), code, set_lhs,
+ speed);
if (GET_CODE (XEXP (x, 1)) != CONST_INT
|| !const_ok_for_arm (INTVAL (XEXP (x, 1))))
- *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, speed);
return true;
case ABS:
@@ -6942,7 +6954,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
case ZERO_EXTRACT:
case SIGN_EXTRACT:
- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ *total = COSTS_N_INSNS (1) + rtx_cost2 (XEXP (x, 0), code, set_lhs,
+ speed);
return true;
case CONST_INT:
@@ -6967,7 +6980,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
case LO_SUM:
*total = COSTS_N_INSNS (1);
- *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
return true;
case CONST_DOUBLE:
@@ -6988,7 +7001,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
For now most of the code is copied from thumb1_rtx_costs. We need more
fine grain tuning when we have more related test cases. */
static inline int
-thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
+thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer,
+ rtx set_lhs)
{
enum machine_mode mode = GET_MODE (x);
@@ -7010,7 +7024,8 @@ thumb1_size_rtx_costs (rtx x, enum rtx_c
{
/* Thumb1 mul instruction can't operate on const. We must Load it
into a register first. */
- int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
+ int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET,
+ set_lhs);
return COSTS_N_INSNS (1) + const_size;
}
return COSTS_N_INSNS (1);
@@ -7042,11 +7057,12 @@ thumb1_size_rtx_costs (rtx x, enum rtx_c
else if (outer == AND)
{
int i;
- /* This duplicates the tests in the andsi3 expander. */
+ /* This duplicates the tests in the andsi3 expander. Using shifts
+ takes one extra insn beyond the cost of the AND. */
for (i = 9; i <= 31; i++)
if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
- return COSTS_N_INSNS (2);
+ return COSTS_N_INSNS (1);
}
else if (outer == ASHIFT || outer == ASHIFTRT
|| outer == LSHIFTRT)
@@ -7115,16 +7131,16 @@ thumb1_size_rtx_costs (rtx x, enum rtx_c
/* RTX costs when optimizing for size. */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
- int *total)
+ rtx set_lhs, int *total)
{
enum machine_mode mode = GET_MODE (x);
if (TARGET_THUMB1)
{
- *total = thumb1_size_rtx_costs (x, code, outer_code);
+ *total = thumb1_size_rtx_costs (x, code, outer_code, set_lhs);
return true;
}
- /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
+ /* FIXME: This makes few attempts to prefer narrow Thumb-2 instructions. */
switch (code)
{
case MEM:
@@ -7153,7 +7169,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code
case ROTATE:
if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
{
- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
+ *total = COSTS_N_INSNS (2) + rtx_cost2 (XEXP (x, 0), code,
+ set_lhs, false);
return true;
}
/* Fall through */
@@ -7163,15 +7180,17 @@ arm_size_rtx_costs (rtx x, enum rtx_code
case ASHIFTRT:
if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
{
- *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
+ *total = COSTS_N_INSNS (3) + rtx_cost2 (XEXP (x, 0), code, set_lhs,
+ false);
return true;
}
else if (mode == SImode)
{
- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
+ *total = COSTS_N_INSNS (1) + rtx_cost2 (XEXP (x, 0), code, set_lhs,
+ false);
/* Slightly disparage register shifts, but not by much. */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
- *total += 1 + rtx_cost (XEXP (x, 1), code, false);
+ *total += 1 + rtx_cost2 (XEXP (x, 1), code, set_lhs, false);
return true;
}
@@ -7223,10 +7242,29 @@ arm_size_rtx_costs (rtx x, enum rtx_code
&& power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
{
*total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
- *total += rtx_cost (XEXP (x, 1), code, false);
+ *total += rtx_cost2 (XEXP (XEXP (x, 0), 0), code, set_lhs, false);
+ *total += rtx_cost2 (XEXP (x, 1), code, set_lhs, false);
return true;
}
+ else if (TARGET_THUMB2 && outer_code == SET
+ && mode == SImode
+ && CONST_INT_P (XEXP (x, 1))
+ && REG_P (XEXP (x, 0))
+ && (REGNO (XEXP (x, 0)) < FIRST_HI_REGNUM
+ || REGNO (XEXP (x, 0)) > LAST_HI_REGNUM))
+ {
+ if (INTVAL (XEXP (x, 1)) >= 0
+ && (INTVAL (XEXP (x, 1)) < 8
+ || (XEXP (x, 0) == stack_pointer_rtx
+ && (INTVAL (XEXP (x, 1)) & 3) == 0
+ && INTVAL (XEXP (x, 1)) < 1024)
+ || (rtx_equal_p (set_lhs, XEXP (x, 0))
+ && INTVAL (XEXP (x, 1)) < 256)))
+ {
+ *total = COSTS_N_INSNS (1) / 2;
+ return true;
+ }
+ }
/* Fall through */
case AND: case XOR: case IOR:
@@ -7286,10 +7324,24 @@ arm_size_rtx_costs (rtx x, enum rtx_code
case SIGN_EXTEND:
case ZERO_EXTEND:
- return arm_rtx_costs_1 (x, outer_code, total, 0);
+ return arm_rtx_costs_1 (x, outer_code, set_lhs, total, 0);
case CONST_INT:
- if (const_ok_for_arm (INTVAL (x)))
+ if (TARGET_THUMB2 && outer_code == SET
+ && INTVAL (x) >= 0 && INTVAL (x) < 256
+ && (!set_lhs
+ || (REG_P (set_lhs)
+ && (REGNO (set_lhs) < FIRST_HI_REGNUM
+ || REGNO (set_lhs) > LAST_HI_REGNUM))))
+ *total = COSTS_N_INSNS (1) / 2;
+ else if (TARGET_THUMB2 && outer_code == SET
+ && INTVAL (x) >= 0 && INTVAL (x) < 65536)
+ *total = COSTS_N_INSNS (1);
+ else if (arm_arch6 && outer_code == AND
+ && mode == SImode
+ && (INTVAL (x) == 255 || INTVAL (x) == 65535))
+ *total = 0;
+ else if (const_ok_for_arm (INTVAL (x)))
/* A multiplication by a constant requires another instruction
to load the constant to a register. */
*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
@@ -7304,8 +7356,11 @@ arm_size_rtx_costs (rtx x, enum rtx_code
else
*total = COSTS_N_INSNS (1);
}
- else
+ else if (outer_code == SET)
*total = COSTS_N_INSNS (2);
+ else
+ return arm_size_rtx_costs (x, code, SET, set_lhs, total);
+
return true;
case CONST:
@@ -7336,16 +7391,16 @@ arm_size_rtx_costs (rtx x, enum rtx_code
/* RTX costs when optimizing for size. */
static bool
-arm_rtx_costs (rtx x, int code, int outer_code, int *total,
- bool speed)
+arm_rtx_costs2 (rtx x, int code, int outer_code, rtx set_lhs, int *total,
+ bool speed) /* Dispatcher: arm_size_rtx_costs when !SPEED, else current_tune->rtx_costs. */
{
if (!speed)
return arm_size_rtx_costs (x, (enum rtx_code) code,
- (enum rtx_code) outer_code, total);
+ (enum rtx_code) outer_code, set_lhs, total); /* The size routine takes no SPEED argument. */
else
return current_tune->rtx_costs (x, (enum rtx_code) code,
(enum rtx_code) outer_code,
- total, speed);
+ set_lhs, total, speed); /* SET_LHS is forwarded unchanged on both paths. */
}
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
@@ -7353,13 +7408,13 @@ arm_rtx_costs (rtx x, int code, int oute
static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
- int *total, bool speed)
+ rtx set_lhs, int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
if (TARGET_THUMB)
{
- *total = thumb1_rtx_costs (x, code, outer_code);
+ *total = thumb1_rtx_costs (x, code, outer_code, set_lhs);
return true;
}
@@ -7390,7 +7445,7 @@ arm_slowmul_rtx_costs (rtx x, enum rtx_c
}
*total = COSTS_N_INSNS (cost);
- *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost2 (XEXP (x, 0), code, set_lhs, speed);
return true;
}
@@ -7398,7 +7453,7 @@ arm_slowmul_rtx_costs (rtx x, enum rtx_c
return false;
default:
- return arm_rtx_costs_1 (x, outer_code, total, speed);;
+ return arm_rtx_costs_1 (x, outer_code, set_lhs, total, speed);
}
}
@@ -7407,13 +7462,13 @@ arm_slowmul_rtx_costs (rtx x, enum rtx_c
static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
- int *total, bool speed)
+ rtx set_lhs, int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
if (TARGET_THUMB1)
{
- *total = thumb1_rtx_costs (x, code, outer_code);
+ *total = thumb1_rtx_costs (x, code, outer_code, set_lhs);
return true;
}
@@ -7481,7 +7536,7 @@ arm_fastmul_rtx_costs (rtx x, enum rtx_c
return false;
default:
- return arm_rtx_costs_1 (x, outer_code, total, speed);
+ return arm_rtx_costs_1 (x, outer_code, set_lhs, total, speed);
}
}
@@ -7491,13 +7546,13 @@ arm_fastmul_rtx_costs (rtx x, enum rtx_c
static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
- int *total, bool speed)
+ rtx set_lhs, int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
if (TARGET_THUMB)
{
- *total = thumb1_rtx_costs (x, code, outer_code);
+ *total = thumb1_rtx_costs (x, code, outer_code, set_lhs);
return true;
}
@@ -7505,7 +7560,7 @@ arm_xscale_rtx_costs (rtx x, enum rtx_co
{
case COMPARE:
if (GET_CODE (XEXP (x, 0)) != MULT)
- return arm_rtx_costs_1 (x, outer_code, total, speed);
+ return arm_rtx_costs_1 (x, outer_code, set_lhs, total, speed);
/* A COMPARE of a MULT is slow on XScale; the muls instruction
will stall until the multiplication is complete. */
@@ -7571,7 +7626,7 @@ arm_xscale_rtx_costs (rtx x, enum rtx_co
return false;
default:
- return arm_rtx_costs_1 (x, outer_code, total, speed);
+ return arm_rtx_costs_1 (x, outer_code, set_lhs, total, speed);
}
}
@@ -7580,7 +7635,7 @@ arm_xscale_rtx_costs (rtx x, enum rtx_co
static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
- int *total, bool speed)
+ rtx set_lhs, int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
@@ -7593,7 +7648,7 @@ arm_9e_rtx_costs (rtx x, enum rtx_code c
return true;
default:
- *total = thumb1_rtx_costs (x, code, outer_code);
+ *total = thumb1_rtx_costs (x, code, outer_code, set_lhs);
return true;
}
}
@@ -7640,7 +7695,7 @@ arm_9e_rtx_costs (rtx x, enum rtx_code c
return false;
default:
- return arm_rtx_costs_1 (x, outer_code, total, speed);
+ return arm_rtx_costs_1 (x, outer_code, set_lhs, total, speed);
}
}
/* All address computations that can be done are free, but rtx cost returns
===================================================================
@@ -215,7 +215,7 @@ extern void arm_order_regs_for_local_all
struct tune_params
{
- bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
+ bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, rtx, int *, bool);
int constant_limit;
};
===================================================================
@@ -2106,8 +2106,8 @@ bool lshift_cheap_p (void)
if (!init)
{
rtx reg = gen_rtx_REG (word_mode, 10000);
- int cost = rtx_cost (gen_rtx_ASHIFT (word_mode, const1_rtx, reg), SET,
- optimize_insn_for_speed_p ());
+ int cost = rtx_cost2 (gen_rtx_ASHIFT (word_mode, const1_rtx, reg), SET,
+ NULL_RTX, optimize_insn_for_speed_p ());
cheap = cost < COSTS_N_INSNS (3);
init = true;
}
===================================================================
@@ -1629,8 +1629,8 @@ calculate_elim_costs_all_insns (void)
{
rtx t = eliminate_regs_1 (SET_SRC (set), VOIDmode, insn,
false, true);
- int cost = rtx_cost (t, SET,
- optimize_bb_for_speed_p (bb));
+ int cost = rtx_cost2 (t, SET, SET_DEST (set),
+ optimize_bb_for_speed_p (bb));
int freq = REG_FREQ_FROM_BB (bb);
reg_equiv_init_cost[regno] = cost * freq;
@@ -2483,7 +2483,8 @@ note_reg_elim_costly (rtx *px, void *dat
{
rtx t = reg_equiv_invariant[REGNO (x)];
rtx new_rtx = eliminate_regs_1 (t, Pmode, insn, true, true);
- int cost = rtx_cost (new_rtx, SET, optimize_bb_for_speed_p (elim_bb));
+ int cost = rtx_cost2 (new_rtx, SET, NULL_RTX,
+ optimize_bb_for_speed_p (elim_bb));
int freq = REG_FREQ_FROM_BB (elim_bb);
if (cost != 0)