===================================================================
@@ -73,7 +73,7 @@
extern rtx sh_emit_cheap_store_flag (enum machine_mode, enum rtx_code, rtx, rtx);
extern void sh_emit_compare_and_branch (rtx *, enum machine_mode);
extern void sh_emit_compare_and_set (rtx *, enum machine_mode);
-extern int shift_insns_rtx (rtx);
+extern bool sh_ashlsi_clobbers_t_reg_p (rtx);
extern void gen_shifty_op (int, rtx *);
extern void gen_shifty_hi_op (int, rtx *);
extern bool expand_ashiftrt (rtx *);
===================================================================
@@ -2786,72 +2786,117 @@
return false;
}
-/* Actual number of instructions used to make a shift by N. */
+/* Number of instructions used to make an arithmetic right shift by N. */
static const char ashiftrt_insns[] =
{ 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
-/* Left shift and logical right shift are the same. */
-static const char shift_insns[] =
- { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
+/* Description of a logical left or right shift, when expanded to a sequence
+ of 1/2/8/16 shifts.
+   Notice that one-bit right shifts clobber the T bit.  One-bit left shifts
+   are done with an 'add Rn,Rn' insn and thus do not clobber the T bit.  */
+enum
+{
+ ASHL_CLOBBERS_T = 1 << 0,
+ LSHR_CLOBBERS_T = 1 << 1
+};
-/* Individual shift amounts needed to get the above length sequences.
- One bit right shifts clobber the T bit, so when possible, put one bit
- shifts in the middle of the sequence, so the ends are eligible for
- branch delay slots. */
-static const short shift_amounts[32][5] = {
- {0}, {1}, {2}, {2, 1},
- {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
- {8}, {8, 1}, {8, 2}, {8, 1, 2},
- {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
- {16}, {16, 1}, {16, 2}, {16, 1, 2},
- {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
- {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
- {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
+struct ashl_lshr_sequence
+{
+ char insn_count;
+ char amount[6];
+ char clobbers_t;
+};
-/* Likewise, but for shift amounts < 16, up to three highmost bits
- might be clobbered. This is typically used when combined with some
+static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
+{
+ { 0, { 0 }, 0 },
+ { 1, { 1 }, LSHR_CLOBBERS_T },
+ { 1, { 2 }, 0 },
+ { 2, { 2, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 2, 2 }, 0 },
+ { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 2, 2, 2 }, 0 },
+ { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 1, { 8 }, 0 },
+ { 2, { 8, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 8, 2 }, 0 },
+ { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 8, 2, 2 }, 0 },
+ { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 8, -2, 8 }, 0 },
+ { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
+ { 1, { 16 }, 0 },
+ { 2, { 16, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 16, 2 }, 0 },
+ { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 16, 2, 2 }, 0 },
+ { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 16, -2, 8 }, 0 },
+ { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
+ { 2, { 16, 8 }, 0 },
+ { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
+ { 3, { 16, 8, 2 }, 0 },
+ { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
+ { 4, { 16, 8, 2, 2 }, 0 },
+ { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
+ { 3, { 16, -2, 16 }, 0 },
+ { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
+};
+
+/* Like ashl_lshr_seq, but for shift amounts < 16, where up to three highmost
+   bits might be clobbered.  This is typically used when combined with some
kind of sign or zero extension. */
+static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
+{
+ { 0, { 0 }, 0 },
+ { 1, { 1 }, LSHR_CLOBBERS_T },
+ { 1, { 2 }, 0 },
+ { 2, { 2, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 2, 2 }, 0 },
+ { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 2, { 8, -2 }, 0 },
+ { 2, { 8, -1 }, ASHL_CLOBBERS_T },
+ { 1, { 8 }, 0 },
+ { 2, { 8, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 8, 2 }, 0 },
+ { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 8, 2, 2 }, 0 },
+ { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
+ { 2, { 16, -2 }, 0 },
+ { 2, { 16, -1 }, ASHL_CLOBBERS_T },
+ { 1, { 16 }, 0 },
+ { 2, { 16, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 16, 2 }, 0 },
+ { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 16, 2, 2 }, 0 },
+ { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 16, -2, 8 }, 0 },
+ { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
+ { 2, { 16, 8 }, 0 },
+ { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
+ { 3, { 16, 8, 2 }, 0 },
+ { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
+ { 4, { 16, 8, 2, 2 }, 0 },
+ { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
+ { 3, { 16, -2, 16 }, 0 },
+ { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
+};
-static const char ext_shift_insns[] =
- { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
+/* Return true if a shift left consisting of 1/2/8/16 shift instructions
+ will clobber the T bit. */
+bool
+sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
+{
+ gcc_assert (CONST_INT_P (shift_amount));
+ return (ashl_lshr_seq[INTVAL (shift_amount) & 31].clobbers_t
+ & ASHL_CLOBBERS_T) != 0;
+}
-static const short ext_shift_amounts[32][4] = {
- {0}, {1}, {2}, {2, 1},
- {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
- {8}, {8, 1}, {8, 2}, {8, 1, 2},
- {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
- {16}, {16, 1}, {16, 2}, {16, 1, 2},
- {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
- {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
- {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
-
-/* Assuming we have a value that has been sign-extended by at least one bit,
-   can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
-   to shift it by N without data loss, and quicker than by other means?  */
+/* Assuming we have a value that has been sign-extended by at least one bit,
+   can we use the ext_ashl_lshr_seq values with the last shift turned to an
+   arithmetic shift to shift it by N without data loss, and quicker than by
+   other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
-/* This is used in length attributes in sh.md to help compute the length
- of arbitrary constant shift instructions. */
-
-int
-shift_insns_rtx (rtx insn)
-{
- rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
- int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
- enum rtx_code shift_code = GET_CODE (set_src);
-
- switch (shift_code)
- {
- case ASHIFTRT:
- return ashiftrt_insns[shift_count];
- case LSHIFTRT:
- case ASHIFT:
- return shift_insns[shift_count];
- default:
- gcc_unreachable ();
- }
-}
-
/* Return the cost of a shift. */
static inline int
@@ -2890,7 +2935,7 @@
return cost;
}
else
- return shift_insns[value];
+ return ashl_lshr_seq[value].insn_count;
}
/* Return the cost of an AND/XOR/IOR operation. */
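
For readers following the table conversion above: the old parallel arrays
(shift_insns, shift_amounts, ext_shift_insns, ext_shift_amounts) are folded
into ashl_lshr_seq / ext_ashl_lshr_seq, whose entries carry the instruction
count, the 1/2/8/16 sub-shift amounts, and flags saying whether the expanded
left (ASHL) or logical right (LSHR) sequence clobbers the T bit.  The
following standalone sketch is for illustration only and is not part of the
patch; it copies the first eight ashl_lshr_seq entries to show how a constant
shift count selects such a sequence:

#include <stdio.h>

enum
{
  ASHL_CLOBBERS_T = 1 << 0,
  LSHR_CLOBBERS_T = 1 << 1
};

struct ashl_lshr_sequence
{
  char insn_count;
  char amount[6];
  char clobbers_t;
};

/* Truncated copy of ashl_lshr_seq, shift counts 0..7 only.  */
static const struct ashl_lshr_sequence demo_seq[8] =
{
  { 0, { 0 },          0 },
  { 1, { 1 },          LSHR_CLOBBERS_T },
  { 1, { 2 },          0 },
  { 2, { 2, 1 },       LSHR_CLOBBERS_T },
  { 2, { 2, 2 },       0 },
  { 3, { 2, 1, 2 },    LSHR_CLOBBERS_T },
  { 3, { 2, 2, 2 },    0 },
  { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T }
};

int
main (void)
{
  int i;
  int count = 5;
  const struct ashl_lshr_sequence *s = &demo_seq[count & 7];

  /* A shift by 5 expands to sub-shifts 2, 1, 2.  The 1-bit step is an add
     for left shifts, so only the logical right shift variant clobbers T.  */
  printf ("shift by %d: %d insns, lshr clobbers T: %s\n",
          count, s->insn_count,
          (s->clobbers_t & LSHR_CLOBBERS_T) ? "yes" : "no");
  for (i = 0; i < s->insn_count; i++)
    printf ("  sub-shift by %d\n", s->amount[i]);
  return 0;
}
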
@@ -3400,9 +3445,9 @@
return;
}
- max = shift_insns[value];
+ max = ashl_lshr_seq[value].insn_count;
for (i = 0; i < max; i++)
- gen_ashift (code, shift_amounts[value][i], operands[0]);
+ gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
}
/* Same as above, but optimized for values where the topmost bits don't
@@ -3427,15 +3472,15 @@
gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
if (code == ASHIFT)
{
- max = ext_shift_insns[value];
+ max = ext_ashl_lshr_seq[value].insn_count;
for (i = 0; i < max; i++)
- gen_fun (code, ext_shift_amounts[value][i], operands[0]);
+ gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
}
else
/* When shifting right, emit the shifts in reverse order, so that
solitary negative values come first. */
- for (i = ext_shift_insns[value] - 1; i >= 0; i--)
- gen_fun (code, ext_shift_amounts[value][i], operands[0]);
+ for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
+ gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
}
/* Output RTL for an arithmetic right shift. */
@@ -3526,9 +3571,10 @@
bool
sh_dynamicalize_shift_p (rtx count)
{
+ int insn_count;
gcc_assert (CONST_INT_P (count));
- return TARGET_DYNSHIFT
- && (shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST);
+ insn_count = ashl_lshr_seq[INTVAL (count) & 31].insn_count;
+ return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
}
/* Try to find a good way to implement the combiner pattern
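
The rewritten sh_dynamicalize_shift_p keeps the old behaviour: a constant
shift is turned into a register (dynamic) shift when the 1/2/8/16 expansion
would take more instructions than loading the count plus one dynamic shift.
A minimal sketch of that decision, for illustration only: DYN_SHIFT_COST is
an assumed stand-in for SH_DYNAMIC_SHIFT_COST (defined in sh.h, value not
shown here), and the per-count instruction numbers are copied from the
insn_count column of ashl_lshr_seq:

#include <stdio.h>
#include <stdbool.h>

/* insn_count column of ashl_lshr_seq for shift counts 0..31.  */
static const char ashl_lshr_insn_count[32] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,
    1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3 };

/* Assumed stand-in for SH_DYNAMIC_SHIFT_COST.  */
#define DYN_SHIFT_COST 1

static bool
dynamicalize_shift_p (int count, bool target_dynshift)
{
  return target_dynshift
         && ashl_lshr_insn_count[count & 31] > 1 + DYN_SHIFT_COST;
}

int
main (void)
{
  int n;
  for (n = 0; n < 32; n++)
    printf ("shift by %2d: %s\n", n,
            dynamicalize_shift_p (n, true)
            ? "use dynamic shift" : "use constant sequence");
  return 0;
}
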
@@ -3575,12 +3621,14 @@
lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
/* mask has no zeroes but trailing zeroes <==> ! mask2 */
if (! mask2)
- best_cost = shift_insns[right] + shift_insns[right + left];
+ best_cost = ashl_lshr_seq[right].insn_count
+ + ashl_lshr_seq[right + left].insn_count;
/* mask has no trailing zeroes <==> ! right */
else if (! right && mask2 == ~(lsb2 - 1))
{
int late_right = exact_log2 (lsb2);
- best_cost = shift_insns[left + late_right] + shift_insns[late_right];
+ best_cost = ashl_lshr_seq[left + late_right].insn_count
+ + ashl_lshr_seq[late_right].insn_count;
}
/* Try to use zero extend. */
if (mask2 == ~(lsb2 - 1))
@@ -3592,8 +3640,8 @@
/* Can we zero-extend right away? */
if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
{
- cost
- = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
+ cost = 1 + ext_ashl_lshr_seq[right].insn_count
+ + ext_ashl_lshr_seq[left + right].insn_count;
if (cost < best_cost)
{
best = 1;
@@ -3612,8 +3660,10 @@
first = width - exact_log2 (lsb2) + right;
if (first >= 0 && right + left - first >= 0)
{
- cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
- + ext_shift_insns[right + left - first];
+ cost = ext_ashl_lshr_seq[right].insn_count
+ + ext_ashl_lshr_seq[first].insn_count + 1
+ + ext_ashl_lshr_seq[right + left - first].insn_count;
+
if (cost < best_cost)
{
best = 1;
@@ -3633,7 +3683,7 @@
break;
if (! CONST_OK_FOR_K08 (mask >> i))
continue;
- cost = (i != 0) + 2 + ext_shift_insns[left + i];
+ cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
if (cost < best_cost)
{
best = 2;
@@ -3649,7 +3699,9 @@
if (i > right)
break;
cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
- + (can_ext ? ext_shift_insns : shift_insns)[left + i];
+ + (can_ext
+ ? ext_ashl_lshr_seq
+ : ashl_lshr_seq)[left + i].insn_count;
if (cost < best_cost)
{
best = 4 - can_ext;
@@ -3688,11 +3740,11 @@
shl_and_scr_length (rtx insn)
{
rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
- int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
+ int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
rtx op = XEXP (set_src, 0);
- len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
+ len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
op = XEXP (XEXP (op, 0), 0);
- return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
+ return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
}
/* Generate rtl for instructions for which shl_and_kind advised a particular
@@ -3793,9 +3845,9 @@
int neg = 0;
if (kind != 4 && total_shift < 16)
{
- neg = -ext_shift_amounts[total_shift][1];
+ neg = -ext_ashl_lshr_seq[total_shift].amount[1];
if (neg > 0)
- neg -= ext_shift_amounts[total_shift][2];
+ neg -= ext_ashl_lshr_seq[total_shift].amount[2];
else
neg = 0;
}
@@ -3842,11 +3894,13 @@
gcc_assert (insize > 0);
/* Default to left / right shift. */
kind = 0;
- best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
+  best_cost = ashl_lshr_seq[32 - insize].insn_count
+	      + ashiftrt_insns[32 - size];
if (size <= 16)
{
/* 16 bit shift / sign extend / 16 bit shift */
- cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
+      cost = ashl_lshr_seq[16 - insize].insn_count + 1
+	     + ashiftrt_insns[16 - size];
/* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
below, by alternative 3 or something even better. */
if (cost < best_cost)
@@ -3860,7 +3914,8 @@
{
if (ext <= size)
{
- cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
+ cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
+ + ashl_lshr_seq[size - ext].insn_count;
if (cost < best_cost)
{
kind = ext / (unsigned) 8;
@@ -3870,12 +3925,14 @@
/* Check if we can do a sloppy shift with a final signed shift
restoring the sign. */
if (EXT_SHIFT_SIGNED (size - ext))
- cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
+ cost = ext_ashl_lshr_seq[ext - insize].insn_count
+ + ext_ashl_lshr_seq[size - ext].insn_count + 1;
/* If not, maybe it's still cheaper to do the second shift sloppy,
and do a final sign extend? */
else if (size <= 16)
- cost = ext_shift_insns[ext - insize] + 1
- + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
+ cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
+ + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
+ + 1;
else
continue;
if (cost < best_cost)
@@ -3887,7 +3944,7 @@
/* Check if we can sign extend in r0 */
if (insize < 8)
{
- cost = 3 + shift_insns[left];
+ cost = 3 + ashl_lshr_seq[left].insn_count;
if (cost < best_cost)
{
kind = 6;
@@ -3896,7 +3953,7 @@
/* Try the same with a final signed shift. */
if (left < 31)
{
- cost = 3 + ext_shift_insns[left + 1] + 1;
+ cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
if (cost < best_cost)
{
kind = 7;
@@ -3907,7 +3964,7 @@
if (TARGET_DYNSHIFT)
{
/* Try to use a dynamic shift. */
- cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
+ cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
if (cost < best_cost)
{
kind = 0;
===================================================================
@@ -3496,6 +3496,17 @@
if (TARGET_DYNSHIFT
&& CONST_INT_P (operands[2]) && sh_dynamicalize_shift_p (operands[2]))
operands[2] = force_reg (SImode, operands[2]);
+
+ /* If the ashlsi3_* insn is going to clobber the T_REG it must be
+ expanded here. */
+ if (CONST_INT_P (operands[2])
+ && sh_ashlsi_clobbers_t_reg_p (operands[2])
+ && ! sh_dynamicalize_shift_p (operands[2]))
+ {
+ emit_insn (gen_ashlsi3_n_clobbers_t (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
})
(define_insn "ashlsi3_k"
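
The expander change above sends a constant left shift to the new
ashlsi3_n_clobbers_t pattern whenever its 1/2/8/16 expansion would clobber T
and the shift is not converted to a dynamic shift first.  Reading the
ashl_lshr_seq table, that is exactly the case for shift counts 15, 23, 29 and
31, whose sequences contain a -1 step, i.e. a one-bit right shift (shlr),
which writes the T bit.  A tiny sketch, for illustration only (the bit mask
below is read off the table by hand, it is not part of the patch):

#include <stdio.h>
#include <stdbool.h>

/* Counts whose left-shift expansion contains a 1-bit right sub-shift and
   therefore clobbers T: 15, 23, 29, 31 (taken from ashl_lshr_seq).  */
static bool
ashl_clobbers_t (int count)
{
  const unsigned int mask =
    (1u << 15) | (1u << 23) | (1u << 29) | (1u << 31);
  return (mask >> (count & 31)) & 1;
}

int
main (void)
{
  int n;
  for (n = 0; n < 32; n++)
    if (ashl_clobbers_t (n))
      printf ("ashlsi3 by %d -> ashlsi3_n_clobbers_t\n", n);
  return 0;
}
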
@@ -3522,7 +3533,7 @@
emit_insn (gen_ashlsi3_k (operands[0], operands[1], operands[2]));
DONE;
}
- else if (!satisfies_constraint_P27 (operands[2]))
+ else if (! satisfies_constraint_P27 (operands[2]))
{
/* This must happen before reload, otherwise the constant will be moved
into a register due to the "r" constraint, after which this split
@@ -3541,8 +3552,33 @@
[(set (match_operand:SI 0 "arith_reg_dest" "=r")
(ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
(match_operand:SI 2 "not_p27_shift_count_operand" "")))]
- "TARGET_SH1"
+ "TARGET_SH1 && ! sh_ashlsi_clobbers_t_reg_p (operands[2])"
"#"
+ "&& (reload_completed
+ || (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))"
+ [(const_int 0)]
+{
+ if (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ())
+ {
+      /* If this pattern was picked and dynamic shifts are supported, switch
+	 to the dynamic shift pattern before reload.  However, we must not
+ create a shift sequence that clobbers the T_REG. */
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_ashlsi3_d (operands[0], operands[1], operands[2]));
+ }
+ else
+ gen_shifty_op (ASHIFT, operands);
+
+ DONE;
+})
+
+(define_insn_and_split "ashlsi3_n_clobbers_t"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "not_p27_shift_count_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && sh_ashlsi_clobbers_t_reg_p (operands[2])"
+ "#"
"&& (reload_completed || INTVAL (operands[2]) == 31
|| (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))"
[(const_int 0)]