@@ -1269,7 +1269,54 @@ (define_insn ""
;; (pc)))]
;; "")
-;; Various ways to extract a single bit bitfield and sign extend it
+;; This is a signed bitfield extraction starting at bit 0
+;; It's usually faster than using shifts, but not always,
+;; particularly on the H8/S and H8/SX variants.
+(define_insn_and_split "*extvsi_n_0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand")
+ (const_int 0)))]
+ "INTVAL (operands[2]) > 1
+ && INTVAL (operands[2]) < (TARGET_H8300S ? 26 - TARGET_H8300SX : 29)
+ && (!TARGET_H8300SX || (INTVAL (operands[2]) != 24 && INTVAL (operands[2]) != 17))"
+ "#"
+ "&& reload_completed"
+[(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC CC_REG))])
+ (parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 4)))
+ (clobber (reg:CC CC_REG))])
+ (parallel [(set (match_dup 0) (minus:SI (match_dup 0) (match_dup 4)))
+ (clobber (reg:CC CC_REG))])]
+{
+ int tmp = INTVAL (operands[2]);
+ operands[3] = GEN_INT (~(HOST_WIDE_INT_M1U << tmp));
+ operands[4] = GEN_INT (HOST_WIDE_INT_1U << (tmp - 1));
+})
+
+(define_insn_and_split "*extvsi_n_n"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand")
+ (match_operand 3 "const_int_operand")))]
+ "(!h8300_shift_needs_scratch_p (INTVAL (operands[3]), SImode, LSHIFTRT)
+ && use_extvsi (INTVAL (operands[2]), INTVAL (operands[3])))"
+ "#"
+ "&& reload_completed"
+[(parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC CC_REG))])
+ (parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 4)))
+ (clobber (reg:CC CC_REG))])
+ (parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 5)))
+ (clobber (reg:CC CC_REG))])
+ (parallel [(set (match_dup 0) (minus:SI (match_dup 0) (match_dup 5)))
+ (clobber (reg:CC CC_REG))])]
+{
+ int tmp = INTVAL (operands[2]);
+ operands[4] = gen_int_mode (~(HOST_WIDE_INT_M1U << tmp), SImode);
+ operands[5] = gen_int_mode (HOST_WIDE_INT_1U << (tmp - 1), SImode);
+})
+
;;
;; Testing showed this only triggering with SImode, probably because
;; of how insv/extv are defined.
@@ -111,5 +111,6 @@ extern const char * output_h8sx_shift (rtx *, int, int);
extern bool h8300_operands_match_p (rtx *);
extern bool h8sx_mergeable_memrefs_p (rtx, rtx);
extern poly_int64 h8300_push_rounding (poly_int64);
+extern bool use_extvsi (int, int);
#endif /* ! GCC_H8300_PROTOS_H */
@@ -5503,6 +5503,70 @@ h8300_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
}
}
+/* Return true if a signed bitfield extraction with length COUNT
+ starting at position POS should be optimized by first shifting
+ right to put the field in the LSB, then using a 3 operand sequence
+ to sign extend from an arbitrary position. Return false
+ otherwise.
+
+ The basic idea here is to compute the length of each sequence
+ and use that as a proxy for performance. It's not strictly
+ correct on the H8/SX which has variable timed shifts and some
+ lengths may be incorrect, but this is pretty close.
+
+ There may be cases where the length computations are inaccurate
+ which may in turn lead to a sub-optimal sequence, but that
+ should be rare.
+
+ We do not try to balance avoiding a loop with burning an extra
+ couple bytes. Those probably couple be handled with special
+ cases. */
+
+bool
+use_extvsi (int count, int pos)
+{
+ rtx operands[3];
+ operands[0] = gen_rtx_REG (SImode, 0);
+ operands[1] = gen_rtx_REG (SImode, 0);
+
+ /* We have a special sequence to sign extract a single bit
+ object, otherwise compute it as a pair of shifts, first
+ left, then arithmetic right. The cost of that special
+ sequence is 8/10 depending on where the bit is. */
+ unsigned shift_cost;
+ if (count == 1)
+ shift_cost = pos >= 16 ? 10 : 8;
+ else
+ {
+ unsigned lshift = 32 - (count + pos);
+ unsigned rshift = 32 - count;
+ operands[2] = GEN_INT (lshift);
+ shift_cost = compute_a_shift_length (operands, ASHIFT);
+ operands[2] = GEN_INT (rshift);
+ shift_cost += compute_a_shift_length (operands, ASHIFTRT);
+ }
+
+ /* Cost of hopefully optimized sequence. First we logically shift right
+ by an adjusted count. Logicals are generally better than arith,
+ particularly for H8/SX. */
+ operands[2] = GEN_INT (pos);
+ unsigned opt_cost = compute_a_shift_length (operands, LSHIFTRT);
+ operands[2] = gen_int_mode (~(HOST_WIDE_INT_M1U << count), SImode);
+ opt_cost += compute_logical_op_length (SImode, AND, operands, NULL);
+ operands[2] = gen_int_mode (HOST_WIDE_INT_1U << (count - 1), SImode);
+ opt_cost += compute_logical_op_length (SImode, XOR, operands, NULL);
+
+ /* H8/SX has short form subtraction. */
+ if (TARGET_H8300SX && (INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 7))
+ opt_cost += 2;
+ else if (TARGET_H8300SX && (INTVAL (operands[2]) >= 8 && INTVAL (operands[2]) <= 32767))
+ opt_cost += 4;
+ else
+ opt_cost += 6;
+
+ return opt_cost <= shift_cost;
+}
+
/* Implement PUSH_ROUNDING.
On the H8/300, @-sp really pushes a byte if you ask it to - but that's