===================================================================
@@ -0,0 +1,86 @@
+/* Verify that TST #imm, R0 instruction is generated if the constant
+ allows it. Under some circumstances another compare instruction might
+ be selected, which is also fine. Any AND instructions are considered
+ counterproductive and fail the test. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "and" } } */
+
+#define make_func(__valtype__, __valget__, __tstval__, __suff__)\
+ int test_imm_##__tstval__##__suff__ (__valtype__ val) \
+ {\
+ return ((__valget__) & (0x##__tstval__ << 0)) ? -20 : -40;\
+ }
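+
+/* For illustration (an assumed expansion, not part of the test itself):
+   make_func (int, val, 0F, int32_reg) expands to
+     int test_imm_0Fint32_reg (int val)
+     {
+       return ((val) & (0x0F << 0)) ? -20 : -40;
+     }
+   for which the compiler is expected to emit a TST #imm, R0 sequence
+   rather than an AND.  */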
+
+#define make_func_0_F(__valtype__, __valget__, __y__, __suff__)\
+ make_func (__valtype__, __valget__, __y__##0, __suff__)\
+ make_func (__valtype__, __valget__, __y__##1, __suff__)\
+ make_func (__valtype__, __valget__, __y__##2, __suff__)\
+ make_func (__valtype__, __valget__, __y__##3, __suff__)\
+ make_func (__valtype__, __valget__, __y__##4, __suff__)\
+ make_func (__valtype__, __valget__, __y__##5, __suff__)\
+ make_func (__valtype__, __valget__, __y__##6, __suff__)\
+ make_func (__valtype__, __valget__, __y__##7, __suff__)\
+ make_func (__valtype__, __valget__, __y__##8, __suff__)\
+ make_func (__valtype__, __valget__, __y__##9, __suff__)\
+ make_func (__valtype__, __valget__, __y__##A, __suff__)\
+ make_func (__valtype__, __valget__, __y__##B, __suff__)\
+ make_func (__valtype__, __valget__, __y__##C, __suff__)\
+ make_func (__valtype__, __valget__, __y__##D, __suff__)\
+ make_func (__valtype__, __valget__, __y__##E, __suff__)\
+ make_func (__valtype__, __valget__, __y__##F, __suff__)\
+
+#define make_funcs_0_FF(__valtype__, __valget__, __suff__)\
+ make_func_0_F (__valtype__, __valget__, 0, __suff__)\
+ make_func_0_F (__valtype__, __valget__, 1, __suff__)\
+ make_func_0_F (__valtype__, __valget__, 2, __suff__)\
+ make_func_0_F (__valtype__, __valget__, 3, __suff__)\
+ make_func_0_F (__valtype__, __valget__, 4, __suff__)\
+ make_func_0_F (__valtype__, __valget__, 5, __suff__)\
+ make_func_0_F (__valtype__, __valget__, 6, __suff__)\
+ make_func_0_F (__valtype__, __valget__, 7, __suff__)\
+ make_func_0_F (__valtype__, __valget__, 8, __suff__)\
+ make_func_0_F (__valtype__, __valget__, 9, __suff__)\
+ make_func_0_F (__valtype__, __valget__, A, __suff__)\
+ make_func_0_F (__valtype__, __valget__, B, __suff__)\
+ make_func_0_F (__valtype__, __valget__, C, __suff__)\
+ make_func_0_F (__valtype__, __valget__, D, __suff__)\
+ make_func_0_F (__valtype__, __valget__, E, __suff__)\
+ make_func_0_F (__valtype__, __valget__, F, __suff__)\
+
+make_funcs_0_FF (signed char*, *val, int8_mem)
+make_funcs_0_FF (signed char, val, int8_reg)
+
+make_funcs_0_FF (unsigned char*, *val, uint8_mem)
+make_funcs_0_FF (unsigned char, val, uint8_reg)
+
+make_funcs_0_FF (short*, *val, int16_mem)
+make_funcs_0_FF (short, val, int16_reg)
+
+make_funcs_0_FF (unsigned short*, *val, uint16_mem)
+make_funcs_0_FF (unsigned short, val, uint16_reg)
+
+make_funcs_0_FF (int*, *val, int32_mem)
+make_funcs_0_FF (int, val, int32_reg)
+
+make_funcs_0_FF (unsigned int*, *val, uint32_mem)
+make_funcs_0_FF (unsigned int, val, uint32_reg)
+
+make_funcs_0_FF (long long*, *val, int64_lowword_mem)
+make_funcs_0_FF (long long, val, int64_lowword_reg)
+
+make_funcs_0_FF (unsigned long long*, *val, uint64_lowword_mem)
+make_funcs_0_FF (unsigned long long, val, uint64_lowword_reg)
+
+make_funcs_0_FF (long long*, *val >> 32, int64_highword_mem)
+make_funcs_0_FF (long long, val >> 32, int64_highword_reg)
+
+make_funcs_0_FF (unsigned long long*, *val >> 32, uint64_highword_mem)
+make_funcs_0_FF (unsigned long long, val >> 32, uint64_highword_reg)
+
+make_funcs_0_FF (long long*, *val >> 16, int64_midword_mem)
+make_funcs_0_FF (long long, val >> 16, int64_midword_reg)
+
+make_funcs_0_FF (unsigned long long*, *val >> 16, uint64_midword_mem)
+make_funcs_0_FF (unsigned long long, val >> 16, uint64_midword_reg)
+
===================================================================
@@ -242,7 +242,7 @@
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
-static int and_xor_ior_costs (rtx, int code);
+static int and_xor_ior_costs (rtx, int);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
@@ -2995,6 +2995,20 @@
*total = 8;
return true;
+ case EQ:
+ /* An AND with a constant compared against zero will most likely
+ become a TST #imm, R0 instruction.
+ Note that this does not catch the zero_extract variants from
+ the md file. */
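+ /* For illustration (an assumed RTL shape): an expression such as
+    (eq:SI (and:SI (reg:SI 4) (const_int 64)) (const_int 0))
+    gets a total cost of 1 here, which should encourage combine to keep
+    the and-against-zero form that maps to TST #imm, R0. */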
+ if (GET_CODE (XEXP (x, 0)) == AND
+ && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
+ {
+ *total = 1;
+ return true;
+ }
+ else
+ return false;
+
case CONST:
case LABEL_REF:
case SYMBOL_REF:
===================================================================
@@ -1195,6 +1195,9 @@
#define CONST_OK_FOR_K08(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \
&& ((HOST_WIDE_INT)(VALUE)) <= 255)
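+/* ZERO_EXTRACT_ANDMASK gives the AND mask that corresponds to an RTL
+   zero_extract of EXTRACT_SZ_RTX bits starting at bit EXTRACT_POS_RTX.
+   For example, a size of 3 at position 2 yields
+   ((1 << 3) - 1) << 2 = 0x1C.  */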
+#define ZERO_EXTRACT_ANDMASK(EXTRACT_SZ_RTX, EXTRACT_POS_RTX)\
+ (((1 << INTVAL (EXTRACT_SZ_RTX)) - 1) << INTVAL (EXTRACT_POS_RTX))
+
#if 0
#define SECONDARY_INOUT_RELOAD_CLASS(CLASS,MODE,X,ELSE) \
((((REGCLASS_HAS_FP_REG (CLASS) \
===================================================================
@@ -585,15 +585,164 @@
;; SImode signed integer comparisons
;; -------------------------------------------------------------------------
-(define_insn ""
+;; Various patterns to generate the TST #imm, R0 instruction.
+;; Although this adds some pressure on the R0 register, it can result in
+;; faster code, even if the operand has to be moved to R0 first.  This is
+;; because on SH4 TST #imm, R0 and MOV Rm, Rn are both MT group instructions
+;; and can thus be executed in parallel.  On SH4A TST #imm, R0 is an EX group
+;; instruction, but it can still be executed in parallel with the MT group
+;; MOV Rm, Rn instruction.
+
+;; The usual TST #imm, R0 patterns for SI, HI and QI.
+;; These are typically used for bit patterns other than contiguous bits
+;; and single bits.
+
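+;; As an illustrative example (assumed source, not taken from the testsuite),
+;; a function such as
+;;   int f (int x) { return (x & 0x55) ? -20 : -40; }
+;; is expected to match tstsi_t and be compiled to a TST #imm, R0 sequence
+;; instead of an AND followed by a compare.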
+(define_insn "tstsi_t"
[(set (reg:SI T_REG)
- (eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r")
+ (eq:SI (and:SI (match_operand:SI 0 "logical_operand" "%z,r")
(match_operand:SI 1 "logical_operand" "K08,r"))
(const_int 0)))]
"TARGET_SH1"
"tst %1,%0"
[(set_attr "type" "mt_group")])
+(define_insn "tsthi_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (subreg:SI (and:HI (match_operand:HI 0 "logical_operand" "%z")
+ (match_operand 1 "const_int_operand")) 0)
+ (const_int 0)))]
+ "TARGET_SH1
+ && CONST_OK_FOR_K08 (INTVAL (operands[1]))"
+ "tst %1,%0"
+ [(set_attr "type" "mt_group")])
+
+(define_insn "tstqi_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (subreg:SI (and:QI (match_operand:QI 0 "logical_operand" "%z")
+ (match_operand 1 "const_int_operand")) 0)
+ (const_int 0)))]
+ "TARGET_SH1
+ && (CONST_OK_FOR_K08 (INTVAL (operands[1]))
+ || CONST_OK_FOR_I08 (INTVAL (operands[1])))"
+{
+ operands[1] = GEN_INT (INTVAL (operands[1]) & 255);
+ return "tst %1,%0";
+}
+ [(set_attr "type" "mt_group")])
+
+;; Test the low QI subreg against zero.
+;; This avoids an unnecessary zero extension before the test.
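+;; For illustration (assumed source): in
+;;   int f (unsigned char* p) { return *p ? -20 : -40; }
+;; the loaded byte can be tested directly with tst #255 instead of being
+;; zero extended first and then compared against zero.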
+
+(define_insn "tstqi_t_zero"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:QI 0 "logical_operand" "z") (const_int 0)))]
+ "TARGET_SH1"
+ "tst #255,%0"
+ [(set_attr "type" "mt_group")])
+
+;; Extract the LSB, negate it and store it in the T bit.
+
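+;; Illustrative note: this pattern computes T = (~Rn) & 1, which is the same
+;; T bit value that tst #1 produces for an assumed source shape such as
+;;   return (x & 1) == 0;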
+(define_insn "tstsi_t_and_not"
+ [(set (reg:SI T_REG)
+ (and:SI (not:SI (match_operand:SI 0 "logical_operand" "z"))
+ (const_int 1)))]
+ "TARGET_SH1"
+ "tst #1,%0"
+ [(set_attr "type" "mt_group")])
+
+;; Extract contiguous bits and compare them against zero.
+
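+;; For illustration (assumed source): in
+;;   int f (int x) { return ((x >> 2) & 7) == 0; }
+;; the three extracted bits at position 2 correspond to the AND mask 0x1C,
+;; so the test can be done with a single TST #imm, R0 using the
+;; immediate 0x1C (28).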
+(define_insn "tstsi_t_zero_extract_eq"
+ [(set (reg:SI T_REG)
+ (eq:SI (zero_extract:SI (match_operand 0 "logical_operand" "z")
+ (match_operand:SI 1 "const_int_operand")
+ (match_operand:SI 2 "const_int_operand"))
+ (const_int 0)))]
+ "TARGET_SH1
+ && CONST_OK_FOR_K08 (ZERO_EXTRACT_ANDMASK (operands[1], operands[2]))"
+{
+ operands[1] = GEN_INT (ZERO_EXTRACT_ANDMASK (operands[1], operands[2]));
+ return "tst %1,%0";
+}
+ [(set_attr "type" "mt_group")])
+
+;; This split is required when testing bits in a QI subreg.
+
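+;; The matched RTL has the shape
+;;   (eq (if_then_else (zero_extract X SIZE POS) MASK (const_int 0))
+;;       (const_int 0))
+;; and, when MASK equals the mask of the extracted bit range, is rewritten
+;; into the plain (eq (and X MASK) (const_int 0)) form handled above.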
+(define_split
+ [(set (reg:SI T_REG)
+ (eq:SI (if_then_else:SI (zero_extract:SI
+ (match_operand 0 "logical_operand" "")
+ (match_operand 1 "const_int_operand")
+ (match_operand 2 "const_int_operand"))
+ (match_operand 3 "const_int_operand")
+ (const_int 0))
+ (const_int 0)))]
+ "TARGET_SH1
+ && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) == INTVAL (operands[3])
+ && CONST_OK_FOR_K08 (INTVAL (operands[3]))"
+ [(set (reg:SI T_REG) (eq:SI (and:SI (match_dup 0) (match_dup 3))
+ (const_int 0)))]
+ "
+{
+ if (GET_MODE (operands[0]) == QImode)
+ operands[0] = simplify_gen_subreg (SImode, operands[0], QImode, 0);
+}")
+
+;; Extract a single bit, negate it and store it in the T bit.
+;; Not used for SH4A.
+
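+;; For illustration (an assumed combined form): for a source such as
+;;   return ((x >> 6) & 1) == 0;
+;; the single extracted bit corresponds to the mask 0x40, so one tst with
+;; immediate 0x40 puts the negated bit into the T bit directly.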
+(define_insn "tstsi_t_zero_extract_xor"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI (xor:SI (match_operand:SI 0 "logical_operand" "z")
+ (match_operand:SI 3 "const_int_operand"))
+ (match_operand:SI 1 "const_int_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_SH1
+ && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) == INTVAL (operands[3])
+ && CONST_OK_FOR_K08 (INTVAL (operands[3]))"
+ "tst %3,%0"
+ [(set_attr "type" "mt_group")])
+
+;; Extract a single bit, negate it and store it in the T bit.
+;; Used for SH4A little endian.
+
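+;; Note: on little endian the low byte of the SImode xor result is subreg
+;; byte 0 here; the big endian variant below uses subreg byte 3 for the
+;; same low byte.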
+(define_insn "tstsi_t_zero_extract_subreg_xor_little"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI
+ (subreg:QI (xor:SI (match_operand:SI 0 "logical_operand" "z")
+ (match_operand:SI 3 "const_int_operand")) 0)
+ (match_operand:SI 1 "const_int_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_SH1 && TARGET_LITTLE_ENDIAN
+ && ZERO_EXTRACT_ANDMASK (operands[1], operands[2])
+ == (INTVAL (operands[3]) & 255)
+ && CONST_OK_FOR_K08 (INTVAL (operands[3]) & 255)"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) & 255);
+ return "tst %3,%0";
+}
+ [(set_attr "type" "mt_group")])
+
+;; Extract a single bit, negate it and store it in the T bit.
+;; Used for SH4A big endian.
+
+(define_insn "tstsi_t_zero_extract_subreg_xor_big"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI
+ (subreg:QI (xor:SI (match_operand:SI 0 "logical_operand" "z")
+ (match_operand:SI 3 "const_int_operand")) 3)
+ (match_operand:SI 1 "const_int_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_SH1 && ! TARGET_LITTLE_ENDIAN
+ && ZERO_EXTRACT_ANDMASK (operands[1], operands[2])
+ == (INTVAL (operands[3]) & 255)
+ && CONST_OK_FOR_K08 (INTVAL (operands[3]) & 255)"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) & 255);
+ return "tst %3,%0";
+}
+ [(set_attr "type" "mt_group")])
+
;; ??? Perhaps should only accept reg/constant if the register is reg 0.
;; That would still allow reload to create cmpi instructions, but would
;; perhaps allow forcing the constant into a register when that is better.
@@ -1157,7 +1306,7 @@
&& (arith_reg_operand (operands[1], SImode)
|| (immediate_operand (operands[1], SImode)
&& satisfies_constraint_I08 (operands[1])))"
- "bt 0f\;mov %1,%0\\n0:"
+ "bt 0f\;mov %1,%0\\n0:"
[(set_attr "type" "mt_group,arith") ;; poor approximation
(set_attr "length" "4")])
@@ -1170,7 +1319,7 @@
&& (arith_reg_operand (operands[1], SImode)
|| (immediate_operand (operands[1], SImode)
&& satisfies_constraint_I08 (operands[1])))"
- "bf 0f\;mov %1,%0\\n0:"
+ "bf 0f\;mov %1,%0\\n0:"
[(set_attr "type" "mt_group,arith") ;; poor approximation
(set_attr "length" "4")])
@@ -3015,9 +3164,9 @@
;; -------------------------------------------------------------------------
(define_insn "*andsi3_compact"
- [(set (match_operand:SI 0 "arith_reg_dest" "=r,z")
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,r")
(and:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
- (match_operand:SI 2 "logical_operand" "r,K08")))]
+ (match_operand:SI 2 "logical_operand" "K08,r")))]
"TARGET_SH1"
"and %2,%0"
[(set_attr "type" "arith")])