diff mbox

[SH] PR 49263 - underutilized "TST #imm, R0" instruction

Message ID 1318641441.19997.404.camel@yam-132-YW-E178-FTW
State New
Headers show

Commit Message

Oleg Endo Oct. 15, 2011, 1:17 a.m. UTC
Hello,

the attached patch is the same as the last proposed patch in the PR but
with some fixed formatting and comments. Hope it's fine like that.

Tested against trunk rev 179778 with 

make -k -j4 check RUNTESTFLAGS="--target_board=sh-sim
\{-m2,-m2a-single,-m4-single,-m4a-single\}\{-mb,-ml\}"

and no new failures (ignoring the impossible -m2a-single -mb
combination).

Cheers,
Oleg

ChangeLog:

2011-10-15  Oleg Endo  <oleg.endo@t-online.de>

	PR target/49263
	* config/sh/sh.h (ZERO_EXTRACT_ANDMASK): New macro.
	* config/sh/sh.c (sh_rtx_costs): Add test instruction case.
	* config/sh/sh.md (tstsi_t): Name existing insn.  Make inner
	and instruction commutative.
	(tsthi_t, tstqi_t, tstqi_t_zero, tstsi_t_and_not,
	tstsi_t_zero_extract_eq, tstsi_t_zero_extract_xor,
	tstsi_t_zero_extract_subreg_xor_little,
	tstsi_t_zero_extract_subreg_xor_big): New insns.
	(*movsicc_t_false, *movsicc_t_true): Replace space with tab in
	asm output.
	(*andsi_compact): Reorder alternatives so that K08 is considered
	first.

testsuite/ChangeLog:

2011-10-15  Oleg Endo  <oleg.endo@t-online.de>

	PR target/49263
	* gcc.target/sh/pr49263.c: New.

Comments

Kaz Kojima Oct. 15, 2011, 2:35 a.m. UTC | #1
Oleg Endo <oleg.endo@t-online.de> wrote:
> the attached patch is the same as the last proposed patch in the PR but
> with some fixed formatting and comments. Hope it's fine like that.
> 
> Tested against trunk rev 179778 with 
> 
> make -k -j4 check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2,-m2a-single,-m4-single,-m4a-single\}\{-mb,-ml\}"
> 
> and no new failures (ignoring the impossible -m2a-single -mb
> combination).

This patch is OK.  Thanks for working on this issue.
I've applied it on trunk as revision 180020.

Regards,
	kaz
diff mbox

Patch

Index: gcc/testsuite/gcc.target/sh/pr49263.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr49263.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr49263.c	(revision 0)
@@ -0,0 +1,86 @@ 
+/* Verify that TST #imm, R0 instruction is generated if the constant
+   allows it.  Under some circumstances another compare instruction might
+   be selected, which is also fine.  Any AND instructions are considered
+   counter productive and fail the test.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "and" } } */
+
+#define make_func(__valtype__, __valget__, __tstval__, __suff__)\
+  int test_imm_##__tstval__##__suff__ (__valtype__ val) \
+    {\
+      return ((__valget__) & (0x##__tstval__  << 0)) ? -20 : -40;\
+    }
+
+#define make_func_0_F(__valtype__, __valget__, __y__, __suff__)\
+  make_func (__valtype__, __valget__, __y__##0, __suff__)\
+  make_func (__valtype__, __valget__, __y__##1, __suff__)\
+  make_func (__valtype__, __valget__, __y__##2, __suff__)\
+  make_func (__valtype__, __valget__, __y__##3, __suff__)\
+  make_func (__valtype__, __valget__, __y__##4, __suff__)\
+  make_func (__valtype__, __valget__, __y__##5, __suff__)\
+  make_func (__valtype__, __valget__, __y__##6, __suff__)\
+  make_func (__valtype__, __valget__, __y__##7, __suff__)\
+  make_func (__valtype__, __valget__, __y__##8, __suff__)\
+  make_func (__valtype__, __valget__, __y__##9, __suff__)\
+  make_func (__valtype__, __valget__, __y__##A, __suff__)\
+  make_func (__valtype__, __valget__, __y__##B, __suff__)\
+  make_func (__valtype__, __valget__, __y__##C, __suff__)\
+  make_func (__valtype__, __valget__, __y__##D, __suff__)\
+  make_func (__valtype__, __valget__, __y__##E, __suff__)\
+  make_func (__valtype__, __valget__, __y__##F, __suff__)\
+
+#define make_funcs_0_FF(__valtype__, __valget__, __suff__)\
+  make_func_0_F (__valtype__, __valget__, 0, __suff__)\
+  make_func_0_F (__valtype__, __valget__, 1, __suff__)\
+  make_func_0_F (__valtype__, __valget__, 2, __suff__)\
+  make_func_0_F (__valtype__, __valget__, 3, __suff__)\
+  make_func_0_F (__valtype__, __valget__, 4, __suff__)\
+  make_func_0_F (__valtype__, __valget__, 5, __suff__)\
+  make_func_0_F (__valtype__, __valget__, 6, __suff__)\
+  make_func_0_F (__valtype__, __valget__, 7, __suff__)\
+  make_func_0_F (__valtype__, __valget__, 8, __suff__)\
+  make_func_0_F (__valtype__, __valget__, 9, __suff__)\
+  make_func_0_F (__valtype__, __valget__, A, __suff__)\
+  make_func_0_F (__valtype__, __valget__, B, __suff__)\
+  make_func_0_F (__valtype__, __valget__, C, __suff__)\
+  make_func_0_F (__valtype__, __valget__, D, __suff__)\
+  make_func_0_F (__valtype__, __valget__, E, __suff__)\
+  make_func_0_F (__valtype__, __valget__, F, __suff__)\
+
+make_funcs_0_FF (signed char*, *val, int8_mem)
+make_funcs_0_FF (signed char, val, int8_reg)
+
+make_funcs_0_FF (unsigned char*, *val, uint8_mem)
+make_funcs_0_FF (unsigned char, val, uint8_reg)
+
+make_funcs_0_FF (short*, *val, int16_mem)
+make_funcs_0_FF (short, val, int16_reg)
+
+make_funcs_0_FF (unsigned short*, *val, uint16_mem)
+make_funcs_0_FF (unsigned short, val, uint16_reg)
+
+make_funcs_0_FF (int*, *val, int32_mem)
+make_funcs_0_FF (int, val, int32_reg)
+
+make_funcs_0_FF (unsigned int*, *val, uint32_mem)
+make_funcs_0_FF (unsigned int, val, uint32_reg)
+
+make_funcs_0_FF (long long*, *val, int64_lowword_mem)
+make_funcs_0_FF (long long, val, int64_lowword_reg)
+
+make_funcs_0_FF (unsigned long long*, *val, uint64_lowword_mem)
+make_funcs_0_FF (unsigned long long, val, uint64_lowword_reg)
+
+make_funcs_0_FF (long long*, *val >> 32, int64_highword_mem)
+make_funcs_0_FF (long long, val >> 32, int64_highword_reg)
+
+make_funcs_0_FF (unsigned long long*, *val >> 32, uint64_highword_mem)
+make_funcs_0_FF (unsigned long long, val >> 32, uint64_highword_reg)
+
+make_funcs_0_FF (long long*, *val >> 16, int64_midword_mem)
+make_funcs_0_FF (long long, val >> 16, int64_midword_reg)
+
+make_funcs_0_FF (unsigned long long*, *val >> 16, uint64_midword_mem)
+make_funcs_0_FF (unsigned long long, val >> 16, uint64_midword_reg)
+
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 179778)
+++ gcc/config/sh/sh.c	(working copy)
@@ -242,7 +242,7 @@ 
 static int flow_dependent_p (rtx, rtx);
 static void flow_dependent_p_1 (rtx, const_rtx, void *);
 static int shiftcosts (rtx);
-static int and_xor_ior_costs (rtx, int code);
+static int and_xor_ior_costs (rtx, int);
 static int addsubcosts (rtx);
 static int multcosts (rtx);
 static bool unspec_caller_rtx_p (rtx);
@@ -2995,6 +2995,20 @@ 
         *total = 8;
       return true;
 
+    case EQ:
+      /* An and with a constant compared against zero is
+	 most likely going to be a TST #imm, R0 instruction.
+	 Notice that this does not catch the zero_extract variants from
+	 the md file.  */
+      if (GET_CODE (XEXP (x, 0)) == AND
+	  && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
+	{
+	  *total = 1;
+	  return true;
+	}
+      else
+	return false;
+
     case CONST:
     case LABEL_REF:
     case SYMBOL_REF:
Index: gcc/config/sh/sh.h
===================================================================
--- gcc/config/sh/sh.h	(revision 179778)
+++ gcc/config/sh/sh.h	(working copy)
@@ -1195,6 +1195,9 @@ 
 #define CONST_OK_FOR_K08(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \
 				 && ((HOST_WIDE_INT)(VALUE)) <= 255)
 
+#define ZERO_EXTRACT_ANDMASK(EXTRACT_SZ_RTX, EXTRACT_POS_RTX)\
+  (((1 << INTVAL (EXTRACT_SZ_RTX)) - 1) << INTVAL (EXTRACT_POS_RTX))
+
 #if 0
 #define SECONDARY_INOUT_RELOAD_CLASS(CLASS,MODE,X,ELSE) \
   ((((REGCLASS_HAS_FP_REG (CLASS) 					\
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 179778)
+++ gcc/config/sh/sh.md	(working copy)
@@ -585,15 +585,164 @@ 
 ;; SImode signed integer comparisons
 ;; -------------------------------------------------------------------------
 
-(define_insn ""
+;; Various patterns to generate the TST #imm, R0 instruction.
+;; Although this adds some pressure on the R0 register, it can potentially
+;; result in faster code, even if the operand has to be moved to R0 first.
+;; This is because on SH4 TST #imm, R0 and MOV Rm, Rn are both MT group 
+;; instructions and thus will be executed in parallel. On SH4A TST #imm, R0
+;; is an EX group instruction but still can be executed in parallel with the
+;; MT group MOV Rm, Rn instruction.
+
+;; Usual TST #imm, R0 patterns for SI, HI and QI
+;; This is usually used for bit patterns other than contiguous bits 
+;; and single bits.
+
+(define_insn "tstsi_t"
   [(set (reg:SI T_REG)
-	(eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r")
+	(eq:SI (and:SI (match_operand:SI 0 "logical_operand" "%z,r")
 		       (match_operand:SI 1 "logical_operand" "K08,r"))
 	       (const_int 0)))]
   "TARGET_SH1"
   "tst	%1,%0"
   [(set_attr "type" "mt_group")])
 
+(define_insn "tsthi_t"
+  [(set (reg:SI T_REG)
+	(eq:SI (subreg:SI (and:HI (match_operand:HI 0 "logical_operand" "%z")
+				  (match_operand 1 "const_int_operand")) 0)
+	       (const_int 0)))]
+  "TARGET_SH1
+   && CONST_OK_FOR_K08 (INTVAL (operands[1]))"
+  "tst	%1,%0"
+  [(set_attr "type" "mt_group")])
+
+(define_insn "tstqi_t"
+  [(set (reg:SI T_REG)
+	(eq:SI (subreg:SI (and:QI (match_operand:QI 0 "logical_operand" "%z")
+				  (match_operand 1 "const_int_operand")) 0)
+	       (const_int 0)))]
+  "TARGET_SH1
+   && (CONST_OK_FOR_K08 (INTVAL (operands[1])) 
+       || CONST_OK_FOR_I08 (INTVAL (operands[1])))"
+{
+  operands[1] = GEN_INT (INTVAL (operands[1]) & 255);
+  return "tst	%1,%0";
+}
+  [(set_attr "type" "mt_group")])
+
+;; Test low QI subreg against zero.
+;; This avoids unnecessary zero extension before the test.
+
+(define_insn "tstqi_t_zero"
+  [(set (reg:SI T_REG)
+	(eq:SI (match_operand:QI 0 "logical_operand" "z") (const_int 0)))]
+  "TARGET_SH1"
+  "tst	#255,%0"
+  [(set_attr "type" "mt_group")])
+
+;; Extract LSB, negate and store in T bit.
+
+(define_insn "tstsi_t_and_not"
+  [(set (reg:SI T_REG)
+	 (and:SI (not:SI (match_operand:SI 0 "logical_operand" "z"))
+		 (const_int 1)))]
+  "TARGET_SH1"
+  "tst	#1,%0"
+  [(set_attr "type" "mt_group")])
+
+;; Extract contiguous bits and compare them against zero.
+
+(define_insn "tstsi_t_zero_extract_eq"
+  [(set (reg:SI T_REG)
+	(eq:SI (zero_extract:SI (match_operand 0 "logical_operand" "z")
+		(match_operand:SI 1 "const_int_operand")
+		(match_operand:SI 2 "const_int_operand"))
+         (const_int 0)))]
+  "TARGET_SH1
+   && CONST_OK_FOR_K08 (ZERO_EXTRACT_ANDMASK (operands[1], operands[2]))"
+{
+  operands[1] = GEN_INT (ZERO_EXTRACT_ANDMASK (operands[1], operands[2]));
+  return "tst	%1,%0";
+}
+  [(set_attr "type" "mt_group")])
+
+;; This split is required when testing bits in a QI subreg.
+
+(define_split
+  [(set (reg:SI T_REG)
+   (eq:SI (if_then_else:SI (zero_extract:SI
+			    (match_operand 0 "logical_operand" "")
+			    (match_operand 1 "const_int_operand")
+			    (match_operand 2 "const_int_operand"))
+			   (match_operand 3 "const_int_operand")
+			   (const_int 0))
+	  (const_int 0)))]
+  "TARGET_SH1
+   && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) == INTVAL (operands[3])
+   && CONST_OK_FOR_K08 (INTVAL (operands[3]))"
+  [(set (reg:SI T_REG) (eq:SI (and:SI (match_dup 0) (match_dup 3))
+			      (const_int 0)))]
+  "
+{
+  if (GET_MODE (operands[0]) == QImode)
+    operands[0] = simplify_gen_subreg (SImode, operands[0], QImode, 0);
+}")
+
+;; Extract single bit, negate and store it in the T bit.
+;; Not used for SH4A.
+
+(define_insn "tstsi_t_zero_extract_xor"
+  [(set (reg:SI T_REG)
+	(zero_extract:SI (xor:SI (match_operand:SI 0 "logical_operand" "z")
+			  (match_operand:SI 3 "const_int_operand"))
+			 (match_operand:SI 1 "const_int_operand")
+			 (match_operand:SI 2 "const_int_operand")))]
+  "TARGET_SH1
+   && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) == INTVAL (operands[3])
+   && CONST_OK_FOR_K08 (INTVAL (operands[3]))"
+  "tst	%3,%0"
+  [(set_attr "type" "mt_group")])
+
+;; Extract single bit, negate and store it in the T bit.
+;; Used for SH4A little endian.
+
+(define_insn "tstsi_t_zero_extract_subreg_xor_little"
+  [(set (reg:SI T_REG)
+	(zero_extract:SI
+	 (subreg:QI (xor:SI (match_operand:SI 0 "logical_operand" "z")
+			    (match_operand:SI 3 "const_int_operand")) 0)
+	 (match_operand:SI 1 "const_int_operand")
+	 (match_operand:SI 2 "const_int_operand")))]
+  "TARGET_SH1 && TARGET_LITTLE_ENDIAN
+   && ZERO_EXTRACT_ANDMASK (operands[1], operands[2])
+      == (INTVAL (operands[3]) & 255)
+   && CONST_OK_FOR_K08 (INTVAL (operands[3]) & 255)"
+{
+  operands[3] = GEN_INT (INTVAL (operands[3]) & 255);
+  return "tst	%3,%0";
+}
+  [(set_attr "type" "mt_group")])
+
+;; Extract single bit, negate and store it in the T bit.
+;; Used for SH4A big endian.
+
+(define_insn "tstsi_t_zero_extract_subreg_xor_big"
+  [(set (reg:SI T_REG)
+	(zero_extract:SI
+	 (subreg:QI (xor:SI (match_operand:SI 0 "logical_operand" "z")
+			    (match_operand:SI 3 "const_int_operand")) 3)
+	 (match_operand:SI 1 "const_int_operand")
+	 (match_operand:SI 2 "const_int_operand")))]
+  "TARGET_SH1 && ! TARGET_LITTLE_ENDIAN
+   && ZERO_EXTRACT_ANDMASK (operands[1], operands[2])
+      == (INTVAL (operands[3]) & 255)
+   && CONST_OK_FOR_K08 (INTVAL (operands[3]) & 255)"
+{
+  operands[3] = GEN_INT (INTVAL (operands[3]) & 255);
+  return "tst	%3,%0";
+}
+  [(set_attr "type" "mt_group")])
+
 ;; ??? Perhaps should only accept reg/constant if the register is reg 0.
 ;; That would still allow reload to create cmpi instructions, but would
 ;; perhaps allow forcing the constant into a register when that is better.
@@ -1157,7 +1306,7 @@ 
    && (arith_reg_operand (operands[1], SImode)
        || (immediate_operand (operands[1], SImode)
 	   && satisfies_constraint_I08 (operands[1])))"
-  "bt 0f\;mov %1,%0\\n0:"
+  "bt	0f\;mov	%1,%0\\n0:"
   [(set_attr "type" "mt_group,arith") ;; poor approximation
    (set_attr "length" "4")])
 
@@ -1170,7 +1319,7 @@ 
    && (arith_reg_operand (operands[1], SImode)
        || (immediate_operand (operands[1], SImode)
 	   && satisfies_constraint_I08 (operands[1])))"
-  "bf 0f\;mov %1,%0\\n0:"
+  "bf	0f\;mov	%1,%0\\n0:"
   [(set_attr "type" "mt_group,arith") ;; poor approximation
    (set_attr "length" "4")])
 
@@ -3015,9 +3164,9 @@ 
 ;; -------------------------------------------------------------------------
 
 (define_insn "*andsi3_compact"
-  [(set (match_operand:SI 0 "arith_reg_dest" "=r,z")
+  [(set (match_operand:SI 0 "arith_reg_dest" "=z,r")
 	(and:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
-		(match_operand:SI 2 "logical_operand" "r,K08")))]
+		(match_operand:SI 2 "logical_operand" "K08,r")))]
   "TARGET_SH1"
   "and	%2,%0"
   [(set_attr "type" "arith")])