diff mbox

[SH,committed] Fix PR 67391

Message ID 1443355401.2509.121.camel@t-online.de
State New
Headers show

Commit Message

Oleg Endo Sept. 27, 2015, 12:03 p.m. UTC
On Wed, 2015-09-23 at 21:04 +0900, Oleg Endo wrote:
> Hi,
> 
> The attached patch fixes PR 67391.  Some additional reg overlapping were
> added to the addsi3 patterns while making LRA on SH work, but not all of
> them seem to be good.  Removing them, seems to be working just fine.
> Tested on sh-elf (LRA enabled) with make -k check
> RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> and by Kaz on sh4-linux.
> 
> Committed to trunk as r228046 and to the GCC 5 branch as r228047.

This has opened a small can of worms.  The follow up patch is attached
and has been commited to trunk as r228176.  Tested by me on sh-elf and
by Kaz on sh4-linux with LRA enabled/disabled.

As a positive side effect, we get some code size reduction on the CSiBE
set: 3345527 bytes -> 3334351 bytes    -11176 bytes / -0.334058 %

However, this is only when LRA is disabled because of some problems with
LRA and its usage/handling of addsi3 insns.

Backport to GCC 5 branch will follow.

Cheers,
Oleg

gcc/ChangeLog:
	PR target/67391
	* config/sh/sh-protos.h (sh_lra_p): Declare.
	* config/sh/sh.c (sh_lra_p): Make non-static.
	* config/sh/sh.md (addsi3): Use arith_reg_dest for operands[0] and
	arith_reg_operand for operands[1].  Remove TARGET_SHMEDIA case.
	Expand into addsi3_scr if operands[2] if needed.
	(*addsi3_compact): Rename to *addsi3_compact_lra.  Use
	arith_reg_operand for operands[1].  Allow it only when LRA is enabled.
	(addsi3_scr, *addsi3): New insn_and_split patterns.

Comments

Oleg Endo Sept. 28, 2015, 1:45 p.m. UTC | #1
On Sun, 2015-09-27 at 21:03 +0900, Oleg Endo wrote:
> On Wed, 2015-09-23 at 21:04 +0900, Oleg Endo wrote:
> > Hi,
> > 
> > The attached patch fixes PR 67391.  Some additional reg overlapping were
> > added to the addsi3 patterns while making LRA on SH work, but not all of
> > them seem to be good.  Removing them, seems to be working just fine.
> > Tested on sh-elf (LRA enabled) with make -k check
> > RUNTESTFLAGS="--target_board=sh-sim
> > \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> > and by Kaz on sh4-linux.
> > 
> > Committed to trunk as r228046 and to the GCC 5 branch as r228047.
> 
> This has opened a small can of worms.  The follow up patch is attached
> and has been commited to trunk as r228176.  Tested by me on sh-elf and
> by Kaz on sh4-linux with LRA enabled/disabled.
> 
> As a positive side effect, we get some code size reduction on the CSiBE
> set: 3345527 bytes -> 3334351 bytes    -11176 bytes / -0.334058 %
> 
> However, this is only when LRA is disabled because of some problems with
> LRA and its usage/handling of addsi3 insns.
> 
> Backport to GCC 5 branch will follow.

Now also committed to GCC 5 branch as r228201.  Tested on the branch as
above.

Cheers,
Oleg
diff mbox

Patch

Index: gcc/config/sh/sh-protos.h
===================================================================
--- gcc/config/sh/sh-protos.h	(revision 228117)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -93,6 +93,7 @@ 
 extern rtx sh_fsca_int2sf (void);
 
 /* Declare functions defined in sh.c and used in templates.  */
+extern bool sh_lra_p (void);
 
 extern const char *output_branch (int, rtx_insn *, rtx *);
 extern const char *output_ieee_ccmpeq (rtx_insn *, rtx *);
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 228117)
+++ gcc/config/sh/sh.c	(working copy)
@@ -216,7 +216,6 @@ 
 static int sh_mode_entry (int);
 static int sh_mode_exit (int);
 static int sh_mode_priority (int entity, int n);
-static bool sh_lra_p (void);
 
 static rtx mark_constant_pool_use (rtx);
 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
@@ -14507,7 +14506,7 @@ 
 */
 
 /* Return true if we use LRA instead of reload pass.  */
-static bool
+bool
 sh_lra_p (void)
 {
   return sh_lra_flag;
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 228117)
+++ gcc/config/sh/sh.md	(working copy)
@@ -2122,13 +2122,19 @@ 
 })
 
 (define_expand "addsi3"
-  [(set (match_operand:SI 0 "arith_reg_operand" "")
-	(plus:SI (match_operand:SI 1 "arith_operand" "")
-		 (match_operand:SI 2 "arith_or_int_operand" "")))]
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(plus:SI (match_operand:SI 1 "arith_reg_operand")
+		 (match_operand:SI 2 "arith_or_int_operand")))]
   ""
 {
-  if (TARGET_SHMEDIA)
-    operands[1] = force_reg (SImode, operands[1]);
+  if (TARGET_SH1 && !arith_operand (operands[2], SImode))
+    {
+      if (!sh_lra_p () || reg_overlap_mentioned_p (operands[0], operands[1]))
+	{
+	  emit_insn (gen_addsi3_scr (operands[0], operands[1], operands[2]));
+	  DONE;
+	}
+    }
 })
 
 (define_insn "addsi3_media"
@@ -2163,15 +2169,22 @@ 
 ;; copy or constant load before the actual add insn.
 ;; Use u constraint for that case to avoid the invalid value in the stack
 ;; pointer.
-(define_insn_and_split "*addsi3_compact"
+;; This also results in better code when LRA is not used.  However, we have
+;; to use different sets of patterns and the order of these patterns is
+;; important.
+;; In some cases the constant zero might end up in operands[2] of the
+;; patterns.  We have to accept that and convert it into a reg-reg move.
+(define_insn_and_split "*addsi3_compact_lra"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r,&u")
-	(plus:SI (match_operand:SI 1 "arith_operand" "%0,r")
+	(plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r")
 		 (match_operand:SI 2 "arith_or_int_operand" "rI08,rn")))]
-  "TARGET_SH1"
+  "TARGET_SH1 && sh_lra_p ()
+   && (! reg_overlap_mentioned_p (operands[0], operands[1])
+       || arith_operand (operands[2], SImode))"
   "@
 	add	%2,%0
 	#"
-  "reload_completed
+  "&& reload_completed
    && ! reg_overlap_mentioned_p (operands[0], operands[1])"
   [(set (match_dup 0) (match_dup 2))
    (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))]
@@ -2182,6 +2195,58 @@ 
 }
   [(set_attr "type" "arith")])
 
+(define_insn_and_split "addsi3_scr"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,&u,&u")
+	(plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r,r")
+		 (match_operand:SI 2 "arith_or_int_operand" "rI08,r,n")))
+   (clobber (match_scratch:SI 3 "=X,X,&u"))]
+  "TARGET_SH1"
+  "@
+	add	%2,%0
+	#
+	#"
+  "&& reload_completed"
+  [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))]
+{
+  if (operands[2] == const0_rtx)
+    {
+      emit_move_insn (operands[0], operands[1]);
+      DONE;
+    }
+
+  if (CONST_INT_P (operands[2]) && !satisfies_constraint_I08 (operands[2]))
+    {
+      if (reg_overlap_mentioned_p (operands[0], operands[1]))
+	{
+	  emit_move_insn (operands[3], operands[2]);
+	  emit_move_insn (operands[0], operands[1]);
+	  operands[2] = operands[3];
+	}
+      else
+	{
+	  emit_move_insn (operands[0], operands[2]);
+	  operands[2] = operands[1];
+	}
+    }
+  else if (!reg_overlap_mentioned_p (operands[0], operands[1]))
+    emit_move_insn (operands[0], operands[1]);
+}
+  [(set_attr "type" "arith")])
+
+(define_insn_and_split "*addsi3"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+	(plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r")
+		 (match_operand:SI 2 "arith_operand" "rI08,Z")))]
+  "TARGET_SH1 && !sh_lra_p ()"
+  "@
+	add	%2,%0
+	#"
+  "&& operands[2] == const0_rtx"
+  [(set (match_dup 0) (match_dup 1))]
+{
+}
+  [(set_attr "type" "arith")])
+
 ;; -------------------------------------------------------------------------
 ;; Subtraction instructions
 ;; -------------------------------------------------------------------------