diff mbox

[SH] Add simple_return pattern

Message ID 5051C675.6060803@st.com
State New
Headers show

Commit Message

Christian Bruel Sept. 13, 2012, 11:41 a.m. UTC
Hi Kaz,

The failure turned out to be issues with the profile count and handling
or region partitioning. So, I prefer to handle those separately,
For now, I disable shrink-wrap when partitioning, even if the problem
seems to have disappeared with the more constrained heuristics. This is
probably latent also on other targets BTW.

I added a sh_can_use_simple_return_p function that makes the heuristic
refinements more convenient. For instance, measured that shrink-wrap is
generally not good when optimizing for size because we might introduce
new return instructions or split blocks to avoid the epilogue, that is
still in the code somewhere anyway.

Cycle-accurate benchmarks show a few very small improvements (there and
there, about max 2%. accordingly, the prologue is rarely in the critical
path...) but no regression. Manual assembly peering of CSiBE show that
the transformation are decent.

Checked with all assertions this time, Candidate for trunk.

Many thanks

Christian


On 09/11/2012 03:05 AM, Kaz Kojima wrote:
> Christian Bruel <christian.bruel@st.com> wrote:
>> This patch implements the simple_return pattern to enable -fshrink-wrap
>> on SH. It also clean up some redundancies for expand_epilogue (called
>> twice from the "return" and "epilogue" patterns and the
>> sh_expand_prologue parameter type.
>>
>> No regressions with sh-superh-elf and sh4-linux gcc testsuites.
> 
> With the patch + revision 191106, I've got a new failure:
> 
> FAIL: gcc.dg/tree-prof/bb-reorg.c compilation,  -fprofile-use -D_PROFILE_USE (internal compiler error)
> 
> for sh4-unknown-linux-gnu.  My testsuite/gcc/gcc.log says
> 
> /exp/ldroot/dodes/xsh-gcc/gcc/xgcc -B/exp/ldroot/dodes/xsh-gcc/gcc/ /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c -fno-diagnostics-show-caret -O2 -freorder-blocks-and-partition -fprofile-use -D_PROFILE_USE -lm -o /exp/ldroot/dodes/xsh-gcc/gcc/testsuite/gcc/bb-reorg.x02
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c: In function 'main':
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: error: EDGE_CROSSING missing across section boundary
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: internal compiler error: verify_flow_info failed
> Please submit a full bug report,
> 
> Regards,
> 	kaz
>

Comments

Kaz Kojima Sept. 13, 2012, 1:16 p.m. UTC | #1
Christian Bruel <christian.bruel@st.com> wrote:
> The failure turned out to be issues with the profile count and handling
> or region partitioning. So, I prefer to handle those separately,
> For now, I disable shrink-wrap when partitioning, even if the problem
> seems to have disappeared with the more constrained heuristics. This is
> probably latent also on other targets BTW.
> 
> I added a sh_can_use_simple_return_p function that makes the heuristic
> refinements more convenient. For instance, measured that shrink-wrap is
> generally not good when optimizing for size because we might introduce
> new return instructions or split blocks to avoid the epilogue, that is
> still in the code somewhere anyway.
> 
> Cycle-accurate benchmarks show a few very small improvements (there and
> there, about max 2%. accordingly, the prologue is rarely in the critical
> path...) but no regression. Manual assembly peering of CSiBE show that
> the transformation are decent.
> 
> Checked with all assertions this time, Candidate for trunk.

The patch is OK for trunk.  Thanks for looking into the problem.

Regards,
	kaz
diff mbox

Patch

2012-09-12  Christian Bruel  <christian.bruel@st.com>

	PR target/54546
	* config/sh/sh-protos.h (sh_need_epilogue): Delete.
	(sh_can_use_simple_return_p): Declare.
	* config/sh/sh.c (sh_can_use_simple_return_p): Define.
	(sh_need_epilogue, sh_need_epilogue_known): Delete.
	(sh_output_function_epilogue): Remove sh_need_epilogue_known.
	* config/sh/sh.md (simple_return, return): Define.
	(epilogue): Use inline return rtl.
	(sh_expand_epilogue): Cleanup parameters boolean type.
	* config/sh/iterators.md (any_return): New iterator.

Index: config/sh/sh-protos.h
===================================================================
--- config/sh/sh-protos.h	(revision 191129)
+++ config/sh/sh-protos.h	(working copy)
@@ -117,7 +117,6 @@  extern rtx get_fpscr_rtx (void);
 extern int sh_media_register_for_return (void);
 extern void sh_expand_prologue (void);
 extern void sh_expand_epilogue (bool);
-extern bool sh_need_epilogue (void);
 extern void sh_set_return_address (rtx, rtx);
 extern int initial_elimination_offset (int, int);
 extern bool fldi_ok (void);
@@ -155,4 +154,5 @@  extern int sh2a_get_function_vector_number (rtx);
 extern bool sh2a_is_function_vector_call (rtx);
 extern void sh_fix_range (const char *);
 extern bool sh_hard_regno_mode_ok (unsigned int, enum machine_mode);
+extern bool sh_can_use_simple_return_p (void);
 #endif /* ! GCC_SH_PROTOS_H */
Index: config/sh/sh.c
===================================================================
--- config/sh/sh.c	(revision 191129)
+++ config/sh/sh.c	(working copy)
@@ -7899,24 +7899,6 @@  sh_expand_epilogue (bool sibcall_p)
     emit_use (gen_rtx_REG (SImode, PR_REG));
 }
 
-static int sh_need_epilogue_known = 0;
-
-bool
-sh_need_epilogue (void)
-{
-  if (! sh_need_epilogue_known)
-    {
-      rtx epilogue;
-
-      start_sequence ();
-      sh_expand_epilogue (0);
-      epilogue = get_insns ();
-      end_sequence ();
-      sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
-    }
-  return sh_need_epilogue_known > 0;
-}
-
 /* Emit code to change the current function's return address to RA.
    TEMP is available as a scratch register, if needed.  */
 
@@ -7996,7 +7978,6 @@  static void
 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
 			     HOST_WIDE_INT size ATTRIBUTE_UNUSED)
 {
-  sh_need_epilogue_known = 0;
 }
 
 static rtx
@@ -12959,4 +12940,34 @@  sh_init_sync_libfuncs (void)
   init_sync_libfuncs (UNITS_PER_WORD);
 }
 
+/* Return true if it is appropriate to emit `ret' instructions in the
+   body of a function.  */
+
+bool
+sh_can_use_simple_return_p (void)
+{
+  HARD_REG_SET live_regs_mask;
+  int d;
+
+  if (! reload_completed || frame_pointer_needed)
+    return false;
+
+  /* Moving prologue around does't reduce the size.  */
+  if (optimize_function_for_size_p (cfun))
+    return false;
+
+  /* Can't optimize CROSSING_JUMPS for now.  */
+  if (flag_reorder_blocks_and_partition)
+    return false;
+
+  /* Finally, allow for pr save.  */
+  d = calc_live_regs (&live_regs_mask);
+
+  if (rounded_frame_size (d) > 4)
+   return false;
+
+  return true;
+
+}
+
 #include "gt-sh.h"
Index: config/sh/iterators.md
===================================================================
--- config/sh/iterators.md	(revision 191129)
+++ config/sh/iterators.md	(working copy)
@@ -34,3 +34,6 @@ 
 (define_mode_attr disp04 [(QI "K04") (HI "K05")])
 (define_mode_attr disp12 [(QI "K12") (HI "K13")])
 
+;; Return codes.
+(define_code_iterator any_return [return simple_return])
+
Index: config/sh/sh.md
===================================================================
--- config/sh/sh.md	(revision 191129)
+++ config/sh/sh.md	(working copy)
@@ -9280,7 +9280,7 @@  label:
   [(return)]
   ""
 {
-  sh_expand_epilogue (1);
+  sh_expand_epilogue (true);
   if (TARGET_SHCOMPACT)
     {
       rtx insn, set;
@@ -10099,9 +10099,13 @@  label:
 }
   [(set_attr "type" "load_media")])
 
+(define_expand "simple_return"
+  [(simple_return)]
+ "sh_can_use_simple_return_p ()")
+
 (define_expand "return"
   [(return)]
-  "reload_completed && ! sh_need_epilogue ()"
+ "reload_completed && epilogue_completed"
 {
   if (TARGET_SHMEDIA)
     {
@@ -10117,8 +10121,8 @@  label:
     }
 })
 
-(define_insn "*return_i"
-  [(return)]
+(define_insn "*<code>_i"
+  [(any_return)]
   "TARGET_SH1 && ! (TARGET_SHCOMPACT
 		    && (crtl->args.info.call_cookie
 			& CALL_COOKIE_RET_TRAMP (1)))
@@ -10244,19 +10248,12 @@  label:
 (define_expand "prologue"
   [(const_int 0)]
   ""
-{
-  sh_expand_prologue ();
-  DONE;
-})
+  "sh_expand_prologue (); DONE;")
 
 (define_expand "epilogue"
   [(return)]
   ""
-{
-  sh_expand_epilogue (0);
-  emit_jump_insn (gen_return ());
-  DONE;
-})
+  "sh_expand_epilogue (false);")
 
 (define_expand "eh_return"
   [(use (match_operand 0 "register_operand" ""))]