diff mbox

[AARCH64] Enable fuse-caller-save for AARCH64

Message ID 538AF9C7.7040408@mentor.com
State New
Headers show

Commit Message

Tom de Vries June 1, 2014, 10 a.m. UTC
Richard,

This patch:
- adds the clobbers required by TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
   to CALL_INSN_FUNCTION_USAGE,
- sets TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true, which enables
   the fuse-caller-save optimisation, and
- adds an aarch64 fuse-caller-save test-case.

Built and tested on aarch64-linux-gnu.

OK for trunk?

Thanks,
- Tom

Comments

Marcus Shawcroft June 18, 2014, 2:59 p.m. UTC | #1
On 1 June 2014 11:00, Tom de Vries <Tom_deVries@mentor.com> wrote:
> Richard,
>
> This patch:
> - adds the for TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS required
>   clobbers in CALL_INSN_FUNCTION_USAGE,
> - sets TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true, which
> enables
>   the fuse-caller-save optimisation, and
> - adds an aarch64 fuse-caller-save test-case.
>
> Build and tested on aarch64-linux-gnu.
>
> OK for trunk?
>
> Thanks,
> - Tom
>

OK
/Marcus
Richard Henderson June 19, 2014, 3:21 a.m. UTC | #2
On 06/01/2014 03:00 AM, Tom de Vries wrote:
> +/* Emit call insn with PAT and do aarch64-specific handling.  */
> +
> +bool
> +aarch64_emit_call_insn (rtx pat)
> +{
> +  rtx insn = emit_call_insn (pat);
> +
> +  rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
> +  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
> +  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
> +}
> +

Which can't have been bootstrapped, since this has no return stmt.
Why the bool return type anyway?  Nothing appears to use it.


r~
Richard Henderson June 19, 2014, 3:53 a.m. UTC | #3
On 06/01/2014 03:00 AM, Tom de Vries wrote:
> +aarch64_emit_call_insn (rtx pat)
> +{
> +  rtx insn = emit_call_insn (pat);
> +
> +  rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
> +  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
> +  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));

Actually, I'd like to know more about how this is supposed to work.

Why are you only marking the two registers that would be used by a PLT entry,
but not those clobbered by the ld.so trampoline, or indeed the unknown function
that would be called from the PLT?

Oh, I see, looking at the code we do actually follow the cgraph and make sure
it is a direct call with a known destination.  So, in fact, it's only the
registers that could be clobbered by ld branch islands (so these two are still
correct for aarch64).

This means the documentation is actually wrong when it mentions PLTs at all.

Do we in fact make sure this isn't an ifunc resolver?  I don't immediately see
how those get wired up in the cgraph...


r~
diff mbox

Patch

2014-06-01  Tom de Vries  <tom@codesourcery.com>

	* config/aarch64/aarch64-protos.h (aarch64_emit_call_insn): Declare.
	* config/aarch64/aarch64.c
	(TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS): Redefine as true.
	(aarch64_emit_call_insn): New function.
	(aarch64_load_symref_appropriately): Use aarch64_emit_call_insn instead
	of emit_call_insn.
	* config/aarch64/aarch64.md (define_expand "call_internal")
	(define_expand "call_value_internal", define_expand "sibcall_internal")
	(define_expand "sibcall_value_internal"): New.
	(define_expand "call", define_expand "call_value")
	(define_expand "sibcall", define_expand "sibcall_value"): Use internal
	expand variant and aarch64_emit_call_insn.

	* gcc.target/aarch64/fuse-caller-save.c: New test.
---
 gcc/config/aarch64/aarch64-protos.h                |  1 +
 gcc/config/aarch64/aarch64.c                       | 17 ++++++-
 gcc/config/aarch64/aarch64.md                      | 52 +++++++++++++++++++++-
 .../gcc.target/aarch64/fuse-caller-save.c          | 25 +++++++++++
 4 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/fuse-caller-save.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 68d488d..d3580d3 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -244,6 +244,7 @@  void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
 void aarch64_init_expanders (void);
 void aarch64_print_operand (FILE *, rtx, char);
 void aarch64_print_operand_address (FILE *, rtx);
+bool aarch64_emit_call_insn (rtx);
 
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f69457a..4f33a3e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -695,7 +695,7 @@  aarch64_load_symref_appropriately (rtx dest, rtx imm,
 	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
 
 	start_sequence ();
-	emit_call_insn (gen_tlsgd_small (result, imm));
+	aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
 	insns = get_insns ();
 	end_sequence ();
 
@@ -3401,6 +3401,18 @@  aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
   return true;
 }
 
+/* Emit call insn PAT with IP0/IP1 recorded as clobbered.  Returns true.  */
+
+bool
+aarch64_emit_call_insn (rtx pat)
+{
+  rtx insn = emit_call_insn (pat);
+  rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
+  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
+  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
+  return true;
+}
+
 enum machine_mode
 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
 {
@@ -9581,6 +9593,9 @@  aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
 #undef TARGET_FLAGS_REGNUM
 #define TARGET_FLAGS_REGNUM CC_REGNUM
 
+#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
+#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-aarch64.h"
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index fec2ea8..aeccd25 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -420,6 +420,12 @@ 
 ;; Subroutine calls and sibcalls
 ;; -------------------------------------------------------------------
 
+(define_expand "call_internal"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1 "general_operand" ""))
+	      (use (match_operand 2 "" ""))
+	      (clobber (reg:DI LR_REGNUM))])])
+
 (define_expand "call"
   [(parallel [(call (match_operand 0 "memory_operand" "")
 		    (match_operand 1 "general_operand" ""))
@@ -428,7 +434,7 @@ 
   ""
   "
   {
-    rtx callee;
+    rtx callee, pat;
 
     /* In an untyped call, we can get NULL for operand 2.  */
     if (operands[2] == NULL)
@@ -442,6 +448,10 @@ 
 	? aarch64_is_long_call_p (callee)
 	: !REG_P (callee))
       XEXP (operands[0], 0) = force_reg (Pmode, callee);
+
+    pat = gen_call_internal (operands[0], operands[1], operands[2]);
+    aarch64_emit_call_insn (pat);
+    DONE;
   }"
 )
 
@@ -466,6 +476,13 @@ 
   [(set_attr "type" "call")]
 )
 
+(define_expand "call_value_internal"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (match_operand 1 "memory_operand" "")
+			 (match_operand 2 "general_operand" "")))
+	      (use (match_operand 3 "" ""))
+	      (clobber (reg:DI LR_REGNUM))])])
+
 (define_expand "call_value"
   [(parallel [(set (match_operand 0 "" "")
 		   (call (match_operand 1 "memory_operand" "")
@@ -475,7 +492,7 @@ 
   ""
   "
   {
-    rtx callee;
+    rtx callee, pat;
 
     /* In an untyped call, we can get NULL for operand 3.  */
     if (operands[3] == NULL)
@@ -489,6 +506,11 @@ 
 	? aarch64_is_long_call_p (callee)
 	: !REG_P (callee))
       XEXP (operands[1], 0) = force_reg (Pmode, callee);
+
+    pat = gen_call_value_internal (operands[0], operands[1], operands[2],
+                                   operands[3]);
+    aarch64_emit_call_insn (pat);
+    DONE;
   }"
 )
 
@@ -516,6 +538,12 @@ 
   [(set_attr "type" "call")]
 )
 
+(define_expand "sibcall_internal"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1 "general_operand" ""))
+	      (return)
+	      (use (match_operand 2 "" ""))])])
+
 (define_expand "sibcall"
   [(parallel [(call (match_operand 0 "memory_operand" "")
 		    (match_operand 1 "general_operand" ""))
@@ -523,15 +551,28 @@ 
 	      (use (match_operand 2 "" ""))])]
   ""
   {
+    rtx pat;
+
     if (!REG_P (XEXP (operands[0], 0))
        && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF))
      XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0));
 
     if (operands[2] == NULL_RTX)
       operands[2] = const0_rtx;
+
+    pat = gen_sibcall_internal (operands[0], operands[1], operands[2]);
+    aarch64_emit_call_insn (pat);
+    DONE;
   }
 )
 
+(define_expand "sibcall_value_internal"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (match_operand 1 "memory_operand" "")
+			 (match_operand 2 "general_operand" "")))
+	      (return)
+	      (use (match_operand 3 "" ""))])])
+
 (define_expand "sibcall_value"
   [(parallel [(set (match_operand 0 "" "")
 		   (call (match_operand 1 "memory_operand" "")
@@ -540,12 +581,19 @@ 
 	      (use (match_operand 3 "" ""))])]
   ""
   {
+    rtx pat;
+
     if (!REG_P (XEXP (operands[1], 0))
        && (GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF))
      XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0));
 
     if (operands[3] == NULL_RTX)
       operands[3] = const0_rtx;
+
+    pat = gen_sibcall_value_internal (operands[0], operands[1], operands[2],
+                                      operands[3]);
+    aarch64_emit_call_insn (pat);
+    DONE;
   }
 )
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fuse-caller-save.c b/gcc/testsuite/gcc.target/aarch64/fuse-caller-save.c
new file mode 100644
index 0000000..c64a4d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fuse-caller-save.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fuse-caller-save" } */
+/* Test -fuse-caller-save: expect two [sp, -16]! and no [sp, -32]!.  */
+
+static int __attribute__((noinline))
+bar (int x)
+{
+  return x + 3;
+}
+
+int __attribute__((noinline))
+foo (int y)
+{
+  return y + bar (y);
+}
+
+int
+main (void)
+{
+  return !(foo (5) == 13);
+}
+
+/* { dg-final { scan-assembler-times "\\\[sp, -16\\\]!" 2 } } */
+/* { dg-final { scan-assembler-not "\\\[sp, -32\\\]!" } } */
+
-- 
1.9.1