diff mbox series

[V7,1/3] split complicate 64bit constant to memory

Message ID 20240729050757.2500551-1-guojiufu@linux.ibm.com
State New
Headers show
Series [V7,1/3] split complicate 64bit constant to memory | expand

Commit Message

Jiufu Guo July 29, 2024, 5:07 a.m. UTC
Hi,

Sometimes, a complicated constant is built via 3(or more)
instructions.  Generally speaking, it would not be as fast
as loading it from the constant pool (as the discussions in
PR63281):
"ld" is one instruction.  If consider "address/toc" adjust,
we may count it as 2 instructions. And "pld" may need fewer
cycles.

Adding --param=rs6000-min-insns-constant-in-pool helps to
control the instruction number threshold for different scenarios.

As known, because the constant is load from memory by this
patch,  so this functionality may affect the cache missing.
While, IMHO, this patch would be still do the right thing.

Compare with the previous version:
This patch serie adds one more patch to tune the threshold
for power10.

Boostrap & regtest pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)

	PR target/63281

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_const): Split constant to
	memory for -m64.
	* config/rs6000/rs6000.opt (rs6000-min-insns-constant-in-pool): New
	parameter.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/const_anchors.c: Test final-rtl.
	* gcc.target/powerpc/parall_5insn_const.c: Add option
	--param=rs6000-min-insns-constant-in-pool=5 to keep the original test.
	* gcc.target/powerpc/pr106550.c: Likewise.
	* gcc.target/powerpc/pr106550_1.c: Likewise.
	* gcc.target/powerpc/pr93012.c: Likewise.
	* gcc.target/powerpc/pr87870.c: Update instruction counts.
	* gcc.target/powerpc/pr63281.c: New test.


---
 gcc/config/rs6000/rs6000.cc                   | 19 +++++++++++++++++++
 gcc/config/rs6000/rs6000.opt                  |  5 +++++
 .../gcc.target/powerpc/const_anchors.c        |  5 +++--
 .../gcc.target/powerpc/parall_5insn_const.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr106550.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr106550_1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr63281.c    | 11 +++++++++++
 gcc/testsuite/gcc.target/powerpc/pr87870.c    |  5 ++++-
 gcc/testsuite/gcc.target/powerpc/pr93012.c    |  2 +-
 9 files changed, 46 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr63281.c

Comments

Jiufu Guo Aug. 14, 2024, 8:29 a.m. UTC | #1
Hi,

I would like to have a ping for these patches.

BR,
Jeff(Jiufu Guo)

Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> Sometimes, a complicated constant is built via 3(or more)
> instructions.  Generally speaking, it would not be as fast
> as loading it from the constant pool (as the discussions in
> PR63281):
> "ld" is one instruction.  If consider "address/toc" adjust,
> we may count it as 2 instructions. And "pld" may need fewer
> cycles.
>
> Adding --param=rs6000-min-insns-constant-in-pool helps to
> control the instruction number threshold for different scenarios.
>
> As known, because the constant is load from memory by this
> patch,  so this functionality may affect the cache missing.
> While, IMHO, this patch would be still do the right thing.
>
> Compare with the previous version:
> This patch serie adds one more patch to tune the threshold
> for power10.
>
> Boostrap & regtest pass on ppc64{,le}.
> Is this ok for trunk?
>
> BR,
> Jeff (Jiufu Guo)
>
> 	PR target/63281
>
> gcc/ChangeLog:
>
> 	* config/rs6000/rs6000.cc (rs6000_emit_set_const): Split constant to
> 	memory for -m64.
> 	* config/rs6000/rs6000.opt (rs6000-min-insns-constant-in-pool): New
> 	parameter.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/powerpc/const_anchors.c: Test final-rtl.
> 	* gcc.target/powerpc/parall_5insn_const.c: Add option
> 	--param=rs6000-min-insns-constant-in-pool=5 to keep the original test.
> 	* gcc.target/powerpc/pr106550.c: Likewise.
> 	* gcc.target/powerpc/pr106550_1.c: Likewise.
> 	* gcc.target/powerpc/pr93012.c: Likewise.
> 	* gcc.target/powerpc/pr87870.c: Update instruction counts.
> 	* gcc.target/powerpc/pr63281.c: New test.
>
>
> ---
>  gcc/config/rs6000/rs6000.cc                   | 19 +++++++++++++++++++
>  gcc/config/rs6000/rs6000.opt                  |  5 +++++
>  .../gcc.target/powerpc/const_anchors.c        |  5 +++--
>  .../gcc.target/powerpc/parall_5insn_const.c   |  2 +-
>  gcc/testsuite/gcc.target/powerpc/pr106550.c   |  2 +-
>  gcc/testsuite/gcc.target/powerpc/pr106550_1.c |  2 +-
>  gcc/testsuite/gcc.target/powerpc/pr63281.c    | 11 +++++++++++
>  gcc/testsuite/gcc.target/powerpc/pr87870.c    |  5 ++++-
>  gcc/testsuite/gcc.target/powerpc/pr93012.c    |  2 +-
>  9 files changed, 46 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr63281.c
>
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 2046a831938..ec384e87868 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10240,6 +10240,25 @@ rs6000_emit_set_const (rtx dest, rtx source)
>  	  c = sext_hwi (c, 32);
>  	  emit_move_insn (lo, GEN_INT (c));
>  	}
> +
> +      /* Use base_reg_operand to avoid spliting "r0=xxx" to "r0=[r0+off]"
> +	 after RA when reusing the DEST register to build the value.  */
> +      else if ((can_create_pseudo_p () || base_reg_operand (dest, mode))
> +	       && num_insns_constant (source, mode)
> +		    > rs6000_min_insns_constant_in_pool
> +	       && TARGET_64BIT)
> +	{
> +	  rtx sym = force_const_mem (mode, source);
> +	  if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0))
> +	      && use_toc_relative_ref (XEXP (sym, 0), mode))
> +	    {
> +	      rtx toc = create_TOC_reference (XEXP (sym, 0), dest);
> +	      sym = gen_const_mem (mode, toc);
> +	      set_mem_alias_set (sym, get_TOC_alias_set ());
> +	    }
> +
> +	  emit_move_insn (dest, sym);
> +	}
>        else
>  	rs6000_emit_set_long_const (dest, c);
>        break;
> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index e8ca70340df..a1c0d1e89c5 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -679,3 +679,8 @@ default value is 4.
>  Target Undocumented Joined UInteger Var(rs6000_vect_unroll_reduc_threshold) Init(1) Param
>  When reduction factor computed for a loop exceeds the threshold specified by
>  this parameter, prefer to unroll this loop.  The default value is 1.
> +
> +-param=rs6000-min-insns-constant-in-pool=
> +Target Undocumented Joined UInteger Var(rs6000_min_insns_constant_in_pool) Init(2) IntegerRange(2, 5) Param
> +The minimum instruction number of building a constant to force loading it from
> +the constant pool.
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
> index 542e2674b12..682e773d506 100644
> --- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c
> +++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target has_arch_ppc64 } } */
> -/* { dg-options "-O2" } */
> +/* { dg-options "-O2 -fdump-rtl-final" } */
>  
>  #define C1 0x2351847027482577ULL
>  #define C2 0x2351847027482578ULL
> @@ -17,4 +17,5 @@ void __attribute__ ((noinline)) foo1 (long long *a, long long b)
>      *a++ = C2;
>  }
>  
> -/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */
> +/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
> index e3a9a7264cf..e39479bbf86 100644
> --- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
> +++ b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
> @@ -1,5 +1,5 @@
>  /* { dg-do run } */
> -/* { dg-options "-O2 -mno-prefixed -save-temps" } */
> +/* { dg-options "-O2 -mno-prefixed --param=rs6000-min-insns-constant-in-pool=5 -save-temps" } */
>  /* { dg-require-effective-target has_arch_ppc64 } */
>  
>  /* { dg-final { scan-assembler-times {\mlis\M} 4 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c
> index 92b76ac8811..2b12a7445b3 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr106550.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c
> @@ -1,5 +1,5 @@
>  /* PR target/106550 */
> -/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 --param=rs6000-min-insns-constant-in-pool=3" } */
>  /* { dg-require-effective-target has_arch_ppc64 } */
>  
>  void
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
> index 5ab40d71a56..9da644578a5 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
> @@ -1,7 +1,7 @@
>  /* PR target/106550 */
>  /* { dg-require-effective-target power10_ok } */
>  /* { dg-require-effective-target has_arch_ppc64 } */
> -/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1" } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1 --param=rs6000-min-insns-constant-in-pool=5" } */
>  /* force the constant splitter run after RA: -fdisable-rtl-split1.  */
>  
>  void
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr63281.c b/gcc/testsuite/gcc.target/powerpc/pr63281.c
> new file mode 100644
> index 00000000000..9763a7181fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr63281.c
> @@ -0,0 +1,11 @@
> +/* Check loading constant from memory pool.  */
> +/* { dg-options "-O2 -mpowerpc64" } */
> +
> +void
> +foo (unsigned long long *a)
> +{
> +  *a++ = 0x2351847027482577ULL;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mp?ld\M} 1 { target { lp64 } } } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c b/gcc/testsuite/gcc.target/powerpc/pr87870.c
> index d2108ac3386..09b2e8de901 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr87870.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c
> @@ -25,4 +25,7 @@ test3 (void)
>    return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad;
>  }
>  
> -/* { dg-final { scan-assembler-not {\mld\M} } } */
> +/* test3 is using "ld" to load the value to r3 and r4. So there are 2 'ld's
> +   test0, test1 and test2 are using "li", then check 6 'li's.  */
> +/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mli\M} 6 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> index 4f764d0576f..a89a24aee36 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> @@ -1,6 +1,6 @@
>  /* PR target/93012 */
>  /* { dg-do compile { target lp64 } } */
> -/* { dg-options "-O2 -std=c99" } */
> +/* { dg-options "-O2 -std=c99 --param=rs6000-min-insns-constant-in-pool=5" } */
>  
>  unsigned long long msk66() { return 0x6666666666666666ULL; }
>  unsigned long long mskih() { return 0xabcd1234abcd1234ULL; }
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 2046a831938..ec384e87868 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10240,6 +10240,25 @@  rs6000_emit_set_const (rtx dest, rtx source)
 	  c = sext_hwi (c, 32);
 	  emit_move_insn (lo, GEN_INT (c));
 	}
+
+      /* Use base_reg_operand to avoid spliting "r0=xxx" to "r0=[r0+off]"
+	 after RA when reusing the DEST register to build the value.  */
+      else if ((can_create_pseudo_p () || base_reg_operand (dest, mode))
+	       && num_insns_constant (source, mode)
+		    > rs6000_min_insns_constant_in_pool
+	       && TARGET_64BIT)
+	{
+	  rtx sym = force_const_mem (mode, source);
+	  if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0))
+	      && use_toc_relative_ref (XEXP (sym, 0), mode))
+	    {
+	      rtx toc = create_TOC_reference (XEXP (sym, 0), dest);
+	      sym = gen_const_mem (mode, toc);
+	      set_mem_alias_set (sym, get_TOC_alias_set ());
+	    }
+
+	  emit_move_insn (dest, sym);
+	}
       else
 	rs6000_emit_set_long_const (dest, c);
       break;
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index e8ca70340df..a1c0d1e89c5 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -679,3 +679,8 @@  default value is 4.
 Target Undocumented Joined UInteger Var(rs6000_vect_unroll_reduc_threshold) Init(1) Param
 When reduction factor computed for a loop exceeds the threshold specified by
 this parameter, prefer to unroll this loop.  The default value is 1.
+
+-param=rs6000-min-insns-constant-in-pool=
+Target Undocumented Joined UInteger Var(rs6000_min_insns_constant_in_pool) Init(2) IntegerRange(2, 5) Param
+The minimum instruction number of building a constant to force loading it from
+the constant pool.
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
index 542e2674b12..682e773d506 100644
--- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c
+++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target has_arch_ppc64 } } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fdump-rtl-final" } */
 
 #define C1 0x2351847027482577ULL
 #define C2 0x2351847027482578ULL
@@ -17,4 +17,5 @@  void __attribute__ ((noinline)) foo1 (long long *a, long long b)
     *a++ = C2;
 }
 
-/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */
+/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
index e3a9a7264cf..e39479bbf86 100644
--- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
+++ b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -mno-prefixed -save-temps" } */
+/* { dg-options "-O2 -mno-prefixed --param=rs6000-min-insns-constant-in-pool=5 -save-temps" } */
 /* { dg-require-effective-target has_arch_ppc64 } */
 
 /* { dg-final { scan-assembler-times {\mlis\M} 4 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c
index 92b76ac8811..2b12a7445b3 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr106550.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c
@@ -1,5 +1,5 @@ 
 /* PR target/106550 */
-/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 --param=rs6000-min-insns-constant-in-pool=3" } */
 /* { dg-require-effective-target has_arch_ppc64 } */
 
 void
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
index 5ab40d71a56..9da644578a5 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
@@ -1,7 +1,7 @@ 
 /* PR target/106550 */
 /* { dg-require-effective-target power10_ok } */
 /* { dg-require-effective-target has_arch_ppc64 } */
-/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1" } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1 --param=rs6000-min-insns-constant-in-pool=5" } */
 /* force the constant splitter run after RA: -fdisable-rtl-split1.  */
 
 void
diff --git a/gcc/testsuite/gcc.target/powerpc/pr63281.c b/gcc/testsuite/gcc.target/powerpc/pr63281.c
new file mode 100644
index 00000000000..9763a7181fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr63281.c
@@ -0,0 +1,11 @@ 
+/* Check loading constant from memory pool.  */
+/* { dg-options "-O2 -mpowerpc64" } */
+
+void
+foo (unsigned long long *a)
+{
+  *a++ = 0x2351847027482577ULL;
+}
+
+/* { dg-final { scan-assembler-times {\mp?ld\M} 1 { target { lp64 } } } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c b/gcc/testsuite/gcc.target/powerpc/pr87870.c
index d2108ac3386..09b2e8de901 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr87870.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c
@@ -25,4 +25,7 @@  test3 (void)
   return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad;
 }
 
-/* { dg-final { scan-assembler-not {\mld\M} } } */
+/* test3 is using "ld" to load the value to r3 and r4. So there are 2 'ld's
+   test0, test1 and test2 are using "li", then check 6 'li's.  */
+/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mli\M} 6 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
index 4f764d0576f..a89a24aee36 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
@@ -1,6 +1,6 @@ 
 /* PR target/93012 */
 /* { dg-do compile { target lp64 } } */
-/* { dg-options "-O2 -std=c99" } */
+/* { dg-options "-O2 -std=c99 --param=rs6000-min-insns-constant-in-pool=5" } */
 
 unsigned long long msk66() { return 0x6666666666666666ULL; }
 unsigned long long mskih() { return 0xabcd1234abcd1234ULL; }