Message ID | 20240729050757.2500551-1-guojiufu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [V7,1/3] split complicate 64bit constant to memory | expand |
Hi, I would like to have a ping for these patches. BR, Jeff(Jiufu Guo) Jiufu Guo <guojiufu@linux.ibm.com> writes: > Hi, > > Sometimes, a complicated constant is built via 3(or more) > instructions. Generally speaking, it would not be as fast > as loading it from the constant pool (as the discussions in > PR63281): > "ld" is one instruction. If consider "address/toc" adjust, > we may count it as 2 instructions. And "pld" may need fewer > cycles. > > Adding --param=rs6000-min-insns-constant-in-pool helps to > control the instruction number threshold for different scenarios. > > As known, because the constant is load from memory by this > patch, so this functionality may affect the cache missing. > While, IMHO, this patch would be still do the right thing. > > Compare with the previous version: > This patch serie adds one more patch to tune the threshold > for power10. > > Boostrap & regtest pass on ppc64{,le}. > Is this ok for trunk? > > BR, > Jeff (Jiufu Guo) > > PR target/63281 > > gcc/ChangeLog: > > * config/rs6000/rs6000.cc (rs6000_emit_set_const): Split constant to > memory for -m64. > * config/rs6000/rs6000.opt (rs6000-min-insns-constant-in-pool): New > parameter. > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/const_anchors.c: Test final-rtl. > * gcc.target/powerpc/parall_5insn_const.c: Add option > --param=rs6000-min-insns-constant-in-pool=5 to keep the original test. > * gcc.target/powerpc/pr106550.c: Likewise. > * gcc.target/powerpc/pr106550_1.c: Likewise. > * gcc.target/powerpc/pr93012.c: Likewise. > * gcc.target/powerpc/pr87870.c: Update instruction counts. > * gcc.target/powerpc/pr63281.c: New test. > > > --- > gcc/config/rs6000/rs6000.cc | 19 +++++++++++++++++++ > gcc/config/rs6000/rs6000.opt | 5 +++++ > .../gcc.target/powerpc/const_anchors.c | 5 +++-- > .../gcc.target/powerpc/parall_5insn_const.c | 2 +- > gcc/testsuite/gcc.target/powerpc/pr106550.c | 2 +- > gcc/testsuite/gcc.target/powerpc/pr106550_1.c | 2 +- > gcc/testsuite/gcc.target/powerpc/pr63281.c | 11 +++++++++++ > gcc/testsuite/gcc.target/powerpc/pr87870.c | 5 ++++- > gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +- > 9 files changed, 46 insertions(+), 7 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr63281.c > > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index 2046a831938..ec384e87868 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -10240,6 +10240,25 @@ rs6000_emit_set_const (rtx dest, rtx source) > c = sext_hwi (c, 32); > emit_move_insn (lo, GEN_INT (c)); > } > + > + /* Use base_reg_operand to avoid spliting "r0=xxx" to "r0=[r0+off]" > + after RA when reusing the DEST register to build the value. */ > + else if ((can_create_pseudo_p () || base_reg_operand (dest, mode)) > + && num_insns_constant (source, mode) > + > rs6000_min_insns_constant_in_pool > + && TARGET_64BIT) > + { > + rtx sym = force_const_mem (mode, source); > + if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0)) > + && use_toc_relative_ref (XEXP (sym, 0), mode)) > + { > + rtx toc = create_TOC_reference (XEXP (sym, 0), dest); > + sym = gen_const_mem (mode, toc); > + set_mem_alias_set (sym, get_TOC_alias_set ()); > + } > + > + emit_move_insn (dest, sym); > + } > else > rs6000_emit_set_long_const (dest, c); > break; > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt > index e8ca70340df..a1c0d1e89c5 100644 > --- a/gcc/config/rs6000/rs6000.opt > +++ b/gcc/config/rs6000/rs6000.opt > @@ -679,3 +679,8 @@ default value is 4. > Target Undocumented Joined UInteger Var(rs6000_vect_unroll_reduc_threshold) Init(1) Param > When reduction factor computed for a loop exceeds the threshold specified by > this parameter, prefer to unroll this loop. The default value is 1. > + > +-param=rs6000-min-insns-constant-in-pool= > +Target Undocumented Joined UInteger Var(rs6000_min_insns_constant_in_pool) Init(2) IntegerRange(2, 5) Param > +The minimum instruction number of building a constant to force loading it from > +the constant pool. > \ No newline at end of file > diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c b/gcc/testsuite/gcc.target/powerpc/const_anchors.c > index 542e2674b12..682e773d506 100644 > --- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c > +++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target has_arch_ppc64 } } */ > -/* { dg-options "-O2" } */ > +/* { dg-options "-O2 -fdump-rtl-final" } */ > > #define C1 0x2351847027482577ULL > #define C2 0x2351847027482578ULL > @@ -17,4 +17,5 @@ void __attribute__ ((noinline)) foo1 (long long *a, long long b) > *a++ = C2; > } > > -/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */ > +/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c > index e3a9a7264cf..e39479bbf86 100644 > --- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c > +++ b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c > @@ -1,5 +1,5 @@ > /* { dg-do run } */ > -/* { dg-options "-O2 -mno-prefixed -save-temps" } */ > +/* { dg-options "-O2 -mno-prefixed --param=rs6000-min-insns-constant-in-pool=5 -save-temps" } */ > /* { dg-require-effective-target has_arch_ppc64 } */ > > /* { dg-final { scan-assembler-times {\mlis\M} 4 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c > index 92b76ac8811..2b12a7445b3 100644 > --- a/gcc/testsuite/gcc.target/powerpc/pr106550.c > +++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c > @@ -1,5 +1,5 @@ > /* PR target/106550 */ > -/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10 --param=rs6000-min-insns-constant-in-pool=3" } */ > /* { dg-require-effective-target has_arch_ppc64 } */ > > void > diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c > index 5ab40d71a56..9da644578a5 100644 > --- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c > +++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c > @@ -1,7 +1,7 @@ > /* PR target/106550 */ > /* { dg-require-effective-target power10_ok } */ > /* { dg-require-effective-target has_arch_ppc64 } */ > -/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1" } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1 --param=rs6000-min-insns-constant-in-pool=5" } */ > /* force the constant splitter run after RA: -fdisable-rtl-split1. */ > > void > diff --git a/gcc/testsuite/gcc.target/powerpc/pr63281.c b/gcc/testsuite/gcc.target/powerpc/pr63281.c > new file mode 100644 > index 00000000000..9763a7181fc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr63281.c > @@ -0,0 +1,11 @@ > +/* Check loading constant from memory pool. */ > +/* { dg-options "-O2 -mpowerpc64" } */ > + > +void > +foo (unsigned long long *a) > +{ > + *a++ = 0x2351847027482577ULL; > +} > + > +/* { dg-final { scan-assembler-times {\mp?ld\M} 1 { target { lp64 } } } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c b/gcc/testsuite/gcc.target/powerpc/pr87870.c > index d2108ac3386..09b2e8de901 100644 > --- a/gcc/testsuite/gcc.target/powerpc/pr87870.c > +++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c > @@ -25,4 +25,7 @@ test3 (void) > return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad; > } > > -/* { dg-final { scan-assembler-not {\mld\M} } } */ > +/* test3 is using "ld" to load the value to r3 and r4. So there are 2 'ld's > + test0, test1 and test2 are using "li", then check 6 'li's. */ > +/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mli\M} 6 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c > index 4f764d0576f..a89a24aee36 100644 > --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c > +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c > @@ -1,6 +1,6 @@ > /* PR target/93012 */ > /* { dg-do compile { target lp64 } } */ > -/* { dg-options "-O2 -std=c99" } */ > +/* { dg-options "-O2 -std=c99 --param=rs6000-min-insns-constant-in-pool=5" } */ > > unsigned long long msk66() { return 0x6666666666666666ULL; } > unsigned long long mskih() { return 0xabcd1234abcd1234ULL; }
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 2046a831938..ec384e87868 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10240,6 +10240,25 @@ rs6000_emit_set_const (rtx dest, rtx source) c = sext_hwi (c, 32); emit_move_insn (lo, GEN_INT (c)); } + + /* Use base_reg_operand to avoid spliting "r0=xxx" to "r0=[r0+off]" + after RA when reusing the DEST register to build the value. */ + else if ((can_create_pseudo_p () || base_reg_operand (dest, mode)) + && num_insns_constant (source, mode) + > rs6000_min_insns_constant_in_pool + && TARGET_64BIT) + { + rtx sym = force_const_mem (mode, source); + if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0)) + && use_toc_relative_ref (XEXP (sym, 0), mode)) + { + rtx toc = create_TOC_reference (XEXP (sym, 0), dest); + sym = gen_const_mem (mode, toc); + set_mem_alias_set (sym, get_TOC_alias_set ()); + } + + emit_move_insn (dest, sym); + } else rs6000_emit_set_long_const (dest, c); break; diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index e8ca70340df..a1c0d1e89c5 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -679,3 +679,8 @@ default value is 4. Target Undocumented Joined UInteger Var(rs6000_vect_unroll_reduc_threshold) Init(1) Param When reduction factor computed for a loop exceeds the threshold specified by this parameter, prefer to unroll this loop. The default value is 1. + +-param=rs6000-min-insns-constant-in-pool= +Target Undocumented Joined UInteger Var(rs6000_min_insns_constant_in_pool) Init(2) IntegerRange(2, 5) Param +The minimum instruction number of building a constant to force loading it from +the constant pool. \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c b/gcc/testsuite/gcc.target/powerpc/const_anchors.c index 542e2674b12..682e773d506 100644 --- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c +++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c @@ -1,5 +1,5 @@ /* { dg-do compile { target has_arch_ppc64 } } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fdump-rtl-final" } */ #define C1 0x2351847027482577ULL #define C2 0x2351847027482578ULL @@ -17,4 +17,5 @@ void __attribute__ ((noinline)) foo1 (long long *a, long long b) *a++ = C2; } -/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */ +/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c index e3a9a7264cf..e39479bbf86 100644 --- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c +++ b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mno-prefixed -save-temps" } */ +/* { dg-options "-O2 -mno-prefixed --param=rs6000-min-insns-constant-in-pool=5 -save-temps" } */ /* { dg-require-effective-target has_arch_ppc64 } */ /* { dg-final { scan-assembler-times {\mlis\M} 4 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c index 92b76ac8811..2b12a7445b3 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr106550.c +++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c @@ -1,5 +1,5 @@ /* PR target/106550 */ -/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10 --param=rs6000-min-insns-constant-in-pool=3" } */ /* { dg-require-effective-target has_arch_ppc64 } */ void diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c index 5ab40d71a56..9da644578a5 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c +++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c @@ -1,7 +1,7 @@ /* PR target/106550 */ /* { dg-require-effective-target power10_ok } */ /* { dg-require-effective-target has_arch_ppc64 } */ -/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1" } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1 --param=rs6000-min-insns-constant-in-pool=5" } */ /* force the constant splitter run after RA: -fdisable-rtl-split1. */ void diff --git a/gcc/testsuite/gcc.target/powerpc/pr63281.c b/gcc/testsuite/gcc.target/powerpc/pr63281.c new file mode 100644 index 00000000000..9763a7181fc --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr63281.c @@ -0,0 +1,11 @@ +/* Check loading constant from memory pool. */ +/* { dg-options "-O2 -mpowerpc64" } */ + +void +foo (unsigned long long *a) +{ + *a++ = 0x2351847027482577ULL; +} + +/* { dg-final { scan-assembler-times {\mp?ld\M} 1 { target { lp64 } } } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c b/gcc/testsuite/gcc.target/powerpc/pr87870.c index d2108ac3386..09b2e8de901 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr87870.c +++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c @@ -25,4 +25,7 @@ test3 (void) return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad; } -/* { dg-final { scan-assembler-not {\mld\M} } } */ +/* test3 is using "ld" to load the value to r3 and r4. So there are 2 'ld's + test0, test1 and test2 are using "li", then check 6 'li's. */ +/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mli\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c index 4f764d0576f..a89a24aee36 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c @@ -1,6 +1,6 @@ /* PR target/93012 */ /* { dg-do compile { target lp64 } } */ -/* { dg-options "-O2 -std=c99" } */ +/* { dg-options "-O2 -std=c99 --param=rs6000-min-insns-constant-in-pool=5" } */ unsigned long long msk66() { return 0x6666666666666666ULL; } unsigned long long mskih() { return 0xabcd1234abcd1234ULL; }