Message ID | 20240613021940.4000707-1-guojiufu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [V5,1/2] split complicate 64bit constant to memory | expand |
Hi Jeff, on 2024/6/13 10:19, Jiufu Guo wrote: > Hi, > > Sometimes, a complicated constant is built via 3(or more) > instructions. Generally speaking, it would not be as fast > as loading it from the constant pool (as the discussions in > PR63281): > "ld" is one instruction. If consider "address/toc" adjust, > we may count it as 2 instructions. And "pld" may need fewer > cycles. > > As testing(SPEC2017), it could get better/stable runtime > if set the threshold as "> 2" (compare with "> 3"). > > As known, because the constant is load from memory by this > patch, so this functionality may affect the cache missing. I wonder if it's a good idea to offer one rs6000 specific parameter to control this threshold, since this change isn't always a win, like 5 constant building simple insns can be well scheduled among insns in its own bb and an equilavent load can suffer from cache miss or insufficient LSU resource A parameter may be better for users in case they want to fine tune further for some cases. > While, IMHO, this patch would be still do the right thing. > > Compare with the previous version: > This version 1. allow assigning complicate constant to r0 before RA, > 2. allow more condition beside TARGET_ELF, > 3. updated test cases, and remove 2 test cases as the orignal test > point is not used any more. Can they be written with the proposed FORCE_CONST_INTO_REG used in other test cases? > > Boostrap & regtest pass on ppc64{,le}. > Is this ok for trunk? > > BR, > Jeff (Jiufu Guo) > > PR target/63281 > > gcc/ChangeLog: > > * config/rs6000/rs6000.cc (rs6000_emit_set_const): Split constant to > memory under -m64. > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/const_anchors.c: Test final-rtl. > * gcc.target/powerpc/pr106550_1.c (FORCE_CONST_INTO_REG): New macro. > * gcc.target/powerpc/pr106550_1.c: Use macro FORCE_CONST_INTO_REG. Curious if git gcc-verify will complain this (same changed file have multipe lines). > * gcc.target/powerpc/pr87870.c: Update asm insn checking. > * gcc.target/powerpc/pr93012.c: Likewise. > * gcc.target/powerpc/parall_5insn_const.c: Removed. > * gcc.target/powerpc/pr106550.c: Removed. Nit: s/Removed/Remove/ > * gcc.target/powerpc/pr63281.c: New test. > --- > gcc/config/rs6000/rs6000.cc | 15 +++++++++++ > .../gcc.target/powerpc/const_anchors.c | 5 ++-- > .../gcc.target/powerpc/parall_5insn_const.c | 27 ------------------- > gcc/testsuite/gcc.target/powerpc/pr106550.c | 14 ---------- > gcc/testsuite/gcc.target/powerpc/pr106550_1.c | 16 ++++++----- > gcc/testsuite/gcc.target/powerpc/pr63281.c | 11 ++++++++ > gcc/testsuite/gcc.target/powerpc/pr87870.c | 5 +++- > gcc/testsuite/gcc.target/powerpc/pr93012.c | 6 ++++- > 8 files changed, 47 insertions(+), 52 deletions(-) > delete mode 100644 gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c > delete mode 100644 gcc/testsuite/gcc.target/powerpc/pr106550.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr63281.c > > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index e4dc629ddcc..bc9d6f5c34f 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -10240,6 +10240,21 @@ rs6000_emit_set_const (rtx dest, rtx source) > c = sext_hwi (c, 32); > emit_move_insn (lo, GEN_INT (c)); > } > + Nit: Unexpected new line. > + else if ((can_create_pseudo_p () || base_reg_operand (dest, mode)) Nit: It's not obvious, maybe one comment on why we need base_reg_operand restriction under !can_create_pseudo_p. BR, Kewen > + && TARGET_64BIT && num_insns_constant (source, mode) > 2) > + { > + rtx sym = force_const_mem (mode, source); > + if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0)) > + && use_toc_relative_ref (XEXP (sym, 0), mode)) > + { > + rtx toc = create_TOC_reference (XEXP (sym, 0), dest); > + sym = gen_const_mem (mode, toc); > + set_mem_alias_set (sym, get_TOC_alias_set ()); > + } > + > + emit_move_insn (dest, sym); > + } > else > rs6000_emit_set_long_const (dest, c); > break; > diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c b/gcc/testsuite/gcc.target/powerpc/const_anchors.c > index 542e2674b12..682e773d506 100644 > --- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c > +++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target has_arch_ppc64 } } */ > -/* { dg-options "-O2" } */ > +/* { dg-options "-O2 -fdump-rtl-final" } */ > > #define C1 0x2351847027482577ULL > #define C2 0x2351847027482578ULL > @@ -17,4 +17,5 @@ void __attribute__ ((noinline)) foo1 (long long *a, long long b) > *a++ = C2; > } > > -/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */ > +/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c > deleted file mode 100644 > index e3a9a7264cf..00000000000 > --- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c > +++ /dev/null > @@ -1,27 +0,0 @@ > -/* { dg-do run } */ > -/* { dg-options "-O2 -mno-prefixed -save-temps" } */ > -/* { dg-require-effective-target has_arch_ppc64 } */ > - > -/* { dg-final { scan-assembler-times {\mlis\M} 4 } } */ > -/* { dg-final { scan-assembler-times {\mori\M} 4 } } */ > -/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */ > - > -void __attribute__ ((noinline)) foo (unsigned long long *a) > -{ > - /* 2 lis + 2 ori + 1 rldimi for each constant. */ > - *a++ = 0x800aabcdc167fa16ULL; > - *a++ = 0x7543a876867f616ULL; > -} > - > -long long A[] = {0x800aabcdc167fa16ULL, 0x7543a876867f616ULL}; > -int > -main () > -{ > - long long res[2]; > - > - foo (res); > - if (__builtin_memcmp (res, A, sizeof (res)) != 0) > - __builtin_abort (); > - > - return 0; > -} > diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c > deleted file mode 100644 > index 92b76ac8811..00000000000 > --- a/gcc/testsuite/gcc.target/powerpc/pr106550.c > +++ /dev/null > @@ -1,14 +0,0 @@ > -/* PR target/106550 */ > -/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > -/* { dg-require-effective-target has_arch_ppc64 } */ > - > -void > -foo (unsigned long long *a) > -{ > - *a++ = 0x020805006106003; /* pli+pli+rldimi */ > - *a++ = 0x2351847027482577;/* pli+pli+rldimi */ > -} > - > -/* { dg-final { scan-assembler-times {\mpli\M} 4 } } */ > -/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */ > - > diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c > index 5ab40d71a56..aa98f31865e 100644 > --- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c > +++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c > @@ -4,17 +4,19 @@ > /* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1" } */ > /* force the constant splitter run after RA: -fdisable-rtl-split1. */ > > +#define FORCE_CONST_INTO_REG(DEST, CST) \ > + { \ > + register long long d asm ("r0") = CST; \ > + asm volatile ("std %1, %0" : : "m"(DEST), "r"(d)); \ > + } > + > void > foo (unsigned long long *a) > { > /* Test oris/ori is used where paddi does not work with 'r0'. */ > - register long long d asm("r0") = 0x1245abcef9240dec; /* pli+sldi+oris+ori */ > - long long n; > - asm("cntlzd %0, %1" : "=r"(n) : "r"(d)); > - *a++ = n; > - > - *a++ = 0x235a8470a7480000ULL; /* pli+sldi+oris */ > - *a++ = 0x23a184700000b677ULL; /* pli+sldi+ori */ > + FORCE_CONST_INTO_REG (*a++, 0x1245abcef9240dec); /* pli+sldi+oris+ori */ > + FORCE_CONST_INTO_REG (*a++, 0x235a8470a7480000ULL); /* pli+sldi+oris */ > + FORCE_CONST_INTO_REG (*a++, 0x23a184700000b677ULL); /* pli+sldi+ori */ > } > > /* { dg-final { scan-assembler-times {\mpli\M} 3 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pr63281.c b/gcc/testsuite/gcc.target/powerpc/pr63281.c > new file mode 100644 > index 00000000000..9763a7181fc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr63281.c > @@ -0,0 +1,11 @@ > +/* Check loading constant from memory pool. */ > +/* { dg-options "-O2 -mpowerpc64" } */ > + > +void > +foo (unsigned long long *a) > +{ > + *a++ = 0x2351847027482577ULL; > +} > + > +/* { dg-final { scan-assembler-times {\mp?ld\M} 1 { target { lp64 } } } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c b/gcc/testsuite/gcc.target/powerpc/pr87870.c > index d2108ac3386..09b2e8de901 100644 > --- a/gcc/testsuite/gcc.target/powerpc/pr87870.c > +++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c > @@ -25,4 +25,7 @@ test3 (void) > return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad; > } > > -/* { dg-final { scan-assembler-not {\mld\M} } } */ > +/* test3 is using "ld" to load the value to r3 and r4. So there are 2 'ld's > + test0, test1 and test2 are using "li", then check 6 'li's. */ > +/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mli\M} 6 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c > index 4f764d0576f..660fb0dddfa 100644 > --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c > +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c > @@ -10,4 +10,8 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; } > unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } > unsigned long long mskse() { return 0xffff1234ffff1234ULL; } > > -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ > +/* { dg-final { scan-assembler-times {\mrldimi\M} 7 { target has_arch_pwr10 } } } */ > + > +/* 4 complicated constants can be loaded from pool. */ > +/* { dg-final { scan-assembler-times {\mrldimi\M} 3 { target { ! has_arch_pwr10 } } } } */ > +/* { dg-final { scan-assembler-times {\mld\M} 4 { target { ! has_arch_pwr10 } } } } */
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index e4dc629ddcc..bc9d6f5c34f 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10240,6 +10240,21 @@ rs6000_emit_set_const (rtx dest, rtx source) c = sext_hwi (c, 32); emit_move_insn (lo, GEN_INT (c)); } + + else if ((can_create_pseudo_p () || base_reg_operand (dest, mode)) + && TARGET_64BIT && num_insns_constant (source, mode) > 2) + { + rtx sym = force_const_mem (mode, source); + if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0)) + && use_toc_relative_ref (XEXP (sym, 0), mode)) + { + rtx toc = create_TOC_reference (XEXP (sym, 0), dest); + sym = gen_const_mem (mode, toc); + set_mem_alias_set (sym, get_TOC_alias_set ()); + } + + emit_move_insn (dest, sym); + } else rs6000_emit_set_long_const (dest, c); break; diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c b/gcc/testsuite/gcc.target/powerpc/const_anchors.c index 542e2674b12..682e773d506 100644 --- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c +++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c @@ -1,5 +1,5 @@ /* { dg-do compile { target has_arch_ppc64 } } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fdump-rtl-final" } */ #define C1 0x2351847027482577ULL #define C2 0x2351847027482578ULL @@ -17,4 +17,5 @@ void __attribute__ ((noinline)) foo1 (long long *a, long long b) *a++ = C2; } -/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */ +/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c deleted file mode 100644 index e3a9a7264cf..00000000000 --- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c +++ /dev/null @@ -1,27 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -mno-prefixed -save-temps" } */ -/* { dg-require-effective-target has_arch_ppc64 } */ - -/* { dg-final { scan-assembler-times {\mlis\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mori\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */ - -void __attribute__ ((noinline)) foo (unsigned long long *a) -{ - /* 2 lis + 2 ori + 1 rldimi for each constant. */ - *a++ = 0x800aabcdc167fa16ULL; - *a++ = 0x7543a876867f616ULL; -} - -long long A[] = {0x800aabcdc167fa16ULL, 0x7543a876867f616ULL}; -int -main () -{ - long long res[2]; - - foo (res); - if (__builtin_memcmp (res, A, sizeof (res)) != 0) - __builtin_abort (); - - return 0; -} diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c deleted file mode 100644 index 92b76ac8811..00000000000 --- a/gcc/testsuite/gcc.target/powerpc/pr106550.c +++ /dev/null @@ -1,14 +0,0 @@ -/* PR target/106550 */ -/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ -/* { dg-require-effective-target has_arch_ppc64 } */ - -void -foo (unsigned long long *a) -{ - *a++ = 0x020805006106003; /* pli+pli+rldimi */ - *a++ = 0x2351847027482577;/* pli+pli+rldimi */ -} - -/* { dg-final { scan-assembler-times {\mpli\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */ - diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c index 5ab40d71a56..aa98f31865e 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c +++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c @@ -4,17 +4,19 @@ /* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1" } */ /* force the constant splitter run after RA: -fdisable-rtl-split1. */ +#define FORCE_CONST_INTO_REG(DEST, CST) \ + { \ + register long long d asm ("r0") = CST; \ + asm volatile ("std %1, %0" : : "m"(DEST), "r"(d)); \ + } + void foo (unsigned long long *a) { /* Test oris/ori is used where paddi does not work with 'r0'. */ - register long long d asm("r0") = 0x1245abcef9240dec; /* pli+sldi+oris+ori */ - long long n; - asm("cntlzd %0, %1" : "=r"(n) : "r"(d)); - *a++ = n; - - *a++ = 0x235a8470a7480000ULL; /* pli+sldi+oris */ - *a++ = 0x23a184700000b677ULL; /* pli+sldi+ori */ + FORCE_CONST_INTO_REG (*a++, 0x1245abcef9240dec); /* pli+sldi+oris+ori */ + FORCE_CONST_INTO_REG (*a++, 0x235a8470a7480000ULL); /* pli+sldi+oris */ + FORCE_CONST_INTO_REG (*a++, 0x23a184700000b677ULL); /* pli+sldi+ori */ } /* { dg-final { scan-assembler-times {\mpli\M} 3 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr63281.c b/gcc/testsuite/gcc.target/powerpc/pr63281.c new file mode 100644 index 00000000000..9763a7181fc --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr63281.c @@ -0,0 +1,11 @@ +/* Check loading constant from memory pool. */ +/* { dg-options "-O2 -mpowerpc64" } */ + +void +foo (unsigned long long *a) +{ + *a++ = 0x2351847027482577ULL; +} + +/* { dg-final { scan-assembler-times {\mp?ld\M} 1 { target { lp64 } } } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c b/gcc/testsuite/gcc.target/powerpc/pr87870.c index d2108ac3386..09b2e8de901 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr87870.c +++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c @@ -25,4 +25,7 @@ test3 (void) return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad; } -/* { dg-final { scan-assembler-not {\mld\M} } } */ +/* test3 is using "ld" to load the value to r3 and r4. So there are 2 'ld's + test0, test1 and test2 are using "li", then check 6 'li's. */ +/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mli\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c index 4f764d0576f..660fb0dddfa 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c @@ -10,4 +10,8 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; } unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } unsigned long long mskse() { return 0xffff1234ffff1234ULL; } -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ +/* { dg-final { scan-assembler-times {\mrldimi\M} 7 { target has_arch_pwr10 } } } */ + +/* 4 complicated constants can be loaded from pool. */ +/* { dg-final { scan-assembler-times {\mrldimi\M} 3 { target { ! has_arch_pwr10 } } } } */ +/* { dg-final { scan-assembler-times {\mld\M} 4 { target { ! has_arch_pwr10 } } } } */