Message ID | 20221026114052.17713-1-guojiufu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [V2] rs6000: Support to build constants by li/lis+oris/xoris | expand |
Jiufu Guo <guojiufu@linux.ibm.com> writes: > index 00000000000..ebd9ea88993 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c > @@ -0,0 +1,12 @@ > +/* PR target/106708 */ > +/* { dg-do compile } } */ Typo, should be: +/* { dg-do compile } */> BR, Jeff(Jiufu)
Hi, Gentle ping: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604415.html BR, Jeff(Jiufu) Jiufu Guo <guojiufu@linux.ibm.com> writes: > Hi, > > PR106708 constaint some constants which can be support by li/lis + oris/xoris. > > For constant C: > if '(c & 0xFFFFFFFF80008000ULL) == 0x80000000ULL' or say: > 32(0) || 1(1) || 15(x) || 1(0) || 15(x), we could use li+oris to > build constant 'C'. > Here N(M) means N continuous bit M, x for M means it is ok for either > 1 or 0; '||' means concatenation. > > if '(c & 0xFFFFFFFF00008000ULL) == 0xFFFFFFFF00008000ULL' or say: > 32(1) || 16(x) || 1(1) || 15(x), using li+xoris would be ok. > > if '(c & 0xFFFFFFFF0000FFFFULL) == 0xFFFFFFFF00000000' or say: > 32(1) || 1(0) || 15(x) || 16(0), using lis+xoris would be ok. > > This patch update rs6000_emit_set_long_const to support these forms. > Bootstrap and regtest pass on ppc64 and ppc64le. > > Is this ok for trunk? > > BR, > Jeff(Jiufu) > > > PR target/106708 > > gcc/ChangeLog: > > * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Support > constants which can be built with li + oris or li/lis + xoris. > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/pr106708-run.c: New test. > * gcc.target/powerpc/pr106708.c: New test. > * gcc.target/powerpc/pr106708.h: New file. > > --- > gcc/config/rs6000/rs6000.cc | 41 ++++++++++++++----- > .../gcc.target/powerpc/pr106708-run.c | 17 ++++++++ > gcc/testsuite/gcc.target/powerpc/pr106708.c | 12 ++++++ > gcc/testsuite/gcc.target/powerpc/pr106708.h | 9 ++++ > 4 files changed, 69 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708-run.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.h > > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index d2743f7bce6..9b7a51f052d 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -10228,6 +10228,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > { > rtx temp; > HOST_WIDE_INT ud1, ud2, ud3, ud4; > + HOST_WIDE_INT orig_c = c; > > ud1 = c & 0xffff; > c = c >> 16; > @@ -10253,21 +10254,41 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > gen_rtx_IOR (DImode, copy_rtx (temp), > GEN_INT (ud1))); > } > + else if ((ud4 == 0xffff && ud3 == 0xffff) > + && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000)))) > + { > + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > + > + HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000) > + : ((ud2 << 16) - 0x80000000); > + /* li/lis + xoris */ > + emit_move_insn (temp, GEN_INT (imm)); > + emit_move_insn (dest, gen_rtx_XOR (DImode, temp, > + GEN_INT (orig_c ^ imm))); > + } > else if (ud3 == 0 && ud4 == 0) > { > temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > > gcc_assert (ud2 & 0x8000); > - emit_move_insn (copy_rtx (temp), > - GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); > - if (ud1 != 0) > - emit_move_insn (copy_rtx (temp), > - gen_rtx_IOR (DImode, copy_rtx (temp), > - GEN_INT (ud1))); > - emit_move_insn (dest, > - gen_rtx_ZERO_EXTEND (DImode, > - gen_lowpart (SImode, > - copy_rtx (temp)))); > + > + if (!(ud1 & 0x8000)) > + { > + /* li+oris */ > + emit_move_insn (temp, GEN_INT (ud1)); > + emit_move_insn (dest, > + gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); > + } > + else > + { > + emit_move_insn (temp, > + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); > + if (ud1 != 0) > + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > + emit_move_insn (dest, > + gen_rtx_ZERO_EXTEND (DImode, > + gen_lowpart (SImode, temp))); > + } > } > else if (ud1 == ud3 && ud2 == ud4) > { > diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708-run.c b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c > new file mode 100644 > index 00000000000..df65c321f6b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c > @@ -0,0 +1,17 @@ > +/* PR target/106708 */ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +#include "pr106708.h" > + > +long long arr[] = {0x98765432ULL, 0xffffffff7cdeab55ULL, 0xffffffff65430000ULL}; > +int > +main () > +{ > + long long a[3]; > + > + foo (a); > + if (__builtin_memcmp (a, arr, sizeof (arr)) != 0) > + __builtin_abort (); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.c b/gcc/testsuite/gcc.target/powerpc/pr106708.c > new file mode 100644 > index 00000000000..ebd9ea88993 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c > @@ -0,0 +1,12 @@ > +/* PR target/106708 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ > +/* { dg-require-effective-target has_arch_ppc64 } */ > + > + > +#include "pr106708.h" > + > +/* { dg-final { scan-assembler-times {\mli\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mlis\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\moris\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mxoris\M} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.h b/gcc/testsuite/gcc.target/powerpc/pr106708.h > new file mode 100644 > index 00000000000..42526a70892 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h > @@ -0,0 +1,9 @@ > +/* Test constants which can be built by li/lis + oris/xoris */ > +void __attribute__ ((__noinline__, __noclone__)) foo (long long *arg) > +{ > + *arg++ = 0x98765432ULL; > + *arg++ = 0xffffffff7cdeab55ULL; > + *arg++ = 0xffffffff65430000ULL; > +} > + > +
Hi Jeff, Sorry for the late reply. on 2022/10/26 19:40, Jiufu Guo wrote: > Hi, > > PR106708 constaint some constants which can be support by li/lis + oris/xoris. ~~~~~~~~ typo? for "li/lis + oris/xoris", I interpreted it into four combinations: li + oris, lis + oris, li + xoris, lis + xoris. not sure just me interpreting like that, but the actual combinations which this patch adopts are: li + oris, li + xoris, lis + xoris. It's a bit off, but not a big deal, up to you to reword it or not. :) > > For constant C: > if '(c & 0xFFFFFFFF80008000ULL) == 0x80000000ULL' or say: > 32(0) || 1(1) || 15(x) || 1(0) || 15(x), we could use li+oris to > build constant 'C'. > Here N(M) means N continuous bit M, x for M means it is ok for either > 1 or 0; '||' means concatenation. > > if '(c & 0xFFFFFFFF00008000ULL) == 0xFFFFFFFF00008000ULL' or say: > 32(1) || 16(x) || 1(1) || 15(x), using li+xoris would be ok. > > if '(c & 0xFFFFFFFF0000FFFFULL) == 0xFFFFFFFF00000000' or say: > 32(1) || 1(0) || 15(x) || 16(0), using lis+xoris would be ok. > > This patch update rs6000_emit_set_long_const to support these forms. > Bootstrap and regtest pass on ppc64 and ppc64le. > > Is this ok for trunk? This updated version looks good to me, but I'd leave it to Segher for the final say. Thanks! BR, Kewen > > BR, > Jeff(Jiufu) > > > PR target/106708 > > gcc/ChangeLog: > > * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Support > constants which can be built with li + oris or li/lis + xoris. > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/pr106708-run.c: New test. > * gcc.target/powerpc/pr106708.c: New test. > * gcc.target/powerpc/pr106708.h: New file. > > --- > gcc/config/rs6000/rs6000.cc | 41 ++++++++++++++----- > .../gcc.target/powerpc/pr106708-run.c | 17 ++++++++ > gcc/testsuite/gcc.target/powerpc/pr106708.c | 12 ++++++ > gcc/testsuite/gcc.target/powerpc/pr106708.h | 9 ++++ > 4 files changed, 69 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708-run.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.h > > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index d2743f7bce6..9b7a51f052d 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -10228,6 +10228,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > { > rtx temp; > HOST_WIDE_INT ud1, ud2, ud3, ud4; > + HOST_WIDE_INT orig_c = c; > > ud1 = c & 0xffff; > c = c >> 16; > @@ -10253,21 +10254,41 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > gen_rtx_IOR (DImode, copy_rtx (temp), > GEN_INT (ud1))); > } > + else if ((ud4 == 0xffff && ud3 == 0xffff) > + && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000)))) > + { > + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > + > + HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000) > + : ((ud2 << 16) - 0x80000000); > + /* li/lis + xoris */ > + emit_move_insn (temp, GEN_INT (imm)); > + emit_move_insn (dest, gen_rtx_XOR (DImode, temp, > + GEN_INT (orig_c ^ imm))); > + } > else if (ud3 == 0 && ud4 == 0) > { > temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > > gcc_assert (ud2 & 0x8000); > - emit_move_insn (copy_rtx (temp), > - GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); > - if (ud1 != 0) > - emit_move_insn (copy_rtx (temp), > - gen_rtx_IOR (DImode, copy_rtx (temp), > - GEN_INT (ud1))); > - emit_move_insn (dest, > - gen_rtx_ZERO_EXTEND (DImode, > - gen_lowpart (SImode, > - copy_rtx (temp)))); > + > + if (!(ud1 & 0x8000)) > + { > + /* li+oris */ > + emit_move_insn (temp, GEN_INT (ud1)); > + emit_move_insn (dest, > + gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); > + } > + else > + { > + emit_move_insn (temp, > + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); > + if (ud1 != 0) > + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > + emit_move_insn (dest, > + gen_rtx_ZERO_EXTEND (DImode, > + gen_lowpart (SImode, temp))); > + } > } > else if (ud1 == ud3 && ud2 == ud4) > { > diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708-run.c b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c > new file mode 100644 > index 00000000000..df65c321f6b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c > @@ -0,0 +1,17 @@ > +/* PR target/106708 */ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +#include "pr106708.h" > + > +long long arr[] = {0x98765432ULL, 0xffffffff7cdeab55ULL, 0xffffffff65430000ULL}; > +int > +main () > +{ > + long long a[3]; > + > + foo (a); > + if (__builtin_memcmp (a, arr, sizeof (arr)) != 0) > + __builtin_abort (); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.c b/gcc/testsuite/gcc.target/powerpc/pr106708.c > new file mode 100644 > index 00000000000..ebd9ea88993 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c > @@ -0,0 +1,12 @@ > +/* PR target/106708 */ > +/* { dg-do compile } } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ > +/* { dg-require-effective-target has_arch_ppc64 } */ > + > + > +#include "pr106708.h" > + > +/* { dg-final { scan-assembler-times {\mli\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mlis\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\moris\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mxoris\M} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.h b/gcc/testsuite/gcc.target/powerpc/pr106708.h > new file mode 100644 > index 00000000000..42526a70892 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h > @@ -0,0 +1,9 @@ > +/* Test constants which can be built by li/lis + oris/xoris */ > +void __attribute__ ((__noinline__, __noclone__)) foo (long long *arg) > +{ > + *arg++ = 0x98765432ULL; > + *arg++ = 0xffffffff7cdeab55ULL; > + *arg++ = 0xffffffff65430000ULL; > +} > + > +
Hi Kewen, Thanks a lot for your insight comments! "Kewen.Lin" <linkw@linux.ibm.com> writes: > Hi Jeff, > > Sorry for the late reply. > > on 2022/10/26 19:40, Jiufu Guo wrote: >> Hi, >> >> PR106708 constaint some constants which can be support by li/lis + oris/xoris. > ~~~~~~~~ typo? Oh, typo! > > for "li/lis + oris/xoris", I interpreted it into four combinations: > > li + oris, lis + oris, li + xoris, lis + xoris. > > not sure just me interpreting like that, but the actual combinations > which this patch adopts are: > > li + oris, li + xoris, lis + xoris. > > It's a bit off, but not a big deal, up to you to reword it or not. :) Oh, thanks! I will update to use "li/lis + xoris" or "li + oris" to avoid confuse. > >> >> For constant C: >> if '(c & 0xFFFFFFFF80008000ULL) == 0x80000000ULL' or say: >> 32(0) || 1(1) || 15(x) || 1(0) || 15(x), we could use li+oris to >> build constant 'C'. >> Here N(M) means N continuous bit M, x for M means it is ok for either >> 1 or 0; '||' means concatenation. >> >> if '(c & 0xFFFFFFFF00008000ULL) == 0xFFFFFFFF00008000ULL' or say: >> 32(1) || 16(x) || 1(1) || 15(x), using li+xoris would be ok. >> >> if '(c & 0xFFFFFFFF0000FFFFULL) == 0xFFFFFFFF00000000' or say: >> 32(1) || 1(0) || 15(x) || 16(0), using lis+xoris would be ok. >> >> This patch update rs6000_emit_set_long_const to support these forms. >> Bootstrap and regtest pass on ppc64 and ppc64le. >> >> Is this ok for trunk? > > This updated version looks good to me, but I'd leave it to Segher for the > final say. Thanks! Thanks! BR, Jeff (Jiufu) > > BR, > Kewen > >> >> BR, >> Jeff(Jiufu) >> >> >> PR target/106708 >> >> gcc/ChangeLog: >> >> * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Support >> constants which can be built with li + oris or li/lis + xoris. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.target/powerpc/pr106708-run.c: New test. >> * gcc.target/powerpc/pr106708.c: New test. >> * gcc.target/powerpc/pr106708.h: New file. >> >> --- >> gcc/config/rs6000/rs6000.cc | 41 ++++++++++++++----- >> .../gcc.target/powerpc/pr106708-run.c | 17 ++++++++ >> gcc/testsuite/gcc.target/powerpc/pr106708.c | 12 ++++++ >> gcc/testsuite/gcc.target/powerpc/pr106708.h | 9 ++++ >> 4 files changed, 69 insertions(+), 10 deletions(-) >> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708-run.c >> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.c >> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.h >> >> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >> index d2743f7bce6..9b7a51f052d 100644 >> --- a/gcc/config/rs6000/rs6000.cc >> +++ b/gcc/config/rs6000/rs6000.cc >> @@ -10228,6 +10228,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) >> { >> rtx temp; >> HOST_WIDE_INT ud1, ud2, ud3, ud4; >> + HOST_WIDE_INT orig_c = c; >> >> ud1 = c & 0xffff; >> c = c >> 16; >> @@ -10253,21 +10254,41 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) >> gen_rtx_IOR (DImode, copy_rtx (temp), >> GEN_INT (ud1))); >> } >> + else if ((ud4 == 0xffff && ud3 == 0xffff) >> + && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000)))) >> + { >> + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> + >> + HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000) >> + : ((ud2 << 16) - 0x80000000); >> + /* li/lis + xoris */ >> + emit_move_insn (temp, GEN_INT (imm)); >> + emit_move_insn (dest, gen_rtx_XOR (DImode, temp, >> + GEN_INT (orig_c ^ imm))); >> + } >> else if (ud3 == 0 && ud4 == 0) >> { >> temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> >> gcc_assert (ud2 & 0x8000); >> - emit_move_insn (copy_rtx (temp), >> - GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); >> - if (ud1 != 0) >> - emit_move_insn (copy_rtx (temp), >> - gen_rtx_IOR (DImode, copy_rtx (temp), >> - GEN_INT (ud1))); >> - emit_move_insn (dest, >> - gen_rtx_ZERO_EXTEND (DImode, >> - gen_lowpart (SImode, >> - copy_rtx (temp)))); >> + >> + if (!(ud1 & 0x8000)) >> + { >> + /* li+oris */ >> + emit_move_insn (temp, GEN_INT (ud1)); >> + emit_move_insn (dest, >> + gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); >> + } >> + else >> + { >> + emit_move_insn (temp, >> + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); >> + if (ud1 != 0) >> + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + emit_move_insn (dest, >> + gen_rtx_ZERO_EXTEND (DImode, >> + gen_lowpart (SImode, temp))); >> + } >> } >> else if (ud1 == ud3 && ud2 == ud4) >> { >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708-run.c b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c >> new file mode 100644 >> index 00000000000..df65c321f6b >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c >> @@ -0,0 +1,17 @@ >> +/* PR target/106708 */ >> +/* { dg-do run } */ >> +/* { dg-options "-O2" } */ >> + >> +#include "pr106708.h" >> + >> +long long arr[] = {0x98765432ULL, 0xffffffff7cdeab55ULL, 0xffffffff65430000ULL}; >> +int >> +main () >> +{ >> + long long a[3]; >> + >> + foo (a); >> + if (__builtin_memcmp (a, arr, sizeof (arr)) != 0) >> + __builtin_abort (); >> + return 0; >> +} >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.c b/gcc/testsuite/gcc.target/powerpc/pr106708.c >> new file mode 100644 >> index 00000000000..ebd9ea88993 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c >> @@ -0,0 +1,12 @@ >> +/* PR target/106708 */ >> +/* { dg-do compile } } */ >> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ >> +/* { dg-require-effective-target has_arch_ppc64 } */ >> + >> + >> +#include "pr106708.h" >> + >> +/* { dg-final { scan-assembler-times {\mli\M} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mlis\M} 1 } } */ >> +/* { dg-final { scan-assembler-times {\moris\M} 1 } } */ >> +/* { dg-final { scan-assembler-times {\mxoris\M} 2 } } */ >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.h b/gcc/testsuite/gcc.target/powerpc/pr106708.h >> new file mode 100644 >> index 00000000000..42526a70892 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h >> @@ -0,0 +1,9 @@ >> +/* Test constants which can be built by li/lis + oris/xoris */ >> +void __attribute__ ((__noinline__, __noclone__)) foo (long long *arg) >> +{ >> + *arg++ = 0x98765432ULL; >> + *arg++ = 0xffffffff7cdeab55ULL; >> + *arg++ = 0xffffffff65430000ULL; >> +} >> + >> +
Hi guys, On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote: > on 2022/10/26 19:40, Jiufu Guo wrote: > for "li/lis + oris/xoris", I interpreted it into four combinations: > > li + oris, lis + oris, li + xoris, lis + xoris. > > not sure just me interpreting like that, but the actual combinations > which this patch adopts are: > > li + oris, li + xoris, lis + xoris. > > It's a bit off, but not a big deal, up to you to reword it or not. :) The first two are obvious, but the last one is almost never a good idea, there usually are better ways to do the same. I cannot even think of any case where this is best? A lis;rl* is always prefered (it can optimise better, be combined with other insns). > > + HOST_WIDE_INT orig_c = c; If you ever feel you need a variable to hold an "orig" value, that is a good hint that you should restructure the code a bit, perhaps even factor it. That often is overdue (like here), not caused by you, but you could help solve it ;-) (This is what made this patch hard to review, btw). > > gen_rtx_IOR (DImode, copy_rtx (temp), > > GEN_INT (ud1))); > > } > > + else if ((ud4 == 0xffff && ud3 == 0xffff) > > + && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000)))) > > + { > > + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > > + > > + HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000) > > + : ((ud2 << 16) - 0x80000000); We really should have some "hwi::sign_extend (ud1, 16)" helper function, heh. Maybe there already is? Ah, "sext_hwi". Fixing that up everywhere in this function is preapproved. > > + else > > + { > > + emit_move_insn (temp, > > + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); > > + if (ud1 != 0) > > + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > > + emit_move_insn (dest, > > + gen_rtx_ZERO_EXTEND (DImode, > > + gen_lowpart (SImode, temp))); > > + } Why this? Please just write it in DImode, do not go via SImode? > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h > > @@ -0,0 +1,9 @@ > > +/* Test constants which can be built by li/lis + oris/xoris */ > > +void __attribute__ ((__noinline__, __noclone__)) foo (long long *arg) > > +{ > > + *arg++ = 0x98765432ULL; > > + *arg++ = 0xffffffff7cdeab55ULL; > > + *arg++ = 0xffffffff65430000ULL; > > +} Use noipa please (it is shorter, simpler, and covers more :-) ) Could you comment what exact instructions are expected? li;xoris and li;xoris and lis;xoris I guess? It helps if you just tell the reader here. The li;oris and li;xoris parts look good. Segher
Hi Segher! Thanks a lot for your comments! Segher Boessenkool <segher@kernel.crashing.org> writes: > Hi guys, > > On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote: >> on 2022/10/26 19:40, Jiufu Guo wrote: >> for "li/lis + oris/xoris", I interpreted it into four combinations: >> >> li + oris, lis + oris, li + xoris, lis + xoris. >> >> not sure just me interpreting like that, but the actual combinations >> which this patch adopts are: >> >> li + oris, li + xoris, lis + xoris. >> >> It's a bit off, but not a big deal, up to you to reword it or not. :) > > The first two are obvious, but the last one is almost never a good idea, > there usually are better ways to do the same. I cannot even think of > any case where this is best? A lis;rl* is always prefered (it can > optimise better, be combined with other insns). I understant your point here. The first two: 'li' for lowest 16bits, 'oris/xoris' for next 16bits. While for 'lis + xoris', it may not obvious, because both 'lis' and 'xoris' operates on 17-31bits. 'lis + xoris' is for case "32(1) || 1(0) || 15(x) || 16(0)". xoris is used to clean bit31. This case seems hard to be supported by 'rlxx'. I hit to find this case when I analyze what kind of constants can be build by two instructions. Checked the posssible combinations: "addi/addis" + "neg/ori/../xoris/rldX/rlwX/../sradi/extswsli"(those instructions which accept one register and one immediate). I also drafted the patch to use "li/lis+rlxx" to build constant. https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601276.html https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601277.html > >> > + HOST_WIDE_INT orig_c = c; > > If you ever feel you need a variable to hold an "orig" value, that is a > good hint that you should restructure the code a bit, perhaps even > factor it. That often is overdue (like here), not caused by you, but > you could help solve it ;-) > > (This is what made this patch hard to review, btw). You are right. Thanks for point out this! > >> > gen_rtx_IOR (DImode, copy_rtx (temp), >> > GEN_INT (ud1))); >> > } >> > + else if ((ud4 == 0xffff && ud3 == 0xffff) >> > + && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000)))) >> > + { >> > + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> > + >> > + HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000) >> > + : ((ud2 << 16) - 0x80000000); > > We really should have some "hwi::sign_extend (ud1, 16)" helper function, > heh. Maybe there already is? Ah, "sext_hwi". Fixing that up > everywhere in this function is preapproved. Great, thanks! > >> > + else >> > + { >> > + emit_move_insn (temp, >> > + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); >> > + if (ud1 != 0) >> > + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> > + emit_move_insn (dest, >> > + gen_rtx_ZERO_EXTEND (DImode, >> > + gen_lowpart (SImode, temp))); >> > + } > > Why this? Please just write it in DImode, do not go via SImode? Thanks for catch this. Yes, gen_lowpart with DImode would be ok. > >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h >> > @@ -0,0 +1,9 @@ >> > +/* Test constants which can be built by li/lis + oris/xoris */ >> > +void __attribute__ ((__noinline__, __noclone__)) foo (long long *arg) >> > +{ >> > + *arg++ = 0x98765432ULL; >> > + *arg++ = 0xffffffff7cdeab55ULL; >> > + *arg++ = 0xffffffff65430000ULL; >> > +} > > Use noipa please (it is shorter, simpler, and covers more :-) ) Thanks! > > Could you comment what exact instructions are expected? > li;xoris and li;xoris and lis;xoris I guess? It helps if you just tell > the reader here. Sure, thanks! > > The li;oris and li;xoris parts look good. > BR, Jeff (Jiufu) > > Segher
Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Hi Segher! > > Thanks a lot for your comments! > > Segher Boessenkool <segher@kernel.crashing.org> writes: > >> Hi guys, >> >> On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote: >>> on 2022/10/26 19:40, Jiufu Guo wrote: >>> for "li/lis + oris/xoris", I interpreted it into four combinations: >>> >>> li + oris, lis + oris, li + xoris, lis + xoris. >>> >>> not sure just me interpreting like that, but the actual combinations >>> which this patch adopts are: >>> >>> li + oris, li + xoris, lis + xoris. >>> >>> It's a bit off, but not a big deal, up to you to reword it or not. :) >> >> The first two are obvious, but the last one is almost never a good idea, >> there usually are better ways to do the same. I cannot even think of >> any case where this is best? A lis;rl* is always prefered (it can >> optimise better, be combined with other insns). > I understant your point here. The first two: 'li' for lowest 16bits, > 'oris/xoris' for next 16bits. > > While for 'lis + xoris', it may not obvious, because both 'lis' and > 'xoris' operates on 17-31bits. > 'lis + xoris' is for case "32(1) || 1(0) || 15(x) || 16(0)". xoris is > used to clean bit31. This case seems hard to be supported by 'rlxx'. > > I hit to find this case when I analyze what kind of constants can be > build by two instructions. Checked the posssible combinations: > "addi/addis" + "neg/ori/../xoris/rldX/rlwX/../sradi/extswsli"(those > instructions which accept one register and one immediate). > > I also drafted the patch to use "li/lis+rlxx" to build constant. > https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601276.html > https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601277.html > >> >>> > + HOST_WIDE_INT orig_c = c; >> >> If you ever feel you need a variable to hold an "orig" value, that is a >> good hint that you should restructure the code a bit, perhaps even >> factor it. That often is overdue (like here), not caused by you, but >> you could help solve it ;-) >> >> (This is what made this patch hard to review, btw). > You are right. Thanks for point out this! >> >>> > gen_rtx_IOR (DImode, copy_rtx (temp), >>> > GEN_INT (ud1))); >>> > } >>> > + else if ((ud4 == 0xffff && ud3 == 0xffff) >>> > + && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000)))) >>> > + { >>> > + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >>> > + >>> > + HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000) >>> > + : ((ud2 << 16) - 0x80000000); >> >> We really should have some "hwi::sign_extend (ud1, 16)" helper function, >> heh. Maybe there already is? Ah, "sext_hwi". Fixing that up >> everywhere in this function is preapproved. > Great, thanks! >> >>> > + else >>> > + { >>> > + emit_move_insn (temp, >>> > + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); >>> > + if (ud1 != 0) >>> > + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >>> > + emit_move_insn (dest, >>> > + gen_rtx_ZERO_EXTEND (DImode, >>> > + gen_lowpart (SImode, temp))); >>> > + } >> >> Why this? Please just write it in DImode, do not go via SImode? > Thanks for catch this. Yes, gen_lowpart with DImode would be ok. Oh, Sorry. DImode can not be used here. The genreated pattern with DImode can not be recognized. Using SImode is to match 'rlwxx'. BR, Jeff (Jiufu) >> >>> > --- /dev/null >>> > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h >>> > @@ -0,0 +1,9 @@ >>> > +/* Test constants which can be built by li/lis + oris/xoris */ >>> > +void __attribute__ ((__noinline__, __noclone__)) foo (long long *arg) >>> > +{ >>> > + *arg++ = 0x98765432ULL; >>> > + *arg++ = 0xffffffff7cdeab55ULL; >>> > + *arg++ = 0xffffffff65430000ULL; >>> > +} >> >> Use noipa please (it is shorter, simpler, and covers more :-) ) > Thanks! >> >> Could you comment what exact instructions are expected? >> li;xoris and li;xoris and lis;xoris I guess? It helps if you just tell >> the reader here. > Sure, thanks! >> >> The li;oris and li;xoris parts look good. >> > > BR, > Jeff (Jiufu) >> >> Segher
On Mon, Nov 28, 2022 at 11:37:34AM +0800, Jiufu Guo wrote: > Segher Boessenkool <segher@kernel.crashing.org> writes: > > On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote: > >> on 2022/10/26 19:40, Jiufu Guo wrote: > >> for "li/lis + oris/xoris", I interpreted it into four combinations: > >> > >> li + oris, lis + oris, li + xoris, lis + xoris. > >> > >> not sure just me interpreting like that, but the actual combinations > >> which this patch adopts are: > >> > >> li + oris, li + xoris, lis + xoris. > >> > >> It's a bit off, but not a big deal, up to you to reword it or not. :) > > > > The first two are obvious, but the last one is almost never a good idea, > > there usually are better ways to do the same. I cannot even think of > > any case where this is best? A lis;rl* is always prefered (it can > > optimise better, be combined with other insns). > I understant your point here. The first two: 'li' for lowest 16bits, > 'oris/xoris' for next 16bits. > > While for 'lis + xoris', it may not obvious, because both 'lis' and > 'xoris' operates on 17-31bits. > 'lis + xoris' is for case "32(1) || 1(0) || 15(x) || 16(0)". xoris is > used to clean bit31. This case seems hard to be supported by 'rlxx'. Please put that in a separate patch? First do a patch with just lis;x?oris. They are unrelated and different in almost every way. > I hit to find this case when I analyze what kind of constants can be > build by two instructions. Checked the posssible combinations: > "addi/addis" + "neg/ori/../xoris/rldX/rlwX/../sradi/extswsli"(those > instructions which accept one register and one immediate). > > I also drafted the patch to use "li/lis+rlxx" to build constant. > https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601276.html > https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601277.html Those seem to do many things in one patch as well :-( It is very hard to review such things, it takes many hours each to do properly. Segher
On Mon, Nov 28, 2022 at 03:51:59PM +0800, Jiufu Guo wrote: > Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > > Segher Boessenkool <segher@kernel.crashing.org> writes: > >>> > + else > >>> > + { > >>> > + emit_move_insn (temp, > >>> > + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); > >>> > + if (ud1 != 0) > >>> > + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > >>> > + emit_move_insn (dest, > >>> > + gen_rtx_ZERO_EXTEND (DImode, > >>> > + gen_lowpart (SImode, temp))); > >>> > + } > >> > >> Why this? Please just write it in DImode, do not go via SImode? > > Thanks for catch this. Yes, gen_lowpart with DImode would be ok. > Oh, Sorry. DImode can not be used here. The genreated pattern with > DImode can not be recognized. Using SImode is to match 'rlwxx'. There are patterns that accept DImode for rlwinm just fine. Please use (and:DI (const_int 0xffffffff) (x:DI)) not the obfuscated (zero_extend:DI (subreg:SI (x:DI) LOWBYTE)) Segher
Hi Segher, Thanks for your review! Segher Boessenkool <segher@kernel.crashing.org> writes: > On Mon, Nov 28, 2022 at 11:37:34AM +0800, Jiufu Guo wrote: >> Segher Boessenkool <segher@kernel.crashing.org> writes: >> > On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote: >> >> on 2022/10/26 19:40, Jiufu Guo wrote: >> >> for "li/lis + oris/xoris", I interpreted it into four combinations: >> >> >> >> li + oris, lis + oris, li + xoris, lis + xoris. >> >> >> >> not sure just me interpreting like that, but the actual combinations >> >> which this patch adopts are: >> >> >> >> li + oris, li + xoris, lis + xoris. >> >> >> >> It's a bit off, but not a big deal, up to you to reword it or not. :) >> > >> > The first two are obvious, but the last one is almost never a good idea, >> > there usually are better ways to do the same. I cannot even think of >> > any case where this is best? A lis;rl* is always prefered (it can >> > optimise better, be combined with other insns). >> I understant your point here. The first two: 'li' for lowest 16bits, >> 'oris/xoris' for next 16bits. >> >> While for 'lis + xoris', it may not obvious, because both 'lis' and >> 'xoris' operates on 17-31bits. >> 'lis + xoris' is for case "32(1) || 1(0) || 15(x) || 16(0)". xoris is >> used to clean bit31. This case seems hard to be supported by 'rlxx'. > > Please put that in a separate patch? First do a patch with just > lis;x?oris. They are unrelated and different in almost every way. Sure, Thanks for the advice! > >> I hit to find this case when I analyze what kind of constants can be >> build by two instructions. Checked the posssible combinations: >> "addi/addis" + "neg/ori/../xoris/rldX/rlwX/../sradi/extswsli"(those >> instructions which accept one register and one immediate). >> >> I also drafted the patch to use "li/lis+rlxx" to build constant. >> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601276.html >> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601277.html > > Those seem to do many things in one patch as well :-( It is very hard > to review such things, it takes many hours each to do properly. Sorry, I will try to seperate them to smaller granularities! BR, Jeff (Jiufu) > > > Segher
Hi Segher, Thanks for your comment! Segher Boessenkool <segher@kernel.crashing.org> writes: > On Mon, Nov 28, 2022 at 03:51:59PM +0800, Jiufu Guo wrote: >> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: >> > Segher Boessenkool <segher@kernel.crashing.org> writes: >> >>> > + else >> >>> > + { >> >>> > + emit_move_insn (temp, >> >>> > + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); >> >>> > + if (ud1 != 0) >> >>> > + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> >>> > + emit_move_insn (dest, >> >>> > + gen_rtx_ZERO_EXTEND (DImode, >> >>> > + gen_lowpart (SImode, temp))); >> >>> > + } >> >> >> >> Why this? Please just write it in DImode, do not go via SImode? >> > Thanks for catch this. Yes, gen_lowpart with DImode would be ok. >> Oh, Sorry. DImode can not be used here. The genreated pattern with >> DImode can not be recognized. Using SImode is to match 'rlwxx'. > > There are patterns that accept DImode for rlwinm just fine. Please use > (and:DI (const_int 0xffffffff) (x:DI)) > not the obfuscated > (zero_extend:DI (subreg:SI (x:DI) LOWBYTE)) > Agree, 'and 0xffffffff' would be easy to read. Here is an small patch for it. I believe it should be no regression. :-) To make sure, I will do more bootstraps and regtests, and then submit it. BR, Jeff (Jiufu) NFC: use more readable pattern to clean high bits This patch is just using a more readable pattern for "rldicl x,x,0,32" to clean high 32bits. Old pattern looks like: r118:DI=zero_extend(r120:DI#0) new pattern looks like: r118:DI=r120:DI&0xffffffff gcc/ChangeLog: * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Update zero_extend(reg:DI#0) to reg:DI&0xffffffff --- gcc/config/rs6000/rs6000.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index eb7ad5e954f..5efe9b22d8b 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10267,10 +10267,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) emit_move_insn (copy_rtx (temp), gen_rtx_IOR (DImode, copy_rtx (temp), GEN_INT (ud1))); - emit_move_insn (dest, - gen_rtx_ZERO_EXTEND (DImode, - gen_lowpart (SImode, - copy_rtx (temp)))); + emit_move_insn (dest, gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); } else if (ud1 == ud3 && ud2 == ud4) {
Thanks for your comment! Date: Wed, 30 Nov 2022 12:30:02 +0800 Message-ID: <7ebkopxdx1.fsf@pike.rch.stglabs.ibm.com> Segher Boessenkool <segher@kernel.crashing.org> writes: >> > + else if ((ud4 == 0xffff && ud3 == 0xffff) >> > + && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000)))) >> > + { >> > + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> > + >> > + HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000) >> > + : ((ud2 << 16) - 0x80000000); > > We really should have some "hwi::sign_extend (ud1, 16)" helper function, > heh. Maybe there already is? Ah, "sext_hwi". Fixing that up > everywhere in this function is preapproved. I drafted a seperate patch for this like below. Maybe I could update other code like "((v & 0xf..f) ^ 0x80..0) - 0x80..0" in rs6000.cc and rs6000.md with sext_hwi too. BR, Jeff (Jiufu) Below NFC patch just uses sext_hwi to hand expresion like: (xx ^ 0x80..0) - 0x80..0 in rs6000_emit_set_long_const. --- gcc/config/rs6000/rs6000.cc | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 5efe9b22d8b..b03e059222b 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10242,7 +10242,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) - emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000)); + emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16))); else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000))) @@ -10250,7 +10250,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, - GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); + GEN_INT (sext_hwi (ud2 << 16, 32))); if (ud1 != 0) emit_move_insn (dest, gen_rtx_IOR (DImode, copy_rtx (temp), @@ -10261,8 +10261,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); gcc_assert (ud2 & 0x8000); - emit_move_insn (copy_rtx (temp), - GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); + emit_move_insn (copy_rtx (temp), GEN_INT (sext_hwi (ud2 << 16, 32))); if (ud1 != 0) emit_move_insn (copy_rtx (temp), gen_rtx_IOR (DImode, copy_rtx (temp), @@ -10273,7 +10272,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) { temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); HOST_WIDE_INT num = (ud2 << 16) | ud1; - rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000); + rs6000_emit_set_long_const (temp, sext_hwi (num, 32)); rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)); rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)); emit_move_insn (dest, gen_rtx_IOR (DImode, one, two)); @@ -10283,8 +10282,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) { temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); - emit_move_insn (copy_rtx (temp), - GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000)); + emit_move_insn (copy_rtx (temp), GEN_INT (sext_hwi (ud3 << 16, 32))); if (ud2 != 0) emit_move_insn (copy_rtx (temp), gen_rtx_IOR (DImode, copy_rtx (temp), @@ -10336,8 +10334,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) { temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); - emit_move_insn (copy_rtx (temp), - GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000)); + emit_move_insn (copy_rtx (temp), GEN_INT (sext_hwi (ud4 << 16, 32))); if (ud3 != 0) emit_move_insn (copy_rtx (temp), gen_rtx_IOR (DImode, copy_rtx (temp),
Date: Thu, 01 Dec 2022 09:48:06 +0800 In-Reply-To: <20221128171950.GN25951@gate.crashing.org> (Segher Boessenkool's message of "Mon, 28 Nov 2022 11:19:50 -0600") Message-ID: <7e4jufyjvt.fsf@pike.rch.stglabs.ibm.com> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/25.2 (gnu/linux) Segher Boessenkool <segher@kernel.crashing.org> writes: > On Mon, Nov 28, 2022 at 03:51:59PM +0800, Jiufu Guo wrote: >> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: >> > Segher Boessenkool <segher@kernel.crashing.org> writes: >> >>> > + else >> >>> > + { >> >>> > + emit_move_insn (temp, >> >>> > + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); >> >>> > + if (ud1 != 0) >> >>> > + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> >>> > + emit_move_insn (dest, >> >>> > + gen_rtx_ZERO_EXTEND (DImode, >> >>> > + gen_lowpart (SImode, temp))); >> >>> > + } >> >> >> >> Why this? Please just write it in DImode, do not go via SImode? >> > Thanks for catch this. Yes, gen_lowpart with DImode would be ok. >> Oh, Sorry. DImode can not be used here. The genreated pattern with >> DImode can not be recognized. Using SImode is to match 'rlwxx'. > > There are patterns that accept DImode for rlwinm just fine. Please use > (and:DI (const_int 0xffffffff) (x:DI)) > not the obfuscated > (zero_extend:DI (subreg:SI (x:DI) LOWBYTE)) I just submit a simple patch for this: https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607589.html Thanks for comments! BR, Jeff (Jiufu) > > > Segher
Date: Thu, 01 Dec 2022 09:51:32 +0800 In-Reply-To: <20221125144309.GG25951@gate.crashing.org> (Segher Boessenkool's message of "Fri, 25 Nov 2022 08:43:09 -0600") Message-ID: <7ewn7bx55n.fsf@pike.rch.stglabs.ibm.com> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/25.2 (gnu/linux) Segher Boessenkool <segher@kernel.crashing.org> writes: > Hi guys, > > On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote: >> on 2022/10/26 19:40, Jiufu Guo wrote: cut... >> > + >> > + HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000) >> > + : ((ud2 << 16) - 0x80000000); > > We really should have some "hwi::sign_extend (ud1, 16)" helper function, > heh. Maybe there already is? Ah, "sext_hwi". Fixing that up > everywhere in this function is preapproved. I just submit a patch to use sext_hwi for existing code: https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607588.html Thanks for your help and spend time on this! BR, Jeff (Jiufu) > >> > + else cut... > > Could you comment what exact instructions are expected? > li;xoris and li;xoris and lis;xoris I guess? It helps if you just tell > the reader here. > > The li;oris and li;xoris parts look good. > > > Segher
Hi Segher, 在 11/28/22 10:18 PM, Segher Boessenkool 写道: > On Mon, Nov 28, 2022 at 11:37:34AM +0800, Jiufu Guo wrote: >> Segher Boessenkool <segher@kernel.crashing.org> writes: >>> On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote: >>>> on 2022/10/26 19:40, Jiufu Guo wrote: >>>> for "li/lis + oris/xoris", I interpreted it into four combinations: >>>> >>>> li + oris, lis + oris, li + xoris, lis + xoris. >>>> >>>> not sure just me interpreting like that, but the actual combinations >>>> which this patch adopts are: >>>> >>>> li + oris, li + xoris, lis + xoris. >>>> >>>> It's a bit off, but not a big deal, up to you to reword it or not. :) >>> >>> The first two are obvious, but the last one is almost never a good idea, >>> there usually are better ways to do the same. I cannot even think of >>> any case where this is best? A lis;rl* is always prefered (it can >>> optimise better, be combined with other insns). >> I understant your point here. The first two: 'li' for lowest 16bits, >> 'oris/xoris' for next 16bits. >> >> While for 'lis + xoris', it may not obvious, because both 'lis' and >> 'xoris' operates on 17-31bits. >> 'lis + xoris' is for case "32(1) || 1(0) || 15(x) || 16(0)". xoris is >> used to clean bit31. This case seems hard to be supported by 'rlxx'. > > Please put that in a separate patch? First do a patch with just > lis;x?oris. They are unrelated and different in almost every way. > I just send out two patches, one for "lis; xoris" and one for "li; x?oris". https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607617.html https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607618.html Maybe, do we prefer to separate into 3 patches for review easily :-)? Thanks for review! BR, Jeff (Jiufu) >> I hit to find this case when I analyze what kind of constants can be >> build by two instructions. Checked the posssible combinations: >> "addi/addis" + "neg/ori/../xoris/rldX/rlwX/../sradi/extswsli"(those >> instructions which accept one register and one immediate). >> >> I also drafted the patch to use "li/lis+rlxx" to build constant. >> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601276.html >> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601277.html > > Those seem to do many things in one patch as well :-( It is very hard > to review such things, it takes many hours each to do properly. > > > Segher
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index d2743f7bce6..9b7a51f052d 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10228,6 +10228,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) { rtx temp; HOST_WIDE_INT ud1, ud2, ud3, ud4; + HOST_WIDE_INT orig_c = c; ud1 = c & 0xffff; c = c >> 16; @@ -10253,21 +10254,41 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) gen_rtx_IOR (DImode, copy_rtx (temp), GEN_INT (ud1))); } + else if ((ud4 == 0xffff && ud3 == 0xffff) + && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000)))) + { + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + + HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000) + : ((ud2 << 16) - 0x80000000); + /* li/lis + xoris */ + emit_move_insn (temp, GEN_INT (imm)); + emit_move_insn (dest, gen_rtx_XOR (DImode, temp, + GEN_INT (orig_c ^ imm))); + } else if (ud3 == 0 && ud4 == 0) { temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); gcc_assert (ud2 & 0x8000); - emit_move_insn (copy_rtx (temp), - GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); - if (ud1 != 0) - emit_move_insn (copy_rtx (temp), - gen_rtx_IOR (DImode, copy_rtx (temp), - GEN_INT (ud1))); - emit_move_insn (dest, - gen_rtx_ZERO_EXTEND (DImode, - gen_lowpart (SImode, - copy_rtx (temp)))); + + if (!(ud1 & 0x8000)) + { + /* li+oris */ + emit_move_insn (temp, GEN_INT (ud1)); + emit_move_insn (dest, + gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); + } + else + { + emit_move_insn (temp, + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); + if (ud1 != 0) + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + emit_move_insn (dest, + gen_rtx_ZERO_EXTEND (DImode, + gen_lowpart (SImode, temp))); + } } else if (ud1 == ud3 && ud2 == ud4) { diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708-run.c b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c new file mode 100644 index 00000000000..df65c321f6b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c @@ -0,0 +1,17 @@ +/* PR target/106708 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#include "pr106708.h" + +long long arr[] = {0x98765432ULL, 0xffffffff7cdeab55ULL, 0xffffffff65430000ULL}; +int +main () +{ + long long a[3]; + + foo (a); + if (__builtin_memcmp (a, arr, sizeof (arr)) != 0) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.c b/gcc/testsuite/gcc.target/powerpc/pr106708.c new file mode 100644 index 00000000000..ebd9ea88993 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c @@ -0,0 +1,12 @@ +/* PR target/106708 */ +/* { dg-do compile } } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ +/* { dg-require-effective-target has_arch_ppc64 } */ + + +#include "pr106708.h" + +/* { dg-final { scan-assembler-times {\mli\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlis\M} 1 } } */ +/* { dg-final { scan-assembler-times {\moris\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxoris\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.h b/gcc/testsuite/gcc.target/powerpc/pr106708.h new file mode 100644 index 00000000000..42526a70892 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h @@ -0,0 +1,9 @@ +/* Test constants which can be built by li/lis + oris/xoris */ +void __attribute__ ((__noinline__, __noclone__)) foo (long long *arg) +{ + *arg++ = 0x98765432ULL; + *arg++ = 0xffffffff7cdeab55ULL; + *arg++ = 0xffffffff65430000ULL; +} + +