diff mbox series

[V2] rs6000: Support to build constants by li/lis+oris/xoris

Message ID 20221026114052.17713-1-guojiufu@linux.ibm.com
State New
Headers show
Series [V2] rs6000: Support to build constants by li/lis+oris/xoris | expand

Commit Message

Jiufu Guo Oct. 26, 2022, 11:40 a.m. UTC
Hi,

PR106708 constaint some constants which can be support by li/lis + oris/xoris.

For constant C:
if '(c & 0xFFFFFFFF80008000ULL) == 0x80000000ULL' or say:
32(0) || 1(1) || 15(x) || 1(0) || 15(x), we could use li+oris to
build constant 'C'.
Here N(M) means N continuous bit M, x for M means it is ok for either
1 or 0; '||' means concatenation.

if '(c & 0xFFFFFFFF00008000ULL) == 0xFFFFFFFF00008000ULL' or say:
32(1) || 16(x) || 1(1) || 15(x), using li+xoris would be ok.

if '(c & 0xFFFFFFFF0000FFFFULL) == 0xFFFFFFFF00000000' or say:
32(1) || 1(0) || 15(x) || 16(0), using lis+xoris would be ok.

This patch update rs6000_emit_set_long_const to support these forms.
Bootstrap and regtest pass on ppc64 and ppc64le.

Is this ok for trunk?

BR,
Jeff(Jiufu)


	PR target/106708

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Support
	constants which can be built with li + oris or li/lis + xoris.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr106708-run.c: New test.
	* gcc.target/powerpc/pr106708.c: New test.
	* gcc.target/powerpc/pr106708.h: New file.

---
 gcc/config/rs6000/rs6000.cc                   | 41 ++++++++++++++-----
 .../gcc.target/powerpc/pr106708-run.c         | 17 ++++++++
 gcc/testsuite/gcc.target/powerpc/pr106708.c   | 12 ++++++
 gcc/testsuite/gcc.target/powerpc/pr106708.h   |  9 ++++
 4 files changed, 69 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708-run.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.h

Comments

Jiufu Guo Oct. 27, 2022, 5:19 a.m. UTC | #1
Jiufu Guo <guojiufu@linux.ibm.com> writes:

> index 00000000000..ebd9ea88993
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c
> @@ -0,0 +1,12 @@
> +/* PR target/106708 */
> +/* { dg-do compile } } */
Typo, should be: +/* { dg-do compile } */>

BR,
Jeff(Jiufu)
Jiufu Guo Nov. 9, 2022, 3:29 a.m. UTC | #2
Hi,

Gentle ping:
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604415.html

BR,
Jeff(Jiufu)


Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> PR106708 constaint some constants which can be support by li/lis + oris/xoris.
>
> For constant C:
> if '(c & 0xFFFFFFFF80008000ULL) == 0x80000000ULL' or say:
> 32(0) || 1(1) || 15(x) || 1(0) || 15(x), we could use li+oris to
> build constant 'C'.
> Here N(M) means N continuous bit M, x for M means it is ok for either
> 1 or 0; '||' means concatenation.
>
> if '(c & 0xFFFFFFFF00008000ULL) == 0xFFFFFFFF00008000ULL' or say:
> 32(1) || 16(x) || 1(1) || 15(x), using li+xoris would be ok.
>
> if '(c & 0xFFFFFFFF0000FFFFULL) == 0xFFFFFFFF00000000' or say:
> 32(1) || 1(0) || 15(x) || 16(0), using lis+xoris would be ok.
>
> This patch update rs6000_emit_set_long_const to support these forms.
> Bootstrap and regtest pass on ppc64 and ppc64le.
>
> Is this ok for trunk?
>
> BR,
> Jeff(Jiufu)
>
>
> 	PR target/106708
>
> gcc/ChangeLog:
>
> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Support
> 	constants which can be built with li + oris or li/lis + xoris.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/powerpc/pr106708-run.c: New test.
> 	* gcc.target/powerpc/pr106708.c: New test.
> 	* gcc.target/powerpc/pr106708.h: New file.
>
> ---
>  gcc/config/rs6000/rs6000.cc                   | 41 ++++++++++++++-----
>  .../gcc.target/powerpc/pr106708-run.c         | 17 ++++++++
>  gcc/testsuite/gcc.target/powerpc/pr106708.c   | 12 ++++++
>  gcc/testsuite/gcc.target/powerpc/pr106708.h   |  9 ++++
>  4 files changed, 69 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708-run.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.h
>
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index d2743f7bce6..9b7a51f052d 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10228,6 +10228,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>  {
>    rtx temp;
>    HOST_WIDE_INT ud1, ud2, ud3, ud4;
> +  HOST_WIDE_INT orig_c = c;
>  
>    ud1 = c & 0xffff;
>    c = c >> 16;
> @@ -10253,21 +10254,41 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>  			gen_rtx_IOR (DImode, copy_rtx (temp),
>  				     GEN_INT (ud1)));
>      }
> +  else if ((ud4 == 0xffff && ud3 == 0xffff)
> +	   && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000))))
> +    {
> +      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +
> +      HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000)
> +					 : ((ud2 << 16) - 0x80000000);
> +      /* li/lis + xoris */
> +      emit_move_insn (temp, GEN_INT (imm));
> +      emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
> +					 GEN_INT (orig_c ^ imm)));
> +    }
>    else if (ud3 == 0 && ud4 == 0)
>      {
>        temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>  
>        gcc_assert (ud2 & 0x8000);
> -      emit_move_insn (copy_rtx (temp),
> -		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
> -      if (ud1 != 0)
> -	emit_move_insn (copy_rtx (temp),
> -			gen_rtx_IOR (DImode, copy_rtx (temp),
> -				     GEN_INT (ud1)));
> -      emit_move_insn (dest,
> -		      gen_rtx_ZERO_EXTEND (DImode,
> -					   gen_lowpart (SImode,
> -							copy_rtx (temp))));
> +
> +      if (!(ud1 & 0x8000))
> +	{
> +	  /* li+oris */
> +	  emit_move_insn (temp, GEN_INT (ud1));
> +	  emit_move_insn (dest,
> +			  gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
> +	}
> +      else
> +	{
> +	  emit_move_insn (temp,
> +			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
> +	  if (ud1 != 0)
> +	    emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +	  emit_move_insn (dest,
> +			  gen_rtx_ZERO_EXTEND (DImode,
> +					       gen_lowpart (SImode, temp)));
> +	}
>      }
>    else if (ud1 == ud3 && ud2 == ud4)
>      {
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708-run.c b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c
> new file mode 100644
> index 00000000000..df65c321f6b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c
> @@ -0,0 +1,17 @@
> +/* PR target/106708 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#include "pr106708.h"
> +
> +long long arr[] = {0x98765432ULL, 0xffffffff7cdeab55ULL, 0xffffffff65430000ULL};
> +int
> +main ()
> +{
> +  long long a[3];
> +
> +  foo (a);
> +  if (__builtin_memcmp (a, arr, sizeof (arr)) != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.c b/gcc/testsuite/gcc.target/powerpc/pr106708.c
> new file mode 100644
> index 00000000000..ebd9ea88993
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c
> @@ -0,0 +1,12 @@
> +/* PR target/106708 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> +
> +
> +#include "pr106708.h"
> +
> +/* { dg-final { scan-assembler-times {\mli\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mlis\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\moris\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mxoris\M} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.h b/gcc/testsuite/gcc.target/powerpc/pr106708.h
> new file mode 100644
> index 00000000000..42526a70892
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h
> @@ -0,0 +1,9 @@
> +/* Test constants which can be built by li/lis + oris/xoris */
> +void  __attribute__ ((__noinline__, __noclone__)) foo (long long *arg)
> +{
> +  *arg++ = 0x98765432ULL;
> +  *arg++ = 0xffffffff7cdeab55ULL;
> +  *arg++ = 0xffffffff65430000ULL;
> +}
> +
> +
Kewen.Lin Nov. 25, 2022, 8:11 a.m. UTC | #3
Hi Jeff,

Sorry for the late reply.

on 2022/10/26 19:40, Jiufu Guo wrote:
> Hi,
> 
> PR106708 constaint some constants which can be support by li/lis + oris/xoris.
           ~~~~~~~~ typo?

for "li/lis + oris/xoris", I interpreted it into four combinations:

   li + oris, lis + oris, li + xoris, lis + xoris.

not sure just me interpreting like that, but the actual combinations
which this patch adopts are:

   li + oris, li + xoris, lis + xoris.

It's a bit off, but not a big deal, up to you to reword it or not.  :)

> 
> For constant C:
> if '(c & 0xFFFFFFFF80008000ULL) == 0x80000000ULL' or say:
> 32(0) || 1(1) || 15(x) || 1(0) || 15(x), we could use li+oris to
> build constant 'C'.
> Here N(M) means N continuous bit M, x for M means it is ok for either
> 1 or 0; '||' means concatenation.
> 
> if '(c & 0xFFFFFFFF00008000ULL) == 0xFFFFFFFF00008000ULL' or say:
> 32(1) || 16(x) || 1(1) || 15(x), using li+xoris would be ok.
> 
> if '(c & 0xFFFFFFFF0000FFFFULL) == 0xFFFFFFFF00000000' or say:
> 32(1) || 1(0) || 15(x) || 16(0), using lis+xoris would be ok.
> 
> This patch update rs6000_emit_set_long_const to support these forms.
> Bootstrap and regtest pass on ppc64 and ppc64le.
> 
> Is this ok for trunk?

This updated version looks good to me, but I'd leave it to Segher for the
final say.  Thanks!

BR,
Kewen

> 
> BR,
> Jeff(Jiufu)
> 
> 
> 	PR target/106708
> 
> gcc/ChangeLog:
> 
> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Support
> 	constants which can be built with li + oris or li/lis + xoris.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/powerpc/pr106708-run.c: New test.
> 	* gcc.target/powerpc/pr106708.c: New test.
> 	* gcc.target/powerpc/pr106708.h: New file.
> 
> ---
>  gcc/config/rs6000/rs6000.cc                   | 41 ++++++++++++++-----
>  .../gcc.target/powerpc/pr106708-run.c         | 17 ++++++++
>  gcc/testsuite/gcc.target/powerpc/pr106708.c   | 12 ++++++
>  gcc/testsuite/gcc.target/powerpc/pr106708.h   |  9 ++++
>  4 files changed, 69 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708-run.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.h
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index d2743f7bce6..9b7a51f052d 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10228,6 +10228,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>  {
>    rtx temp;
>    HOST_WIDE_INT ud1, ud2, ud3, ud4;
> +  HOST_WIDE_INT orig_c = c;
> 
>    ud1 = c & 0xffff;
>    c = c >> 16;
> @@ -10253,21 +10254,41 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>  			gen_rtx_IOR (DImode, copy_rtx (temp),
>  				     GEN_INT (ud1)));
>      }
> +  else if ((ud4 == 0xffff && ud3 == 0xffff)
> +	   && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000))))
> +    {
> +      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +
> +      HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000)
> +					 : ((ud2 << 16) - 0x80000000);
> +      /* li/lis + xoris */
> +      emit_move_insn (temp, GEN_INT (imm));
> +      emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
> +					 GEN_INT (orig_c ^ imm)));
> +    }
>    else if (ud3 == 0 && ud4 == 0)
>      {
>        temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> 
>        gcc_assert (ud2 & 0x8000);
> -      emit_move_insn (copy_rtx (temp),
> -		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
> -      if (ud1 != 0)
> -	emit_move_insn (copy_rtx (temp),
> -			gen_rtx_IOR (DImode, copy_rtx (temp),
> -				     GEN_INT (ud1)));
> -      emit_move_insn (dest,
> -		      gen_rtx_ZERO_EXTEND (DImode,
> -					   gen_lowpart (SImode,
> -							copy_rtx (temp))));
> +
> +      if (!(ud1 & 0x8000))
> +	{
> +	  /* li+oris */
> +	  emit_move_insn (temp, GEN_INT (ud1));
> +	  emit_move_insn (dest,
> +			  gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
> +	}
> +      else
> +	{
> +	  emit_move_insn (temp,
> +			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
> +	  if (ud1 != 0)
> +	    emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +	  emit_move_insn (dest,
> +			  gen_rtx_ZERO_EXTEND (DImode,
> +					       gen_lowpart (SImode, temp)));
> +	}
>      }
>    else if (ud1 == ud3 && ud2 == ud4)
>      {
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708-run.c b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c
> new file mode 100644
> index 00000000000..df65c321f6b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c
> @@ -0,0 +1,17 @@
> +/* PR target/106708 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#include "pr106708.h"
> +
> +long long arr[] = {0x98765432ULL, 0xffffffff7cdeab55ULL, 0xffffffff65430000ULL};
> +int
> +main ()
> +{
> +  long long a[3];
> +
> +  foo (a);
> +  if (__builtin_memcmp (a, arr, sizeof (arr)) != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.c b/gcc/testsuite/gcc.target/powerpc/pr106708.c
> new file mode 100644
> index 00000000000..ebd9ea88993
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c
> @@ -0,0 +1,12 @@
> +/* PR target/106708 */
> +/* { dg-do compile } } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> +
> +
> +#include "pr106708.h"
> +
> +/* { dg-final { scan-assembler-times {\mli\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mlis\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\moris\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mxoris\M} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.h b/gcc/testsuite/gcc.target/powerpc/pr106708.h
> new file mode 100644
> index 00000000000..42526a70892
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h
> @@ -0,0 +1,9 @@
> +/* Test constants which can be built by li/lis + oris/xoris */
> +void  __attribute__ ((__noinline__, __noclone__)) foo (long long *arg)
> +{
> +  *arg++ = 0x98765432ULL;
> +  *arg++ = 0xffffffff7cdeab55ULL;
> +  *arg++ = 0xffffffff65430000ULL;
> +}
> +
> +
Jiufu Guo Nov. 25, 2022, 12:41 p.m. UTC | #4
Hi Kewen,

Thanks a lot for your insight comments!

"Kewen.Lin" <linkw@linux.ibm.com> writes:

> Hi Jeff,
>
> Sorry for the late reply.
>
> on 2022/10/26 19:40, Jiufu Guo wrote:
>> Hi,
>> 
>> PR106708 constaint some constants which can be support by li/lis + oris/xoris.
>            ~~~~~~~~ typo?
Oh, typo!
>
> for "li/lis + oris/xoris", I interpreted it into four combinations:
>
>    li + oris, lis + oris, li + xoris, lis + xoris.
>
> not sure just me interpreting like that, but the actual combinations
> which this patch adopts are:
>
>    li + oris, li + xoris, lis + xoris.
>
> It's a bit off, but not a big deal, up to you to reword it or not.  :)
Oh, thanks! I will update to use "li/lis + xoris" or "li + oris" to
avoid confuse.
>
>> 
>> For constant C:
>> if '(c & 0xFFFFFFFF80008000ULL) == 0x80000000ULL' or say:
>> 32(0) || 1(1) || 15(x) || 1(0) || 15(x), we could use li+oris to
>> build constant 'C'.
>> Here N(M) means N continuous bit M, x for M means it is ok for either
>> 1 or 0; '||' means concatenation.
>> 
>> if '(c & 0xFFFFFFFF00008000ULL) == 0xFFFFFFFF00008000ULL' or say:
>> 32(1) || 16(x) || 1(1) || 15(x), using li+xoris would be ok.
>> 
>> if '(c & 0xFFFFFFFF0000FFFFULL) == 0xFFFFFFFF00000000' or say:
>> 32(1) || 1(0) || 15(x) || 16(0), using lis+xoris would be ok.
>> 
>> This patch update rs6000_emit_set_long_const to support these forms.
>> Bootstrap and regtest pass on ppc64 and ppc64le.
>> 
>> Is this ok for trunk?
>
> This updated version looks good to me, but I'd leave it to Segher for the
> final say.  Thanks!

Thanks!

BR,
Jeff (Jiufu)
>
> BR,
> Kewen
>
>> 
>> BR,
>> Jeff(Jiufu)
>> 
>> 
>> 	PR target/106708
>> 
>> gcc/ChangeLog:
>> 
>> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Support
>> 	constants which can be built with li + oris or li/lis + xoris.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 	* gcc.target/powerpc/pr106708-run.c: New test.
>> 	* gcc.target/powerpc/pr106708.c: New test.
>> 	* gcc.target/powerpc/pr106708.h: New file.
>> 
>> ---
>>  gcc/config/rs6000/rs6000.cc                   | 41 ++++++++++++++-----
>>  .../gcc.target/powerpc/pr106708-run.c         | 17 ++++++++
>>  gcc/testsuite/gcc.target/powerpc/pr106708.c   | 12 ++++++
>>  gcc/testsuite/gcc.target/powerpc/pr106708.h   |  9 ++++
>>  4 files changed, 69 insertions(+), 10 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708-run.c
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.c
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106708.h
>> 
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index d2743f7bce6..9b7a51f052d 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -10228,6 +10228,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>>  {
>>    rtx temp;
>>    HOST_WIDE_INT ud1, ud2, ud3, ud4;
>> +  HOST_WIDE_INT orig_c = c;
>> 
>>    ud1 = c & 0xffff;
>>    c = c >> 16;
>> @@ -10253,21 +10254,41 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>>  			gen_rtx_IOR (DImode, copy_rtx (temp),
>>  				     GEN_INT (ud1)));
>>      }
>> +  else if ((ud4 == 0xffff && ud3 == 0xffff)
>> +	   && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000))))
>> +    {
>> +      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +
>> +      HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000)
>> +					 : ((ud2 << 16) - 0x80000000);
>> +      /* li/lis + xoris */
>> +      emit_move_insn (temp, GEN_INT (imm));
>> +      emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
>> +					 GEN_INT (orig_c ^ imm)));
>> +    }
>>    else if (ud3 == 0 && ud4 == 0)
>>      {
>>        temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> 
>>        gcc_assert (ud2 & 0x8000);
>> -      emit_move_insn (copy_rtx (temp),
>> -		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
>> -      if (ud1 != 0)
>> -	emit_move_insn (copy_rtx (temp),
>> -			gen_rtx_IOR (DImode, copy_rtx (temp),
>> -				     GEN_INT (ud1)));
>> -      emit_move_insn (dest,
>> -		      gen_rtx_ZERO_EXTEND (DImode,
>> -					   gen_lowpart (SImode,
>> -							copy_rtx (temp))));
>> +
>> +      if (!(ud1 & 0x8000))
>> +	{
>> +	  /* li+oris */
>> +	  emit_move_insn (temp, GEN_INT (ud1));
>> +	  emit_move_insn (dest,
>> +			  gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
>> +	}
>> +      else
>> +	{
>> +	  emit_move_insn (temp,
>> +			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
>> +	  if (ud1 != 0)
>> +	    emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> +	  emit_move_insn (dest,
>> +			  gen_rtx_ZERO_EXTEND (DImode,
>> +					       gen_lowpart (SImode, temp)));
>> +	}
>>      }
>>    else if (ud1 == ud3 && ud2 == ud4)
>>      {
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708-run.c b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c
>> new file mode 100644
>> index 00000000000..df65c321f6b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c
>> @@ -0,0 +1,17 @@
>> +/* PR target/106708 */
>> +/* { dg-do run } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include "pr106708.h"
>> +
>> +long long arr[] = {0x98765432ULL, 0xffffffff7cdeab55ULL, 0xffffffff65430000ULL};
>> +int
>> +main ()
>> +{
>> +  long long a[3];
>> +
>> +  foo (a);
>> +  if (__builtin_memcmp (a, arr, sizeof (arr)) != 0)
>> +    __builtin_abort ();
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.c b/gcc/testsuite/gcc.target/powerpc/pr106708.c
>> new file mode 100644
>> index 00000000000..ebd9ea88993
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c
>> @@ -0,0 +1,12 @@
>> +/* PR target/106708 */
>> +/* { dg-do compile } } */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
>> +/* { dg-require-effective-target has_arch_ppc64 } */
>> +
>> +
>> +#include "pr106708.h"
>> +
>> +/* { dg-final { scan-assembler-times {\mli\M} 2 } } */
>> +/* { dg-final { scan-assembler-times {\mlis\M} 1 } } */
>> +/* { dg-final { scan-assembler-times {\moris\M} 1 } } */
>> +/* { dg-final { scan-assembler-times {\mxoris\M} 2 } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.h b/gcc/testsuite/gcc.target/powerpc/pr106708.h
>> new file mode 100644
>> index 00000000000..42526a70892
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h
>> @@ -0,0 +1,9 @@
>> +/* Test constants which can be built by li/lis + oris/xoris */
>> +void  __attribute__ ((__noinline__, __noclone__)) foo (long long *arg)
>> +{
>> +  *arg++ = 0x98765432ULL;
>> +  *arg++ = 0xffffffff7cdeab55ULL;
>> +  *arg++ = 0xffffffff65430000ULL;
>> +}
>> +
>> +
Segher Boessenkool Nov. 25, 2022, 2:43 p.m. UTC | #5
Hi guys,

On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote:
> on 2022/10/26 19:40, Jiufu Guo wrote:
> for "li/lis + oris/xoris", I interpreted it into four combinations:
> 
>    li + oris, lis + oris, li + xoris, lis + xoris.
> 
> not sure just me interpreting like that, but the actual combinations
> which this patch adopts are:
> 
>    li + oris, li + xoris, lis + xoris.
> 
> It's a bit off, but not a big deal, up to you to reword it or not.  :)

The first two are obvious, but the last one is almost never a good idea,
there usually are better ways to do the same.  I cannot even think of
any case where this is best?  A lis;rl* is always prefered (it can
optimise better, be combined with other insns).

> > +  HOST_WIDE_INT orig_c = c;

If you ever feel you need a variable to hold an "orig" value, that is a
good hint that you should restructure the code a bit, perhaps even
factor it.  That often is overdue (like here), not caused by you, but
you could help solve it ;-)

(This is what made this patch hard to review, btw).

> >  			gen_rtx_IOR (DImode, copy_rtx (temp),
> >  				     GEN_INT (ud1)));
> >      }
> > +  else if ((ud4 == 0xffff && ud3 == 0xffff)
> > +	   && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000))))
> > +    {
> > +      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> > +
> > +      HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000)
> > +					 : ((ud2 << 16) - 0x80000000);

We really should have some "hwi::sign_extend (ud1, 16)" helper function,
heh.  Maybe there already is?  Ah, "sext_hwi".  Fixing that up
everywhere in this function is preapproved.

> > +      else
> > +	{
> > +	  emit_move_insn (temp,
> > +			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
> > +	  if (ud1 != 0)
> > +	    emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> > +	  emit_move_insn (dest,
> > +			  gen_rtx_ZERO_EXTEND (DImode,
> > +					       gen_lowpart (SImode, temp)));
> > +	}

Why this?  Please just write it in DImode, do not go via SImode?

> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h
> > @@ -0,0 +1,9 @@
> > +/* Test constants which can be built by li/lis + oris/xoris */
> > +void  __attribute__ ((__noinline__, __noclone__)) foo (long long *arg)
> > +{
> > +  *arg++ = 0x98765432ULL;
> > +  *arg++ = 0xffffffff7cdeab55ULL;
> > +  *arg++ = 0xffffffff65430000ULL;
> > +}

Use noipa please (it is shorter, simpler, and covers more :-) )

Could you comment what exact instructions are expected?
li;xoris and li;xoris and lis;xoris I guess?  It helps if you just tell
the reader here.

The li;oris and li;xoris parts look good.


Segher
Jiufu Guo Nov. 28, 2022, 3:37 a.m. UTC | #6
Hi Segher!

Thanks a lot for your comments!

Segher Boessenkool <segher@kernel.crashing.org> writes:

> Hi guys,
>
> On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote:
>> on 2022/10/26 19:40, Jiufu Guo wrote:
>> for "li/lis + oris/xoris", I interpreted it into four combinations:
>> 
>>    li + oris, lis + oris, li + xoris, lis + xoris.
>> 
>> not sure just me interpreting like that, but the actual combinations
>> which this patch adopts are:
>> 
>>    li + oris, li + xoris, lis + xoris.
>> 
>> It's a bit off, but not a big deal, up to you to reword it or not.  :)
>
> The first two are obvious, but the last one is almost never a good idea,
> there usually are better ways to do the same.  I cannot even think of
> any case where this is best?  A lis;rl* is always prefered (it can
> optimise better, be combined with other insns).
I understant your point here.  The first two: 'li' for lowest 16bits,
'oris/xoris' for next 16bits.

While for 'lis + xoris', it may not obvious, because both 'lis' and
'xoris' operates on 17-31bits.
'lis + xoris' is for case "32(1) || 1(0) || 15(x) || 16(0)". xoris is
used to clean bit31.  This case seems hard to be supported by 'rlxx'.

I hit to find this case when I analyze what kind of constants can be
build by two instructions. Checked the posssible combinations:
"addi/addis" + "neg/ori/../xoris/rldX/rlwX/../sradi/extswsli"(those
instructions which accept one register and one immediate).

I also drafted the patch to use "li/lis+rlxx" to build constant.
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601276.html
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601277.html

>
>> > +  HOST_WIDE_INT orig_c = c;
>
> If you ever feel you need a variable to hold an "orig" value, that is a
> good hint that you should restructure the code a bit, perhaps even
> factor it.  That often is overdue (like here), not caused by you, but
> you could help solve it ;-)
>
> (This is what made this patch hard to review, btw).
You are right.  Thanks for point out this!
>
>> >  			gen_rtx_IOR (DImode, copy_rtx (temp),
>> >  				     GEN_INT (ud1)));
>> >      }
>> > +  else if ((ud4 == 0xffff && ud3 == 0xffff)
>> > +	   && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000))))
>> > +    {
>> > +      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> > +
>> > +      HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000)
>> > +					 : ((ud2 << 16) - 0x80000000);
>
> We really should have some "hwi::sign_extend (ud1, 16)" helper function,
> heh.  Maybe there already is?  Ah, "sext_hwi".  Fixing that up
> everywhere in this function is preapproved.
Great, thanks! 
>
>> > +      else
>> > +	{
>> > +	  emit_move_insn (temp,
>> > +			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
>> > +	  if (ud1 != 0)
>> > +	    emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> > +	  emit_move_insn (dest,
>> > +			  gen_rtx_ZERO_EXTEND (DImode,
>> > +					       gen_lowpart (SImode, temp)));
>> > +	}
>
> Why this?  Please just write it in DImode, do not go via SImode?
Thanks for catch this. Yes, gen_lowpart with DImode would be ok.
>
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h
>> > @@ -0,0 +1,9 @@
>> > +/* Test constants which can be built by li/lis + oris/xoris */
>> > +void  __attribute__ ((__noinline__, __noclone__)) foo (long long *arg)
>> > +{
>> > +  *arg++ = 0x98765432ULL;
>> > +  *arg++ = 0xffffffff7cdeab55ULL;
>> > +  *arg++ = 0xffffffff65430000ULL;
>> > +}
>
> Use noipa please (it is shorter, simpler, and covers more :-) )
Thanks!
>
> Could you comment what exact instructions are expected?
> li;xoris and li;xoris and lis;xoris I guess?  It helps if you just tell
> the reader here.
Sure, thanks!
>
> The li;oris and li;xoris parts look good.
>

BR,
Jeff (Jiufu)
>
> Segher
Jiufu Guo Nov. 28, 2022, 7:51 a.m. UTC | #7
Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:

> Hi Segher!
>
> Thanks a lot for your comments!
>
> Segher Boessenkool <segher@kernel.crashing.org> writes:
>
>> Hi guys,
>>
>> On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote:
>>> on 2022/10/26 19:40, Jiufu Guo wrote:
>>> for "li/lis + oris/xoris", I interpreted it into four combinations:
>>> 
>>>    li + oris, lis + oris, li + xoris, lis + xoris.
>>> 
>>> not sure just me interpreting like that, but the actual combinations
>>> which this patch adopts are:
>>> 
>>>    li + oris, li + xoris, lis + xoris.
>>> 
>>> It's a bit off, but not a big deal, up to you to reword it or not.  :)
>>
>> The first two are obvious, but the last one is almost never a good idea,
>> there usually are better ways to do the same.  I cannot even think of
>> any case where this is best?  A lis;rl* is always prefered (it can
>> optimise better, be combined with other insns).
> I understant your point here.  The first two: 'li' for lowest 16bits,
> 'oris/xoris' for next 16bits.
>
> While for 'lis + xoris', it may not obvious, because both 'lis' and
> 'xoris' operates on 17-31bits.
> 'lis + xoris' is for case "32(1) || 1(0) || 15(x) || 16(0)". xoris is
> used to clean bit31.  This case seems hard to be supported by 'rlxx'.
>
> I hit to find this case when I analyze what kind of constants can be
> build by two instructions. Checked the posssible combinations:
> "addi/addis" + "neg/ori/../xoris/rldX/rlwX/../sradi/extswsli"(those
> instructions which accept one register and one immediate).
>
> I also drafted the patch to use "li/lis+rlxx" to build constant.
> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601276.html
> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601277.html
>
>>
>>> > +  HOST_WIDE_INT orig_c = c;
>>
>> If you ever feel you need a variable to hold an "orig" value, that is a
>> good hint that you should restructure the code a bit, perhaps even
>> factor it.  That often is overdue (like here), not caused by you, but
>> you could help solve it ;-)
>>
>> (This is what made this patch hard to review, btw).
> You are right.  Thanks for point out this!
>>
>>> >  			gen_rtx_IOR (DImode, copy_rtx (temp),
>>> >  				     GEN_INT (ud1)));
>>> >      }
>>> > +  else if ((ud4 == 0xffff && ud3 == 0xffff)
>>> > +	   && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000))))
>>> > +    {
>>> > +      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>>> > +
>>> > +      HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000)
>>> > +					 : ((ud2 << 16) - 0x80000000);
>>
>> We really should have some "hwi::sign_extend (ud1, 16)" helper function,
>> heh.  Maybe there already is?  Ah, "sext_hwi".  Fixing that up
>> everywhere in this function is preapproved.
> Great, thanks! 
>>
>>> > +      else
>>> > +	{
>>> > +	  emit_move_insn (temp,
>>> > +			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
>>> > +	  if (ud1 != 0)
>>> > +	    emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>>> > +	  emit_move_insn (dest,
>>> > +			  gen_rtx_ZERO_EXTEND (DImode,
>>> > +					       gen_lowpart (SImode, temp)));
>>> > +	}
>>
>> Why this?  Please just write it in DImode, do not go via SImode?
> Thanks for catch this. Yes, gen_lowpart with DImode would be ok.
Oh, Sorry. DImode can not be used here.  The genreated pattern with
DImode can not be recognized.  Using SImode is to match 'rlwxx'.

BR,
Jeff (Jiufu)
>>
>>> > --- /dev/null
>>> > +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h
>>> > @@ -0,0 +1,9 @@
>>> > +/* Test constants which can be built by li/lis + oris/xoris */
>>> > +void  __attribute__ ((__noinline__, __noclone__)) foo (long long *arg)
>>> > +{
>>> > +  *arg++ = 0x98765432ULL;
>>> > +  *arg++ = 0xffffffff7cdeab55ULL;
>>> > +  *arg++ = 0xffffffff65430000ULL;
>>> > +}
>>
>> Use noipa please (it is shorter, simpler, and covers more :-) )
> Thanks!
>>
>> Could you comment what exact instructions are expected?
>> li;xoris and li;xoris and lis;xoris I guess?  It helps if you just tell
>> the reader here.
> Sure, thanks!
>>
>> The li;oris and li;xoris parts look good.
>>
>
> BR,
> Jeff (Jiufu)
>>
>> Segher
Segher Boessenkool Nov. 28, 2022, 2:18 p.m. UTC | #8
On Mon, Nov 28, 2022 at 11:37:34AM +0800, Jiufu Guo wrote:
> Segher Boessenkool <segher@kernel.crashing.org> writes:
> > On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote:
> >> on 2022/10/26 19:40, Jiufu Guo wrote:
> >> for "li/lis + oris/xoris", I interpreted it into four combinations:
> >> 
> >>    li + oris, lis + oris, li + xoris, lis + xoris.
> >> 
> >> not sure just me interpreting like that, but the actual combinations
> >> which this patch adopts are:
> >> 
> >>    li + oris, li + xoris, lis + xoris.
> >> 
> >> It's a bit off, but not a big deal, up to you to reword it or not.  :)
> >
> > The first two are obvious, but the last one is almost never a good idea,
> > there usually are better ways to do the same.  I cannot even think of
> > any case where this is best?  A lis;rl* is always prefered (it can
> > optimise better, be combined with other insns).
> I understant your point here.  The first two: 'li' for lowest 16bits,
> 'oris/xoris' for next 16bits.
> 
> While for 'lis + xoris', it may not obvious, because both 'lis' and
> 'xoris' operates on 17-31bits.
> 'lis + xoris' is for case "32(1) || 1(0) || 15(x) || 16(0)". xoris is
> used to clean bit31.  This case seems hard to be supported by 'rlxx'.

Please put that in a separate patch?  First do a patch with just
lis;x?oris.  They are unrelated and different in almost every way.

> I hit to find this case when I analyze what kind of constants can be
> build by two instructions. Checked the posssible combinations:
> "addi/addis" + "neg/ori/../xoris/rldX/rlwX/../sradi/extswsli"(those
> instructions which accept one register and one immediate).
> 
> I also drafted the patch to use "li/lis+rlxx" to build constant.
> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601276.html
> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601277.html

Those seem to do many things in one patch as well :-(  It is very hard
to review such things, it takes many hours each to do properly.


Segher
Segher Boessenkool Nov. 28, 2022, 5:19 p.m. UTC | #9
On Mon, Nov 28, 2022 at 03:51:59PM +0800, Jiufu Guo wrote:
> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > Segher Boessenkool <segher@kernel.crashing.org> writes:
> >>> > +      else
> >>> > +	{
> >>> > +	  emit_move_insn (temp,
> >>> > +			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
> >>> > +	  if (ud1 != 0)
> >>> > +	    emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> >>> > +	  emit_move_insn (dest,
> >>> > +			  gen_rtx_ZERO_EXTEND (DImode,
> >>> > +					       gen_lowpart (SImode, temp)));
> >>> > +	}
> >>
> >> Why this?  Please just write it in DImode, do not go via SImode?
> > Thanks for catch this. Yes, gen_lowpart with DImode would be ok.
> Oh, Sorry. DImode can not be used here.  The genreated pattern with
> DImode can not be recognized.  Using SImode is to match 'rlwxx'.

There are patterns that accept DImode for rlwinm just fine.  Please use
  (and:DI (const_int 0xffffffff) (x:DI))
not the obfuscated
  (zero_extend:DI (subreg:SI (x:DI) LOWBYTE))


Segher
Jiufu Guo Nov. 29, 2022, 9:08 a.m. UTC | #10
Hi Segher,

Thanks for your review!

Segher Boessenkool <segher@kernel.crashing.org> writes:

> On Mon, Nov 28, 2022 at 11:37:34AM +0800, Jiufu Guo wrote:
>> Segher Boessenkool <segher@kernel.crashing.org> writes:
>> > On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote:
>> >> on 2022/10/26 19:40, Jiufu Guo wrote:
>> >> for "li/lis + oris/xoris", I interpreted it into four combinations:
>> >> 
>> >>    li + oris, lis + oris, li + xoris, lis + xoris.
>> >> 
>> >> not sure just me interpreting like that, but the actual combinations
>> >> which this patch adopts are:
>> >> 
>> >>    li + oris, li + xoris, lis + xoris.
>> >> 
>> >> It's a bit off, but not a big deal, up to you to reword it or not.  :)
>> >
>> > The first two are obvious, but the last one is almost never a good idea,
>> > there usually are better ways to do the same.  I cannot even think of
>> > any case where this is best?  A lis;rl* is always prefered (it can
>> > optimise better, be combined with other insns).
>> I understant your point here.  The first two: 'li' for lowest 16bits,
>> 'oris/xoris' for next 16bits.
>> 
>> While for 'lis + xoris', it may not obvious, because both 'lis' and
>> 'xoris' operates on 17-31bits.
>> 'lis + xoris' is for case "32(1) || 1(0) || 15(x) || 16(0)". xoris is
>> used to clean bit31.  This case seems hard to be supported by 'rlxx'.
>
> Please put that in a separate patch?  First do a patch with just
> lis;x?oris.  They are unrelated and different in almost every way.

Sure, Thanks for the advice!
>
>> I hit to find this case when I analyze what kind of constants can be
>> build by two instructions. Checked the posssible combinations:
>> "addi/addis" + "neg/ori/../xoris/rldX/rlwX/../sradi/extswsli"(those
>> instructions which accept one register and one immediate).
>> 
>> I also drafted the patch to use "li/lis+rlxx" to build constant.
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601276.html
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601277.html
>
> Those seem to do many things in one patch as well :-(  It is very hard
> to review such things, it takes many hours each to do properly.
Sorry, I will try to seperate them to smaller granularities!

BR,
Jeff (Jiufu)
>
>
> Segher
Jiufu Guo Nov. 29, 2022, 1:14 p.m. UTC | #11
Hi Segher,

Thanks for your comment!

Segher Boessenkool <segher@kernel.crashing.org> writes:

> On Mon, Nov 28, 2022 at 03:51:59PM +0800, Jiufu Guo wrote:
>> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>> > Segher Boessenkool <segher@kernel.crashing.org> writes:
>> >>> > +      else
>> >>> > +	{
>> >>> > +	  emit_move_insn (temp,
>> >>> > +			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
>> >>> > +	  if (ud1 != 0)
>> >>> > +	    emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> >>> > +	  emit_move_insn (dest,
>> >>> > +			  gen_rtx_ZERO_EXTEND (DImode,
>> >>> > +					       gen_lowpart (SImode, temp)));
>> >>> > +	}
>> >>
>> >> Why this?  Please just write it in DImode, do not go via SImode?
>> > Thanks for catch this. Yes, gen_lowpart with DImode would be ok.
>> Oh, Sorry. DImode can not be used here.  The genreated pattern with
>> DImode can not be recognized.  Using SImode is to match 'rlwxx'.
>
> There are patterns that accept DImode for rlwinm just fine.  Please use
>   (and:DI (const_int 0xffffffff) (x:DI))
> not the obfuscated
>   (zero_extend:DI (subreg:SI (x:DI) LOWBYTE))
>
Agree, 'and 0xffffffff' would be easy to read. Here is an small patch
for it.  I believe it should be no regression. :-) To make sure, I will
do more bootstraps and regtests, and then submit it.


BR,
Jeff (Jiufu)

NFC: use more readable pattern to clean high bits

This patch is just using a more readable pattern for "rldicl x,x,0,32"
to clean high 32bits.
Old pattern looks like: r118:DI=zero_extend(r120:DI#0)
new pattern looks like: r118:DI=r120:DI&0xffffffff

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Update
	zero_extend(reg:DI#0) to reg:DI&0xffffffff

---
 gcc/config/rs6000/rs6000.cc | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index eb7ad5e954f..5efe9b22d8b 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10267,10 +10267,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 	emit_move_insn (copy_rtx (temp),
 			gen_rtx_IOR (DImode, copy_rtx (temp),
 				     GEN_INT (ud1)));
-      emit_move_insn (dest,
-		      gen_rtx_ZERO_EXTEND (DImode,
-					   gen_lowpart (SImode,
-							copy_rtx (temp))));
+      emit_move_insn (dest, gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
     }
   else if (ud1 == ud3 && ud2 == ud4)
     {
Jiufu Guo Nov. 30, 2022, 4:30 a.m. UTC | #12
Thanks for your comment!
Date: Wed, 30 Nov 2022 12:30:02 +0800
Message-ID: <7ebkopxdx1.fsf@pike.rch.stglabs.ibm.com>

Segher Boessenkool <segher@kernel.crashing.org> writes:

>> > +  else if ((ud4 == 0xffff && ud3 == 0xffff)
>> > +	   && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000))))
>> > +    {
>> > +      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> > +
>> > +      HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000)
>> > +					 : ((ud2 << 16) - 0x80000000);
>
> We really should have some "hwi::sign_extend (ud1, 16)" helper function,
> heh.  Maybe there already is?  Ah, "sext_hwi".  Fixing that up
> everywhere in this function is preapproved.

I drafted a seperate patch for this like below.  Maybe I could update other
code like "((v & 0xf..f) ^ 0x80..0) - 0x80..0" in rs6000.cc and rs6000.md
with sext_hwi too. 

BR,
Jeff (Jiufu)

Below NFC patch just uses sext_hwi to hand expresion like:
(xx ^ 0x80..0) - 0x80..0 in rs6000_emit_set_long_const.

---
 gcc/config/rs6000/rs6000.cc | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 5efe9b22d8b..b03e059222b 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10242,7 +10242,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 
   if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
       || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
-    emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
+    emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
 
   else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
 	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
@@ -10250,7 +10250,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
 
       emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
-		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
+		      GEN_INT (sext_hwi (ud2 << 16, 32)));
       if (ud1 != 0)
 	emit_move_insn (dest,
 			gen_rtx_IOR (DImode, copy_rtx (temp),
@@ -10261,8 +10261,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
 
       gcc_assert (ud2 & 0x8000);
-      emit_move_insn (copy_rtx (temp),
-		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
+      emit_move_insn (copy_rtx (temp), GEN_INT (sext_hwi (ud2 << 16, 32)));
       if (ud1 != 0)
 	emit_move_insn (copy_rtx (temp),
 			gen_rtx_IOR (DImode, copy_rtx (temp),
@@ -10273,7 +10272,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
     {
       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
       HOST_WIDE_INT num = (ud2 << 16) | ud1;
-      rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
+      rs6000_emit_set_long_const (temp, sext_hwi (num, 32));
       rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
       rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
       emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
@@ -10283,8 +10282,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
     {
       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
 
-      emit_move_insn (copy_rtx (temp),
-		      GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
+      emit_move_insn (copy_rtx (temp), GEN_INT (sext_hwi (ud3 << 16, 32)));
       if (ud2 != 0)
 	emit_move_insn (copy_rtx (temp),
 			gen_rtx_IOR (DImode, copy_rtx (temp),
@@ -10336,8 +10334,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
     {
       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
 
-      emit_move_insn (copy_rtx (temp),
-		      GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
+      emit_move_insn (copy_rtx (temp), GEN_INT (sext_hwi (ud4 << 16, 32)));
       if (ud3 != 0)
 	emit_move_insn (copy_rtx (temp),
 			gen_rtx_IOR (DImode, copy_rtx (temp),
Jiufu Guo Dec. 1, 2022, 1:48 a.m. UTC | #13
Date: Thu, 01 Dec 2022 09:48:06 +0800
In-Reply-To: <20221128171950.GN25951@gate.crashing.org> (Segher Boessenkool's
	message of "Mon, 28 Nov 2022 11:19:50 -0600")
Message-ID: <7e4jufyjvt.fsf@pike.rch.stglabs.ibm.com>
User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/25.2 (gnu/linux)

Segher Boessenkool <segher@kernel.crashing.org> writes:

> On Mon, Nov 28, 2022 at 03:51:59PM +0800, Jiufu Guo wrote:
>> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>> > Segher Boessenkool <segher@kernel.crashing.org> writes:
>> >>> > +      else
>> >>> > +	{
>> >>> > +	  emit_move_insn (temp,
>> >>> > +			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
>> >>> > +	  if (ud1 != 0)
>> >>> > +	    emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> >>> > +	  emit_move_insn (dest,
>> >>> > +			  gen_rtx_ZERO_EXTEND (DImode,
>> >>> > +					       gen_lowpart (SImode, temp)));
>> >>> > +	}
>> >>
>> >> Why this?  Please just write it in DImode, do not go via SImode?
>> > Thanks for catch this. Yes, gen_lowpart with DImode would be ok.
>> Oh, Sorry. DImode can not be used here.  The genreated pattern with
>> DImode can not be recognized.  Using SImode is to match 'rlwxx'.
>
> There are patterns that accept DImode for rlwinm just fine.  Please use
>   (and:DI (const_int 0xffffffff) (x:DI))
> not the obfuscated
>   (zero_extend:DI (subreg:SI (x:DI) LOWBYTE))

I just submit a simple patch for this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607589.html

Thanks for comments!

BR,
Jeff (Jiufu)
>
>
> Segher
Jiufu Guo Dec. 1, 2022, 1:51 a.m. UTC | #14
Date: Thu, 01 Dec 2022 09:51:32 +0800
In-Reply-To: <20221125144309.GG25951@gate.crashing.org> (Segher Boessenkool's
	message of "Fri, 25 Nov 2022 08:43:09 -0600")
Message-ID: <7ewn7bx55n.fsf@pike.rch.stglabs.ibm.com>
User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/25.2 (gnu/linux)

Segher Boessenkool <segher@kernel.crashing.org> writes:

> Hi guys,
>
> On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote:
>> on 2022/10/26 19:40, Jiufu Guo wrote:
cut...
>> > +
>> > +      HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000)
>> > +					 : ((ud2 << 16) - 0x80000000);
>
> We really should have some "hwi::sign_extend (ud1, 16)" helper function,
> heh.  Maybe there already is?  Ah, "sext_hwi".  Fixing that up
> everywhere in this function is preapproved.

I just submit a patch to use sext_hwi for existing code:
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607588.html

Thanks for your help and spend time on this!

BR,
Jeff (Jiufu)

>
>> > +      else
cut...
>
> Could you comment what exact instructions are expected?
> li;xoris and li;xoris and lis;xoris I guess?  It helps if you just tell
> the reader here.
>
> The li;oris and li;xoris parts look good.
>
>
> Segher
Jiufu Guo Dec. 1, 2022, 8:56 a.m. UTC | #15
Hi Segher,

在 11/28/22 10:18 PM, Segher Boessenkool 写道:
> On Mon, Nov 28, 2022 at 11:37:34AM +0800, Jiufu Guo wrote:
>> Segher Boessenkool <segher@kernel.crashing.org> writes:
>>> On Fri, Nov 25, 2022 at 04:11:49PM +0800, Kewen.Lin wrote:
>>>> on 2022/10/26 19:40, Jiufu Guo wrote:
>>>> for "li/lis + oris/xoris", I interpreted it into four combinations:
>>>>
>>>>    li + oris, lis + oris, li + xoris, lis + xoris.
>>>>
>>>> not sure just me interpreting like that, but the actual combinations
>>>> which this patch adopts are:
>>>>
>>>>    li + oris, li + xoris, lis + xoris.
>>>>
>>>> It's a bit off, but not a big deal, up to you to reword it or not.  :)
>>>
>>> The first two are obvious, but the last one is almost never a good idea,
>>> there usually are better ways to do the same.  I cannot even think of
>>> any case where this is best?  A lis;rl* is always prefered (it can
>>> optimise better, be combined with other insns).
>> I understant your point here.  The first two: 'li' for lowest 16bits,
>> 'oris/xoris' for next 16bits.
>>
>> While for 'lis + xoris', it may not obvious, because both 'lis' and
>> 'xoris' operates on 17-31bits.
>> 'lis + xoris' is for case "32(1) || 1(0) || 15(x) || 16(0)". xoris is
>> used to clean bit31.  This case seems hard to be supported by 'rlxx'.
> 
> Please put that in a separate patch?  First do a patch with just
> lis;x?oris.  They are unrelated and different in almost every way.
>
I just send out two patches, one for "lis; xoris" and one for "li; x?oris".

https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607617.html
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607618.html

Maybe, do we prefer to separate into 3 patches for review easily :-)?

Thanks for review!


BR,
Jeff (Jiufu)

>> I hit to find this case when I analyze what kind of constants can be
>> build by two instructions. Checked the posssible combinations:
>> "addi/addis" + "neg/ori/../xoris/rldX/rlwX/../sradi/extswsli"(those
>> instructions which accept one register and one immediate).
>>
>> I also drafted the patch to use "li/lis+rlxx" to build constant.
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601276.html
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601277.html
> 
> Those seem to do many things in one patch as well :-(  It is very hard
> to review such things, it takes many hours each to do properly.
> 
> 
> Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index d2743f7bce6..9b7a51f052d 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10228,6 +10228,7 @@  rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 {
   rtx temp;
   HOST_WIDE_INT ud1, ud2, ud3, ud4;
+  HOST_WIDE_INT orig_c = c;
 
   ud1 = c & 0xffff;
   c = c >> 16;
@@ -10253,21 +10254,41 @@  rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 			gen_rtx_IOR (DImode, copy_rtx (temp),
 				     GEN_INT (ud1)));
     }
+  else if ((ud4 == 0xffff && ud3 == 0xffff)
+	   && ((ud1 & 0x8000) || (ud1 == 0 && !(ud2 & 0x8000))))
+    {
+      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+      HOST_WIDE_INT imm = (ud1 & 0x8000) ? ((ud1 ^ 0x8000) - 0x8000)
+					 : ((ud2 << 16) - 0x80000000);
+      /* li/lis + xoris */
+      emit_move_insn (temp, GEN_INT (imm));
+      emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
+					 GEN_INT (orig_c ^ imm)));
+    }
   else if (ud3 == 0 && ud4 == 0)
     {
       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
 
       gcc_assert (ud2 & 0x8000);
-      emit_move_insn (copy_rtx (temp),
-		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
-      if (ud1 != 0)
-	emit_move_insn (copy_rtx (temp),
-			gen_rtx_IOR (DImode, copy_rtx (temp),
-				     GEN_INT (ud1)));
-      emit_move_insn (dest,
-		      gen_rtx_ZERO_EXTEND (DImode,
-					   gen_lowpart (SImode,
-							copy_rtx (temp))));
+
+      if (!(ud1 & 0x8000))
+	{
+	  /* li+oris */
+	  emit_move_insn (temp, GEN_INT (ud1));
+	  emit_move_insn (dest,
+			  gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
+	}
+      else
+	{
+	  emit_move_insn (temp,
+			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
+	  if (ud1 != 0)
+	    emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
+	  emit_move_insn (dest,
+			  gen_rtx_ZERO_EXTEND (DImode,
+					       gen_lowpart (SImode, temp)));
+	}
     }
   else if (ud1 == ud3 && ud2 == ud4)
     {
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708-run.c b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c
new file mode 100644
index 00000000000..df65c321f6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106708-run.c
@@ -0,0 +1,17 @@ 
+/* PR target/106708 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "pr106708.h"
+
+long long arr[] = {0x98765432ULL, 0xffffffff7cdeab55ULL, 0xffffffff65430000ULL};
+int
+main ()
+{
+  long long a[3];
+
+  foo (a);
+  if (__builtin_memcmp (a, arr, sizeof (arr)) != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.c b/gcc/testsuite/gcc.target/powerpc/pr106708.c
new file mode 100644
index 00000000000..ebd9ea88993
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c
@@ -0,0 +1,12 @@ 
+/* PR target/106708 */
+/* { dg-do compile } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+
+#include "pr106708.h"
+
+/* { dg-final { scan-assembler-times {\mli\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlis\M} 1 } } */
+/* { dg-final { scan-assembler-times {\moris\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxoris\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.h b/gcc/testsuite/gcc.target/powerpc/pr106708.h
new file mode 100644
index 00000000000..42526a70892
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106708.h
@@ -0,0 +1,9 @@ 
+/* Test constants which can be built by li/lis + oris/xoris */
+void  __attribute__ ((__noinline__, __noclone__)) foo (long long *arg)
+{
+  *arg++ = 0x98765432ULL;
+  *arg++ = 0xffffffff7cdeab55ULL;
+  *arg++ = 0xffffffff65430000ULL;
+}
+
+