diff mbox series

add rlwinm pattern for DImode for constant building

Message ID 20240422023503.179552-1-guojiufu@linux.ibm.com
State New
Headers show
Series add rlwinm pattern for DImode for constant building | expand

Commit Message

Jiufu Guo April 22, 2024, 2:35 a.m. UTC
Hi,

The 'rlwinm' pattern is already widely used for SImode.  Because this
instruction can affect the whole 64-bit register, some 64-bit (DImode)
constants can be built via 'lis/li + rlwinm'.  To achieve this, a new
'rlwinm' pattern is added, and 'rs6000_emit_set_long_const' is updated to
check whether a constant can be built by 'lis/li; rlwinm'.

Bootstrap and regtest pass on ppc64{,le}.

Is this patch ok for trunk (when stage1 is open)?

Jeff (Jiufu Guo).

gcc/ChangeLog:

	* config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
	parameter.
	* config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function.
	(rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
	(can_be_rotated_to_lowbits): Add new parameter.
	* config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
	* gcc.target/powerpc/rlwinm4di-1.c: New test.
	* gcc.target/powerpc/rlwinm4di-2.c: New test.
	* gcc.target/powerpc/rlwinm4di.c: New test.
	* gcc.target/powerpc/rlwinm4di.h: New test.

---
 gcc/config/rs6000/rs6000-protos.h             |  2 +-
 gcc/config/rs6000/rs6000.cc                   | 65 ++++++++++++++++++-
 gcc/config/rs6000/rs6000.md                   | 18 +++++
 gcc/testsuite/gcc.target/powerpc/pr93012.c    |  2 +-
 .../gcc.target/powerpc/rlwinm4di-1.c          | 25 +++++++
 .../gcc.target/powerpc/rlwinm4di-2.c          | 19 ++++++
 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c  |  6 ++
 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h  | 25 +++++++
 8 files changed, 158 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h

Comments

Jiufu Guo May 17, 2024, 3:17 a.m. UTC | #1
Hi,

Gentle ping ...

BR,
Jeff(Jiufu) Guo

Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> 'rlwinm' pattern is already well used for SImode.  As this instruction
> can touch the whole 64bit register, so some constants in 64bit(DImode)
> can be built via 'lis/li+rlwinm'.  To achieve this, a new pattern for
> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check
> if a constant is able to be built by 'lis/li; rlwinm'.
>
> Bootstrap and regtest pass on ppc64{,le}.
>
> Is this patch ok for trunk (when stage1 is open)?
>
> Jeff (Jiufu Guo).
>
> gcc/ChangeLog:
>
> 	* config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
> 	parameter.
> 	* config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function.
> 	(rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
> 	(can_be_rotated_to_lowbits): Add new parameter.
> 	* config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
> 	* gcc.target/powerpc/rlwinm4di-1.c: New test.
> 	* gcc.target/powerpc/rlwinm4di-2.c: New test.
> 	* gcc.target/powerpc/rlwinm4di.c: New test.
> 	* gcc.target/powerpc/rlwinm4di.h: New test.
>
> ---
>  gcc/config/rs6000/rs6000-protos.h             |  2 +-
>  gcc/config/rs6000/rs6000.cc                   | 65 ++++++++++++++++++-
>  gcc/config/rs6000/rs6000.md                   | 18 +++++
>  gcc/testsuite/gcc.target/powerpc/pr93012.c    |  2 +-
>  .../gcc.target/powerpc/rlwinm4di-1.c          | 25 +++++++
>  .../gcc.target/powerpc/rlwinm4di-2.c          | 19 ++++++
>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.c  |  6 ++
>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.h  | 25 +++++++
>  8 files changed, 158 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 09a57a806fa..10505a8061a 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
>  extern int vspltis_shifted (rtx);
>  extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>  extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
>  extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
>  extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
>  extern int num_insns_constant (rtx, machine_mode);
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 6ba9df4f02e..853eaede673 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
>    return false;
>  }
>  
> +/* Check if value C can be generated by 2 instructions, one instruction
> +   is li/lis, another instruction is rlwinm.  */
> +
> +static bool
> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
> +				   int *shift, HOST_WIDE_INT *mask)
> +{
> +  unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
> +  unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
> +  unsigned HOST_WIDE_INT v;
> +
> +  /* diff of high and low (high ^ low) should be the mask position.  */
> +  unsigned HOST_WIDE_INT m = low ^ high;
> +  int tz = ctz_hwi (m);
> +  int lz = clz_hwi (m);
> +  if (m != 0)
> +    m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
> +  if (high != 0)
> +    m = ~m;
> +  v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
> +
> +  if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
> +    return false;
> +
> +  /* rotl32 on positive/negative value of 'li' 15/16bits.  */
> +  int n;
> +  if (!can_be_rotated_to_lowbits (v, 15, &n, true)
> +      && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
> +    {
> +      /* rotate32 from a negative value of 'lis'.  */
> +      if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
> +	return false;
> +      n += 16;
> +    }
> +  n = 32 - (n % 32);
> +  n %= 32;
> +  v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
> +  if (v & 0x80000000ULL)
> +    v |= HOST_WIDE_INT_M1U << 32;
> +  *mask = m;
> +  *val = v;
> +  *shift = n;
> +  return true;
> +}
> +
>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>     Output insns to set DEST equal to the constant C as a series of
>     lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>        return;
>      }
>  
> +  HOST_WIDE_INT val;
> +  if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
> +    {
> +      /* li/lis; rlwinm */
> +      count_or_emit_insn (temp, GEN_INT (val));
> +      rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
> +      rtx m = GEN_INT (mask);
> +      rtx n = GEN_INT (shift);
> +      count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
> +      return;
> +    }
> +
>    if (ud3 == 0 && ud4 == 0)
>      {
>        gcc_assert ((ud2 & 0x8000) && ud1 != 0);
> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
>     Return false otherwise.  */
>  
>  bool
> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
> +			   bool rotl32)
>  {
>    int clz = HOST_BITS_PER_WIDE_INT - lowbits;
>  
> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>  	       ^bit -> Vbit, , then zeros are at head or tail.
>  	     00...00xxx100, 'clz - 1' >= 'bits of xxxx'.  */
>    const int rot_bits = lowbits + 1;
> -  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
> +  unsigned HOST_WIDE_INT rc;
> +  rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
> +		  | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
> +	      : (c >> rot_bits) | (c << (clz - 1));
>    tz = ctz_hwi (rc);
>    if (clz_hwi (rc) + tz >= clz)
>      {
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index bc8bc6ab060..8a82ba3e26c 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
>     (set_attr "dot" "yes")
>     (set_attr "length" "4,8")])
>  
> +; define an insn about rlwinm for DI mode (with high part content)
> +(define_insn "rlwinm_di_mask"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +    (and:DI (plus:DI
> +              (ashift:DI (subreg:DI
> +		   (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
> +                              (match_operand:SI 2 "const_int_operand" "n")) 0)
> +                   (const_int 32))
> +              (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
> +            (match_operand:DI 3 "const_int_operand" "n")))]
> +  "rs6000_is_valid_and_mask (operands[3], SImode)"
> +{
> +  return UINTVAL (operands[3]) == -1ULL ?
> +    "rlwinm %0,%1,%h2,1,0" :  "rlwinm %0,%1,%h2,%3";
> +}
> +  [(set_attr "type" "shift")
> +   (set_attr "maybe_var_shift" "yes")])
> +
>  ; Special case for less-than-0.  We can do it with just one machine
>  ; instruction, but the generic optimizers do not realise it is cheap.
>  (define_insn "*lt0_<mode>di"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> index 4f764d0576f..70ddfaa21da 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
>  unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>  unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>  
> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
> new file mode 100644
> index 00000000000..8959578143b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#include "rlwinm4di.h"
> +
> +long long arr1[] = {
> +  0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
> +  0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
> +  0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
> +  0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
> +  0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
> +  0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
> +  0x0002000100000001ULL, 0x0002000100020001ULL,
> +};
> +
> +int
> +main ()
> +{
> +  long long a[sizeof (arr1) / sizeof (arr1[0])];
> +
> +  foo (a);
> +  if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
> new file mode 100644
> index 00000000000..9494d0327b4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-options "-O2 -mno-prefixed" } */
> +/* { dg-do compile { target has_arch_ppc64 } } */
> +
> +#define N 5
> +#define MASK 0xffffffffe0000003ULL
> +
> +typedef unsigned long long int64;
> +
> +int64
> +foo (int64 v)
> +{
> +  unsigned int v1 = v;
> +  unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));
> +  return ((int64) v2 | ((int64) v2 << 32)) & MASK;
> +}
> +
> +/* { dg-final { scan-assembler-not {\mor\M} } } */
> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
> new file mode 100644
> index 00000000000..fcbc8f8d742
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
> @@ -0,0 +1,6 @@
> +/* { dg-options "-O2 -mno-prefixed" } */
> +/* { dg-do compile { target has_arch_ppc64 } } */
> +#include "rlwinm4di.h"
> +
> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
> new file mode 100644
> index 00000000000..59fe739ca85
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
> @@ -0,0 +1,25 @@
> +/* using 2 instructions(rlwinm) to build constants.   */
> +void __attribute__ ((__noinline__, __noclone__))
> +foo (long long *arg)
> +{
> +  *arg++ = 0x0000400100000001ULL;
> +  *arg++ = 0x0000000200000002ULL;
> +  *arg++ = 0xffff8000bfff8000ULL;
> +  *arg++ = 0xffff8001ffff8001ULL;
> +  *arg++ = 0x0000800100000001ULL;
> +  *arg++ = 0x0000800100008001ULL;
> +  *arg++ = 0x0000800200000002ULL;
> +  *arg++ = 0x0000800000008000ULL;
> +  *arg++ = 0x0000000080008000ULL;
> +  *arg++ = 0xffff0001bfff0001ULL;
> +  *arg++ = 0xffff0001ffff0001ULL;
> +  *arg++ = 0x0001000200000002ULL;
> +  *arg++ = 0x8001000080010000ULL;
> +  *arg++ = 0x0004000100000001ULL;
> +  *arg++ = 0x0004000100040001ULL;
> +  *arg++ = 0x00000000bfffe001ULL;
> +  *arg++ = 0x0003fffe0001fffeULL;
> +  *arg++ = 0x0003fffe0003fffeULL;
> +  *arg++ = 0x0002000100000001ULL;
> +  *arg++ = 0x0002000100020001ULL;
> +}
Jiufu Guo June 6, 2024, 1:53 a.m. UTC | #2
Hi,

Gentle ping ...

Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> Gentle ping ...
>
> BR,
> Jeff(Jiufu) Guo
>
> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
>> Hi,
>>
>> 'rlwinm' pattern is already well used for SImode.  As this instruction
>> can touch the whole 64bit register, so some constants in 64bit(DImode)
>> can be built via 'lis/li+rlwinm'.  To achieve this, a new pattern for
>> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check
>> if a constant is able to be built by 'lis/li; rlwinm'.
>>
>> Bootstrap and regtest pass on ppc64{,le}.
>>
>> Is this patch ok for trunk (when stage1 is open)?

Is this patch ok for trunk?

BR,
Jeff(Jiufu) Guo

>>
>> Jeff (Jiufu Guo).
>>
>> gcc/ChangeLog:
>>
>> 	* config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
>> 	parameter.
>> 	* config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function.
>> 	(rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
>> 	(can_be_rotated_to_lowbits): Add new parameter.
>> 	* config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	* gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
>> 	* gcc.target/powerpc/rlwinm4di-1.c: New test.
>> 	* gcc.target/powerpc/rlwinm4di-2.c: New test.
>> 	* gcc.target/powerpc/rlwinm4di.c: New test.
>> 	* gcc.target/powerpc/rlwinm4di.h: New test.
>>
>> ---
>>  gcc/config/rs6000/rs6000-protos.h             |  2 +-
>>  gcc/config/rs6000/rs6000.cc                   | 65 ++++++++++++++++++-
>>  gcc/config/rs6000/rs6000.md                   | 18 +++++
>>  gcc/testsuite/gcc.target/powerpc/pr93012.c    |  2 +-
>>  .../gcc.target/powerpc/rlwinm4di-1.c          | 25 +++++++
>>  .../gcc.target/powerpc/rlwinm4di-2.c          | 19 ++++++
>>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.c  |  6 ++
>>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.h  | 25 +++++++
>>  8 files changed, 158 insertions(+), 4 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>
>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>> index 09a57a806fa..10505a8061a 100644
>> --- a/gcc/config/rs6000/rs6000-protos.h
>> +++ b/gcc/config/rs6000/rs6000-protos.h
>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
>>  extern int vspltis_shifted (rtx);
>>  extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>>  extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
>>  extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
>>  extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
>>  extern int num_insns_constant (rtx, machine_mode);
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index 6ba9df4f02e..853eaede673 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
>>    return false;
>>  }
>>  
>> +/* Check if value C can be generated by 2 instructions, one instruction
>> +   is li/lis, another instruction is rlwinm.  */
>> +
>> +static bool
>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
>> +				   int *shift, HOST_WIDE_INT *mask)
>> +{
>> +  unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
>> +  unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
>> +  unsigned HOST_WIDE_INT v;
>> +
>> +  /* diff of high and low (high ^ low) should be the mask position.  */
>> +  unsigned HOST_WIDE_INT m = low ^ high;
>> +  int tz = ctz_hwi (m);
>> +  int lz = clz_hwi (m);
>> +  if (m != 0)
>> +    m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
>> +  if (high != 0)
>> +    m = ~m;
>> +  v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
>> +
>> +  if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
>> +    return false;
>> +
>> +  /* rotl32 on positive/negative value of 'li' 15/16bits.  */
>> +  int n;
>> +  if (!can_be_rotated_to_lowbits (v, 15, &n, true)
>> +      && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
>> +    {
>> +      /* rotate32 from a negative value of 'lis'.  */
>> +      if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
>> +	return false;
>> +      n += 16;
>> +    }
>> +  n = 32 - (n % 32);
>> +  n %= 32;
>> +  v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
>> +  if (v & 0x80000000ULL)
>> +    v |= HOST_WIDE_INT_M1U << 32;
>> +  *mask = m;
>> +  *val = v;
>> +  *shift = n;
>> +  return true;
>> +}
>> +
>>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>>     Output insns to set DEST equal to the constant C as a series of
>>     lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>>        return;
>>      }
>>  
>> +  HOST_WIDE_INT val;
>> +  if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
>> +    {
>> +      /* li/lis; rlwinm */
>> +      count_or_emit_insn (temp, GEN_INT (val));
>> +      rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
>> +      rtx m = GEN_INT (mask);
>> +      rtx n = GEN_INT (shift);
>> +      count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
>> +      return;
>> +    }
>> +
>>    if (ud3 == 0 && ud4 == 0)
>>      {
>>        gcc_assert ((ud2 & 0x8000) && ud1 != 0);
>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
>>     Return false otherwise.  */
>>  
>>  bool
>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
>> +			   bool rotl32)
>>  {
>>    int clz = HOST_BITS_PER_WIDE_INT - lowbits;
>>  
>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>>  	       ^bit -> Vbit, , then zeros are at head or tail.
>>  	     00...00xxx100, 'clz - 1' >= 'bits of xxxx'.  */
>>    const int rot_bits = lowbits + 1;
>> -  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
>> +  unsigned HOST_WIDE_INT rc;
>> +  rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
>> +		  | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
>> +	      : (c >> rot_bits) | (c << (clz - 1));
>>    tz = ctz_hwi (rc);
>>    if (clz_hwi (rc) + tz >= clz)
>>      {
>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>> index bc8bc6ab060..8a82ba3e26c 100644
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
>>     (set_attr "dot" "yes")
>>     (set_attr "length" "4,8")])
>>  
>> +; define an insn about rlwinm for DI mode (with high part content)
>> +(define_insn "rlwinm_di_mask"
>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>> +    (and:DI (plus:DI
>> +              (ashift:DI (subreg:DI
>> +		   (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
>> +                              (match_operand:SI 2 "const_int_operand" "n")) 0)
>> +                   (const_int 32))
>> +              (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
>> +            (match_operand:DI 3 "const_int_operand" "n")))]
>> +  "rs6000_is_valid_and_mask (operands[3], SImode)"
>> +{
>> +  return UINTVAL (operands[3]) == -1ULL ?
>> +    "rlwinm %0,%1,%h2,1,0" :  "rlwinm %0,%1,%h2,%3";
>> +}
>> +  [(set_attr "type" "shift")
>> +   (set_attr "maybe_var_shift" "yes")])
>> +
>>  ; Special case for less-than-0.  We can do it with just one machine
>>  ; instruction, but the generic optimizers do not realise it is cheap.
>>  (define_insn "*lt0_<mode>di"
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>> index 4f764d0576f..70ddfaa21da 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
>>  unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>>  unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>>  
>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>> new file mode 100644
>> index 00000000000..8959578143b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>> @@ -0,0 +1,25 @@
>> +/* { dg-do run } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include "rlwinm4di.h"
>> +
>> +long long arr1[] = {
>> +  0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
>> +  0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
>> +  0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
>> +  0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
>> +  0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
>> +  0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
>> +  0x0002000100000001ULL, 0x0002000100020001ULL,
>> +};
>> +
>> +int
>> +main ()
>> +{
>> +  long long a[sizeof (arr1) / sizeof (arr1[0])];
>> +
>> +  foo (a);
>> +  if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
>> +    __builtin_abort ();
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>> new file mode 100644
>> index 00000000000..9494d0327b4
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-options "-O2 -mno-prefixed" } */
>> +/* { dg-do compile { target has_arch_ppc64 } } */
>> +
>> +#define N 5
>> +#define MASK 0xffffffffe0000003ULL
>> +
>> +typedef unsigned long long int64;
>> +
>> +int64
>> +foo (int64 v)
>> +{
>> +  unsigned int v1 = v;
>> +  unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));
>> +  return ((int64) v2 | ((int64) v2 << 32)) & MASK;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not {\mor\M} } } */
>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>> new file mode 100644
>> index 00000000000..fcbc8f8d742
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>> @@ -0,0 +1,6 @@
>> +/* { dg-options "-O2 -mno-prefixed" } */
>> +/* { dg-do compile { target has_arch_ppc64 } } */
>> +#include "rlwinm4di.h"
>> +
>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
>> +
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>> new file mode 100644
>> index 00000000000..59fe739ca85
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>> @@ -0,0 +1,25 @@
>> +/* using 2 instructions(rlwinm) to build constants.   */
>> +void __attribute__ ((__noinline__, __noclone__))
>> +foo (long long *arg)
>> +{
>> +  *arg++ = 0x0000400100000001ULL;
>> +  *arg++ = 0x0000000200000002ULL;
>> +  *arg++ = 0xffff8000bfff8000ULL;
>> +  *arg++ = 0xffff8001ffff8001ULL;
>> +  *arg++ = 0x0000800100000001ULL;
>> +  *arg++ = 0x0000800100008001ULL;
>> +  *arg++ = 0x0000800200000002ULL;
>> +  *arg++ = 0x0000800000008000ULL;
>> +  *arg++ = 0x0000000080008000ULL;
>> +  *arg++ = 0xffff0001bfff0001ULL;
>> +  *arg++ = 0xffff0001ffff0001ULL;
>> +  *arg++ = 0x0001000200000002ULL;
>> +  *arg++ = 0x8001000080010000ULL;
>> +  *arg++ = 0x0004000100000001ULL;
>> +  *arg++ = 0x0004000100040001ULL;
>> +  *arg++ = 0x00000000bfffe001ULL;
>> +  *arg++ = 0x0003fffe0001fffeULL;
>> +  *arg++ = 0x0003fffe0003fffeULL;
>> +  *arg++ = 0x0002000100000001ULL;
>> +  *arg++ = 0x0002000100020001ULL;
>> +}
Jiufu Guo June 21, 2024, 3:06 a.m. UTC | #3
Hi,

Gentle ping.

BR,
Jeff(Jiufu) Guo

Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> Gentle ping ...
>
> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
>> Hi,
>>
>> Gentle ping ...
>>
>> BR,
>> Jeff(Jiufu) Guo
>>
>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>
>>> Hi,
>>>
>>> 'rlwinm' pattern is already well used for SImode.  As this instruction
>>> can touch the whole 64bit register, so some constants in 64bit(DImode)
>>> can be built via 'lis/li+rlwinm'.  To achieve this, a new pattern for
>>> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check
>>> if a constant is able to be built by 'lis/li; rlwinm'.
>>>
>>> Bootstrap and regtest pass on ppc64{,le}.
>>>
>>> Is this patch ok for trunk (when stage1 is open)?
>
> Is this patch ok for trunk?
>
> BR,
> Jeff(Jiufu) Guo
>
>>>
>>> Jeff (Jiufu Guo).
>>>
>>> gcc/ChangeLog:
>>>
>>> 	* config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
>>> 	parameter.
>>> 	* config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function.
>>> 	(rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
>>> 	(can_be_rotated_to_lowbits): Add new parameter.
>>> 	* config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 	* gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
>>> 	* gcc.target/powerpc/rlwinm4di-1.c: New test.
>>> 	* gcc.target/powerpc/rlwinm4di-2.c: New test.
>>> 	* gcc.target/powerpc/rlwinm4di.c: New test.
>>> 	* gcc.target/powerpc/rlwinm4di.h: New test.
>>>
>>> ---
>>>  gcc/config/rs6000/rs6000-protos.h             |  2 +-
>>>  gcc/config/rs6000/rs6000.cc                   | 65 ++++++++++++++++++-
>>>  gcc/config/rs6000/rs6000.md                   | 18 +++++
>>>  gcc/testsuite/gcc.target/powerpc/pr93012.c    |  2 +-
>>>  .../gcc.target/powerpc/rlwinm4di-1.c          | 25 +++++++
>>>  .../gcc.target/powerpc/rlwinm4di-2.c          | 19 ++++++
>>>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.c  |  6 ++
>>>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.h  | 25 +++++++
>>>  8 files changed, 158 insertions(+), 4 deletions(-)
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>>> index 09a57a806fa..10505a8061a 100644
>>> --- a/gcc/config/rs6000/rs6000-protos.h
>>> +++ b/gcc/config/rs6000/rs6000-protos.h
>>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
>>>  extern int vspltis_shifted (rtx);
>>>  extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>>>  extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
>>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
>>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
>>>  extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
>>>  extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
>>>  extern int num_insns_constant (rtx, machine_mode);
>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>>> index 6ba9df4f02e..853eaede673 100644
>>> --- a/gcc/config/rs6000/rs6000.cc
>>> +++ b/gcc/config/rs6000/rs6000.cc
>>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
>>>    return false;
>>>  }
>>>  
>>> +/* Check if value C can be generated by 2 instructions, one instruction
>>> +   is li/lis, another instruction is rlwinm.  */
>>> +
>>> +static bool
>>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
>>> +				   int *shift, HOST_WIDE_INT *mask)
>>> +{
>>> +  unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
>>> +  unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
>>> +  unsigned HOST_WIDE_INT v;
>>> +
>>> +  /* diff of high and low (high ^ low) should be the mask position.  */
>>> +  unsigned HOST_WIDE_INT m = low ^ high;
>>> +  int tz = ctz_hwi (m);
>>> +  int lz = clz_hwi (m);
>>> +  if (m != 0)
>>> +    m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
>>> +  if (high != 0)
>>> +    m = ~m;
>>> +  v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
>>> +
>>> +  if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
>>> +    return false;
>>> +
>>> +  /* rotl32 on positive/negative value of 'li' 15/16bits.  */
>>> +  int n;
>>> +  if (!can_be_rotated_to_lowbits (v, 15, &n, true)
>>> +      && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
>>> +    {
>>> +      /* rotate32 from a negative value of 'lis'.  */
>>> +      if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
>>> +	return false;
>>> +      n += 16;
>>> +    }
>>> +  n = 32 - (n % 32);
>>> +  n %= 32;
>>> +  v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
>>> +  if (v & 0x80000000ULL)
>>> +    v |= HOST_WIDE_INT_M1U << 32;
>>> +  *mask = m;
>>> +  *val = v;
>>> +  *shift = n;
>>> +  return true;
>>> +}
>>> +
>>>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>>>     Output insns to set DEST equal to the constant C as a series of
>>>     lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
>>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>>>        return;
>>>      }
>>>  
>>> +  HOST_WIDE_INT val;
>>> +  if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
>>> +    {
>>> +      /* li/lis; rlwinm */
>>> +      count_or_emit_insn (temp, GEN_INT (val));
>>> +      rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
>>> +      rtx m = GEN_INT (mask);
>>> +      rtx n = GEN_INT (shift);
>>> +      count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
>>> +      return;
>>> +    }
>>> +
>>>    if (ud3 == 0 && ud4 == 0)
>>>      {
>>>        gcc_assert ((ud2 & 0x8000) && ud1 != 0);
>>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
>>>     Return false otherwise.  */
>>>  
>>>  bool
>>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
>>> +			   bool rotl32)
>>>  {
>>>    int clz = HOST_BITS_PER_WIDE_INT - lowbits;
>>>  
>>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>>>  	       ^bit -> Vbit, , then zeros are at head or tail.
>>>  	     00...00xxx100, 'clz - 1' >= 'bits of xxxx'.  */
>>>    const int rot_bits = lowbits + 1;
>>> -  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
>>> +  unsigned HOST_WIDE_INT rc;
>>> +  rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
>>> +		  | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
>>> +	      : (c >> rot_bits) | (c << (clz - 1));
>>>    tz = ctz_hwi (rc);
>>>    if (clz_hwi (rc) + tz >= clz)
>>>      {
>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>> index bc8bc6ab060..8a82ba3e26c 100644
>>> --- a/gcc/config/rs6000/rs6000.md
>>> +++ b/gcc/config/rs6000/rs6000.md
>>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
>>>     (set_attr "dot" "yes")
>>>     (set_attr "length" "4,8")])
>>>  
>>> +; define an insn about rlwinm for DI mode (with high part content)
>>> +(define_insn "rlwinm_di_mask"
>>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>>> +    (and:DI (plus:DI
>>> +              (ashift:DI (subreg:DI
>>> +		   (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
>>> +                              (match_operand:SI 2 "const_int_operand" "n")) 0)
>>> +                   (const_int 32))
>>> +              (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
>>> +            (match_operand:DI 3 "const_int_operand" "n")))]
>>> +  "rs6000_is_valid_and_mask (operands[3], SImode)"
>>> +{
>>> +  return UINTVAL (operands[3]) == -1ULL ?
>>> +    "rlwinm %0,%1,%h2,1,0" :  "rlwinm %0,%1,%h2,%3";
>>> +}
>>> +  [(set_attr "type" "shift")
>>> +   (set_attr "maybe_var_shift" "yes")])
>>> +
>>>  ; Special case for less-than-0.  We can do it with just one machine
>>>  ; instruction, but the generic optimizers do not realise it is cheap.
>>>  (define_insn "*lt0_<mode>di"
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>> index 4f764d0576f..70ddfaa21da 100644
>>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
>>>  unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>>>  unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>>>  
>>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>> new file mode 100644
>>> index 00000000000..8959578143b
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>> @@ -0,0 +1,25 @@
>>> +/* { dg-do run } */
>>> +/* { dg-options "-O2" } */
>>> +
>>> +#include "rlwinm4di.h"
>>> +
>>> +long long arr1[] = {
>>> +  0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
>>> +  0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
>>> +  0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
>>> +  0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
>>> +  0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
>>> +  0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
>>> +  0x0002000100000001ULL, 0x0002000100020001ULL,
>>> +};
>>> +
>>> +int
>>> +main ()
>>> +{
>>> +  long long a[sizeof (arr1) / sizeof (arr1[0])];
>>> +
>>> +  foo (a);
>>> +  if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
>>> +    __builtin_abort ();
>>> +  return 0;
>>> +}
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>> new file mode 100644
>>> index 00000000000..9494d0327b4
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>> @@ -0,0 +1,19 @@
>>> +/* { dg-options "-O2 -mno-prefixed" } */
>>> +/* { dg-do compile { target has_arch_ppc64 } } */
>>> +
>>> +#define N 5
>>> +#define MASK 0xffffffffe0000003ULL
>>> +
>>> +typedef unsigned long long int64;
>>> +
>>> +int64
>>> +foo (int64 v)
>>> +{
>>> +  unsigned int v1 = v;
>>> +  unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));
>>> +  return ((int64) v2 | ((int64) v2 << 32)) & MASK;
>>> +}
>>> +
>>> +/* { dg-final { scan-assembler-not {\mor\M} } } */
>>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>> new file mode 100644
>>> index 00000000000..fcbc8f8d742
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>> @@ -0,0 +1,6 @@
>>> +/* { dg-options "-O2 -mno-prefixed" } */
>>> +/* { dg-do compile { target has_arch_ppc64 } } */
>>> +#include "rlwinm4di.h"
>>> +
>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
>>> +
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>> new file mode 100644
>>> index 00000000000..59fe739ca85
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>> @@ -0,0 +1,25 @@
>>> +/* using 2 instructions(rlwinm) to build constants.   */
>>> +void __attribute__ ((__noinline__, __noclone__))
>>> +foo (long long *arg)
>>> +{
>>> +  *arg++ = 0x0000400100000001ULL;
>>> +  *arg++ = 0x0000000200000002ULL;
>>> +  *arg++ = 0xffff8000bfff8000ULL;
>>> +  *arg++ = 0xffff8001ffff8001ULL;
>>> +  *arg++ = 0x0000800100000001ULL;
>>> +  *arg++ = 0x0000800100008001ULL;
>>> +  *arg++ = 0x0000800200000002ULL;
>>> +  *arg++ = 0x0000800000008000ULL;
>>> +  *arg++ = 0x0000000080008000ULL;
>>> +  *arg++ = 0xffff0001bfff0001ULL;
>>> +  *arg++ = 0xffff0001ffff0001ULL;
>>> +  *arg++ = 0x0001000200000002ULL;
>>> +  *arg++ = 0x8001000080010000ULL;
>>> +  *arg++ = 0x0004000100000001ULL;
>>> +  *arg++ = 0x0004000100040001ULL;
>>> +  *arg++ = 0x00000000bfffe001ULL;
>>> +  *arg++ = 0x0003fffe0001fffeULL;
>>> +  *arg++ = 0x0003fffe0003fffeULL;
>>> +  *arg++ = 0x0002000100000001ULL;
>>> +  *arg++ = 0x0002000100020001ULL;
>>> +}
Jiufu Guo July 10, 2024, 8:44 a.m. UTC | #4
Hi,

Gentle ping...

BR,
Jeff(Jiufu) Guo

Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> Gentle ping.
>
> BR,
> Jeff(Jiufu) Guo
>
> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
>> Hi,
>>
>> Gentle ping ...
>>
>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>
>>> Hi,
>>>
>>> Gentle ping ...
>>>
>>> BR,
>>> Jeff(Jiufu) Guo
>>>
>>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>>
>>>> Hi,
>>>>
>>>> The 'rlwinm' pattern is already widely used for SImode.  Because this
>>>> instruction can affect the whole 64-bit register, some 64-bit (DImode)
>>>> constants can be built via 'lis/li + rlwinm'.  To achieve this, a new
>>>> pattern for 'rlwinm' is added, and 'rs6000_emit_set_long_const' is
>>>> updated to check whether a constant can be built by 'lis/li; rlwinm'.
>>>>
>>>> Bootstrap and regtest pass on ppc64{,le}.
>>>>
>>>> Is this patch ok for trunk (when stage1 is open)?
>>
>> Is this patch ok for trunk?
>>
>> BR,
>> Jeff(Jiufu) Guo
>>
>>>>
>>>> Jeff (Jiufu Guo).
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>> 	* config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
>>>> 	parameter.
>>>> 	* config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function.
>>>> 	(rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
>>>> 	(can_be_rotated_to_lowbits): Add new parameter.
>>>> 	* config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>>
>>>> 	* gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
>>>> 	* gcc.target/powerpc/rlwinm4di-1.c: New test.
>>>> 	* gcc.target/powerpc/rlwinm4di-2.c: New test.
>>>> 	* gcc.target/powerpc/rlwinm4di.c: New test.
>>>> 	* gcc.target/powerpc/rlwinm4di.h: New test.
>>>>
>>>> ---
>>>>  gcc/config/rs6000/rs6000-protos.h             |  2 +-
>>>>  gcc/config/rs6000/rs6000.cc                   | 65 ++++++++++++++++++-
>>>>  gcc/config/rs6000/rs6000.md                   | 18 +++++
>>>>  gcc/testsuite/gcc.target/powerpc/pr93012.c    |  2 +-
>>>>  .../gcc.target/powerpc/rlwinm4di-1.c          | 25 +++++++
>>>>  .../gcc.target/powerpc/rlwinm4di-2.c          | 19 ++++++
>>>>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.c  |  6 ++
>>>>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.h  | 25 +++++++
>>>>  8 files changed, 158 insertions(+), 4 deletions(-)
>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>>
>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>>>> index 09a57a806fa..10505a8061a 100644
>>>> --- a/gcc/config/rs6000/rs6000-protos.h
>>>> +++ b/gcc/config/rs6000/rs6000-protos.h
>>>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
>>>>  extern int vspltis_shifted (rtx);
>>>>  extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>>>>  extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
>>>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
>>>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
>>>>  extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
>>>>  extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
>>>>  extern int num_insns_constant (rtx, machine_mode);
>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>>>> index 6ba9df4f02e..853eaede673 100644
>>>> --- a/gcc/config/rs6000/rs6000.cc
>>>> +++ b/gcc/config/rs6000/rs6000.cc
>>>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
>>>>    return false;
>>>>  }
>>>>  
>>>> +/* Check if value C can be generated by 2 instructions, one instruction
>>>> +   is li/lis, another instruction is rlwinm.  */
>>>> +
>>>> +static bool
>>>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
>>>> +				   int *shift, HOST_WIDE_INT *mask)
>>>> +{
>>>> +  unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
>>>> +  unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
>>>> +  unsigned HOST_WIDE_INT v;
>>>> +
>>>> +  /* diff of high and low (high ^ low) should be the mask position.  */
>>>> +  unsigned HOST_WIDE_INT m = low ^ high;
>>>> +  int tz = ctz_hwi (m);
>>>> +  int lz = clz_hwi (m);
>>>> +  if (m != 0)
>>>> +    m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
>>>> +  if (high != 0)
>>>> +    m = ~m;
>>>> +  v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
>>>> +
>>>> +  if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
>>>> +    return false;
>>>> +
>>>> +  /* rotl32 on positive/negative value of 'li' 15/16bits.  */
>>>> +  int n;
>>>> +  if (!can_be_rotated_to_lowbits (v, 15, &n, true)
>>>> +      && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
>>>> +    {
>>>> +      /* rotate32 from a negative value of 'lis'.  */
>>>> +      if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
>>>> +	return false;
>>>> +      n += 16;
>>>> +    }
>>>> +  n = 32 - (n % 32);
>>>> +  n %= 32;
>>>> +  v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
>>>> +  if (v & 0x80000000ULL)
>>>> +    v |= HOST_WIDE_INT_M1U << 32;
>>>> +  *mask = m;
>>>> +  *val = v;
>>>> +  *shift = n;
>>>> +  return true;
>>>> +}
>>>> +
>>>>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>>>>     Output insns to set DEST equal to the constant C as a series of
>>>>     lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
>>>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>>>>        return;
>>>>      }
>>>>  
>>>> +  HOST_WIDE_INT val;
>>>> +  if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
>>>> +    {
>>>> +      /* li/lis; rlwinm */
>>>> +      count_or_emit_insn (temp, GEN_INT (val));
>>>> +      rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
>>>> +      rtx m = GEN_INT (mask);
>>>> +      rtx n = GEN_INT (shift);
>>>> +      count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
>>>> +      return;
>>>> +    }
>>>> +
>>>>    if (ud3 == 0 && ud4 == 0)
>>>>      {
>>>>        gcc_assert ((ud2 & 0x8000) && ud1 != 0);
>>>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
>>>>     Return false otherwise.  */
>>>>  
>>>>  bool
>>>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>>>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
>>>> +			   bool rotl32)
>>>>  {
>>>>    int clz = HOST_BITS_PER_WIDE_INT - lowbits;
>>>>  
>>>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>>>>  	       ^bit -> Vbit, , then zeros are at head or tail.
>>>>  	     00...00xxx100, 'clz - 1' >= 'bits of xxxx'.  */
>>>>    const int rot_bits = lowbits + 1;
>>>> -  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
>>>> +  unsigned HOST_WIDE_INT rc;
>>>> +  rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
>>>> +		  | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
>>>> +	      : (c >> rot_bits) | (c << (clz - 1));
>>>>    tz = ctz_hwi (rc);
>>>>    if (clz_hwi (rc) + tz >= clz)
>>>>      {
>>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>>> index bc8bc6ab060..8a82ba3e26c 100644
>>>> --- a/gcc/config/rs6000/rs6000.md
>>>> +++ b/gcc/config/rs6000/rs6000.md
>>>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
>>>>     (set_attr "dot" "yes")
>>>>     (set_attr "length" "4,8")])
>>>>  
>>>> +; define an insn about rlwinm for DI mode (with high part content)
>>>> +(define_insn "rlwinm_di_mask"
>>>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>>>> +    (and:DI (plus:DI
>>>> +              (ashift:DI (subreg:DI
>>>> +		   (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
>>>> +                              (match_operand:SI 2 "const_int_operand" "n")) 0)
>>>> +                   (const_int 32))
>>>> +              (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
>>>> +            (match_operand:DI 3 "const_int_operand" "n")))]
>>>> +  "rs6000_is_valid_and_mask (operands[3], SImode)"
>>>> +{
>>>> +  return UINTVAL (operands[3]) == -1ULL ?
>>>> +    "rlwinm %0,%1,%h2,1,0" :  "rlwinm %0,%1,%h2,%3";
>>>> +}
>>>> +  [(set_attr "type" "shift")
>>>> +   (set_attr "maybe_var_shift" "yes")])
>>>> +
>>>>  ; Special case for less-than-0.  We can do it with just one machine
>>>>  ; instruction, but the generic optimizers do not realise it is cheap.
>>>>  (define_insn "*lt0_<mode>di"
>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>>> index 4f764d0576f..70ddfaa21da 100644
>>>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
>>>>  unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>>>>  unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>>>>  
>>>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>>> new file mode 100644
>>>> index 00000000000..8959578143b
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>>> @@ -0,0 +1,25 @@
>>>> +/* { dg-do run } */
>>>> +/* { dg-options "-O2" } */
>>>> +
>>>> +#include "rlwinm4di.h"
>>>> +
>>>> +long long arr1[] = {
>>>> +  0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
>>>> +  0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
>>>> +  0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
>>>> +  0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
>>>> +  0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
>>>> +  0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
>>>> +  0x0002000100000001ULL, 0x0002000100020001ULL,
>>>> +};
>>>> +
>>>> +int
>>>> +main ()
>>>> +{
>>>> +  long long a[sizeof (arr1) / sizeof (arr1[0])];
>>>> +
>>>> +  foo (a);
>>>> +  if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
>>>> +    __builtin_abort ();
>>>> +  return 0;
>>>> +}
>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>>> new file mode 100644
>>>> index 00000000000..9494d0327b4
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>>> @@ -0,0 +1,19 @@
>>>> +/* { dg-options "-O2 -mno-prefixed" } */
>>>> +/* { dg-do compile { target has_arch_ppc64 } } */
>>>> +
>>>> +#define N 5
>>>> +#define MASK 0xffffffffe0000003ULL
>>>> +
>>>> +typedef unsigned long long int64;
>>>> +
>>>> +int64
>>>> +foo (int64 v)
>>>> +{
>>>> +  unsigned int v1 = v;
>>>> +  unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));
>>>> +  return ((int64) v2 | ((int64) v2 << 32)) & MASK;
>>>> +}
>>>> +
>>>> +/* { dg-final { scan-assembler-not {\mor\M} } } */
>>>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>>> new file mode 100644
>>>> index 00000000000..fcbc8f8d742
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>>> @@ -0,0 +1,6 @@
>>>> +/* { dg-options "-O2 -mno-prefixed" } */
>>>> +/* { dg-do compile { target has_arch_ppc64 } } */
>>>> +#include "rlwinm4di.h"
>>>> +
>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
>>>> +
>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>> new file mode 100644
>>>> index 00000000000..59fe739ca85
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>> @@ -0,0 +1,25 @@
>>>> +/* using 2 instructions(rlwinm) to build constants.   */
>>>> +void __attribute__ ((__noinline__, __noclone__))
>>>> +foo (long long *arg)
>>>> +{
>>>> +  *arg++ = 0x0000400100000001ULL;
>>>> +  *arg++ = 0x0000000200000002ULL;
>>>> +  *arg++ = 0xffff8000bfff8000ULL;
>>>> +  *arg++ = 0xffff8001ffff8001ULL;
>>>> +  *arg++ = 0x0000800100000001ULL;
>>>> +  *arg++ = 0x0000800100008001ULL;
>>>> +  *arg++ = 0x0000800200000002ULL;
>>>> +  *arg++ = 0x0000800000008000ULL;
>>>> +  *arg++ = 0x0000000080008000ULL;
>>>> +  *arg++ = 0xffff0001bfff0001ULL;
>>>> +  *arg++ = 0xffff0001ffff0001ULL;
>>>> +  *arg++ = 0x0001000200000002ULL;
>>>> +  *arg++ = 0x8001000080010000ULL;
>>>> +  *arg++ = 0x0004000100000001ULL;
>>>> +  *arg++ = 0x0004000100040001ULL;
>>>> +  *arg++ = 0x00000000bfffe001ULL;
>>>> +  *arg++ = 0x0003fffe0001fffeULL;
>>>> +  *arg++ = 0x0003fffe0003fffeULL;
>>>> +  *arg++ = 0x0002000100000001ULL;
>>>> +  *arg++ = 0x0002000100020001ULL;
>>>> +}
Jiufu Guo Aug. 6, 2024, 6:31 a.m. UTC | #5
Hi,

Gentle ping...

BR,
Jeff(Jiufu) Guo

Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> Gentle ping...
>
> BR,
> Jeff(Jiufu) Guo
>
> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
>> Hi,
>>
>> Gentle ping.
>>
>> BR,
>> Jeff(Jiufu) Guo
>>
>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>
>>> Hi,
>>>
>>> Gentle ping ...
>>>
>>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>>
>>>> Hi,
>>>>
>>>> Gentle ping ...
>>>>
>>>> BR,
>>>> Jeff(Jiufu) Guo
>>>>
>>>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>>>
>>>>> Hi,
>>>>>
>>>>> The 'rlwinm' pattern is already widely used for SImode.  Because this
>>>>> instruction can affect the whole 64-bit register, some 64-bit (DImode)
>>>>> constants can be built via 'lis/li + rlwinm'.  To achieve this, a new
>>>>> pattern for 'rlwinm' is added, and 'rs6000_emit_set_long_const' is
>>>>> updated to check whether a constant can be built by 'lis/li; rlwinm'.
>>>>>
>>>>> Bootstrap and regtest pass on ppc64{,le}.
>>>>>
>>>>> Is this patch ok for trunk (when stage1 is open)?
>>>
>>> Is this patch ok for trunk?
>>>
>>> BR,
>>> Jeff(Jiufu) Guo
>>>
>>>>>
>>>>> Jeff (Jiufu Guo).
>>>>>
>>>>> gcc/ChangeLog:
>>>>>
>>>>> 	* config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
>>>>> 	parameter.
>>>>> 	* config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function.
>>>>> 	(rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
>>>>> 	(can_be_rotated_to_lowbits): Add new parameter.
>>>>> 	* config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.
>>>>>
>>>>> gcc/testsuite/ChangeLog:
>>>>>
>>>>> 	* gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
>>>>> 	* gcc.target/powerpc/rlwinm4di-1.c: New test.
>>>>> 	* gcc.target/powerpc/rlwinm4di-2.c: New test.
>>>>> 	* gcc.target/powerpc/rlwinm4di.c: New test.
>>>>> 	* gcc.target/powerpc/rlwinm4di.h: New test.
>>>>>
>>>>> ---
>>>>>  gcc/config/rs6000/rs6000-protos.h             |  2 +-
>>>>>  gcc/config/rs6000/rs6000.cc                   | 65 ++++++++++++++++++-
>>>>>  gcc/config/rs6000/rs6000.md                   | 18 +++++
>>>>>  gcc/testsuite/gcc.target/powerpc/pr93012.c    |  2 +-
>>>>>  .../gcc.target/powerpc/rlwinm4di-1.c          | 25 +++++++
>>>>>  .../gcc.target/powerpc/rlwinm4di-2.c          | 19 ++++++
>>>>>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.c  |  6 ++
>>>>>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.h  | 25 +++++++
>>>>>  8 files changed, 158 insertions(+), 4 deletions(-)
>>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>>>
>>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>>>>> index 09a57a806fa..10505a8061a 100644
>>>>> --- a/gcc/config/rs6000/rs6000-protos.h
>>>>> +++ b/gcc/config/rs6000/rs6000-protos.h
>>>>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
>>>>>  extern int vspltis_shifted (rtx);
>>>>>  extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>>>>>  extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
>>>>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
>>>>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
>>>>>  extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
>>>>>  extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
>>>>>  extern int num_insns_constant (rtx, machine_mode);
>>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>>>>> index 6ba9df4f02e..853eaede673 100644
>>>>> --- a/gcc/config/rs6000/rs6000.cc
>>>>> +++ b/gcc/config/rs6000/rs6000.cc
>>>>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
>>>>>    return false;
>>>>>  }
>>>>>  
>>>>> +/* Check if value C can be generated by 2 instructions, one instruction
>>>>> +   is li/lis, another instruction is rlwinm.  */
>>>>> +
>>>>> +static bool
>>>>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
>>>>> +				   int *shift, HOST_WIDE_INT *mask)
>>>>> +{
>>>>> +  unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
>>>>> +  unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
>>>>> +  unsigned HOST_WIDE_INT v;
>>>>> +
>>>>> +  /* diff of high and low (high ^ low) should be the mask position.  */
>>>>> +  unsigned HOST_WIDE_INT m = low ^ high;
>>>>> +  int tz = ctz_hwi (m);
>>>>> +  int lz = clz_hwi (m);
>>>>> +  if (m != 0)
>>>>> +    m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
>>>>> +  if (high != 0)
>>>>> +    m = ~m;
>>>>> +  v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
>>>>> +
>>>>> +  if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
>>>>> +    return false;
>>>>> +
>>>>> +  /* rotl32 on positive/negative value of 'li' 15/16bits.  */
>>>>> +  int n;
>>>>> +  if (!can_be_rotated_to_lowbits (v, 15, &n, true)
>>>>> +      && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
>>>>> +    {
>>>>> +      /* rotate32 from a negative value of 'lis'.  */
>>>>> +      if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
>>>>> +	return false;
>>>>> +      n += 16;
>>>>> +    }
>>>>> +  n = 32 - (n % 32);
>>>>> +  n %= 32;
>>>>> +  v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
>>>>> +  if (v & 0x80000000ULL)
>>>>> +    v |= HOST_WIDE_INT_M1U << 32;
>>>>> +  *mask = m;
>>>>> +  *val = v;
>>>>> +  *shift = n;
>>>>> +  return true;
>>>>> +}
>>>>> +
>>>>>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>>>>>     Output insns to set DEST equal to the constant C as a series of
>>>>>     lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
>>>>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>>>>>        return;
>>>>>      }
>>>>>  
>>>>> +  HOST_WIDE_INT val;
>>>>> +  if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
>>>>> +    {
>>>>> +      /* li/lis; rlwinm */
>>>>> +      count_or_emit_insn (temp, GEN_INT (val));
>>>>> +      rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
>>>>> +      rtx m = GEN_INT (mask);
>>>>> +      rtx n = GEN_INT (shift);
>>>>> +      count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
>>>>> +      return;
>>>>> +    }
>>>>> +
>>>>>    if (ud3 == 0 && ud4 == 0)
>>>>>      {
>>>>>        gcc_assert ((ud2 & 0x8000) && ud1 != 0);
>>>>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
>>>>>     Return false otherwise.  */
>>>>>  
>>>>>  bool
>>>>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>>>>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
>>>>> +			   bool rotl32)
>>>>>  {
>>>>>    int clz = HOST_BITS_PER_WIDE_INT - lowbits;
>>>>>  
>>>>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>>>>>  	       ^bit -> Vbit, , then zeros are at head or tail.
>>>>>  	     00...00xxx100, 'clz - 1' >= 'bits of xxxx'.  */
>>>>>    const int rot_bits = lowbits + 1;
>>>>> -  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
>>>>> +  unsigned HOST_WIDE_INT rc;
>>>>> +  rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
>>>>> +		  | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
>>>>> +	      : (c >> rot_bits) | (c << (clz - 1));
>>>>>    tz = ctz_hwi (rc);
>>>>>    if (clz_hwi (rc) + tz >= clz)
>>>>>      {
>>>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>>>> index bc8bc6ab060..8a82ba3e26c 100644
>>>>> --- a/gcc/config/rs6000/rs6000.md
>>>>> +++ b/gcc/config/rs6000/rs6000.md
>>>>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
>>>>>     (set_attr "dot" "yes")
>>>>>     (set_attr "length" "4,8")])
>>>>>  
>>>>> +; define an insn about rlwinm for DI mode (with high part content)
>>>>> +(define_insn "rlwinm_di_mask"
>>>>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>>>>> +    (and:DI (plus:DI
>>>>> +              (ashift:DI (subreg:DI
>>>>> +		   (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
>>>>> +                              (match_operand:SI 2 "const_int_operand" "n")) 0)
>>>>> +                   (const_int 32))
>>>>> +              (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
>>>>> +            (match_operand:DI 3 "const_int_operand" "n")))]
>>>>> +  "rs6000_is_valid_and_mask (operands[3], SImode)"
>>>>> +{
>>>>> +  return UINTVAL (operands[3]) == -1ULL ?
>>>>> +    "rlwinm %0,%1,%h2,1,0" :  "rlwinm %0,%1,%h2,%3";
>>>>> +}
>>>>> +  [(set_attr "type" "shift")
>>>>> +   (set_attr "maybe_var_shift" "yes")])
>>>>> +
>>>>>  ; Special case for less-than-0.  We can do it with just one machine
>>>>>  ; instruction, but the generic optimizers do not realise it is cheap.
>>>>>  (define_insn "*lt0_<mode>di"
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>>>> index 4f764d0576f..70ddfaa21da 100644
>>>>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>>>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
>>>>>  unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>>>>>  unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>>>>>  
>>>>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
>>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>>>> new file mode 100644
>>>>> index 00000000000..8959578143b
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>>>> @@ -0,0 +1,25 @@
>>>>> +/* { dg-do run } */
>>>>> +/* { dg-options "-O2" } */
>>>>> +
>>>>> +#include "rlwinm4di.h"
>>>>> +
>>>>> +long long arr1[] = {
>>>>> +  0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
>>>>> +  0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
>>>>> +  0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
>>>>> +  0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
>>>>> +  0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
>>>>> +  0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
>>>>> +  0x0002000100000001ULL, 0x0002000100020001ULL,
>>>>> +};
>>>>> +
>>>>> +int
>>>>> +main ()
>>>>> +{
>>>>> +  long long a[sizeof (arr1) / sizeof (arr1[0])];
>>>>> +
>>>>> +  foo (a);
>>>>> +  if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
>>>>> +    __builtin_abort ();
>>>>> +  return 0;
>>>>> +}
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>>>> new file mode 100644
>>>>> index 00000000000..9494d0327b4
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>>>> @@ -0,0 +1,19 @@
>>>>> +/* { dg-options "-O2 -mno-prefixed" } */
>>>>> +/* { dg-do compile { target has_arch_ppc64 } } */
>>>>> +
>>>>> +#define N 5
>>>>> +#define MASK 0xffffffffe0000003ULL
>>>>> +
>>>>> +typedef unsigned long long int64;
>>>>> +
>>>>> +int64
>>>>> +foo (int64 v)
>>>>> +{
>>>>> +  unsigned int v1 = v;
>>>>> +  unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));
>>>>> +  return ((int64) v2 | ((int64) v2 << 32)) & MASK;
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-assembler-not {\mor\M} } } */
>>>>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
>>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>>>> new file mode 100644
>>>>> index 00000000000..fcbc8f8d742
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>>>> @@ -0,0 +1,6 @@
>>>>> +/* { dg-options "-O2 -mno-prefixed" } */
>>>>> +/* { dg-do compile { target has_arch_ppc64 } } */
>>>>> +#include "rlwinm4di.h"
>>>>> +
>>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
>>>>> +
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>>> new file mode 100644
>>>>> index 00000000000..59fe739ca85
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>>> @@ -0,0 +1,25 @@
>>>>> +/* using 2 instructions(rlwinm) to build constants.   */
>>>>> +void __attribute__ ((__noinline__, __noclone__))
>>>>> +foo (long long *arg)
>>>>> +{
>>>>> +  *arg++ = 0x0000400100000001ULL;
>>>>> +  *arg++ = 0x0000000200000002ULL;
>>>>> +  *arg++ = 0xffff8000bfff8000ULL;
>>>>> +  *arg++ = 0xffff8001ffff8001ULL;
>>>>> +  *arg++ = 0x0000800100000001ULL;
>>>>> +  *arg++ = 0x0000800100008001ULL;
>>>>> +  *arg++ = 0x0000800200000002ULL;
>>>>> +  *arg++ = 0x0000800000008000ULL;
>>>>> +  *arg++ = 0x0000000080008000ULL;
>>>>> +  *arg++ = 0xffff0001bfff0001ULL;
>>>>> +  *arg++ = 0xffff0001ffff0001ULL;
>>>>> +  *arg++ = 0x0001000200000002ULL;
>>>>> +  *arg++ = 0x8001000080010000ULL;
>>>>> +  *arg++ = 0x0004000100000001ULL;
>>>>> +  *arg++ = 0x0004000100040001ULL;
>>>>> +  *arg++ = 0x00000000bfffe001ULL;
>>>>> +  *arg++ = 0x0003fffe0001fffeULL;
>>>>> +  *arg++ = 0x0003fffe0003fffeULL;
>>>>> +  *arg++ = 0x0002000100000001ULL;
>>>>> +  *arg++ = 0x0002000100020001ULL;
>>>>> +}
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 09a57a806fa..10505a8061a 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -36,7 +36,7 @@  extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
-extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
+extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
 extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
 extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
 extern int num_insns_constant (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 6ba9df4f02e..853eaede673 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10454,6 +10454,51 @@  can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
   return false;
 }
 
+/* Return true if C can be built by li/lis of *VAL followed by rlwinm that
+   rotates by *SHIFT and ANDs with *MASK; these are set only on success.  */
+
+static bool
+can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
+				   int *shift, HOST_WIDE_INT *mask)
+{
+  unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
+  unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
+  unsigned HOST_WIDE_INT v;
+
+  /* Bits where the two 32-bit halves differ (high ^ low) locate the mask.  */
+  unsigned HOST_WIDE_INT m = low ^ high;
+  int tz = ctz_hwi (m);
+  int lz = clz_hwi (m);
+  if (m != 0)
+    m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
+  if (high != 0)
+    m = ~m;  /* Select everything outside the differing run.  */
+  v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
+
+  if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
+    return false;
+
+  /* Try a 32-bit rotate of V, or of ~V, into the low 15 bits ('li').  */
+  int n;
+  if (!can_be_rotated_to_lowbits (v, 15, &n, true)
+      && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
+    {
+      /* Else rotate into 16 low bits, plus 16: a negative 'lis' value.  */
+      if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
+	return false;
+      n += 16;
+    }
+  n = 32 - (n % 32);  /* Invert the rotate count ...  */
+  n %= 32;  /* ... and reduce it modulo 32.  */
+  v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
+  if (v & 0x80000000ULL)
+    v |= HOST_WIDE_INT_M1U << 32;  /* Sign-extend from bit 31.  */
+  *mask = m;
+  *val = v;
+  *shift = n;
+  return true;
+}
+
 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
    Output insns to set DEST equal to the constant C as a series of
    lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
@@ -10553,6 +10598,18 @@  rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
       return;
     }
 
+  HOST_WIDE_INT val;
+  if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
+    {
+      /* li/lis; rlwinm */
+      count_or_emit_insn (temp, GEN_INT (val));
+      rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
+      rtx m = GEN_INT (mask);
+      rtx n = GEN_INT (shift);
+      count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
+      return;
+    }
+
   if (ud3 == 0 && ud4 == 0)
     {
       gcc_assert ((ud2 & 0x8000) && ud1 != 0);
@@ -15220,7 +15277,8 @@  rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
    Return false otherwise.  */
 
 bool
-can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
+can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
+			   bool rotl32)
 {
   int clz = HOST_BITS_PER_WIDE_INT - lowbits;
 
@@ -15244,7 +15302,10 @@  can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
 	       ^bit -> Vbit, , then zeros are at head or tail.
 	     00...00xxx100, 'clz - 1' >= 'bits of xxxx'.  */
   const int rot_bits = lowbits + 1;
-  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
+  unsigned HOST_WIDE_INT rc;
+  rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
+		  | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
+	      : (c >> rot_bits) | (c << (clz - 1));
   tz = ctz_hwi (rc);
   if (clz_hwi (rc) + tz >= clz)
     {
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bc8bc6ab060..8a82ba3e26c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4213,6 +4213,24 @@  (define_insn_and_split "*rotl<mode>3_mask_dot2"
    (set_attr "dot" "yes")
    (set_attr "length" "4,8")])
 
+; rlwinm for DImode: rotated SImode operand 1 in both halves, AND operand 3.
+(define_insn "rlwinm_di_mask"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+    (and:DI (plus:DI
+              (ashift:DI (subreg:DI
+		   (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+                              (match_operand:SI 2 "const_int_operand" "n")) 0)
+                   (const_int 32))
+              (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
+            (match_operand:DI 3 "const_int_operand" "n")))]
+  "rs6000_is_valid_and_mask (operands[3], SImode)"
+{
+  return UINTVAL (operands[3]) == -1ULL ?
+    "rlwinm %0,%1,%h2,1,0" :  "rlwinm %0,%1,%h2,%3";
+}
+  [(set_attr "type" "shift")
+   (set_attr "maybe_var_shift" "yes")])
+
 ; Special case for less-than-0.  We can do it with just one machine
 ; instruction, but the generic optimizers do not realise it is cheap.
 (define_insn "*lt0_<mode>di"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
index 4f764d0576f..70ddfaa21da 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
@@ -10,4 +10,4 @@  unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
 unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
 unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
 
-/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
+/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
new file mode 100644
index 00000000000..8959578143b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
@@ -0,0 +1,25 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "rlwinm4di.h"
+
+long long arr1[] = {  /* Expected values, in the order foo stores them.  */
+  0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
+  0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
+  0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
+  0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
+  0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
+  0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
+  0x0002000100000001ULL, 0x0002000100020001ULL,
+};
+
+int
+main ()
+{
+  long long a[sizeof (arr1) / sizeof (arr1[0])];
+
+  foo (a);  /* Defined in rlwinm4di.h; stores one constant per element.  */
+  if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
+    __builtin_abort ();  /* Some stored constant differs from arr1.  */
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
new file mode 100644
index 00000000000..9494d0327b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
@@ -0,0 +1,19 @@ 
+/* { dg-options "-O2 -mno-prefixed" } */
+/* { dg-do compile { target has_arch_ppc64 } } */
+
+#define N 5
+#define MASK 0xffffffffe0000003ULL
+
+typedef unsigned long long int64;
+
+int64
+foo (int64 v)
+{
+  unsigned int v1 = v;
+  unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));  /* 32-bit rotl by N.  */
+  return ((int64) v2 | ((int64) v2 << 32)) & MASK;  /* Both halves, then AND.  */
+}
+
+/* { dg-final { scan-assembler-not {\mor\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
+/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
new file mode 100644
index 00000000000..fcbc8f8d742
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
@@ -0,0 +1,6 @@ 
+/* { dg-options "-O2 -mno-prefixed" } */
+/* { dg-do compile { target has_arch_ppc64 } } */
+#include "rlwinm4di.h"
+
+/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
new file mode 100644
index 00000000000..59fe739ca85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
@@ -0,0 +1,25 @@ 
+/* Store constants that are each meant to be built by 2 insns (with rlwinm).  */
+void __attribute__ ((__noinline__, __noclone__))
+foo (long long *arg)
+{
+  *arg++ = 0x0000400100000001ULL;
+  *arg++ = 0x0000000200000002ULL;
+  *arg++ = 0xffff8000bfff8000ULL;
+  *arg++ = 0xffff8001ffff8001ULL;
+  *arg++ = 0x0000800100000001ULL;
+  *arg++ = 0x0000800100008001ULL;
+  *arg++ = 0x0000800200000002ULL;
+  *arg++ = 0x0000800000008000ULL;
+  *arg++ = 0x0000000080008000ULL;
+  *arg++ = 0xffff0001bfff0001ULL;
+  *arg++ = 0xffff0001ffff0001ULL;
+  *arg++ = 0x0001000200000002ULL;
+  *arg++ = 0x8001000080010000ULL;
+  *arg++ = 0x0004000100000001ULL;
+  *arg++ = 0x0004000100040001ULL;
+  *arg++ = 0x00000000bfffe001ULL;
+  *arg++ = 0x0003fffe0001fffeULL;
+  *arg++ = 0x0003fffe0003fffeULL;
+  *arg++ = 0x0002000100000001ULL;
+  *arg++ = 0x0002000100020001ULL;
+}