Message ID | 20240422023503.179552-1-guojiufu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | add rlwinm pattern for DImode for constant building | expand |
Hi, Gentle ping ... BR, Jeff(Jiufu) Guo Jiufu Guo <guojiufu@linux.ibm.com> writes: > Hi, > > The 'rlwinm' pattern is already widely used for SImode. As this instruction > can touch the whole 64-bit register, some 64-bit (DImode) constants > can be built via 'lis/li+rlwinm'. To achieve this, a new pattern for > 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check > whether a constant can be built by 'lis/li; rlwinm'. > > Bootstrap and regtest pass on ppc64{,le}. > > Is this patch ok for trunk (when stage1 is open)? > > Jeff (Jiufu Guo). > > gcc/ChangeLog: > > * config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new > parameter. > * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function. > (rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'. > (can_be_rotated_to_lowbits): Add new parameter. > * config/rs6000/rs6000.md (rlwinm_di_mask): New pattern. > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'. > * gcc.target/powerpc/rlwinm4di-1.c: New test. > * gcc.target/powerpc/rlwinm4di-2.c: New test. > * gcc.target/powerpc/rlwinm4di.c: New test. > * gcc.target/powerpc/rlwinm4di.h: New test. 
> > --- > gcc/config/rs6000/rs6000-protos.h | 2 +- > gcc/config/rs6000/rs6000.cc | 65 ++++++++++++++++++- > gcc/config/rs6000/rs6000.md | 18 +++++ > gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +- > .../gcc.target/powerpc/rlwinm4di-1.c | 25 +++++++ > .../gcc.target/powerpc/rlwinm4di-2.c | 19 ++++++ > gcc/testsuite/gcc.target/powerpc/rlwinm4di.c | 6 ++ > gcc/testsuite/gcc.target/powerpc/rlwinm4di.h | 25 +++++++ > 8 files changed, 158 insertions(+), 4 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h > > diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h > index 09a57a806fa..10505a8061a 100644 > --- a/gcc/config/rs6000/rs6000-protos.h > +++ b/gcc/config/rs6000/rs6000-protos.h > @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr); > extern int vspltis_shifted (rtx); > extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); > extern bool macho_lo_sum_memory_operand (rtx, machine_mode); > -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *); > +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false); > extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT); > extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT); > extern int num_insns_constant (rtx, machine_mode); > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index 6ba9df4f02e..853eaede673 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) > return false; > } > > +/* Check if value C can be generated by 2 instructions, one instruction > + is li/lis, another instruction is 
rlwinm. */ > + > +static bool > +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val, > + int *shift, HOST_WIDE_INT *mask) > +{ > + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL; > + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL; > + unsigned HOST_WIDE_INT v; > + > + /* diff of high and low (high ^ low) should be the mask position. */ > + unsigned HOST_WIDE_INT m = low ^ high; > + int tz = ctz_hwi (m); > + int lz = clz_hwi (m); > + if (m != 0) > + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz); > + if (high != 0) > + m = ~m; > + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF); > + > + if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1)) > + return false; > + > + /* rotl32 on positive/negative value of 'li' 15/16bits. */ > + int n; > + if (!can_be_rotated_to_lowbits (v, 15, &n, true) > + && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true)) > + { > + /* rotate32 from a negative value of 'lis'. */ > + if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true)) > + return false; > + n += 16; > + } > + n = 32 - (n % 32); > + n %= 32; > + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF; > + if (v & 0x80000000ULL) > + v |= HOST_WIDE_INT_M1U << 32; > + *mask = m; > + *val = v; > + *shift = n; > + return true; > +} > + > /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. > Output insns to set DEST equal to the constant C as a series of > lis, ori and shl instructions. If NUM_INSNS is not NULL, then > @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) > return; > } > > + HOST_WIDE_INT val; > + if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask)) > + { > + /* li/lis; rlwinm */ > + count_or_emit_insn (temp, GEN_INT (val)); > + rtx low = temp ? 
gen_lowpart (SImode, temp) : nullptr; > + rtx m = GEN_INT (mask); > + rtx n = GEN_INT (shift); > + count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m)); > + return; > + } > + > if (ud3 == 0 && ud4 == 0) > { > gcc_assert ((ud2 & 0x8000) && ud1 != 0); > @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code) > Return false otherwise. */ > > bool > -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) > +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot, > + bool rotl32) > { > int clz = HOST_BITS_PER_WIDE_INT - lowbits; > > @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) > ^bit -> Vbit, , then zeros are at head or tail. > 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */ > const int rot_bits = lowbits + 1; > - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1)); > + unsigned HOST_WIDE_INT rc; > + rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits) > + | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL))) > + : (c >> rot_bits) | (c << (clz - 1)); > tz = ctz_hwi (rc); > if (clz_hwi (rc) + tz >= clz) > { > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index bc8bc6ab060..8a82ba3e26c 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2" > (set_attr "dot" "yes") > (set_attr "length" "4,8")]) > > +; define an insn about rlwinm for DI mode (with high part content) > +(define_insn "rlwinm_di_mask" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (and:DI (plus:DI > + (ashift:DI (subreg:DI > + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") > + (match_operand:SI 2 "const_int_operand" "n")) 0) > + (const_int 32)) > + (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2)))) > + (match_operand:DI 3 "const_int_operand" "n")))] > + "rs6000_is_valid_and_mask (operands[3], SImode)" > +{ > + return UINTVAL 
(operands[3]) == -1ULL ? > + "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3"; > +} > + [(set_attr "type" "shift") > + (set_attr "maybe_var_shift" "yes")]) > + > ; Special case for less-than-0. We can do it with just one machine > ; instruction, but the generic optimizers do not realise it is cheap. > (define_insn "*lt0_<mode>di" > diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c > index 4f764d0576f..70ddfaa21da 100644 > --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c > +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c > @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; } > unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } > unsigned long long mskse() { return 0xffff1234ffff1234ULL; } > > -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ > +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c > new file mode 100644 > index 00000000000..8959578143b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +#include "rlwinm4di.h" > + > +long long arr1[] = { > + 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL, > + 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL, > + 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL, > + 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL, > + 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL, > + 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL, > + 0x0002000100000001ULL, 0x0002000100020001ULL, > +}; > + > +int > +main () > +{ > + long long a[sizeof (arr1) / sizeof (arr1[0])]; > + > + foo (a); > + if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0) > + __builtin_abort (); > + return 0; > +} > diff --git 
a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c > new file mode 100644 > index 00000000000..9494d0327b4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c > @@ -0,0 +1,19 @@ > +/* { dg-options "-O2 -mno-prefixed" } */ > +/* { dg-do compile { target has_arch_ppc64 } } */ > + > +#define N 5 > +#define MASK 0xffffffffe0000003ULL > + > +typedef unsigned long long int64; > + > +int64 > +foo (int64 v) > +{ > + unsigned int v1 = v; > + unsigned int v2 = ((v1 << N) | (v1 >> (32 - N))); > + return ((int64) v2 | ((int64) v2 << 32)) & MASK; > +} > + > +/* { dg-final { scan-assembler-not {\mor\M} } } */ > +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */ > +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c > new file mode 100644 > index 00000000000..fcbc8f8d742 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c > @@ -0,0 +1,6 @@ > +/* { dg-options "-O2 -mno-prefixed" } */ > +/* { dg-do compile { target has_arch_ppc64 } } */ > +#include "rlwinm4di.h" > + > +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h > new file mode 100644 > index 00000000000..59fe739ca85 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h > @@ -0,0 +1,25 @@ > +/* using 2 instructions(rlwinm) to build constants. 
*/ > +void __attribute__ ((__noinline__, __noclone__)) > +foo (long long *arg) > +{ > + *arg++ = 0x0000400100000001ULL; > + *arg++ = 0x0000000200000002ULL; > + *arg++ = 0xffff8000bfff8000ULL; > + *arg++ = 0xffff8001ffff8001ULL; > + *arg++ = 0x0000800100000001ULL; > + *arg++ = 0x0000800100008001ULL; > + *arg++ = 0x0000800200000002ULL; > + *arg++ = 0x0000800000008000ULL; > + *arg++ = 0x0000000080008000ULL; > + *arg++ = 0xffff0001bfff0001ULL; > + *arg++ = 0xffff0001ffff0001ULL; > + *arg++ = 0x0001000200000002ULL; > + *arg++ = 0x8001000080010000ULL; > + *arg++ = 0x0004000100000001ULL; > + *arg++ = 0x0004000100040001ULL; > + *arg++ = 0x00000000bfffe001ULL; > + *arg++ = 0x0003fffe0001fffeULL; > + *arg++ = 0x0003fffe0003fffeULL; > + *arg++ = 0x0002000100000001ULL; > + *arg++ = 0x0002000100020001ULL; > +}
Hi, Gentle ping ... Jiufu Guo <guojiufu@linux.ibm.com> writes: > Hi, > > Gentle ping ... > > BR, > Jeff(Jiufu) Guo > > Jiufu Guo <guojiufu@linux.ibm.com> writes: > >> Hi, >> >> The 'rlwinm' pattern is already widely used for SImode. As this instruction >> can touch the whole 64-bit register, some 64-bit (DImode) constants >> can be built via 'lis/li+rlwinm'. To achieve this, a new pattern for >> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check >> whether a constant can be built by 'lis/li; rlwinm'. >> >> Bootstrap and regtest pass on ppc64{,le}. >> >> Is this patch ok for trunk (when stage1 is open)? Is this patch ok for trunk? BR, Jeff(Jiufu) Guo >> >> Jeff (Jiufu Guo). >> >> gcc/ChangeLog: >> >> * config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new >> parameter. >> * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function. >> (rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'. >> (can_be_rotated_to_lowbits): Add new parameter. >> * config/rs6000/rs6000.md (rlwinm_di_mask): New pattern. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'. >> * gcc.target/powerpc/rlwinm4di-1.c: New test. >> * gcc.target/powerpc/rlwinm4di-2.c: New test. >> * gcc.target/powerpc/rlwinm4di.c: New test. >> * gcc.target/powerpc/rlwinm4di.h: New test. 
>> >> --- >> gcc/config/rs6000/rs6000-protos.h | 2 +- >> gcc/config/rs6000/rs6000.cc | 65 ++++++++++++++++++- >> gcc/config/rs6000/rs6000.md | 18 +++++ >> gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +- >> .../gcc.target/powerpc/rlwinm4di-1.c | 25 +++++++ >> .../gcc.target/powerpc/rlwinm4di-2.c | 19 ++++++ >> gcc/testsuite/gcc.target/powerpc/rlwinm4di.c | 6 ++ >> gcc/testsuite/gcc.target/powerpc/rlwinm4di.h | 25 +++++++ >> 8 files changed, 158 insertions(+), 4 deletions(-) >> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >> >> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h >> index 09a57a806fa..10505a8061a 100644 >> --- a/gcc/config/rs6000/rs6000-protos.h >> +++ b/gcc/config/rs6000/rs6000-protos.h >> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr); >> extern int vspltis_shifted (rtx); >> extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); >> extern bool macho_lo_sum_memory_operand (rtx, machine_mode); >> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *); >> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false); >> extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT); >> extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT); >> extern int num_insns_constant (rtx, machine_mode); >> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >> index 6ba9df4f02e..853eaede673 100644 >> --- a/gcc/config/rs6000/rs6000.cc >> +++ b/gcc/config/rs6000/rs6000.cc >> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) >> return false; >> } >> >> +/* Check if value C can be generated by 2 instructions, one instruction >> + 
is li/lis, another instruction is rlwinm. */ >> + >> +static bool >> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val, >> + int *shift, HOST_WIDE_INT *mask) >> +{ >> + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL; >> + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL; >> + unsigned HOST_WIDE_INT v; >> + >> + /* diff of high and low (high ^ low) should be the mask position. */ >> + unsigned HOST_WIDE_INT m = low ^ high; >> + int tz = ctz_hwi (m); >> + int lz = clz_hwi (m); >> + if (m != 0) >> + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz); >> + if (high != 0) >> + m = ~m; >> + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF); >> + >> + if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1)) >> + return false; >> + >> + /* rotl32 on positive/negative value of 'li' 15/16bits. */ >> + int n; >> + if (!can_be_rotated_to_lowbits (v, 15, &n, true) >> + && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true)) >> + { >> + /* rotate32 from a negative value of 'lis'. */ >> + if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true)) >> + return false; >> + n += 16; >> + } >> + n = 32 - (n % 32); >> + n %= 32; >> + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF; >> + if (v & 0x80000000ULL) >> + v |= HOST_WIDE_INT_M1U << 32; >> + *mask = m; >> + *val = v; >> + *shift = n; >> + return true; >> +} >> + >> /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. >> Output insns to set DEST equal to the constant C as a series of >> lis, ori and shl instructions. If NUM_INSNS is not NULL, then >> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) >> return; >> } >> >> + HOST_WIDE_INT val; >> + if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask)) >> + { >> + /* li/lis; rlwinm */ >> + count_or_emit_insn (temp, GEN_INT (val)); >> + rtx low = temp ? 
gen_lowpart (SImode, temp) : nullptr; >> + rtx m = GEN_INT (mask); >> + rtx n = GEN_INT (shift); >> + count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m)); >> + return; >> + } >> + >> if (ud3 == 0 && ud4 == 0) >> { >> gcc_assert ((ud2 & 0x8000) && ud1 != 0); >> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code) >> Return false otherwise. */ >> >> bool >> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) >> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot, >> + bool rotl32) >> { >> int clz = HOST_BITS_PER_WIDE_INT - lowbits; >> >> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) >> ^bit -> Vbit, , then zeros are at head or tail. >> 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */ >> const int rot_bits = lowbits + 1; >> - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1)); >> + unsigned HOST_WIDE_INT rc; >> + rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits) >> + | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL))) >> + : (c >> rot_bits) | (c << (clz - 1)); >> tz = ctz_hwi (rc); >> if (clz_hwi (rc) + tz >= clz) >> { >> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >> index bc8bc6ab060..8a82ba3e26c 100644 >> --- a/gcc/config/rs6000/rs6000.md >> +++ b/gcc/config/rs6000/rs6000.md >> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2" >> (set_attr "dot" "yes") >> (set_attr "length" "4,8")]) >> >> +; define an insn about rlwinm for DI mode (with high part content) >> +(define_insn "rlwinm_di_mask" >> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >> + (and:DI (plus:DI >> + (ashift:DI (subreg:DI >> + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") >> + (match_operand:SI 2 "const_int_operand" "n")) 0) >> + (const_int 32)) >> + (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2)))) >> + (match_operand:DI 3 "const_int_operand" "n")))] >> + "rs6000_is_valid_and_mask 
(operands[3], SImode)" >> +{ >> + return UINTVAL (operands[3]) == -1ULL ? >> + "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3"; >> +} >> + [(set_attr "type" "shift") >> + (set_attr "maybe_var_shift" "yes")]) >> + >> ; Special case for less-than-0. We can do it with just one machine >> ; instruction, but the generic optimizers do not realise it is cheap. >> (define_insn "*lt0_<mode>di" >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c >> index 4f764d0576f..70ddfaa21da 100644 >> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c >> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c >> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; } >> unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } >> unsigned long long mskse() { return 0xffff1234ffff1234ULL; } >> >> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ >> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */ >> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >> new file mode 100644 >> index 00000000000..8959578143b >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >> @@ -0,0 +1,25 @@ >> +/* { dg-do run } */ >> +/* { dg-options "-O2" } */ >> + >> +#include "rlwinm4di.h" >> + >> +long long arr1[] = { >> + 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL, >> + 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL, >> + 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL, >> + 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL, >> + 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL, >> + 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL, >> + 0x0002000100000001ULL, 0x0002000100020001ULL, >> +}; >> + >> +int >> +main () >> +{ >> + long long a[sizeof (arr1) / sizeof (arr1[0])]; >> + >> + foo (a); >> + if (__builtin_memcmp (a, 
arr1, sizeof (arr1)) != 0) >> + __builtin_abort (); >> + return 0; >> +} >> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >> new file mode 100644 >> index 00000000000..9494d0327b4 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >> @@ -0,0 +1,19 @@ >> +/* { dg-options "-O2 -mno-prefixed" } */ >> +/* { dg-do compile { target has_arch_ppc64 } } */ >> + >> +#define N 5 >> +#define MASK 0xffffffffe0000003ULL >> + >> +typedef unsigned long long int64; >> + >> +int64 >> +foo (int64 v) >> +{ >> + unsigned int v1 = v; >> + unsigned int v2 = ((v1 << N) | (v1 >> (32 - N))); >> + return ((int64) v2 | ((int64) v2 << 32)) & MASK; >> +} >> + >> +/* { dg-final { scan-assembler-not {\mor\M} } } */ >> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */ >> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */ >> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >> new file mode 100644 >> index 00000000000..fcbc8f8d742 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >> @@ -0,0 +1,6 @@ >> +/* { dg-options "-O2 -mno-prefixed" } */ >> +/* { dg-do compile { target has_arch_ppc64 } } */ >> +#include "rlwinm4di.h" >> + >> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */ >> + >> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >> new file mode 100644 >> index 00000000000..59fe739ca85 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >> @@ -0,0 +1,25 @@ >> +/* using 2 instructions(rlwinm) to build constants. 
*/ >> +void __attribute__ ((__noinline__, __noclone__)) >> +foo (long long *arg) >> +{ >> + *arg++ = 0x0000400100000001ULL; >> + *arg++ = 0x0000000200000002ULL; >> + *arg++ = 0xffff8000bfff8000ULL; >> + *arg++ = 0xffff8001ffff8001ULL; >> + *arg++ = 0x0000800100000001ULL; >> + *arg++ = 0x0000800100008001ULL; >> + *arg++ = 0x0000800200000002ULL; >> + *arg++ = 0x0000800000008000ULL; >> + *arg++ = 0x0000000080008000ULL; >> + *arg++ = 0xffff0001bfff0001ULL; >> + *arg++ = 0xffff0001ffff0001ULL; >> + *arg++ = 0x0001000200000002ULL; >> + *arg++ = 0x8001000080010000ULL; >> + *arg++ = 0x0004000100000001ULL; >> + *arg++ = 0x0004000100040001ULL; >> + *arg++ = 0x00000000bfffe001ULL; >> + *arg++ = 0x0003fffe0001fffeULL; >> + *arg++ = 0x0003fffe0003fffeULL; >> + *arg++ = 0x0002000100000001ULL; >> + *arg++ = 0x0002000100020001ULL; >> +}
Hi, Gentle ping. BR, Jeff(Jiufu) Guo Jiufu Guo <guojiufu@linux.ibm.com> writes: > Hi, > > Gentle ping ... > > Jiufu Guo <guojiufu@linux.ibm.com> writes: > >> Hi, >> >> Gentle ping ... >> >> BR, >> Jeff(Jiufu) Guo >> >> Jiufu Guo <guojiufu@linux.ibm.com> writes: >> >>> Hi, >>> >>> The 'rlwinm' pattern is already widely used for SImode. As this instruction >>> can touch the whole 64-bit register, some 64-bit (DImode) constants >>> can be built via 'lis/li+rlwinm'. To achieve this, a new pattern for >>> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check >>> whether a constant can be built by 'lis/li; rlwinm'. >>> >>> Bootstrap and regtest pass on ppc64{,le}. >>> >>> Is this patch ok for trunk (when stage1 is open)? > > Is this patch ok for trunk? > > BR, > Jeff(Jiufu) Guo > >>> >>> Jeff (Jiufu Guo). >>> >>> gcc/ChangeLog: >>> >>> * config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new >>> parameter. >>> * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function. >>> (rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'. >>> (can_be_rotated_to_lowbits): Add new parameter. >>> * config/rs6000/rs6000.md (rlwinm_di_mask): New pattern. >>> >>> gcc/testsuite/ChangeLog: >>> >>> * gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'. >>> * gcc.target/powerpc/rlwinm4di-1.c: New test. >>> * gcc.target/powerpc/rlwinm4di-2.c: New test. >>> * gcc.target/powerpc/rlwinm4di.c: New test. >>> * gcc.target/powerpc/rlwinm4di.h: New test. 
>>> >>> --- >>> gcc/config/rs6000/rs6000-protos.h | 2 +- >>> gcc/config/rs6000/rs6000.cc | 65 ++++++++++++++++++- >>> gcc/config/rs6000/rs6000.md | 18 +++++ >>> gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +- >>> .../gcc.target/powerpc/rlwinm4di-1.c | 25 +++++++ >>> .../gcc.target/powerpc/rlwinm4di-2.c | 19 ++++++ >>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.c | 6 ++ >>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.h | 25 +++++++ >>> 8 files changed, 158 insertions(+), 4 deletions(-) >>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>> >>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h >>> index 09a57a806fa..10505a8061a 100644 >>> --- a/gcc/config/rs6000/rs6000-protos.h >>> +++ b/gcc/config/rs6000/rs6000-protos.h >>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr); >>> extern int vspltis_shifted (rtx); >>> extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); >>> extern bool macho_lo_sum_memory_operand (rtx, machine_mode); >>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *); >>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false); >>> extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT); >>> extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT); >>> extern int num_insns_constant (rtx, machine_mode); >>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >>> index 6ba9df4f02e..853eaede673 100644 >>> --- a/gcc/config/rs6000/rs6000.cc >>> +++ b/gcc/config/rs6000/rs6000.cc >>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) >>> return false; >>> } >>> >>> +/* Check if value C can be generated 
by 2 instructions, one instruction >>> + is li/lis, another instruction is rlwinm. */ >>> + >>> +static bool >>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val, >>> + int *shift, HOST_WIDE_INT *mask) >>> +{ >>> + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL; >>> + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL; >>> + unsigned HOST_WIDE_INT v; >>> + >>> + /* diff of high and low (high ^ low) should be the mask position. */ >>> + unsigned HOST_WIDE_INT m = low ^ high; >>> + int tz = ctz_hwi (m); >>> + int lz = clz_hwi (m); >>> + if (m != 0) >>> + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz); >>> + if (high != 0) >>> + m = ~m; >>> + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF); >>> + >>> + if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1)) >>> + return false; >>> + >>> + /* rotl32 on positive/negative value of 'li' 15/16bits. */ >>> + int n; >>> + if (!can_be_rotated_to_lowbits (v, 15, &n, true) >>> + && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true)) >>> + { >>> + /* rotate32 from a negative value of 'lis'. */ >>> + if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true)) >>> + return false; >>> + n += 16; >>> + } >>> + n = 32 - (n % 32); >>> + n %= 32; >>> + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF; >>> + if (v & 0x80000000ULL) >>> + v |= HOST_WIDE_INT_M1U << 32; >>> + *mask = m; >>> + *val = v; >>> + *shift = n; >>> + return true; >>> +} >>> + >>> /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. >>> Output insns to set DEST equal to the constant C as a series of >>> lis, ori and shl instructions. If NUM_INSNS is not NULL, then >>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) >>> return; >>> } >>> >>> + HOST_WIDE_INT val; >>> + if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask)) >>> + { >>> + /* li/lis; rlwinm */ >>> + count_or_emit_insn (temp, GEN_INT (val)); >>> + rtx low = temp ? 
gen_lowpart (SImode, temp) : nullptr; >>> + rtx m = GEN_INT (mask); >>> + rtx n = GEN_INT (shift); >>> + count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m)); >>> + return; >>> + } >>> + >>> if (ud3 == 0 && ud4 == 0) >>> { >>> gcc_assert ((ud2 & 0x8000) && ud1 != 0); >>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code) >>> Return false otherwise. */ >>> >>> bool >>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) >>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot, >>> + bool rotl32) >>> { >>> int clz = HOST_BITS_PER_WIDE_INT - lowbits; >>> >>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) >>> ^bit -> Vbit, , then zeros are at head or tail. >>> 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */ >>> const int rot_bits = lowbits + 1; >>> - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1)); >>> + unsigned HOST_WIDE_INT rc; >>> + rc = rotl32 ? 
((((c & 0xFFFFFFFFULL) >> rot_bits) >>> + | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL))) >>> + : (c >> rot_bits) | (c << (clz - 1)); >>> tz = ctz_hwi (rc); >>> if (clz_hwi (rc) + tz >= clz) >>> { >>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >>> index bc8bc6ab060..8a82ba3e26c 100644 >>> --- a/gcc/config/rs6000/rs6000.md >>> +++ b/gcc/config/rs6000/rs6000.md >>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2" >>> (set_attr "dot" "yes") >>> (set_attr "length" "4,8")]) >>> >>> +; define an insn about rlwinm for DI mode (with high part content) >>> +(define_insn "rlwinm_di_mask" >>> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >>> + (and:DI (plus:DI >>> + (ashift:DI (subreg:DI >>> + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") >>> + (match_operand:SI 2 "const_int_operand" "n")) 0) >>> + (const_int 32)) >>> + (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2)))) >>> + (match_operand:DI 3 "const_int_operand" "n")))] >>> + "rs6000_is_valid_and_mask (operands[3], SImode)" >>> +{ >>> + return UINTVAL (operands[3]) == -1ULL ? >>> + "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3"; >>> +} >>> + [(set_attr "type" "shift") >>> + (set_attr "maybe_var_shift" "yes")]) >>> + >>> ; Special case for less-than-0. We can do it with just one machine >>> ; instruction, but the generic optimizers do not realise it is cheap. 
>>> (define_insn "*lt0_<mode>di" >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c >>> index 4f764d0576f..70ddfaa21da 100644 >>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c >>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c >>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; } >>> unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } >>> unsigned long long mskse() { return 0xffff1234ffff1234ULL; } >>> >>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ >>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */ >>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>> new file mode 100644 >>> index 00000000000..8959578143b >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>> @@ -0,0 +1,25 @@ >>> +/* { dg-do run } */ >>> +/* { dg-options "-O2" } */ >>> + >>> +#include "rlwinm4di.h" >>> + >>> +long long arr1[] = { >>> + 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL, >>> + 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL, >>> + 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL, >>> + 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL, >>> + 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL, >>> + 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL, >>> + 0x0002000100000001ULL, 0x0002000100020001ULL, >>> +}; >>> + >>> +int >>> +main () >>> +{ >>> + long long a[sizeof (arr1) / sizeof (arr1[0])]; >>> + >>> + foo (a); >>> + if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0) >>> + __builtin_abort (); >>> + return 0; >>> +} >>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>> new file mode 100644 >>> index 00000000000..9494d0327b4 >>> --- /dev/null >>> +++ 
b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>> @@ -0,0 +1,19 @@ >>> +/* { dg-options "-O2 -mno-prefixed" } */ >>> +/* { dg-do compile { target has_arch_ppc64 } } */ >>> + >>> +#define N 5 >>> +#define MASK 0xffffffffe0000003ULL >>> + >>> +typedef unsigned long long int64; >>> + >>> +int64 >>> +foo (int64 v) >>> +{ >>> + unsigned int v1 = v; >>> + unsigned int v2 = ((v1 << N) | (v1 >> (32 - N))); >>> + return ((int64) v2 | ((int64) v2 << 32)) & MASK; >>> +} >>> + >>> +/* { dg-final { scan-assembler-not {\mor\M} } } */ >>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */ >>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */ >>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>> new file mode 100644 >>> index 00000000000..fcbc8f8d742 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>> @@ -0,0 +1,6 @@ >>> +/* { dg-options "-O2 -mno-prefixed" } */ >>> +/* { dg-do compile { target has_arch_ppc64 } } */ >>> +#include "rlwinm4di.h" >>> + >>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */ >>> + >>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>> new file mode 100644 >>> index 00000000000..59fe739ca85 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>> @@ -0,0 +1,25 @@ >>> +/* using 2 instructions(rlwinm) to build constants. 
*/ >>> +void __attribute__ ((__noinline__, __noclone__)) >>> +foo (long long *arg) >>> +{ >>> + *arg++ = 0x0000400100000001ULL; >>> + *arg++ = 0x0000000200000002ULL; >>> + *arg++ = 0xffff8000bfff8000ULL; >>> + *arg++ = 0xffff8001ffff8001ULL; >>> + *arg++ = 0x0000800100000001ULL; >>> + *arg++ = 0x0000800100008001ULL; >>> + *arg++ = 0x0000800200000002ULL; >>> + *arg++ = 0x0000800000008000ULL; >>> + *arg++ = 0x0000000080008000ULL; >>> + *arg++ = 0xffff0001bfff0001ULL; >>> + *arg++ = 0xffff0001ffff0001ULL; >>> + *arg++ = 0x0001000200000002ULL; >>> + *arg++ = 0x8001000080010000ULL; >>> + *arg++ = 0x0004000100000001ULL; >>> + *arg++ = 0x0004000100040001ULL; >>> + *arg++ = 0x00000000bfffe001ULL; >>> + *arg++ = 0x0003fffe0001fffeULL; >>> + *arg++ = 0x0003fffe0003fffeULL; >>> + *arg++ = 0x0002000100000001ULL; >>> + *arg++ = 0x0002000100020001ULL; >>> +}
Hi, Gentle ping... BR, Jeff(Jiufu) Guo Jiufu Guo <guojiufu@linux.ibm.com> writes: > Hi, > > Gentle ping. > > BR, > Jeff(Jiufu) Guo > > Jiufu Guo <guojiufu@linux.ibm.com> writes: > >> Hi, >> >> Gentle ping ... >> >> Jiufu Guo <guojiufu@linux.ibm.com> writes: >> >>> Hi, >>> >>> Gentle ping ... >>> >>> BR, >>> Jeff(Jiufu) Guo >>> >>> Jiufu Guo <guojiufu@linux.ibm.com> writes: >>> >>>> Hi, >>>> >>>> The 'rlwinm' pattern is already well used for SImode. As this instruction >>>> can touch the whole 64-bit register, some 64-bit (DImode) constants >>>> can be built via 'lis/li+rlwinm'. To achieve this, a new pattern for >>>> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check >>>> whether a constant can be built by 'lis/li; rlwinm'. >>>> >>>> Bootstrap and regtest pass on ppc64{,le}. >>>> >>>> Is this patch ok for trunk (when stage1 is open)? >> >> Is this patch ok for trunk? >> >> BR, >> Jeff(Jiufu) Guo >> >>>> >>>> Jeff (Jiufu Guo). >>>> >>>> gcc/ChangeLog: >>>> >>>> * config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new >>>> parameter. >>>> * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function. >>>> (rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'. >>>> (can_be_rotated_to_lowbits): Add new parameter. >>>> * config/rs6000/rs6000.md (rlwinm_di_mask): New pattern. >>>> >>>> gcc/testsuite/ChangeLog: >>>> >>>> * gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'. >>>> * gcc.target/powerpc/rlwinm4di-1.c: New test. >>>> * gcc.target/powerpc/rlwinm4di-2.c: New test. >>>> * gcc.target/powerpc/rlwinm4di.c: New test. >>>> * gcc.target/powerpc/rlwinm4di.h: New test. 
>>>> >>>> --- >>>> gcc/config/rs6000/rs6000-protos.h | 2 +- >>>> gcc/config/rs6000/rs6000.cc | 65 ++++++++++++++++++- >>>> gcc/config/rs6000/rs6000.md | 18 +++++ >>>> gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +- >>>> .../gcc.target/powerpc/rlwinm4di-1.c | 25 +++++++ >>>> .../gcc.target/powerpc/rlwinm4di-2.c | 19 ++++++ >>>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.c | 6 ++ >>>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.h | 25 +++++++ >>>> 8 files changed, 158 insertions(+), 4 deletions(-) >>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>>> >>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h >>>> index 09a57a806fa..10505a8061a 100644 >>>> --- a/gcc/config/rs6000/rs6000-protos.h >>>> +++ b/gcc/config/rs6000/rs6000-protos.h >>>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr); >>>> extern int vspltis_shifted (rtx); >>>> extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); >>>> extern bool macho_lo_sum_memory_operand (rtx, machine_mode); >>>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *); >>>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false); >>>> extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT); >>>> extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT); >>>> extern int num_insns_constant (rtx, machine_mode); >>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >>>> index 6ba9df4f02e..853eaede673 100644 >>>> --- a/gcc/config/rs6000/rs6000.cc >>>> +++ b/gcc/config/rs6000/rs6000.cc >>>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) >>>> return false; >>>> } >>>> >>>> 
+/* Check if value C can be generated by 2 instructions, one instruction >>>> + is li/lis, another instruction is rlwinm. */ >>>> + >>>> +static bool >>>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val, >>>> + int *shift, HOST_WIDE_INT *mask) >>>> +{ >>>> + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL; >>>> + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL; >>>> + unsigned HOST_WIDE_INT v; >>>> + >>>> + /* diff of high and low (high ^ low) should be the mask position. */ >>>> + unsigned HOST_WIDE_INT m = low ^ high; >>>> + int tz = ctz_hwi (m); >>>> + int lz = clz_hwi (m); >>>> + if (m != 0) >>>> + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz); >>>> + if (high != 0) >>>> + m = ~m; >>>> + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF); >>>> + >>>> + if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1)) >>>> + return false; >>>> + >>>> + /* rotl32 on positive/negative value of 'li' 15/16bits. */ >>>> + int n; >>>> + if (!can_be_rotated_to_lowbits (v, 15, &n, true) >>>> + && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true)) >>>> + { >>>> + /* rotate32 from a negative value of 'lis'. */ >>>> + if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true)) >>>> + return false; >>>> + n += 16; >>>> + } >>>> + n = 32 - (n % 32); >>>> + n %= 32; >>>> + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF; >>>> + if (v & 0x80000000ULL) >>>> + v |= HOST_WIDE_INT_M1U << 32; >>>> + *mask = m; >>>> + *val = v; >>>> + *shift = n; >>>> + return true; >>>> +} >>>> + >>>> /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. >>>> Output insns to set DEST equal to the constant C as a series of >>>> lis, ori and shl instructions. 
If NUM_INSNS is not NULL, then >>>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) >>>> return; >>>> } >>>> >>>> + HOST_WIDE_INT val; >>>> + if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask)) >>>> + { >>>> + /* li/lis; rlwinm */ >>>> + count_or_emit_insn (temp, GEN_INT (val)); >>>> + rtx low = temp ? gen_lowpart (SImode, temp) : nullptr; >>>> + rtx m = GEN_INT (mask); >>>> + rtx n = GEN_INT (shift); >>>> + count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m)); >>>> + return; >>>> + } >>>> + >>>> if (ud3 == 0 && ud4 == 0) >>>> { >>>> gcc_assert ((ud2 & 0x8000) && ud1 != 0); >>>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code) >>>> Return false otherwise. */ >>>> >>>> bool >>>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) >>>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot, >>>> + bool rotl32) >>>> { >>>> int clz = HOST_BITS_PER_WIDE_INT - lowbits; >>>> >>>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) >>>> ^bit -> Vbit, , then zeros are at head or tail. >>>> 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */ >>>> const int rot_bits = lowbits + 1; >>>> - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1)); >>>> + unsigned HOST_WIDE_INT rc; >>>> + rc = rotl32 ? 
((((c & 0xFFFFFFFFULL) >> rot_bits) >>>> + | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL))) >>>> + : (c >> rot_bits) | (c << (clz - 1)); >>>> tz = ctz_hwi (rc); >>>> if (clz_hwi (rc) + tz >= clz) >>>> { >>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >>>> index bc8bc6ab060..8a82ba3e26c 100644 >>>> --- a/gcc/config/rs6000/rs6000.md >>>> +++ b/gcc/config/rs6000/rs6000.md >>>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2" >>>> (set_attr "dot" "yes") >>>> (set_attr "length" "4,8")]) >>>> >>>> +; define an insn about rlwinm for DI mode (with high part content) >>>> +(define_insn "rlwinm_di_mask" >>>> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >>>> + (and:DI (plus:DI >>>> + (ashift:DI (subreg:DI >>>> + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") >>>> + (match_operand:SI 2 "const_int_operand" "n")) 0) >>>> + (const_int 32)) >>>> + (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2)))) >>>> + (match_operand:DI 3 "const_int_operand" "n")))] >>>> + "rs6000_is_valid_and_mask (operands[3], SImode)" >>>> +{ >>>> + return UINTVAL (operands[3]) == -1ULL ? >>>> + "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3"; >>>> +} >>>> + [(set_attr "type" "shift") >>>> + (set_attr "maybe_var_shift" "yes")]) >>>> + >>>> ; Special case for less-than-0. We can do it with just one machine >>>> ; instruction, but the generic optimizers do not realise it is cheap. 
>>>> (define_insn "*lt0_<mode>di" >>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c >>>> index 4f764d0576f..70ddfaa21da 100644 >>>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c >>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c >>>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; } >>>> unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } >>>> unsigned long long mskse() { return 0xffff1234ffff1234ULL; } >>>> >>>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ >>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */ >>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>>> new file mode 100644 >>>> index 00000000000..8959578143b >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>>> @@ -0,0 +1,25 @@ >>>> +/* { dg-do run } */ >>>> +/* { dg-options "-O2" } */ >>>> + >>>> +#include "rlwinm4di.h" >>>> + >>>> +long long arr1[] = { >>>> + 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL, >>>> + 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL, >>>> + 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL, >>>> + 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL, >>>> + 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL, >>>> + 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL, >>>> + 0x0002000100000001ULL, 0x0002000100020001ULL, >>>> +}; >>>> + >>>> +int >>>> +main () >>>> +{ >>>> + long long a[sizeof (arr1) / sizeof (arr1[0])]; >>>> + >>>> + foo (a); >>>> + if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0) >>>> + __builtin_abort (); >>>> + return 0; >>>> +} >>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>>> new file mode 100644 >>>> index 00000000000..9494d0327b4 >>>> --- /dev/null 
>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>>> @@ -0,0 +1,19 @@ >>>> +/* { dg-options "-O2 -mno-prefixed" } */ >>>> +/* { dg-do compile { target has_arch_ppc64 } } */ >>>> + >>>> +#define N 5 >>>> +#define MASK 0xffffffffe0000003ULL >>>> + >>>> +typedef unsigned long long int64; >>>> + >>>> +int64 >>>> +foo (int64 v) >>>> +{ >>>> + unsigned int v1 = v; >>>> + unsigned int v2 = ((v1 << N) | (v1 >> (32 - N))); >>>> + return ((int64) v2 | ((int64) v2 << 32)) & MASK; >>>> +} >>>> + >>>> +/* { dg-final { scan-assembler-not {\mor\M} } } */ >>>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */ >>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */ >>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>>> new file mode 100644 >>>> index 00000000000..fcbc8f8d742 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>>> @@ -0,0 +1,6 @@ >>>> +/* { dg-options "-O2 -mno-prefixed" } */ >>>> +/* { dg-do compile { target has_arch_ppc64 } } */ >>>> +#include "rlwinm4di.h" >>>> + >>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */ >>>> + >>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>>> new file mode 100644 >>>> index 00000000000..59fe739ca85 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>>> @@ -0,0 +1,25 @@ >>>> +/* using 2 instructions(rlwinm) to build constants. 
*/ >>>> +void __attribute__ ((__noinline__, __noclone__)) >>>> +foo (long long *arg) >>>> +{ >>>> + *arg++ = 0x0000400100000001ULL; >>>> + *arg++ = 0x0000000200000002ULL; >>>> + *arg++ = 0xffff8000bfff8000ULL; >>>> + *arg++ = 0xffff8001ffff8001ULL; >>>> + *arg++ = 0x0000800100000001ULL; >>>> + *arg++ = 0x0000800100008001ULL; >>>> + *arg++ = 0x0000800200000002ULL; >>>> + *arg++ = 0x0000800000008000ULL; >>>> + *arg++ = 0x0000000080008000ULL; >>>> + *arg++ = 0xffff0001bfff0001ULL; >>>> + *arg++ = 0xffff0001ffff0001ULL; >>>> + *arg++ = 0x0001000200000002ULL; >>>> + *arg++ = 0x8001000080010000ULL; >>>> + *arg++ = 0x0004000100000001ULL; >>>> + *arg++ = 0x0004000100040001ULL; >>>> + *arg++ = 0x00000000bfffe001ULL; >>>> + *arg++ = 0x0003fffe0001fffeULL; >>>> + *arg++ = 0x0003fffe0003fffeULL; >>>> + *arg++ = 0x0002000100000001ULL; >>>> + *arg++ = 0x0002000100020001ULL; >>>> +}
Hi, Gentle ping... BR, Jeff(Jiufu) Guo Jiufu Guo <guojiufu@linux.ibm.com> writes: > Hi, > > Gentle ping... > > BR, > Jeff(Jiufu) Guo > > Jiufu Guo <guojiufu@linux.ibm.com> writes: > >> Hi, >> >> Gentle ping. >> >> BR, >> Jeff(Jiufu) Guo >> >> Jiufu Guo <guojiufu@linux.ibm.com> writes: >> >>> Hi, >>> >>> Gentle ping ... >>> >>> Jiufu Guo <guojiufu@linux.ibm.com> writes: >>> >>>> Hi, >>>> >>>> Gentle ping ... >>>> >>>> BR, >>>> Jeff(Jiufu) Guo >>>> >>>> Jiufu Guo <guojiufu@linux.ibm.com> writes: >>>> >>>>> Hi, >>>>> >>>>> The 'rlwinm' pattern is already well used for SImode. As this instruction >>>>> can touch the whole 64-bit register, some 64-bit (DImode) constants >>>>> can be built via 'lis/li+rlwinm'. To achieve this, a new pattern for >>>>> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check >>>>> whether a constant can be built by 'lis/li; rlwinm'. >>>>> >>>>> Bootstrap and regtest pass on ppc64{,le}. >>>>> >>>>> Is this patch ok for trunk (when stage1 is open)? >>> >>> Is this patch ok for trunk? >>> >>> BR, >>> Jeff(Jiufu) Guo >>> >>>>> >>>>> Jeff (Jiufu Guo). >>>>> >>>>> gcc/ChangeLog: >>>>> >>>>> * config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new >>>>> parameter. >>>>> * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function. >>>>> (rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'. >>>>> (can_be_rotated_to_lowbits): Add new parameter. >>>>> * config/rs6000/rs6000.md (rlwinm_di_mask): New pattern. >>>>> >>>>> gcc/testsuite/ChangeLog: >>>>> >>>>> * gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'. >>>>> * gcc.target/powerpc/rlwinm4di-1.c: New test. >>>>> * gcc.target/powerpc/rlwinm4di-2.c: New test. >>>>> * gcc.target/powerpc/rlwinm4di.c: New test. >>>>> * gcc.target/powerpc/rlwinm4di.h: New test. 
>>>>> >>>>> --- >>>>> gcc/config/rs6000/rs6000-protos.h | 2 +- >>>>> gcc/config/rs6000/rs6000.cc | 65 ++++++++++++++++++- >>>>> gcc/config/rs6000/rs6000.md | 18 +++++ >>>>> gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +- >>>>> .../gcc.target/powerpc/rlwinm4di-1.c | 25 +++++++ >>>>> .../gcc.target/powerpc/rlwinm4di-2.c | 19 ++++++ >>>>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.c | 6 ++ >>>>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.h | 25 +++++++ >>>>> 8 files changed, 158 insertions(+), 4 deletions(-) >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>>>> >>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h >>>>> index 09a57a806fa..10505a8061a 100644 >>>>> --- a/gcc/config/rs6000/rs6000-protos.h >>>>> +++ b/gcc/config/rs6000/rs6000-protos.h >>>>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr); >>>>> extern int vspltis_shifted (rtx); >>>>> extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); >>>>> extern bool macho_lo_sum_memory_operand (rtx, machine_mode); >>>>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *); >>>>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false); >>>>> extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT); >>>>> extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT); >>>>> extern int num_insns_constant (rtx, machine_mode); >>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >>>>> index 6ba9df4f02e..853eaede673 100644 >>>>> --- a/gcc/config/rs6000/rs6000.cc >>>>> +++ b/gcc/config/rs6000/rs6000.cc >>>>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) 
>>>>> return false; >>>>> } >>>>> >>>>> +/* Check if value C can be generated by 2 instructions, one instruction >>>>> + is li/lis, another instruction is rlwinm. */ >>>>> + >>>>> +static bool >>>>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val, >>>>> + int *shift, HOST_WIDE_INT *mask) >>>>> +{ >>>>> + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL; >>>>> + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL; >>>>> + unsigned HOST_WIDE_INT v; >>>>> + >>>>> + /* diff of high and low (high ^ low) should be the mask position. */ >>>>> + unsigned HOST_WIDE_INT m = low ^ high; >>>>> + int tz = ctz_hwi (m); >>>>> + int lz = clz_hwi (m); >>>>> + if (m != 0) >>>>> + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz); >>>>> + if (high != 0) >>>>> + m = ~m; >>>>> + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF); >>>>> + >>>>> + if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1)) >>>>> + return false; >>>>> + >>>>> + /* rotl32 on positive/negative value of 'li' 15/16bits. */ >>>>> + int n; >>>>> + if (!can_be_rotated_to_lowbits (v, 15, &n, true) >>>>> + && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true)) >>>>> + { >>>>> + /* rotate32 from a negative value of 'lis'. */ >>>>> + if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true)) >>>>> + return false; >>>>> + n += 16; >>>>> + } >>>>> + n = 32 - (n % 32); >>>>> + n %= 32; >>>>> + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF; >>>>> + if (v & 0x80000000ULL) >>>>> + v |= HOST_WIDE_INT_M1U << 32; >>>>> + *mask = m; >>>>> + *val = v; >>>>> + *shift = n; >>>>> + return true; >>>>> +} >>>>> + >>>>> /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. >>>>> Output insns to set DEST equal to the constant C as a series of >>>>> lis, ori and shl instructions. 
If NUM_INSNS is not NULL, then >>>>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) >>>>> return; >>>>> } >>>>> >>>>> + HOST_WIDE_INT val; >>>>> + if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask)) >>>>> + { >>>>> + /* li/lis; rlwinm */ >>>>> + count_or_emit_insn (temp, GEN_INT (val)); >>>>> + rtx low = temp ? gen_lowpart (SImode, temp) : nullptr; >>>>> + rtx m = GEN_INT (mask); >>>>> + rtx n = GEN_INT (shift); >>>>> + count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m)); >>>>> + return; >>>>> + } >>>>> + >>>>> if (ud3 == 0 && ud4 == 0) >>>>> { >>>>> gcc_assert ((ud2 & 0x8000) && ud1 != 0); >>>>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code) >>>>> Return false otherwise. */ >>>>> >>>>> bool >>>>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) >>>>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot, >>>>> + bool rotl32) >>>>> { >>>>> int clz = HOST_BITS_PER_WIDE_INT - lowbits; >>>>> >>>>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) >>>>> ^bit -> Vbit, , then zeros are at head or tail. >>>>> 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */ >>>>> const int rot_bits = lowbits + 1; >>>>> - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1)); >>>>> + unsigned HOST_WIDE_INT rc; >>>>> + rc = rotl32 ? 
((((c & 0xFFFFFFFFULL) >> rot_bits) >>>>> + | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL))) >>>>> + : (c >> rot_bits) | (c << (clz - 1)); >>>>> tz = ctz_hwi (rc); >>>>> if (clz_hwi (rc) + tz >= clz) >>>>> { >>>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >>>>> index bc8bc6ab060..8a82ba3e26c 100644 >>>>> --- a/gcc/config/rs6000/rs6000.md >>>>> +++ b/gcc/config/rs6000/rs6000.md >>>>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2" >>>>> (set_attr "dot" "yes") >>>>> (set_attr "length" "4,8")]) >>>>> >>>>> +; define an insn about rlwinm for DI mode (with high part content) >>>>> +(define_insn "rlwinm_di_mask" >>>>> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >>>>> + (and:DI (plus:DI >>>>> + (ashift:DI (subreg:DI >>>>> + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") >>>>> + (match_operand:SI 2 "const_int_operand" "n")) 0) >>>>> + (const_int 32)) >>>>> + (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2)))) >>>>> + (match_operand:DI 3 "const_int_operand" "n")))] >>>>> + "rs6000_is_valid_and_mask (operands[3], SImode)" >>>>> +{ >>>>> + return UINTVAL (operands[3]) == -1ULL ? >>>>> + "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3"; >>>>> +} >>>>> + [(set_attr "type" "shift") >>>>> + (set_attr "maybe_var_shift" "yes")]) >>>>> + >>>>> ; Special case for less-than-0. We can do it with just one machine >>>>> ; instruction, but the generic optimizers do not realise it is cheap. 
>>>>> (define_insn "*lt0_<mode>di" >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c >>>>> index 4f764d0576f..70ddfaa21da 100644 >>>>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c >>>>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; } >>>>> unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } >>>>> unsigned long long mskse() { return 0xffff1234ffff1234ULL; } >>>>> >>>>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ >>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */ >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>>>> new file mode 100644 >>>>> index 00000000000..8959578143b >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>>>> @@ -0,0 +1,25 @@ >>>>> +/* { dg-do run } */ >>>>> +/* { dg-options "-O2" } */ >>>>> + >>>>> +#include "rlwinm4di.h" >>>>> + >>>>> +long long arr1[] = { >>>>> + 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL, >>>>> + 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL, >>>>> + 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL, >>>>> + 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL, >>>>> + 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL, >>>>> + 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL, >>>>> + 0x0002000100000001ULL, 0x0002000100020001ULL, >>>>> +}; >>>>> + >>>>> +int >>>>> +main () >>>>> +{ >>>>> + long long a[sizeof (arr1) / sizeof (arr1[0])]; >>>>> + >>>>> + foo (a); >>>>> + if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0) >>>>> + __builtin_abort (); >>>>> + return 0; >>>>> +} >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>>>> new file mode 100644 >>>>> index 
00000000000..9494d0327b4 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>>>> @@ -0,0 +1,19 @@ >>>>> +/* { dg-options "-O2 -mno-prefixed" } */ >>>>> +/* { dg-do compile { target has_arch_ppc64 } } */ >>>>> + >>>>> +#define N 5 >>>>> +#define MASK 0xffffffffe0000003ULL >>>>> + >>>>> +typedef unsigned long long int64; >>>>> + >>>>> +int64 >>>>> +foo (int64 v) >>>>> +{ >>>>> + unsigned int v1 = v; >>>>> + unsigned int v2 = ((v1 << N) | (v1 >> (32 - N))); >>>>> + return ((int64) v2 | ((int64) v2 << 32)) & MASK; >>>>> +} >>>>> + >>>>> +/* { dg-final { scan-assembler-not {\mor\M} } } */ >>>>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */ >>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */ >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>>>> new file mode 100644 >>>>> index 00000000000..fcbc8f8d742 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>>>> @@ -0,0 +1,6 @@ >>>>> +/* { dg-options "-O2 -mno-prefixed" } */ >>>>> +/* { dg-do compile { target has_arch_ppc64 } } */ >>>>> +#include "rlwinm4di.h" >>>>> + >>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */ >>>>> + >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>>>> new file mode 100644 >>>>> index 00000000000..59fe739ca85 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>>>> @@ -0,0 +1,25 @@ >>>>> +/* using 2 instructions(rlwinm) to build constants. 
*/ >>>>> +void __attribute__ ((__noinline__, __noclone__)) >>>>> +foo (long long *arg) >>>>> +{ >>>>> + *arg++ = 0x0000400100000001ULL; >>>>> + *arg++ = 0x0000000200000002ULL; >>>>> + *arg++ = 0xffff8000bfff8000ULL; >>>>> + *arg++ = 0xffff8001ffff8001ULL; >>>>> + *arg++ = 0x0000800100000001ULL; >>>>> + *arg++ = 0x0000800100008001ULL; >>>>> + *arg++ = 0x0000800200000002ULL; >>>>> + *arg++ = 0x0000800000008000ULL; >>>>> + *arg++ = 0x0000000080008000ULL; >>>>> + *arg++ = 0xffff0001bfff0001ULL; >>>>> + *arg++ = 0xffff0001ffff0001ULL; >>>>> + *arg++ = 0x0001000200000002ULL; >>>>> + *arg++ = 0x8001000080010000ULL; >>>>> + *arg++ = 0x0004000100000001ULL; >>>>> + *arg++ = 0x0004000100040001ULL; >>>>> + *arg++ = 0x00000000bfffe001ULL; >>>>> + *arg++ = 0x0003fffe0001fffeULL; >>>>> + *arg++ = 0x0003fffe0003fffeULL; >>>>> + *arg++ = 0x0002000100000001ULL; >>>>> + *arg++ = 0x0002000100020001ULL; >>>>> +}
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 09a57a806fa..10505a8061a 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); extern bool macho_lo_sum_memory_operand (rtx, machine_mode); -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *); +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false); extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT); extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT); extern int num_insns_constant (rtx, machine_mode); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 6ba9df4f02e..853eaede673 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) return false; } +/* Check if value C can be generated by 2 instructions, one instruction + is li/lis, another instruction is rlwinm. */ + +static bool +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val, + int *shift, HOST_WIDE_INT *mask) +{ + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL; + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL; + unsigned HOST_WIDE_INT v; + + /* diff of high and low (high ^ low) should be the mask position. */ + unsigned HOST_WIDE_INT m = low ^ high; + int tz = ctz_hwi (m); + int lz = clz_hwi (m); + if (m != 0) + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz); + if (high != 0) + m = ~m; + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF); + + if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1)) + return false; + + /* rotl32 on positive/negative value of 'li' 15/16bits. 
*/ + int n; + if (!can_be_rotated_to_lowbits (v, 15, &n, true) + && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true)) + { + /* rotate32 from a negative value of 'lis'. */ + if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true)) + return false; + n += 16; + } + n = 32 - (n % 32); + n %= 32; + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF; + if (v & 0x80000000ULL) + v |= HOST_WIDE_INT_M1U << 32; + *mask = m; + *val = v; + *shift = n; + return true; +} + /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. Output insns to set DEST equal to the constant C as a series of lis, ori and shl instructions. If NUM_INSNS is not NULL, then @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) return; } + HOST_WIDE_INT val; + if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask)) + { + /* li/lis; rlwinm */ + count_or_emit_insn (temp, GEN_INT (val)); + rtx low = temp ? gen_lowpart (SImode, temp) : nullptr; + rtx m = GEN_INT (mask); + rtx n = GEN_INT (shift); + count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m)); + return; + } + if (ud3 == 0 && ud4 == 0) { gcc_assert ((ud2 & 0x8000) && ud1 != 0); @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code) Return false otherwise. */ bool -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot, + bool rotl32) { int clz = HOST_BITS_PER_WIDE_INT - lowbits; @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot) ^bit -> Vbit, , then zeros are at head or tail. 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */ const int rot_bits = lowbits + 1; - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1)); + unsigned HOST_WIDE_INT rc; + rc = rotl32 ? 
((((c & 0xFFFFFFFFULL) >> rot_bits) + | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL))) + : (c >> rot_bits) | (c << (clz - 1)); tz = ctz_hwi (rc); if (clz_hwi (rc) + tz >= clz) { diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index bc8bc6ab060..8a82ba3e26c 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2" (set_attr "dot" "yes") (set_attr "length" "4,8")]) +; define an insn about rlwinm for DI mode (with high part content) +(define_insn "rlwinm_di_mask" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (and:DI (plus:DI + (ashift:DI (subreg:DI + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")) 0) + (const_int 32)) + (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2)))) + (match_operand:DI 3 "const_int_operand" "n")))] + "rs6000_is_valid_and_mask (operands[3], SImode)" +{ + return UINTVAL (operands[3]) == -1ULL ? + "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3"; +} + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes")]) + ; Special case for less-than-0. We can do it with just one machine ; instruction, but the generic optimizers do not realise it is cheap. 
(define_insn "*lt0_<mode>di" diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c index 4f764d0576f..70ddfaa21da 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; } unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } unsigned long long mskse() { return 0xffff1234ffff1234ULL; } -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c new file mode 100644 index 00000000000..8959578143b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#include "rlwinm4di.h" + +long long arr1[] = { + 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL, + 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL, + 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL, + 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL, + 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL, + 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL, + 0x0002000100000001ULL, 0x0002000100020001ULL, +}; + +int +main () +{ + long long a[sizeof (arr1) / sizeof (arr1[0])]; + + foo (a); + if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c new file mode 100644 index 00000000000..9494d0327b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c @@ -0,0 +1,19 @@ +/* { dg-options "-O2 -mno-prefixed" } */ +/* { dg-do compile { target has_arch_ppc64 } } */ + +#define N 5 +#define MASK 0xffffffffe0000003ULL + +typedef 
unsigned long long int64; + +int64 +foo (int64 v) +{ + unsigned int v1 = v; + unsigned int v2 = ((v1 << N) | (v1 >> (32 - N))); + return ((int64) v2 | ((int64) v2 << 32)) & MASK; +} + +/* { dg-final { scan-assembler-not {\mor\M} } } */ +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */ +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c new file mode 100644 index 00000000000..fcbc8f8d742 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c @@ -0,0 +1,6 @@ +/* { dg-options "-O2 -mno-prefixed" } */ +/* { dg-do compile { target has_arch_ppc64 } } */ +#include "rlwinm4di.h" + +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h new file mode 100644 index 00000000000..59fe739ca85 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h @@ -0,0 +1,25 @@ +/* using 2 instructions(rlwinm) to build constants. */ +void __attribute__ ((__noinline__, __noclone__)) +foo (long long *arg) +{ + *arg++ = 0x0000400100000001ULL; + *arg++ = 0x0000000200000002ULL; + *arg++ = 0xffff8000bfff8000ULL; + *arg++ = 0xffff8001ffff8001ULL; + *arg++ = 0x0000800100000001ULL; + *arg++ = 0x0000800100008001ULL; + *arg++ = 0x0000800200000002ULL; + *arg++ = 0x0000800000008000ULL; + *arg++ = 0x0000000080008000ULL; + *arg++ = 0xffff0001bfff0001ULL; + *arg++ = 0xffff0001ffff0001ULL; + *arg++ = 0x0001000200000002ULL; + *arg++ = 0x8001000080010000ULL; + *arg++ = 0x0004000100000001ULL; + *arg++ = 0x0004000100040001ULL; + *arg++ = 0x00000000bfffe001ULL; + *arg++ = 0x0003fffe0001fffeULL; + *arg++ = 0x0003fffe0003fffeULL; + *arg++ = 0x0002000100000001ULL; + *arg++ = 0x0002000100020001ULL; +}