@@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
-extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
+extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
extern int num_insns_constant (rtx, machine_mode);
@@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
return false;
}
+/* Return true if C can be built by two instructions: li or lis to load *VAL,
+   then rlwinm rotating it left by *SHIFT under *MASK (all set on success). */
+
+static bool
+can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
+ int *shift, HOST_WIDE_INT *mask)
+{
+ unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
+ unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
+ unsigned HOST_WIDE_INT v;
+
+ /* Bits where the two 32-bit halves differ (high ^ low) locate the mask. */
+ unsigned HOST_WIDE_INT m = low ^ high;
+ int tz = ctz_hwi (m);
+ int lz = clz_hwi (m);
+ if (m != 0)
+ m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz); /* Ones over that span. */
+ if (high != 0)
+ m = ~m; /* Keep everything outside the span instead. */
+ v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
+
+ if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
+ return false;
+
+ /* Try a 32-bit rotation of V, or of ~V, into the low 15 bits ('li'). */
+ int n;
+ if (!can_be_rotated_to_lowbits (v, 15, &n, true)
+ && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
+ {
+ /* Otherwise V must rotate into 16 bits; add 16 to N for 'lis'. */
+ if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
+ return false;
+ n += 16;
+ }
+ n = 32 - (n % 32);
+ n %= 32;
+ v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF; /* Rotate right by N. */
+ if (v & 0x80000000ULL)
+ v |= HOST_WIDE_INT_M1U << 32; /* Sign-extend from bit 31. */
+ *mask = m;
+ *val = v;
+ *shift = n;
+ return true;
+}
+
/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
Output insns to set DEST equal to the constant C as a series of
lis, ori and shl instructions. If NUM_INSNS is not NULL, then
@@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
return;
}
+ HOST_WIDE_INT val;
+ if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
+ {
+ /* li/lis; rlwinm */
+ count_or_emit_insn (temp, GEN_INT (val));
+ rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
+ rtx m = GEN_INT (mask);
+ rtx n = GEN_INT (shift);
+ count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
+ return;
+ }
+
if (ud3 == 0 && ud4 == 0)
{
gcc_assert ((ud2 & 0x8000) && ud1 != 0);
@@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
Return false otherwise. */
bool
-can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
+can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
+ bool rotl32)
{
int clz = HOST_BITS_PER_WIDE_INT - lowbits;
@@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
^bit -> Vbit, , then zeros are at head or tail.
00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */
const int rot_bits = lowbits + 1;
- unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
+ unsigned HOST_WIDE_INT rc;
+ rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
+ | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
+ : (c >> rot_bits) | (c << (clz - 1));
tz = ctz_hwi (rc);
if (clz_hwi (rc) + tz >= clz)
{
@@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
(set_attr "dot" "yes")
(set_attr "length" "4,8")])
+; rlwinm for DImode: the rotated SImode value fills both halves, then is masked.
+(define_insn "rlwinm_di_mask"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (and:DI (plus:DI
+ (ashift:DI (subreg:DI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")) 0)
+ (const_int 32))
+ (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
+ (match_operand:DI 3 "const_int_operand" "n")))]
+ "rs6000_is_valid_and_mask (operands[3], SImode)"
+{
+ return UINTVAL (operands[3]) == -1ULL ?
+ "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3";
+}
+ [(set_attr "type" "shift")
+ (set_attr "maybe_var_shift" "yes")])
+
; Special case for less-than-0. We can do it with just one machine
; instruction, but the generic optimizers do not realise it is cheap.
(define_insn "*lt0_<mode>di"
@@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
-/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
+/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "rlwinm4di.h"
+
+long long arr1[] = { /* Values main compares against foo's output, in order. */
+ 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
+ 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
+ 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
+ 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
+ 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
+ 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
+ 0x0002000100000001ULL, 0x0002000100020001ULL,
+};
+
+int
+main ()
+{
+ long long a[sizeof (arr1) / sizeof (arr1[0])]; /* One slot per arr1 entry. */
+
+ foo (a); /* Expected to fill A; foo comes from rlwinm4di.h. */
+ if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
+ __builtin_abort (); /* A differs from arr1: fail the test. */
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-options "-O2 -mno-prefixed -mpowerpc64" } */
+/* { dg-do compile { target has_arch_ppc64 } } */
+
+#define N 5
+#define MASK 0xffffffffe0000003ULL
+
+typedef unsigned long long int64;
+
+int64
+foo (int64 v)
+{
+ unsigned int v1 = v; /* Truncate V to unsigned int. */
+ unsigned int v2 = ((v1 << N) | (v1 >> (32 - N))); /* Rotate left by N. */
+ return ((int64) v2 | ((int64) v2 << 32)) & MASK; /* Both halves, masked. */
+}
+
+/* { dg-final { scan-assembler-not {\mor\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
+/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-options "-O2 -mno-prefixed -mpowerpc64" } */
+/* { dg-do compile { target has_arch_ppc64 } } */
+#include "rlwinm4di.h"
+
+/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
+
new file mode 100644
@@ -0,0 +1,25 @@
+/* Store constants that are each meant to be built by li/lis plus rlwinm. */
+void __attribute__ ((__noinline__, __noclone__))
+foo (long long *arg)
+{
+ *arg++ = 0x0000400100000001ULL;
+ *arg++ = 0x0000000200000002ULL;
+ *arg++ = 0xffff8000bfff8000ULL;
+ *arg++ = 0xffff8001ffff8001ULL;
+ *arg++ = 0x0000800100000001ULL;
+ *arg++ = 0x0000800100008001ULL;
+ *arg++ = 0x0000800200000002ULL;
+ *arg++ = 0x0000800000008000ULL;
+ *arg++ = 0x0000000080008000ULL;
+ *arg++ = 0xffff0001bfff0001ULL;
+ *arg++ = 0xffff0001ffff0001ULL;
+ *arg++ = 0x0001000200000002ULL;
+ *arg++ = 0x8001000080010000ULL;
+ *arg++ = 0x0004000100000001ULL;
+ *arg++ = 0x0004000100040001ULL;
+ *arg++ = 0x00000000bfffe001ULL;
+ *arg++ = 0x0003fffe0001fffeULL;
+ *arg++ = 0x0003fffe0003fffeULL;
+ *arg++ = 0x0002000100000001ULL;
+ *arg++ = 0x0002000100020001ULL;
+}