@@ -9157,10 +9157,6 @@ aarch64_evpc_dup (struct expand_vec_perm_d *d)
unsigned int i, elt, nelt = d->nelt;
rtx lane;
- /* TODO: This may not be big-endian safe. */
- if (BYTES_BIG_ENDIAN)
- return false;
-
elt = d->perm[0];
for (i = 1; i < nelt; i++)
{
@@ -9174,7 +9170,7 @@ aarch64_evpc_dup (struct expand_vec_perm_d *d)
use d->op0 and need not do any extra arithmetic to get the
correct lane number. */
in0 = d->op0;
- lane = GEN_INT (elt);
+ lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
switch (vmode)
{
@@ -9255,14 +9251,14 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
else if (aarch64_evpc_ext (d))
return true;
+ else if (aarch64_evpc_dup (d))
+ return true;
else if (aarch64_evpc_zip (d))
return true;
else if (aarch64_evpc_uzp (d))
return true;
else if (aarch64_evpc_trn (d))
return true;
- else if (aarch64_evpc_dup (d))
- return true;
return aarch64_evpc_tbl (d);
}
return false;
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline --save-temps" } */
+
+extern void abort (void);
+
+typedef float float32x2_t __attribute__ ((__vector_size__ ((8))));
+typedef unsigned int uint32x2_t __attribute__ ((__vector_size__ ((8))));
+
+float32x2_t
+test_dup_1 (float32x2_t in) /* Build a vector whose two lanes both hold lane 1 of IN.  */
+{
+ return __builtin_shuffle (in, (uint32x2_t) {1, 1}); /* Selector {1, 1}: index 1 for each result lane.  */
+}
+
+int
+main (int argc, char **argv) /* Run test_dup_1 once and abort on a wrong result.  */
+{
+ float32x2_t test = {2.718, 3.141}; /* Lanes differ, so picking the wrong lane is detectable.  */
+ float32x2_t res = test_dup_1 (test);
+ if (res[0] != test[1] || res[1] != test[1]) /* Both result lanes must equal input lane 1.  */
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "\[ \t\]*dup\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.s\\\[\[01\]\\\]" 1 } } */
+/* { dg-final { scan-assembler-not "zip" } } */
+/* { dg-final { cleanup-saved-temps } } */
+