Message ID | 1454525947-14690-2-git-send-email-charles.baylis@linaro.org |
---|---|
State | New |
Headers | show |
Hi Charles, On 03/02/16 18:59, charles.baylis@linaro.org wrote: > From: Charles Baylis <charles.baylis@linaro.org> > > gcc/ChangeLog: > > 2016-02-03 Charles Baylis <charles.baylis@linaro.org> > > PR target/68532 > * config/arm/arm.c (neon_endian_lane_map): New function. > (neon_vector_pair_endian_lane_map): New function. > (arm_evpc_neon_vuzp): Allow for big endian lane order. > * config/arm/arm_neon.h (vuzpq_s8): Adjust shuffle patterns for big > endian. > (vuzpq_s16): Likewise. > (vuzpq_s32): Likewise. > (vuzpq_f32): Likewise. > (vuzpq_u8): Likewise. > (vuzpq_u16): Likewise. > (vuzpq_u32): Likewise. > (vuzpq_p8): Likewise. > (vuzpq_p16): Likewise. > > gcc/testsuite/ChangeLog: > > 2015-12-15 Charles Baylis <charles.baylis@linaro.org> > > PR target/68532 > * gcc.c-torture/execute/pr68532.c: New test. > > Change-Id: Ifd35d79bd42825f05403a1b96d8f34ef0f21dac3 > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index d8a2745..e9aa982 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -28208,6 +28208,35 @@ arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) > arm_expand_vec_perm_1 (target, op0, op1, sel); > } > > +/* map lane ordering between architectural lane order, and GCC lane order, > + taking into account ABI. See comment above output_move_neon for details. */ > +static int > +neon_endian_lane_map (machine_mode mode, int lane) s/map/Map/ New line between comment and function signature. > +{ > + if (BYTES_BIG_ENDIAN) > + { > + int nelems = GET_MODE_NUNITS (mode); > + /* Reverse lane order. */ > + lane = (nelems - 1 - lane); > + /* Reverse D register order, to match ABI. */ > + if (GET_MODE_SIZE (mode) == 16) > + lane = lane ^ (nelems / 2); > + } > + return lane; > +} > + > +/* some permutations index into pairs of vectors, this is a helper function > + to map indexes into those pairs of vectors. */ > +static int > +neon_pair_endian_lane_map (machine_mode mode, int lane) Similarly, s/some/Some/ and new line after comment. 
> +{ > + int nelem = GET_MODE_NUNITS (mode); > + if (BYTES_BIG_ENDIAN) > + lane = > + neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem); > + return lane; > +} > + > /* Generate or test for an insn that supports a constant permutation. */ > > /* Recognize patterns for the VUZP insns. */ > @@ -28218,14 +28247,22 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) > unsigned int i, odd, mask, nelt = d->nelt; > rtx out0, out1, in0, in1; > rtx (*gen)(rtx, rtx, rtx, rtx); > + int first_elem; > + int swap; > Just make this a bool. > if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) > return false; > > - /* Note that these are little-endian tests. Adjust for big-endian later. */ > - if (d->perm[0] == 0) > + /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the > + big endian pattern on 64 bit vectors, so we correct for that. */ > + swap = BYTES_BIG_ENDIAN && !d->one_vector_p > + && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0; > + > + first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap; > + > + if (first_elem == neon_endian_lane_map (d->vmode, 0)) > odd = 0; > - else if (d->perm[0] == 1) > + else if (first_elem == neon_endian_lane_map (d->vmode, 1)) > odd = 1; > else > return false; > @@ -28233,8 +28270,9 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) > > for (i = 0; i < nelt; i++) > { > - unsigned elt = (i * 2 + odd) & mask; > - if (d->perm[i] != elt) > + unsigned elt = > + (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask; > + if ((d->perm[i] ^ swap) != neon_pair_endian_lane_map (d->vmode, elt)) > return false; > } > > @@ -28258,10 +28296,9 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) > > in0 = d->op0; > in1 = d->op1; > - if (BYTES_BIG_ENDIAN) > + if (swap) > { > std::swap (in0, in1); > - odd = !odd; > } remove the braces around the std::swap > > out0 = d->target; > diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h > index 47816d5..2e014b6 100644 > --- a/gcc/config/arm/arm_neon.h > +++ 
b/gcc/config/arm/arm_neon.h > @@ -8741,9 +8741,9 @@ vuzpq_s8 (int8x16_t __a, int8x16_t __b) > int8x16x2_t __rv; > #ifdef __ARM_BIG_ENDIAN > __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) > - { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); > + { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 }); > __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) > - { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); > + { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 }); > #else > __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) > { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); > @@ -8759,9 +8759,9 @@ vuzpq_s16 (int16x8_t __a, int16x8_t __b) > int16x8x2_t __rv; > #ifdef __ARM_BIG_ENDIAN > __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) > - { 9, 11, 13, 15, 1, 3, 5, 7 }); > + { 5, 7, 1, 3, 13, 15, 9, 11 }); > __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) > - { 8, 10, 12, 14, 0, 2, 4, 6 }); > + { 4, 6, 0, 2, 12, 14, 8, 10 }); > #else > __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) > { 0, 2, 4, 6, 8, 10, 12, 14 }); > @@ -8776,8 +8776,8 @@ vuzpq_s32 (int32x4_t __a, int32x4_t __b) > { > int32x4x2_t __rv; > #ifdef __ARM_BIG_ENDIAN > - __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); > - __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); > + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 }); > + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 }); > #else > __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); > __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); > @@ -8790,8 +8790,8 @@ vuzpq_f32 (float32x4_t __a, float32x4_t __b) > { > float32x4x2_t __rv; > #ifdef __ARM_BIG_ENDIAN > - __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); > - __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); > + 
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 }); > + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 }); > #else > __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); > __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); > @@ -8805,9 +8805,9 @@ vuzpq_u8 (uint8x16_t __a, uint8x16_t __b) > uint8x16x2_t __rv; > #ifdef __ARM_BIG_ENDIAN > __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) > - { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); > + { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 }); > __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) > - { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); > + { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 }); > #else > __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) > { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); > @@ -8823,9 +8823,9 @@ vuzpq_u16 (uint16x8_t __a, uint16x8_t __b) > uint16x8x2_t __rv; > #ifdef __ARM_BIG_ENDIAN > __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) > - { 9, 11, 13, 15, 1, 3, 5, 7 }); > + { 5, 7, 1, 3, 13, 15, 9, 11 }); > __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) > - { 8, 10, 12, 14, 0, 2, 4, 6 }); > + { 4, 6, 0, 2, 12, 14, 8, 10 }); > #else > __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) > { 0, 2, 4, 6, 8, 10, 12, 14 }); > @@ -8840,8 +8840,8 @@ vuzpq_u32 (uint32x4_t __a, uint32x4_t __b) > { > uint32x4x2_t __rv; > #ifdef __ARM_BIG_ENDIAN > - __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); > - __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); > + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 }); > + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 }); > #else > __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); > __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); 
> @@ -8855,9 +8855,9 @@ vuzpq_p8 (poly8x16_t __a, poly8x16_t __b) > poly8x16x2_t __rv; > #ifdef __ARM_BIG_ENDIAN > __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) > - { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); > + { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 }); > __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) > - { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); > + { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 }); > #else > __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) > { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); > @@ -8873,9 +8873,9 @@ vuzpq_p16 (poly16x8_t __a, poly16x8_t __b) > poly16x8x2_t __rv; > #ifdef __ARM_BIG_ENDIAN > __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) > - { 9, 11, 13, 15, 1, 3, 5, 7 }); > + { 5, 7, 1, 3, 13, 15, 9, 11 }); > __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) > - { 8, 10, 12, 14, 0, 2, 4, 6 }); > + { 4, 6, 0, 2, 12, 14, 8, 10 }); > #else > __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) > { 0, 2, 4, 6, 8, 10, 12, 14 }); > diff --git a/gcc/testsuite/gcc.c-torture/execute/pr68532.c b/gcc/testsuite/gcc.c-torture/execute/pr68532.c > new file mode 100644 > index 0000000..3c40aa8 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/execute/pr68532.c > @@ -0,0 +1,24 @@ > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ > + > +#define SIZE 128 > +unsigned short _Alignas (16) in[SIZE]; > + > +extern void abort (void); > + > +__attribute__ ((noinline)) int > +test (unsigned short sum, unsigned short *in, int x) > +{ > + for (int j = 0; j < SIZE; j += 8) > + sum += in[j] * x; > + return sum; > +} > + > +int > +main () > +{ > + for (int i = 0; i < SIZE; i++) > + in[i] = i; > + if (test (0, in, 1) != 960) > + abort (); AFAIK tests here usually prefer __builtin_abort (); That way you don't have to declare the abort prototype in the beginning. Ok with those changes. Thanks, Kyrill
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index d8a2745..e9aa982 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -28208,6 +28208,35 @@ arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) arm_expand_vec_perm_1 (target, op0, op1, sel); } +/* map lane ordering between architectural lane order, and GCC lane order, + taking into account ABI. See comment above output_move_neon for details. */ +static int +neon_endian_lane_map (machine_mode mode, int lane) +{ + if (BYTES_BIG_ENDIAN) + { + int nelems = GET_MODE_NUNITS (mode); + /* Reverse lane order. */ + lane = (nelems - 1 - lane); + /* Reverse D register order, to match ABI. */ + if (GET_MODE_SIZE (mode) == 16) + lane = lane ^ (nelems / 2); + } + return lane; +} + +/* some permutations index into pairs of vectors, this is a helper function + to map indexes into those pairs of vectors. */ +static int +neon_pair_endian_lane_map (machine_mode mode, int lane) +{ + int nelem = GET_MODE_NUNITS (mode); + if (BYTES_BIG_ENDIAN) + lane = + neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem); + return lane; +} + /* Generate or test for an insn that supports a constant permutation. */ /* Recognize patterns for the VUZP insns. */ @@ -28218,14 +28247,22 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) unsigned int i, odd, mask, nelt = d->nelt; rtx out0, out1, in0, in1; rtx (*gen)(rtx, rtx, rtx, rtx); + int first_elem; + int swap; if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) return false; - /* Note that these are little-endian tests. Adjust for big-endian later. */ - if (d->perm[0] == 0) + /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the + big endian pattern on 64 bit vectors, so we correct for that. */ + swap = BYTES_BIG_ENDIAN && !d->one_vector_p + && GET_MODE_SIZE (d->vmode) == 8 ? 
d->nelt : 0; + + first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap; + + if (first_elem == neon_endian_lane_map (d->vmode, 0)) odd = 0; - else if (d->perm[0] == 1) + else if (first_elem == neon_endian_lane_map (d->vmode, 1)) odd = 1; else return false; @@ -28233,8 +28270,9 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) for (i = 0; i < nelt; i++) { - unsigned elt = (i * 2 + odd) & mask; - if (d->perm[i] != elt) + unsigned elt = + (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask; + if ((d->perm[i] ^ swap) != neon_pair_endian_lane_map (d->vmode, elt)) return false; } @@ -28258,10 +28296,9 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) in0 = d->op0; in1 = d->op1; - if (BYTES_BIG_ENDIAN) + if (swap) { std::swap (in0, in1); - odd = !odd; } out0 = d->target; diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 47816d5..2e014b6 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -8741,9 +8741,9 @@ vuzpq_s8 (int8x16_t __a, int8x16_t __b) int8x16x2_t __rv; #ifdef __ARM_BIG_ENDIAN __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) - { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); + { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 }); __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) - { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); + { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 }); #else __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); @@ -8759,9 +8759,9 @@ vuzpq_s16 (int16x8_t __a, int16x8_t __b) int16x8x2_t __rv; #ifdef __ARM_BIG_ENDIAN __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) - { 9, 11, 13, 15, 1, 3, 5, 7 }); + { 5, 7, 1, 3, 13, 15, 9, 11 }); __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) - { 8, 10, 12, 14, 0, 2, 4, 6 }); + { 4, 6, 0, 2, 12, 14, 8, 10 }); #else __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 
12, 14 }); @@ -8776,8 +8776,8 @@ vuzpq_s32 (int32x4_t __a, int32x4_t __b) { int32x4x2_t __rv; #ifdef __ARM_BIG_ENDIAN - __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); - __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 }); #else __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); @@ -8790,8 +8790,8 @@ vuzpq_f32 (float32x4_t __a, float32x4_t __b) { float32x4x2_t __rv; #ifdef __ARM_BIG_ENDIAN - __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); - __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 }); #else __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); @@ -8805,9 +8805,9 @@ vuzpq_u8 (uint8x16_t __a, uint8x16_t __b) uint8x16x2_t __rv; #ifdef __ARM_BIG_ENDIAN __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) - { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); + { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 }); __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) - { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); + { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 }); #else __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); @@ -8823,9 +8823,9 @@ vuzpq_u16 (uint16x8_t __a, uint16x8_t __b) uint16x8x2_t __rv; #ifdef __ARM_BIG_ENDIAN __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) - { 9, 11, 13, 15, 1, 3, 5, 7 }); + { 5, 7, 1, 3, 13, 15, 9, 11 }); __rv.val[1] = 
__builtin_shuffle (__a, __b, (uint16x8_t) - { 8, 10, 12, 14, 0, 2, 4, 6 }); + { 4, 6, 0, 2, 12, 14, 8, 10 }); #else __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); @@ -8840,8 +8840,8 @@ vuzpq_u32 (uint32x4_t __a, uint32x4_t __b) { uint32x4x2_t __rv; #ifdef __ARM_BIG_ENDIAN - __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); - __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 }); #else __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); @@ -8855,9 +8855,9 @@ vuzpq_p8 (poly8x16_t __a, poly8x16_t __b) poly8x16x2_t __rv; #ifdef __ARM_BIG_ENDIAN __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) - { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); + { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 }); __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) - { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); + { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 }); #else __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); @@ -8873,9 +8873,9 @@ vuzpq_p16 (poly16x8_t __a, poly16x8_t __b) poly16x8x2_t __rv; #ifdef __ARM_BIG_ENDIAN __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) - { 9, 11, 13, 15, 1, 3, 5, 7 }); + { 5, 7, 1, 3, 13, 15, 9, 11 }); __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) - { 8, 10, 12, 14, 0, 2, 4, 6 }); + { 4, 6, 0, 2, 12, 14, 8, 10 }); #else __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); diff --git a/gcc/testsuite/gcc.c-torture/execute/pr68532.c b/gcc/testsuite/gcc.c-torture/execute/pr68532.c new file mode 100644 index 0000000..3c40aa8 --- /dev/null +++ 
b/gcc/testsuite/gcc.c-torture/execute/pr68532.c @@ -0,0 +1,24 @@ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ + +#define SIZE 128 +unsigned short _Alignas (16) in[SIZE]; + +extern void abort (void); + +__attribute__ ((noinline)) int +test (unsigned short sum, unsigned short *in, int x) +{ + for (int j = 0; j < SIZE; j += 8) + sum += in[j] * x; + return sum; +} + +int +main () +{ + for (int i = 0; i < SIZE; i++) + in[i] = i; + if (test (0, in, 1) != 960) + abort (); + return 0; +}
From: Charles Baylis <charles.baylis@linaro.org> gcc/ChangeLog: 2016-02-03 Charles Baylis <charles.baylis@linaro.org> PR target/68532 * config/arm/arm.c (neon_endian_lane_map): New function. (neon_pair_endian_lane_map): New function. (arm_evpc_neon_vuzp): Allow for big endian lane order. * config/arm/arm_neon.h (vuzpq_s8): Adjust shuffle patterns for big endian. (vuzpq_s16): Likewise. (vuzpq_s32): Likewise. (vuzpq_f32): Likewise. (vuzpq_u8): Likewise. (vuzpq_u16): Likewise. (vuzpq_u32): Likewise. (vuzpq_p8): Likewise. (vuzpq_p16): Likewise. gcc/testsuite/ChangeLog: 2015-12-15 Charles Baylis <charles.baylis@linaro.org> PR target/68532 * gcc.c-torture/execute/pr68532.c: New test. Change-Id: Ifd35d79bd42825f05403a1b96d8f34ef0f21dac3