diff mbox series

[v2] Arm: Fix ldrd offset range [PR115153]

Message ID PAWPR08MB89825D8733A3D85C1284D90B83C72@PAWPR08MB8982.eurprd08.prod.outlook.com
State New
Headers show
Series [v2] Arm: Fix ldrd offset range [PR115153] | expand

Commit Message

Wilco Dijkstra June 11, 2024, 4:42 p.m. UTC
v2: use a new arm_arch_v7ve_neon, fix use of DImode in output_move_neon

The valid offset range of LDRD in arm_legitimate_index_p is increased to
-1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode.
Fix this by moving the LDRD check earlier.

Passes bootstrap & regress, OK for commit?

gcc:
        PR target/115153
        * config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before NEON.
        (thumb2_legitimate_index_p): Update comments.
        (output_move_neon): Use DFmode for vldr/vstr.
        * lib/target-supports.exp: Add arm_arch_v7ve_neon target support.

gcc/testsuite:
        PR target/115153
        * gcc.target/arm/pr115153.c: Add new test.

---

Comments

Richard Earnshaw (lists) June 19, 2024, 12:52 p.m. UTC | #1
On 11/06/2024 17:42, Wilco Dijkstra wrote:
> v2: use a new arm_arch_v7ve_neon, fix use of DImode in output_move_neon
> 
> The valid offset range of LDRD in arm_legitimate_index_p is increased to
> -1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode.
> Fix this by moving the LDRD check earlier.
> 
> Passes bootstrap & regress, OK for commit?
> 
> gcc:
>         PR target/115153
>         * config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before NEON.
>         (thumb2_legitimate_index_p): Update comments.
>         (output_move_neon): Use DFmode for vldr/vstr.
>         * lib/target-supports.exp: Add arm_arch_v7ve_neon target support.
> 
> gcc/testsuite:
>         PR target/115153
>         * gcc.target/arm/pr115153.c: Add new test.

The Linaro CI is reporting an ICE while building libgfortran with this change.

# 00:14:58 /home/tcwg-build/workspace/tcwg_gnu_3/abe/snapshots/gcc.git~master/libgfortran/generated/matmul_i1.c:3006:1: internal compiler error: in change_address_1, at emit-rtl.cc:2299
# 00:14:58 make[3]: *** [Makefile:4262: generated/matmul_i1.lo] Error 1
# 00:14:58 make[2]: *** [Makefile:1861: all] Error 2
# 00:14:58 make[1]: *** [Makefile:15767: all-target-libgfortran] Error 2
# 00:14:58 make: *** [Makefile:1065: all] Error 2

Could you investigate please?

R.

> 
> ---
> 
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index ea0c963a4d67ecd70e1571624e84dfe46d757df9..7dec0254f5a953050c9c52aa297fad7f3dfb6c74 100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -8852,6 +8852,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
>  	    && INTVAL (index) > -1024
>  	    && (INTVAL (index) & 3) == 0);
>  
> +  if (arm_address_register_rtx_p (index, strict_p)
> +      && (GET_MODE_SIZE (mode) <= 4))
> +    return 1;
> +
> +  /* This handles DFmode only if !TARGET_HARD_FLOAT.  */
> +  if (mode == DImode || mode == DFmode)
> +    {
> +      if (code == CONST_INT)
> +	{
> +	  HOST_WIDE_INT val = INTVAL (index);
> +
> +	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
> +	     If vldr is selected it uses arm_coproc_mem_operand.  */
> +	  if (TARGET_LDRD)
> +	    return val > -256 && val < 256;
> +	  else
> +	    return val > -4096 && val < 4092;
> +	}
> +
> +      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
> +    }
> +
>    /* For quad modes, we restrict the constant offset to be slightly less
>       than what the instruction format permits.  We do this because for
>       quad mode moves, we will actually decompose them into two separate
> @@ -8864,7 +8886,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
>  	    && (INTVAL (index) & 3) == 0);
>  
>    /* We have no such constraint on double mode offsets, so we permit the
> -     full range of the instruction format.  */
> +     full range of the instruction format.  Note DImode is included here.  */
>    if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
>      return (code == CONST_INT
>  	    && INTVAL (index) < 1024
> @@ -8877,27 +8899,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
>  	    && INTVAL (index) > -1024
>  	    && (INTVAL (index) & 3) == 0);
>  
> -  if (arm_address_register_rtx_p (index, strict_p)
> -      && (GET_MODE_SIZE (mode) <= 4))
> -    return 1;
> -
> -  if (mode == DImode || mode == DFmode)
> -    {
> -      if (code == CONST_INT)
> -	{
> -	  HOST_WIDE_INT val = INTVAL (index);
> -
> -	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
> -	     If vldr is selected it uses arm_coproc_mem_operand.  */
> -	  if (TARGET_LDRD)
> -	    return val > -256 && val < 256;
> -	  else
> -	    return val > -4096 && val < 4092;
> -	}
> -
> -      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
> -    }
> -
>    if (GET_MODE_SIZE (mode) <= 4
>        && ! (arm_arch4
>  	    && (mode == HImode
> @@ -9000,7 +9001,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
>  	    && (INTVAL (index) & 3) == 0);
>  
>    /* We have no such constraint on double mode offsets, so we permit the
> -     full range of the instruction format.  */
> +     full range of the instruction format.  Note DImode is included here.  */
>    if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
>      return (code == CONST_INT
>  	    && INTVAL (index) < 1024
> @@ -9011,6 +9012,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
>        && (GET_MODE_SIZE (mode) <= 4))
>      return 1;
>  
> +  /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE.  */
>    if (mode == DImode || mode == DFmode)
>      {
>        if (code == CONST_INT)
> @@ -20854,7 +20856,7 @@ output_move_neon (rtx *operands)
>  	    /* We're only using DImode here because it's a convenient
>  	       size.  */
>  	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
> -	    ops[1] = adjust_address (mem, DImode, 8 * i);
> +	    ops[1] = adjust_address (mem, DFmode, 8 * i);
>  	    if (reg_overlap_mentioned_p (ops[0], mem))
>  	      {
>  		gcc_assert (overlap == -1);
> @@ -20872,7 +20874,7 @@ output_move_neon (rtx *operands)
>  	if (overlap != -1)
>  	  {
>  	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
> -	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
> +	    ops[1] = adjust_address (mem, DFmode, 8 * overlap);
>  	    if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
>  	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
>  	    else
> diff --git a/gcc/testsuite/gcc.target/arm/pr115153.c b/gcc/testsuite/gcc.target/arm/pr115153.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..80b57acf87ec667123146873afab1cd4a581e7f5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr115153.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -marm" } */
> +/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */
> +/* { dg-add-options arm_arch_v7ve_neon } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** f1:
> +**	add	r0, r0, #256
> +**	ldrd	r0, r1, \[r0\]
> +**	bx	lr
> +*/
> +long long f1 (long long *p)
> +{
> +  return __atomic_load_n (p + 32, __ATOMIC_RELAXED);
> +}
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 5c0a3dade222777402d2a44e63cb5be305341471..20ea318bba65ff7685d1378bb898a15cb6c4f00b 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -5518,6 +5518,8 @@ foreach { armfunc armflag armdefs } {
>  	v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__
>  	v7ve "-march=armv7ve+fp -marm"
>  		"__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV"
> +	v7ve_neon "-march=armv7ve+simd -mfpu=auto -mfloat-abi=softfp"
> +		  "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV && __ARM_NEON__"
>  	v8a "-march=armv8-a+simd" __ARM_ARCH_8A__
>  	v8a_hard "-march=armv8-a+simd -mfpu=auto -mfloat-abi=hard" __ARM_ARCH_8A__
>  	v8_1a "-march=armv8.1-a+simd" __ARM_ARCH_8A__
>
Wilco Dijkstra June 27, 2024, 4:25 p.m. UTC | #2
Hi Richard,

> The Linaro CI is reporting an ICE while building libgfortran with this change.

So it looks like Thumb-2 oddly enough restricts the negative range of DFmode
even though that is unnecessary and inefficient. The easiest workaround turned
out to be to avoid using the checked adjust_address.

Cheers,
Wilco


v3: Use adjust_address_nv with DFmode.

The valid offset range of LDRD in arm_legitimate_index_p is increased to
-1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode.
Fix this by moving the LDRD check earlier.

Passes bootstrap & regress, OK for commit and backport to GCC14.2?

gcc:
        PR target/115153
        * config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before NEON.
        (thumb2_legitimate_index_p): Update comments.
        (output_move_neon): Use DFmode for vldr/vstr and non-checking adjust_address.

gcc/testsuite:
        PR target/115153
        * gcc.target/arm/pr115153.c: Add new test.
        * lib/target-supports.exp: Add arm_arch_v7ve_neon target support.

---

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 7d67d2cfee9f4edc91f187e940be40c07ff726cd..5e6608a30f17bf8185464e3fd0b202a71ff83fc8 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -8858,6 +8858,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
 	    && INTVAL (index) > -1024
 	    && (INTVAL (index) & 3) == 0);
 
+  if (arm_address_register_rtx_p (index, strict_p)
+      && (GET_MODE_SIZE (mode) <= 4))
+    return 1;
+
+  /* This handles DFmode only if !TARGET_HARD_FLOAT.  */
+  if (mode == DImode || mode == DFmode)
+    {
+      if (code == CONST_INT)
+	{
+	  HOST_WIDE_INT val = INTVAL (index);
+
+	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
+	     If vldr is selected it uses arm_coproc_mem_operand.  */
+	  if (TARGET_LDRD)
+	    return val > -256 && val < 256;
+	  else
+	    return val > -4096 && val < 4092;
+	}
+
+      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
+    }
+
   /* For quad modes, we restrict the constant offset to be slightly less
      than what the instruction format permits.  We do this because for
      quad mode moves, we will actually decompose them into two separate
@@ -8870,7 +8892,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
 	    && (INTVAL (index) & 3) == 0);
 
   /* We have no such constraint on double mode offsets, so we permit the
-     full range of the instruction format.  */
+     full range of the instruction format.  Note DImode is included here.  */
   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
     return (code == CONST_INT
 	    && INTVAL (index) < 1024
@@ -8883,27 +8905,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
 	    && INTVAL (index) > -1024
 	    && (INTVAL (index) & 3) == 0);
 
-  if (arm_address_register_rtx_p (index, strict_p)
-      && (GET_MODE_SIZE (mode) <= 4))
-    return 1;
-
-  if (mode == DImode || mode == DFmode)
-    {
-      if (code == CONST_INT)
-	{
-	  HOST_WIDE_INT val = INTVAL (index);
-
-	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
-	     If vldr is selected it uses arm_coproc_mem_operand.  */
-	  if (TARGET_LDRD)
-	    return val > -256 && val < 256;
-	  else
-	    return val > -4096 && val < 4092;
-	}
-
-      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
-    }
-
   if (GET_MODE_SIZE (mode) <= 4
       && ! (arm_arch4
 	    && (mode == HImode
@@ -9006,7 +9007,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
 	    && (INTVAL (index) & 3) == 0);
 
   /* We have no such constraint on double mode offsets, so we permit the
-     full range of the instruction format.  */
+     full range of the instruction format.  Note DImode is included here.  */
   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
     return (code == CONST_INT
 	    && INTVAL (index) < 1024
@@ -9017,6 +9018,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
       && (GET_MODE_SIZE (mode) <= 4))
     return 1;
 
+  /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE.  */
   if (mode == DImode || mode == DFmode)
     {
       if (code == CONST_INT)
@@ -20865,10 +20867,9 @@ output_move_neon (rtx *operands)
 	int overlap = -1;
 	for (i = 0; i < nregs; i++)
 	  {
-	    /* We're only using DImode here because it's a convenient
-	       size.  */
-	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
-	    ops[1] = adjust_address (mem, DImode, 8 * i);
+	    /* Use DFmode for vldr/vstr.  */
+	    ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * i);
+	    ops[1] = adjust_address_nv (mem, DFmode, 8 * i);
 	    if (reg_overlap_mentioned_p (ops[0], mem))
 	      {
 		gcc_assert (overlap == -1);
@@ -20885,8 +20886,8 @@ output_move_neon (rtx *operands)
 	  }
 	if (overlap != -1)
 	  {
-	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
-	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
+	    ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * overlap);
+	    ops[1] = adjust_address_nv (mem, DFmode, 8 * overlap);
 	    if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
 	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
 	    else
diff --git a/gcc/testsuite/gcc.target/arm/pr115153.c b/gcc/testsuite/gcc.target/arm/pr115153.c
new file mode 100644
index 0000000000000000000000000000000000000000..80b57acf87ec667123146873afab1cd4a581e7f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr115153.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -marm" } */
+/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */
+/* { dg-add-options arm_arch_v7ve_neon } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+**	add	r0, r0, #256
+**	ldrd	r0, r1, \[r0\]
+**	bx	lr
+*/
+long long f1 (long long *p)
+{
+  return __atomic_load_n (p + 32, __ATOMIC_RELAXED);
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index ed30cd18ad6935b4d19f573285197cd71cf743bb..500329e0fb78a83d6cd8a4b1e0db4fd61350f17f 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5564,6 +5564,8 @@ foreach { armfunc armflag armdefs } {
 	v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__
 	v7ve "-march=armv7ve+fp -marm"
 		"__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV"
+	v7ve_neon "-march=armv7ve+simd -mfpu=auto -mfloat-abi=softfp"
+		  "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV && __ARM_NEON__"
 	v8a "-march=armv8-a+simd" __ARM_ARCH_8A__
 	v8a_hard "-march=armv8-a+simd -mfpu=auto -mfloat-abi=hard" __ARM_ARCH_8A__
 	v8_1a "-march=armv8.1-a+simd" __ARM_ARCH_8A__
Richard Earnshaw (lists) July 5, 2024, 4:13 p.m. UTC | #3
On 27/06/2024 17:25, Wilco Dijkstra wrote:
> Hi Richard,
> 
>> The Linaro CI is reporting an ICE while building libgfortran with this change.
> 
> So it looks like Thumb-2 oddly enough restricts the negative range of DFmode
> even though that is unnecessary and inefficient. The easiest workaround turned
> out to be to avoid using the checked adjust_address.

It is necessary because DFmode has constraints such as "r" (GENERAL_REGS) and "m" in a load, so the range needs to be legitimate here as well as when a VFP_REG is used.

It might be possible to fix this, but it would involve a lot of rewriting the code to get it right.

But you're right that we don't need to validate this change: we just assume it's right anyway and if it were wrong something bigger has already gone wrong earlier and we'd end up with invalid offsets in the assembly code.

OK.

R.

> 
> Cheers,
> Wilco
> 
> 
> v3: Use adjust_address_nv with DFmode.
> 
> The valid offset range of LDRD in arm_legitimate_index_p is increased to
> -1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode.
> Fix this by moving the LDRD check earlier.
> 
> Passes bootstrap & regress, OK for commit and backport to GCC14.2?
> 
> gcc:
>         PR target/115153
>         * config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before NEON.
>         (thumb2_legitimate_index_p): Update comments.
>         (output_move_neon): Use DFmode for vldr/vstr and non-checking adjust_address.
> 
> gcc/testsuite:
>         PR target/115153
>         * gcc.target/arm/pr115153.c: Add new test.
>         * lib/target-supports.exp: Add arm_arch_v7ve_neon target support.
> 
> ---
> 
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index 7d67d2cfee9f4edc91f187e940be40c07ff726cd..5e6608a30f17bf8185464e3fd0b202a71ff83fc8 100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -8858,6 +8858,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
>  	    && INTVAL (index) > -1024
>  	    && (INTVAL (index) & 3) == 0);
>  
> +  if (arm_address_register_rtx_p (index, strict_p)
> +      && (GET_MODE_SIZE (mode) <= 4))
> +    return 1;
> +
> +  /* This handles DFmode only if !TARGET_HARD_FLOAT.  */
> +  if (mode == DImode || mode == DFmode)
> +    {
> +      if (code == CONST_INT)
> +	{
> +	  HOST_WIDE_INT val = INTVAL (index);
> +
> +	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
> +	     If vldr is selected it uses arm_coproc_mem_operand.  */
> +	  if (TARGET_LDRD)
> +	    return val > -256 && val < 256;
> +	  else
> +	    return val > -4096 && val < 4092;
> +	}
> +
> +      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
> +    }
> +
>    /* For quad modes, we restrict the constant offset to be slightly less
>       than what the instruction format permits.  We do this because for
>       quad mode moves, we will actually decompose them into two separate
> @@ -8870,7 +8892,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
>  	    && (INTVAL (index) & 3) == 0);
>  
>    /* We have no such constraint on double mode offsets, so we permit the
> -     full range of the instruction format.  */
> +     full range of the instruction format.  Note DImode is included here.  */
>    if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
>      return (code == CONST_INT
>  	    && INTVAL (index) < 1024
> @@ -8883,27 +8905,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
>  	    && INTVAL (index) > -1024
>  	    && (INTVAL (index) & 3) == 0);
>  
> -  if (arm_address_register_rtx_p (index, strict_p)
> -      && (GET_MODE_SIZE (mode) <= 4))
> -    return 1;
> -
> -  if (mode == DImode || mode == DFmode)
> -    {
> -      if (code == CONST_INT)
> -	{
> -	  HOST_WIDE_INT val = INTVAL (index);
> -
> -	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
> -	     If vldr is selected it uses arm_coproc_mem_operand.  */
> -	  if (TARGET_LDRD)
> -	    return val > -256 && val < 256;
> -	  else
> -	    return val > -4096 && val < 4092;
> -	}
> -
> -      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
> -    }
> -
>    if (GET_MODE_SIZE (mode) <= 4
>        && ! (arm_arch4
>  	    && (mode == HImode
> @@ -9006,7 +9007,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
>  	    && (INTVAL (index) & 3) == 0);
>  
>    /* We have no such constraint on double mode offsets, so we permit the
> -     full range of the instruction format.  */
> +     full range of the instruction format.  Note DImode is included here.  */
>    if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
>      return (code == CONST_INT
>  	    && INTVAL (index) < 1024
> @@ -9017,6 +9018,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
>        && (GET_MODE_SIZE (mode) <= 4))
>      return 1;
>  
> +  /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE.  */
>    if (mode == DImode || mode == DFmode)
>      {
>        if (code == CONST_INT)
> @@ -20865,10 +20867,9 @@ output_move_neon (rtx *operands)
>  	int overlap = -1;
>  	for (i = 0; i < nregs; i++)
>  	  {
> -	    /* We're only using DImode here because it's a convenient
> -	       size.  */
> -	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
> -	    ops[1] = adjust_address (mem, DImode, 8 * i);
> +	    /* Use DFmode for vldr/vstr.  */
> +	    ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * i);
> +	    ops[1] = adjust_address_nv (mem, DFmode, 8 * i);
>  	    if (reg_overlap_mentioned_p (ops[0], mem))
>  	      {
>  		gcc_assert (overlap == -1);
> @@ -20885,8 +20886,8 @@ output_move_neon (rtx *operands)
>  	  }
>  	if (overlap != -1)
>  	  {
> -	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
> -	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
> +	    ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * overlap);
> +	    ops[1] = adjust_address_nv (mem, DFmode, 8 * overlap);
>  	    if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
>  	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
>  	    else
> diff --git a/gcc/testsuite/gcc.target/arm/pr115153.c b/gcc/testsuite/gcc.target/arm/pr115153.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..80b57acf87ec667123146873afab1cd4a581e7f5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr115153.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -marm" } */
> +/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */
> +/* { dg-add-options arm_arch_v7ve_neon } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** f1:
> +**	add	r0, r0, #256
> +**	ldrd	r0, r1, \[r0\]
> +**	bx	lr
> +*/
> +long long f1 (long long *p)
> +{
> +  return __atomic_load_n (p + 32, __ATOMIC_RELAXED);
> +}
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index ed30cd18ad6935b4d19f573285197cd71cf743bb..500329e0fb78a83d6cd8a4b1e0db4fd61350f17f 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -5564,6 +5564,8 @@ foreach { armfunc armflag armdefs } {
>  	v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__
>  	v7ve "-march=armv7ve+fp -marm"
>  		"__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV"
> +	v7ve_neon "-march=armv7ve+simd -mfpu=auto -mfloat-abi=softfp"
> +		  "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV && __ARM_NEON__"
>  	v8a "-march=armv8-a+simd" __ARM_ARCH_8A__
>  	v8a_hard "-march=armv8-a+simd -mfpu=auto -mfloat-abi=hard" __ARM_ARCH_8A__
>  	v8_1a "-march=armv8.1-a+simd" __ARM_ARCH_8A__
>
diff mbox series

Patch

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index ea0c963a4d67ecd70e1571624e84dfe46d757df9..7dec0254f5a953050c9c52aa297fad7f3dfb6c74 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -8852,6 +8852,28 @@  arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
 	    && INTVAL (index) > -1024
 	    && (INTVAL (index) & 3) == 0);
 
+  if (arm_address_register_rtx_p (index, strict_p)
+      && (GET_MODE_SIZE (mode) <= 4))
+    return 1;
+
+  /* This handles DFmode only if !TARGET_HARD_FLOAT.  */
+  if (mode == DImode || mode == DFmode)
+    {
+      if (code == CONST_INT)
+	{
+	  HOST_WIDE_INT val = INTVAL (index);
+
+	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
+	     If vldr is selected it uses arm_coproc_mem_operand.  */
+	  if (TARGET_LDRD)
+	    return val > -256 && val < 256;
+	  else
+	    return val > -4096 && val < 4092;
+	}
+
+      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
+    }
+
   /* For quad modes, we restrict the constant offset to be slightly less
      than what the instruction format permits.  We do this because for
      quad mode moves, we will actually decompose them into two separate
@@ -8864,7 +8886,7 @@  arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
 	    && (INTVAL (index) & 3) == 0);
 
   /* We have no such constraint on double mode offsets, so we permit the
-     full range of the instruction format.  */
+     full range of the instruction format.  Note DImode is included here.  */
   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
     return (code == CONST_INT
 	    && INTVAL (index) < 1024
@@ -8877,27 +8899,6 @@  arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
 	    && INTVAL (index) > -1024
 	    && (INTVAL (index) & 3) == 0);
 
-  if (arm_address_register_rtx_p (index, strict_p)
-      && (GET_MODE_SIZE (mode) <= 4))
-    return 1;
-
-  if (mode == DImode || mode == DFmode)
-    {
-      if (code == CONST_INT)
-	{
-	  HOST_WIDE_INT val = INTVAL (index);
-
-	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
-	     If vldr is selected it uses arm_coproc_mem_operand.  */
-	  if (TARGET_LDRD)
-	    return val > -256 && val < 256;
-	  else
-	    return val > -4096 && val < 4092;
-	}
-
-      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
-    }
-
   if (GET_MODE_SIZE (mode) <= 4
       && ! (arm_arch4
 	    && (mode == HImode
@@ -9000,7 +9001,7 @@  thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
 	    && (INTVAL (index) & 3) == 0);
 
   /* We have no such constraint on double mode offsets, so we permit the
-     full range of the instruction format.  */
+     full range of the instruction format.  Note DImode is included here.  */
   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
     return (code == CONST_INT
 	    && INTVAL (index) < 1024
@@ -9011,6 +9012,7 @@  thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
       && (GET_MODE_SIZE (mode) <= 4))
     return 1;
 
+  /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE.  */
   if (mode == DImode || mode == DFmode)
     {
       if (code == CONST_INT)
@@ -20854,7 +20856,7 @@  output_move_neon (rtx *operands)
 	    /* We're only using DImode here because it's a convenient
 	       size.  */
 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
-	    ops[1] = adjust_address (mem, DImode, 8 * i);
+	    ops[1] = adjust_address (mem, DFmode, 8 * i);
 	    if (reg_overlap_mentioned_p (ops[0], mem))
 	      {
 		gcc_assert (overlap == -1);
@@ -20872,7 +20874,7 @@  output_move_neon (rtx *operands)
 	if (overlap != -1)
 	  {
 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
-	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
+	    ops[1] = adjust_address (mem, DFmode, 8 * overlap);
 	    if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
 	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
 	    else
diff --git a/gcc/testsuite/gcc.target/arm/pr115153.c b/gcc/testsuite/gcc.target/arm/pr115153.c
new file mode 100644
index 0000000000000000000000000000000000000000..80b57acf87ec667123146873afab1cd4a581e7f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr115153.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -marm" } */
+/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */
+/* { dg-add-options arm_arch_v7ve_neon } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+**	add	r0, r0, #256
+**	ldrd	r0, r1, \[r0\]
+**	bx	lr
+*/
+long long f1 (long long *p)
+{
+  return __atomic_load_n (p + 32, __ATOMIC_RELAXED);
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 5c0a3dade222777402d2a44e63cb5be305341471..20ea318bba65ff7685d1378bb898a15cb6c4f00b 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5518,6 +5518,8 @@  foreach { armfunc armflag armdefs } {
 	v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__
 	v7ve "-march=armv7ve+fp -marm"
 		"__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV"
+	v7ve_neon "-march=armv7ve+simd -mfpu=auto -mfloat-abi=softfp"
+		  "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV && __ARM_NEON__"
 	v8a "-march=armv8-a+simd" __ARM_ARCH_8A__
 	v8a_hard "-march=armv8-a+simd -mfpu=auto -mfloat-abi=hard" __ARM_ARCH_8A__
 	v8_1a "-march=armv8.1-a+simd" __ARM_ARCH_8A__