diff mbox

[ARM] Backport - Avoid partial overlaps in DImode shifts *

Message ID AM5PR0802MB2610A1348D39A6FDCDA93E5683600@AM5PR0802MB2610.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Wilco Dijkstra Jan. 5, 2017, 6:32 p.m. UTC
With -fpu=neon DI mode shifts are expanded after reload.  DI mode registers can 
either fully or partially overlap on both ARM and Thumb-2.  However the shift
expansion code can only deal with the full overlap case, and generates incorrect
code for partial overlaps.  The fix is to add new variants that support either
full overlap or no overlap.

Bootstrap & regress on arm-linux-gnueabihf OK on GCC6 branch.

OK for backport?

ChangeLog:
2017-01-05  Wilco Dijkstra  <wdijkstr@arm.com>

    gcc/
	PR target/78041
	* config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants.
	Remove partial overlap check for shift by 1.
	(ashldi3_neon): Likewise.
    testsuite/
	* gcc.target/arm/pr78041.c: New test.
--

Comments

Kyrill Tkachov Jan. 6, 2017, 10:03 a.m. UTC | #1
On 05/01/17 18:32, Wilco Dijkstra wrote:
> With -fpu=neon DI mode shifts are expanded after reload.  DI mode registers can
> either fully or partially overlap on both ARM and Thumb-2.  However the shift
> expansion code can only deal with the full overlap case, and generates incorrect
> code for partial overlaps.  The fix is to add new variants that support either
> full overlap or no overlap.
>
> Bootstrap & regress on arm-linux-gnueabihf OK on GCC6 branch.
>
> OK for backport?

Ok as it fixes a wrong-code issue.
Thanks,
Kyrill

> ChangeLog:
> 2017-01-05  Wilco Dijkstra  <wdijkstr@arm.com>
>
>      gcc/
> 	PR target/78041
> 	* config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants.
> 	Remove partial overlap check for shift by 1.
> 	(ashldi3_neon): Likewise.
>      testsuite/
> 	* gcc.target/arm/pr78041.c: New test.
> --
>
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 879c07c13b6aa20c46828d08f5a4f413a5722eca..1d51c4045a1779fbd7927c94cdd6752b95cdabc7 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -1045,12 +1045,12 @@
>   )
>   
>   (define_insn_and_split "ashldi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand"	    "= w, w,?&r,?r, ?w,w")
> -	(ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
> -		   (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,rUm,i")))
> -   (clobber (match_scratch:SI 3				    "= X, X,?&r, X,  X,X"))
> -   (clobber (match_scratch:SI 4				    "= X, X,?&r, X,  X,X"))
> -   (clobber (match_scratch:DI 5				    "=&w, X,  X, X, &w,X"))
> +  [(set (match_operand:DI 0 "s_register_operand"	    "= w, w,?&r,?r,?&r, ?w,w")
> +	(ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 0w,w")
> +		   (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,  i,rUm,i")))
> +   (clobber (match_scratch:SI 3				    "= X, X,?&r, X,  X,  X,X"))
> +   (clobber (match_scratch:SI 4				    "= X, X,?&r, X,  X,  X,X"))
> +   (clobber (match_scratch:DI 5				    "=&w, X,  X, X,  X, &w,X"))
>      (clobber (reg:CC_C CC_REGNUM))]
>     "TARGET_NEON"
>     "#"
> @@ -1082,9 +1082,11 @@
>         }
>       else
>         {
> -	if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
> -	    && (!reg_overlap_mentioned_p (operands[0], operands[1])
> -		|| REGNO (operands[0]) == REGNO (operands[1])))
> +	/* The shift expanders support either full overlap or no overlap.  */
> +	gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
> +		    || REGNO (operands[0]) == REGNO (operands[1]));
> +
> +	if (operands[2] == CONST1_RTX (SImode))
>   	  /* This clobbers CC.  */
>   	  emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
>   	else
> @@ -1093,8 +1095,8 @@
>         }
>       DONE;
>     }"
> -  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> -   (set_attr "opt" "*,*,speed,speed,*,*")
> +  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> +   (set_attr "opt" "*,*,speed,speed,speed,*,*")
>      (set_attr "type" "multiple")]
>   )
>   
> @@ -1143,12 +1145,12 @@
>   ;; ashrdi3_neon
>   ;; lshrdi3_neon
>   (define_insn_and_split "<shift>di3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand"	     "= w, w,?&r,?r,?w,?w")
> -	(RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
> -		    (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i, r, i")))
> -   (clobber (match_scratch:SI 3				     "=2r, X, &r, X,2r, X"))
> -   (clobber (match_scratch:SI 4				     "= X, X, &r, X, X, X"))
> -   (clobber (match_scratch:DI 5				     "=&w, X,  X, X,&w, X"))
> +  [(set (match_operand:DI 0 "s_register_operand"	     "= w, w,?&r,?r,?&r,?w,?w")
> +	(RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r,0w, w")
> +		    (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  i, r, i")))
> +   (clobber (match_scratch:SI 3				     "=2r, X, &r, X,  X,2r, X"))
> +   (clobber (match_scratch:SI 4				     "= X, X, &r, X,  X, X, X"))
> +   (clobber (match_scratch:DI 5				     "=&w, X,  X, X, X,&w, X"))
>      (clobber (reg:CC CC_REGNUM))]
>     "TARGET_NEON"
>     "#"
> @@ -1184,9 +1186,11 @@
>         }
>       else
>         {
> -	if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
> -	    && (!reg_overlap_mentioned_p (operands[0], operands[1])
> -		|| REGNO (operands[0]) == REGNO (operands[1])))
> +	/* The shift expanders support either full overlap or no overlap.  */
> +	gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
> +		    || REGNO (operands[0]) == REGNO (operands[1]));
> +
> +	if (operands[2] == CONST1_RTX (SImode))
>   	  /* This clobbers CC.  */
>   	  emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
>   	else
> @@ -1197,8 +1201,8 @@
>   
>       DONE;
>     }"
> -  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> -   (set_attr "opt" "*,*,speed,speed,*,*")
> +  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> +   (set_attr "opt" "*,*,speed,speed,speed,*,*")
>      (set_attr "type" "multiple")]
>   )
>   
> diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c b/gcc/testsuite/gcc.target/arm/pr78041.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..340ab5cb433b28ca7d47e236fee93581e7c195c4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr78041.c
> @@ -0,0 +1,20 @@
> +/* { dg-require-effective-target arm_thumb2_ok } */
> +/* { dg-require-effective-target arm_neon_ok } */
> +/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */
> +
> +extern void abort (void);
> +
> +register long long x asm ("r1");
> +
> +long long f (void)
> +{
> +  return x << 5;
> +}
> +
> +int main ()
> +{
> +  x = 0x0100000001;
> +  if (f () != 0x2000000020)
> +    abort ();
> +  return 0;
> +}
diff mbox

Patch

diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 879c07c13b6aa20c46828d08f5a4f413a5722eca..1d51c4045a1779fbd7927c94cdd6752b95cdabc7 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1045,12 +1045,12 @@ 
 )
 
 (define_insn_and_split "ashldi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand"	    "= w, w,?&r,?r, ?w,w")
-	(ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
-		   (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,rUm,i")))
-   (clobber (match_scratch:SI 3				    "= X, X,?&r, X,  X,X"))
-   (clobber (match_scratch:SI 4				    "= X, X,?&r, X,  X,X"))
-   (clobber (match_scratch:DI 5				    "=&w, X,  X, X, &w,X"))
+  [(set (match_operand:DI 0 "s_register_operand"	    "= w, w,?&r,?r,?&r, ?w,w")
+	(ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 0w,w")
+		   (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,  i,rUm,i")))
+   (clobber (match_scratch:SI 3				    "= X, X,?&r, X,  X,  X,X"))
+   (clobber (match_scratch:SI 4				    "= X, X,?&r, X,  X,  X,X"))
+   (clobber (match_scratch:DI 5				    "=&w, X,  X, X,  X, &w,X"))
    (clobber (reg:CC_C CC_REGNUM))]
   "TARGET_NEON"
   "#"
@@ -1082,9 +1082,11 @@ 
       }
     else
       {
-	if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
-	    && (!reg_overlap_mentioned_p (operands[0], operands[1])
-		|| REGNO (operands[0]) == REGNO (operands[1])))
+	/* The shift expanders support either full overlap or no overlap.  */
+	gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
+		    || REGNO (operands[0]) == REGNO (operands[1]));
+
+	if (operands[2] == CONST1_RTX (SImode))
 	  /* This clobbers CC.  */
 	  emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
 	else
@@ -1093,8 +1095,8 @@ 
       }
     DONE;
   }"
-  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
-   (set_attr "opt" "*,*,speed,speed,*,*")
+  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
+   (set_attr "opt" "*,*,speed,speed,speed,*,*")
    (set_attr "type" "multiple")]
 )
 
@@ -1143,12 +1145,12 @@ 
 ;; ashrdi3_neon
 ;; lshrdi3_neon
 (define_insn_and_split "<shift>di3_neon"
-  [(set (match_operand:DI 0 "s_register_operand"	     "= w, w,?&r,?r,?w,?w")
-	(RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
-		    (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i, r, i")))
-   (clobber (match_scratch:SI 3				     "=2r, X, &r, X,2r, X"))
-   (clobber (match_scratch:SI 4				     "= X, X, &r, X, X, X"))
-   (clobber (match_scratch:DI 5				     "=&w, X,  X, X,&w, X"))
+  [(set (match_operand:DI 0 "s_register_operand"	     "= w, w,?&r,?r,?&r,?w,?w")
+	(RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r,0w, w")
+		    (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  i, r, i")))
+   (clobber (match_scratch:SI 3				     "=2r, X, &r, X,  X,2r, X"))
+   (clobber (match_scratch:SI 4				     "= X, X, &r, X,  X, X, X"))
+   (clobber (match_scratch:DI 5				     "=&w, X,  X, X, X,&w, X"))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
   "#"
@@ -1184,9 +1186,11 @@ 
       }
     else
       {
-	if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
-	    && (!reg_overlap_mentioned_p (operands[0], operands[1])
-		|| REGNO (operands[0]) == REGNO (operands[1])))
+	/* The shift expanders support either full overlap or no overlap.  */
+	gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
+		    || REGNO (operands[0]) == REGNO (operands[1]));
+
+	if (operands[2] == CONST1_RTX (SImode))
 	  /* This clobbers CC.  */
 	  emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
 	else
@@ -1197,8 +1201,8 @@ 
 
     DONE;
   }"
-  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
-   (set_attr "opt" "*,*,speed,speed,*,*")
+  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
+   (set_attr "opt" "*,*,speed,speed,speed,*,*")
    (set_attr "type" "multiple")]
 )
 
diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c b/gcc/testsuite/gcc.target/arm/pr78041.c
new file mode 100644
index 0000000000000000000000000000000000000000..340ab5cb433b28ca7d47e236fee93581e7c195c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr78041.c
@@ -0,0 +1,20 @@ 
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */
+
+extern void abort (void);
+
+register long long x asm ("r1");
+
+long long f (void)
+{
+  return x << 5;
+}
+
+int main ()
+{
+  x = 0x0100000001;
+  if (f () != 0x2000000020)
+    abort ();
+  return 0;
+}