Message ID | AM5PR0802MB2610A1348D39A6FDCDA93E5683600@AM5PR0802MB2610.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
On 05/01/17 18:32, Wilco Dijkstra wrote: > With -fpu=neon DI mode shifts are expanded after reload. DI mode registers can > either fully or partially overlap on both ARM and Thumb-2. However the shift > expansion code can only deal with the full overlap case, and generates incorrect > code for partial overlaps. The fix is to add new variants that support either > full overlap or no overlap. > > Bootstrap & regress on arm-linux-gnueabihf OK on GCC6 branch. > > OK for backport? Ok as it fixes a wrong-code issue. Thanks, Kyrill > ChangeLog: > 2017-01-05 Wilco Dijkstra <wdijkstr@arm.com> > > gcc/ > PR target/78041 > * config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants. > Remove partial overlap check for shift by 1. > (ashldi3_neon): Likewise. > testsuite/ > * gcc.target/arm/pr78041.c: New test. > -- > > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index 879c07c13b6aa20c46828d08f5a4f413a5722eca..1d51c4045a1779fbd7927c94cdd6752b95cdabc7 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -1045,12 +1045,12 @@ > ) > > (define_insn_and_split "ashldi3_neon" > - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w") > - (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w") > - (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i"))) > - (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X")) > - (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X")) > - (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X")) > + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w") > + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w") > + (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i"))) > + (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X")) > + (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X")) > + (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X")) > (clobber (reg:CC_C CC_REGNUM))] > "TARGET_NEON" > "#" > @@ -1082,9 +1082,11 @@ > } > else > { > - if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1 > - && (!reg_overlap_mentioned_p (operands[0], operands[1]) > - || REGNO (operands[0]) == REGNO (operands[1]))) > + /* The shift expanders support either full overlap or no overlap. */ > + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) > + || REGNO (operands[0]) == REGNO (operands[1])); > + > + if (operands[2] == CONST1_RTX (SImode)) > /* This clobbers CC. */ > emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); > else > @@ -1093,8 +1095,8 @@ > } > DONE; > }" > - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > - (set_attr "opt" "*,*,speed,speed,*,*") > + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > + (set_attr "opt" "*,*,speed,speed,speed,*,*") > (set_attr "type" "multiple")] > ) > > @@ -1143,12 +1145,12 @@ > ;; ashrdi3_neon > ;; lshrdi3_neon > (define_insn_and_split "<shift>di3_neon" > - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") > - (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") > - (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) > - (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) > - (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) > - (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X")) > + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w") > + (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w") > + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i"))) > + (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X")) > + (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) > + (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X")) > (clobber (reg:CC CC_REGNUM))] > "TARGET_NEON" > "#" > @@ -1184,9 +1186,11 @@ > } > else > { > - if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1 > - && (!reg_overlap_mentioned_p (operands[0], operands[1]) > - || REGNO (operands[0]) == REGNO (operands[1]))) > + /* The shift expanders support either full overlap or no overlap. */ > + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) > + || REGNO (operands[0]) == REGNO (operands[1])); > + > + if (operands[2] == CONST1_RTX (SImode)) > /* This clobbers CC. */ > emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); > else > @@ -1197,8 +1201,8 @@ > > DONE; > }" > - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > - (set_attr "opt" "*,*,speed,speed,*,*") > + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > + (set_attr "opt" "*,*,speed,speed,speed,*,*") > (set_attr "type" "multiple")] > ) > > diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c b/gcc/testsuite/gcc.target/arm/pr78041.c > new file mode 100644 > index 0000000000000000000000000000000000000000..340ab5cb433b28ca7d47e236fee93581e7c195c4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/pr78041.c > @@ -0,0 +1,20 @@ > +/* { dg-require-effective-target arm_thumb2_ok } */ > +/* { dg-require-effective-target arm_neon_ok } */ > +/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */ > + > +extern void abort (void); > + > +register long long x asm ("r1"); > + > +long long f (void) > +{ > + return x << 5; > +} > + > +int main () > +{ > + x = 0x0100000001; > + if (f () != 0x2000000020) > + abort (); > + return 0; > +}
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 879c07c13b6aa20c46828d08f5a4f413a5722eca..1d51c4045a1779fbd7927c94cdd6752b95cdabc7 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1045,12 +1045,12 @@ ) (define_insn_and_split "ashldi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w") - (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w") - (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i"))) - (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X")) - (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X")) - (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X")) + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w") + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w") + (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i"))) + (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X")) + (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X")) (clobber (reg:CC_C CC_REGNUM))] "TARGET_NEON" "#" @@ -1082,9 +1082,11 @@ } else { - if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1 - && (!reg_overlap_mentioned_p (operands[0], operands[1]) - || REGNO (operands[0]) == REGNO (operands[1]))) + /* The shift expanders support either full overlap or no overlap. */ + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) == REGNO (operands[1])); + + if (operands[2] == CONST1_RTX (SImode)) /* This clobbers CC. */ emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); else @@ -1093,8 +1095,8 @@ } DONE; }" - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "opt" "*,*,speed,speed,*,*") + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "opt" "*,*,speed,speed,speed,*,*") (set_attr "type" "multiple")] ) @@ -1143,12 +1145,12 @@ ;; ashrdi3_neon ;; lshrdi3_neon (define_insn_and_split "<shift>di3_neon" - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") - (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") - (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) - (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) - (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) - (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X")) + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w") + (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w") + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i"))) + (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X")) + (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X")) (clobber (reg:CC CC_REGNUM))] "TARGET_NEON" "#" @@ -1184,9 +1186,11 @@ } else { - if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1 - && (!reg_overlap_mentioned_p (operands[0], operands[1]) - || REGNO (operands[0]) == REGNO (operands[1]))) + /* The shift expanders support either full overlap or no overlap. */ + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) == REGNO (operands[1])); + + if (operands[2] == CONST1_RTX (SImode)) /* This clobbers CC. */ emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); else @@ -1197,8 +1201,8 @@ DONE; }" - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "opt" "*,*,speed,speed,*,*") + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "opt" "*,*,speed,speed,speed,*,*") (set_attr "type" "multiple")] ) diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c b/gcc/testsuite/gcc.target/arm/pr78041.c new file mode 100644 index 0000000000000000000000000000000000000000..340ab5cb433b28ca7d47e236fee93581e7c195c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr78041.c @@ -0,0 +1,20 @@ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */ + +extern void abort (void); + +register long long x asm ("r1"); + +long long f (void) +{ + return x << 5; +} + +int main () +{ + x = 0x0100000001; + if (f () != 0x2000000020) + abort (); + return 0; +}