Message ID | AM5PR0802MB2610CD768B751CB23394B3A883A90@AM5PR0802MB2610.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
On 24/10/16 15:28, Wilco Dijkstra wrote: > With -fpu=neon DI mode shifts are expanded after reload. DI mode registers can > either fully or partially overlap. However the shift expansion code can only deal > with the full overlap case, and generates incorrect code for partial overlaps. > The fix is to add new variants that support either full overlap or no overlap. > > Bootstrap & regress on arm-linux-gnueabihf OK. > > This will need backporting to all active branches. > > ChangeLog: > 2016-10-20 Wilco Dijkstra <wdijkstr@arm.com> > > gcc/ > PR target/78041 > * config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants. > Remove partial overlap check for shift by 1. > (ashldi3_neon): Likewise. > testsuite/ > * gcc.target/arm/pr78041.c: New test. > OK. R. > -- > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index 05323334ffd81aeff33ee407b96c788d123b3fe3..59316de004107913c1db0951ced4d584999fc099 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -1143,12 +1143,12 @@ > ) > > (define_insn_and_split "ashldi3_neon" > - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w") > - (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w") > - (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i"))) > - (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X")) > - (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X")) > - (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X")) > + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w") > + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w") > + (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i"))) > + (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X")) > + (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X")) > + (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X")) > (clobber (reg:CC_C CC_REGNUM))] > "TARGET_NEON" > "#" > @@ -1180,9 +1180,11 @@ > } > else > { > - if (operands[2] == CONST1_RTX (SImode) > - && (!reg_overlap_mentioned_p (operands[0], operands[1]) > - || REGNO (operands[0]) == REGNO (operands[1]))) > + /* The shift expanders support either full overlap or no overlap. */ > + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) > + || REGNO (operands[0]) == REGNO (operands[1])); > + > + if (operands[2] == CONST1_RTX (SImode)) > /* This clobbers CC. */ > emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); > else > @@ -1191,8 +1193,8 @@ > } > DONE; > }" > - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > - (set_attr "opt" "*,*,speed,speed,*,*") > + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > + (set_attr "opt" "*,*,speed,speed,speed,*,*") > (set_attr "type" "multiple")] > ) > > @@ -1241,12 +1243,12 @@ > ;; ashrdi3_neon > ;; lshrdi3_neon > (define_insn_and_split "<shift>di3_neon" > - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") > - (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") > - (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) > - (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) > - (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) > - (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X")) > + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w") > + (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w") > + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i"))) > + (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X")) > + (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) > + (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X")) > (clobber (reg:CC CC_REGNUM))] > "TARGET_NEON" > "#" > @@ -1282,9 +1284,11 @@ > } > else > { > - if (operands[2] == CONST1_RTX (SImode) > - && (!reg_overlap_mentioned_p (operands[0], operands[1]) > - || REGNO (operands[0]) == REGNO (operands[1]))) > + /* The shift expanders support either full overlap or no overlap. */ > + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) > + || REGNO (operands[0]) == REGNO (operands[1])); > + > + if (operands[2] == CONST1_RTX (SImode)) > /* This clobbers CC. */ > emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); > else > @@ -1295,8 +1299,8 @@ > > DONE; > }" > - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > - (set_attr "opt" "*,*,speed,speed,*,*") > + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > + (set_attr "opt" "*,*,speed,speed,speed,*,*") > (set_attr "type" "multiple")] > ) > > diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c b/gcc/testsuite/gcc.target/arm/pr78041.c > new file mode 100644 > index 0000000000000000000000000000000000000000..340ab5cb433b28ca7d47e236fee93581e7c195c4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/pr78041.c > @@ -0,0 +1,20 @@ > +/* { dg-require-effective-target arm_thumb2_ok } */ > +/* { dg-require-effective-target arm_neon_ok } */ > +/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */ > + > +extern void abort (void); > + > +register long long x asm ("r1"); > + > +long long f (void) > +{ > + return x << 5; > +} > + > +int main () > +{ > + x = 0x0100000001; > + if (f () != 0x2000000020) > + abort (); > + return 0; > +} >
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 05323334ffd81aeff33ee407b96c788d123b3fe3..59316de004107913c1db0951ced4d584999fc099 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1143,12 +1143,12 @@ ) (define_insn_and_split "ashldi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w") - (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w") - (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i"))) - (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X")) - (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X")) - (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X")) + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w") + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w") + (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i"))) + (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X")) + (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X")) (clobber (reg:CC_C CC_REGNUM))] "TARGET_NEON" "#" @@ -1180,9 +1180,11 @@ } else { - if (operands[2] == CONST1_RTX (SImode) - && (!reg_overlap_mentioned_p (operands[0], operands[1]) - || REGNO (operands[0]) == REGNO (operands[1]))) + /* The shift expanders support either full overlap or no overlap. */ + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) == REGNO (operands[1])); + + if (operands[2] == CONST1_RTX (SImode)) /* This clobbers CC. */ emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); else @@ -1191,8 +1193,8 @@ } DONE; }" - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "opt" "*,*,speed,speed,*,*") + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "opt" "*,*,speed,speed,speed,*,*") (set_attr "type" "multiple")] ) @@ -1241,12 +1243,12 @@ ;; ashrdi3_neon ;; lshrdi3_neon (define_insn_and_split "<shift>di3_neon" - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") - (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") - (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) - (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) - (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) - (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X")) + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w") + (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w") + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i"))) + (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X")) + (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X")) (clobber (reg:CC CC_REGNUM))] "TARGET_NEON" "#" @@ -1282,9 +1284,11 @@ } else { - if (operands[2] == CONST1_RTX (SImode) - && (!reg_overlap_mentioned_p (operands[0], operands[1]) - || REGNO (operands[0]) == REGNO (operands[1]))) + /* The shift expanders support either full overlap or no overlap. */ + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) == REGNO (operands[1])); + + if (operands[2] == CONST1_RTX (SImode)) /* This clobbers CC. */ emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); else @@ -1295,8 +1299,8 @@ DONE; }" - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "opt" "*,*,speed,speed,*,*") + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "opt" "*,*,speed,speed,speed,*,*") (set_attr "type" "multiple")] ) diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c b/gcc/testsuite/gcc.target/arm/pr78041.c new file mode 100644 index 0000000000000000000000000000000000000000..340ab5cb433b28ca7d47e236fee93581e7c195c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr78041.c @@ -0,0 +1,20 @@ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */ + +extern void abort (void); + +register long long x asm ("r1"); + +long long f (void) +{ + return x << 5; +} + +int main () +{ + x = 0x0100000001; + if (f () != 0x2000000020) + abort (); + return 0; +}