Message ID | Z1QeVA9N4fgxRE4Z@tucnak |
---|---|
State | New |
Headers | show |
Series | i386: x r<< (c - y) to x r>> y etc. optimization [PR117930] | expand |
On Sat, Dec 7, 2024 at 11:07 AM Jakub Jelinek <jakub@redhat.com> wrote:

> > Hi! > > The following patch optimizes x r<< (c - y) to x r>> y, > x r>> (c - y) to x r<< y, x r<< (c + y) to x r<< y and > x r>> (c + y) to x r>> y if c is a multiple of x's bitsize. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk > or do you want s/Other/Counter/ and s/orotate/crotate/ changes in > it?

Yes, please change the term. "Other rotate" is not that informative.

> 2024-12-07 Jakub Jelinek <jakub@redhat.com> > > PR target/117930 > * config/i386/i386.md (orotate): New define_code_attr. > (*<insn><mode>3_add, *<insn><mode>3_add_1, > *<insn><mode>3_sub, *<insn><mode>3_sub_1): New define_insn_and_split > patterns plus following define_split for constant first input > operand. > > * gcc.target/i386/pr117930.c: New test.

OK with the above change. Thanks, Uros.

> > --- gcc/config/i386/i386.md.jj 2024-12-05 18:29:46.211399587 +0100 > +++ gcc/config/i386/i386.md 2024-12-06 10:47:12.097770379 +0100 > @@ -1079,6 +1079,9 @@ (define_code_iterator any_rotate [rotate > ;; Base name for insn mnemonic. > (define_code_attr rotate [(rotate "rol") (rotatert "ror")]) > > +;; Other rotate. 
> +(define_code_attr orotate [(rotate "rotatert") (rotatert "rotate")]) > + > ;; Mapping of abs neg operators > (define_code_iterator absneg [abs neg]) > > @@ -18216,6 +18219,144 @@ (define_split > (any_rotate:SWI (match_dup 4) (match_dup 2)))] > "operands[4] = gen_reg_rtx (<MODE>mode);") > > +(define_insn_and_split "*<insn><mode>3_add" > + [(set (match_operand:SWI 0 "nonimmediate_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "nonimmediate_operand") > + (subreg:QI > + (plus > + (match_operand 2 "int_nonimmediate_operand") > + (match_operand 3 "const_int_operand")) 0))) > + (clobber (reg:CC FLAGS_REG))] > + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) > + && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0 > + && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(parallel > + [(set (match_dup 0) > + (any_rotate:SWI (match_dup 1) (match_dup 2))) > + (clobber (reg:CC FLAGS_REG))])] > +{ > + operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); > + operands[2] = gen_lowpart (QImode, operands[2]); > +}) > + > +(define_split > + [(set (match_operand:SWI 0 "register_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "const_int_operand") > + (subreg:QI > + (plus > + (match_operand 2 "int248_register_operand") > + (match_operand 3 "const_int_operand")) 0)))] > + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" > + [(set (match_dup 4) (match_dup 1)) > + (set (match_dup 0) > + (any_rotate:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))] > + "operands[4] = gen_reg_rtx (<MODE>mode);") > + > +(define_insn_and_split "*<insn><mode>3_add_1" > + [(set (match_operand:SWI 0 "nonimmediate_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "nonimmediate_operand") > + (plus:QI > + (match_operand:QI 2 "nonimmediate_operand") > + (match_operand:QI 3 "const_int_operand")))) > + (clobber (reg:CC FLAGS_REG))] > + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) > + && (INTVAL (operands[3]) & (<MODE_SIZE> * 
BITS_PER_UNIT - 1)) == 0 > + && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(parallel > + [(set (match_dup 0) > + (any_rotate:SWI (match_dup 1) (match_dup 2))) > + (clobber (reg:CC FLAGS_REG))])] > + "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);") > + > +(define_split > + [(set (match_operand:SWI 0 "register_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "const_int_operand") > + (plus:QI > + (match_operand:QI 2 "register_operand") > + (match_operand:QI 3 "const_int_operand"))))] > + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" > + [(set (match_dup 4) (match_dup 1)) > + (set (match_dup 0) > + (any_rotate:SWI (match_dup 4) (match_dup 2)))] > + "operands[4] = gen_reg_rtx (<MODE>mode);") > + > +(define_insn_and_split "*<insn><mode>3_sub" > + [(set (match_operand:SWI 0 "nonimmediate_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "nonimmediate_operand") > + (subreg:QI > + (minus > + (match_operand 3 "const_int_operand") > + (match_operand 2 "int_nonimmediate_operand")) 0))) > + (clobber (reg:CC FLAGS_REG))] > + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) > + && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0 > + && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(parallel > + [(set (match_dup 0) > + (<orotate>:SWI (match_dup 1) (match_dup 2))) > + (clobber (reg:CC FLAGS_REG))])] > +{ > + operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); > + operands[2] = gen_lowpart (QImode, operands[2]); > +}) > + > +(define_split > + [(set (match_operand:SWI 0 "register_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "const_int_operand") > + (subreg:QI > + (minus > + (match_operand 3 "const_int_operand") > + (match_operand 2 "int248_register_operand")) 0)))] > + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" > + [(set (match_dup 4) (match_dup 1)) > + (set (match_dup 0) > + (<orotate>:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))] > + "operands[4] = 
gen_reg_rtx (<MODE>mode);") > + > +(define_insn_and_split "*<insn><mode>3_sub_1" > + [(set (match_operand:SWI 0 "nonimmediate_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "nonimmediate_operand") > + (minus:QI > + (match_operand:QI 3 "const_int_operand") > + (match_operand:QI 2 "nonimmediate_operand")))) > + (clobber (reg:CC FLAGS_REG))] > + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) > + && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0 > + && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(parallel > + [(set (match_dup 0) > + (<orotate>:SWI (match_dup 1) (match_dup 2))) > + (clobber (reg:CC FLAGS_REG))])] > + "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);") > + > +(define_split > + [(set (match_operand:SWI 0 "register_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "const_int_operand") > + (minus:QI > + (match_operand:QI 3 "const_int_operand") > + (match_operand:QI 2 "register_operand"))))] > + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" > + [(set (match_dup 4) (match_dup 1)) > + (set (match_dup 0) > + (<orotate>:SWI (match_dup 4) (match_dup 2)))] > + "operands[4] = gen_reg_rtx (<MODE>mode);") > + > ;; Implement rotation using two double-precision > ;; shift instructions and a scratch register. 
> > --- gcc/testsuite/gcc.target/i386/pr117930.c.jj 2024-12-06 10:33:22.558446906 +0100 > +++ gcc/testsuite/gcc.target/i386/pr117930.c 2024-12-06 10:34:58.143101309 +0100 > @@ -0,0 +1,118 @@ > +/* PR target/117930 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler-not "sub\[bwlq\]\t" } } */ > +/* { dg-final { scan-assembler-not "add\[bwlq\]\t" } } */ > +/* { dg-final { scan-assembler-not "lea\[lq\]\t" } } */ > + > +static inline > +unsigned lrotate (unsigned x, int t) > +{ > + unsigned tl = x << t; > + unsigned th = x >> (-t & 31); > + return tl | th; > +} > + > +static inline > +unsigned rrotate (unsigned x, int t) > +{ > + unsigned tl = x >> t; > + unsigned th = x << (-t & 31); > + return tl | th; > +} > + > +unsigned > +f1 (unsigned x, int t) > +{ > + return lrotate (x, 32 - t); > +} > + > +unsigned > +f2 (unsigned x, int t) > +{ > + return lrotate (x, 64 - t); > +} > + > +unsigned > +f3 (unsigned x, int t) > +{ > + return lrotate (x, 32 + t); > +} > + > +unsigned > +f4 (unsigned x, int t) > +{ > + return lrotate (x, 64 + t); > +} > + > +unsigned > +f5 (unsigned x, int t) > +{ > + return rrotate (x, 32 - t); > +} > + > +unsigned > +f6 (unsigned x, int t) > +{ > + return rrotate (x, 64 - t); > +} > + > +unsigned > +f7 (unsigned x, int t) > +{ > + return rrotate (x, 32 + t); > +} > + > +unsigned > +f8 (unsigned x, int t) > +{ > + return rrotate (x, 64 + t); > +} > + > +unsigned > +f9 (int t) > +{ > + return lrotate (0xdeadbeefU, 32 - t); > +} > + > +unsigned > +f10 (int t) > +{ > + return lrotate (0xdeadbeefU, 64 - t); > +} > + > +unsigned > +f11 (int t) > +{ > + return lrotate (0xdeadbeefU, 32 + t); > +} > + > +unsigned > +f12 (int t) > +{ > + return lrotate (0xdeadbeefU, 64 + t); > +} > + > +unsigned > +f13 (int t) > +{ > + return rrotate (0xdeadbeefU, 32 - t); > +} > + > +unsigned > +f14 (int t) > +{ > + return rrotate (0xdeadbeefU, 64 - t); > +} > + > +unsigned > +f15 (int t) > +{ > + return rrotate (0xdeadbeefU, 32 + 
t); > +} > + > +unsigned > +f16 (int t) > +{ > + return rrotate (0xdeadbeefU, 64 + t); > +} > > Jakub >
--- gcc/config/i386/i386.md.jj 2024-12-05 18:29:46.211399587 +0100 +++ gcc/config/i386/i386.md 2024-12-06 10:47:12.097770379 +0100 @@ -1079,6 +1079,9 @@ (define_code_iterator any_rotate [rotate ;; Base name for insn mnemonic. (define_code_attr rotate [(rotate "rol") (rotatert "ror")]) +;; Other rotate. +(define_code_attr orotate [(rotate "rotatert") (rotatert "rotate")]) + ;; Mapping of abs neg operators (define_code_iterator absneg [abs neg]) @@ -18216,6 +18219,144 @@ (define_split (any_rotate:SWI (match_dup 4) (match_dup 2)))] "operands[4] = gen_reg_rtx (<MODE>mode);") +(define_insn_and_split "*<insn><mode>3_add" + [(set (match_operand:SWI 0 "nonimmediate_operand") + (any_rotate:SWI + (match_operand:SWI 1 "nonimmediate_operand") + (subreg:QI + (plus + (match_operand 2 "int_nonimmediate_operand") + (match_operand 3 "const_int_operand")) 0))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) + && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0 + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (any_rotate:SWI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_split + [(set (match_operand:SWI 0 "register_operand") + (any_rotate:SWI + (match_operand:SWI 1 "const_int_operand") + (subreg:QI + (plus + (match_operand 2 "int248_register_operand") + (match_operand 3 "const_int_operand")) 0)))] + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" + [(set (match_dup 4) (match_dup 1)) + (set (match_dup 0) + (any_rotate:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))] + "operands[4] = gen_reg_rtx (<MODE>mode);") + +(define_insn_and_split "*<insn><mode>3_add_1" + [(set (match_operand:SWI 0 "nonimmediate_operand") + (any_rotate:SWI + (match_operand:SWI 1 "nonimmediate_operand") + (plus:QI + (match_operand:QI 2 "nonimmediate_operand") 
+ (match_operand:QI 3 "const_int_operand")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) + && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0 + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (any_rotate:SWI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);") + +(define_split + [(set (match_operand:SWI 0 "register_operand") + (any_rotate:SWI + (match_operand:SWI 1 "const_int_operand") + (plus:QI + (match_operand:QI 2 "register_operand") + (match_operand:QI 3 "const_int_operand"))))] + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" + [(set (match_dup 4) (match_dup 1)) + (set (match_dup 0) + (any_rotate:SWI (match_dup 4) (match_dup 2)))] + "operands[4] = gen_reg_rtx (<MODE>mode);") + +(define_insn_and_split "*<insn><mode>3_sub" + [(set (match_operand:SWI 0 "nonimmediate_operand") + (any_rotate:SWI + (match_operand:SWI 1 "nonimmediate_operand") + (subreg:QI + (minus + (match_operand 3 "const_int_operand") + (match_operand 2 "int_nonimmediate_operand")) 0))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) + && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0 + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (<orotate>:SWI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_split + [(set (match_operand:SWI 0 "register_operand") + (any_rotate:SWI + (match_operand:SWI 1 "const_int_operand") + (subreg:QI + (minus + (match_operand 3 "const_int_operand") + (match_operand 2 "int248_register_operand")) 0)))] + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" + [(set (match_dup 4) (match_dup 1)) + (set (match_dup 0) + (<orotate>:SWI 
(match_dup 4) (subreg:QI (match_dup 2) 0)))] + "operands[4] = gen_reg_rtx (<MODE>mode);") + +(define_insn_and_split "*<insn><mode>3_sub_1" + [(set (match_operand:SWI 0 "nonimmediate_operand") + (any_rotate:SWI + (match_operand:SWI 1 "nonimmediate_operand") + (minus:QI + (match_operand:QI 3 "const_int_operand") + (match_operand:QI 2 "nonimmediate_operand")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) + && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0 + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (<orotate>:SWI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);") + +(define_split + [(set (match_operand:SWI 0 "register_operand") + (any_rotate:SWI + (match_operand:SWI 1 "const_int_operand") + (minus:QI + (match_operand:QI 3 "const_int_operand") + (match_operand:QI 2 "register_operand"))))] + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" + [(set (match_dup 4) (match_dup 1)) + (set (match_dup 0) + (<orotate>:SWI (match_dup 4) (match_dup 2)))] + "operands[4] = gen_reg_rtx (<MODE>mode);") + ;; Implement rotation using two double-precision ;; shift instructions and a scratch register. 
--- gcc/testsuite/gcc.target/i386/pr117930.c.jj 2024-12-06 10:33:22.558446906 +0100 +++ gcc/testsuite/gcc.target/i386/pr117930.c 2024-12-06 10:34:58.143101309 +0100 @@ -0,0 +1,118 @@ +/* PR target/117930 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-not "sub\[bwlq\]\t" } } */ +/* { dg-final { scan-assembler-not "add\[bwlq\]\t" } } */ +/* { dg-final { scan-assembler-not "lea\[lq\]\t" } } */ + +static inline +unsigned lrotate (unsigned x, int t) +{ + unsigned tl = x << t; + unsigned th = x >> (-t & 31); + return tl | th; +} + +static inline +unsigned rrotate (unsigned x, int t) +{ + unsigned tl = x >> t; + unsigned th = x << (-t & 31); + return tl | th; +} + +unsigned +f1 (unsigned x, int t) +{ + return lrotate (x, 32 - t); +} + +unsigned +f2 (unsigned x, int t) +{ + return lrotate (x, 64 - t); +} + +unsigned +f3 (unsigned x, int t) +{ + return lrotate (x, 32 + t); +} + +unsigned +f4 (unsigned x, int t) +{ + return lrotate (x, 64 + t); +} + +unsigned +f5 (unsigned x, int t) +{ + return rrotate (x, 32 - t); +} + +unsigned +f6 (unsigned x, int t) +{ + return rrotate (x, 64 - t); +} + +unsigned +f7 (unsigned x, int t) +{ + return rrotate (x, 32 + t); +} + +unsigned +f8 (unsigned x, int t) +{ + return rrotate (x, 64 + t); +} + +unsigned +f9 (int t) +{ + return lrotate (0xdeadbeefU, 32 - t); +} + +unsigned +f10 (int t) +{ + return lrotate (0xdeadbeefU, 64 - t); +} + +unsigned +f11 (int t) +{ + return lrotate (0xdeadbeefU, 32 + t); +} + +unsigned +f12 (int t) +{ + return lrotate (0xdeadbeefU, 64 + t); +} + +unsigned +f13 (int t) +{ + return rrotate (0xdeadbeefU, 32 - t); +} + +unsigned +f14 (int t) +{ + return rrotate (0xdeadbeefU, 64 - t); +} + +unsigned +f15 (int t) +{ + return rrotate (0xdeadbeefU, 32 + t); +} + +unsigned +f16 (int t) +{ + return rrotate (0xdeadbeefU, 64 + t); +}