diff mbox series

i386: x r<< (c - y) to x r>> y etc. optimization [PR117930]

Message ID Z1QeVA9N4fgxRE4Z@tucnak
State New
Headers show
Series i386: x r<< (c - y) to x r>> y etc. optimization [PR117930] | expand

Commit Message

Jakub Jelinek Dec. 7, 2024, 10:07 a.m. UTC
Hi!

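The following patch optimizes x r<< (c - y) to x r>> y,
x r>> (c - y) to x r<< y, x r<< (c + y) to x r<< y and
x r>> (c + y) to x r>> y if c is a multiple of x's bitsize.
Here r<< and r>> denote rotate left and rotate right; a rotate count
only matters modulo the bitsize, so adding such a c changes nothing
and c - y is equivalent to -y, i.e. a rotate by y in the opposite
direction.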

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk
or do you want s/Other/Counter/ and s/orotate/crotate/ changes in
it?

2024-12-07  Jakub Jelinek  <jakub@redhat.com>

	PR target/117930
	* config/i386/i386.md (orotate): New define_code_attr.
	(*<insn><mode>3_add, *<insn><mode>3_add_1,
	*<insn><mode>3_sub, *<insn><mode>3_sub_1): New define_insn_and_split
	patterns plus following define_split for constant first input
	operand.

	* gcc.target/i386/pr117930.c: New test.


	Jakub

Comments

Uros Bizjak Dec. 7, 2024, 10:10 a.m. UTC | #1
On Sat, Dec 7, 2024 at 11:07 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> The following patch optimizes x r<< (c - y) to x r>> y,
> x r>> (c - y) to x r<< y, x r<< (c + y) to x r<< y and
> x r>> (c + y) to x r>> y if c is a multiple of x's bitsize.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk
> or do you want s/Other/Counter/ and s/orotate/crotate/ changes in
> it?

Yes, please change the term. "Other rotate" is not that informative.

> 2024-12-07  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/117930
>         * config/i386/i386.md (orotate): New define_code_attr.
>         (*<insn><mode>3_add, *<insn><mode>3_add_1,
>         *<insn><mode>3_sub, *<insn><mode>3_sub_1): New define_insn_and_split
>         patterns plus following define_split for constant first input
>         operand.
>
>         * gcc.target/i386/pr117930.c: New test.

OK with the above change.

Thanks,
Uros.

>
> --- gcc/config/i386/i386.md.jj  2024-12-05 18:29:46.211399587 +0100
> +++ gcc/config/i386/i386.md     2024-12-06 10:47:12.097770379 +0100
> @@ -1079,6 +1079,9 @@ (define_code_iterator any_rotate [rotate
>  ;; Base name for insn mnemonic.
>  (define_code_attr rotate [(rotate "rol") (rotatert "ror")])
>
> +;; Other rotate.
> +(define_code_attr orotate [(rotate "rotatert") (rotatert "rotate")])
> +
>  ;; Mapping of abs neg operators
>  (define_code_iterator absneg [abs neg])
>
> @@ -18216,6 +18219,144 @@ (define_split
>         (any_rotate:SWI (match_dup 4) (match_dup 2)))]
>   "operands[4] = gen_reg_rtx (<MODE>mode);")
>
> +(define_insn_and_split "*<insn><mode>3_add"
> +  [(set (match_operand:SWI 0 "nonimmediate_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand")
> +         (subreg:QI
> +           (plus
> +             (match_operand 2 "int_nonimmediate_operand")
> +             (match_operand 3 "const_int_operand")) 0)))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
> +   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
> +   && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(parallel
> +     [(set (match_dup 0)
> +          (any_rotate:SWI (match_dup 1) (match_dup 2)))
> +      (clobber (reg:CC FLAGS_REG))])]
> +{
> +  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
> +  operands[2] = gen_lowpart (QImode, operands[2]);
> +})
> +
> +(define_split
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "const_int_operand")
> +         (subreg:QI
> +           (plus
> +             (match_operand 2 "int248_register_operand")
> +             (match_operand 3 "const_int_operand")) 0)))]
> + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
> + [(set (match_dup 4) (match_dup 1))
> +  (set (match_dup 0)
> +       (any_rotate:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))]
> + "operands[4] = gen_reg_rtx (<MODE>mode);")
> +
> +(define_insn_and_split "*<insn><mode>3_add_1"
> +  [(set (match_operand:SWI 0 "nonimmediate_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand")
> +         (plus:QI
> +           (match_operand:QI 2 "nonimmediate_operand")
> +           (match_operand:QI 3 "const_int_operand"))))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
> +   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
> +   && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(parallel
> +     [(set (match_dup 0)
> +          (any_rotate:SWI (match_dup 1) (match_dup 2)))
> +      (clobber (reg:CC FLAGS_REG))])]
> + "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);")
> +
> +(define_split
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "const_int_operand")
> +         (plus:QI
> +           (match_operand:QI 2 "register_operand")
> +           (match_operand:QI 3 "const_int_operand"))))]
> + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
> + [(set (match_dup 4) (match_dup 1))
> +  (set (match_dup 0)
> +       (any_rotate:SWI (match_dup 4) (match_dup 2)))]
> + "operands[4] = gen_reg_rtx (<MODE>mode);")
> +
> +(define_insn_and_split "*<insn><mode>3_sub"
> +  [(set (match_operand:SWI 0 "nonimmediate_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand")
> +         (subreg:QI
> +           (minus
> +             (match_operand 3 "const_int_operand")
> +             (match_operand 2 "int_nonimmediate_operand")) 0)))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
> +   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
> +   && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(parallel
> +     [(set (match_dup 0)
> +          (<orotate>:SWI (match_dup 1) (match_dup 2)))
> +      (clobber (reg:CC FLAGS_REG))])]
> +{
> +  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
> +  operands[2] = gen_lowpart (QImode, operands[2]);
> +})
> +
> +(define_split
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "const_int_operand")
> +         (subreg:QI
> +           (minus
> +             (match_operand 3 "const_int_operand")
> +             (match_operand 2 "int248_register_operand")) 0)))]
> + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
> + [(set (match_dup 4) (match_dup 1))
> +  (set (match_dup 0)
> +       (<orotate>:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))]
> + "operands[4] = gen_reg_rtx (<MODE>mode);")
> +
> +(define_insn_and_split "*<insn><mode>3_sub_1"
> +  [(set (match_operand:SWI 0 "nonimmediate_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand")
> +         (minus:QI
> +           (match_operand:QI 3 "const_int_operand")
> +           (match_operand:QI 2 "nonimmediate_operand"))))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
> +   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
> +   && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(parallel
> +     [(set (match_dup 0)
> +          (<orotate>:SWI (match_dup 1) (match_dup 2)))
> +      (clobber (reg:CC FLAGS_REG))])]
> + "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);")
> +
> +(define_split
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "const_int_operand")
> +         (minus:QI
> +           (match_operand:QI 3 "const_int_operand")
> +           (match_operand:QI 2 "register_operand"))))]
> + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
> + [(set (match_dup 4) (match_dup 1))
> +  (set (match_dup 0)
> +       (<orotate>:SWI (match_dup 4) (match_dup 2)))]
> + "operands[4] = gen_reg_rtx (<MODE>mode);")
> +
>  ;; Implement rotation using two double-precision
>  ;; shift instructions and a scratch register.
>
> --- gcc/testsuite/gcc.target/i386/pr117930.c.jj 2024-12-06 10:33:22.558446906 +0100
> +++ gcc/testsuite/gcc.target/i386/pr117930.c    2024-12-06 10:34:58.143101309 +0100
> @@ -0,0 +1,118 @@
> +/* PR target/117930 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-not "sub\[bwlq\]\t" } } */
> +/* { dg-final { scan-assembler-not "add\[bwlq\]\t" } } */
> +/* { dg-final { scan-assembler-not "lea\[lq\]\t" } } */
> +
> +static inline
> +unsigned lrotate (unsigned x, int t)
> +{
> +  unsigned tl = x << t;
> +  unsigned th = x >> (-t & 31);
> +  return tl | th;
> +}
> +
> +static inline
> +unsigned rrotate (unsigned x, int t)
> +{
> +  unsigned tl = x >> t;
> +  unsigned th = x << (-t & 31);
> +  return tl | th;
> +}
> +
> +unsigned
> +f1 (unsigned x, int t)
> +{
> +  return lrotate (x, 32 - t);
> +}
> +
> +unsigned
> +f2 (unsigned x, int t)
> +{
> +  return lrotate (x, 64 - t);
> +}
> +
> +unsigned
> +f3 (unsigned x, int t)
> +{
> +  return lrotate (x, 32 + t);
> +}
> +
> +unsigned
> +f4 (unsigned x, int t)
> +{
> +  return lrotate (x, 64 + t);
> +}
> +
> +unsigned
> +f5 (unsigned x, int t)
> +{
> +  return rrotate (x, 32 - t);
> +}
> +
> +unsigned
> +f6 (unsigned x, int t)
> +{
> +  return rrotate (x, 64 - t);
> +}
> +
> +unsigned
> +f7 (unsigned x, int t)
> +{
> +  return rrotate (x, 32 + t);
> +}
> +
> +unsigned
> +f8 (unsigned x, int t)
> +{
> +  return rrotate (x, 64 + t);
> +}
> +
> +unsigned
> +f9 (int t)
> +{
> +  return lrotate (0xdeadbeefU, 32 - t);
> +}
> +
> +unsigned
> +f10 (int t)
> +{
> +  return lrotate (0xdeadbeefU, 64 - t);
> +}
> +
> +unsigned
> +f11 (int t)
> +{
> +  return lrotate (0xdeadbeefU, 32 + t);
> +}
> +
> +unsigned
> +f12 (int t)
> +{
> +  return lrotate (0xdeadbeefU, 64 + t);
> +}
> +
> +unsigned
> +f13 (int t)
> +{
> +  return rrotate (0xdeadbeefU, 32 - t);
> +}
> +
> +unsigned
> +f14 (int t)
> +{
> +  return rrotate (0xdeadbeefU, 64 - t);
> +}
> +
> +unsigned
> +f15 (int t)
> +{
> +  return rrotate (0xdeadbeefU, 32 + t);
> +}
> +
> +unsigned
> +f16 (int t)
> +{
> +  return rrotate (0xdeadbeefU, 64 + t);
> +}
>
>         Jakub
>
diff mbox series

Patch

--- gcc/config/i386/i386.md.jj	2024-12-05 18:29:46.211399587 +0100
+++ gcc/config/i386/i386.md	2024-12-06 10:47:12.097770379 +0100
@@ -1079,6 +1079,9 @@  (define_code_iterator any_rotate [rotate
 ;; Base name for insn mnemonic.
 (define_code_attr rotate [(rotate "rol") (rotatert "ror")])
 
+;; The opposite-direction rotate code: rotate <-> rotatert.
+(define_code_attr orotate [(rotate "rotatert") (rotatert "rotate")])
+
 ;; Mapping of abs neg operators
 (define_code_iterator absneg [abs neg])
 
@@ -18216,6 +18219,144 @@  (define_split
        (any_rotate:SWI (match_dup 4) (match_dup 2)))]
  "operands[4] = gen_reg_rtx (<MODE>mode);")
 
+(define_insn_and_split "*<insn><mode>3_add"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand")
+	  (subreg:QI
+	    (plus
+	      (match_operand 2 "int_nonimmediate_operand")
+	      (match_operand 3 "const_int_operand")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_rotate:SWI (match_dup 1) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
+  operands[2] = gen_lowpart (QImode, operands[2]);
+})
+
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "const_int_operand")
+	  (subreg:QI
+	    (plus
+	      (match_operand 2 "int248_register_operand")
+	      (match_operand 3 "const_int_operand")) 0)))]
+ "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
+ [(set (match_dup 4) (match_dup 1))
+  (set (match_dup 0)
+       (any_rotate:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))]
+ "operands[4] = gen_reg_rtx (<MODE>mode);")
+
+(define_insn_and_split "*<insn><mode>3_add_1"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand")
+	  (plus:QI
+	    (match_operand:QI 2 "nonimmediate_operand")
+	    (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_rotate:SWI (match_dup 1) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);")
+
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "const_int_operand")
+	  (plus:QI
+	    (match_operand:QI 2 "register_operand")
+	    (match_operand:QI 3 "const_int_operand"))))]
+ "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
+ [(set (match_dup 4) (match_dup 1))
+  (set (match_dup 0)
+       (any_rotate:SWI (match_dup 4) (match_dup 2)))]
+ "operands[4] = gen_reg_rtx (<MODE>mode);")
+
+(define_insn_and_split "*<insn><mode>3_sub"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand")
+	  (subreg:QI
+	    (minus
+	      (match_operand 3 "const_int_operand")
+	      (match_operand 2 "int_nonimmediate_operand")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (<orotate>:SWI (match_dup 1) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
+  operands[2] = gen_lowpart (QImode, operands[2]);
+})
+
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "const_int_operand")
+	  (subreg:QI
+	    (minus
+	      (match_operand 3 "const_int_operand")
+	      (match_operand 2 "int248_register_operand")) 0)))]
+ "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
+ [(set (match_dup 4) (match_dup 1))
+  (set (match_dup 0)
+       (<orotate>:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))]
+ "operands[4] = gen_reg_rtx (<MODE>mode);")
+
+(define_insn_and_split "*<insn><mode>3_sub_1"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand")
+	  (minus:QI
+	    (match_operand:QI 3 "const_int_operand")
+	    (match_operand:QI 2 "nonimmediate_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (<orotate>:SWI (match_dup 1) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);")
+
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "const_int_operand")
+	  (minus:QI
+	    (match_operand:QI 3 "const_int_operand")
+	    (match_operand:QI 2 "register_operand"))))]
+ "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
+ [(set (match_dup 4) (match_dup 1))
+  (set (match_dup 0)
+       (<orotate>:SWI (match_dup 4) (match_dup 2)))]
+ "operands[4] = gen_reg_rtx (<MODE>mode);")
+
 ;; Implement rotation using two double-precision
 ;; shift instructions and a scratch register.
 
--- gcc/testsuite/gcc.target/i386/pr117930.c.jj	2024-12-06 10:33:22.558446906 +0100
+++ gcc/testsuite/gcc.target/i386/pr117930.c	2024-12-06 10:34:58.143101309 +0100
@@ -0,0 +1,118 @@ 
+/* PR target/117930: rotates by (c - t) or (c + t), c a multiple of 32, must need no sub/add/lea.  */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "sub\[bwlq\]\t" } } */
+/* { dg-final { scan-assembler-not "add\[bwlq\]\t" } } */
+/* { dg-final { scan-assembler-not "lea\[lq\]\t" } } */
+
+static inline
+unsigned lrotate (unsigned x, int t)
+{
+  unsigned tl = x << t;
+  unsigned th = x >> (-t & 31);
+  return tl | th;
+}
+
+static inline
+unsigned rrotate (unsigned x, int t)
+{
+  unsigned tl = x >> t;
+  unsigned th = x << (-t & 31);
+  return tl | th;
+}
+
+unsigned
+f1 (unsigned x, int t)
+{
+  return lrotate (x, 32 - t);
+}
+
+unsigned
+f2 (unsigned x, int t)
+{
+  return lrotate (x, 64 - t);
+}
+
+unsigned
+f3 (unsigned x, int t)
+{
+  return lrotate (x, 32 + t);
+}
+
+unsigned
+f4 (unsigned x, int t)
+{
+  return lrotate (x, 64 + t);
+}
+
+unsigned
+f5 (unsigned x, int t)
+{
+  return rrotate (x, 32 - t);
+}
+
+unsigned
+f6 (unsigned x, int t)
+{
+  return rrotate (x, 64 - t);
+}
+
+unsigned
+f7 (unsigned x, int t)
+{
+  return rrotate (x, 32 + t);
+}
+
+unsigned
+f8 (unsigned x, int t)
+{
+  return rrotate (x, 64 + t);
+}
+
+unsigned
+f9 (int t)
+{
+  return lrotate (0xdeadbeefU, 32 - t);
+}
+
+unsigned
+f10 (int t)
+{
+  return lrotate (0xdeadbeefU, 64 - t);
+}
+
+unsigned
+f11 (int t)
+{
+  return lrotate (0xdeadbeefU, 32 + t);
+}
+
+unsigned
+f12 (int t)
+{
+  return lrotate (0xdeadbeefU, 64 + t);
+}
+
+unsigned
+f13 (int t)
+{
+  return rrotate (0xdeadbeefU, 32 - t);
+}
+
+unsigned
+f14 (int t)
+{
+  return rrotate (0xdeadbeefU, 64 - t);
+}
+
+unsigned
+f15 (int t)
+{
+  return rrotate (0xdeadbeefU, 32 + t);
+}
+
+unsigned
+f16 (int t)
+{
+  return rrotate (0xdeadbeefU, 64 + t);
+}