diff mbox series

[v3] MATCH: Simplify `a rrotate (32-b) -> a lrotate b` [PR109906]

Message ID 20241016162300.13929-1-quic_eikagupt@quicinc.com
State New
Headers show
Series [v3] MATCH: Simplify `a rrotate (32-b) -> a lrotate b` [PR109906] | expand

Commit Message

Eikansh Gupta Oct. 16, 2024, 4:23 p.m. UTC
The pattern `a rrotate (32-b)` should be optimized to `a lrotate b`.
The same is also true for `a lrotate (32-b)`. It can be optimized to
`a rrotate b`.

This patch adds following patterns:
a rrotate (32-b) -> a lrotate b
a lrotate (32-b) -> a rrotate b

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

	PR tree-optimization/109906

gcc/ChangeLog:

	* match.pd (a rrotate (32-b) -> a lrotate b): New pattern
	(a lrotate (32-b) -> a rrotate b): New pattern

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/pr109906.c: New test.

Signed-off-by: Eikansh Gupta <quic_eikagupt@quicinc.com>
---
 gcc/match.pd                             |  9 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr109906.c | 41 ++++++++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109906.c

Comments

Kyrylo Tkachov Oct. 17, 2024, 9:05 a.m. UTC | #1
Hi Eikansh

> On 16 Oct 2024, at 18:23, Eikansh Gupta <quic_eikagupt@quicinc.com> wrote:
> 
> The pattern `a rrotate (32-b)` should be optimized to `a lrotate b`.
> The same is also true for `a lrotate (32-b)`. It can be optimized to
> `a rrotate b`.
> 
> This patch adds following patterns:
> a rrotate (32-b) -> a lrotate b
> a lrotate (32-b) -> a rrotate b
> 
> Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> 
> PR tree-optimization/109906
> 
> gcc/ChangeLog:
> 
> * match.pd (a rrotate (32-b) -> a lrotate b): New pattern
> (a lrotate (32-b) -> a rrotate b): New pattern
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.dg/tree-ssa/pr109906.c: New test.
> 
> Signed-off-by: Eikansh Gupta <quic_eikagupt@quicinc.com>
> ---
> gcc/match.pd                             |  9 ++++++
> gcc/testsuite/gcc.dg/tree-ssa/pr109906.c | 41 ++++++++++++++++++++++++
> 2 files changed, 50 insertions(+)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109906.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5ec31ef6269..078ef050351 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4861,6 +4861,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    build_int_cst (TREE_TYPE (@1),
>   element_precision (type)), @1); }))
> 
> +/* a rrotate (32-b) -> a lrotate b */
> +/* a lrotate (32-b) -> a rrotate b */
> +(for rotate (lrotate rrotate)
> +     orotate (rrotate lrotate)
> + (simplify
> +  (rotate @0 (minus INTEGER_CST@1 @2))
> +   (if (TYPE_PRECISION (TREE_TYPE (@0)) == wi::to_wide (@1))
> +     (orotate @0 @2))))

There is already a transformation for lrotate (x, C) into rotate (x, SIZE - C) around line 4937 in match.pd
Isn’t there a risk that we enter an infinite recursion situation with this?

Thanks,
Kyrill


> +
> /* Turn (a OP c1) OP c2 into a OP (c1+c2).  */
> (for op (lrotate rrotate rshift lshift)
>  (simplify
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c
> new file mode 100644
> index 00000000000..9aa015d8c65
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c
> @@ -0,0 +1,41 @@
> +/* PR tree-optimization/109906 */
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
> +/* { dg-require-effective-target int32 } */
> +
> +/* Implementation of rotate right operation */
> +static inline
> +unsigned rrotate(unsigned x, int t)
> +{
> +  if (t >= 32) __builtin_unreachable();
> +  unsigned tl = x >> (t);
> +  unsigned th = x << (32 - t);
> +  return tl | th;
> +}
> +
> +/* Here rotate left is achieved by doing rotate right by (32 - x) */
> +unsigned rotateleft(unsigned t, int x)
> +{
> +  return rrotate (t, 32 - x);
> +}
> +
> +/* Implementation of rotate left operation */
> +static inline
> +unsigned lrotate(unsigned x, int t)
> +{
> +  if (t >= 32) __builtin_unreachable();
> +  unsigned tl = x << (t);
> +  unsigned th = x >> (32 - t);
> +  return tl | th;
> +}
> +
> +/* Here rotate right is achieved by doing rotate left by (32 - x) */
> +unsigned rotateright(unsigned t, int x)
> +{
> +  return lrotate (t, 32 - x);
> +}
> +
> +/* Shouldn't have instruction for (32 - x). */
> +/* { dg-final { scan-tree-dump-not "minus_expr" "optimized" } } */
> +/* { dg-final { scan-tree-dump "rrotate_expr" "optimized" } } */
> +/* { dg-final { scan-tree-dump "lrotate_expr" "optimized" } } */
> -- 
> 2.17.1
>
Richard Biener Oct. 21, 2024, 9:07 a.m. UTC | #2
On Thu, Oct 17, 2024 at 11:06 AM Kyrylo Tkachov <ktkachov@nvidia.com> wrote:
>
> Hi Eikansh
>
> > On 16 Oct 2024, at 18:23, Eikansh Gupta <quic_eikagupt@quicinc.com> wrote:
> >
> > The pattern `a rrotate (32-b)` should be optimized to `a lrotate b`.
> > The same is also true for `a lrotate (32-b)`. It can be optimized to
> > `a rrotate b`.
> >
> > This patch adds following patterns:
> > a rrotate (32-b) -> a lrotate b
> > a lrotate (32-b) -> a rrotate b
> >
> > Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> > PR tree-optimization/109906
> >
> > gcc/ChangeLog:
> >
> > * match.pd (a rrotate (32-b) -> a lrotate b): New pattern
> > (a lrotate (32-b) -> a rrotate b): New pattern
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/tree-ssa/pr109906.c: New test.
> >
> > Signed-off-by: Eikansh Gupta <quic_eikagupt@quicinc.com>
> > ---
> > gcc/match.pd                             |  9 ++++++
> > gcc/testsuite/gcc.dg/tree-ssa/pr109906.c | 41 ++++++++++++++++++++++++
> > 2 files changed, 50 insertions(+)
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109906.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 5ec31ef6269..078ef050351 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -4861,6 +4861,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >    build_int_cst (TREE_TYPE (@1),
> >   element_precision (type)), @1); }))
> >
> > +/* a rrotate (32-b) -> a lrotate b */
> > +/* a lrotate (32-b) -> a rrotate b */
> > +(for rotate (lrotate rrotate)
> > +     orotate (rrotate lrotate)
> > + (simplify
> > +  (rotate @0 (minus INTEGER_CST@1 @2))
> > +   (if (TYPE_PRECISION (TREE_TYPE (@0)) == wi::to_wide (@1))

Use element_precision (@0) - rotates can be used on vector types as
well and using
TYPE_PRECISION would ICE on them.

> > +     (orotate @0 @2))))
>
> There is already a transformation for lrotate (x, C) into rotate (x, SIZE - C) around line 4937 in match.pd
> Isn’t there a risk that we enter an infinite recursion situation with this?

I don't think so.

The patch is OK with the adjustment.

Thanks,
Richard.

> Thanks,
> Kyrill
>
>
> > +
> > /* Turn (a OP c1) OP c2 into a OP (c1+c2).  */
> > (for op (lrotate rrotate rshift lshift)
> >  (simplify
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c
> > new file mode 100644
> > index 00000000000..9aa015d8c65
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c
> > @@ -0,0 +1,41 @@
> > +/* PR tree-optimization/109906 */
> > +/* { dg-do compile } */
> > +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
> > +/* { dg-require-effective-target int32 } */
> > +
> > +/* Implementation of rotate right operation */
> > +static inline
> > +unsigned rrotate(unsigned x, int t)
> > +{
> > +  if (t >= 32) __builtin_unreachable();
> > +  unsigned tl = x >> (t);
> > +  unsigned th = x << (32 - t);
> > +  return tl | th;
> > +}
> > +
> > +/* Here rotate left is achieved by doing rotate right by (32 - x) */
> > +unsigned rotateleft(unsigned t, int x)
> > +{
> > +  return rrotate (t, 32 - x);
> > +}
> > +
> > +/* Implementation of rotate left operation */
> > +static inline
> > +unsigned lrotate(unsigned x, int t)
> > +{
> > +  if (t >= 32) __builtin_unreachable();
> > +  unsigned tl = x << (t);
> > +  unsigned th = x >> (32 - t);
> > +  return tl | th;
> > +}
> > +
> > +/* Here rotate right is achieved by doing rotate left by (32 - x) */
> > +unsigned rotateright(unsigned t, int x)
> > +{
> > +  return lrotate (t, 32 - x);
> > +}
> > +
> > +/* Shouldn't have instruction for (32 - x). */
> > +/* { dg-final { scan-tree-dump-not "minus_expr" "optimized" } } */
> > +/* { dg-final { scan-tree-dump "rrotate_expr" "optimized" } } */
> > +/* { dg-final { scan-tree-dump "lrotate_expr" "optimized" } } */
> > --
> > 2.17.1
> >
>
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 5ec31ef6269..078ef050351 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4861,6 +4861,15 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 			    build_int_cst (TREE_TYPE (@1),
 					   element_precision (type)), @1); }))
 
+/* a rrotate (32-b) -> a lrotate b */
+/* a lrotate (32-b) -> a rrotate b */
+(for rotate (lrotate rrotate)
+     orotate (rrotate lrotate)
+ (simplify
+  (rotate @0 (minus INTEGER_CST@1 @2))
+   (if (TYPE_PRECISION (TREE_TYPE (@0)) == wi::to_wide (@1))
+     (orotate @0 @2))))
+
 /* Turn (a OP c1) OP c2 into a OP (c1+c2).  */
 (for op (lrotate rrotate rshift lshift)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c
new file mode 100644
index 00000000000..9aa015d8c65
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c
@@ -0,0 +1,41 @@ 
+/* PR tree-optimization/109906 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
+/* { dg-require-effective-target int32 } */
+
+/* Implementation of rotate right operation */
+static inline
+unsigned rrotate(unsigned x, int t)
+{
+  if (t >= 32) __builtin_unreachable();
+  unsigned tl = x >> (t);
+  unsigned th = x << (32 - t);
+  return tl | th;
+}
+
+/* Here rotate left is achieved by doing rotate right by (32 - x) */
+unsigned rotateleft(unsigned t, int x)
+{
+  return rrotate (t, 32 - x);
+}
+
+/* Implementation of rotate left operation */
+static inline
+unsigned lrotate(unsigned x, int t)
+{
+  if (t >= 32) __builtin_unreachable();
+  unsigned tl = x << (t);
+  unsigned th = x >> (32 - t);
+  return tl | th;
+}
+
+/* Here rotate right is achieved by doing rotate left by (32 - x) */
+unsigned rotateright(unsigned t, int x)
+{
+  return lrotate (t, 32 - x);
+}
+
+/* Shouldn't have instruction for (32 - x). */
+/* { dg-final { scan-tree-dump-not "minus_expr" "optimized" } } */
+/* { dg-final { scan-tree-dump "rrotate_expr" "optimized" } } */
+/* { dg-final { scan-tree-dump "lrotate_expr" "optimized" } } */