diff mbox series

[4/5] MATCH: Create BIT_ANDN and BIT_IORN from matching

Message ID 20240725021449.3650437-4-quic_apinski@quicinc.com
State New
Headers show
Series [1/5] aarch64: Rename bic/orn patterns to iorn/andn for vector modes | expand

Commit Message

Andrew Pinski July 25, 2024, 2:14 a.m. UTC
To better create RTL directly from gimple, we can use
these already existing internal functions in gimple.

That is, simplify `a & ~b` into BIT_ANDN.
Likewise `a | ~b` into BIT_IORN.
We only want to do this late, after vectorization, as some
targets (e.g. aarch64 SVE) have BIT_IORN on scalars but not on
some vector modes, even though the vectorizer could expand it back.

Note a few testcases need to be changed to scan an earlier dump
instead of the optimized dump.
The modified testcases could otherwise see BIT_ANDN and BIT_IORN, so move
the testing to forwprop2, before this simplification happens.

Built and tested on aarch64-linux-gnu with no regressions.

	PR target/115086

gcc/ChangeLog:

	* match.pd (`a & ~b`, `a | ~b`): New pattern.
	(BIT_ANDN/BIT_IORN with CST): New pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/bic-cst-1.c: New test.
	* gcc.target/aarch64/bic_simd-1.c: New test.
	* gcc.dg/tree-ssa/bitops-1.c: Move testing from optimized to forwprop2.
	* gcc.dg/tree-ssa/bitops-6.c: Likewise.
	* gcc.dg/tree-ssa/bitops-8.c: Likewise.
	* gcc.dg/tree-ssa/cmpbit-4.c: Likewise.
	* gcc.dg/tree-ssa/pr110637-2.c: Likewise.
	* gcc.dg/tree-ssa/pr94880.c: Likewise.
	* gcc.dg/tree-ssa/pr96671-1.c: Likewise.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/match.pd                                  | 17 ++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c      | 10 +++---
 gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c      | 12 +++----
 gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c      |  8 ++---
 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c      | 12 +++----
 gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c    |  8 ++---
 gcc/testsuite/gcc.dg/tree-ssa/pr94880.c       |  6 ++--
 gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c     |  8 ++---
 gcc/testsuite/gcc.target/aarch64/bic-cst-1.c  | 31 ++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/bic_simd-1.c | 32 +++++++++++++++++++
 10 files changed, 112 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/bic_simd-1.c

Comments

Richard Biener July 25, 2024, 12:15 p.m. UTC | #1
On Thu, Jul 25, 2024 at 4:16 AM Andrew Pinski <quic_apinski@quicinc.com> wrote:
>
> To better create rtl directly from gimple, we can use
> these already internal functions from the gimple.
>
> That is simplify `a & ~b` into BIT_ANDN.
> Likewise `a | ~b` into BIT_IORN.
> We only want to do this late after vectorization as some
> targets (e.g. aarch64 SVE) has BIT_IORN on scalars but not on
> some vector modes; even though the vectorizer could expand it back.
>
> Note a few testcases need to be changed to not look
> into optimized dump and catch them earlier.
> The modified testcases could catch BIT_ANDN and BIT_IORN so move the
> testing to forwprop2 before simplification happens.
>
> Built and tested on aarch64-linux-gnu with no regressions.

I think we want these only for ISEL as they happen way too often and will
disturb the IL too much in ways not handled by passes.  not/and/or are
too important ops to "hide" from most of the gimple pipeline.

Richard.

>         PR target/115086
>
> gcc/ChangeLog:
>
>         * match.pd (`a & ~b`, `a | ~b`): New pattern.
>         (BIT_ANDN/BIT_IORN with CST): New pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/bic-cst-1.c: New test.
>         * gcc.target/aarch64/bic_simd-1.c: New test.
>         * gcc.dg/tree-ssa/bitops-1.c: Move testing from optimized to forwprop2.
>         * gcc.dg/tree-ssa/bitops-6.c: Likewise.
>         * gcc.dg/tree-ssa/cmpbit-4.c: Likewise.
>         * gcc.dg/tree-ssa/pr110637-2.c: Likewise.
>         * gcc.dg/tree-ssa/pr94880.c: Likewise.
>         * gcc.dg/tree-ssa/pr96671-1.c: Likewise.
>
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> ---
>  gcc/match.pd                                  | 17 ++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c      | 10 +++---
>  gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c      | 12 +++----
>  gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c      |  8 ++---
>  gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c      | 12 +++----
>  gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c    |  8 ++---
>  gcc/testsuite/gcc.dg/tree-ssa/pr94880.c       |  6 ++--
>  gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c     |  8 ++---
>  gcc/testsuite/gcc.target/aarch64/bic-cst-1.c  | 31 ++++++++++++++++++
>  gcc/testsuite/gcc.target/aarch64/bic_simd-1.c | 32 +++++++++++++++++++
>  10 files changed, 112 insertions(+), 32 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index cf359b0ec0f..56f631dfeec 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -9979,6 +9979,23 @@ and,
>     (cond_op:s @1 @2 @3 @4 @5) @5)
>    (cond_op (bit_and @1 @0) @2 @3 @4 @5)))
>
> +#if GIMPLE
> +/* Create bit_andc and bit_iorc internal functions. */
> +(for bitop  (bit_and      bit_ior)
> +     bitopc (IFN_BIT_ANDN IFN_BIT_IORN)
> + (simplify
> +  (bitop:c (bit_not:s @0) @1)
> +  (if (canonicalize_math_after_vectorization_p ()
> +       && direct_internal_fn_supported_p (as_internal_fn (bitopc),
> +                                         type, OPTIMIZE_FOR_BOTH))
> +   (bitopc @1 @0)))
> + /* If the second operand is a constant, then reduce it to a & ~cst if
> +    the not simplifies. */
> + (simplify
> +  (bitopc @0 CONSTANT_CLASS_P@1)
> +  (bitop (bit_not! @1) @0)))
> +#endif
> +
>  /* For pointers @0 and @2 and nonnegative constant offset @1, look for
>     expressions like:
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
> index cf2823deb62..3a394b1f188 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
> @@ -1,5 +1,5 @@
>  /* { dg-do run } */
> -/* { dg-options "-O -fdump-tree-optimized-raw" } */
> +/* { dg-options "-O -fdump-tree-forwprop2-raw" } */
>
>  #define DECLS(n,VOL)                   \
>  __attribute__((noinline,noclone))      \
> @@ -66,7 +66,7 @@ int main(){
>         }
>  }
>
> -/* { dg-final { scan-tree-dump-times "bit_not_expr" 12 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "bit_and_expr"  9 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "bit_ior_expr" 10 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "bit_xor_expr"  9 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "bit_not_expr, " 12 "forwprop2"} } */
> +/* { dg-final { scan-tree-dump-times "bit_and_expr, "  9 "forwprop2"} } */
> +/* { dg-final { scan-tree-dump-times "bit_ior_expr, " 10 "forwprop2"} } */
> +/* { dg-final { scan-tree-dump-times "bit_xor_expr, "  9 "forwprop2"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
> index e6ab2fd6c71..e08132e2ab5 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
>  /* PR tree-optimization/111282 */
>
>
> @@ -25,9 +25,9 @@ int fcmp(int x, int y)
>    return a & (b ^ !a); // (x == 2) & (y == 1)
>  }
>
> -/* { dg-final { scan-tree-dump-not   "bit_xor_expr, "   "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "bit_and_expr, " 4 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "optimized" } } */
> -/* { dg-final { scan-tree-dump-not   "ne_expr, "        "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "eq_expr, "      2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not   "bit_xor_expr, "   "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times "bit_and_expr, " 4 "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-not   "ne_expr, "        "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times "eq_expr, "      2 "forwprop2" } } */
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
> index 40f756e4455..52c2f394222 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
>  /* PR tree-optimization/115224 */
>
>  int f1(int a, int b)
> @@ -9,7 +9,7 @@ int f1(int a, int b)
>          return c | (a ^ b);
>          // ~((a ^ 1) & b) or (a ^ -2) | ~b
>  }
> -/* { dg-final { scan-tree-dump-times   "bit_xor_expr, "  1  "optimized" } } */
> -/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "optimized" } } */
> -/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "optimized" } } */
> +/* { dg-final { scan-tree-dump-times   "bit_xor_expr, "  1  "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "forwprop2" } } */
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
> index cdba5d623af..627dcc57cc7 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
>
>  int g(int x, int y)
>  {
> @@ -40,8 +40,8 @@ _Bool gbi0(int a, int b)
>  }
>
>  /* All of these should be optimized to `x & y` or `~x & y` */
> -/* { dg-final { scan-tree-dump-times "le_expr, " 3 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "gt_expr, " 1 "optimized" } } */
> -/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "bit_and_expr, " 6 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "bit_not_expr, " 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "le_expr, " 3 "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times "gt_expr, " 1 "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times "bit_and_expr, " 6 "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times "bit_not_expr, " 2 "forwprop2" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
> index f1c5b90353a..81d6a092508 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O1 -fdump-tree-optimized" } */
> +/* { dg-options "-O1 -fdump-tree-forwprop2" } */
>  int f(int a)
>  {
>          int b = a & 1;
> @@ -8,6 +8,6 @@ int f(int a)
>  }
>
>  /* This should be optimized to just return `(a&1) ^ 1` or `(~a) & 1`. */
> -/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "~a" 1 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times " & 1" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-not " == " "forwprop2"} } */
> +/* { dg-final { scan-tree-dump-times "~a" 1 "forwprop2"} } */
> +/* { dg-final { scan-tree-dump-times " & 1" 1 "forwprop2"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
> index f7216618147..72a14b915a5 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
> @@ -1,8 +1,8 @@
>  /* PR tree-optimization/94786 */
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-optimized" } */
> -/* { dg-final { scan-tree-dump-times "= ~\[xy\]_" 4 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times " & \[xy\]_" 4 "optimized" } } */
> +/* { dg-options "-O2 -fdump-tree-forwprop2" } */
> +/* { dg-final { scan-tree-dump-times "= ~\[xy\]_" 4 "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times " & \[xy\]_" 4 "forwprop2" } } */
>
>  unsigned
>  foo_u(unsigned x, unsigned y)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
> index 42c5b27b53f..cf977b55cc2 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
> @@ -1,9 +1,9 @@
>  /* PR tree-optimization/96671 */
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-optimized" } */
> -/* { dg-final { scan-tree-dump-times " \\^ " 6 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times " ~" 6 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times " & " 6 "optimized" } } */
> +/* { dg-options "-O2 -fdump-tree-forwprop2" } */
> +/* { dg-final { scan-tree-dump-times " \\^ " 6 "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times " ~" 6 "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times " & " 6 "forwprop2" } } */
>
>  int
>  foo (int a, int b, int c)
> diff --git a/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c b/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
> new file mode 100644
> index 00000000000..1c25de3ba84
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +/**
> +**bar1:
> +**     mov     w([0-9]+), 4
> +**     bic     w0, w\1, w1
> +**     ret
> +*/
> +int bar1(int a, int c)
> +{
> +  int b = 4 & ~c;
> +  return b;
> +}
> +
> +/**
> +**foo1:
> +**     mov     w([0-9]+), 4
> +**     orn     w0, w\1, w1
> +**     ret
> +*/
> +int foo1(int a, int c)
> +{
> +  int b = 4 | ~c;
> +  return b;
> +}
> +
> +/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */
> +/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c b/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
> new file mode 100644
> index 00000000000..e2a69272456
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +/**
> +**bar1:
> +**     movi    v([0-9]+).2s, 0x4
> +**     bic     v0.8b, v\1.8b, v1.8b
> +**     ret
> +*/
> +#define vect8 __attribute__((vector_size(8)))
> +vect8 int bar1(vect8 int a, vect8 int c)
> +{
> +  vect8 int b = 4 & ~c;
> +  return b;
> +}
> +
> +/**
> +**foo1:
> +**     movi    v([0-9]+).2s, 0x4
> +**     orn     v0.8b, v\1.8b, v1.8b
> +**     ret
> +*/
> +vect8 int foo1(vect8 int a, vect8 int c)
> +{
> +  vect8 int b = 4 | ~c;
> +  return b;
> +}
> +
> +/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */
> +/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */
> +
> --
> 2.43.0
>
Andrew Pinski July 25, 2024, 10:11 p.m. UTC | #2
On Thu, Jul 25, 2024 at 5:16 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Thu, Jul 25, 2024 at 4:16 AM Andrew Pinski <quic_apinski@quicinc.com> wrote:
> >
> > To better create rtl directly from gimple, we can use
> > these already internal functions from the gimple.
> >
> > That is simplify `a & ~b` into BIT_ANDN.
> > Likewise `a | ~b` into BIT_IORN.
> > We only want to do this late after vectorization as some
> > targets (e.g. aarch64 SVE) has BIT_IORN on scalars but not on
> > some vector modes; even though the vectorizer could expand it back.
> >
> > Note a few testcases need to be changed to not look
> > into optimized dump and catch them earlier.
> > The modified testcases could catch BIT_ANDN and BIT_IORN so move the
> > testing to forwprop2 before simplification happens.
> >
> > Built and tested on aarch64-linux-gnu with no regressions.
>
> I think we want these only for ISEL as they happen way too often and will
> disturb the IL too much in ways not handled by passes.  not/and/or are
> too important ops to "hide" from most of the gimple pipeline.

I agree.

I also think the simplifications of `(VEC_COND @0 (uncond_expr) @1)`
-> COND_EXPR should also be done in isel rather than early on. I filed
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116100 to record that and
I will change those too when I get back to this next week.

Thanks,
Andrew

>
> Richard.
>
> >         PR target/115086
> >
> > gcc/ChangeLog:
> >
> >         * match.pd (`a & ~b`, `a | ~b`): New pattern.
> >         (BIT_ANDN/BIT_IORN with CST): New pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/aarch64/bic-cst-1.c: New test.
> >         * gcc.target/aarch64/bic_simd-1.c: New test.
> >         * gcc.dg/tree-ssa/bitops-1.c: Move testing from optimized to forwprop2.
> >         * gcc.dg/tree-ssa/bitops-6.c: Likewise.
> >         * gcc.dg/tree-ssa/cmpbit-4.c: Likewise.
> >         * gcc.dg/tree-ssa/pr110637-2.c: Likewise.
> >         * gcc.dg/tree-ssa/pr94880.c: Likewise.
> >         * gcc.dg/tree-ssa/pr96671-1.c: Likewise.
> >
> > Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> > ---
> >  gcc/match.pd                                  | 17 ++++++++++
> >  gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c      | 10 +++---
> >  gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c      | 12 +++----
> >  gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c      |  8 ++---
> >  gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c      | 12 +++----
> >  gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c    |  8 ++---
> >  gcc/testsuite/gcc.dg/tree-ssa/pr94880.c       |  6 ++--
> >  gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c     |  8 ++---
> >  gcc/testsuite/gcc.target/aarch64/bic-cst-1.c  | 31 ++++++++++++++++++
> >  gcc/testsuite/gcc.target/aarch64/bic_simd-1.c | 32 +++++++++++++++++++
> >  10 files changed, 112 insertions(+), 32 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index cf359b0ec0f..56f631dfeec 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -9979,6 +9979,23 @@ and,
> >     (cond_op:s @1 @2 @3 @4 @5) @5)
> >    (cond_op (bit_and @1 @0) @2 @3 @4 @5)))
> >
> > +#if GIMPLE
> > +/* Create bit_andc and bit_iorc internal functions. */
> > +(for bitop  (bit_and      bit_ior)
> > +     bitopc (IFN_BIT_ANDN IFN_BIT_IORN)
> > + (simplify
> > +  (bitop:c (bit_not:s @0) @1)
> > +  (if (canonicalize_math_after_vectorization_p ()
> > +       && direct_internal_fn_supported_p (as_internal_fn (bitopc),
> > +                                         type, OPTIMIZE_FOR_BOTH))
> > +   (bitopc @1 @0)))
> > + /* If the second operand is a constant, then reduce it to a & ~cst if
> > +    the not simplifies. */
> > + (simplify
> > +  (bitopc @0 CONSTANT_CLASS_P@1)
> > +  (bitop (bit_not! @1) @0)))
> > +#endif
> > +
> >  /* For pointers @0 and @2 and nonnegative constant offset @1, look for
> >     expressions like:
> >
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
> > index cf2823deb62..3a394b1f188 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do run } */
> > -/* { dg-options "-O -fdump-tree-optimized-raw" } */
> > +/* { dg-options "-O -fdump-tree-forwprop2-raw" } */
> >
> >  #define DECLS(n,VOL)                   \
> >  __attribute__((noinline,noclone))      \
> > @@ -66,7 +66,7 @@ int main(){
> >         }
> >  }
> >
> > -/* { dg-final { scan-tree-dump-times "bit_not_expr" 12 "optimized"} } */
> > -/* { dg-final { scan-tree-dump-times "bit_and_expr"  9 "optimized"} } */
> > -/* { dg-final { scan-tree-dump-times "bit_ior_expr" 10 "optimized"} } */
> > -/* { dg-final { scan-tree-dump-times "bit_xor_expr"  9 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "bit_not_expr, " 12 "forwprop2"} } */
> > +/* { dg-final { scan-tree-dump-times "bit_and_expr, "  9 "forwprop2"} } */
> > +/* { dg-final { scan-tree-dump-times "bit_ior_expr, " 10 "forwprop2"} } */
> > +/* { dg-final { scan-tree-dump-times "bit_xor_expr, "  9 "forwprop2"} } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
> > index e6ab2fd6c71..e08132e2ab5 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> > +/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
> >  /* PR tree-optimization/111282 */
> >
> >
> > @@ -25,9 +25,9 @@ int fcmp(int x, int y)
> >    return a & (b ^ !a); // (x == 2) & (y == 1)
> >  }
> >
> > -/* { dg-final { scan-tree-dump-not   "bit_xor_expr, "   "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times "bit_and_expr, " 4 "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "optimized" } } */
> > -/* { dg-final { scan-tree-dump-not   "ne_expr, "        "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times "eq_expr, "      2 "optimized" } } */
> > +/* { dg-final { scan-tree-dump-not   "bit_xor_expr, "   "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times "bit_and_expr, " 4 "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-not   "ne_expr, "        "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times "eq_expr, "      2 "forwprop2" } } */
> >
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
> > index 40f756e4455..52c2f394222 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> > +/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
> >  /* PR tree-optimization/115224 */
> >
> >  int f1(int a, int b)
> > @@ -9,7 +9,7 @@ int f1(int a, int b)
> >          return c | (a ^ b);
> >          // ~((a ^ 1) & b) or (a ^ -2) | ~b
> >  }
> > -/* { dg-final { scan-tree-dump-times   "bit_xor_expr, "  1  "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times   "bit_xor_expr, "  1  "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "forwprop2" } } */
> >
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
> > index cdba5d623af..627dcc57cc7 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> > +/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
> >
> >  int g(int x, int y)
> >  {
> > @@ -40,8 +40,8 @@ _Bool gbi0(int a, int b)
> >  }
> >
> >  /* All of these should be optimized to `x & y` or `~x & y` */
> > -/* { dg-final { scan-tree-dump-times "le_expr, " 3 "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times "gt_expr, " 1 "optimized" } } */
> > -/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times "bit_and_expr, " 6 "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times "bit_not_expr, " 2 "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times "le_expr, " 3 "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times "gt_expr, " 1 "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times "bit_and_expr, " 6 "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times "bit_not_expr, " 2 "forwprop2" } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
> > index f1c5b90353a..81d6a092508 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O1 -fdump-tree-optimized" } */
> > +/* { dg-options "-O1 -fdump-tree-forwprop2" } */
> >  int f(int a)
> >  {
> >          int b = a & 1;
> > @@ -8,6 +8,6 @@ int f(int a)
> >  }
> >
> >  /* This should be optimized to just return `(a&1) ^ 1` or `(~a) & 1`. */
> > -/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
> > -/* { dg-final { scan-tree-dump-times "~a" 1 "optimized"} } */
> > -/* { dg-final { scan-tree-dump-times " & 1" 1 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-not " == " "forwprop2"} } */
> > +/* { dg-final { scan-tree-dump-times "~a" 1 "forwprop2"} } */
> > +/* { dg-final { scan-tree-dump-times " & 1" 1 "forwprop2"} } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
> > index f7216618147..72a14b915a5 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
> > @@ -1,8 +1,8 @@
> >  /* PR tree-optimization/94786 */
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -fdump-tree-optimized" } */
> > -/* { dg-final { scan-tree-dump-times "= ~\[xy\]_" 4 "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times " & \[xy\]_" 4 "optimized" } } */
> > +/* { dg-options "-O2 -fdump-tree-forwprop2" } */
> > +/* { dg-final { scan-tree-dump-times "= ~\[xy\]_" 4 "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times " & \[xy\]_" 4 "forwprop2" } } */
> >
> >  unsigned
> >  foo_u(unsigned x, unsigned y)
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
> > index 42c5b27b53f..cf977b55cc2 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
> > @@ -1,9 +1,9 @@
> >  /* PR tree-optimization/96671 */
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -fdump-tree-optimized" } */
> > -/* { dg-final { scan-tree-dump-times " \\^ " 6 "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times " ~" 6 "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times " & " 6 "optimized" } } */
> > +/* { dg-options "-O2 -fdump-tree-forwprop2" } */
> > +/* { dg-final { scan-tree-dump-times " \\^ " 6 "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times " ~" 6 "forwprop2" } } */
> > +/* { dg-final { scan-tree-dump-times " & " 6 "forwprop2" } } */
> >
> >  int
> >  foo (int a, int b, int c)
> > diff --git a/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c b/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
> > new file mode 100644
> > index 00000000000..1c25de3ba84
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> > +
> > +/**
> > +**bar1:
> > +**     mov     w([0-9]+), 4
> > +**     bic     w0, w\1, w1
> > +**     ret
> > +*/
> > +int bar1(int a, int c)
> > +{
> > +  int b = 4 & ~c;
> > +  return b;
> > +}
> > +
> > +/**
> > +**foo1:
> > +**     mov     w([0-9]+), 4
> > +**     orn     w0, w\1, w1
> > +**     ret
> > +*/
> > +int foo1(int a, int c)
> > +{
> > +  int b = 4 | ~c;
> > +  return b;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */
> > +/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */
> > +
> > diff --git a/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c b/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
> > new file mode 100644
> > index 00000000000..e2a69272456
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> > +
> > +/**
> > +**bar1:
> > +**     movi    v([0-9]+).2s, 0x4
> > +**     bic     v0.8b, v\1.8b, v1.8b
> > +**     ret
> > +*/
> > +#define vect8 __attribute__((vector_size(8)))
> > +vect8 int bar1(vect8 int a, vect8 int c)
> > +{
> > +  vect8 int b = 4 & ~c;
> > +  return b;
> > +}
> > +
> > +/**
> > +**foo1:
> > +**     movi    v([0-9]+).2s, 0x4
> > +**     orn     v0.8b, v\1.8b, v1.8b
> > +**     ret
> > +*/
> > +vect8 int foo1(vect8 int a, vect8 int c)
> > +{
> > +  vect8 int b = 4 | ~c;
> > +  return b;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */
> > +/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */
> > +
> > --
> > 2.43.0
> >
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index cf359b0ec0f..56f631dfeec 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -9979,6 +9979,23 @@  and,
    (cond_op:s @1 @2 @3 @4 @5) @5)
   (cond_op (bit_and @1 @0) @2 @3 @4 @5)))
 
+#if GIMPLE
+/* Create BIT_ANDN and BIT_IORN internal functions. */
+(for bitop  (bit_and      bit_ior)
+     bitopc (IFN_BIT_ANDN IFN_BIT_IORN)
+ (simplify
+  (bitop:c (bit_not:s @0) @1)
+  (if (canonicalize_math_after_vectorization_p ()
+       && direct_internal_fn_supported_p (as_internal_fn (bitopc),
+					  type, OPTIMIZE_FOR_BOTH))
+   (bitopc @1 @0)))
+ /* If the second operand is a constant, then reduce it to a & ~cst if
+    the not simplifies. */
+ (simplify
+  (bitopc @0 CONSTANT_CLASS_P@1)
+  (bitop (bit_not! @1) @0)))
+#endif
+
 /* For pointers @0 and @2 and nonnegative constant offset @1, look for
    expressions like:
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
index cf2823deb62..3a394b1f188 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O -fdump-tree-optimized-raw" } */
+/* { dg-options "-O -fdump-tree-forwprop2-raw" } */
 
 #define DECLS(n,VOL)			\
 __attribute__((noinline,noclone))	\
@@ -66,7 +66,7 @@  int main(){
 	}
 }
 
-/* { dg-final { scan-tree-dump-times "bit_not_expr" 12 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "bit_and_expr"  9 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "bit_ior_expr" 10 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "bit_xor_expr"  9 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr, " 12 "forwprop2"} } */
+/* { dg-final { scan-tree-dump-times "bit_and_expr, "  9 "forwprop2"} } */
+/* { dg-final { scan-tree-dump-times "bit_ior_expr, " 10 "forwprop2"} } */
+/* { dg-final { scan-tree-dump-times "bit_xor_expr, "  9 "forwprop2"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
index e6ab2fd6c71..e08132e2ab5 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
 /* PR tree-optimization/111282 */
 
 
@@ -25,9 +25,9 @@  int fcmp(int x, int y)
   return a & (b ^ !a); // (x == 2) & (y == 1)
 }
 
-/* { dg-final { scan-tree-dump-not   "bit_xor_expr, "   "optimized" } } */
-/* { dg-final { scan-tree-dump-times "bit_and_expr, " 4 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-not   "ne_expr, "        "optimized" } } */
-/* { dg-final { scan-tree-dump-times "eq_expr, "      2 "optimized" } } */
+/* { dg-final { scan-tree-dump-not   "bit_xor_expr, "   "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "bit_and_expr, " 4 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-not   "ne_expr, "        "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "eq_expr, "      2 "forwprop2" } } */
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
index 40f756e4455..52c2f394222 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
 /* PR tree-optimization/115224 */
 
 int f1(int a, int b)
@@ -9,7 +9,7 @@  int f1(int a, int b)
         return c | (a ^ b);
         // ~((a ^ 1) & b) or (a ^ -2) | ~b
 }
-/* { dg-final { scan-tree-dump-times   "bit_xor_expr, "  1  "optimized" } } */
-/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "optimized" } } */
-/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "optimized" } } */
+/* { dg-final { scan-tree-dump-times   "bit_xor_expr, "  1  "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "forwprop2" } } */
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
index cdba5d623af..627dcc57cc7 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
 
 int g(int x, int y)
 {
@@ -40,8 +40,8 @@  _Bool gbi0(int a, int b)
 }
 
 /* All of these should be optimized to `x & y` or `~x & y` */
-/* { dg-final { scan-tree-dump-times "le_expr, " 3 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "gt_expr, " 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "optimized" } } */
-/* { dg-final { scan-tree-dump-times "bit_and_expr, " 6 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "bit_not_expr, " 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "le_expr, " 3 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "gt_expr, " 1 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "bit_and_expr, " 6 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr, " 2 "forwprop2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
index f1c5b90353a..81d6a092508 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O1 -fdump-tree-forwprop2" } */
 int f(int a)
 {
         int b = a & 1;
@@ -8,6 +8,6 @@  int f(int a)
 }
 
 /* This should be optimized to just return `(a&1) ^ 1` or `(~a) & 1`. */
-/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
-/* { dg-final { scan-tree-dump-times "~a" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times " & 1" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-not " == " "forwprop2"} } */
+/* { dg-final { scan-tree-dump-times "~a" 1 "forwprop2"} } */
+/* { dg-final { scan-tree-dump-times " & 1" 1 "forwprop2"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
index f7216618147..72a14b915a5 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
@@ -1,8 +1,8 @@ 
 /* PR tree-optimization/94786 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
-/* { dg-final { scan-tree-dump-times "= ~\[xy\]_" 4 "optimized" } } */
-/* { dg-final { scan-tree-dump-times " & \[xy\]_" 4 "optimized" } } */
+/* { dg-options "-O2 -fdump-tree-forwprop2" } */
+/* { dg-final { scan-tree-dump-times "= ~\[xy\]_" 4 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times " & \[xy\]_" 4 "forwprop2" } } */
 
 unsigned
 foo_u(unsigned x, unsigned y)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
index 42c5b27b53f..cf977b55cc2 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
@@ -1,9 +1,9 @@ 
 /* PR tree-optimization/96671 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
-/* { dg-final { scan-tree-dump-times " \\^ " 6 "optimized" } } */
-/* { dg-final { scan-tree-dump-times " ~" 6 "optimized" } } */
-/* { dg-final { scan-tree-dump-times " & " 6 "optimized" } } */
+/* { dg-options "-O2 -fdump-tree-forwprop2" } */
+/* { dg-final { scan-tree-dump-times " \\^ " 6 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times " ~" 6 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times " & " 6 "forwprop2" } } */
 
 int
 foo (int a, int b, int c)
diff --git a/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c b/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
new file mode 100644
index 00000000000..1c25de3ba84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
@@ -0,0 +1,31 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+/**
+**bar1:
+**	mov	w([0-9]+), 4
+**	bic	w0, w\1, w1
+**	ret
+*/
+int bar1(int a, int c)
+{
+  int b = 4 & ~c;
+  return b;
+}
+
+/**
+**foo1:
+**	mov	w([0-9]+), 4
+**	orn	w0, w\1, w1
+**	ret
+*/
+int foo1(int a, int c)
+{
+  int b = 4 | ~c;
+  return b;
+}
+
+/* { dg-final { scan-tree-dump "\\.BIT_ANDN " "optimized" } } */
+/* { dg-final { scan-tree-dump "\\.BIT_IORN " "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c b/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
new file mode 100644
index 00000000000..e2a69272456
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
@@ -0,0 +1,32 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+/**
+**bar1:
+**	movi	v([0-9]+).2s, 0x4
+**	bic	v0.8b, v\1.8b, v1.8b
+**	ret
+*/
+#define vect8 __attribute__((vector_size(8)))
+vect8 int bar1(vect8 int a, vect8 int c)
+{
+  vect8 int b = 4 & ~c;
+  return b;
+}
+
+/**
+**foo1:
+**	movi	v([0-9]+).2s, 0x4
+**	orn	v0.8b, v\1.8b, v1.8b
+**	ret
+*/
+vect8 int foo1(vect8 int a, vect8 int c)
+{
+  vect8 int b = 4 | ~c;
+  return b;
+}
+
+/* { dg-final { scan-tree-dump "\\.BIT_ANDN " "optimized" } } */
+/* { dg-final { scan-tree-dump "\\.BIT_IORN " "optimized" } } */
+