Message ID | patch-18621-tamar@arm.com |
---|---|
State | New |
Headers | show |
Series | None | expand |
Tamar Christina <tamar.christina@arm.com> writes: > Hi All, > > This implements the new target hook indicating that for AArch64 when possible > we prefer masked operations for any type vs doing LOAD + SELECT or > SELECT + STORE. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/115531 > * config/aarch64/aarch64.cc > (aarch64_conditional_operation_is_expensive): New. > (TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE): New. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/115531 > * gcc.dg/vect/vect-conditional_store_1.c: New test. > * gcc.dg/vect/vect-conditional_store_2.c: New test. > * gcc.dg/vect/vect-conditional_store_3.c: New test. > * gcc.dg/vect/vect-conditional_store_4.c: New test. OK for the aarch64 part if 1/2 is OK. The tests look good to me too, so OK for those unless someone objects. Thanks, Richard > > --- > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 2816124076383c1c458e2cfa21cbbafb0773b05a..dc1bc0958ca6172bc2d4753efe491457ab9bcc74 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -28222,6 +28222,15 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, > return true; > } > > +/* Implement TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE. Assume that > + predicated operations when available are beneficial. */ > + > +static bool > +aarch64_conditional_operation_is_expensive (unsigned) > +{ > + return false; > +} > + > /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that > it isn't worth branching around empty masked ops (including masked > stores). 
*/ > @@ -30909,6 +30918,9 @@ aarch64_libgcc_floating_mode_supported_p > #define TARGET_VECTORIZE_RELATED_MODE aarch64_vectorize_related_mode > #undef TARGET_VECTORIZE_GET_MASK_MODE > #define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode > +#undef TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE > +#define TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE \ > + aarch64_conditional_operation_is_expensive > #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE > #define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \ > aarch64_empty_mask_is_expensive > diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c > new file mode 100644 > index 0000000000000000000000000000000000000000..563ac63bdab01e33b7a3edd9ec1545633ee1b86e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c > @@ -0,0 +1,24 @@ > +/* { dg-do assemble } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-require-effective-target vect_masked_store } */ > + > +/* { dg-additional-options "-mavx2" { target avx2 } } */ > +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ > + > +void foo1 (char *restrict a, int *restrict b, int *restrict c, int n, int stride) > +{ > + if (stride <= 1) > + return; > + > + for (int i = 0; i < n; i++) > + { > + int res = c[i]; > + int t = b[i+stride]; > + if (a[i] != 0) > + res = t; > + c[i] = res; > + } > +} > + > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { aarch64-*-* } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c > new file mode 100644 > index 0000000000000000000000000000000000000000..c45cdc30a6278de7f04b8a04cfc7a508c853279b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c > @@ -0,0 +1,24 @@ > +/* { dg-do assemble } */ > +/* { dg-require-effective-target vect_int } 
*/ > +/* { dg-require-effective-target vect_masked_store } */ > + > +/* { dg-additional-options "-mavx2" { target avx2 } } */ > +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ > + > +void foo2 (char *restrict a, int *restrict b, int *restrict c, int n, int stride) > +{ > + if (stride <= 1) > + return; > + > + for (int i = 0; i < n; i++) > + { > + int res = c[i]; > + int t = b[i+stride]; > + if (a[i] != 0) > + t = res; > + c[i] = t; > + } > +} > + > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { aarch64-*-* } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c > new file mode 100644 > index 0000000000000000000000000000000000000000..da9e675dbb97add70d47fc8d714a02256fb1387a > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c > @@ -0,0 +1,24 @@ > +/* { dg-do assemble } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-require-effective-target vect_masked_store } */ > + > +/* { dg-additional-options "-mavx2" { target avx2 } } */ > +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ > + > +void foo3 (float *restrict a, int *restrict b, int *restrict c, int n, int stride) > +{ > + if (stride <= 1) > + return; > + > + for (int i = 0; i < n; i++) > + { > + int res = c[i]; > + int t = b[i+stride]; > + if (a[i] >= 0) > + t = res; > + c[i] = t; > + } > +} > + > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { aarch64-*-* } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c > new file mode 100644 > index 0000000000000000000000000000000000000000..38b33fa43e5ee11cf7a52e681cfff44948c61e90 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c > @@ -0,0 +1,28 @@ > 
+/* { dg-do assemble } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-require-effective-target vect_masked_store } */ > + > +/* { dg-additional-options "-mavx2" { target avx2 } } */ > +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ > + > +void foo4 (signed char *restrict a, int *restrict b, int *restrict c, int *restrict d, int n, int stride) > +{ > + if (stride <= 1) > + return; > + > + for (int i = 0; i < n; i++) > + { > + int res1 = c[i]; > + int res2 = d[i]; > + int t = b[i+stride]; > + if (a[i] > 0) > + t = res1; > + else if (a[i] < 0) > + t = res2 * 2; > + > + c[i] = t; > + } > +} > + > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { aarch64-*-* } } } */
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 2816124076383c1c458e2cfa21cbbafb0773b05a..dc1bc0958ca6172bc2d4753efe491457ab9bcc74 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -28222,6 +28222,15 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
   return true;
 }
 
+/* Implement TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE. Assume that
+   predicated operations when available are beneficial. */
+
+static bool
+aarch64_conditional_operation_is_expensive (unsigned)
+{
+  return false;
+}
+
 /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
    it isn't worth branching around empty masked ops (including masked
    stores). */
@@ -30909,6 +30918,9 @@ aarch64_libgcc_floating_mode_supported_p
 #define TARGET_VECTORIZE_RELATED_MODE aarch64_vectorize_related_mode
 #undef TARGET_VECTORIZE_GET_MASK_MODE
 #define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
+#undef TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE
+#define TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE \
+  aarch64_conditional_operation_is_expensive
 #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
 #define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
   aarch64_empty_mask_is_expensive
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..563ac63bdab01e33b7a3edd9ec1545633ee1b86e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_1.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo1 (char *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] != 0)
+        res = t;
+      c[i] = res;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..c45cdc30a6278de7f04b8a04cfc7a508c853279b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_2.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo2 (char *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] != 0)
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..da9e675dbb97add70d47fc8d714a02256fb1387a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_3.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo3 (float *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i] >= 0)
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..38b33fa43e5ee11cf7a52e681cfff44948c61e90
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_4.c
@@ -0,0 +1,28 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo4 (signed char *restrict a, int *restrict b, int *restrict c, int *restrict d, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res1 = c[i];
+      int res2 = d[i];
+      int t = b[i+stride];
+      if (a[i] > 0)
+        t = res1;
+      else if (a[i] < 0)
+        t = res2 * 2;
+
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */