Message ID | 20220707164545.628291-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v2] Simplify memchr with small constant strings | expand |
On Thu, Jul 7, 2022 at 6:45 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > When memchr is applied on a constant string of no more than the bytes of > a word, simplify memchr by checking each byte in the constant string. > > int f (int a) > { > return __builtin_memchr ("AE", a, 2) != 0; > } > > is simplified to > > int f (int a) > { > return ((char) a == 'A' || (char) a == 'E') != 0; > } > > gcc/ > > PR tree-optimization/103798 > * tree-ssa-forwprop.cc: Include "tree-ssa-strlen.h". > (simplify_builtin_call): Inline memchr with constant strings of > no more than the bytes of a word. > * tree-ssa-strlen.cc (use_in_zero_equality): Make it global. > * tree-ssa-strlen.h (use_in_zero_equality): New. > > gcc/testsuite/ > > PR tree-optimization/103798 > * c-c++-common/pr103798-1.c: New test. > * c-c++-common/pr103798-2.c: Likewise. > * c-c++-common/pr103798-3.c: Likewise. > * c-c++-common/pr103798-4.c: Likewise. > * c-c++-common/pr103798-5.c: Likewise. > * c-c++-common/pr103798-6.c: Likewise. > * c-c++-common/pr103798-7.c: Likewise. > * c-c++-common/pr103798-8.c: Likewise. > --- > gcc/testsuite/c-c++-common/pr103798-1.c | 28 +++++++++++ > gcc/testsuite/c-c++-common/pr103798-2.c | 30 ++++++++++++ > gcc/testsuite/c-c++-common/pr103798-3.c | 28 +++++++++++ > gcc/testsuite/c-c++-common/pr103798-4.c | 28 +++++++++++ > gcc/testsuite/c-c++-common/pr103798-5.c | 26 ++++++++++ > gcc/testsuite/c-c++-common/pr103798-6.c | 27 +++++++++++ > gcc/testsuite/c-c++-common/pr103798-7.c | 27 +++++++++++ > gcc/testsuite/c-c++-common/pr103798-8.c | 27 +++++++++++ > gcc/tree-ssa-forwprop.cc | 64 +++++++++++++++++++++++++ > gcc/tree-ssa-strlen.cc | 4 +- > gcc/tree-ssa-strlen.h | 2 + > 11 files changed, 289 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/c-c++-common/pr103798-1.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-2.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-3.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-4.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-5.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-6.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-7.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-8.c > > diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c b/gcc/testsuite/c-c++-common/pr103798-1.c > new file mode 100644 > index 00000000000..cd3edf569fc > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-1.c > @@ -0,0 +1,28 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int > +f (char a) > +{ > + return __builtin_memchr ("a", a, 1) == 0; > +} > + > +__attribute__ ((weak)) > +int > +g (char a) > +{ > + return a != 'a'; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c b/gcc/testsuite/c-c++-common/pr103798-2.c > new file mode 100644 > index 00000000000..e7e99c3679e > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-2.c > @@ -0,0 +1,30 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +#include <string.h> > + > +__attribute__ ((weak)) > +int > +f (int a) > +{ > + return memchr ("aE", a, 2) != NULL; > +} > + > +__attribute__ ((weak)) > +int > +g (char a) > +{ > + return a == 'a' || a == 'E'; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i + 256) != g (i + 256)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c b/gcc/testsuite/c-c++-common/pr103798-3.c > new file mode 100644 > index 00000000000..ddcedc7e238 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-3.c > @@ -0,0 +1,28 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int > +f (char a) > +{ > + return __builtin_memchr ("aEgZ", a, 3) == 0; > +} > + > +__attribute__ ((weak)) > +int > +g (char a) > +{ > + return a != 'a' && a != 'E' && a != 'g'; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-4.c b/gcc/testsuite/c-c++-common/pr103798-4.c > new file mode 100644 > index 00000000000..00e8302a833 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-4.c > @@ -0,0 +1,28 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int > +f (char a) > +{ > + return __builtin_memchr ("aEgi", a, 4) != 0; > +} > + > +__attribute__ ((weak)) > +int > +g (char a) > +{ > + return a == 'a' || a == 'E' || a == 'g' || a == 'i'; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-5.c b/gcc/testsuite/c-c++-common/pr103798-5.c > new file mode 100644 > index 00000000000..0d6487a13df > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-5.c > @@ -0,0 +1,26 @@ > +/* { dg-do run { target int128 } } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int f(char a) > +{ > + return __builtin_memchr ("aEgiH", a, 5) == 0; > +} > + > +__attribute__ ((weak)) > +int g(char a) > +{ > + return a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H'; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-6.c b/gcc/testsuite/c-c++-common/pr103798-6.c > new file mode 100644 > index 00000000000..5ccb5ee66e0 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-6.c > @@ -0,0 +1,27 @@ > +/* { dg-do run { target int128 } } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int f(char a) > +{ > + return __builtin_memchr ("aEgiHx", a, 6) != 0; > +} > + > +__attribute__ ((weak)) > +int g(char a) > +{ > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > + || a == 'x'); > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-7.c b/gcc/testsuite/c-c++-common/pr103798-7.c > new file mode 100644 > index 00000000000..40fd38257d1 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-7.c > @@ -0,0 +1,27 @@ > +/* { dg-do run { target int128 } } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int f(char a) > +{ > + return __builtin_memchr ("aEgiHjZ", a, 7) == 0; > +} > + > +__attribute__ ((weak)) > +int g(char a) > +{ > + return (a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H' > + && a != 'j' && a != 'Z'); > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-8.c b/gcc/testsuite/c-c++-common/pr103798-8.c > new file mode 100644 > index 00000000000..0841b18cea4 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-8.c > @@ -0,0 +1,27 @@ > +/* { dg-do run { target int128 } } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int f(int a) > +{ > + return __builtin_memchr ("aEgiHx19ABC", a, 8) != 0; > +} > + > +__attribute__ ((weak)) > +int g(char a) > +{ > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > + || a == 'x' || a == '1' || a == '9'); > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i + 256) != g (i + 256)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 69567ab3275..edd1277c23b 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-dfa.h" > #include "tree-ssa-propagate.h" > #include "tree-ssa-dom.h" > +#include "tree-ssa-strlen.h" > #include "builtins.h" > #include "tree-cfgcleanup.h" > #include "cfganal.h" > @@ -1177,6 +1178,15 @@ constant_pointer_difference (tree p1, tree p2) > memcpy (p, "abcd ", 7); > call if the latter can be stored by pieces during expansion. > > + Optimize > + memchr ("abcd", a, 4) == 0; > + or > + memchr ("abcd", a, 4) != 0; > + to > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 > + or > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') != 0 > + > Also canonicalize __atomic_fetch_op (p, x, y) op x > to __atomic_op_fetch (p, x, y) or > __atomic_op_fetch (p, x, y) iop x > @@ -1193,8 +1203,62 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2) > return false; > stmt1 = SSA_NAME_DEF_STMT (vuse); > > + tree res; > + > switch (DECL_FUNCTION_CODE (callee2)) > { > + case BUILT_IN_MEMCHR: > + if (gimple_call_num_args (stmt2) == 3 > + && (res = gimple_call_lhs (stmt2)) != nullptr > + && use_in_zero_equality (res) != nullptr > + && CHAR_BIT == 8 > + && BITS_PER_UNIT == 8) > + { > + tree ptr = gimple_call_arg (stmt2, 0); > + if (TREE_CODE (ptr) != ADDR_EXPR > + || TREE_CODE (TREE_OPERAND (ptr, 0)) != STRING_CST) > + break; > + unsigned HOST_WIDE_INT slen > + = TREE_STRING_LENGTH (TREE_OPERAND (ptr, 0)); > + /* It must be a non-empty string constant. */ > + if (slen < 2) > + break; > + tree size = gimple_call_arg (stmt2, 2); > + /* Size must be a constant which is <= UNITS_PER_WORD and > + <= the string length. */ > + if (TREE_CODE (size) != INTEGER_CST > + || integer_zerop (size) > + || wi::gtu_p (wi::to_wide (size), UNITS_PER_WORD) > + || wi::geu_p (wi::to_wide (size), slen)) it might be convenient to check tree_fits_uhwi_p (size) and do unsigned HOST_WIDE_INT sz = tree_to_uhwi (size); instead of playing with wide-int here. > + break; > + tree ch = gimple_call_arg (stmt2, 1); > + location_t loc = gimple_location (stmt2); > + if (!useless_type_conversion_p (char_type_node, > + TREE_TYPE (ch))) > + ch = fold_convert_loc (loc, char_type_node, ch); > + const char *p = TREE_STRING_POINTER (TREE_OPERAND (ptr, 0)); > + unsigned int isize = TREE_INT_CST_LOW (size); > + tree *op = new tree[isize]; > + for (unsigned int i = 0; i < isize; i++) > + { > + op[i] = build_int_cst (char_type_node, p[i]); > + op[i] = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, > + op[i], ch); > + } > + for (unsigned int i = isize - 1; i >= 1; i--) > + op[i - 1] = fold_convert_loc (loc, boolean_type_node, > + fold_build2_loc (loc, > + BIT_IOR_EXPR, > + boolean_type_node, > + op[i - 1], > + op[i])); > + res = fold_convert_loc (loc, TREE_TYPE (res), op[0]); > + gimplify_and_update_call_from_tree (gsi_p, res); my worry about -Os remains, can you please add a check like optimize_bb_for_speed (gimple_bb (stmt2)) || sz <= 2 (so we allow inline expanding two comparisons when not optimizing for speed)? For UNITS_PER_WORD == 8 this is possibly a lot of compares, don't we have some _BY_PIECEs limit we can use here? There's no COMPARE_RATIO it seems, not sure what we use there. Since we only inline expand memchr(..) != 0 we're leaving 'a' != x | 'b' != x | ... optimization to possibly 'ab...' != (x|x<<8|x<<12|...) for followup (I'm quite sure we don't do anything like that). The x "splat" needs log2 (sz) operations (but they might be slow?), but in the end it might pay off for power-of-two (sz) (one could even do this in chunks)? Anyway, this might be for a followup. Otherwise it looks OK. Thanks, Richard. > + delete[] op; > + return true; > + } > + break; > + > case BUILT_IN_MEMSET: > if (gimple_call_num_args (stmt2) != 3 > || gimple_call_lhs (stmt2) > diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc > index 7b3e3899ea2..5afbae1b72e 100644 > --- a/gcc/tree-ssa-strlen.cc > +++ b/gcc/tree-ssa-strlen.cc > @@ -3913,8 +3913,8 @@ strlen_pass::handle_builtin_memset (bool *zero_write) > nonnull if and only RES is used in such expressions exclusively and > in none other. */ > > -static gimple * > -use_in_zero_equality (tree res, bool exclusive = true) > +gimple * > +use_in_zero_equality (tree res, bool exclusive) > { > gimple *first_use = NULL; > > diff --git a/gcc/tree-ssa-strlen.h b/gcc/tree-ssa-strlen.h > index 8d155450db8..fdb4d9d7783 100644 > --- a/gcc/tree-ssa-strlen.h > +++ b/gcc/tree-ssa-strlen.h > @@ -35,6 +35,8 @@ struct c_strlen_data; > extern void get_range_strlen_dynamic (tree, gimple *, c_strlen_data *, > pointer_query &); > > +extern gimple *use_in_zero_equality (tree, bool = true); > + > /* APIs internal to strlen pass. Defined in gimple-ssa-sprintf.cc. */ > extern bool handle_printf_call (gimple_stmt_iterator *, pointer_query &); > > -- > 2.36.1 >
On Fri, Jul 8, 2022 at 5:54 AM Richard Biener <richard.guenther@gmail.com> wrote: > > On Thu, Jul 7, 2022 at 6:45 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > When memchr is applied on a constant string of no more than the bytes of > > a word, simplify memchr by checking each byte in the constant string. > > > > int f (int a) > > { > > return __builtin_memchr ("AE", a, 2) != 0; > > } > > > > is simplified to > > > > int f (int a) > > { > > return ((char) a == 'A' || (char) a == 'E') != 0; > > } > > > > gcc/ > > > > PR tree-optimization/103798 > > * tree-ssa-forwprop.cc: Include "tree-ssa-strlen.h". > > (simplify_builtin_call): Inline memchr with constant strings of > > no more than the bytes of a word. > > * tree-ssa-strlen.cc (use_in_zero_equality): Make it global. > > * tree-ssa-strlen.h (use_in_zero_equality): New. > > > > gcc/testsuite/ > > > > PR tree-optimization/103798 > > * c-c++-common/pr103798-1.c: New test. > > * c-c++-common/pr103798-2.c: Likewise. > > * c-c++-common/pr103798-3.c: Likewise. > > * c-c++-common/pr103798-4.c: Likewise. > > * c-c++-common/pr103798-5.c: Likewise. > > * c-c++-common/pr103798-6.c: Likewise. > > * c-c++-common/pr103798-7.c: Likewise. > > * c-c++-common/pr103798-8.c: Likewise. > > --- > > gcc/testsuite/c-c++-common/pr103798-1.c | 28 +++++++++++ > > gcc/testsuite/c-c++-common/pr103798-2.c | 30 ++++++++++++ > > gcc/testsuite/c-c++-common/pr103798-3.c | 28 +++++++++++ > > gcc/testsuite/c-c++-common/pr103798-4.c | 28 +++++++++++ > > gcc/testsuite/c-c++-common/pr103798-5.c | 26 ++++++++++ > > gcc/testsuite/c-c++-common/pr103798-6.c | 27 +++++++++++ > > gcc/testsuite/c-c++-common/pr103798-7.c | 27 +++++++++++ > > gcc/testsuite/c-c++-common/pr103798-8.c | 27 +++++++++++ > > gcc/tree-ssa-forwprop.cc | 64 +++++++++++++++++++++++++ > > gcc/tree-ssa-strlen.cc | 4 +- > > gcc/tree-ssa-strlen.h | 2 + > > 11 files changed, 289 insertions(+), 2 deletions(-) > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-1.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-2.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-3.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-4.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-5.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-6.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-7.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-8.c > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c b/gcc/testsuite/c-c++-common/pr103798-1.c > > new file mode 100644 > > index 00000000000..cd3edf569fc > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-1.c > > @@ -0,0 +1,28 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int > > +f (char a) > > +{ > > + return __builtin_memchr ("a", a, 1) == 0; > > +} > > + > > +__attribute__ ((weak)) > > +int > > +g (char a) > > +{ > > + return a != 'a'; > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c b/gcc/testsuite/c-c++-common/pr103798-2.c > > new file mode 100644 > > index 00000000000..e7e99c3679e > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-2.c > > @@ -0,0 +1,30 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +#include <string.h> > > + > > +__attribute__ ((weak)) > > +int > > +f (int a) > > +{ > > + return memchr ("aE", a, 2) != NULL; > > +} > > + > > +__attribute__ ((weak)) > > +int > > +g (char a) > > +{ > > + return a == 'a' || a == 'E'; > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i + 256) != g (i + 256)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c b/gcc/testsuite/c-c++-common/pr103798-3.c > > new file mode 100644 > > index 00000000000..ddcedc7e238 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-3.c > > @@ -0,0 +1,28 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int > > +f (char a) > > +{ > > + return __builtin_memchr ("aEgZ", a, 3) == 0; > > +} > > + > > +__attribute__ ((weak)) > > +int > > +g (char a) > > +{ > > + return a != 'a' && a != 'E' && a != 'g'; > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-4.c b/gcc/testsuite/c-c++-common/pr103798-4.c > > new file mode 100644 > > index 00000000000..00e8302a833 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-4.c > > @@ -0,0 +1,28 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int > > +f (char a) > > +{ > > + return __builtin_memchr ("aEgi", a, 4) != 0; > > +} > > + > > +__attribute__ ((weak)) > > +int > > +g (char a) > > +{ > > + return a == 'a' || a == 'E' || a == 'g' || a == 'i'; > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-5.c b/gcc/testsuite/c-c++-common/pr103798-5.c > > new file mode 100644 > > index 00000000000..0d6487a13df > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-5.c > > @@ -0,0 +1,26 @@ > > +/* { dg-do run { target int128 } } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int f(char a) > > +{ > > + return __builtin_memchr ("aEgiH", a, 5) == 0; > > +} > > + > > +__attribute__ ((weak)) > > +int g(char a) > > +{ > > + return a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H'; > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-6.c b/gcc/testsuite/c-c++-common/pr103798-6.c > > new file mode 100644 > > index 00000000000..5ccb5ee66e0 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-6.c > > @@ -0,0 +1,27 @@ > > +/* { dg-do run { target int128 } } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int f(char a) > > +{ > > + return __builtin_memchr ("aEgiHx", a, 6) != 0; > > +} > > + > > +__attribute__ ((weak)) > > +int g(char a) > > +{ > > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > > + || a == 'x'); > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-7.c b/gcc/testsuite/c-c++-common/pr103798-7.c > > new file mode 100644 > > index 00000000000..40fd38257d1 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-7.c > > @@ -0,0 +1,27 @@ > > +/* { dg-do run { target int128 } } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int f(char a) > > +{ > > + return __builtin_memchr ("aEgiHjZ", a, 7) == 0; > > +} > > + > > +__attribute__ ((weak)) > > +int g(char a) > > +{ > > + return (a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H' > > + && a != 'j' && a != 'Z'); > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-8.c b/gcc/testsuite/c-c++-common/pr103798-8.c > > new file mode 100644 > > index 00000000000..0841b18cea4 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-8.c > > @@ -0,0 +1,27 @@ > > +/* { dg-do run { target int128 } } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int f(int a) > > +{ > > + return __builtin_memchr ("aEgiHx19ABC", a, 8) != 0; > > +} > > + > > +__attribute__ ((weak)) > > +int g(char a) > > +{ > > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > > + || a == 'x' || a == '1' || a == '9'); > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i + 256) != g (i + 256)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > index 69567ab3275..edd1277c23b 100644 > > --- a/gcc/tree-ssa-forwprop.cc > > +++ b/gcc/tree-ssa-forwprop.cc > > @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see > > #include "tree-dfa.h" > > #include "tree-ssa-propagate.h" > > #include "tree-ssa-dom.h" > > +#include "tree-ssa-strlen.h" > > #include "builtins.h" > > #include "tree-cfgcleanup.h" > > #include "cfganal.h" > > @@ -1177,6 +1178,15 @@ constant_pointer_difference (tree p1, tree p2) > > memcpy (p, "abcd ", 7); > > call if the latter can be stored by pieces during expansion. > > > > + Optimize > > + memchr ("abcd", a, 4) == 0; > > + or > > + memchr ("abcd", a, 4) != 0; > > + to > > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 > > + or > > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') != 0 > > + > > Also canonicalize __atomic_fetch_op (p, x, y) op x > > to __atomic_op_fetch (p, x, y) or > > __atomic_op_fetch (p, x, y) iop x > > @@ -1193,8 +1203,62 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2) > > return false; > > stmt1 = SSA_NAME_DEF_STMT (vuse); > > > > + tree res; > > + > > switch (DECL_FUNCTION_CODE (callee2)) > > { > > + case BUILT_IN_MEMCHR: > > + if (gimple_call_num_args (stmt2) == 3 > > + && (res = gimple_call_lhs (stmt2)) != nullptr > > + && use_in_zero_equality (res) != nullptr > > + && CHAR_BIT == 8 > > + && BITS_PER_UNIT == 8) > > + { > > + tree ptr = gimple_call_arg (stmt2, 0); > > + if (TREE_CODE (ptr) != ADDR_EXPR > > + || TREE_CODE (TREE_OPERAND (ptr, 0)) != STRING_CST) > > + break; > > + unsigned HOST_WIDE_INT slen > > + = TREE_STRING_LENGTH (TREE_OPERAND (ptr, 0)); > > + /* It must be a non-empty string constant. */ > > + if (slen < 2) > > + break; > > + tree size = gimple_call_arg (stmt2, 2); > > + /* Size must be a constant which is <= UNITS_PER_WORD and > > + <= the string length. */ > > + if (TREE_CODE (size) != INTEGER_CST > > + || integer_zerop (size) > > + || wi::gtu_p (wi::to_wide (size), UNITS_PER_WORD) > > + || wi::geu_p (wi::to_wide (size), slen)) > > it might be convenient to check tree_fits_uhwi_p (size) and > do unsigned HOST_WIDE_INT sz = tree_to_uhwi (size); instead > of playing with wide-int here. Fixed. > > + break; > > + tree ch = gimple_call_arg (stmt2, 1); > > + location_t loc = gimple_location (stmt2); > > + if (!useless_type_conversion_p (char_type_node, > > + TREE_TYPE (ch))) > > + ch = fold_convert_loc (loc, char_type_node, ch); > > + const char *p = TREE_STRING_POINTER (TREE_OPERAND (ptr, 0)); > > + unsigned int isize = TREE_INT_CST_LOW (size); > > + tree *op = new tree[isize]; > > + for (unsigned int i = 0; i < isize; i++) > > + { > > + op[i] = build_int_cst (char_type_node, p[i]); > > + op[i] = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, > > + op[i], ch); > > + } > > + for (unsigned int i = isize - 1; i >= 1; i--) > > + op[i - 1] = fold_convert_loc (loc, boolean_type_node, > > + fold_build2_loc (loc, > > + BIT_IOR_EXPR, > > + boolean_type_node, > > + op[i - 1], > > + op[i])); > > + res = fold_convert_loc (loc, TREE_TYPE (res), op[0]); > > + gimplify_and_update_call_from_tree (gsi_p, res); > > my worry about -Os remains, can you please add a check > like optimize_bb_for_speed (gimple_bb (stmt2)) || sz <= 2 Fixed. > (so we allow inline expanding two comparisons when not optimizing > for speed)? For UNITS_PER_WORD == 8 this is possibly a > lot of compares, don't we have some _BY_PIECEs limit we There are a few comparisons. Should I limit it to 4 bytes? > can use here? There's no COMPARE_RATIO it seems, > not sure what we use there. > > Since we only inline expand memchr(..) != 0 we're leaving > 'a' != x | 'b' != x | ... optimization to possibly memchr("abcd", a, 4) == 0 is optimized to (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 by this patch. > 'ab...' != (x|x<<8|x<<12|...) for followup (I'm quite sure we don't It works only for memchr("aaaa", a, 4) == 0. It is optimized to x != 'a' by this patch. Thanks. > do anything like that). The x "splat" needs log2 (sz) operations > (but they might be slow?), but in the end it might pay off > for power-of-two (sz) (one could even do this in chunks)? > Anyway, this might be for a followup. > > Otherwise it looks OK. > > Thanks, > Richard. > > > + delete[] op; > > + return true; > > + } > > + break; > > + > > case BUILT_IN_MEMSET: > > if (gimple_call_num_args (stmt2) != 3 > > || gimple_call_lhs (stmt2) > > diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc > > index 7b3e3899ea2..5afbae1b72e 100644 > > --- a/gcc/tree-ssa-strlen.cc > > +++ b/gcc/tree-ssa-strlen.cc > > @@ -3913,8 +3913,8 @@ strlen_pass::handle_builtin_memset (bool *zero_write) > > nonnull if and only RES is used in such expressions exclusively and > > in none other. */ > > > > -static gimple * > > -use_in_zero_equality (tree res, bool exclusive = true) > > +gimple * > > +use_in_zero_equality (tree res, bool exclusive) > > { > > gimple *first_use = NULL; > > > > diff --git a/gcc/tree-ssa-strlen.h b/gcc/tree-ssa-strlen.h > > index 8d155450db8..fdb4d9d7783 100644 > > --- a/gcc/tree-ssa-strlen.h > > +++ b/gcc/tree-ssa-strlen.h > > @@ -35,6 +35,8 @@ struct c_strlen_data; > > extern void get_range_strlen_dynamic (tree, gimple *, c_strlen_data *, > > pointer_query &); > > > > +extern gimple *use_in_zero_equality (tree, bool = true); > > + > > /* APIs internal to strlen pass. Defined in gimple-ssa-sprintf.cc. */ > > extern bool handle_printf_call (gimple_stmt_iterator *, pointer_query &); > > > > -- > > 2.36.1 > >
On Tue, Jul 12, 2022 at 6:59 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Fri, Jul 8, 2022 at 5:54 AM Richard Biener > <richard.guenther@gmail.com> wrote: > > > > On Thu, Jul 7, 2022 at 6:45 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > When memchr is applied on a constant string of no more than the bytes of > > > a word, simplify memchr by checking each byte in the constant string. > > > > > > int f (int a) > > > { > > > return __builtin_memchr ("AE", a, 2) != 0; > > > } > > > > > > is simplified to > > > > > > int f (int a) > > > { > > > return ((char) a == 'A' || (char) a == 'E') != 0; > > > } > > > > > > gcc/ > > > > > > PR tree-optimization/103798 > > > * tree-ssa-forwprop.cc: Include "tree-ssa-strlen.h". > > > (simplify_builtin_call): Inline memchr with constant strings of > > > no more than the bytes of a word. > > > * tree-ssa-strlen.cc (use_in_zero_equality): Make it global. > > > * tree-ssa-strlen.h (use_in_zero_equality): New. > > > > > > gcc/testsuite/ > > > > > > PR tree-optimization/103798 > > > * c-c++-common/pr103798-1.c: New test. > > > * c-c++-common/pr103798-2.c: Likewise. > > > * c-c++-common/pr103798-3.c: Likewise. > > > * c-c++-common/pr103798-4.c: Likewise. > > > * c-c++-common/pr103798-5.c: Likewise. > > > * c-c++-common/pr103798-6.c: Likewise. > > > * c-c++-common/pr103798-7.c: Likewise. > > > * c-c++-common/pr103798-8.c: Likewise. > > > --- > > > gcc/testsuite/c-c++-common/pr103798-1.c | 28 +++++++++++ > > > gcc/testsuite/c-c++-common/pr103798-2.c | 30 ++++++++++++ > > > gcc/testsuite/c-c++-common/pr103798-3.c | 28 +++++++++++ > > > gcc/testsuite/c-c++-common/pr103798-4.c | 28 +++++++++++ > > > gcc/testsuite/c-c++-common/pr103798-5.c | 26 ++++++++++ > > > gcc/testsuite/c-c++-common/pr103798-6.c | 27 +++++++++++ > > > gcc/testsuite/c-c++-common/pr103798-7.c | 27 +++++++++++ > > > gcc/testsuite/c-c++-common/pr103798-8.c | 27 +++++++++++ > > > gcc/tree-ssa-forwprop.cc | 64 +++++++++++++++++++++++++ > > > gcc/tree-ssa-strlen.cc | 4 +- > > > gcc/tree-ssa-strlen.h | 2 + > > > 11 files changed, 289 insertions(+), 2 deletions(-) > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-1.c > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-2.c > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-3.c > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-4.c > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-5.c > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-6.c > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-7.c > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-8.c > > > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c b/gcc/testsuite/c-c++-common/pr103798-1.c > > > new file mode 100644 > > > index 00000000000..cd3edf569fc > > > --- /dev/null > > > +++ b/gcc/testsuite/c-c++-common/pr103798-1.c > > > @@ -0,0 +1,28 @@ > > > +/* { dg-do run } */ > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > + > > > +__attribute__ ((weak)) > > > +int > > > +f (char a) > > > +{ > > > + return __builtin_memchr ("a", a, 1) == 0; > > > +} > > > + > > > +__attribute__ ((weak)) > > > +int > > > +g (char a) > > > +{ > > > + return a != 'a'; > > > +} > > > + > > > +int > > > +main () > > > +{ > > > + for (int i = 0; i < 255; i++) > > > + if (f (i) != g (i)) > > > + __builtin_abort (); > > > + > > > + return 0; > > > +} > > > + > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c b/gcc/testsuite/c-c++-common/pr103798-2.c > > > new file mode 100644 > > > index 00000000000..e7e99c3679e > > > --- /dev/null > > > +++ b/gcc/testsuite/c-c++-common/pr103798-2.c > > > @@ -0,0 +1,30 @@ > > > +/* { dg-do run } */ > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > + > > > +#include <string.h> > > > + > > > +__attribute__ ((weak)) > > > +int > > > +f (int a) > > > +{ > > > + return memchr ("aE", a, 2) != NULL; > > > +} > > > + > > > +__attribute__ ((weak)) > > > +int > > > +g (char a) > > > +{ > > > + return a == 'a' || a == 'E'; > > > +} > > > + > > > +int > > > +main () > > > +{ > > > + for (int i = 0; i < 255; i++) > > > + if (f (i + 256) != g (i + 256)) > > > + __builtin_abort (); > > > + > > > + return 0; > > > +} > > > + > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c b/gcc/testsuite/c-c++-common/pr103798-3.c > > > new file mode 100644 > > > index 00000000000..ddcedc7e238 > > > --- /dev/null > > > +++ b/gcc/testsuite/c-c++-common/pr103798-3.c > > > @@ -0,0 +1,28 @@ > > > +/* { dg-do run } */ > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > + > > > +__attribute__ ((weak)) > > > +int > > > +f (char a) > > > +{ > > > + return __builtin_memchr ("aEgZ", a, 3) == 0; > > > +} > > > + > > > +__attribute__ ((weak)) > > > +int > > > +g (char a) > > > +{ > > > + return a != 'a' && a != 'E' && a != 'g'; > > > +} > > > + > > > +int > > > +main () > > > +{ > > > + for (int i = 0; i < 255; i++) > > > + if (f (i) != g (i)) > > > + __builtin_abort (); > > > + > > > + return 0; > > > +} > > > + > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-4.c b/gcc/testsuite/c-c++-common/pr103798-4.c > > > new file mode 100644 > > > index 00000000000..00e8302a833 > > > --- /dev/null > > > +++ b/gcc/testsuite/c-c++-common/pr103798-4.c > > > @@ -0,0 +1,28 @@ > > > +/* { dg-do run } */ > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > + > > > +__attribute__ ((weak)) > > > +int > > > +f (char a) > > > +{ > > > + return __builtin_memchr ("aEgi", a, 4) != 0; > > > +} > > > + > > > +__attribute__ ((weak)) > > > +int > > > +g (char a) > > > +{ > > > + return a == 'a' || a == 'E' || a == 'g' || a == 'i'; > > > +} > > > + > > > +int > > > +main () > > > +{ > > > + for (int i = 0; i < 255; i++) > > > + if (f (i) != g (i)) > > > + __builtin_abort (); > > > + > > > + return 0; > > > +} > > > + > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-5.c b/gcc/testsuite/c-c++-common/pr103798-5.c > > > new file mode 100644 > > > index 00000000000..0d6487a13df > > > --- /dev/null > > > +++ b/gcc/testsuite/c-c++-common/pr103798-5.c > > > @@ -0,0 +1,26 @@ > > > +/* { dg-do run { target int128 } } */ > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > + > > > +__attribute__ ((weak)) > > > +int f(char a) > > > +{ > > > + return __builtin_memchr ("aEgiH", a, 5) == 0; > > > +} > > > + > > > +__attribute__ ((weak)) > > > +int g(char a) > > > +{ > > > + return a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H'; > > > +} > > > + > > > +int > > > +main () > > > +{ > > > + for (int i = 0; i < 255; i++) > > > + if (f (i) != g (i)) > > > + __builtin_abort (); > > > + > > > + return 0; > > > +} > > > + > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-6.c b/gcc/testsuite/c-c++-common/pr103798-6.c > > > new file mode 100644 > > > index 00000000000..5ccb5ee66e0 > > > --- /dev/null > > > +++ b/gcc/testsuite/c-c++-common/pr103798-6.c > > > @@ -0,0 +1,27 @@ > > > +/* { dg-do run { target int128 } } */ > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > + > > > +__attribute__ ((weak)) > > > +int f(char a) > > > +{ > > > + return __builtin_memchr ("aEgiHx", a, 6) != 0; > > > +} > > > + > > > +__attribute__ ((weak)) > > > +int g(char a) > > > +{ > > > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > > > + || a == 'x'); > > > +} > > > + > > > +int > > > +main () > > > +{ > > > + for (int i = 0; i < 255; i++) > > > + if (f (i) != g (i)) > > > + __builtin_abort (); > > > + > > > + return 0; > > > +} > > > + > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-7.c b/gcc/testsuite/c-c++-common/pr103798-7.c > > > new file mode 100644 > > > index 00000000000..40fd38257d1 > > > --- /dev/null > > > +++ b/gcc/testsuite/c-c++-common/pr103798-7.c > > > @@ -0,0 +1,27 @@ > > > +/* { dg-do run { target int128 } } */ > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > + > > > +__attribute__ ((weak)) > > > +int f(char a) > > > +{ > > > + return __builtin_memchr ("aEgiHjZ", a, 7) == 0; > > > +} > > > + > > > +__attribute__ ((weak)) > > > +int g(char a) > > > +{ > > > + return (a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H' > > > + && a != 'j' && a != 'Z'); > > > +} > > > + > > > +int > > > +main () > > > +{ > > > + for (int i = 0; i < 255; i++) > > > + if (f (i) != g (i)) > > > + __builtin_abort (); > > > + > > > + return 0; > > > +} > > > + > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-8.c b/gcc/testsuite/c-c++-common/pr103798-8.c > > > new file mode 100644 > > > index 00000000000..0841b18cea4 > > > --- /dev/null > > > +++ b/gcc/testsuite/c-c++-common/pr103798-8.c > > > @@ -0,0 +1,27 @@ > > > +/* { dg-do run { target int128 } } */ > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > + > > > +__attribute__ ((weak)) > > > +int f(int a) > > > +{ > > > + return __builtin_memchr ("aEgiHx19ABC", a, 8) != 0; > > > +} > > > + > > > +__attribute__ ((weak)) > > > +int g(char a) > > > +{ > > > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > > > + || a == 'x' || a == '1' || a == '9'); > > > +} > > > + > > > +int > > > +main () > > > +{ > > > + for (int i = 0; i < 255; i++) > > > + if (f (i + 256) != g (i + 256)) > > > + __builtin_abort (); > > > + > > > + return 0; > > > +} > > > + > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > > index 69567ab3275..edd1277c23b 100644 > > > --- a/gcc/tree-ssa-forwprop.cc > > > +++ b/gcc/tree-ssa-forwprop.cc > > > @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see > > > #include "tree-dfa.h" > > > #include "tree-ssa-propagate.h" > > > #include "tree-ssa-dom.h" > > > +#include "tree-ssa-strlen.h" > > > #include "builtins.h" > > > #include "tree-cfgcleanup.h" > > > #include "cfganal.h" > > > @@ -1177,6 +1178,15 @@ constant_pointer_difference (tree p1, tree p2) > > > memcpy (p, "abcd ", 7); > > > call if the latter can be stored by pieces during expansion. > > > > > > + Optimize > > > + memchr ("abcd", a, 4) == 0; > > > + or > > > + memchr ("abcd", a, 4) != 0; > > > + to > > > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 > > > + or > > > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') != 0 > > > + > > > Also canonicalize __atomic_fetch_op (p, x, y) op x > > > to __atomic_op_fetch (p, x, y) or > > > __atomic_op_fetch (p, x, y) iop x > > > @@ -1193,8 +1203,62 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2) > > > return false; > > > stmt1 = SSA_NAME_DEF_STMT (vuse); > > > > > > + tree res; > > > + > > > switch (DECL_FUNCTION_CODE (callee2)) > > > { > > > + case BUILT_IN_MEMCHR: > > > + if (gimple_call_num_args (stmt2) == 3 > > > + && (res = gimple_call_lhs (stmt2)) != nullptr > > > + && use_in_zero_equality (res) != nullptr > > > + && CHAR_BIT == 8 > > > + && BITS_PER_UNIT == 8) > > > + { > > > + tree ptr = gimple_call_arg (stmt2, 0); > > > + if (TREE_CODE (ptr) != ADDR_EXPR > > > + || TREE_CODE (TREE_OPERAND (ptr, 0)) != STRING_CST) > > > + break; > > > + unsigned HOST_WIDE_INT slen > > > + = TREE_STRING_LENGTH (TREE_OPERAND (ptr, 0)); > > > + /* It must be a non-empty string constant. */ > > > + if (slen < 2) > > > + break; > > > + tree size = gimple_call_arg (stmt2, 2); > > > + /* Size must be a constant which is <= UNITS_PER_WORD and > > > + <= the string length. */ > > > + if (TREE_CODE (size) != INTEGER_CST > > > + || integer_zerop (size) > > > + || wi::gtu_p (wi::to_wide (size), UNITS_PER_WORD) > > > + || wi::geu_p (wi::to_wide (size), slen)) > > > > it might be convenient to check tree_fits_uhwi_p (size) and > > do unsigned HOST_WIDE_INT sz = tree_to_uhwi (size); instead > > of playing with wide-int here. > > Fixed. > > > > + break; > > > + tree ch = gimple_call_arg (stmt2, 1); > > > + location_t loc = gimple_location (stmt2); > > > + if (!useless_type_conversion_p (char_type_node, > > > + TREE_TYPE (ch))) > > > + ch = fold_convert_loc (loc, char_type_node, ch); > > > + const char *p = TREE_STRING_POINTER (TREE_OPERAND (ptr, 0)); > > > + unsigned int isize = TREE_INT_CST_LOW (size); > > > + tree *op = new tree[isize]; > > > + for (unsigned int i = 0; i < isize; i++) > > > + { > > > + op[i] = build_int_cst (char_type_node, p[i]); > > > + op[i] = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, > > > + op[i], ch); > > > + } > > > + for (unsigned int i = isize - 1; i >= 1; i--) > > > + op[i - 1] = fold_convert_loc (loc, boolean_type_node, > > > + fold_build2_loc (loc, > > > + BIT_IOR_EXPR, > > > + boolean_type_node, > > > + op[i - 1], > > > + op[i])); > > > + res = fold_convert_loc (loc, TREE_TYPE (res), op[0]); > > > + gimplify_and_update_call_from_tree (gsi_p, res); > > > > my worry about -Os remains, can you please add a check > > like optimize_bb_for_speed (gimple_bb (stmt2)) || sz <= 2 > > Fixed. > > > (so we allow inline expanding two comparisons when not optimizing > > for speed)? For UNITS_PER_WORD == 8 this is possibly a > > lot of compares, don't we have some _BY_PIECEs limit we > > There are a few comparisons. Should I limit it to 4 bytes? Not sure, if we don't have any existing target configurable limit then using UNITS_PER_WORD is fine for now I guess. > > can use here? There's no COMPARE_RATIO it seems, > > not sure what we use there. > > > > Since we only inline expand memchr(..) != 0 we're leaving > > 'a' != x | 'b' != x | ... optimization to possibly > > memchr("abcd", a, 4) == 0 is optimized to > > (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 > > by this patch. > > > 'ab...' != (x|x<<8|x<<12|...) for followup (I'm quite sure we don't > > It works only for memchr("aaaa", a, 4) == 0. It is optimized to > x != 'a' by this patch. Hmm, true ;) Btw, no updated patch in your mail? Thanks, Richard. > > Thanks. > > > do anything like that). The x "splat" needs log2 (sz) operations > > (but they might be slow?), but in the end it might pay off > > for power-of-two (sz) (one could even do this in chunks)? > > Anyway, this might be for a followup. > > > > Otherwise it looks OK. > > > > Thanks, > > Richard. > > > > > + delete[] op; > > > + return true; > > > + } > > > + break; > > > + > > > case BUILT_IN_MEMSET: > > > if (gimple_call_num_args (stmt2) != 3 > > > || gimple_call_lhs (stmt2) > > > diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc > > > index 7b3e3899ea2..5afbae1b72e 100644 > > > --- a/gcc/tree-ssa-strlen.cc > > > +++ b/gcc/tree-ssa-strlen.cc > > > @@ -3913,8 +3913,8 @@ strlen_pass::handle_builtin_memset (bool *zero_write) > > > nonnull if and only RES is used in such expressions exclusively and > > > in none other. */ > > > > > > -static gimple * > > > -use_in_zero_equality (tree res, bool exclusive = true) > > > +gimple * > > > +use_in_zero_equality (tree res, bool exclusive) > > > { > > > gimple *first_use = NULL; > > > > > > diff --git a/gcc/tree-ssa-strlen.h b/gcc/tree-ssa-strlen.h > > > index 8d155450db8..fdb4d9d7783 100644 > > > --- a/gcc/tree-ssa-strlen.h > > > +++ b/gcc/tree-ssa-strlen.h > > > @@ -35,6 +35,8 @@ struct c_strlen_data; > > > extern void get_range_strlen_dynamic (tree, gimple *, c_strlen_data *, > > > pointer_query &); > > > > > > +extern gimple *use_in_zero_equality (tree, bool = true); > > > + > > > /* APIs internal to strlen pass. Defined in gimple-ssa-sprintf.cc. */ > > > extern bool handle_printf_call (gimple_stmt_iterator *, pointer_query &); > > > > > > -- > > > 2.36.1 > > > > > > > -- > H.J.
On Wed, Jul 13, 2022 at 5:35 AM Richard Biener <richard.guenther@gmail.com> wrote: > > On Tue, Jul 12, 2022 at 6:59 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > On Fri, Jul 8, 2022 at 5:54 AM Richard Biener > > <richard.guenther@gmail.com> wrote: > > > > > > On Thu, Jul 7, 2022 at 6:45 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > When memchr is applied on a constant string of no more than the bytes of > > > > a word, simplify memchr by checking each byte in the constant string. > > > > > > > > int f (int a) > > > > { > > > > return __builtin_memchr ("AE", a, 2) != 0; > > > > } > > > > > > > > is simplified to > > > > > > > > int f (int a) > > > > { > > > > return ((char) a == 'A' || (char) a == 'E') != 0; > > > > } > > > > > > > > gcc/ > > > > > > > > PR tree-optimization/103798 > > > > * tree-ssa-forwprop.cc: Include "tree-ssa-strlen.h". > > > > (simplify_builtin_call): Inline memchr with constant strings of > > > > no more than the bytes of a word. > > > > * tree-ssa-strlen.cc (use_in_zero_equality): Make it global. > > > > * tree-ssa-strlen.h (use_in_zero_equality): New. > > > > > > > > gcc/testsuite/ > > > > > > > > PR tree-optimization/103798 > > > > * c-c++-common/pr103798-1.c: New test. > > > > * c-c++-common/pr103798-2.c: Likewise. > > > > * c-c++-common/pr103798-3.c: Likewise. > > > > * c-c++-common/pr103798-4.c: Likewise. > > > > * c-c++-common/pr103798-5.c: Likewise. > > > > * c-c++-common/pr103798-6.c: Likewise. > > > > * c-c++-common/pr103798-7.c: Likewise. > > > > * c-c++-common/pr103798-8.c: Likewise. > > > > --- > > > > gcc/testsuite/c-c++-common/pr103798-1.c | 28 +++++++++++ > > > > gcc/testsuite/c-c++-common/pr103798-2.c | 30 ++++++++++++ > > > > gcc/testsuite/c-c++-common/pr103798-3.c | 28 +++++++++++ > > > > gcc/testsuite/c-c++-common/pr103798-4.c | 28 +++++++++++ > > > > gcc/testsuite/c-c++-common/pr103798-5.c | 26 ++++++++++ > > > > gcc/testsuite/c-c++-common/pr103798-6.c | 27 +++++++++++ > > > > gcc/testsuite/c-c++-common/pr103798-7.c | 27 +++++++++++ > > > > gcc/testsuite/c-c++-common/pr103798-8.c | 27 +++++++++++ > > > > gcc/tree-ssa-forwprop.cc | 64 +++++++++++++++++++++++++ > > > > gcc/tree-ssa-strlen.cc | 4 +- > > > > gcc/tree-ssa-strlen.h | 2 + > > > > 11 files changed, 289 insertions(+), 2 deletions(-) > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-1.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-2.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-3.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-4.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-5.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-6.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-7.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-8.c > > > > > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c b/gcc/testsuite/c-c++-common/pr103798-1.c > > > > new file mode 100644 > > > > index 00000000000..cd3edf569fc > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-1.c > > > > @@ -0,0 +1,28 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +f (char a) > > > > +{ > > > > + return __builtin_memchr ("a", a, 1) == 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +g (char a) > > > > +{ > > > > + return a != 'a'; > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c b/gcc/testsuite/c-c++-common/pr103798-2.c > > > > new file mode 100644 > > > > index 00000000000..e7e99c3679e > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-2.c > > > > @@ -0,0 +1,30 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +#include <string.h> > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +f (int a) > > > > +{ > > > > + return memchr ("aE", a, 2) != NULL; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +g (char a) > > > > +{ > > > > + return a == 'a' || a == 'E'; > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i + 256) != g (i + 256)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c b/gcc/testsuite/c-c++-common/pr103798-3.c > > > > new file mode 100644 > > > > index 00000000000..ddcedc7e238 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-3.c > > > > @@ -0,0 +1,28 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +f (char a) > > > > +{ > > > > + return __builtin_memchr ("aEgZ", a, 3) == 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +g (char a) > > > > +{ > > > > + return a != 'a' && a != 'E' && a != 'g'; > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-4.c b/gcc/testsuite/c-c++-common/pr103798-4.c > > > > new file mode 100644 > > > > index 00000000000..00e8302a833 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-4.c > > > > @@ -0,0 +1,28 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +f (char a) > > > > +{ > > > > + return __builtin_memchr ("aEgi", a, 4) != 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +g (char a) > > > > +{ > > > > + return a == 'a' || a == 'E' || a == 'g' || a == 'i'; > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-5.c b/gcc/testsuite/c-c++-common/pr103798-5.c > > > > new file mode 100644 > > > > index 00000000000..0d6487a13df > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-5.c > > > > @@ -0,0 +1,26 @@ > > > > +/* { dg-do run { target int128 } } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int f(char a) > > > > +{ > > > > + return __builtin_memchr ("aEgiH", a, 5) == 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int g(char a) > > > > +{ > > > > + return a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H'; > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-6.c b/gcc/testsuite/c-c++-common/pr103798-6.c > > > > new file mode 100644 > > > > index 00000000000..5ccb5ee66e0 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-6.c > > > > @@ -0,0 +1,27 @@ > > > > +/* { dg-do run { target int128 } } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int f(char a) > > > > +{ > > > > + return __builtin_memchr ("aEgiHx", a, 6) != 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int g(char a) > > > > +{ > > > > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > > > > + || a == 'x'); > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-7.c b/gcc/testsuite/c-c++-common/pr103798-7.c > > > > new file mode 100644 > > > > index 00000000000..40fd38257d1 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-7.c > > > > @@ -0,0 +1,27 @@ > > > > +/* { dg-do run { target int128 } } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int f(char a) > > > > +{ > > > > + return __builtin_memchr ("aEgiHjZ", a, 7) == 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int g(char a) > > > > +{ > > > > + return (a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H' > > > > + && a != 'j' && a != 'Z'); > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-8.c b/gcc/testsuite/c-c++-common/pr103798-8.c > > > > new file mode 100644 > > > > index 00000000000..0841b18cea4 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-8.c > > > > @@ -0,0 +1,27 @@ > > > > +/* { dg-do run { target int128 } } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int f(int a) > > > > +{ > > > > + return __builtin_memchr ("aEgiHx19ABC", a, 8) != 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int g(char a) > > > > +{ > > > > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > > > > + || a == 'x' || a == '1' || a == '9'); > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i + 256) != g (i + 256)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > > > index 69567ab3275..edd1277c23b 100644 > > > > --- a/gcc/tree-ssa-forwprop.cc > > > > +++ b/gcc/tree-ssa-forwprop.cc > > > > @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see > > > > #include "tree-dfa.h" > > > > #include "tree-ssa-propagate.h" > > > > #include "tree-ssa-dom.h" > > > > +#include "tree-ssa-strlen.h" > > > > #include "builtins.h" > > > > #include "tree-cfgcleanup.h" > > > > #include "cfganal.h" > > > > @@ -1177,6 +1178,15 @@ constant_pointer_difference (tree p1, tree p2) > > > > memcpy (p, "abcd ", 7); > > > > call if the latter can be stored by pieces during expansion. > > > > > > > > + Optimize > > > > + memchr ("abcd", a, 4) == 0; > > > > + or > > > > + memchr ("abcd", a, 4) != 0; > > > > + to > > > > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 > > > > + or > > > > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') != 0 > > > > + > > > > Also canonicalize __atomic_fetch_op (p, x, y) op x > > > > to __atomic_op_fetch (p, x, y) or > > > > __atomic_op_fetch (p, x, y) iop x > > > > @@ -1193,8 +1203,62 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2) > > > > return false; > > > > stmt1 = SSA_NAME_DEF_STMT (vuse); > > > > > > > > + tree res; > > > > + > > > > switch (DECL_FUNCTION_CODE (callee2)) > > > > { > > > > + case BUILT_IN_MEMCHR: > > > > + if (gimple_call_num_args (stmt2) == 3 > > > > + && (res = gimple_call_lhs (stmt2)) != nullptr > > > > + && use_in_zero_equality (res) != nullptr > > > > + && CHAR_BIT == 8 > > > > + && BITS_PER_UNIT == 8) > > > > + { > > > > + tree ptr = gimple_call_arg (stmt2, 0); > > > > + if (TREE_CODE (ptr) != ADDR_EXPR > > > > + || TREE_CODE (TREE_OPERAND (ptr, 0)) != STRING_CST) > > > > + break; > > > > + unsigned HOST_WIDE_INT slen > > > > + = TREE_STRING_LENGTH (TREE_OPERAND (ptr, 0)); > > > > + /* It must be a non-empty string constant. */ > > > > + if (slen < 2) > > > > + break; > > > > + tree size = gimple_call_arg (stmt2, 2); > > > > + /* Size must be a constant which is <= UNITS_PER_WORD and > > > > + <= the string length. */ > > > > + if (TREE_CODE (size) != INTEGER_CST > > > > + || integer_zerop (size) > > > > + || wi::gtu_p (wi::to_wide (size), UNITS_PER_WORD) > > > > + || wi::geu_p (wi::to_wide (size), slen)) > > > > > > it might be convenient to check tree_fits_uhwi_p (size) and > > > do unsigned HOST_WIDE_INT sz = tree_to_uhwi (size); instead > > > of playing with wide-int here. > > > > Fixed. > > > > > > + break; > > > > + tree ch = gimple_call_arg (stmt2, 1); > > > > + location_t loc = gimple_location (stmt2); > > > > + if (!useless_type_conversion_p (char_type_node, > > > > + TREE_TYPE (ch))) > > > > + ch = fold_convert_loc (loc, char_type_node, ch); > > > > + const char *p = TREE_STRING_POINTER (TREE_OPERAND (ptr, 0)); > > > > + unsigned int isize = TREE_INT_CST_LOW (size); > > > > + tree *op = new tree[isize]; > > > > + for (unsigned int i = 0; i < isize; i++) > > > > + { > > > > + op[i] = build_int_cst (char_type_node, p[i]); > > > > + op[i] = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, > > > > + op[i], ch); > > > > + } > > > > + for (unsigned int i = isize - 1; i >= 1; i--) > > > > + op[i - 1] = fold_convert_loc (loc, boolean_type_node, > > > > + fold_build2_loc (loc, > > > > + BIT_IOR_EXPR, > > > > + boolean_type_node, > > > > + op[i - 1], > > > > + op[i])); > > > > + res = fold_convert_loc (loc, TREE_TYPE (res), op[0]); > > > > + gimplify_and_update_call_from_tree (gsi_p, res); > > > > > > my worry about -Os remains, can you please add a check > > > like optimize_bb_for_speed (gimple_bb (stmt2)) || sz <= 2 > > > > Fixed. > > > > > (so we allow inline expanding two comparisons when not optimizing > > > for speed)? For UNITS_PER_WORD == 8 this is possibly a > > > lot of compares, don't we have some _BY_PIECEs limit we > > > > There are a few comparisons. Should I limit it to 4 bytes? > > Not sure, if we don't have any existing target configurable limit > then using UNITS_PER_WORD is fine for now I guess. > > > > can use here? There's no COMPARE_RATIO it seems, > > > not sure what we use there. > > > > > > Since we only inline expand memchr(..) != 0 we're leaving > > > 'a' != x | 'b' != x | ... optimization to possibly > > > > memchr("abcd", a, 4) == 0 is optimized to > > > > (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 > > > > by this patch. > > > > > 'ab...' != (x|x<<8|x<<12|...) for followup (I'm quite sure we don't > > > > It works only for memchr("aaaa", a, 4) == 0. It is optimized to > > x != 'a' by this patch. > > Hmm, true ;) > > Btw, no updated patch in your mail? Here is the v3 patch: https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598362.html Thanks. > Thanks > Richard. > > > > > Thanks. > > > > > do anything like that). The x "splat" needs log2 (sz) operations > > > (but they might be slow?), but in the end it might pay off > > > for power-of-two (sz) (one could even do this in chunks)? > > > Anyway, this might be for a followup. > > > > > > Otherwise it looks OK. > > > > > > Thanks, > > > Richard. > > > > > > > + delete[] op; > > > > + return true; > > > > + } > > > > + break; > > > > + > > > > case BUILT_IN_MEMSET: > > > > if (gimple_call_num_args (stmt2) != 3 > > > > || gimple_call_lhs (stmt2) > > > > diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc > > > > index 7b3e3899ea2..5afbae1b72e 100644 > > > > --- a/gcc/tree-ssa-strlen.cc > > > > +++ b/gcc/tree-ssa-strlen.cc > > > > @@ -3913,8 +3913,8 @@ strlen_pass::handle_builtin_memset (bool *zero_write) > > > > nonnull if and only RES is used in such expressions exclusively and > > > > in none other. */ > > > > > > > > -static gimple * > > > > -use_in_zero_equality (tree res, bool exclusive = true) > > > > +gimple * > > > > +use_in_zero_equality (tree res, bool exclusive) > > > > { > > > > gimple *first_use = NULL; > > > > > > > > diff --git a/gcc/tree-ssa-strlen.h b/gcc/tree-ssa-strlen.h > > > > index 8d155450db8..fdb4d9d7783 100644 > > > > --- a/gcc/tree-ssa-strlen.h > > > > +++ b/gcc/tree-ssa-strlen.h > > > > @@ -35,6 +35,8 @@ struct c_strlen_data; > > > > extern void get_range_strlen_dynamic (tree, gimple *, c_strlen_data *, > > > > pointer_query &); > > > > > > > > +extern gimple *use_in_zero_equality (tree, bool = true); > > > > + > > > > /* APIs internal to strlen pass. Defined in gimple-ssa-sprintf.cc. */ > > > > extern bool handle_printf_call (gimple_stmt_iterator *, pointer_query &); > > > > > > > > -- > > > > 2.36.1 > > > > > > > > > > > > -- > > H.J.
diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c b/gcc/testsuite/c-c++-common/pr103798-1.c new file mode 100644 index 00000000000..cd3edf569fc --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr103798-1.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ + +__attribute__ ((weak)) +int +f (char a) +{ + return __builtin_memchr ("a", a, 1) == 0; +} + +__attribute__ ((weak)) +int +g (char a) +{ + return a != 'a'; +} + +int +main () +{ + for (int i = 0; i < 255; i++) + if (f (i) != g (i)) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-assembler-not "memchr" } } */ diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c b/gcc/testsuite/c-c++-common/pr103798-2.c new file mode 100644 index 00000000000..e7e99c3679e --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr103798-2.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ + +#include <string.h> + +__attribute__ ((weak)) +int +f (int a) +{ + return memchr ("aE", a, 2) != NULL; +} + +__attribute__ ((weak)) +int +g (char a) +{ + return a == 'a' || a == 'E'; +} + +int +main () +{ + for (int i = 0; i < 255; i++) + if (f (i + 256) != g (i + 256)) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-assembler-not "memchr" } } */ diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c b/gcc/testsuite/c-c++-common/pr103798-3.c new file mode 100644 index 00000000000..ddcedc7e238 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr103798-3.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ + +__attribute__ ((weak)) +int +f (char a) +{ + return __builtin_memchr ("aEgZ", a, 3) == 0; +} + +__attribute__ ((weak)) +int +g (char a) +{ + return a != 'a' && a != 'E' && a != 'g'; +} + +int +main () +{ + for (int i = 0; i < 255; i++) + if (f (i) != g (i)) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-assembler-not "memchr" } } */ diff --git a/gcc/testsuite/c-c++-common/pr103798-4.c b/gcc/testsuite/c-c++-common/pr103798-4.c new file mode 100644 index 00000000000..00e8302a833 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr103798-4.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ + +__attribute__ ((weak)) +int +f (char a) +{ + return __builtin_memchr ("aEgi", a, 4) != 0; +} + +__attribute__ ((weak)) +int +g (char a) +{ + return a == 'a' || a == 'E' || a == 'g' || a == 'i'; +} + +int +main () +{ + for (int i = 0; i < 255; i++) + if (f (i) != g (i)) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-assembler-not "memchr" } } */ diff --git a/gcc/testsuite/c-c++-common/pr103798-5.c b/gcc/testsuite/c-c++-common/pr103798-5.c new file mode 100644 index 00000000000..0d6487a13df --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr103798-5.c @@ -0,0 +1,26 @@ +/* { dg-do run { target int128 } } */ +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ + +__attribute__ ((weak)) +int f(char a) +{ + return __builtin_memchr ("aEgiH", a, 5) == 0; +} + +__attribute__ ((weak)) +int g(char a) +{ + return a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H'; +} + +int +main () +{ + for (int i = 0; i < 255; i++) + if (f (i) != g (i)) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-assembler-not "memchr" } } */ diff --git a/gcc/testsuite/c-c++-common/pr103798-6.c b/gcc/testsuite/c-c++-common/pr103798-6.c new file mode 100644 index 00000000000..5ccb5ee66e0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr103798-6.c @@ -0,0 +1,27 @@ +/* { dg-do run { target int128 } } */ +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ + +__attribute__ ((weak)) +int f(char a) +{ + return __builtin_memchr ("aEgiHx", a, 6) != 0; +} + +__attribute__ ((weak)) +int g(char a) +{ + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' + || a == 'x'); +} + +int +main () +{ + for (int i = 0; i < 255; i++) + if (f (i) != g (i)) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-assembler-not "memchr" } } */ diff --git a/gcc/testsuite/c-c++-common/pr103798-7.c b/gcc/testsuite/c-c++-common/pr103798-7.c new file mode 100644 index 00000000000..40fd38257d1 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr103798-7.c @@ -0,0 +1,27 @@ +/* { dg-do run { target int128 } } */ +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ + +__attribute__ ((weak)) +int f(char a) +{ + return __builtin_memchr ("aEgiHjZ", a, 7) == 0; +} + +__attribute__ ((weak)) +int g(char a) +{ + return (a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H' + && a != 'j' && a != 'Z'); +} + +int +main () +{ + for (int i = 0; i < 255; i++) + if (f (i) != g (i)) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-assembler-not "memchr" } } */ diff --git a/gcc/testsuite/c-c++-common/pr103798-8.c b/gcc/testsuite/c-c++-common/pr103798-8.c new file mode 100644 index 00000000000..0841b18cea4 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr103798-8.c @@ -0,0 +1,27 @@ +/* { dg-do run { target int128 } } */ +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ + +__attribute__ ((weak)) +int f(int a) +{ + return __builtin_memchr ("aEgiHx19ABC", a, 8) != 0; +} + +__attribute__ ((weak)) +int g(char a) +{ + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' + || a == 'x' || a == '1' || a == '9'); +} + +int +main () +{ + for (int i = 0; i < 255; i++) + if (f (i + 256) != g (i + 256)) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-assembler-not "memchr" } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 69567ab3275..edd1277c23b 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-dfa.h" #include "tree-ssa-propagate.h" #include "tree-ssa-dom.h" +#include "tree-ssa-strlen.h" #include "builtins.h" #include "tree-cfgcleanup.h" #include "cfganal.h" @@ -1177,6 +1178,15 @@ constant_pointer_difference (tree p1, tree p2) memcpy (p, "abcd ", 7); call if the latter can be stored by pieces during expansion. + Optimize + memchr ("abcd", a, 4) == 0; + or + memchr ("abcd", a, 4) != 0; + to + (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 + or + (a == 'a' || a == 'b' || a == 'c' || a == 'd') != 0 + Also canonicalize __atomic_fetch_op (p, x, y) op x to __atomic_op_fetch (p, x, y) or __atomic_op_fetch (p, x, y) iop x @@ -1193,8 +1203,62 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2) return false; stmt1 = SSA_NAME_DEF_STMT (vuse); + tree res; + switch (DECL_FUNCTION_CODE (callee2)) { + case BUILT_IN_MEMCHR: + if (gimple_call_num_args (stmt2) == 3 + && (res = gimple_call_lhs (stmt2)) != nullptr + && use_in_zero_equality (res) != nullptr + && CHAR_BIT == 8 + && BITS_PER_UNIT == 8) + { + tree ptr = gimple_call_arg (stmt2, 0); + if (TREE_CODE (ptr) != ADDR_EXPR + || TREE_CODE (TREE_OPERAND (ptr, 0)) != STRING_CST) + break; + unsigned HOST_WIDE_INT slen + = TREE_STRING_LENGTH (TREE_OPERAND (ptr, 0)); + /* It must be a non-empty string constant. */ + if (slen < 2) + break; + tree size = gimple_call_arg (stmt2, 2); + /* Size must be a constant which is <= UNITS_PER_WORD and + <= the string length. */ + if (TREE_CODE (size) != INTEGER_CST + || integer_zerop (size) + || wi::gtu_p (wi::to_wide (size), UNITS_PER_WORD) + || wi::geu_p (wi::to_wide (size), slen)) + break; + tree ch = gimple_call_arg (stmt2, 1); + location_t loc = gimple_location (stmt2); + if (!useless_type_conversion_p (char_type_node, + TREE_TYPE (ch))) + ch = fold_convert_loc (loc, char_type_node, ch); + const char *p = TREE_STRING_POINTER (TREE_OPERAND (ptr, 0)); + unsigned int isize = TREE_INT_CST_LOW (size); + tree *op = new tree[isize]; + for (unsigned int i = 0; i < isize; i++) + { + op[i] = build_int_cst (char_type_node, p[i]); + op[i] = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, + op[i], ch); + } + for (unsigned int i = isize - 1; i >= 1; i--) + op[i - 1] = fold_convert_loc (loc, boolean_type_node, + fold_build2_loc (loc, + BIT_IOR_EXPR, + boolean_type_node, + op[i - 1], + op[i])); + res = fold_convert_loc (loc, TREE_TYPE (res), op[0]); + gimplify_and_update_call_from_tree (gsi_p, res); + delete[] op; + return true; + } + break; + case BUILT_IN_MEMSET: if (gimple_call_num_args (stmt2) != 3 || gimple_call_lhs (stmt2) diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc index 7b3e3899ea2..5afbae1b72e 100644 --- a/gcc/tree-ssa-strlen.cc +++ b/gcc/tree-ssa-strlen.cc @@ -3913,8 +3913,8 @@ strlen_pass::handle_builtin_memset (bool *zero_write) nonnull if and only RES is used in such expressions exclusively and in none other. */ -static gimple * -use_in_zero_equality (tree res, bool exclusive = true) +gimple * +use_in_zero_equality (tree res, bool exclusive) { gimple *first_use = NULL; diff --git a/gcc/tree-ssa-strlen.h b/gcc/tree-ssa-strlen.h index 8d155450db8..fdb4d9d7783 100644 --- a/gcc/tree-ssa-strlen.h +++ b/gcc/tree-ssa-strlen.h @@ -35,6 +35,8 @@ struct c_strlen_data; extern void get_range_strlen_dynamic (tree, gimple *, c_strlen_data *, pointer_query &); +extern gimple *use_in_zero_equality (tree, bool = true); + /* APIs internal to strlen pass. Defined in gimple-ssa-sprintf.cc. */ extern bool handle_printf_call (gimple_stmt_iterator *, pointer_query &);