Message ID | 20101011204617.GJ2082@tyan-ft48-01.lab.bos.redhat.com |
---|---|
State | New |
Headers | show |
On Mon, 11 Oct 2010, Jakub Jelinek wrote: > Hi! > > This patch optimizes > memcpy (x, "abcd", 4); > memset (x + 4, ' ', 4); > sequences into a single > memcpy (x, "abcd ", 8); > if it is known (or expected) that the memcpy will be implemented using > store_by_pieces (otherwise we'd risk enlarging .rodata too much, e.g. > if there are only few longer string literals in memcpy originally and > adding too many different memset variants after them would mean the string > literals couldn't be shared anymore). > The patch also handles > memcpy (x, "a", 1); > memset (x + 1, 'b', 11); > where the memcpy has been optimized into a store. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2010-10-11 Jakub Jelinek <jakub@redhat.com> > > PR fortran/45636 > * tree-ssa-forwprop.c: Include expr.h. > (constant_pointer_difference, simplify_builtin_call): New functions. > (tree_ssa_forward_propagate_single_use_vars): Call > simplify_builtin_call on builtin calls. > > * gcc.c-torture/execute/pr45636.c: New test. > * gfortran.dg/pr45636.f90: New test. > > --- gcc/tree-ssa-forwprop.c.jj 2010-10-07 19:44:48.000000000 +0200 > +++ gcc/tree-ssa-forwprop.c 2010-10-11 16:43:03.000000000 +0200 > @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. > #include "langhooks.h" > #include "flags.h" > #include "gimple.h" > +#include "expr.h" > > /* This pass propagates the RHS of assignment statements into use > sites of the LHS of the assignment. It's basically a specialized > @@ -1317,6 +1318,296 @@ simplify_gimple_switch (gimple stmt) > } > } > > +/* For pointers p2 and p1 return p2 - p1 if the > + difference is known and constant, otherwise return NULL. */ > + > +static tree > +constant_pointer_difference (tree p1, tree p2) > +{ > + int i, j; > + tree exps[2][5]; > + tree offs[2][5]; > + int cnt[2]; > + > + for (i = 0; i < 2; i++) > + { > + tree p = i ? p1 : p2; > + tree off = size_zero_node; > + gimple stmt; > + enum tree_code code; > + > + j = 0; > + do > + { > + if (!POINTER_TYPE_P (TREE_TYPE (p))) > + break; > + if (TREE_CODE (p) == ADDR_EXPR) > + { > + tree q = TREE_OPERAND (p, 0); > + if (handled_component_p (q) > + || TREE_CODE (q) == MEM_REF > + || TREE_CODE (q) == TARGET_MEM_REF) > + { > + HOST_WIDE_INT offset, size, maxsize; > + tree base > + = get_ref_base_and_extent (q, &offset, > + &size, &maxsize); Please use get_addr_base_and_unit_offset here, that simplifies the following checks > + if (size == maxsize > + && offset >= 0 > + && (offset % BITS_PER_UNIT) == 0) > + { > + q = base; > + off = size_binop (PLUS_EXPR, off, > + size_int (offset / BITS_PER_UNIT)); > + } > + } > + if (TREE_CODE (q) == MEM_REF > + && TREE_CODE (TREE_OPERAND (q, 0)) == SSA_NAME) > + { > + p = TREE_OPERAND (q, 0); > + off = size_binop (PLUS_EXPR, off, > + fold_convert (sizetype, > + TREE_OPERAND (q, 1))); Use double_int_to_tree (sizetype, mem_ref_offset (q)) which properly sign-extends the pointer offset. > + } > + else > + { > + exps[i][j] = q; > + offs[i][j++] = off; > + break; > + } > + } > + if (TREE_CODE (p) != SSA_NAME) > + break; > + exps[i][j] = p; > + offs[i][j++] = off; > + if (j == 5) > + break; > + stmt = SSA_NAME_DEF_STMT (p); > + if (!is_gimple_assign (stmt) || gimple_assign_lhs (stmt) != p) > + break; > + code = gimple_assign_rhs_code (stmt); > + if (code == POINTER_PLUS_EXPR) > + { > + if (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST) > + break; > + off = size_binop (PLUS_EXPR, off, gimple_assign_rhs2 (stmt)); > + p = gimple_assign_rhs1 (stmt); > + } > + else if (code == ADDR_EXPR || code == NOP_EXPR) > + p = gimple_assign_rhs1 (stmt); > + else > + break; > + } > + while (1); > + cnt[i] = j; > + } I wonder why the forward-propagation of POINTER_PLUS_EXPR and ADDR_EXPR doesn't make the above redundant? > + for (i = 0; i < cnt[0]; i++) > + for (j = 0; j < cnt[1]; j++) > + if (exps[0][i] == exps[1][j]) > + return size_binop (MINUS_EXPR, offs[0][i], offs[1][j]); > + > + return NULL_TREE; > +} > + > +/* *GSI_P is a GIMPLE_CALL to a builtin function. > + Optimize > + memcpy (p, "abcd", 4); > + memset (p + 4, ' ', 3); > + into > + memcpy (p, "abcd ", 7); > + call if the latter can be stored by pieces during expansion. */ > + > +static bool > +simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2) > +{ > + gimple stmt1, stmt2 = gsi_stmt (*gsi_p); > + tree vuse = gimple_vuse (stmt2); > + if (vuse == NULL) > + return false; > + stmt1 = SSA_NAME_DEF_STMT (vuse); > + > + switch (DECL_FUNCTION_CODE (callee2)) > + { > + case BUILT_IN_MEMSET: > + if (gimple_call_num_args (stmt2) != 3 > + || gimple_call_lhs (stmt2) > + || CHAR_BIT != 8 > + || BITS_PER_UNIT != 8) > + break; > + else > + { > + tree callee1; > + tree ptr1, src1, str1, off1, len1, lhs1; > + tree ptr2 = gimple_call_arg (stmt2, 0); > + tree val2 = gimple_call_arg (stmt2, 1); > + tree len2 = gimple_call_arg (stmt2, 2); > + tree diff, vdef, new_str_cst; > + unsigned int ptr1_align; > + unsigned HOST_WIDE_INT src_len; > + char *src_buf; > + imm_use_iterator imm_iter; > + use_operand_p use_p; > + bool do_fail; > + > + if (!host_integerp (val2, 0) > + || !host_integerp (len2, 1)) > + break; > + if (is_gimple_call (stmt1)) > + { > + callee1 = gimple_call_fndecl (stmt1); > + if (callee1 == NULL_TREE > + || DECL_BUILT_IN_CLASS (callee1) != BUILT_IN_NORMAL > + || gimple_call_num_args (stmt1) != 3) > + break; > + if (DECL_FUNCTION_CODE (callee1) != BUILT_IN_MEMCPY > + && DECL_FUNCTION_CODE (callee1) != BUILT_IN_MEMPCPY) > + break; > + ptr1 = gimple_call_arg (stmt1, 0); > + src1 = gimple_call_arg (stmt1, 1); > + len1 = gimple_call_arg (stmt1, 2); > + lhs1 = gimple_call_lhs (stmt1); > + if (!host_integerp (len1, 1)) > + break; > + str1 = string_constant (src1, &off1); > + if (str1 == NULL_TREE) > + break; > + if (!host_integerp (off1, 1) > + || compare_tree_int (off1, TREE_STRING_LENGTH (str1) - 1) > 0 > + || compare_tree_int (len1, TREE_STRING_LENGTH (str1) > + - tree_low_cst (off1, 1)) > 0 > + || TREE_CODE (TREE_TYPE (str1)) != ARRAY_TYPE > + || TYPE_MODE (TREE_TYPE (TREE_TYPE (str1))) > + != TYPE_MODE (char_type_node)) > + break; > + } > + else if (gimple_assign_single_p (stmt1)) > + { > + ptr1 = gimple_assign_lhs (stmt1); > + src1 = gimple_assign_rhs1 (stmt1); > + if (TREE_CODE (ptr1) != MEM_REF > + || TYPE_MODE (TREE_TYPE (ptr1)) != TYPE_MODE (char_type_node) > + || !host_integerp (src1, 0)) > + break; > + ptr1 = build_fold_addr_expr (ptr1); > + callee1 = NULL_TREE; > + len1 = size_one_node; > + lhs1 = NULL_TREE; > + off1 = size_zero_node; > + str1 = NULL_TREE; > + } > + else > + break; > + > + diff = constant_pointer_difference (ptr1, ptr2); > + if (diff == NULL && lhs1 != NULL) > + { > + diff = constant_pointer_difference (lhs1, ptr2); > + if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY > + && diff != NULL) > + diff = size_binop (PLUS_EXPR, diff, > + fold_convert (sizetype, len1)); > + } > + if (diff == NULL > + || !host_integerp (diff, 1) > + || tree_int_cst_lt (len1, diff)) > + break; > + > + src_len = tree_low_cst (diff, 1); > + src_len += tree_low_cst (len2, 1); > + if (src_len < (unsigned HOST_WIDE_INT) tree_low_cst (len1, 1)) > + src_len = tree_low_cst (len1, 1); > + if (src_len > 1024) > + break; > + > + do_fail = false; > + if (lhs1 != NULL > + && DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY) > + { > + if (TREE_CODE (lhs1) != SSA_NAME) > + break; > + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs1) > + { > + if (USE_STMT (use_p) != stmt2) > + { > + do_fail = true; > + break; > + } > + } That looks like if (TREE_CODE (lhs1) != SSA_NAME || !single_imm_use (lhs1, &use_p, &use_stmt) || use_stmt != stmt2) break; and your loop gets confused by DEBUG_STMTs at least (and possibly generates different code -g vs. -g0) > + if (do_fail) > + break; > + } > + vdef = gimple_vdef (stmt1); > + if (vdef != NULL) > + { > + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, vdef) > + { > + if (USE_STMT (use_p) != stmt2) > + { > + do_fail = true; > + break; > + } > + } Likewise. Why do we care about other uses of the VDEF? > + if (do_fail) > + break; > + } > + > + ptr1_align = get_pointer_alignment (ptr1, BIGGEST_ALIGNMENT); > + src_buf = XALLOCAVEC (char, src_len + 1); > + if (callee1) > + memcpy (src_buf, > + TREE_STRING_POINTER (str1) + tree_low_cst (off1, 1), > + tree_low_cst (len1, 1)); > + else > + src_buf[0] = tree_low_cst (src1, 0); > + memset (src_buf + tree_low_cst (diff, 1), > + tree_low_cst (val2, 1), tree_low_cst (len2, 1)); > + src_buf[src_len] = '\0'; > + if (strlen (src_buf) != src_len) > + break; > + rtl_profile_for_bb (gimple_bb (stmt2)); > + if (!can_store_by_pieces (src_len, > + builtin_strncpy_read_str, > + src_buf, ptr1_align, false)) > + break; > + > + new_str_cst = build_string_literal (src_len, src_buf); > + if (callee1) > + { > + if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY) > + gimple_call_set_lhs (stmt1, NULL_TREE); > + gimple_call_set_arg (stmt1, 1, new_str_cst); > + gimple_call_set_arg (stmt1, 2, > + build_int_cst (TREE_TYPE (len1), src_len)); > + update_stmt (stmt1); > + unlink_stmt_vdef (stmt2); > + gsi_remove (gsi_p, true); > + release_defs (stmt2); > + return true; > + } > + else > + { > + gimple_stmt_iterator gsi = gsi_for_stmt (stmt1); > + > + gimple_call_set_fndecl (stmt2, built_in_decls [BUILT_IN_MEMCPY]); > + gimple_call_set_arg (stmt2, 0, ptr1); > + gimple_call_set_arg (stmt2, 1, new_str_cst); > + gimple_call_set_arg (stmt2, 2, > + build_int_cst (TREE_TYPE (len2), src_len)); > + unlink_stmt_vdef (stmt1); > + gsi_remove (&gsi, true); > + release_defs (stmt1); > + update_stmt (stmt2); > + return false; > + } > + } > + break; > + default: > + break; > + } > + return false; > +} The code above could use some comments, like those you gave about can_store_by_pieces in the mail. Otherwise the patch looks good, but I'm really curious as of why constant_pointer_difference is as complex as it is. Thanks, Richard. > /* Run bitwise and assignments throug the folder. If the first argument is an > ssa name that is itself a result of a typecast of an ADDR_EXPR to an > integer, feed the ADDR_EXPR to the folder rather than the ssa name. > @@ -1784,6 +2075,14 @@ tree_ssa_forward_propagate_single_use_va > WARN_STRICT_OVERFLOW_CONDITIONAL); > gsi_next (&gsi); > } > + else if (is_gimple_call (stmt)) > + { > + tree callee = gimple_call_fndecl (stmt); > + if (callee == NULL_TREE > + || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL > + || !simplify_builtin_call (&gsi, callee)) > + gsi_next (&gsi); > + } > else > gsi_next (&gsi); > } > --- gcc/Makefile.in.jj 2010-10-11 07:51:09.000000000 +0200 > +++ gcc/Makefile.in 2010-10-11 09:15:25.000000000 +0200 > @@ -2404,7 +2404,7 @@ tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG > tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) \ > - langhooks.h $(FLAGS_H) $(GIMPLE_H) tree-pretty-print.h > + langhooks.h $(FLAGS_H) $(GIMPLE_H) tree-pretty-print.h $(EXPR_H) > tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) \ > --- gcc/testsuite/gcc.c-torture/execute/pr45636.c.jj 2010-10-11 09:15:25.000000000 +0200 > +++ gcc/testsuite/gcc.c-torture/execute/pr45636.c 2010-10-11 09:15:25.000000000 +0200 > @@ -0,0 +1,75 @@ > +/* PR fortran/45636 */ > + > +typedef __SIZE_TYPE__ size_t; > +void *memcpy (void *__restrict__, const void *__restrict__, size_t); > +void *mempcpy (void *__restrict__, const void *__restrict__, size_t); > +void *memset (void *, int, size_t); > +int memcmp (const void *, const void *, size_t); > +extern void abort (void); > + > +struct A { int i; char c[32]; } a[2]; > + > +__attribute__((noinline, noclone)) int > +f1 (char *p, int q, int z) > +{ > + memcpy (p, "abcd", 4); > + if (q) > + z = z + 123; > + else > + z *= 114; > + memset (p + 4, ' ', 2); > + return z; > +} > + > +__attribute__((noinline, noclone)) void > +f2 (void) > +{ > + char *p = mempcpy (&a[0].c[13], "123456", 4); > + memset (p, '7', 3); > +} > + > +__attribute__((noinline, noclone)) void > +f3 (struct A *p) > +{ > + p++; > + char *q = &p->c[10]; > + memcpy (q + 4, "__1234567" + 2, 7); > + memset (&p->c[21], '9', 3); > +} > + > +__attribute__((noinline, noclone)) void > +f4 (void) > +{ > + memcpy (&a[0].c[10], "0123456789", 10); > + memset (&a[0].c[13], ' ', 3); > +} > + > +__attribute__((noinline, noclone)) void > +check (const char *p, const char *str, size_t size) > +{ > + const char *q; > + for (q = (const char *) &a; q < p; q++) > + if (*q) > + abort (); > + if (memcmp (p, str, size) != 0) > + abort (); > + for (q = p + size; q < (const char *) (&a[0] + 2); q++) > + if (*q) > + abort (); > + memset (&a, '\0', sizeof a); > +} > + > +int > +main (void) > +{ > + if (f1 (&a[0].c[7], 1, 2) != 125) > + abort (); > + check (&a[0].c[7], "abcd ", 6); > + f2 (); > + check (&a[0].c[13], "1234777", 7); > + f3 (&a[0]); > + check (&a[1].c[14], "1234567999", 10); > + f4 (); > + check (&a[0].c[10], "012 6789", 10); > + return 0; > +} > --- gcc/testsuite/gfortran.dg/pr45636.f90.jj 2010-10-11 13:19:41.000000000 +0200 > +++ gcc/testsuite/gfortran.dg/pr45636.f90 2010-10-11 16:49:19.000000000 +0200 > @@ -0,0 +1,14 @@ > +! PR fortran/45636 > +! { dg-do compile } > +! { dg-options "-O2 -fdump-tree-forwprop2" } > +! PR 45636 - make sure no memset is needed for a short right-hand side. > +program main > + character(len=2), parameter :: x='a ' > + character(len=1), parameter :: y='b' > + character(len=4) :: a, b > + a = x > + b = y > + call sub(a, b) > +end program main > +! { dg-final { scan-tree-dump-times "memset" 0 "forwprop2" } } > +! { dg-final { cleanup-tree-dump "forwprop2" } } > > Jakub > >
On Mon, Oct 11, 2010 at 1:46 PM, Jakub Jelinek <jakub@redhat.com> wrote: > Hi! > > This patch optimizes > memcpy (x, "abcd", 4); > memset (x + 4, ' ', 4); > sequences into a single > memcpy (x, "abcd ", 8); > if it is known (or expected) that the memcpy will be implemented using > store_by_pieces (otherwise we'd risk enlarging .rodata too much, e.g. > if there are only few longer string literals in memcpy originally and > adding too many different memset variants after them would mean the string > literals couldn't be shared anymore). > The patch also handles > memcpy (x, "a", 1); > memset (x + 1, 'b', 11); > where the memcpy has been optimized into a store. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2010-10-11 Jakub Jelinek <jakub@redhat.com> > > PR fortran/45636 > * tree-ssa-forwprop.c: Include expr.h. > (constant_pointer_difference, simplify_builtin_call): New functions. > (tree_ssa_forward_propagate_single_use_vars): Call > simplify_builtin_call on builtin calls. > This caused: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46461
--- gcc/tree-ssa-forwprop.c.jj 2010-10-07 19:44:48.000000000 +0200 +++ gcc/tree-ssa-forwprop.c 2010-10-11 16:43:03.000000000 +0200 @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. #include "langhooks.h" #include "flags.h" #include "gimple.h" +#include "expr.h" /* This pass propagates the RHS of assignment statements into use sites of the LHS of the assignment. It's basically a specialized @@ -1317,6 +1318,296 @@ simplify_gimple_switch (gimple stmt) } } +/* For pointers p2 and p1 return p2 - p1 if the + difference is known and constant, otherwise return NULL. */ + +static tree +constant_pointer_difference (tree p1, tree p2) +{ + int i, j; + tree exps[2][5]; + tree offs[2][5]; + int cnt[2]; + + for (i = 0; i < 2; i++) + { + tree p = i ? p1 : p2; + tree off = size_zero_node; + gimple stmt; + enum tree_code code; + + j = 0; + do + { + if (!POINTER_TYPE_P (TREE_TYPE (p))) + break; + if (TREE_CODE (p) == ADDR_EXPR) + { + tree q = TREE_OPERAND (p, 0); + if (handled_component_p (q) + || TREE_CODE (q) == MEM_REF + || TREE_CODE (q) == TARGET_MEM_REF) + { + HOST_WIDE_INT offset, size, maxsize; + tree base + = get_ref_base_and_extent (q, &offset, + &size, &maxsize); + if (size == maxsize + && offset >= 0 + && (offset % BITS_PER_UNIT) == 0) + { + q = base; + off = size_binop (PLUS_EXPR, off, + size_int (offset / BITS_PER_UNIT)); + } + } + if (TREE_CODE (q) == MEM_REF + && TREE_CODE (TREE_OPERAND (q, 0)) == SSA_NAME) + { + p = TREE_OPERAND (q, 0); + off = size_binop (PLUS_EXPR, off, + fold_convert (sizetype, + TREE_OPERAND (q, 1))); + } + else + { + exps[i][j] = q; + offs[i][j++] = off; + break; + } + } + if (TREE_CODE (p) != SSA_NAME) + break; + exps[i][j] = p; + offs[i][j++] = off; + if (j == 5) + break; + stmt = SSA_NAME_DEF_STMT (p); + if (!is_gimple_assign (stmt) || gimple_assign_lhs (stmt) != p) + break; + code = gimple_assign_rhs_code (stmt); + if (code == POINTER_PLUS_EXPR) + { + if (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST) + break; + off = size_binop (PLUS_EXPR, off, gimple_assign_rhs2 (stmt)); + p = gimple_assign_rhs1 (stmt); + } + else if (code == ADDR_EXPR || code == NOP_EXPR) + p = gimple_assign_rhs1 (stmt); + else + break; + } + while (1); + cnt[i] = j; + } + + for (i = 0; i < cnt[0]; i++) + for (j = 0; j < cnt[1]; j++) + if (exps[0][i] == exps[1][j]) + return size_binop (MINUS_EXPR, offs[0][i], offs[1][j]); + + return NULL_TREE; +} + +/* *GSI_P is a GIMPLE_CALL to a builtin function. + Optimize + memcpy (p, "abcd", 4); + memset (p + 4, ' ', 3); + into + memcpy (p, "abcd ", 7); + call if the latter can be stored by pieces during expansion. */ + +static bool +simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2) +{ + gimple stmt1, stmt2 = gsi_stmt (*gsi_p); + tree vuse = gimple_vuse (stmt2); + if (vuse == NULL) + return false; + stmt1 = SSA_NAME_DEF_STMT (vuse); + + switch (DECL_FUNCTION_CODE (callee2)) + { + case BUILT_IN_MEMSET: + if (gimple_call_num_args (stmt2) != 3 + || gimple_call_lhs (stmt2) + || CHAR_BIT != 8 + || BITS_PER_UNIT != 8) + break; + else + { + tree callee1; + tree ptr1, src1, str1, off1, len1, lhs1; + tree ptr2 = gimple_call_arg (stmt2, 0); + tree val2 = gimple_call_arg (stmt2, 1); + tree len2 = gimple_call_arg (stmt2, 2); + tree diff, vdef, new_str_cst; + unsigned int ptr1_align; + unsigned HOST_WIDE_INT src_len; + char *src_buf; + imm_use_iterator imm_iter; + use_operand_p use_p; + bool do_fail; + + if (!host_integerp (val2, 0) + || !host_integerp (len2, 1)) + break; + if (is_gimple_call (stmt1)) + { + callee1 = gimple_call_fndecl (stmt1); + if (callee1 == NULL_TREE + || DECL_BUILT_IN_CLASS (callee1) != BUILT_IN_NORMAL + || gimple_call_num_args (stmt1) != 3) + break; + if (DECL_FUNCTION_CODE (callee1) != BUILT_IN_MEMCPY + && DECL_FUNCTION_CODE (callee1) != BUILT_IN_MEMPCPY) + break; + ptr1 = gimple_call_arg (stmt1, 0); + src1 = gimple_call_arg (stmt1, 1); + len1 = gimple_call_arg (stmt1, 2); + lhs1 = gimple_call_lhs (stmt1); + if (!host_integerp (len1, 1)) + break; + str1 = string_constant (src1, &off1); + if (str1 == NULL_TREE) + break; + if (!host_integerp (off1, 1) + || compare_tree_int (off1, TREE_STRING_LENGTH (str1) - 1) > 0 + || compare_tree_int (len1, TREE_STRING_LENGTH (str1) + - tree_low_cst (off1, 1)) > 0 + || TREE_CODE (TREE_TYPE (str1)) != ARRAY_TYPE + || TYPE_MODE (TREE_TYPE (TREE_TYPE (str1))) + != TYPE_MODE (char_type_node)) + break; + } + else if (gimple_assign_single_p (stmt1)) + { + ptr1 = gimple_assign_lhs (stmt1); + src1 = gimple_assign_rhs1 (stmt1); + if (TREE_CODE (ptr1) != MEM_REF + || TYPE_MODE (TREE_TYPE (ptr1)) != TYPE_MODE (char_type_node) + || !host_integerp (src1, 0)) + break; + ptr1 = build_fold_addr_expr (ptr1); + callee1 = NULL_TREE; + len1 = size_one_node; + lhs1 = NULL_TREE; + off1 = size_zero_node; + str1 = NULL_TREE; + } + else + break; + + diff = constant_pointer_difference (ptr1, ptr2); + if (diff == NULL && lhs1 != NULL) + { + diff = constant_pointer_difference (lhs1, ptr2); + if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY + && diff != NULL) + diff = size_binop (PLUS_EXPR, diff, + fold_convert (sizetype, len1)); + } + if (diff == NULL + || !host_integerp (diff, 1) + || tree_int_cst_lt (len1, diff)) + break; + + src_len = tree_low_cst (diff, 1); + src_len += tree_low_cst (len2, 1); + if (src_len < (unsigned HOST_WIDE_INT) tree_low_cst (len1, 1)) + src_len = tree_low_cst (len1, 1); + if (src_len > 1024) + break; + + do_fail = false; + if (lhs1 != NULL + && DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY) + { + if (TREE_CODE (lhs1) != SSA_NAME) + break; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs1) + { + if (USE_STMT (use_p) != stmt2) + { + do_fail = true; + break; + } + } + if (do_fail) + break; + } + vdef = gimple_vdef (stmt1); + if (vdef != NULL) + { + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, vdef) + { + if (USE_STMT (use_p) != stmt2) + { + do_fail = true; + break; + } + } + if (do_fail) + break; + } + + ptr1_align = get_pointer_alignment (ptr1, BIGGEST_ALIGNMENT); + src_buf = XALLOCAVEC (char, src_len + 1); + if (callee1) + memcpy (src_buf, + TREE_STRING_POINTER (str1) + tree_low_cst (off1, 1), + tree_low_cst (len1, 1)); + else + src_buf[0] = tree_low_cst (src1, 0); + memset (src_buf + tree_low_cst (diff, 1), + tree_low_cst (val2, 1), tree_low_cst (len2, 1)); + src_buf[src_len] = '\0'; + if (strlen (src_buf) != src_len) + break; + rtl_profile_for_bb (gimple_bb (stmt2)); + if (!can_store_by_pieces (src_len, + builtin_strncpy_read_str, + src_buf, ptr1_align, false)) + break; + + new_str_cst = build_string_literal (src_len, src_buf); + if (callee1) + { + if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY) + gimple_call_set_lhs (stmt1, NULL_TREE); + gimple_call_set_arg (stmt1, 1, new_str_cst); + gimple_call_set_arg (stmt1, 2, + build_int_cst (TREE_TYPE (len1), src_len)); + update_stmt (stmt1); + unlink_stmt_vdef (stmt2); + gsi_remove (gsi_p, true); + release_defs (stmt2); + return true; + } + else + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt1); + + gimple_call_set_fndecl (stmt2, built_in_decls [BUILT_IN_MEMCPY]); + gimple_call_set_arg (stmt2, 0, ptr1); + gimple_call_set_arg (stmt2, 1, new_str_cst); + gimple_call_set_arg (stmt2, 2, + build_int_cst (TREE_TYPE (len2), src_len)); + unlink_stmt_vdef (stmt1); + gsi_remove (&gsi, true); + release_defs (stmt1); + update_stmt (stmt2); + return false; + } + } + break; + default: + break; + } + return false; +} + /* Run bitwise and assignments throug the folder. If the first argument is an ssa name that is itself a result of a typecast of an ADDR_EXPR to an integer, feed the ADDR_EXPR to the folder rather than the ssa name. @@ -1784,6 +2075,14 @@ tree_ssa_forward_propagate_single_use_va WARN_STRICT_OVERFLOW_CONDITIONAL); gsi_next (&gsi); } + else if (is_gimple_call (stmt)) + { + tree callee = gimple_call_fndecl (stmt); + if (callee == NULL_TREE + || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL + || !simplify_builtin_call (&gsi, callee)) + gsi_next (&gsi); + } else gsi_next (&gsi); } --- gcc/Makefile.in.jj 2010-10-11 07:51:09.000000000 +0200 +++ gcc/Makefile.in 2010-10-11 09:15:25.000000000 +0200 @@ -2404,7 +2404,7 @@ tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) \ - langhooks.h $(FLAGS_H) $(GIMPLE_H) tree-pretty-print.h + langhooks.h $(FLAGS_H) $(GIMPLE_H) tree-pretty-print.h $(EXPR_H) tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) \ --- gcc/testsuite/gcc.c-torture/execute/pr45636.c.jj 2010-10-11 09:15:25.000000000 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr45636.c 2010-10-11 09:15:25.000000000 +0200 @@ -0,0 +1,75 @@ +/* PR fortran/45636 */ + +typedef __SIZE_TYPE__ size_t; +void *memcpy (void *__restrict__, const void *__restrict__, size_t); +void *mempcpy (void *__restrict__, const void *__restrict__, size_t); +void *memset (void *, int, size_t); +int memcmp (const void *, const void *, size_t); +extern void abort (void); + +struct A { int i; char c[32]; } a[2]; + +__attribute__((noinline, noclone)) int +f1 (char *p, int q, int z) +{ + memcpy (p, "abcd", 4); + if (q) + z = z + 123; + else + z *= 114; + memset (p + 4, ' ', 2); + return z; +} + +__attribute__((noinline, noclone)) void +f2 (void) +{ + char *p = mempcpy (&a[0].c[13], "123456", 4); + memset (p, '7', 3); +} + +__attribute__((noinline, noclone)) void +f3 (struct A *p) +{ + p++; + char *q = &p->c[10]; + memcpy (q + 4, "__1234567" + 2, 7); + memset (&p->c[21], '9', 3); +} + +__attribute__((noinline, noclone)) void +f4 (void) +{ + memcpy (&a[0].c[10], "0123456789", 10); + memset (&a[0].c[13], ' ', 3); +} + +__attribute__((noinline, noclone)) void +check (const char *p, const char *str, size_t size) +{ + const char *q; + for (q = (const char *) &a; q < p; q++) + if (*q) + abort (); + if (memcmp (p, str, size) != 0) + abort (); + for (q = p + size; q < (const char *) (&a[0] + 2); q++) + if (*q) + abort (); + memset (&a, '\0', sizeof a); +} + +int +main (void) +{ + if (f1 (&a[0].c[7], 1, 2) != 125) + abort (); + check (&a[0].c[7], "abcd ", 6); + f2 (); + check (&a[0].c[13], "1234777", 7); + f3 (&a[0]); + check (&a[1].c[14], "1234567999", 10); + f4 (); + check (&a[0].c[10], "012 6789", 10); + return 0; +} --- gcc/testsuite/gfortran.dg/pr45636.f90.jj 2010-10-11 13:19:41.000000000 +0200 +++ gcc/testsuite/gfortran.dg/pr45636.f90 2010-10-11 16:49:19.000000000 +0200 @@ -0,0 +1,14 @@ +! PR fortran/45636 +! { dg-do compile } +! { dg-options "-O2 -fdump-tree-forwprop2" } +! PR 45636 - make sure no memset is needed for a short right-hand side. +program main + character(len=2), parameter :: x='a ' + character(len=1), parameter :: y='b' + character(len=4) :: a, b + a = x + b = y + call sub(a, b) +end program main +! { dg-final { scan-tree-dump-times "memset" 0 "forwprop2" } } +! { dg-final { cleanup-tree-dump "forwprop2" } }