Message ID | 34b3cfcf-9bca-2fb6-e9ef-f361d15497f7@redhat.com |
---|---|
State | New |
Headers | show |
On Fri, Dec 16, 2016 at 2:54 AM, Jeff Law <law@redhat.com> wrote: > This is the 3rd patch in the kit to improve our DSE implementation. > > This patch supports trimming of the head or tail of a memset, memcpy or > memmove call. It's conceptually similar to trimming CONSTRUCTORS (and was > in fact developed first). > > Try as I might, I couldn't find a BZ in our database that would be resolved > by this patch. There's BZs in the LLVM database in this space, but I didn't > actually test those. With that in mind, I don't think I can strictly call > this a bugfix. It does represent closing a deficiency when compared to > LLVM. So while I'd like to see it go onto the trunk, I won't lose sleep > if we defer to gcc-8. > > Note this patch relies on the alignment tweak I mentioned in the discussion > of patch #2 to avoid creating code that the strlen folding optimization > can't optimize. The code is still correct/valid, it's just in a form that > the strlen folders don't grok. > > This includes a trivial test that I used for development purposes. It hits > fairly often building GCC itself. If we wanted more coverage i the > testsuite, I could extract some tests from GCC and reduce them. > > This patch has (of course) been bootstrapped and regression tested on > x86_64-linux-gnu. OK for the trunk or defer to gcc-8? Didn't see a re-post of this one so reviewing the old. > > * tree-ssa-dse.c (need_ssa_update): New file scoped boolean. > (decrement_count): New function. > (increment_start_addr, trim_memstar_call): Likewise. > (trim_partially_dead_store): Call trim_memstar_call. > (pass_dse::execute): Initialize need_ssa_update. If set, then > return TODO_ssa_update. > > * gcc.dg/tree-ssa/ssa-dse-25.c: New test. > > diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c > index 1482c7f..b21b9b5 100644 > --- a/gcc/tree-ssa-dse.c > +++ b/gcc/tree-ssa-dse.c > @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup; > It is always safe to return FALSE. But typically better optimziation > can be achieved by analyzing more statements. */ > > +/* If trimming stores requires insertion of new statements, then we > + will need an SSA update. */ > +static bool need_ssa_update; > + huh? You set this to true after inserting a POINTER_PLUS_EXPR, I don't see how you need an SSA update for this. > static bool > initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) > { > @@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live, > gimple *stmt) > } > } > > +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes > + copied/set by DECREMENT. */ > +static void > +decrement_count (gimple *stmt, int decrement) > +{ > + tree *countp = gimple_call_arg_ptr (stmt, 2); > + gcc_assert (TREE_CODE (*countp) == INTEGER_CST); > + tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp, > + build_int_cst (TREE_TYPE (*countp), decrement)); > + *countp = x; thanks to wide-int the following should work *countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement); (if not please use int_const_binop rather than fold_build2 here and below as well) > +} > + > +static void > +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int > increment) > +{ > + /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */ > + if (TREE_CODE (*where) == ADDR_EXPR > + && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF) > + { > + tree t = TREE_OPERAND (*where, 0); > + t = build_ref_for_offset (EXPR_LOCATION (t), t, > + increment * BITS_PER_UNIT, false, > + ptr_type_node, NULL, false); please don't use build_ref_for_offset for this. Simply only handle the SSA_NAME case here and below ... > + *where = build_fold_addr_expr (t); > + return; > + } > + else if (TREE_CODE (*where) == SSA_NAME) > + { > + tree tem = make_ssa_name (TREE_TYPE (*where)); > + gassign *newop > + = gimple_build_assign (tem, POINTER_PLUS_EXPR, *where, > + build_int_cst (sizetype, increment)); > + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); > + gsi_insert_before (&gsi, newop, GSI_SAME_STMT); > + need_ssa_update = true; > + *where = tem; > + update_stmt (gsi_stmt (gsi)); > + return; > + } > + > + /* We can just adjust the offset in the MEM_REF expression. */ > + tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1); > + tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1, > + build_int_cst (TREE_TYPE (x1), increment)); > + TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x; ... re-fold the thing as MEM_REF which will do all the magic for you: *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node, *where, build_int_cst (ptr_type_node, increment))); that handles &MEM[] and &foo.bar just fine and avoids adding magic here. Otherwise looks ok. I think I'd like to see this in GCC 7 given it's so much similar to the constructor pruning. Richard. > + > +} > + > +/* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are > dead > + (ORIG & ~NEW) and need not be stored. Try to rewrite STMT to reduce > + the amount of data it actually writes. > + > + Right now we only support trimming from the head or the tail of the > + memory region. In theory we could split the mem* call, but it's > + likely of marginal value. */ > + > +static void > +trim_memstar_call (bitmap orig, bitmap live, gimple *stmt) > +{ > + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) > + { > + case BUILT_IN_MEMCPY: > + case BUILT_IN_MEMMOVE: > + { > + int head_trim, tail_trim; > + compute_trims (orig, live, &head_trim, &tail_trim); > + > + /* Tail trimming is easy, we can just reduce the count. */ > + if (tail_trim) > + decrement_count (stmt, tail_trim); > + > + /* Head trimming requires adjusting all the arguments. */ > + if (head_trim) > + { > + tree *dst = gimple_call_arg_ptr (stmt, 0); > + increment_start_addr (stmt, dst, head_trim); > + tree *src = gimple_call_arg_ptr (stmt, 1); > + increment_start_addr (stmt, src, head_trim); > + decrement_count (stmt, head_trim); > + } > + break; > + } > + > + case BUILT_IN_MEMSET: > + { > + int head_trim, tail_trim; > + compute_trims (orig, live, &head_trim, &tail_trim); > + > + /* Tail trimming is easy, we can just reduce the count. */ > + if (tail_trim) > + decrement_count (stmt, tail_trim); > + > + /* Head trimming requires adjusting all the arguments. */ > + if (head_trim) > + { > + tree *dst = gimple_call_arg_ptr (stmt, 0); > + increment_start_addr (stmt, dst, head_trim); > + decrement_count (stmt, head_trim); > + } > + break; > + } > + > + default: > + break; > + } > +} > + > /* STMT is a memory write where one or more bytes written are dead > stores. ORIG is the bitmap of bytes stored by STMT. LIVE is the > bitmap of stores that are actually live. > @@ -321,7 +433,9 @@ trim_constructor_store (bitmap orig, bitmap live, gimple > *stmt) > static void > trim_partially_dead_store (bitmap orig, bitmap live, gimple *stmt) > { > - if (is_gimple_assign (stmt)) > + if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) > + trim_memstar_call (orig, live, stmt); > + else if (is_gimple_assign (stmt)) > { > switch (gimple_assign_rhs_code (stmt)) > { > @@ -712,6 +826,7 @@ unsigned int > pass_dse::execute (function *fun) > { > need_eh_cleanup = BITMAP_ALLOC (NULL); > + need_ssa_update = false; > > renumber_gimple_stmt_uids (); > > @@ -738,7 +853,7 @@ pass_dse::execute (function *fun) > > /* For now, just wipe the post-dominator information. */ > free_dominance_info (CDI_POST_DOMINATORS); > - return 0; > + return (need_ssa_update ? TODO_update_ssa : 0); > } > > } // anon namespace > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c > new file mode 100644 > index 0000000..8b7db3a > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-dse1-details -w" } */ > + > +char z[32]; > + > + > +int > +foo(void) > +{ > + memset (z, 0, 16); > + memset (z+8, 0, 24); > +} > + > +/* { dg-final { scan-tree-dump "memset .&z, 0, 8." "dse1" } } */ > + > + >
On 01/04/2017 07:04 AM, Richard Biener wrote: > > Didn't see a re-post of this one so reviewing the old. Didn't figure mem* trimming was suitable for gcc-7 as I couldn't justify it as a bugfix, so I didn't ping it. I don't think it changed materially. All your comments are still applicable to the version in my tree. > >> >> * tree-ssa-dse.c (need_ssa_update): New file scoped boolean. >> (decrement_count): New function. >> (increment_start_addr, trim_memstar_call): Likewise. >> (trim_partially_dead_store): Call trim_memstar_call. >> (pass_dse::execute): Initialize need_ssa_update. If set, then >> return TODO_ssa_update. >> >> * gcc.dg/tree-ssa/ssa-dse-25.c: New test. >> >> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c >> index 1482c7f..b21b9b5 100644 >> --- a/gcc/tree-ssa-dse.c >> +++ b/gcc/tree-ssa-dse.c >> @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup; >> It is always safe to return FALSE. But typically better optimziation >> can be achieved by analyzing more statements. */ >> >> +/* If trimming stores requires insertion of new statements, then we >> + will need an SSA update. */ >> +static bool need_ssa_update; >> + > > huh? You set this to true after inserting a POINTER_PLUS_EXPR, I don't see > how you need an SSA update for this. I'll go back and re-investigate. I could easily have goof'd the in-place update and be papering over that with the ssa update. > >> static bool >> initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) >> { >> @@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live, >> gimple *stmt) >> } >> } >> >> +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes >> + copied/set by DECREMENT. */ >> +static void >> +decrement_count (gimple *stmt, int decrement) >> +{ >> + tree *countp = gimple_call_arg_ptr (stmt, 2); >> + gcc_assert (TREE_CODE (*countp) == INTEGER_CST); >> + tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp, >> + build_int_cst (TREE_TYPE (*countp), decrement)); >> + *countp = x; > > thanks to wide-int the following should work > > *countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement); Sweet. I like that much better. > > (if not please use int_const_binop rather than fold_build2 here and > below as well) > >> +} >> + >> +static void >> +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int >> increment) >> +{ >> + /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */ >> + if (TREE_CODE (*where) == ADDR_EXPR >> + && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF) >> + { >> + tree t = TREE_OPERAND (*where, 0); >> + t = build_ref_for_offset (EXPR_LOCATION (t), t, >> + increment * BITS_PER_UNIT, false, >> + ptr_type_node, NULL, false); > > please don't use build_ref_for_offset for this. Simply only handle the SSA_NAME > case here and below ... I think build_ref_for_offset was what spurred the tree-sra.h inclusion. IIRC I was seeing a goodly number of cases where the argument wasn't a MEM_REF or SSA_NAME at this point. But I'll double-check. If we don't need build_ref_for_offset, do you still want me to pull its prototype into the new tree-sra.h, or just leave it as-is? > >> + *where = build_fold_addr_expr (t); >> + return; >> + } >> + else if (TREE_CODE (*where) == SSA_NAME) >> + { >> + tree tem = make_ssa_name (TREE_TYPE (*where)); >> + gassign *newop >> + = gimple_build_assign (tem, POINTER_PLUS_EXPR, *where, >> + build_int_cst (sizetype, increment)); >> + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); >> + gsi_insert_before (&gsi, newop, GSI_SAME_STMT); >> + need_ssa_update = true; >> + *where = tem; >> + update_stmt (gsi_stmt (gsi)); >> + return; >> + } >> + >> + /* We can just adjust the offset in the MEM_REF expression. */ >> + tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1); >> + tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1, >> + build_int_cst (TREE_TYPE (x1), increment)); >> + TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x; > ... > > re-fold the thing as MEM_REF which will do all the magic for you: > > *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node, > *where, build_int_cst (ptr_type_node, increment))); > > that handles &MEM[] and &foo.bar just fine and avoids adding magic here. And that (&foo.bar) is likely what I was looking to handle with the first if clause above where I called build_ref_for_offset. > > Otherwise looks ok. I think I'd like to see this in GCC 7 given it's > so much similar to the constructor pruning. OK. I'll sort through the issues noted above and get this one reposted as well. jeff
On 01/04/2017 07:04 AM, Richard Biener wrote: > >> >> * tree-ssa-dse.c (need_ssa_update): New file scoped boolean. >> (decrement_count): New function. >> (increment_start_addr, trim_memstar_call): Likewise. >> (trim_partially_dead_store): Call trim_memstar_call. >> (pass_dse::execute): Initialize need_ssa_update. If set, then >> return TODO_ssa_update. >> >> * gcc.dg/tree-ssa/ssa-dse-25.c: New test. >> >> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c >> index 1482c7f..b21b9b5 100644 >> --- a/gcc/tree-ssa-dse.c >> +++ b/gcc/tree-ssa-dse.c >> @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup; >> It is always safe to return FALSE. But typically better optimziation >> can be achieved by analyzing more statements. */ >> >> +/* If trimming stores requires insertion of new statements, then we >> + will need an SSA update. */ >> +static bool need_ssa_update; >> + > > huh? You set this to true after inserting a POINTER_PLUS_EXPR, I don't see > how you need an SSA update for this. It doesn't seem needed anymore. I'm ripping that out. >> +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes >> + copied/set by DECREMENT. */ >> +static void >> +decrement_count (gimple *stmt, int decrement) >> +{ >> + tree *countp = gimple_call_arg_ptr (stmt, 2); >> + gcc_assert (TREE_CODE (*countp) == INTEGER_CST); >> + tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp, >> + build_int_cst (TREE_TYPE (*countp), decrement)); >> + *countp = x; > > thanks to wide-int the following should work > > *countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement); *countp is still a tree, but we know its an INTEGER_CST, so we can extract its value trivially. >> +} >> + >> +static void >> +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int >> increment) >> +{ >> + /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */ >> + if (TREE_CODE (*where) == ADDR_EXPR >> + && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF) >> + { >> + tree t = TREE_OPERAND (*where, 0); >> + t = build_ref_for_offset (EXPR_LOCATION (t), t, >> + increment * BITS_PER_UNIT, false, >> + ptr_type_node, NULL, false); > > please don't use build_ref_for_offset for this. Simply only handle the SSA_NAME > case here and below ... Done. I'm pretty sure calling into build_ref_for_offset was to handle &foo.bar kinds of cases and as you note, we can just re-fold the thing as a MEM_REF which simplifies everything a little. Jeff
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c index 1482c7f..b21b9b5 100644 --- a/gcc/tree-ssa-dse.c +++ b/gcc/tree-ssa-dse.c @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup; It is always safe to return FALSE. But typically better optimziation can be achieved by analyzing more statements. */ +/* If trimming stores requires insertion of new statements, then we + will need an SSA update. */ +static bool need_ssa_update; + static bool initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) { @@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live, gimple *stmt) } } +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes + copied/set by DECREMENT. */ +static void +decrement_count (gimple *stmt, int decrement) +{ + tree *countp = gimple_call_arg_ptr (stmt, 2); + gcc_assert (TREE_CODE (*countp) == INTEGER_CST); + tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp, + build_int_cst (TREE_TYPE (*countp), decrement)); + *countp = x; +} + +static void +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int increment) +{ + /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */ + if (TREE_CODE (*where) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF) + { + tree t = TREE_OPERAND (*where, 0); + t = build_ref_for_offset (EXPR_LOCATION (t), t, + increment * BITS_PER_UNIT, false, + ptr_type_node, NULL, false); + *where = build_fold_addr_expr (t); + return; + } + else if (TREE_CODE (*where) == SSA_NAME) + { + tree tem = make_ssa_name (TREE_TYPE (*where)); + gassign *newop + = gimple_build_assign (tem, POINTER_PLUS_EXPR, *where, + build_int_cst (sizetype, increment)); + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gsi_insert_before (&gsi, newop, GSI_SAME_STMT); + need_ssa_update = true; + *where = tem; + update_stmt (gsi_stmt (gsi)); + return; + } + + /* We can just adjust the offset in the MEM_REF expression. */ + tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1); + tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1, + build_int_cst (TREE_TYPE (x1), increment)); + TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x; + +} + +/* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are dead + (ORIG & ~NEW) and need not be stored. Try to rewrite STMT to reduce + the amount of data it actually writes. + + Right now we only support trimming from the head or the tail of the + memory region. In theory we could split the mem* call, but it's + likely of marginal value. */ + +static void +trim_memstar_call (bitmap orig, bitmap live, gimple *stmt) +{ + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) + { + case BUILT_IN_MEMCPY: + case BUILT_IN_MEMMOVE: + { + int head_trim, tail_trim; + compute_trims (orig, live, &head_trim, &tail_trim); + + /* Tail trimming is easy, we can just reduce the count. */ + if (tail_trim) + decrement_count (stmt, tail_trim); + + /* Head trimming requires adjusting all the arguments. */ + if (head_trim) + { + tree *dst = gimple_call_arg_ptr (stmt, 0); + increment_start_addr (stmt, dst, head_trim); + tree *src = gimple_call_arg_ptr (stmt, 1); + increment_start_addr (stmt, src, head_trim); + decrement_count (stmt, head_trim); + } + break; + } + + case BUILT_IN_MEMSET: + { + int head_trim, tail_trim; + compute_trims (orig, live, &head_trim, &tail_trim); + + /* Tail trimming is easy, we can just reduce the count. */ + if (tail_trim) + decrement_count (stmt, tail_trim); + + /* Head trimming requires adjusting all the arguments. */ + if (head_trim) + { + tree *dst = gimple_call_arg_ptr (stmt, 0); + increment_start_addr (stmt, dst, head_trim); + decrement_count (stmt, head_trim); + } + break; + } + + default: + break; + } +} + /* STMT is a memory write where one or more bytes written are dead stores. ORIG is the bitmap of bytes stored by STMT. LIVE is the bitmap of stores that are actually live. @@ -321,7 +433,9 @@ trim_constructor_store (bitmap orig, bitmap live, gimple *stmt) static void trim_partially_dead_store (bitmap orig, bitmap live, gimple *stmt) { - if (is_gimple_assign (stmt)) + if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) + trim_memstar_call (orig, live, stmt); + else if (is_gimple_assign (stmt)) { switch (gimple_assign_rhs_code (stmt)) { @@ -712,6 +826,7 @@ unsigned int pass_dse::execute (function *fun) { need_eh_cleanup = BITMAP_ALLOC (NULL); + need_ssa_update = false; renumber_gimple_stmt_uids (); @@ -738,7 +853,7 @@ pass_dse::execute (function *fun) /* For now, just wipe the post-dominator information. */ free_dominance_info (CDI_POST_DOMINATORS); - return 0; + return (need_ssa_update ? TODO_update_ssa : 0); } } // anon namespace diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c new file mode 100644 index 0000000..8b7db3a --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-dse1-details -w" } */ + +char z[32]; + + +int +foo(void) +{ + memset (z, 0, 16); + memset (z+8, 0, 24); +} + +/* { dg-final { scan-tree-dump "memset .&z, 0, 8." "dse1" } } */ + +