Message ID | ri61rp5am17.fsf@suse.cz |
---|---|
State | New |
Headers | show |
Series | gcc-9 sra: Cap number of sub-access propagations with a param (PR 93435) | expand |
On Thu, 2 Apr 2020, Martin Jambor wrote: > Hi, > > This is non-trivial but rather straightforward backport of > 29f23ed79b60949fc60f6fdbbd931bd58090b241 from master. See > https://gcc.gnu.org/pipermail/gcc-patches/2020-March/542390.html for > more information. > > Bootstrapped and tested on gcc-9 branch, can I commit it there? It also > applies as-is to gcc-8 as well and I will backport it there as the next > step after testing (without seeking another approval). OK. Thanks, Richard. > Thanks, > > Martin > > > 2020-04-01 Martin Jambor <mjambor@suse.cz> > > PR tree-optimization/93435 > * params.def (PARAM_SRA_MAX_PROPAGATIONS): New parameter. > * tree-sra.c (propagation_budget): New variable. > (budget_for_propagation_access): New function. > (propagate_subaccesses_across_link): Use it. > (propagate_all_subaccesses): Set up and destroy propagation_budget. > * doc/invoke.texi (sra-max-propagations): New. > > gcc/testsuite/ > * gcc.dg/tree-ssa/pr93435.c: New test. > --- > gcc/ChangeLog | 10 ++ > gcc/doc/invoke.texi | 5 + > gcc/params.def | 7 ++ > gcc/testsuite/ChangeLog | 5 + > gcc/testsuite/gcc.dg/tree-ssa/pr93435.c | 159 ++++++++++++++++++++++++ > gcc/tree-sra.c | 34 ++++- > 6 files changed, 219 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr93435.c > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 2b1ce7df14a..815fa8eec2d 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,3 +1,13 @@ > +2020-04-01 Martin Jambor <mjambor@suse.cz> > + > + PR tree-optimization/93435 > + * params.def (PARAM_SRA_MAX_PROPAGATIONS): New parameter. > + * tree-sra.c (propagation_budget): New variable. > + (budget_for_propagation_access): New function. > + (propagate_subaccesses_across_link): Use it. > + (propagate_all_subaccesses): Set up and destroy propagation_budget. > + * doc/invoke.texi (sra-max-propagations): New. > + > 2020-03-31 Carl Love <cel@us.ibm.com> > > Backport of: > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 0f6247caf51..1782a648d02 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -11797,6 +11797,11 @@ speed > (@option{sra-max-scalarization-size-Ospeed}) or size > (@option{sra-max-scalarization-size-Osize}) respectively. > > +@item sra-max-propagations > +The maximum number of artificial accesses that Scalar Replacement of > +Aggregates (SRA) will track, per one local variable, in order to > +facilitate copy propagation. > + > @item tm-max-aggregate-size > When making copies of thread-local variables in a transaction, this > parameter specifies the size in bytes after which variables are > diff --git a/gcc/params.def b/gcc/params.def > index 8e4887e50a2..e23a4530bfa 100644 > --- a/gcc/params.def > +++ b/gcc/params.def > @@ -1081,6 +1081,13 @@ DEFPARAM (PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE, > "considered for scalarization when compiling for size.", > 0, 0, 0) > > +DEFPARAM (PARAM_SRA_MAX_PROPAGATIONS, > + "sra-max-propagations", > + "Maximum number of artificial accesses to enable forward propagation " > + "that Scalar Replacement of Aggregates will keep for one local " > + "variable.", > + 32, 0, 0) > + > DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE, > "ipa-cp-value-list-size", > "Maximum size of a list of values associated with each parameter for " > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog > index 71686c72a33..6be82b5d5c2 100644 > --- a/gcc/testsuite/ChangeLog > +++ b/gcc/testsuite/ChangeLog > @@ -1,3 +1,8 @@ > +2020-04-01 Martin Jambor <mjambor@suse.cz> > + > + PR tree-optimization/93435 > + * gcc.dg/tree-ssa/pr93435.c: New test. > + > 2020-03-28 Tobias Burnus <tobias@codesourcery.com> > > Backport from mainline > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c > new file mode 100644 > index 00000000000..cb8e7495b15 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c > @@ -0,0 +1,159 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +typedef signed char int8_T; > +typedef int int32_T; > + > +typedef struct { > + int8_T a; > +} struct0_T; > + > +typedef struct { > + struct0_T f10[4]; > +} struct_T; > + > +typedef struct { > + struct_T f9[4]; > +} b_struct_T; > + > +typedef struct { > + b_struct_T f8[4]; > +} c_struct_T; > + > +typedef struct { > + c_struct_T f7[4]; > +} d_struct_T; > + > +typedef struct { > + d_struct_T f6[4]; > +} e_struct_T; > + > +typedef struct { > + e_struct_T f5[4]; > +} f_struct_T; > + > +typedef struct { > + f_struct_T f4[4]; > +} g_struct_T; > + > +typedef struct { > + g_struct_T f3[4]; > +} h_struct_T; > + > +typedef struct { > + h_struct_T f2[4]; > +} i_struct_T; > + > +typedef struct { > + i_struct_T f1[4]; > +} j_struct_T; > + > +typedef struct { > + struct { > + j_struct_T ds21[4]; > + i_struct_T ds20[4]; > + i_struct_T r9; > + } f0; > +} deep_struct_arraysStackData; > + > +/* Function Definitions */ > +void deep_struct_arrays(deep_struct_arraysStackData *SD, > + int8_T in1, int8_T inCount, int8_T *out1, int8_T *out2, struct0_T out3[4]) > +{ > + struct0_T r; > + struct_T r1; > + b_struct_T r2; > + c_struct_T r3; > + d_struct_T r4; > + e_struct_T r5; > + f_struct_T r6; > + g_struct_T r7; > + h_struct_T r8; > + int32_T count; > + int32_T i; > + > + /* Check properties of input in1 */ > + /* Check properties of input inCount */ > + /* Copyright 2006 The MathWorks, Inc. */ > + r.a = in1; > + r1.f10[0] = r; > + r1.f10[1] = r; > + r1.f10[2] = r; > + r1.f10[3] = r; > + r2.f9[0] = r1; > + r2.f9[1] = r1; > + r2.f9[2] = r1; > + r2.f9[3] = r1; > + r3.f8[0] = r2; > + r3.f8[1] = r2; > + r3.f8[2] = r2; > + r3.f8[3] = r2; > + r4.f7[0] = r3; > + r4.f7[1] = r3; > + r4.f7[2] = r3; > + r4.f7[3] = r3; > + r5.f6[0] = r4; > + r5.f6[1] = r4; > + r5.f6[2] = r4; > + r5.f6[3] = r4; > + r6.f5[0] = r5; > + r6.f5[1] = r5; > + r6.f5[2] = r5; > + r6.f5[3] = r5; > + r7.f4[0] = r6; > + r7.f4[1] = r6; > + r7.f4[2] = r6; > + r7.f4[3] = r6; > + r8.f3[0] = r7; > + r8.f3[1] = r7; > + r8.f3[2] = r7; > + r8.f3[3] = r7; > + SD->f0.r9.f2[0] = r8; > + SD->f0.r9.f2[1] = r8; > + SD->f0.r9.f2[2] = r8; > + SD->f0.r9.f2[3] = r8; > + SD->f0.ds20[0] = SD->f0.r9; > + SD->f0.ds20[3] = SD->f0.r9; > + count = 0; > + while (count < inCount) { > + i = in1 + SD->f0.ds20[0].f2[0].f3[0].f4[0].f5[0].f6[0].f7[0].f8[0].f9[0] > + .f10[0].a; > + if (i > 127) { > + i = 127; > + } else { > + if (i < -128) { > + i = -128; > + } > + } > + > + SD->f0.ds20[0].f2[0].f3[0].f4[0].f5[0].f6[0].f7[0].f8[0].f9[0].f10[0].a = > + (int8_T)i; > + i = SD->f0.ds20[3].f2[3].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3].a > + + 3; > + if (i > 127) { > + i = 127; > + } > + > + SD->f0.ds20[3].f2[3].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3].a = > + (int8_T)i; > + count++; > + } > + > + if (inCount > 10) { > + SD->f0.ds21[0].f1[1].f2[2].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3]. > + a = 14; > + } else { > + SD->f0.ds21[0].f1[1].f2[2].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3]. > + a = 16; > + } > + > + *out1 = SD->f0.ds20[0].f2[0].f3[0].f4[0].f5[0].f6[0].f7[0].f8[0].f9[0].f10[0]. > + a; > + *out2 = SD->f0.ds20[3].f2[3].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3]. > + a; > + out3[0] = r; > + out3[1] = r; > + out3[2] = r; > + out3[3] = SD->f0.ds21[0].f1[1].f2[2].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3] > + .f10[3]; > +} > diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c > index fd51a3d0323..0a5c24a183a 100644 > --- a/gcc/tree-sra.c > +++ b/gcc/tree-sra.c > @@ -291,6 +291,9 @@ static object_allocator<assign_link> assign_link_pool ("SRA links"); > /* Base (tree) -> Vector (vec<access_p> *) map. */ > static hash_map<tree, auto_vec<access_p> > *base_access_vec; > > +/* Hash to limit creation of artificial accesses */ > +static hash_map<tree, unsigned> *propagation_budget; > + > /* Candidate hash table helpers. */ > > struct uid_decl_hasher : nofree_ptr_hash <tree_node> > @@ -2670,6 +2673,32 @@ subtree_mark_written_and_enqueue (struct access *access) > subtree_mark_written_and_enqueue (child); > } > > +/* If there is still budget to create a propagation access for DECL, return > + true and decrement the budget. Otherwise return false. */ > + > +static bool > +budget_for_propagation_access (tree decl) > +{ > + unsigned b, *p = propagation_budget->get (decl); > + if (p) > + b = *p; > + else > + b = PARAM_SRA_MAX_PROPAGATIONS; > + > + if (b == 0) > + return false; > + b--; > + > + if (b == 0 && dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "The propagation budget of "); > + print_generic_expr (dump_file, decl); > + fprintf (dump_file, " (UID: %u) has been exhausted.\n", DECL_UID (decl)); > + } > + propagation_budget->put (decl, b); > + return true; > +} > + > /* Propagate subaccesses and grp_write flags of RACC across an assignment link > to LACC. Enqueue sub-accesses as necessary so that the write flag is > propagated transitively. Return true if anything changed. Additionally, if > @@ -2770,7 +2799,8 @@ propagate_subaccesses_across_link (struct access *lacc, struct access *racc) > continue; > } > > - if (rchild->grp_unscalarizable_region) > + if (rchild->grp_unscalarizable_region > + || !budget_for_propagation_access (lacc->base)) > { > if (rchild->grp_write && !lacc->grp_write) > { > @@ -2800,6 +2830,7 @@ propagate_subaccesses_across_link (struct access *lacc, struct access *racc) > static void > propagate_all_subaccesses (void) > { > + propagation_budget = new hash_map<tree, unsigned>; > while (work_queue_head) > { > struct access *racc = pop_access_from_work_queue (); > @@ -2838,6 +2869,7 @@ propagate_all_subaccesses (void) > while (lacc); > } > } > + delete propagation_budget; > } > > /* Go through all accesses collected throughout the (intraprocedural) analysis >
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2b1ce7df14a..815fa8eec2d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2020-04-01 Martin Jambor <mjambor@suse.cz> + + PR tree-optimization/93435 + * params.def (PARAM_SRA_MAX_PROPAGATIONS): New parameter. + * tree-sra.c (propagation_budget): New variable. + (budget_for_propagation_access): New function. + (propagate_subaccesses_across_link): Use it. + (propagate_all_subaccesses): Set up and destroy propagation_budget. + * doc/invoke.texi (sra-max-propagations): New. + 2020-03-31 Carl Love <cel@us.ibm.com> Backport of: diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0f6247caf51..1782a648d02 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -11797,6 +11797,11 @@ speed (@option{sra-max-scalarization-size-Ospeed}) or size (@option{sra-max-scalarization-size-Osize}) respectively. +@item sra-max-propagations +The maximum number of artificial accesses that Scalar Replacement of +Aggregates (SRA) will track, per one local variable, in order to +facilitate copy propagation. + @item tm-max-aggregate-size When making copies of thread-local variables in a transaction, this parameter specifies the size in bytes after which variables are diff --git a/gcc/params.def b/gcc/params.def index 8e4887e50a2..e23a4530bfa 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -1081,6 +1081,13 @@ DEFPARAM (PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE, "considered for scalarization when compiling for size.", 0, 0, 0) +DEFPARAM (PARAM_SRA_MAX_PROPAGATIONS, + "sra-max-propagations", + "Maximum number of artificial accesses to enable forward propagation " + "that Scalar Replacement of Aggregates will keep for one local " + "variable.", + 32, 0, 0) + DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE, "ipa-cp-value-list-size", "Maximum size of a list of values associated with each parameter for " diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 71686c72a33..6be82b5d5c2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-04-01 Martin Jambor <mjambor@suse.cz> + + PR tree-optimization/93435 + * gcc.dg/tree-ssa/pr93435.c: New test. + 2020-03-28 Tobias Burnus <tobias@codesourcery.com> Backport from mainline diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c new file mode 100644 index 00000000000..cb8e7495b15 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c @@ -0,0 +1,159 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef signed char int8_T; +typedef int int32_T; + +typedef struct { + int8_T a; +} struct0_T; + +typedef struct { + struct0_T f10[4]; +} struct_T; + +typedef struct { + struct_T f9[4]; +} b_struct_T; + +typedef struct { + b_struct_T f8[4]; +} c_struct_T; + +typedef struct { + c_struct_T f7[4]; +} d_struct_T; + +typedef struct { + d_struct_T f6[4]; +} e_struct_T; + +typedef struct { + e_struct_T f5[4]; +} f_struct_T; + +typedef struct { + f_struct_T f4[4]; +} g_struct_T; + +typedef struct { + g_struct_T f3[4]; +} h_struct_T; + +typedef struct { + h_struct_T f2[4]; +} i_struct_T; + +typedef struct { + i_struct_T f1[4]; +} j_struct_T; + +typedef struct { + struct { + j_struct_T ds21[4]; + i_struct_T ds20[4]; + i_struct_T r9; + } f0; +} deep_struct_arraysStackData; + +/* Function Definitions */ +void deep_struct_arrays(deep_struct_arraysStackData *SD, + int8_T in1, int8_T inCount, int8_T *out1, int8_T *out2, struct0_T out3[4]) +{ + struct0_T r; + struct_T r1; + b_struct_T r2; + c_struct_T r3; + d_struct_T r4; + e_struct_T r5; + f_struct_T r6; + g_struct_T r7; + h_struct_T r8; + int32_T count; + int32_T i; + + /* Check properties of input in1 */ + /* Check properties of input inCount */ + /* Copyright 2006 The MathWorks, Inc. */ + r.a = in1; + r1.f10[0] = r; + r1.f10[1] = r; + r1.f10[2] = r; + r1.f10[3] = r; + r2.f9[0] = r1; + r2.f9[1] = r1; + r2.f9[2] = r1; + r2.f9[3] = r1; + r3.f8[0] = r2; + r3.f8[1] = r2; + r3.f8[2] = r2; + r3.f8[3] = r2; + r4.f7[0] = r3; + r4.f7[1] = r3; + r4.f7[2] = r3; + r4.f7[3] = r3; + r5.f6[0] = r4; + r5.f6[1] = r4; + r5.f6[2] = r4; + r5.f6[3] = r4; + r6.f5[0] = r5; + r6.f5[1] = r5; + r6.f5[2] = r5; + r6.f5[3] = r5; + r7.f4[0] = r6; + r7.f4[1] = r6; + r7.f4[2] = r6; + r7.f4[3] = r6; + r8.f3[0] = r7; + r8.f3[1] = r7; + r8.f3[2] = r7; + r8.f3[3] = r7; + SD->f0.r9.f2[0] = r8; + SD->f0.r9.f2[1] = r8; + SD->f0.r9.f2[2] = r8; + SD->f0.r9.f2[3] = r8; + SD->f0.ds20[0] = SD->f0.r9; + SD->f0.ds20[3] = SD->f0.r9; + count = 0; + while (count < inCount) { + i = in1 + SD->f0.ds20[0].f2[0].f3[0].f4[0].f5[0].f6[0].f7[0].f8[0].f9[0] + .f10[0].a; + if (i > 127) { + i = 127; + } else { + if (i < -128) { + i = -128; + } + } + + SD->f0.ds20[0].f2[0].f3[0].f4[0].f5[0].f6[0].f7[0].f8[0].f9[0].f10[0].a = + (int8_T)i; + i = SD->f0.ds20[3].f2[3].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3].a + + 3; + if (i > 127) { + i = 127; + } + + SD->f0.ds20[3].f2[3].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3].a = + (int8_T)i; + count++; + } + + if (inCount > 10) { + SD->f0.ds21[0].f1[1].f2[2].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3]. + a = 14; + } else { + SD->f0.ds21[0].f1[1].f2[2].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3]. + a = 16; + } + + *out1 = SD->f0.ds20[0].f2[0].f3[0].f4[0].f5[0].f6[0].f7[0].f8[0].f9[0].f10[0]. + a; + *out2 = SD->f0.ds20[3].f2[3].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3].f10[3]. + a; + out3[0] = r; + out3[1] = r; + out3[2] = r; + out3[3] = SD->f0.ds21[0].f1[1].f2[2].f3[3].f4[3].f5[3].f6[3].f7[3].f8[3].f9[3] + .f10[3]; +} diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index fd51a3d0323..0a5c24a183a 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -291,6 +291,9 @@ static object_allocator<assign_link> assign_link_pool ("SRA links"); /* Base (tree) -> Vector (vec<access_p> *) map. */ static hash_map<tree, auto_vec<access_p> > *base_access_vec; +/* Hash to limit creation of artificial accesses */ +static hash_map<tree, unsigned> *propagation_budget; + /* Candidate hash table helpers. */ struct uid_decl_hasher : nofree_ptr_hash <tree_node> @@ -2670,6 +2673,32 @@ subtree_mark_written_and_enqueue (struct access *access) subtree_mark_written_and_enqueue (child); } +/* If there is still budget to create a propagation access for DECL, return + true and decrement the budget. Otherwise return false. */ + +static bool +budget_for_propagation_access (tree decl) +{ + unsigned b, *p = propagation_budget->get (decl); + if (p) + b = *p; + else + b = PARAM_SRA_MAX_PROPAGATIONS; + + if (b == 0) + return false; + b--; + + if (b == 0 && dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "The propagation budget of "); + print_generic_expr (dump_file, decl); + fprintf (dump_file, " (UID: %u) has been exhausted.\n", DECL_UID (decl)); + } + propagation_budget->put (decl, b); + return true; +} + /* Propagate subaccesses and grp_write flags of RACC across an assignment link to LACC. Enqueue sub-accesses as necessary so that the write flag is propagated transitively. Return true if anything changed. Additionally, if @@ -2770,7 +2799,8 @@ propagate_subaccesses_across_link (struct access *lacc, struct access *racc) continue; } - if (rchild->grp_unscalarizable_region) + if (rchild->grp_unscalarizable_region + || !budget_for_propagation_access (lacc->base)) { if (rchild->grp_write && !lacc->grp_write) { @@ -2800,6 +2830,7 @@ propagate_subaccesses_across_link (struct access *lacc, struct access *racc) static void propagate_all_subaccesses (void) { + propagation_budget = new hash_map<tree, unsigned>; while (work_queue_head) { struct access *racc = pop_access_from_work_queue (); @@ -2838,6 +2869,7 @@ propagate_all_subaccesses (void) while (lacc); } } + delete propagation_budget; } /* Go through all accesses collected throughout the (intraprocedural) analysis