diff mbox series

sra: Fix bug in grp_write propagation (PR 97009)

Message ID ri6tuorggl9.fsf@suse.cz
State New
Headers show
Series sra: Fix bug in grp_write propagation (PR 97009) | expand

Commit Message

Martin Jambor March 31, 2021, 9:26 p.m. UTC
Hi,

SRA represents parts of aggregates which are arrays accessed with
unknown index as "unscalarizable regions."  When there are two such
regions one within another and the outer is only read whereas the
inner is written to, SRA fails to propagate that write information
across assignments.  This means that a second aggregate can contain
data while SRA thinks it does not and the pass can wrongly eliminate
big chunks of assignment from that second aggregate into a third
aggregate, which is what happens in PR 97009.

Fixed by checking all children of unscalariable accesses for the
grp_write flag.

Bootstrap and testing on top of trunk and the gcc-10 branch is underway
and I also plan to backport this to gcc-9.  OK if they pass?

Thanks,

Martin


gcc/ChangeLog:

2021-03-31  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/97009
	* tree-sra.c (access_or_its_child_written): New function.
	(propagate_subaccesses_from_rhs): Use it instead of a simple grp_write
	test.

gcc/testsuite/ChangeLog:

2021-03-31  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/97009
	* gcc.dg/tree-ssa/pr97009.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr97009.c | 66 +++++++++++++++++++++++++
 gcc/tree-sra.c                          | 15 +++++-
 2 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr97009.c

Comments

Richard Biener April 1, 2021, 7:51 a.m. UTC | #1
On Wed, 31 Mar 2021, Martin Jambor wrote:

> Hi,
> 
> SRA represents parts of aggregates which are arrays accessed with
> unknown index as "unscalarizable regions."  When there are two such
> regions one within another and the outer is only read whereas the
> inner is written to, SRA fails to propagate that write information
> across assignments.  This means that a second aggregate can contain
> data while SRA thinks it does not and the pass can wrongly eliminate
> big chunks of assignment from that second aggregate into a third
> aggregate, which is what happens in PR 97009.
> 
> Fixed by checking all children of unscalariable accesses for the
> grp_write flag.
> 
> Bootstrap and testing on top of trunk and the gcc-10 branch is underway
> and I also plan to backport this to gcc-9.  OK if they pass?

OK.

Thanks,
Richard.

> Thanks,
> 
> Martin
> 
> 
> gcc/ChangeLog:
> 
> 2021-03-31  Martin Jambor  <mjambor@suse.cz>
> 
> 	PR tree-optimization/97009
> 	* tree-sra.c (access_or_its_child_written): New function.
> 	(propagate_subaccesses_from_rhs): Use it instead of a simple grp_write
> 	test.
> 
> gcc/testsuite/ChangeLog:
> 
> 2021-03-31  Martin Jambor  <mjambor@suse.cz>
> 
> 	PR tree-optimization/97009
> 	* gcc.dg/tree-ssa/pr97009.c: New test.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/pr97009.c | 66 +++++++++++++++++++++++++
>  gcc/tree-sra.c                          | 15 +++++-
>  2 files changed, 80 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr97009.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr97009.c b/gcc/testsuite/gcc.dg/tree-ssa/pr97009.c
> new file mode 100644
> index 00000000000..741dbc270c3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr97009.c
> @@ -0,0 +1,66 @@
> +/* { dg-do run } */
> +/* { dg-options "-O1" } */
> +
> +static int __attribute__((noipa))
> +get_5 (void)
> +{
> +  return 5;
> +}
> +
> +static int __attribute__((noipa))
> +verify_5 (int v)
> +{
> +  if (v != 5)
> +    __builtin_abort ();
> +}
> +
> +struct T
> +{
> +  int w;
> +  int a[4];
> +};
> +
> +struct S
> +{
> +  int v;
> +  int x;
> +  struct T t[2];
> +  char alotofstuff[128];
> +};
> +
> +volatile int vol;
> +
> +void __attribute__((noipa))
> +consume_t (struct T t)
> +{
> +  vol = t.a[0];
> +}
> +
> +int __attribute__((noipa))
> +foo (int l1, int l2)
> +{
> +  struct S s1, s2, s3;
> +  int i, j;
> +
> +  s1.v = get_5 ();
> +  for (i = 0; i < l1; i++)
> +    {
> +      for (j = 0; j < l2; j++)
> +	s1.t[i].a[j] = get_5 ();
> +      consume_t(s1.t[i]);
> +    }
> +
> +  s2 = s1;
> +
> +  s3 = s2;
> +  for (i = 0; i < l1; i++)
> +    for (j = 0; j < l2; j++)
> +      verify_5 (s3.t[i].a[j]);
> +}
> +
> +int
> +main (int argc, char *argv[])
> +{
> +  foo (2, 4);
> +  return 0;
> +}
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index d177f1ba11c..8dfc923ed7e 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -2723,6 +2723,19 @@ budget_for_propagation_access (tree decl)
>    return true;
>  }
>  
> +/* Return true if ACC or any of its subaccesses has grp_child set.  */
> +
> +static bool
> +access_or_its_child_written (struct access *acc)
> +{
> +  if (acc->grp_write)
> +    return true;
> +  for (struct access *sub = acc->first_child; sub; sub = sub->next_sibling)
> +    if (access_or_its_child_written (sub))
> +      return true;
> +  return false;
> +}
> +
>  /* Propagate subaccesses and grp_write flags of RACC across an assignment link
>     to LACC.  Enqueue sub-accesses as necessary so that the write flag is
>     propagated transitively.  Return true if anything changed.  Additionally, if
> @@ -2836,7 +2849,7 @@ propagate_subaccesses_from_rhs (struct access *lacc, struct access *racc)
>        if (rchild->grp_unscalarizable_region
>  	  || !budget_for_propagation_access (lacc->base))
>  	{
> -	  if (rchild->grp_write && !lacc->grp_write)
> +	  if (!lacc->grp_write && access_or_its_child_written (rchild))
>  	    {
>  	      ret = true;
>  	      subtree_mark_written_and_rhs_enqueue (lacc);
>
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr97009.c b/gcc/testsuite/gcc.dg/tree-ssa/pr97009.c
new file mode 100644
index 00000000000..741dbc270c3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr97009.c
@@ -0,0 +1,66 @@ 
+/* { dg-do run } */
+/* { dg-options "-O1" } */
+
+static int __attribute__((noipa))
+get_5 (void)
+{
+  return 5;
+}
+
+static int __attribute__((noipa))
+verify_5 (int v)
+{
+  if (v != 5)
+    __builtin_abort ();
+}
+
+struct T
+{
+  int w;
+  int a[4];
+};
+
+struct S
+{
+  int v;
+  int x;
+  struct T t[2];
+  char alotofstuff[128];
+};
+
+volatile int vol;
+
+void __attribute__((noipa))
+consume_t (struct T t)
+{
+  vol = t.a[0];
+}
+
+int __attribute__((noipa))
+foo (int l1, int l2)
+{
+  struct S s1, s2, s3;
+  int i, j;
+
+  s1.v = get_5 ();
+  for (i = 0; i < l1; i++)
+    {
+      for (j = 0; j < l2; j++)
+	s1.t[i].a[j] = get_5 ();
+      consume_t(s1.t[i]);
+    }
+
+  s2 = s1;
+
+  s3 = s2;
+  for (i = 0; i < l1; i++)
+    for (j = 0; j < l2; j++)
+      verify_5 (s3.t[i].a[j]);
+}
+
+int
+main (int argc, char *argv[])
+{
+  foo (2, 4);
+  return 0;
+}
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index d177f1ba11c..8dfc923ed7e 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -2723,6 +2723,19 @@  budget_for_propagation_access (tree decl)
   return true;
 }
 
+/* Return true if ACC or any of its subaccesses has grp_child set.  */
+
+static bool
+access_or_its_child_written (struct access *acc)
+{
+  if (acc->grp_write)
+    return true;
+  for (struct access *sub = acc->first_child; sub; sub = sub->next_sibling)
+    if (access_or_its_child_written (sub))
+      return true;
+  return false;
+}
+
 /* Propagate subaccesses and grp_write flags of RACC across an assignment link
    to LACC.  Enqueue sub-accesses as necessary so that the write flag is
    propagated transitively.  Return true if anything changed.  Additionally, if
@@ -2836,7 +2849,7 @@  propagate_subaccesses_from_rhs (struct access *lacc, struct access *racc)
       if (rchild->grp_unscalarizable_region
 	  || !budget_for_propagation_access (lacc->base))
 	{
-	  if (rchild->grp_write && !lacc->grp_write)
+	  if (!lacc->grp_write && access_or_its_child_written (rchild))
 	    {
 	      ret = true;
 	      subtree_mark_written_and_rhs_enqueue (lacc);