diff mbox

Avoid an unwanted decl re-map in copy_gimple_seq_and_replace_locals

Message ID 20160108145901.GD3982@virgil.suse.cz
State New
Headers show

Commit Message

Martin Jambor Jan. 8, 2016, 2:59 p.m. UTC
Hi,

I ran into an ICE when compiling the following function on the HSA branch:

foo (int n, int m, int o, int (*a)[m][o])
{
  int i, j, k;
#pragma omp target teams distribute parallel for shared(a) firstprivate(n, m, o) private(i,j,k)
  for (i = 0; i < n; i++)
    for (j = 0; j < m; j++)
      for (k = 0; k < o; k++)
        a[i][j][k] = i + j + k;
}

The problem is that when I duplicate the loop with
copy_gimple_seq_and_replace_locals, I get one extra re-mapping.
Specifically, I feed the function this:

{
  int i.2;

  #pragma omp teams shared(a) firstprivate(n) firstprivate(m) firstprivate(o) shared(m.1) shared(D.3275) shared(o.0)
    {
      #pragma omp distribute private(i.2)
      for (i.2 = 0; i.2 < n; i.2 = i.2 + 1)
        {
          #pragma omp parallel shared(a) firstprivate(n) firstprivate(m) firstprivate(o) private(i) private(j) private(k) shared(m.1) shared(D.3275) shared(o.0)
            {
              sizetype D.3286;
              long unsigned int D.3287;
              sizetype D.3288;
              sizetype D.3289;
              sizetype D.3290;
              long unsigned int D.3291;
              long unsigned int D.3292;
              int[0:D.3279][0:D.3271] * D.3293;
              int D.3294;
              int D.3295;

              #pragma omp for nowait
              for (i = 0; i < n; i = i + 1)
                {
                  j = 0;
                  goto <D.3244>;
                  <D.3243>:
                  k = 0;
                  goto <D.3241>;
                  <D.3240>:
                  D.3286 = D.3275 /[ex] 4;   <--- here I get wrog decl
                  D.3287 = (long unsigned int) i;
                  D.3288 = (sizetype) o.0;
                  D.3289 = (sizetype) m.1;
                  D.3290 = D.3288 * D.3289;
                  D.3291 = D.3287 * D.3290;
                  D.3292 = D.3291 * 4;
                  D.3293 = a + D.3292;
                  D.3294 = i + j;
                  D.3295 = D.3294 + k;
                  *D.3293[j]{lb: 0 sz: D.3286 * 4}[k] = D.3295;
                  k = k + 1;
                  <D.3241>:
                  if (k < o) goto <D.3240>; else goto <D.3242>;
                  <D.3242>:
                  j = j + 1;
                  <D.3244>:
                  if (j < m) goto <D.3243>; else goto <D.3245>;
                  <D.3245>:
                }
            }
        }
    }
}

and it replaces D.3275 with its new copy with undefined value.  The
mapping is created when an array type where the size is defined in
terms of that variable declaration is copied.  The comment in
type-remapping code says that we "use the already remaped data" but
that is not true.

My solution was to prevent declaration duplication in this case with
yet another state variable in struct copy_body_data that holds a
special value when we are running copy_gimple_seq_and_replace_locals
and another when we are within type-remapping.

I'll be happy for any suggestion how to deal with this without
cluttering copy_body_date even more but so far I have not found any.
If nobody has a better idea, is the following good for trunk?  (I am
about to commit it to the hsa branch.)  It has passed bootstrap and
testing on x86_64-linux.

Thanks,

Martin


2016-01-06  Martin Jambor  <mjambor@suse.cz>

	* tree-inline.h (copy_body_data): New field
	decl_creation_prevention_level.  Moved remap_var_for_cilk to minimize
	padding.
	* tree-inline.c (remap_decl): Return original decls if
	decl_creation_prevention_level is two or bigger.
	(remap_type_1): Increment and decrement decl_creation_prevention_level
	if appropriate.
	(copy_gimple_seq_and_replace_locals): Set
	decl_creation_prevention_level to 1.

Comments

Richard Biener Jan. 11, 2016, 8:41 a.m. UTC | #1
On Fri, Jan 8, 2016 at 3:59 PM, Martin Jambor <mjambor@suse.cz> wrote:
> Hi,
>
> I ran into an ICE when compiling the following function on the HSA branch:
>
> foo (int n, int m, int o, int (*a)[m][o])
> {
>   int i, j, k;
> #pragma omp target teams distribute parallel for shared(a) firstprivate(n, m, o) private(i,j,k)
>   for (i = 0; i < n; i++)
>     for (j = 0; j < m; j++)
>       for (k = 0; k < o; k++)
>         a[i][j][k] = i + j + k;
> }
>
> The problem is that when I duplicate the loop with
> copy_gimple_seq_and_replace_locals, I get one extra re-mapping.
> Specifically, I feed the function this:
>
> {
>   int i.2;
>
>   #pragma omp teams shared(a) firstprivate(n) firstprivate(m) firstprivate(o) shared(m.1) shared(D.3275) shared(o.0)
>     {
>       #pragma omp distribute private(i.2)
>       for (i.2 = 0; i.2 < n; i.2 = i.2 + 1)
>         {
>           #pragma omp parallel shared(a) firstprivate(n) firstprivate(m) firstprivate(o) private(i) private(j) private(k) shared(m.1) shared(D.3275) shared(o.0)
>             {
>               sizetype D.3286;
>               long unsigned int D.3287;
>               sizetype D.3288;
>               sizetype D.3289;
>               sizetype D.3290;
>               long unsigned int D.3291;
>               long unsigned int D.3292;
>               int[0:D.3279][0:D.3271] * D.3293;
>               int D.3294;
>               int D.3295;
>
>               #pragma omp for nowait
>               for (i = 0; i < n; i = i + 1)
>                 {
>                   j = 0;
>                   goto <D.3244>;
>                   <D.3243>:
>                   k = 0;
>                   goto <D.3241>;
>                   <D.3240>:
>                   D.3286 = D.3275 /[ex] 4;   <--- here I get wrog decl
>                   D.3287 = (long unsigned int) i;
>                   D.3288 = (sizetype) o.0;
>                   D.3289 = (sizetype) m.1;
>                   D.3290 = D.3288 * D.3289;
>                   D.3291 = D.3287 * D.3290;
>                   D.3292 = D.3291 * 4;
>                   D.3293 = a + D.3292;
>                   D.3294 = i + j;
>                   D.3295 = D.3294 + k;
>                   *D.3293[j]{lb: 0 sz: D.3286 * 4}[k] = D.3295;
>                   k = k + 1;
>                   <D.3241>:
>                   if (k < o) goto <D.3240>; else goto <D.3242>;
>                   <D.3242>:
>                   j = j + 1;
>                   <D.3244>:
>                   if (j < m) goto <D.3243>; else goto <D.3245>;
>                   <D.3245>:
>                 }
>             }
>         }
>     }
> }
>
> and it replaces D.3275 with its new copy with undefined value.  The
> mapping is created when an array type where the size is defined in
> terms of that variable declaration is copied.  The comment in
> type-remapping code says that we "use the already remaped data" but
> that is not true.
>
> My solution was to prevent declaration duplication in this case with
> yet another state variable in struct copy_body_data that holds a
> special value when we are running copy_gimple_seq_and_replace_locals
> and another when we are within type-remapping.
>
> I'll be happy for any suggestion how to deal with this without
> cluttering copy_body_date even more but so far I have not found any.
> If nobody has a better idea, is the following good for trunk?  (I am
> about to commit it to the hsa branch.)  It has passed bootstrap and
> testing on x86_64-linux.

Hum.  Can't you check id->remapping_type_depth?  That said, how do
we end up recursing into remap_decl when copying the variable length
decl/type?  Can't we avoid the recursion (basically avoid remapping
variable-size types at all?)

Richard.

> Thanks,
>
> Martin
>
>
> 2016-01-06  Martin Jambor  <mjambor@suse.cz>
>
>         * tree-inline.h (copy_body_data): New field
>         decl_creation_prevention_level.  Moved remap_var_for_cilk to minimize
>         padding.
>         * tree-inline.c (remap_decl): Return original decls if
>         decl_creation_prevention_level is two or bigger.
>         (remap_type_1): Increment and decrement decl_creation_prevention_level
>         if appropriate.
>         (copy_gimple_seq_and_replace_locals): Set
>         decl_creation_prevention_level to 1.
>
> diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
> index 88a6753..2df11a2 100644
> --- a/gcc/tree-inline.c
> +++ b/gcc/tree-inline.c
> @@ -340,8 +340,20 @@ remap_decl (tree decl, copy_body_data *id)
>        return decl;
>      }
>
> -  /* If we didn't already have an equivalent for this declaration,
> -     create one now.  */
> +  /* If decl copying is forbidden (which happens when copying a type with size
> +     defined outside of the copied sequence) work with the original decl. */
> +  if (!n
> +      && id->decl_creation_prevention_level > 1
> +      && (VAR_P (decl) || TREE_CODE (decl) == PARM_DECL))
> +    {
> +      if (id->do_not_unshare)
> +       return decl;
> +      else
> +       return unshare_expr (decl);
> +    }
> +
> +  /* If we didn't already have an equivalent for this declaration, create one
> +     now.  */
>    if (!n)
>      {
>        /* Make a copy of the variable or label.  */
> @@ -526,7 +538,10 @@ remap_type_1 (tree type, copy_body_data *id)
>        gcc_unreachable ();
>      }
>
> -  /* All variants of type share the same size, so use the already remaped data.  */
> +  /* All variants of type share the same size, so use the already remaped
> +     data.  */
> +  if (id->decl_creation_prevention_level > 0)
> +    id->decl_creation_prevention_level++;
>    if (TYPE_MAIN_VARIANT (new_tree) != new_tree)
>      {
>        gcc_checking_assert (TYPE_SIZE (type) == TYPE_SIZE (TYPE_MAIN_VARIANT (type)));
> @@ -540,6 +555,8 @@ remap_type_1 (tree type, copy_body_data *id)
>        walk_tree (&TYPE_SIZE (new_tree), copy_tree_body_r, id, NULL);
>        walk_tree (&TYPE_SIZE_UNIT (new_tree), copy_tree_body_r, id, NULL);
>      }
> +  if (id->decl_creation_prevention_level > 1)
> +    id->decl_creation_prevention_level--;
>
>    return new_tree;
>  }
> @@ -5276,6 +5293,7 @@ copy_gimple_seq_and_replace_locals (gimple_seq seq)
>    id.transform_return_to_modify = false;
>    id.transform_parameter = false;
>    id.transform_lang_insert_block = NULL;
> +  id.decl_creation_prevention_level = 1;
>
>    /* Walk the tree once to find local labels.  */
>    memset (&wi, 0, sizeof (wi));
> diff --git a/gcc/tree-inline.h b/gcc/tree-inline.h
> index b8fb2a2..f737daf 100644
> --- a/gcc/tree-inline.h
> +++ b/gcc/tree-inline.h
> @@ -140,14 +140,21 @@ struct copy_body_data
>       the originals have been mapped to a value rather than to a
>       variable.  */
>    hash_map<tree, tree> *debug_map;
> -
> -  /* Cilk keywords currently need to replace some variables that
> -     ordinary nested functions do not.  */
> -  bool remap_var_for_cilk;
>
>    /* A map from the inlined functions dependence info cliques to
>       equivalents in the function into which it is being inlined.  */
>    hash_map<dependence_hash, unsigned short> *dependence_map;
> +
> +  /* Cilk keywords currently need to replace some variables that
> +     ordinary nested functions do not.  */
> +  bool remap_var_for_cilk;
> +
> +  /* When zero, do nothing.  When one or higher, increment during type
> +     remapping, When two or higher, do not create new variables when remapping
> +     decls.  Used when remapping types with variable size, but when the size is
> +     defined outside the sequence copied by
> +     copy_gimple_seq_and_replace_locals.  */
> +  unsigned decl_creation_prevention_level;
>  };
>
>  /* Weights of constructions for estimate_num_insns.  */
Jakub Jelinek Jan. 11, 2016, 4:38 p.m. UTC | #2
On Mon, Jan 11, 2016 at 09:41:31AM +0100, Richard Biener wrote:
> Hum.  Can't you check id->remapping_type_depth?  That said, how do
> we end up recursing into remap_decl when copying the variable length
> decl/type?  Can't we avoid the recursion (basically avoid remapping
> variable-size types at all?)

I guess it depends, VLA types that refer in their various gimplified
expressions only to decls defined outside of bind stmts we are duplicating
are fine as is, they don't need remapping, or could be remapped to VLA types
that use all the same temporary decls.
VLAs that have some or all references to decls inside of the bind stmts
we are duplicating IMHO need to be remapped.
So, perhaps we need to remap_decls in replace_locals_stmt in two phases
in presence of VLAs (or also vars with DECL_VALUE_EXPR)
- phase 1 would just walk the
  for (old_var = decls; old_var; old_var = DECL_CHAIN (old_var))
    {
      if (!can_be_nonlocal (old_var, id)
	  && ! variably_modified_type_p (TREE_TYPE (old_var), id->src_fn))
	remap_decl (old_var, id);
    }
- phase 2 - do the full remap_decls, but during that arrange that
  remap_decl for non-zero id->remapping_type_depth if (!n) just returns
  decl
That way, I think if the types refer to some temporaries that are defined
in the bind stmts being copied, they will be properly duplicated, otherwise
they will be shared.
So, we'd need some flag in *id (just bool bitfield would be enough) that would
allow replace_locals_stmt to set it before the remap_decls call in phase 2
and clear it afterwards, and use that flag together with
id->remapping_type_depth in remap_decls.

	Jakub
Martin Jambor Jan. 12, 2016, 5:36 p.m. UTC | #3
Hi,

On Mon, Jan 11, 2016 at 05:38:47PM +0100, Jakub Jelinek wrote:
> On Mon, Jan 11, 2016 at 09:41:31AM +0100, Richard Biener wrote:
> > Hum.  Can't you check id->remapping_type_depth?

For some reason, last week I reached the conclusion that no.  But I
must have done something wrong because I have tested it again today
and just never creating a new decl in remap_decl if
id->remapping_type_depth is non zero is good enough for my testcase
and it survives bootstrap and testing too (previously I thought it did
not).

id->remapping_type_depth seems to be incremented for DECL_VALUE_EXPR
id->as well, so it actually might help in that situation too.

> That said, how do
> > we end up recursing into remap_decl when copying the variable length
> > decl/type?  Can't we avoid the recursion (basically avoid remapping
> > variable-size types at all?)

Here I agree with Jakub that there are situations where we have to.
There is a comment towards the end of remap_type_1 saying that when
remapping types, all required decls should have already been mapped.
If that is correct, and I belive it is, the remapping_type_depth test
should be fine.

> 
> I guess it depends, VLA types that refer in their various gimplified
> expressions only to decls defined outside of bind stmts we are duplicating
> are fine as is, they don't need remapping, or could be remapped to VLA types
> that use all the same temporary decls.
> VLAs that have some or all references to decls inside of the bind stmts
> we are duplicating IMHO need to be remapped.
> So, perhaps we need to remap_decls in replace_locals_stmt in two phases
> in presence of VLAs (or also vars with DECL_VALUE_EXPR)

I'm a bit worried what would happen do local DECLs that are pointers
to VLAs, because...

> - phase 1 would just walk the
>   for (old_var = decls; old_var; old_var = DECL_CHAIN (old_var))
>     {
>       if (!can_be_nonlocal (old_var, id)
> 	  && ! variably_modified_type_p (TREE_TYPE (old_var), id->src_fn))

...variably_modified_type_p seems to return true for them and...

> 	remap_decl (old_var, id);
>     }
> - phase 2 - do the full remap_decls, but during that arrange that
>   remap_decl for non-zero id->remapping_type_depth if (!n) just returns
>   decl

...they would not be copied here because remap_decl would not be
duplicating stuff.  So I'd end up with an original local decl when I
actually need a duplicate.

But let me go with just checking the remapping_type_depth for now.

Thanks for looking into this,

Martin


> That way, I think if the types refer to some temporaries that are defined
> in the bind stmts being copied, they will be properly duplicated, otherwise
> they will be shared.
> So, we'd need some flag in *id (just bool bitfield would be enough) that would
> allow replace_locals_stmt to set it before the remap_decls call in phase 2
> and clear it afterwards, and use that flag together with
> id->remapping_type_depth in remap_decls.
> 
> 	Jakub
Martin Jambor Jan. 12, 2016, 5:51 p.m. UTC | #4
On Tue, Jan 12, 2016 at 06:36:21PM +0100, Martin Jambor wrote:
> > 	remap_decl (old_var, id);
> >     }
> > - phase 2 - do the full remap_decls, but during that arrange that
> >   remap_decl for non-zero id->remapping_type_depth if (!n) just returns
> >   decl
> 
> ...they would not be copied here because remap_decl would not be
> duplicating stuff.  So I'd end up with an original local decl when I
> actually need a duplicate.
> 

ugh, I'm trying to be too fast and obviously forgot about the
id->remapping_type_depth part of the proposed condition.

Still, when could relying solely on id->remapping_type_depth fail?

Sorry for the noise,

Martin
Jakub Jelinek Jan. 12, 2016, 6:14 p.m. UTC | #5
On Tue, Jan 12, 2016 at 06:51:31PM +0100, Martin Jambor wrote:
> On Tue, Jan 12, 2016 at 06:36:21PM +0100, Martin Jambor wrote:
> > > 	remap_decl (old_var, id);
> > >     }
> > > - phase 2 - do the full remap_decls, but during that arrange that
> > >   remap_decl for non-zero id->remapping_type_depth if (!n) just returns
> > >   decl
> > 
> > ...they would not be copied here because remap_decl would not be
> > duplicating stuff.  So I'd end up with an original local decl when I
> > actually need a duplicate.
> > 
> 
> ugh, I'm trying to be too fast and obviously forgot about the
> id->remapping_type_depth part of the proposed condition.
> 
> Still, when could relying solely on id->remapping_type_depth fail?

Well, those functions are used for numerous purposes, and you'd only
want to not remap decls if not already remapped if id->remapping_type_depth
when inside of the copy_gimple_seq_and_replace_locals
path (and only for the remap_decls in there), so you need IMHO some
flag to distinguish that.

And the reason for the above suggested 2 phases, where the first phase just
calls remap_decl and nothing else on the non-VLAs is to make sure that
if a VLA type or DECL_VALUE_EXPR uses (usually scalar) vars declared in the
same bind block, then those are processed first.

	Jakub
diff mbox

Patch

diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 88a6753..2df11a2 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -340,8 +340,20 @@  remap_decl (tree decl, copy_body_data *id)
       return decl;
     }
 
-  /* If we didn't already have an equivalent for this declaration,
-     create one now.  */
+  /* If decl copying is forbidden (which happens when copying a type with size
+     defined outside of the copied sequence) work with the original decl. */
+  if (!n
+      && id->decl_creation_prevention_level > 1
+      && (VAR_P (decl) || TREE_CODE (decl) == PARM_DECL))
+    {
+      if (id->do_not_unshare)
+	return decl;
+      else
+	return unshare_expr (decl);
+    }
+
+  /* If we didn't already have an equivalent for this declaration, create one
+     now.  */
   if (!n)
     {
       /* Make a copy of the variable or label.  */
@@ -526,7 +538,10 @@  remap_type_1 (tree type, copy_body_data *id)
       gcc_unreachable ();
     }
 
-  /* All variants of type share the same size, so use the already remaped data.  */
+  /* All variants of type share the same size, so use the already remaped
+     data.  */
+  if (id->decl_creation_prevention_level > 0)
+    id->decl_creation_prevention_level++;
   if (TYPE_MAIN_VARIANT (new_tree) != new_tree)
     {
       gcc_checking_assert (TYPE_SIZE (type) == TYPE_SIZE (TYPE_MAIN_VARIANT (type)));
@@ -540,6 +555,8 @@  remap_type_1 (tree type, copy_body_data *id)
       walk_tree (&TYPE_SIZE (new_tree), copy_tree_body_r, id, NULL);
       walk_tree (&TYPE_SIZE_UNIT (new_tree), copy_tree_body_r, id, NULL);
     }
+  if (id->decl_creation_prevention_level > 1)
+    id->decl_creation_prevention_level--;
 
   return new_tree;
 }
@@ -5276,6 +5293,7 @@  copy_gimple_seq_and_replace_locals (gimple_seq seq)
   id.transform_return_to_modify = false;
   id.transform_parameter = false;
   id.transform_lang_insert_block = NULL;
+  id.decl_creation_prevention_level = 1;
 
   /* Walk the tree once to find local labels.  */
   memset (&wi, 0, sizeof (wi));
diff --git a/gcc/tree-inline.h b/gcc/tree-inline.h
index b8fb2a2..f737daf 100644
--- a/gcc/tree-inline.h
+++ b/gcc/tree-inline.h
@@ -140,14 +140,21 @@  struct copy_body_data
      the originals have been mapped to a value rather than to a
      variable.  */
   hash_map<tree, tree> *debug_map;
- 
-  /* Cilk keywords currently need to replace some variables that
-     ordinary nested functions do not.  */ 
-  bool remap_var_for_cilk;
 
   /* A map from the inlined functions dependence info cliques to
      equivalents in the function into which it is being inlined.  */
   hash_map<dependence_hash, unsigned short> *dependence_map;
+
+  /* Cilk keywords currently need to replace some variables that
+     ordinary nested functions do not.  */
+  bool remap_var_for_cilk;
+
+  /* When zero, do nothing.  When one or higher, increment during type
+     remapping, When two or higher, do not create new variables when remapping
+     decls.  Used when remapping types with variable size, but when the size is
+     defined outside the sequence copied by
+     copy_gimple_seq_and_replace_locals.  */
+  unsigned decl_creation_prevention_level;
 };
 
 /* Weights of constructions for estimate_num_insns.  */