diff mbox

[PR69607] Mark offload symbols as global in lto

Message ID 56C49658.1000908@mentor.com
State New
Headers show

Commit Message

Tom de Vries Feb. 17, 2016, 3:48 p.m. UTC
On 17/02/16 13:30, Jakub Jelinek wrote:
> On Wed, Feb 17, 2016 at 01:02:17PM +0100, Tom de Vries wrote:
>> Mark offload symbols as global in lto
>
> I'm really not familiar with that part of LTO, so I'm CCing Honza and
> Richard here.
>> 2016-02-08  Tom de Vries  <tom@codesourcery.com>
>>
>> 	PR lto/69607
>> 	* lto-partition.c (promote_offload_tables): New function.
>> 	* lto-partition.h (promote_offload_tables):  Declare.
>
> Just one space instead of two after :
>
>> 	* lto.c (do_whole_program_analysis): call promote_offload_tables.
>
> Capital C in Call.
>

Done.

>> diff --git a/libgomp/testsuite/libgomp.c/target-37.c b/libgomp/testsuite/libgomp.c/target-37.c
>> new file mode 100644
>> index 0000000..1edb21e
>> --- /dev/null
>> +++ b/libgomp/testsuite/libgomp.c/target-37.c
>> @@ -0,0 +1,98 @@
>> +/* { dg-do run { target lto } } */
>> +/* { dg-additional-sources "target-38.c" } */
>> +/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */
>> +
>> +extern
>> +#ifdef __cplusplus
>> +"C"
>> +#endif
>> +void abort (void);
>
> Why the C++ stuff in there?  Do you intend to include the testcase
> also in libgomp.c++?

No, that's just there because I started both target-37.c and target-38.c 
by copying target-1.c.

> If not, it is not needed.

Removed.

> Otherwise, the tests LGTM.
>

Updated patch attached.

Thanks,
- Tom

Comments

Tom de Vries Feb. 24, 2016, 1:37 p.m. UTC | #1
On 17/02/16 16:48, Tom de Vries wrote:
> On 17/02/16 13:30, Jakub Jelinek wrote:
>> On Wed, Feb 17, 2016 at 01:02:17PM +0100, Tom de Vries wrote:
>>> Mark offload symbols as global in lto
>>
>> I'm really not familiar with that part of LTO, so I'm CCing Honza and
>> Richard here.
>>> 2016-02-08  Tom de Vries  <tom@codesourcery.com>
>>>
>>>     PR lto/69607
>>>     * lto-partition.c (promote_offload_tables): New function.
>>>     * lto-partition.h (promote_offload_tables):  Declare.
>>
>> Just one space instead of two after :
>>
>>>     * lto.c (do_whole_program_analysis): call promote_offload_tables.
>>
>> Capital C in Call.
>>
>
> Done.
>
>>> diff --git a/libgomp/testsuite/libgomp.c/target-37.c
>>> b/libgomp/testsuite/libgomp.c/target-37.c
>>> new file mode 100644
>>> index 0000000..1edb21e
>>> --- /dev/null
>>> +++ b/libgomp/testsuite/libgomp.c/target-37.c
>>> @@ -0,0 +1,98 @@
>>> +/* { dg-do run { target lto } } */
>>> +/* { dg-additional-sources "target-38.c" } */
>>> +/* { dg-additional-options "-flto -flto-partition=1to1
>>> -fno-toplevel-reorder" } */
>>> +
>>> +extern
>>> +#ifdef __cplusplus
>>> +"C"
>>> +#endif
>>> +void abort (void);
>>
>> Why the C++ stuff in there?  Do you intend to include the testcase
>> also in libgomp.c++?
>
> No, that's just there because I started both target-37.c and target-38.c
> by copying target-1.c.
>
>> If not, it is not needed.
>
> Removed.
>
>> Otherwise, the tests LGTM.
>>
>
> Updated patch attached.
>

Ping.

[ Original submission here:
     https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00547.html
   Latest patch with updated testcases:
     https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01196.html
]

OK for trunk, stage1 (or stage4, if that's appropriate)?

> Thanks,
> - Tom
>
>
> 0001-Mark-offload-symbols-as-global-in-lto.patch
>
>
> Mark offload symbols as global in lto
>
> 2016-02-17  Tom de Vries  <tom@codesourcery.com>
>
> 	PR lto/69607
> 	* lto-partition.c (promote_offload_tables): New function.
> 	* lto-partition.h (promote_offload_tables): Declare.
> 	* lto.c (do_whole_program_analysis): Call promote_offload_tables.
>
> 	* testsuite/libgomp.c/target-36.c: New test.
> 	* testsuite/libgomp.c/target-37.c: New test.
> 	* testsuite/libgomp.c/target-38.c: New test.
>
> ---
>   gcc/lto/lto-partition.c                 | 28 ++++++++++
>   gcc/lto/lto-partition.h                 |  1 +
>   gcc/lto/lto.c                           |  2 +
>   libgomp/testsuite/libgomp.c/target-36.c |  4 ++
>   libgomp/testsuite/libgomp.c/target-37.c | 94 +++++++++++++++++++++++++++++++++
>   libgomp/testsuite/libgomp.c/target-38.c | 91 +++++++++++++++++++++++++++++++
>   6 files changed, 220 insertions(+)
>
> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
> index 9eb63c2..56598d4 100644
> --- a/gcc/lto/lto-partition.c
> +++ b/gcc/lto/lto-partition.c
> @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
>   #include "ipa-prop.h"
>   #include "ipa-inline.h"
>   #include "lto-partition.h"
> +#include "omp-low.h"
>
>   vec<ltrans_partition> ltrans_partitions;
>
> @@ -1003,6 +1004,33 @@ promote_symbol (symtab_node *node)
>   	    "Promoting as hidden: %s\n", node->name ());
>   }
>
> +/* Promote the symbols in the offload tables.  */
> +
> +void
> +promote_offload_tables (void)
> +{
> +  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
> +    return;
> +
> +  for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
> +    {
> +      tree fn_decl = (*offload_funcs)[i];
> +      cgraph_node *node = cgraph_node::get (fn_decl);
> +      if (node->externally_visible)
> +	continue;
> +      promote_symbol (node);
> +    }
> +
> +  for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
> +    {
> +      tree var_decl = (*offload_vars)[i];
> +      varpool_node *node = varpool_node::get (var_decl);
> +      if (node->externally_visible)
> +	continue;
> +      promote_symbol (node);
> +    }
> +}
> +
>   /* Return true if NODE needs named section even if it won't land in the partition
>      symbol table.
>      FIXME: we should really not use named sections for inline clones and master
> diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h
> index 31e3764..1a38126 100644
> --- a/gcc/lto/lto-partition.h
> +++ b/gcc/lto/lto-partition.h
> @@ -36,6 +36,7 @@ extern vec<ltrans_partition> ltrans_partitions;
>   void lto_1_to_1_map (void);
>   void lto_max_map (void);
>   void lto_balanced_map (int);
> +extern void promote_offload_tables (void);
>   void lto_promote_cross_file_statics (void);
>   void free_ltrans_partitions (void);
>   void lto_promote_statics_nonwpa (void);
> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
> index 9dd513f..2736c5c 100644
> --- a/gcc/lto/lto.c
> +++ b/gcc/lto/lto.c
> @@ -3138,6 +3138,8 @@ do_whole_program_analysis (void)
>        to globals with hidden visibility because they are accessed from multiple
>        partitions.  */
>     lto_promote_cross_file_statics ();
> +  /* Promote all the offload symbols.  */
> +  promote_offload_tables ();
>     timevar_pop (TV_WHOPR_PARTITIONING);
>
>     timevar_stop (TV_PHASE_OPT_GEN);
> diff --git a/libgomp/testsuite/libgomp.c/target-36.c b/libgomp/testsuite/libgomp.c/target-36.c
> new file mode 100644
> index 0000000..bafb718
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/target-36.c
> @@ -0,0 +1,4 @@
> +/* { dg-do run { target lto } } */
> +/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */
> +
> +#include "target-1.c"
> diff --git a/libgomp/testsuite/libgomp.c/target-37.c b/libgomp/testsuite/libgomp.c/target-37.c
> new file mode 100644
> index 0000000..fe5b8ef
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/target-37.c
> @@ -0,0 +1,94 @@
> +/* { dg-do run { target lto } } */
> +/* { dg-additional-sources "target-38.c" } */
> +/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */
> +
> +extern void abort (void);
> +
> +void
> +fn1 (double *x, double *y, int z)
> +{
> +  int i;
> +  for (i = 0; i < z; i++)
> +    {
> +      x[i] = i & 31;
> +      y[i] = (i & 63) - 30;
> +    }
> +}
> +
> +#pragma omp declare target
> +static int tgtv = 6;
> +static int
> +tgt (void)
> +{
> +  #pragma omp atomic update
> +    tgtv++;
> +  return 0;
> +}
> +#pragma omp end declare target
> +
> +static double
> +fn2 (int x, int y, int z)
> +{
> +  double b[1024], c[1024], s = 0;
> +  int i, j;
> +  fn1 (b, c, x);
> +  #pragma omp target data map(to: b)
> +  {
> +    #pragma omp target map(tofrom: c, s)
> +      #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x)
> +	#pragma omp distribute dist_schedule(static, 4) collapse(1)
> +	  for (j=0; j < x; j += y)
> +	    #pragma omp parallel for reduction(+:s)
> +	      for (i = j; i < j + y; i++)
> +		tgt (), s += b[i] * c[i];
> +    #pragma omp target update from(b, tgtv)
> +  }
> +  return s;
> +}
> +
> +static double
> +fn3 (int x)
> +{
> +  double b[1024], c[1024], s = 0;
> +  int i;
> +  fn1 (b, c, x);
> +  #pragma omp target map(to: b, c) map(tofrom:s)
> +    #pragma omp parallel for reduction(+:s)
> +      for (i = 0; i < x; i++)
> +	tgt (), s += b[i] * c[i];
> +  return s;
> +}
> +
> +static double
> +fn4 (int x, double *p)
> +{
> +  double b[1024], c[1024], d[1024], s = 0;
> +  int i;
> +  fn1 (b, c, x);
> +  fn1 (d + x, p + x, x);
> +  #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \
> +		     map(tofrom: s)
> +    #pragma omp parallel for reduction(+:s)
> +      for (i = 0; i < x; i++)
> +	s += b[i] * c[i] + d[x + i] + p[x + i];
> +  return s;
> +}
> +
> +extern int other_main (void);
> +
> +int
> +main ()
> +{
> +  double a = fn2 (128, 4, 6);
> +  int b = tgtv;
> +  double c = fn3 (61);
> +  #pragma omp target update from(tgtv)
> +  int d = tgtv;
> +  double e[1024];
> +  double f = fn4 (64, e);
> +  if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
> +      || f != 8032.0)
> +    abort ();
> +  other_main ();
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.c/target-38.c b/libgomp/testsuite/libgomp.c/target-38.c
> new file mode 100644
> index 0000000..de2b4c8
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/target-38.c
> @@ -0,0 +1,91 @@
> +/* { dg-skip-if "additional source" { *-*-* } } */
> +
> +extern void abort (void);
> +
> +static void
> +fna1 (double *x, double *y, int z)
> +{
> +  int i;
> +  for (i = 0; i < z; i++)
> +    {
> +      x[i] = i & 31;
> +      y[i] = (i & 63) - 30;
> +    }
> +}
> +
> +#pragma omp declare target
> +static int tgtva = 6;
> +static int
> +tgta (void)
> +{
> +  #pragma omp atomic update
> +    tgtva++;
> +  return 0;
> +}
> +#pragma omp end declare target
> +
> +double
> +fna2 (int x, int y, int z)
> +{
> +  double b[1024], c[1024], s = 0;
> +  int i, j;
> +  fna1 (b, c, x);
> +  #pragma omp target data map(to: b)
> +  {
> +    #pragma omp target map(tofrom: c, s)
> +      #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x)
> +	#pragma omp distribute dist_schedule(static, 4) collapse(1)
> +	  for (j=0; j < x; j += y)
> +	    #pragma omp parallel for reduction(+:s)
> +	      for (i = j; i < j + y; i++)
> +		tgta (), s += b[i] * c[i];
> +    #pragma omp target update from(b, tgtva)
> +  }
> +  return s;
> +}
> +
> +static double
> +fna3 (int x)
> +{
> +  double b[1024], c[1024], s = 0;
> +  int i;
> +  fna1 (b, c, x);
> +  #pragma omp target map(to: b, c) map(tofrom:s)
> +    #pragma omp parallel for reduction(+:s)
> +      for (i = 0; i < x; i++)
> +	tgta (), s += b[i] * c[i];
> +  return s;
> +}
> +
> +static double
> +fna4 (int x, double *p)
> +{
> +  double b[1024], c[1024], d[1024], s = 0;
> +  int i;
> +  fna1 (b, c, x);
> +  fna1 (d + x, p + x, x);
> +  #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \
> +		     map(tofrom: s)
> +    #pragma omp parallel for reduction(+:s)
> +      for (i = 0; i < x; i++)
> +	s += b[i] * c[i] + d[x + i] + p[x + i];
> +  return s;
> +}
> +
> +extern int other_main (void);
> +
> +int
> +other_main (void)
> +{
> +  double a = fna2 (128, 4, 6);
> +  int b = tgtva;
> +  double c = fna3 (61);
> +  #pragma omp target update from(tgtva)
> +  int d = tgtva;
> +  double e[1024];
> +  double f = fna4 (64, e);
> +  if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
> +      || f != 8032.0)
> +    abort ();
> +  return 0;
> +}
>
Tom de Vries March 7, 2016, 2:38 p.m. UTC | #2
On 24/02/16 14:37, Tom de Vries wrote:
> On 17/02/16 16:48, Tom de Vries wrote:
>> On 17/02/16 13:30, Jakub Jelinek wrote:
>>> On Wed, Feb 17, 2016 at 01:02:17PM +0100, Tom de Vries wrote:
>>>> Mark offload symbols as global in lto
>>>
>>> I'm really not familiar with that part of LTO, so I'm CCing Honza and
>>> Richard here.
>>>> 2016-02-08  Tom de Vries  <tom@codesourcery.com>
>>>>
>>>>     PR lto/69607
>>>>     * lto-partition.c (promote_offload_tables): New function.
>>>>     * lto-partition.h (promote_offload_tables):  Declare.
>>>
>>> Just one space instead of two after :
>>>
>>>>     * lto.c (do_whole_program_analysis): call promote_offload_tables.
>>>
>>> Capital C in Call.
>>>
>>
>> Done.
>>
>>>> diff --git a/libgomp/testsuite/libgomp.c/target-37.c
>>>> b/libgomp/testsuite/libgomp.c/target-37.c
>>>> new file mode 100644
>>>> index 0000000..1edb21e
>>>> --- /dev/null
>>>> +++ b/libgomp/testsuite/libgomp.c/target-37.c
>>>> @@ -0,0 +1,98 @@
>>>> +/* { dg-do run { target lto } } */
>>>> +/* { dg-additional-sources "target-38.c" } */
>>>> +/* { dg-additional-options "-flto -flto-partition=1to1
>>>> -fno-toplevel-reorder" } */
>>>> +
>>>> +extern
>>>> +#ifdef __cplusplus
>>>> +"C"
>>>> +#endif
>>>> +void abort (void);
>>>
>>> Why the C++ stuff in there?  Do you intend to include the testcase
>>> also in libgomp.c++?
>>
>> No, that's just there because I started both target-37.c and target-38.c
>> by copying target-1.c.
>>
>>> If not, it is not needed.
>>
>> Removed.
>>
>>> Otherwise, the tests LGTM.
>>>
>>
>> Updated patch attached.
>>
>

Ping^2.

This patch fixes an ICE when compiling an openmp program using -flto 
-partition=1to1 -fno-toplevel-reorder.

[ Original submission here:
      https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00547.html
    Latest patch with updated testcases:
      https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01196.html
]

OK for trunk, stage1 (or stage4, if that's appropriate)?

Thanks,
- Tom

>>
>>
>> 0001-Mark-offload-symbols-as-global-in-lto.patch
>>
>>
>> Mark offload symbols as global in lto
>>
>> 2016-02-17  Tom de Vries  <tom@codesourcery.com>
>>
>>     PR lto/69607
>>     * lto-partition.c (promote_offload_tables): New function.
>>     * lto-partition.h (promote_offload_tables): Declare.
>>     * lto.c (do_whole_program_analysis): Call promote_offload_tables.
>>
>>     * testsuite/libgomp.c/target-36.c: New test.
>>     * testsuite/libgomp.c/target-37.c: New test.
>>     * testsuite/libgomp.c/target-38.c: New test.
>>
>> ---
>>   gcc/lto/lto-partition.c                 | 28 ++++++++++
>>   gcc/lto/lto-partition.h                 |  1 +
>>   gcc/lto/lto.c                           |  2 +
>>   libgomp/testsuite/libgomp.c/target-36.c |  4 ++
>>   libgomp/testsuite/libgomp.c/target-37.c | 94
>> +++++++++++++++++++++++++++++++++
>>   libgomp/testsuite/libgomp.c/target-38.c | 91
>> +++++++++++++++++++++++++++++++
>>   6 files changed, 220 insertions(+)
>>
>> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
>> index 9eb63c2..56598d4 100644
>> --- a/gcc/lto/lto-partition.c
>> +++ b/gcc/lto/lto-partition.c
>> @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
>>   #include "ipa-prop.h"
>>   #include "ipa-inline.h"
>>   #include "lto-partition.h"
>> +#include "omp-low.h"
>>
>>   vec<ltrans_partition> ltrans_partitions;
>>
>> @@ -1003,6 +1004,33 @@ promote_symbol (symtab_node *node)
>>           "Promoting as hidden: %s\n", node->name ());
>>   }
>>
>> +/* Promote the symbols in the offload tables.  */
>> +
>> +void
>> +promote_offload_tables (void)
>> +{
>> +  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty
>> (offload_vars))
>> +    return;
>> +
>> +  for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
>> +    {
>> +      tree fn_decl = (*offload_funcs)[i];
>> +      cgraph_node *node = cgraph_node::get (fn_decl);
>> +      if (node->externally_visible)
>> +    continue;
>> +      promote_symbol (node);
>> +    }
>> +
>> +  for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
>> +    {
>> +      tree var_decl = (*offload_vars)[i];
>> +      varpool_node *node = varpool_node::get (var_decl);
>> +      if (node->externally_visible)
>> +    continue;
>> +      promote_symbol (node);
>> +    }
>> +}
>> +
>>   /* Return true if NODE needs named section even if it won't land in
>> the partition
>>      symbol table.
>>      FIXME: we should really not use named sections for inline clones
>> and master
>> diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h
>> index 31e3764..1a38126 100644
>> --- a/gcc/lto/lto-partition.h
>> +++ b/gcc/lto/lto-partition.h
>> @@ -36,6 +36,7 @@ extern vec<ltrans_partition> ltrans_partitions;
>>   void lto_1_to_1_map (void);
>>   void lto_max_map (void);
>>   void lto_balanced_map (int);
>> +extern void promote_offload_tables (void);
>>   void lto_promote_cross_file_statics (void);
>>   void free_ltrans_partitions (void);
>>   void lto_promote_statics_nonwpa (void);
>> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
>> index 9dd513f..2736c5c 100644
>> --- a/gcc/lto/lto.c
>> +++ b/gcc/lto/lto.c
>> @@ -3138,6 +3138,8 @@ do_whole_program_analysis (void)
>>        to globals with hidden visibility because they are accessed
>> from multiple
>>        partitions.  */
>>     lto_promote_cross_file_statics ();
>> +  /* Promote all the offload symbols.  */
>> +  promote_offload_tables ();
>>     timevar_pop (TV_WHOPR_PARTITIONING);
>>
>>     timevar_stop (TV_PHASE_OPT_GEN);
>> diff --git a/libgomp/testsuite/libgomp.c/target-36.c
>> b/libgomp/testsuite/libgomp.c/target-36.c
>> new file mode 100644
>> index 0000000..bafb718
>> --- /dev/null
>> +++ b/libgomp/testsuite/libgomp.c/target-36.c
>> @@ -0,0 +1,4 @@
>> +/* { dg-do run { target lto } } */
>> +/* { dg-additional-options "-flto -flto-partition=1to1
>> -fno-toplevel-reorder" } */
>> +
>> +#include "target-1.c"
>> diff --git a/libgomp/testsuite/libgomp.c/target-37.c
>> b/libgomp/testsuite/libgomp.c/target-37.c
>> new file mode 100644
>> index 0000000..fe5b8ef
>> --- /dev/null
>> +++ b/libgomp/testsuite/libgomp.c/target-37.c
>> @@ -0,0 +1,94 @@
>> +/* { dg-do run { target lto } } */
>> +/* { dg-additional-sources "target-38.c" } */
>> +/* { dg-additional-options "-flto -flto-partition=1to1
>> -fno-toplevel-reorder" } */
>> +
>> +extern void abort (void);
>> +
>> +void
>> +fn1 (double *x, double *y, int z)
>> +{
>> +  int i;
>> +  for (i = 0; i < z; i++)
>> +    {
>> +      x[i] = i & 31;
>> +      y[i] = (i & 63) - 30;
>> +    }
>> +}
>> +
>> +#pragma omp declare target
>> +static int tgtv = 6;
>> +static int
>> +tgt (void)
>> +{
>> +  #pragma omp atomic update
>> +    tgtv++;
>> +  return 0;
>> +}
>> +#pragma omp end declare target
>> +
>> +static double
>> +fn2 (int x, int y, int z)
>> +{
>> +  double b[1024], c[1024], s = 0;
>> +  int i, j;
>> +  fn1 (b, c, x);
>> +  #pragma omp target data map(to: b)
>> +  {
>> +    #pragma omp target map(tofrom: c, s)
>> +      #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s)
>> firstprivate(x)
>> +    #pragma omp distribute dist_schedule(static, 4) collapse(1)
>> +      for (j=0; j < x; j += y)
>> +        #pragma omp parallel for reduction(+:s)
>> +          for (i = j; i < j + y; i++)
>> +        tgt (), s += b[i] * c[i];
>> +    #pragma omp target update from(b, tgtv)
>> +  }
>> +  return s;
>> +}
>> +
>> +static double
>> +fn3 (int x)
>> +{
>> +  double b[1024], c[1024], s = 0;
>> +  int i;
>> +  fn1 (b, c, x);
>> +  #pragma omp target map(to: b, c) map(tofrom:s)
>> +    #pragma omp parallel for reduction(+:s)
>> +      for (i = 0; i < x; i++)
>> +    tgt (), s += b[i] * c[i];
>> +  return s;
>> +}
>> +
>> +static double
>> +fn4 (int x, double *p)
>> +{
>> +  double b[1024], c[1024], d[1024], s = 0;
>> +  int i;
>> +  fn1 (b, c, x);
>> +  fn1 (d + x, p + x, x);
>> +  #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x &
>> 31)]) \
>> +             map(tofrom: s)
>> +    #pragma omp parallel for reduction(+:s)
>> +      for (i = 0; i < x; i++)
>> +    s += b[i] * c[i] + d[x + i] + p[x + i];
>> +  return s;
>> +}
>> +
>> +extern int other_main (void);
>> +
>> +int
>> +main ()
>> +{
>> +  double a = fn2 (128, 4, 6);
>> +  int b = tgtv;
>> +  double c = fn3 (61);
>> +  #pragma omp target update from(tgtv)
>> +  int d = tgtv;
>> +  double e[1024];
>> +  double f = fn4 (64, e);
>> +  if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
>> +      || f != 8032.0)
>> +    abort ();
>> +  other_main ();
>> +  return 0;
>> +}
>> diff --git a/libgomp/testsuite/libgomp.c/target-38.c
>> b/libgomp/testsuite/libgomp.c/target-38.c
>> new file mode 100644
>> index 0000000..de2b4c8
>> --- /dev/null
>> +++ b/libgomp/testsuite/libgomp.c/target-38.c
>> @@ -0,0 +1,91 @@
>> +/* { dg-skip-if "additional source" { *-*-* } } */
>> +
>> +extern void abort (void);
>> +
>> +static void
>> +fna1 (double *x, double *y, int z)
>> +{
>> +  int i;
>> +  for (i = 0; i < z; i++)
>> +    {
>> +      x[i] = i & 31;
>> +      y[i] = (i & 63) - 30;
>> +    }
>> +}
>> +
>> +#pragma omp declare target
>> +static int tgtva = 6;
>> +static int
>> +tgta (void)
>> +{
>> +  #pragma omp atomic update
>> +    tgtva++;
>> +  return 0;
>> +}
>> +#pragma omp end declare target
>> +
>> +double
>> +fna2 (int x, int y, int z)
>> +{
>> +  double b[1024], c[1024], s = 0;
>> +  int i, j;
>> +  fna1 (b, c, x);
>> +  #pragma omp target data map(to: b)
>> +  {
>> +    #pragma omp target map(tofrom: c, s)
>> +      #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s)
>> firstprivate(x)
>> +    #pragma omp distribute dist_schedule(static, 4) collapse(1)
>> +      for (j=0; j < x; j += y)
>> +        #pragma omp parallel for reduction(+:s)
>> +          for (i = j; i < j + y; i++)
>> +        tgta (), s += b[i] * c[i];
>> +    #pragma omp target update from(b, tgtva)
>> +  }
>> +  return s;
>> +}
>> +
>> +static double
>> +fna3 (int x)
>> +{
>> +  double b[1024], c[1024], s = 0;
>> +  int i;
>> +  fna1 (b, c, x);
>> +  #pragma omp target map(to: b, c) map(tofrom:s)
>> +    #pragma omp parallel for reduction(+:s)
>> +      for (i = 0; i < x; i++)
>> +    tgta (), s += b[i] * c[i];
>> +  return s;
>> +}
>> +
>> +static double
>> +fna4 (int x, double *p)
>> +{
>> +  double b[1024], c[1024], d[1024], s = 0;
>> +  int i;
>> +  fna1 (b, c, x);
>> +  fna1 (d + x, p + x, x);
>> +  #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x &
>> 31)]) \
>> +             map(tofrom: s)
>> +    #pragma omp parallel for reduction(+:s)
>> +      for (i = 0; i < x; i++)
>> +    s += b[i] * c[i] + d[x + i] + p[x + i];
>> +  return s;
>> +}
>> +
>> +extern int other_main (void);
>> +
>> +int
>> +other_main (void)
>> +{
>> +  double a = fna2 (128, 4, 6);
>> +  int b = tgtva;
>> +  double c = fna3 (61);
>> +  #pragma omp target update from(tgtva)
>> +  int d = tgtva;
>> +  double e[1024];
>> +  double f = fna4 (64, e);
>> +  if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
>> +      || f != 8032.0)
>> +    abort ();
>> +  return 0;
>> +}
>>
>
Tom de Vries March 14, 2016, 6:16 a.m. UTC | #3
On 07/03/16 15:38, Tom de Vries wrote:
> On 24/02/16 14:37, Tom de Vries wrote:
>> On 17/02/16 16:48, Tom de Vries wrote:
>>> On 17/02/16 13:30, Jakub Jelinek wrote:
>>>> On Wed, Feb 17, 2016 at 01:02:17PM +0100, Tom de Vries wrote:
>>>>> Mark offload symbols as global in lto
>>>>
>>>> I'm really not familiar with that part of LTO, so I'm CCing Honza and
>>>> Richard here.
>>>>> 2016-02-08  Tom de Vries  <tom@codesourcery.com>
>>>>>
>>>>>     PR lto/69607
>>>>>     * lto-partition.c (promote_offload_tables): New function.
>>>>>     * lto-partition.h (promote_offload_tables):  Declare.
>>>>
>>>> Just one space instead of two after :
>>>>
>>>>>     * lto.c (do_whole_program_analysis): call promote_offload_tables.
>>>>
>>>> Capital C in Call.
>>>>
>>>
>>> Done.
>>>
>>>>> diff --git a/libgomp/testsuite/libgomp.c/target-37.c
>>>>> b/libgomp/testsuite/libgomp.c/target-37.c
>>>>> new file mode 100644
>>>>> index 0000000..1edb21e
>>>>> --- /dev/null
>>>>> +++ b/libgomp/testsuite/libgomp.c/target-37.c
>>>>> @@ -0,0 +1,98 @@
>>>>> +/* { dg-do run { target lto } } */
>>>>> +/* { dg-additional-sources "target-38.c" } */
>>>>> +/* { dg-additional-options "-flto -flto-partition=1to1
>>>>> -fno-toplevel-reorder" } */
>>>>> +
>>>>> +extern
>>>>> +#ifdef __cplusplus
>>>>> +"C"
>>>>> +#endif
>>>>> +void abort (void);
>>>>
>>>> Why the C++ stuff in there?  Do you intend to include the testcase
>>>> also in libgomp.c++?
>>>
>>> No, that's just there because I started both target-37.c and target-38.c
>>> by copying target-1.c.
>>>
>>>> If not, it is not needed.
>>>
>>> Removed.
>>>
>>>> Otherwise, the tests LGTM.
>>>>
>>>
>>> Updated patch attached.
>>>
>>

Ping^3.


This patch fixes an ICE when compiling an openmp program using -flto
-partition=1to1 -fno-toplevel-reorder.

[ Original submission here:
       https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00547.html
     Latest patch with updated testcases:
       https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01196.html
]

OK for trunk, stage1 (or stage4, if that's appropriate)?

Thanks,
- Tom

>>>
>>>
>>> 0001-Mark-offload-symbols-as-global-in-lto.patch
>>>
>>>
>>> Mark offload symbols as global in lto
>>>
>>> 2016-02-17  Tom de Vries  <tom@codesourcery.com>
>>>
>>>     PR lto/69607
>>>     * lto-partition.c (promote_offload_tables): New function.
>>>     * lto-partition.h (promote_offload_tables): Declare.
>>>     * lto.c (do_whole_program_analysis): Call promote_offload_tables.
>>>
>>>     * testsuite/libgomp.c/target-36.c: New test.
>>>     * testsuite/libgomp.c/target-37.c: New test.
>>>     * testsuite/libgomp.c/target-38.c: New test.
>>>
>>> ---
>>>   gcc/lto/lto-partition.c                 | 28 ++++++++++
>>>   gcc/lto/lto-partition.h                 |  1 +
>>>   gcc/lto/lto.c                           |  2 +
>>>   libgomp/testsuite/libgomp.c/target-36.c |  4 ++
>>>   libgomp/testsuite/libgomp.c/target-37.c | 94
>>> +++++++++++++++++++++++++++++++++
>>>   libgomp/testsuite/libgomp.c/target-38.c | 91
>>> +++++++++++++++++++++++++++++++
>>>   6 files changed, 220 insertions(+)
>>>
>>> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
>>> index 9eb63c2..56598d4 100644
>>> --- a/gcc/lto/lto-partition.c
>>> +++ b/gcc/lto/lto-partition.c
>>> @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
>>>   #include "ipa-prop.h"
>>>   #include "ipa-inline.h"
>>>   #include "lto-partition.h"
>>> +#include "omp-low.h"
>>>
>>>   vec<ltrans_partition> ltrans_partitions;
>>>
>>> @@ -1003,6 +1004,33 @@ promote_symbol (symtab_node *node)
>>>           "Promoting as hidden: %s\n", node->name ());
>>>   }
>>>
>>> +/* Promote the symbols in the offload tables.  */
>>> +
>>> +void
>>> +promote_offload_tables (void)
>>> +{
>>> +  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty
>>> (offload_vars))
>>> +    return;
>>> +
>>> +  for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
>>> +    {
>>> +      tree fn_decl = (*offload_funcs)[i];
>>> +      cgraph_node *node = cgraph_node::get (fn_decl);
>>> +      if (node->externally_visible)
>>> +    continue;
>>> +      promote_symbol (node);
>>> +    }
>>> +
>>> +  for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
>>> +    {
>>> +      tree var_decl = (*offload_vars)[i];
>>> +      varpool_node *node = varpool_node::get (var_decl);
>>> +      if (node->externally_visible)
>>> +    continue;
>>> +      promote_symbol (node);
>>> +    }
>>> +}
>>> +
>>>   /* Return true if NODE needs named section even if it won't land in
>>> the partition
>>>      symbol table.
>>>      FIXME: we should really not use named sections for inline clones
>>> and master
>>> diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h
>>> index 31e3764..1a38126 100644
>>> --- a/gcc/lto/lto-partition.h
>>> +++ b/gcc/lto/lto-partition.h
>>> @@ -36,6 +36,7 @@ extern vec<ltrans_partition> ltrans_partitions;
>>>   void lto_1_to_1_map (void);
>>>   void lto_max_map (void);
>>>   void lto_balanced_map (int);
>>> +extern void promote_offload_tables (void);
>>>   void lto_promote_cross_file_statics (void);
>>>   void free_ltrans_partitions (void);
>>>   void lto_promote_statics_nonwpa (void);
>>> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
>>> index 9dd513f..2736c5c 100644
>>> --- a/gcc/lto/lto.c
>>> +++ b/gcc/lto/lto.c
>>> @@ -3138,6 +3138,8 @@ do_whole_program_analysis (void)
>>>        to globals with hidden visibility because they are accessed
>>> from multiple
>>>        partitions.  */
>>>     lto_promote_cross_file_statics ();
>>> +  /* Promote all the offload symbols.  */
>>> +  promote_offload_tables ();
>>>     timevar_pop (TV_WHOPR_PARTITIONING);
>>>
>>>     timevar_stop (TV_PHASE_OPT_GEN);
>>> diff --git a/libgomp/testsuite/libgomp.c/target-36.c
>>> b/libgomp/testsuite/libgomp.c/target-36.c
>>> new file mode 100644
>>> index 0000000..bafb718
>>> --- /dev/null
>>> +++ b/libgomp/testsuite/libgomp.c/target-36.c
>>> @@ -0,0 +1,4 @@
>>> +/* { dg-do run { target lto } } */
>>> +/* { dg-additional-options "-flto -flto-partition=1to1
>>> -fno-toplevel-reorder" } */
>>> +
>>> +#include "target-1.c"
>>> diff --git a/libgomp/testsuite/libgomp.c/target-37.c
>>> b/libgomp/testsuite/libgomp.c/target-37.c
>>> new file mode 100644
>>> index 0000000..fe5b8ef
>>> --- /dev/null
>>> +++ b/libgomp/testsuite/libgomp.c/target-37.c
>>> @@ -0,0 +1,94 @@
>>> +/* { dg-do run { target lto } } */
>>> +/* { dg-additional-sources "target-38.c" } */
>>> +/* { dg-additional-options "-flto -flto-partition=1to1
>>> -fno-toplevel-reorder" } */
>>> +
>>> +extern void abort (void);
>>> +
>>> +void
>>> +fn1 (double *x, double *y, int z)
>>> +{
>>> +  int i;
>>> +  for (i = 0; i < z; i++)
>>> +    {
>>> +      x[i] = i & 31;
>>> +      y[i] = (i & 63) - 30;
>>> +    }
>>> +}
>>> +
>>> +#pragma omp declare target
>>> +static int tgtv = 6;
>>> +static int
>>> +tgt (void)
>>> +{
>>> +  #pragma omp atomic update
>>> +    tgtv++;
>>> +  return 0;
>>> +}
>>> +#pragma omp end declare target
>>> +
>>> +static double
>>> +fn2 (int x, int y, int z)
>>> +{
>>> +  double b[1024], c[1024], s = 0;
>>> +  int i, j;
>>> +  fn1 (b, c, x);
>>> +  #pragma omp target data map(to: b)
>>> +  {
>>> +    #pragma omp target map(tofrom: c, s)
>>> +      #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s)
>>> firstprivate(x)
>>> +    #pragma omp distribute dist_schedule(static, 4) collapse(1)
>>> +      for (j=0; j < x; j += y)
>>> +        #pragma omp parallel for reduction(+:s)
>>> +          for (i = j; i < j + y; i++)
>>> +        tgt (), s += b[i] * c[i];
>>> +    #pragma omp target update from(b, tgtv)
>>> +  }
>>> +  return s;
>>> +}
>>> +
>>> +static double
>>> +fn3 (int x)
>>> +{
>>> +  double b[1024], c[1024], s = 0;
>>> +  int i;
>>> +  fn1 (b, c, x);
>>> +  #pragma omp target map(to: b, c) map(tofrom:s)
>>> +    #pragma omp parallel for reduction(+:s)
>>> +      for (i = 0; i < x; i++)
>>> +    tgt (), s += b[i] * c[i];
>>> +  return s;
>>> +}
>>> +
>>> +static double
>>> +fn4 (int x, double *p)
>>> +{
>>> +  double b[1024], c[1024], d[1024], s = 0;
>>> +  int i;
>>> +  fn1 (b, c, x);
>>> +  fn1 (d + x, p + x, x);
>>> +  #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x &
>>> 31)]) \
>>> +             map(tofrom: s)
>>> +    #pragma omp parallel for reduction(+:s)
>>> +      for (i = 0; i < x; i++)
>>> +    s += b[i] * c[i] + d[x + i] + p[x + i];
>>> +  return s;
>>> +}
>>> +
>>> +extern int other_main (void);
>>> +
>>> +int
>>> +main ()
>>> +{
>>> +  double a = fn2 (128, 4, 6);
>>> +  int b = tgtv;
>>> +  double c = fn3 (61);
>>> +  #pragma omp target update from(tgtv)
>>> +  int d = tgtv;
>>> +  double e[1024];
>>> +  double f = fn4 (64, e);
>>> +  if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
>>> +      || f != 8032.0)
>>> +    abort ();
>>> +  other_main ();
>>> +  return 0;
>>> +}
>>> diff --git a/libgomp/testsuite/libgomp.c/target-38.c
>>> b/libgomp/testsuite/libgomp.c/target-38.c
>>> new file mode 100644
>>> index 0000000..de2b4c8
>>> --- /dev/null
>>> +++ b/libgomp/testsuite/libgomp.c/target-38.c
>>> @@ -0,0 +1,91 @@
>>> +/* { dg-skip-if "additional source" { *-*-* } } */
>>> +
>>> +extern void abort (void);
>>> +
>>> +static void
>>> +fna1 (double *x, double *y, int z)
>>> +{
>>> +  int i;
>>> +  for (i = 0; i < z; i++)
>>> +    {
>>> +      x[i] = i & 31;
>>> +      y[i] = (i & 63) - 30;
>>> +    }
>>> +}
>>> +
>>> +#pragma omp declare target
>>> +static int tgtva = 6;
>>> +static int
>>> +tgta (void)
>>> +{
>>> +  #pragma omp atomic update
>>> +    tgtva++;
>>> +  return 0;
>>> +}
>>> +#pragma omp end declare target
>>> +
>>> +double
>>> +fna2 (int x, int y, int z)
>>> +{
>>> +  double b[1024], c[1024], s = 0;
>>> +  int i, j;
>>> +  fna1 (b, c, x);
>>> +  #pragma omp target data map(to: b)
>>> +  {
>>> +    #pragma omp target map(tofrom: c, s)
>>> +      #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s)
>>> firstprivate(x)
>>> +    #pragma omp distribute dist_schedule(static, 4) collapse(1)
>>> +      for (j=0; j < x; j += y)
>>> +        #pragma omp parallel for reduction(+:s)
>>> +          for (i = j; i < j + y; i++)
>>> +        tgta (), s += b[i] * c[i];
>>> +    #pragma omp target update from(b, tgtva)
>>> +  }
>>> +  return s;
>>> +}
>>> +
>>> +static double
>>> +fna3 (int x)
>>> +{
>>> +  double b[1024], c[1024], s = 0;
>>> +  int i;
>>> +  fna1 (b, c, x);
>>> +  #pragma omp target map(to: b, c) map(tofrom:s)
>>> +    #pragma omp parallel for reduction(+:s)
>>> +      for (i = 0; i < x; i++)
>>> +    tgta (), s += b[i] * c[i];
>>> +  return s;
>>> +}
>>> +
>>> +static double
>>> +fna4 (int x, double *p)
>>> +{
>>> +  double b[1024], c[1024], d[1024], s = 0;
>>> +  int i;
>>> +  fna1 (b, c, x);
>>> +  fna1 (d + x, p + x, x);
>>> +  #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x &
>>> 31)]) \
>>> +             map(tofrom: s)
>>> +    #pragma omp parallel for reduction(+:s)
>>> +      for (i = 0; i < x; i++)
>>> +    s += b[i] * c[i] + d[x + i] + p[x + i];
>>> +  return s;
>>> +}
>>> +
>>> +extern int other_main (void);
>>> +
>>> +int
>>> +other_main (void)
>>> +{
>>> +  double a = fna2 (128, 4, 6);
>>> +  int b = tgtva;
>>> +  double c = fna3 (61);
>>> +  #pragma omp target update from(tgtva)
>>> +  int d = tgtva;
>>> +  double e[1024];
>>> +  double f = fna4 (64, e);
>>> +  if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
>>> +      || f != 8032.0)
>>> +    abort ();
>>> +  return 0;
>>> +}
>>>
>>
>
diff mbox

Patch

Mark offload symbols as global in lto

2016-02-17  Tom de Vries  <tom@codesourcery.com>

	PR lto/69607
	* lto-partition.c (promote_offload_tables): New function.
	* lto-partition.h (promote_offload_tables): Declare.
	* lto.c (do_whole_program_analysis): Call promote_offload_tables.

	* testsuite/libgomp.c/target-36.c: New test.
	* testsuite/libgomp.c/target-37.c: New test.
	* testsuite/libgomp.c/target-38.c: New test.

---
 gcc/lto/lto-partition.c                 | 28 ++++++++++
 gcc/lto/lto-partition.h                 |  1 +
 gcc/lto/lto.c                           |  2 +
 libgomp/testsuite/libgomp.c/target-36.c |  4 ++
 libgomp/testsuite/libgomp.c/target-37.c | 94 +++++++++++++++++++++++++++++++++
 libgomp/testsuite/libgomp.c/target-38.c | 91 +++++++++++++++++++++++++++++++
 6 files changed, 220 insertions(+)

diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 9eb63c2..56598d4 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -34,6 +34,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "ipa-prop.h"
 #include "ipa-inline.h"
 #include "lto-partition.h"
+#include "omp-low.h"
 
 vec<ltrans_partition> ltrans_partitions;
 
@@ -1003,6 +1004,33 @@  promote_symbol (symtab_node *node)
 	    "Promoting as hidden: %s\n", node->name ());
 }
 
+/* Promote the symbols in the offload tables.  */
+
+void
+promote_offload_tables (void)
+{
+  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
+    return;
+
+  for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
+    {
+      tree fn_decl = (*offload_funcs)[i];
+      cgraph_node *node = cgraph_node::get (fn_decl);
+      if (node->externally_visible)
+	continue;
+      promote_symbol (node);
+    }
+
+  for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
+    {
+      tree var_decl = (*offload_vars)[i];
+      varpool_node *node = varpool_node::get (var_decl);
+      if (node->externally_visible)
+	continue;
+      promote_symbol (node);
+    }
+}
+
 /* Return true if NODE needs named section even if it won't land in the partition
    symbol table.
    FIXME: we should really not use named sections for inline clones and master
diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h
index 31e3764..1a38126 100644
--- a/gcc/lto/lto-partition.h
+++ b/gcc/lto/lto-partition.h
@@ -36,6 +36,7 @@  extern vec<ltrans_partition> ltrans_partitions;
 void lto_1_to_1_map (void);
 void lto_max_map (void);
 void lto_balanced_map (int);
+extern void promote_offload_tables (void);
 void lto_promote_cross_file_statics (void);
 void free_ltrans_partitions (void);
 void lto_promote_statics_nonwpa (void);
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 9dd513f..2736c5c 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -3138,6 +3138,8 @@  do_whole_program_analysis (void)
      to globals with hidden visibility because they are accessed from multiple
      partitions.  */
   lto_promote_cross_file_statics ();
+  /* Promote all the offload symbols.  */
+  promote_offload_tables ();
   timevar_pop (TV_WHOPR_PARTITIONING);
 
   timevar_stop (TV_PHASE_OPT_GEN);
diff --git a/libgomp/testsuite/libgomp.c/target-36.c b/libgomp/testsuite/libgomp.c/target-36.c
new file mode 100644
index 0000000..bafb718
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-36.c
@@ -0,0 +1,4 @@ 
+/* { dg-do run { target lto } } */
+/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */
+
+#include "target-1.c"
diff --git a/libgomp/testsuite/libgomp.c/target-37.c b/libgomp/testsuite/libgomp.c/target-37.c
new file mode 100644
index 0000000..fe5b8ef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-37.c
@@ -0,0 +1,94 @@ 
+/* { dg-do run { target lto } } */
+/* { dg-additional-sources "target-38.c" } */
+/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */
+
+extern void abort (void);
+
+void
+fn1 (double *x, double *y, int z)
+{
+  int i;
+  for (i = 0; i < z; i++)
+    {
+      x[i] = i & 31;
+      y[i] = (i & 63) - 30;
+    }
+}
+
+#pragma omp declare target
+static int tgtv = 6;
+static int
+tgt (void)
+{
+  #pragma omp atomic update
+    tgtv++;
+  return 0;
+}
+#pragma omp end declare target
+
+static double
+fn2 (int x, int y, int z)
+{
+  double b[1024], c[1024], s = 0;
+  int i, j;
+  fn1 (b, c, x);
+  #pragma omp target data map(to: b)
+  {
+    #pragma omp target map(tofrom: c, s)
+      #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x)
+	#pragma omp distribute dist_schedule(static, 4) collapse(1)
+	  for (j=0; j < x; j += y)
+	    #pragma omp parallel for reduction(+:s)
+	      for (i = j; i < j + y; i++)
+		tgt (), s += b[i] * c[i];
+    #pragma omp target update from(b, tgtv)
+  }
+  return s;
+}
+
+static double
+fn3 (int x)
+{
+  double b[1024], c[1024], s = 0;
+  int i;
+  fn1 (b, c, x);
+  #pragma omp target map(to: b, c) map(tofrom:s)
+    #pragma omp parallel for reduction(+:s)
+      for (i = 0; i < x; i++)
+	tgt (), s += b[i] * c[i];
+  return s;
+}
+
+static double
+fn4 (int x, double *p)
+{
+  double b[1024], c[1024], d[1024], s = 0;
+  int i;
+  fn1 (b, c, x);
+  fn1 (d + x, p + x, x);
+  #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \
+		     map(tofrom: s)
+    #pragma omp parallel for reduction(+:s)
+      for (i = 0; i < x; i++)
+	s += b[i] * c[i] + d[x + i] + p[x + i];
+  return s;
+}
+
+extern int other_main (void);
+
+int
+main ()
+{
+  double a = fn2 (128, 4, 6);
+  int b = tgtv;
+  double c = fn3 (61);
+  #pragma omp target update from(tgtv)
+  int d = tgtv;
+  double e[1024];
+  double f = fn4 (64, e);
+  if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
+      || f != 8032.0)
+    abort ();
+  other_main ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-38.c b/libgomp/testsuite/libgomp.c/target-38.c
new file mode 100644
index 0000000..de2b4c8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-38.c
@@ -0,0 +1,91 @@ 
+/* { dg-skip-if "additional source" { *-*-* } } */
+
+extern void abort (void);
+
+static void
+fna1 (double *x, double *y, int z)
+{
+  int i;
+  for (i = 0; i < z; i++)
+    {
+      x[i] = i & 31;
+      y[i] = (i & 63) - 30;
+    }
+}
+
+#pragma omp declare target
+static int tgtva = 6;
+static int
+tgta (void)
+{
+  #pragma omp atomic update
+    tgtva++;
+  return 0;
+}
+#pragma omp end declare target
+
+double
+fna2 (int x, int y, int z)
+{
+  double b[1024], c[1024], s = 0;
+  int i, j;
+  fna1 (b, c, x);
+  #pragma omp target data map(to: b)
+  {
+    #pragma omp target map(tofrom: c, s)
+      #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x)
+	#pragma omp distribute dist_schedule(static, 4) collapse(1)
+	  for (j=0; j < x; j += y)
+	    #pragma omp parallel for reduction(+:s)
+	      for (i = j; i < j + y; i++)
+		tgta (), s += b[i] * c[i];
+    #pragma omp target update from(b, tgtva)
+  }
+  return s;
+}
+
+static double
+fna3 (int x)
+{
+  double b[1024], c[1024], s = 0;
+  int i;
+  fna1 (b, c, x);
+  #pragma omp target map(to: b, c) map(tofrom:s)
+    #pragma omp parallel for reduction(+:s)
+      for (i = 0; i < x; i++)
+	tgta (), s += b[i] * c[i];
+  return s;
+}
+
+static double
+fna4 (int x, double *p)
+{
+  double b[1024], c[1024], d[1024], s = 0;
+  int i;
+  fna1 (b, c, x);
+  fna1 (d + x, p + x, x);
+  #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \
+		     map(tofrom: s)
+    #pragma omp parallel for reduction(+:s)
+      for (i = 0; i < x; i++)
+	s += b[i] * c[i] + d[x + i] + p[x + i];
+  return s;
+}
+
+extern int other_main (void);
+
+int
+other_main (void)
+{
+  double a = fna2 (128, 4, 6);
+  int b = tgtva;
+  double c = fna3 (61);
+  #pragma omp target update from(tgtva)
+  int d = tgtva;
+  double e[1024];
+  double f = fna4 (64, e);
+  if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
+      || f != 8032.0)
+    abort ();
+  return 0;
+}