Message ID | 56C49658.1000908@mentor.com |
---|---|
State | New |
Headers | show |
On 17/02/16 16:48, Tom de Vries wrote: > On 17/02/16 13:30, Jakub Jelinek wrote: >> On Wed, Feb 17, 2016 at 01:02:17PM +0100, Tom de Vries wrote: >>> Mark offload symbols as global in lto >> >> I'm really not familiar with that part of LTO, so I'm CCing Honza and >> Richard here. >>> 2016-02-08 Tom de Vries <tom@codesourcery.com> >>> >>> PR lto/69607 >>> * lto-partition.c (promote_offload_tables): New function. >>> * lto-partition.h (promote_offload_tables): Declare. >> >> Just one space instead of two after : >> >>> * lto.c (do_whole_program_analysis): call promote_offload_tables. >> >> Capital C in Call. >> > > Done. > >>> diff --git a/libgomp/testsuite/libgomp.c/target-37.c >>> b/libgomp/testsuite/libgomp.c/target-37.c >>> new file mode 100644 >>> index 0000000..1edb21e >>> --- /dev/null >>> +++ b/libgomp/testsuite/libgomp.c/target-37.c >>> @@ -0,0 +1,98 @@ >>> +/* { dg-do run { target lto } } */ >>> +/* { dg-additional-sources "target-38.c" } */ >>> +/* { dg-additional-options "-flto -flto-partition=1to1 >>> -fno-toplevel-reorder" } */ >>> + >>> +extern >>> +#ifdef __cplusplus >>> +"C" >>> +#endif >>> +void abort (void); >> >> Why the C++ stuff in there? Do you intend to include the testcase >> also in libgomp.c++? > > No, that's just there because I started both target-37.c and target-38.c > by copying target-1.c. > >> If not, it is not needed. > > Removed. > >> Otherwise, the tests LGTM. >> > > Updated patch attached. > Ping. [ Original submission here: https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00547.html Latest patch with updated testcases: https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01196.html ] OK for trunk, stage1 (or stage4, if that's appropriate)? > Thanks, > - Tom > > > 0001-Mark-offload-symbols-as-global-in-lto.patch > > > Mark offload symbols as global in lto > > 2016-02-17 Tom de Vries <tom@codesourcery.com> > > PR lto/69607 > * lto-partition.c (promote_offload_tables): New function. > * lto-partition.h (promote_offload_tables): Declare. > * lto.c (do_whole_program_analysis): Call promote_offload_tables. > > * testsuite/libgomp.c/target-36.c: New test. > * testsuite/libgomp.c/target-37.c: New test. > * testsuite/libgomp.c/target-38.c: New test. > > --- > gcc/lto/lto-partition.c | 28 ++++++++++ > gcc/lto/lto-partition.h | 1 + > gcc/lto/lto.c | 2 + > libgomp/testsuite/libgomp.c/target-36.c | 4 ++ > libgomp/testsuite/libgomp.c/target-37.c | 94 +++++++++++++++++++++++++++++++++ > libgomp/testsuite/libgomp.c/target-38.c | 91 +++++++++++++++++++++++++++++++ > 6 files changed, 220 insertions(+) > > diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c > index 9eb63c2..56598d4 100644 > --- a/gcc/lto/lto-partition.c > +++ b/gcc/lto/lto-partition.c > @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see > #include "ipa-prop.h" > #include "ipa-inline.h" > #include "lto-partition.h" > +#include "omp-low.h" > > vec<ltrans_partition> ltrans_partitions; > > @@ -1003,6 +1004,33 @@ promote_symbol (symtab_node *node) > "Promoting as hidden: %s\n", node->name ()); > } > > +/* Promote the symbols in the offload tables. */ > + > +void > +promote_offload_tables (void) > +{ > + if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars)) > + return; > + > + for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++) > + { > + tree fn_decl = (*offload_funcs)[i]; > + cgraph_node *node = cgraph_node::get (fn_decl); > + if (node->externally_visible) > + continue; > + promote_symbol (node); > + } > + > + for (unsigned i = 0; i < vec_safe_length (offload_vars); i++) > + { > + tree var_decl = (*offload_vars)[i]; > + varpool_node *node = varpool_node::get (var_decl); > + if (node->externally_visible) > + continue; > + promote_symbol (node); > + } > +} > + > /* Return true if NODE needs named section even if it won't land in the partition > symbol table. > FIXME: we should really not use named sections for inline clones and master > diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h > index 31e3764..1a38126 100644 > --- a/gcc/lto/lto-partition.h > +++ b/gcc/lto/lto-partition.h > @@ -36,6 +36,7 @@ extern vec<ltrans_partition> ltrans_partitions; > void lto_1_to_1_map (void); > void lto_max_map (void); > void lto_balanced_map (int); > +extern void promote_offload_tables (void); > void lto_promote_cross_file_statics (void); > void free_ltrans_partitions (void); > void lto_promote_statics_nonwpa (void); > diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c > index 9dd513f..2736c5c 100644 > --- a/gcc/lto/lto.c > +++ b/gcc/lto/lto.c > @@ -3138,6 +3138,8 @@ do_whole_program_analysis (void) > to globals with hidden visibility because they are accessed from multiple > partitions. */ > lto_promote_cross_file_statics (); > + /* Promote all the offload symbols. */ > + promote_offload_tables (); > timevar_pop (TV_WHOPR_PARTITIONING); > > timevar_stop (TV_PHASE_OPT_GEN); > diff --git a/libgomp/testsuite/libgomp.c/target-36.c b/libgomp/testsuite/libgomp.c/target-36.c > new file mode 100644 > index 0000000..bafb718 > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c/target-36.c > @@ -0,0 +1,4 @@ > +/* { dg-do run { target lto } } */ > +/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */ > + > +#include "target-1.c" > diff --git a/libgomp/testsuite/libgomp.c/target-37.c b/libgomp/testsuite/libgomp.c/target-37.c > new file mode 100644 > index 0000000..fe5b8ef > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c/target-37.c > @@ -0,0 +1,94 @@ > +/* { dg-do run { target lto } } */ > +/* { dg-additional-sources "target-38.c" } */ > +/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */ > + > +extern void abort (void); > + > +void > +fn1 (double *x, double *y, int z) > +{ > + int i; > + for (i = 0; i < z; i++) > + { > + x[i] = i & 31; > + y[i] = (i & 63) - 30; > + } > +} > + > +#pragma omp declare target > +static int tgtv = 6; > +static int > +tgt (void) > +{ > + #pragma omp atomic update > + tgtv++; > + return 0; > +} > +#pragma omp end declare target > + > +static double > +fn2 (int x, int y, int z) > +{ > + double b[1024], c[1024], s = 0; > + int i, j; > + fn1 (b, c, x); > + #pragma omp target data map(to: b) > + { > + #pragma omp target map(tofrom: c, s) > + #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x) > + #pragma omp distribute dist_schedule(static, 4) collapse(1) > + for (j=0; j < x; j += y) > + #pragma omp parallel for reduction(+:s) > + for (i = j; i < j + y; i++) > + tgt (), s += b[i] * c[i]; > + #pragma omp target update from(b, tgtv) > + } > + return s; > +} > + > +static double > +fn3 (int x) > +{ > + double b[1024], c[1024], s = 0; > + int i; > + fn1 (b, c, x); > + #pragma omp target map(to: b, c) map(tofrom:s) > + #pragma omp parallel for reduction(+:s) > + for (i = 0; i < x; i++) > + tgt (), s += b[i] * c[i]; > + return s; > +} > + > +static double > +fn4 (int x, double *p) > +{ > + double b[1024], c[1024], d[1024], s = 0; > + int i; > + fn1 (b, c, x); > + fn1 (d + x, p + x, x); > + #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \ > + map(tofrom: s) > + #pragma omp parallel for reduction(+:s) > + for (i = 0; i < x; i++) > + s += b[i] * c[i] + d[x + i] + p[x + i]; > + return s; > +} > + > +extern int other_main (void); > + > +int > +main () > +{ > + double a = fn2 (128, 4, 6); > + int b = tgtv; > + double c = fn3 (61); > + #pragma omp target update from(tgtv) > + int d = tgtv; > + double e[1024]; > + double f = fn4 (64, e); > + if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61 > + || f != 8032.0) > + abort (); > + other_main (); > + return 0; > +} > diff --git a/libgomp/testsuite/libgomp.c/target-38.c b/libgomp/testsuite/libgomp.c/target-38.c > new file mode 100644 > index 0000000..de2b4c8 > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c/target-38.c > @@ -0,0 +1,91 @@ > +/* { dg-skip-if "additional source" { *-*-* } } */ > + > +extern void abort (void); > + > +static void > +fna1 (double *x, double *y, int z) > +{ > + int i; > + for (i = 0; i < z; i++) > + { > + x[i] = i & 31; > + y[i] = (i & 63) - 30; > + } > +} > + > +#pragma omp declare target > +static int tgtva = 6; > +static int > +tgta (void) > +{ > + #pragma omp atomic update > + tgtva++; > + return 0; > +} > +#pragma omp end declare target > + > +double > +fna2 (int x, int y, int z) > +{ > + double b[1024], c[1024], s = 0; > + int i, j; > + fna1 (b, c, x); > + #pragma omp target data map(to: b) > + { > + #pragma omp target map(tofrom: c, s) > + #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x) > + #pragma omp distribute dist_schedule(static, 4) collapse(1) > + for (j=0; j < x; j += y) > + #pragma omp parallel for reduction(+:s) > + for (i = j; i < j + y; i++) > + tgta (), s += b[i] * c[i]; > + #pragma omp target update from(b, tgtva) > + } > + return s; > +} > + > +static double > +fna3 (int x) > +{ > + double b[1024], c[1024], s = 0; > + int i; > + fna1 (b, c, x); > + #pragma omp target map(to: b, c) map(tofrom:s) > + #pragma omp parallel for reduction(+:s) > + for (i = 0; i < x; i++) > + tgta (), s += b[i] * c[i]; > + return s; > +} > + > +static double > +fna4 (int x, double *p) > +{ > + double b[1024], c[1024], d[1024], s = 0; > + int i; > + fna1 (b, c, x); > + fna1 (d + x, p + x, x); > + #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \ > + map(tofrom: s) > + #pragma omp parallel for reduction(+:s) > + for (i = 0; i < x; i++) > + s += b[i] * c[i] + d[x + i] + p[x + i]; > + return s; > +} > + > +extern int other_main (void); > + > +int > +other_main (void) > +{ > + double a = fna2 (128, 4, 6); > + int b = tgtva; > + double c = fna3 (61); > + #pragma omp target update from(tgtva) > + int d = tgtva; > + double e[1024]; > + double f = fna4 (64, e); > + if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61 > + || f != 8032.0) > + abort (); > + return 0; > +} >
On 24/02/16 14:37, Tom de Vries wrote: > On 17/02/16 16:48, Tom de Vries wrote: >> On 17/02/16 13:30, Jakub Jelinek wrote: >>> On Wed, Feb 17, 2016 at 01:02:17PM +0100, Tom de Vries wrote: >>>> Mark offload symbols as global in lto >>> >>> I'm really not familiar with that part of LTO, so I'm CCing Honza and >>> Richard here. >>>> 2016-02-08 Tom de Vries <tom@codesourcery.com> >>>> >>>> PR lto/69607 >>>> * lto-partition.c (promote_offload_tables): New function. >>>> * lto-partition.h (promote_offload_tables): Declare. >>> >>> Just one space instead of two after : >>> >>>> * lto.c (do_whole_program_analysis): call promote_offload_tables. >>> >>> Capital C in Call. >>> >> >> Done. >> >>>> diff --git a/libgomp/testsuite/libgomp.c/target-37.c >>>> b/libgomp/testsuite/libgomp.c/target-37.c >>>> new file mode 100644 >>>> index 0000000..1edb21e >>>> --- /dev/null >>>> +++ b/libgomp/testsuite/libgomp.c/target-37.c >>>> @@ -0,0 +1,98 @@ >>>> +/* { dg-do run { target lto } } */ >>>> +/* { dg-additional-sources "target-38.c" } */ >>>> +/* { dg-additional-options "-flto -flto-partition=1to1 >>>> -fno-toplevel-reorder" } */ >>>> + >>>> +extern >>>> +#ifdef __cplusplus >>>> +"C" >>>> +#endif >>>> +void abort (void); >>> >>> Why the C++ stuff in there? Do you intend to include the testcase >>> also in libgomp.c++? >> >> No, that's just there because I started both target-37.c and target-38.c >> by copying target-1.c. >> >>> If not, it is not needed. >> >> Removed. >> >>> Otherwise, the tests LGTM. >>> >> >> Updated patch attached. >> > Ping^2. This patch fixes an ICE when compiling an openmp program using -flto -partition=1to1 -fno-toplevel-reorder. [ Original submission here: https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00547.html Latest patch with updated testcases: https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01196.html ] OK for trunk, stage1 (or stage4, if that's appropriate)? Thanks, - Tom >> >> >> 0001-Mark-offload-symbols-as-global-in-lto.patch >> >> >> Mark offload symbols as global in lto >> >> 2016-02-17 Tom de Vries <tom@codesourcery.com> >> >> PR lto/69607 >> * lto-partition.c (promote_offload_tables): New function. >> * lto-partition.h (promote_offload_tables): Declare. >> * lto.c (do_whole_program_analysis): Call promote_offload_tables. >> >> * testsuite/libgomp.c/target-36.c: New test. >> * testsuite/libgomp.c/target-37.c: New test. >> * testsuite/libgomp.c/target-38.c: New test. >> >> --- >> gcc/lto/lto-partition.c | 28 ++++++++++ >> gcc/lto/lto-partition.h | 1 + >> gcc/lto/lto.c | 2 + >> libgomp/testsuite/libgomp.c/target-36.c | 4 ++ >> libgomp/testsuite/libgomp.c/target-37.c | 94 >> +++++++++++++++++++++++++++++++++ >> libgomp/testsuite/libgomp.c/target-38.c | 91 >> +++++++++++++++++++++++++++++++ >> 6 files changed, 220 insertions(+) >> >> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c >> index 9eb63c2..56598d4 100644 >> --- a/gcc/lto/lto-partition.c >> +++ b/gcc/lto/lto-partition.c >> @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see >> #include "ipa-prop.h" >> #include "ipa-inline.h" >> #include "lto-partition.h" >> +#include "omp-low.h" >> >> vec<ltrans_partition> ltrans_partitions; >> >> @@ -1003,6 +1004,33 @@ promote_symbol (symtab_node *node) >> "Promoting as hidden: %s\n", node->name ()); >> } >> >> +/* Promote the symbols in the offload tables. */ >> + >> +void >> +promote_offload_tables (void) >> +{ >> + if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty >> (offload_vars)) >> + return; >> + >> + for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++) >> + { >> + tree fn_decl = (*offload_funcs)[i]; >> + cgraph_node *node = cgraph_node::get (fn_decl); >> + if (node->externally_visible) >> + continue; >> + promote_symbol (node); >> + } >> + >> + for (unsigned i = 0; i < vec_safe_length (offload_vars); i++) >> + { >> + tree var_decl = (*offload_vars)[i]; >> + varpool_node *node = varpool_node::get (var_decl); >> + if (node->externally_visible) >> + continue; >> + promote_symbol (node); >> + } >> +} >> + >> /* Return true if NODE needs named section even if it won't land in >> the partition >> symbol table. >> FIXME: we should really not use named sections for inline clones >> and master >> diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h >> index 31e3764..1a38126 100644 >> --- a/gcc/lto/lto-partition.h >> +++ b/gcc/lto/lto-partition.h >> @@ -36,6 +36,7 @@ extern vec<ltrans_partition> ltrans_partitions; >> void lto_1_to_1_map (void); >> void lto_max_map (void); >> void lto_balanced_map (int); >> +extern void promote_offload_tables (void); >> void lto_promote_cross_file_statics (void); >> void free_ltrans_partitions (void); >> void lto_promote_statics_nonwpa (void); >> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c >> index 9dd513f..2736c5c 100644 >> --- a/gcc/lto/lto.c >> +++ b/gcc/lto/lto.c >> @@ -3138,6 +3138,8 @@ do_whole_program_analysis (void) >> to globals with hidden visibility because they are accessed >> from multiple >> partitions. */ >> lto_promote_cross_file_statics (); >> + /* Promote all the offload symbols. */ >> + promote_offload_tables (); >> timevar_pop (TV_WHOPR_PARTITIONING); >> >> timevar_stop (TV_PHASE_OPT_GEN); >> diff --git a/libgomp/testsuite/libgomp.c/target-36.c >> b/libgomp/testsuite/libgomp.c/target-36.c >> new file mode 100644 >> index 0000000..bafb718 >> --- /dev/null >> +++ b/libgomp/testsuite/libgomp.c/target-36.c >> @@ -0,0 +1,4 @@ >> +/* { dg-do run { target lto } } */ >> +/* { dg-additional-options "-flto -flto-partition=1to1 >> -fno-toplevel-reorder" } */ >> + >> +#include "target-1.c" >> diff --git a/libgomp/testsuite/libgomp.c/target-37.c >> b/libgomp/testsuite/libgomp.c/target-37.c >> new file mode 100644 >> index 0000000..fe5b8ef >> --- /dev/null >> +++ b/libgomp/testsuite/libgomp.c/target-37.c >> @@ -0,0 +1,94 @@ >> +/* { dg-do run { target lto } } */ >> +/* { dg-additional-sources "target-38.c" } */ >> +/* { dg-additional-options "-flto -flto-partition=1to1 >> -fno-toplevel-reorder" } */ >> + >> +extern void abort (void); >> + >> +void >> +fn1 (double *x, double *y, int z) >> +{ >> + int i; >> + for (i = 0; i < z; i++) >> + { >> + x[i] = i & 31; >> + y[i] = (i & 63) - 30; >> + } >> +} >> + >> +#pragma omp declare target >> +static int tgtv = 6; >> +static int >> +tgt (void) >> +{ >> + #pragma omp atomic update >> + tgtv++; >> + return 0; >> +} >> +#pragma omp end declare target >> + >> +static double >> +fn2 (int x, int y, int z) >> +{ >> + double b[1024], c[1024], s = 0; >> + int i, j; >> + fn1 (b, c, x); >> + #pragma omp target data map(to: b) >> + { >> + #pragma omp target map(tofrom: c, s) >> + #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) >> firstprivate(x) >> + #pragma omp distribute dist_schedule(static, 4) collapse(1) >> + for (j=0; j < x; j += y) >> + #pragma omp parallel for reduction(+:s) >> + for (i = j; i < j + y; i++) >> + tgt (), s += b[i] * c[i]; >> + #pragma omp target update from(b, tgtv) >> + } >> + return s; >> +} >> + >> +static double >> +fn3 (int x) >> +{ >> + double b[1024], c[1024], s = 0; >> + int i; >> + fn1 (b, c, x); >> + #pragma omp target map(to: b, c) map(tofrom:s) >> + #pragma omp parallel for reduction(+:s) >> + for (i = 0; i < x; i++) >> + tgt (), s += b[i] * c[i]; >> + return s; >> +} >> + >> +static double >> +fn4 (int x, double *p) >> +{ >> + double b[1024], c[1024], d[1024], s = 0; >> + int i; >> + fn1 (b, c, x); >> + fn1 (d + x, p + x, x); >> + #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & >> 31)]) \ >> + map(tofrom: s) >> + #pragma omp parallel for reduction(+:s) >> + for (i = 0; i < x; i++) >> + s += b[i] * c[i] + d[x + i] + p[x + i]; >> + return s; >> +} >> + >> +extern int other_main (void); >> + >> +int >> +main () >> +{ >> + double a = fn2 (128, 4, 6); >> + int b = tgtv; >> + double c = fn3 (61); >> + #pragma omp target update from(tgtv) >> + int d = tgtv; >> + double e[1024]; >> + double f = fn4 (64, e); >> + if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61 >> + || f != 8032.0) >> + abort (); >> + other_main (); >> + return 0; >> +} >> diff --git a/libgomp/testsuite/libgomp.c/target-38.c >> b/libgomp/testsuite/libgomp.c/target-38.c >> new file mode 100644 >> index 0000000..de2b4c8 >> --- /dev/null >> +++ b/libgomp/testsuite/libgomp.c/target-38.c >> @@ -0,0 +1,91 @@ >> +/* { dg-skip-if "additional source" { *-*-* } } */ >> + >> +extern void abort (void); >> + >> +static void >> +fna1 (double *x, double *y, int z) >> +{ >> + int i; >> + for (i = 0; i < z; i++) >> + { >> + x[i] = i & 31; >> + y[i] = (i & 63) - 30; >> + } >> +} >> + >> +#pragma omp declare target >> +static int tgtva = 6; >> +static int >> +tgta (void) >> +{ >> + #pragma omp atomic update >> + tgtva++; >> + return 0; >> +} >> +#pragma omp end declare target >> + >> +double >> +fna2 (int x, int y, int z) >> +{ >> + double b[1024], c[1024], s = 0; >> + int i, j; >> + fna1 (b, c, x); >> + #pragma omp target data map(to: b) >> + { >> + #pragma omp target map(tofrom: c, s) >> + #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) >> firstprivate(x) >> + #pragma omp distribute dist_schedule(static, 4) collapse(1) >> + for (j=0; j < x; j += y) >> + #pragma omp parallel for reduction(+:s) >> + for (i = j; i < j + y; i++) >> + tgta (), s += b[i] * c[i]; >> + #pragma omp target update from(b, tgtva) >> + } >> + return s; >> +} >> + >> +static double >> +fna3 (int x) >> +{ >> + double b[1024], c[1024], s = 0; >> + int i; >> + fna1 (b, c, x); >> + #pragma omp target map(to: b, c) map(tofrom:s) >> + #pragma omp parallel for reduction(+:s) >> + for (i = 0; i < x; i++) >> + tgta (), s += b[i] * c[i]; >> + return s; >> +} >> + >> +static double >> +fna4 (int x, double *p) >> +{ >> + double b[1024], c[1024], d[1024], s = 0; >> + int i; >> + fna1 (b, c, x); >> + fna1 (d + x, p + x, x); >> + #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & >> 31)]) \ >> + map(tofrom: s) >> + #pragma omp parallel for reduction(+:s) >> + for (i = 0; i < x; i++) >> + s += b[i] * c[i] + d[x + i] + p[x + i]; >> + return s; >> +} >> + >> +extern int other_main (void); >> + >> +int >> +other_main (void) >> +{ >> + double a = fna2 (128, 4, 6); >> + int b = tgtva; >> + double c = fna3 (61); >> + #pragma omp target update from(tgtva) >> + int d = tgtva; >> + double e[1024]; >> + double f = fna4 (64, e); >> + if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61 >> + || f != 8032.0) >> + abort (); >> + return 0; >> +} >> >
On 07/03/16 15:38, Tom de Vries wrote: > On 24/02/16 14:37, Tom de Vries wrote: >> On 17/02/16 16:48, Tom de Vries wrote: >>> On 17/02/16 13:30, Jakub Jelinek wrote: >>>> On Wed, Feb 17, 2016 at 01:02:17PM +0100, Tom de Vries wrote: >>>>> Mark offload symbols as global in lto >>>> >>>> I'm really not familiar with that part of LTO, so I'm CCing Honza and >>>> Richard here. >>>>> 2016-02-08 Tom de Vries <tom@codesourcery.com> >>>>> >>>>> PR lto/69607 >>>>> * lto-partition.c (promote_offload_tables): New function. >>>>> * lto-partition.h (promote_offload_tables): Declare. >>>> >>>> Just one space instead of two after : >>>> >>>>> * lto.c (do_whole_program_analysis): call promote_offload_tables. >>>> >>>> Capital C in Call. >>>> >>> >>> Done. >>> >>>>> diff --git a/libgomp/testsuite/libgomp.c/target-37.c >>>>> b/libgomp/testsuite/libgomp.c/target-37.c >>>>> new file mode 100644 >>>>> index 0000000..1edb21e >>>>> --- /dev/null >>>>> +++ b/libgomp/testsuite/libgomp.c/target-37.c >>>>> @@ -0,0 +1,98 @@ >>>>> +/* { dg-do run { target lto } } */ >>>>> +/* { dg-additional-sources "target-38.c" } */ >>>>> +/* { dg-additional-options "-flto -flto-partition=1to1 >>>>> -fno-toplevel-reorder" } */ >>>>> + >>>>> +extern >>>>> +#ifdef __cplusplus >>>>> +"C" >>>>> +#endif >>>>> +void abort (void); >>>> >>>> Why the C++ stuff in there? Do you intend to include the testcase >>>> also in libgomp.c++? >>> >>> No, that's just there because I started both target-37.c and target-38.c >>> by copying target-1.c. >>> >>>> If not, it is not needed. >>> >>> Removed. >>> >>>> Otherwise, the tests LGTM. >>>> >>> >>> Updated patch attached. >>> >> Ping^3. This patch fixes an ICE when compiling an openmp program using -flto -partition=1to1 -fno-toplevel-reorder. [ Original submission here: https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00547.html Latest patch with updated testcases: https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01196.html ] OK for trunk, stage1 (or stage4, if that's appropriate)? Thanks, - Tom >>> >>> >>> 0001-Mark-offload-symbols-as-global-in-lto.patch >>> >>> >>> Mark offload symbols as global in lto >>> >>> 2016-02-17 Tom de Vries <tom@codesourcery.com> >>> >>> PR lto/69607 >>> * lto-partition.c (promote_offload_tables): New function. >>> * lto-partition.h (promote_offload_tables): Declare. >>> * lto.c (do_whole_program_analysis): Call promote_offload_tables. >>> >>> * testsuite/libgomp.c/target-36.c: New test. >>> * testsuite/libgomp.c/target-37.c: New test. >>> * testsuite/libgomp.c/target-38.c: New test. >>> >>> --- >>> gcc/lto/lto-partition.c | 28 ++++++++++ >>> gcc/lto/lto-partition.h | 1 + >>> gcc/lto/lto.c | 2 + >>> libgomp/testsuite/libgomp.c/target-36.c | 4 ++ >>> libgomp/testsuite/libgomp.c/target-37.c | 94 >>> +++++++++++++++++++++++++++++++++ >>> libgomp/testsuite/libgomp.c/target-38.c | 91 >>> +++++++++++++++++++++++++++++++ >>> 6 files changed, 220 insertions(+) >>> >>> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c >>> index 9eb63c2..56598d4 100644 >>> --- a/gcc/lto/lto-partition.c >>> +++ b/gcc/lto/lto-partition.c >>> @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see >>> #include "ipa-prop.h" >>> #include "ipa-inline.h" >>> #include "lto-partition.h" >>> +#include "omp-low.h" >>> >>> vec<ltrans_partition> ltrans_partitions; >>> >>> @@ -1003,6 +1004,33 @@ promote_symbol (symtab_node *node) >>> "Promoting as hidden: %s\n", node->name ()); >>> } >>> >>> +/* Promote the symbols in the offload tables. */ >>> + >>> +void >>> +promote_offload_tables (void) >>> +{ >>> + if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty >>> (offload_vars)) >>> + return; >>> + >>> + for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++) >>> + { >>> + tree fn_decl = (*offload_funcs)[i]; >>> + cgraph_node *node = cgraph_node::get (fn_decl); >>> + if (node->externally_visible) >>> + continue; >>> + promote_symbol (node); >>> + } >>> + >>> + for (unsigned i = 0; i < vec_safe_length (offload_vars); i++) >>> + { >>> + tree var_decl = (*offload_vars)[i]; >>> + varpool_node *node = varpool_node::get (var_decl); >>> + if (node->externally_visible) >>> + continue; >>> + promote_symbol (node); >>> + } >>> +} >>> + >>> /* Return true if NODE needs named section even if it won't land in >>> the partition >>> symbol table. >>> FIXME: we should really not use named sections for inline clones >>> and master >>> diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h >>> index 31e3764..1a38126 100644 >>> --- a/gcc/lto/lto-partition.h >>> +++ b/gcc/lto/lto-partition.h >>> @@ -36,6 +36,7 @@ extern vec<ltrans_partition> ltrans_partitions; >>> void lto_1_to_1_map (void); >>> void lto_max_map (void); >>> void lto_balanced_map (int); >>> +extern void promote_offload_tables (void); >>> void lto_promote_cross_file_statics (void); >>> void free_ltrans_partitions (void); >>> void lto_promote_statics_nonwpa (void); >>> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c >>> index 9dd513f..2736c5c 100644 >>> --- a/gcc/lto/lto.c >>> +++ b/gcc/lto/lto.c >>> @@ -3138,6 +3138,8 @@ do_whole_program_analysis (void) >>> to globals with hidden visibility because they are accessed >>> from multiple >>> partitions. */ >>> lto_promote_cross_file_statics (); >>> + /* Promote all the offload symbols. */ >>> + promote_offload_tables (); >>> timevar_pop (TV_WHOPR_PARTITIONING); >>> >>> timevar_stop (TV_PHASE_OPT_GEN); >>> diff --git a/libgomp/testsuite/libgomp.c/target-36.c >>> b/libgomp/testsuite/libgomp.c/target-36.c >>> new file mode 100644 >>> index 0000000..bafb718 >>> --- /dev/null >>> +++ b/libgomp/testsuite/libgomp.c/target-36.c >>> @@ -0,0 +1,4 @@ >>> +/* { dg-do run { target lto } } */ >>> +/* { dg-additional-options "-flto -flto-partition=1to1 >>> -fno-toplevel-reorder" } */ >>> + >>> +#include "target-1.c" >>> diff --git a/libgomp/testsuite/libgomp.c/target-37.c >>> b/libgomp/testsuite/libgomp.c/target-37.c >>> new file mode 100644 >>> index 0000000..fe5b8ef >>> --- /dev/null >>> +++ b/libgomp/testsuite/libgomp.c/target-37.c >>> @@ -0,0 +1,94 @@ >>> +/* { dg-do run { target lto } } */ >>> +/* { dg-additional-sources "target-38.c" } */ >>> +/* { dg-additional-options "-flto -flto-partition=1to1 >>> -fno-toplevel-reorder" } */ >>> + >>> +extern void abort (void); >>> + >>> +void >>> +fn1 (double *x, double *y, int z) >>> +{ >>> + int i; >>> + for (i = 0; i < z; i++) >>> + { >>> + x[i] = i & 31; >>> + y[i] = (i & 63) - 30; >>> + } >>> +} >>> + >>> +#pragma omp declare target >>> +static int tgtv = 6; >>> +static int >>> +tgt (void) >>> +{ >>> + #pragma omp atomic update >>> + tgtv++; >>> + return 0; >>> +} >>> +#pragma omp end declare target >>> + >>> +static double >>> +fn2 (int x, int y, int z) >>> +{ >>> + double b[1024], c[1024], s = 0; >>> + int i, j; >>> + fn1 (b, c, x); >>> + #pragma omp target data map(to: b) >>> + { >>> + #pragma omp target map(tofrom: c, s) >>> + #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) >>> firstprivate(x) >>> + #pragma omp distribute dist_schedule(static, 4) collapse(1) >>> + for (j=0; j < x; j += y) >>> + #pragma omp parallel for reduction(+:s) >>> + for (i = j; i < j + y; i++) >>> + tgt (), s += b[i] * c[i]; >>> + #pragma omp target update from(b, tgtv) >>> + } >>> + return s; >>> +} >>> + >>> +static double >>> +fn3 (int x) >>> +{ >>> + double b[1024], c[1024], s = 0; >>> + int i; >>> + fn1 (b, c, x); >>> + #pragma omp target map(to: b, c) map(tofrom:s) >>> + #pragma omp parallel for reduction(+:s) >>> + for (i = 0; i < x; i++) >>> + tgt (), s += b[i] * c[i]; >>> + return s; >>> +} >>> + >>> +static double >>> +fn4 (int x, double *p) >>> +{ >>> + double b[1024], c[1024], d[1024], s = 0; >>> + int i; >>> + fn1 (b, c, x); >>> + fn1 (d + x, p + x, x); >>> + #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & >>> 31)]) \ >>> + map(tofrom: s) >>> + #pragma omp parallel for reduction(+:s) >>> + for (i = 0; i < x; i++) >>> + s += b[i] * c[i] + d[x + i] + p[x + i]; >>> + return s; >>> +} >>> + >>> +extern int other_main (void); >>> + >>> +int >>> +main () >>> +{ >>> + double a = fn2 (128, 4, 6); >>> + int b = tgtv; >>> + double c = fn3 (61); >>> + #pragma omp target update from(tgtv) >>> + int d = tgtv; >>> + double e[1024]; >>> + double f = fn4 (64, e); >>> + if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61 >>> + || f != 8032.0) >>> + abort (); >>> + other_main (); >>> + return 0; >>> +} >>> diff --git a/libgomp/testsuite/libgomp.c/target-38.c >>> b/libgomp/testsuite/libgomp.c/target-38.c >>> new file mode 100644 >>> index 0000000..de2b4c8 >>> --- /dev/null >>> +++ b/libgomp/testsuite/libgomp.c/target-38.c >>> @@ -0,0 +1,91 @@ >>> +/* { dg-skip-if "additional source" { *-*-* } } */ >>> + >>> +extern void abort (void); >>> + >>> +static void >>> +fna1 (double *x, double *y, int z) >>> +{ >>> + int i; >>> + for (i = 0; i < z; i++) >>> + { >>> + x[i] = i & 31; >>> + y[i] = (i & 63) - 30; >>> + } >>> +} >>> + >>> +#pragma omp declare target >>> +static int tgtva = 6; >>> +static int >>> +tgta (void) >>> +{ >>> + #pragma omp atomic update >>> + tgtva++; >>> + return 0; >>> +} >>> +#pragma omp end declare target >>> + >>> +double >>> +fna2 (int x, int y, int z) >>> +{ >>> + double b[1024], c[1024], s = 0; >>> + int i, j; >>> + fna1 (b, c, x); >>> + #pragma omp target data map(to: b) >>> + { >>> + #pragma omp target map(tofrom: c, s) >>> + #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) >>> firstprivate(x) >>> + #pragma omp distribute dist_schedule(static, 4) collapse(1) >>> + for (j=0; j < x; j += y) >>> + #pragma omp parallel for reduction(+:s) >>> + for (i = j; i < j + y; i++) >>> + tgta (), s += b[i] * c[i]; >>> + #pragma omp target update from(b, tgtva) >>> + } >>> + return s; >>> +} >>> + >>> +static double >>> +fna3 (int x) >>> +{ >>> + double b[1024], c[1024], s = 0; >>> + int i; >>> + fna1 (b, c, x); >>> + #pragma omp target map(to: b, c) map(tofrom:s) >>> + #pragma omp parallel for reduction(+:s) >>> + for (i = 0; i < x; i++) >>> + tgta (), s += b[i] * c[i]; >>> + return s; >>> +} >>> + >>> +static double >>> +fna4 (int x, double *p) >>> +{ >>> + double b[1024], c[1024], d[1024], s = 0; >>> + int i; >>> + fna1 (b, c, x); >>> + fna1 (d + x, p + x, x); >>> + #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & >>> 31)]) \ >>> + map(tofrom: s) >>> + #pragma omp parallel for reduction(+:s) >>> + for (i = 0; i < x; i++) >>> + s += b[i] * c[i] + d[x + i] + p[x + i]; >>> + return s; >>> +} >>> + >>> +extern int other_main (void); >>> + >>> +int >>> +other_main (void) >>> +{ >>> + double a = fna2 (128, 4, 6); >>> + int b = tgtva; >>> + double c = fna3 (61); >>> + #pragma omp target update from(tgtva) >>> + int d = tgtva; >>> + double e[1024]; >>> + double f = fna4 (64, e); >>> + if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61 >>> + || f != 8032.0) >>> + abort (); >>> + return 0; >>> +} >>> >> >
Mark offload symbols as global in lto 2016-02-17 Tom de Vries <tom@codesourcery.com> PR lto/69607 * lto-partition.c (promote_offload_tables): New function. * lto-partition.h (promote_offload_tables): Declare. * lto.c (do_whole_program_analysis): Call promote_offload_tables. * testsuite/libgomp.c/target-36.c: New test. * testsuite/libgomp.c/target-37.c: New test. * testsuite/libgomp.c/target-38.c: New test. --- gcc/lto/lto-partition.c | 28 ++++++++++ gcc/lto/lto-partition.h | 1 + gcc/lto/lto.c | 2 + libgomp/testsuite/libgomp.c/target-36.c | 4 ++ libgomp/testsuite/libgomp.c/target-37.c | 94 +++++++++++++++++++++++++++++++++ libgomp/testsuite/libgomp.c/target-38.c | 91 +++++++++++++++++++++++++++++++ 6 files changed, 220 insertions(+) diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c index 9eb63c2..56598d4 100644 --- a/gcc/lto/lto-partition.c +++ b/gcc/lto/lto-partition.c @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-prop.h" #include "ipa-inline.h" #include "lto-partition.h" +#include "omp-low.h" vec<ltrans_partition> ltrans_partitions; @@ -1003,6 +1004,33 @@ promote_symbol (symtab_node *node) "Promoting as hidden: %s\n", node->name ()); } +/* Promote the symbols in the offload tables. */ + +void +promote_offload_tables (void) +{ + if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars)) + return; + + for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++) + { + tree fn_decl = (*offload_funcs)[i]; + cgraph_node *node = cgraph_node::get (fn_decl); + if (node->externally_visible) + continue; + promote_symbol (node); + } + + for (unsigned i = 0; i < vec_safe_length (offload_vars); i++) + { + tree var_decl = (*offload_vars)[i]; + varpool_node *node = varpool_node::get (var_decl); + if (node->externally_visible) + continue; + promote_symbol (node); + } +} + /* Return true if NODE needs named section even if it won't land in the partition symbol table. FIXME: we should really not use named sections for inline clones and master diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h index 31e3764..1a38126 100644 --- a/gcc/lto/lto-partition.h +++ b/gcc/lto/lto-partition.h @@ -36,6 +36,7 @@ extern vec<ltrans_partition> ltrans_partitions; void lto_1_to_1_map (void); void lto_max_map (void); void lto_balanced_map (int); +extern void promote_offload_tables (void); void lto_promote_cross_file_statics (void); void free_ltrans_partitions (void); void lto_promote_statics_nonwpa (void); diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c index 9dd513f..2736c5c 100644 --- a/gcc/lto/lto.c +++ b/gcc/lto/lto.c @@ -3138,6 +3138,8 @@ do_whole_program_analysis (void) to globals with hidden visibility because they are accessed from multiple partitions. */ lto_promote_cross_file_statics (); + /* Promote all the offload symbols. */ + promote_offload_tables (); timevar_pop (TV_WHOPR_PARTITIONING); timevar_stop (TV_PHASE_OPT_GEN); diff --git a/libgomp/testsuite/libgomp.c/target-36.c b/libgomp/testsuite/libgomp.c/target-36.c new file mode 100644 index 0000000..bafb718 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-36.c @@ -0,0 +1,4 @@ +/* { dg-do run { target lto } } */ +/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */ + +#include "target-1.c" diff --git a/libgomp/testsuite/libgomp.c/target-37.c b/libgomp/testsuite/libgomp.c/target-37.c new file mode 100644 index 0000000..fe5b8ef --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-37.c @@ -0,0 +1,94 @@ +/* { dg-do run { target lto } } */ +/* { dg-additional-sources "target-38.c" } */ +/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */ + +extern void abort (void); + +void +fn1 (double *x, double *y, int z) +{ + int i; + for (i = 0; i < z; i++) + { + x[i] = i & 31; + y[i] = (i & 63) - 30; + } +} + +#pragma omp declare target +static int tgtv = 6; +static int +tgt (void) +{ + #pragma omp atomic update + tgtv++; + return 0; +} +#pragma omp end declare target + +static double +fn2 (int x, int y, int z) +{ + double b[1024], c[1024], s = 0; + int i, j; + fn1 (b, c, x); + #pragma omp target data map(to: b) + { + #pragma omp target map(tofrom: c, s) + #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x) + #pragma omp distribute dist_schedule(static, 4) collapse(1) + for (j=0; j < x; j += y) + #pragma omp parallel for reduction(+:s) + for (i = j; i < j + y; i++) + tgt (), s += b[i] * c[i]; + #pragma omp target update from(b, tgtv) + } + return s; +} + +static double +fn3 (int x) +{ + double b[1024], c[1024], s = 0; + int i; + fn1 (b, c, x); + #pragma omp target map(to: b, c) map(tofrom:s) + #pragma omp parallel for reduction(+:s) + for (i = 0; i < x; i++) + tgt (), s += b[i] * c[i]; + return s; +} + +static double +fn4 (int x, double *p) +{ + double b[1024], c[1024], d[1024], s = 0; + int i; + fn1 (b, c, x); + fn1 (d + x, p + x, x); + #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \ + map(tofrom: s) + #pragma omp parallel for reduction(+:s) + for (i = 0; i < x; i++) + s += b[i] * c[i] + d[x + i] + p[x + i]; + return s; +} + +extern int other_main (void); + +int +main () +{ + double a = fn2 (128, 4, 6); + int b = tgtv; + double c = fn3 (61); + #pragma omp target update from(tgtv) + int d = tgtv; + double e[1024]; + double f = fn4 (64, e); + if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61 + || f != 8032.0) + abort (); + other_main (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-38.c b/libgomp/testsuite/libgomp.c/target-38.c new file mode 100644 index 0000000..de2b4c8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-38.c @@ -0,0 +1,91 @@ +/* { dg-skip-if "additional source" { *-*-* } } */ + +extern void abort (void); + +static void +fna1 (double *x, double *y, int z) +{ + int i; + for (i = 0; i < z; i++) + { + x[i] = i & 31; + y[i] = (i & 63) - 30; + } +} + +#pragma omp declare target +static int tgtva = 6; +static int +tgta (void) +{ + #pragma omp atomic update + tgtva++; + return 0; +} +#pragma omp end declare target + +double +fna2 (int x, int y, int z) +{ + double b[1024], c[1024], s = 0; + int i, j; + fna1 (b, c, x); + #pragma omp target data map(to: b) + { + #pragma omp target map(tofrom: c, s) + #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x) + #pragma omp distribute dist_schedule(static, 4) collapse(1) + for (j=0; j < x; j += y) + #pragma omp parallel for reduction(+:s) + for (i = j; i < j + y; i++) + tgta (), s += b[i] * c[i]; + #pragma omp target update from(b, tgtva) + } + return s; +} + +static double +fna3 (int x) +{ + double b[1024], c[1024], s = 0; + int i; + fna1 (b, c, x); + #pragma omp target map(to: b, c) map(tofrom:s) + #pragma omp parallel for reduction(+:s) + for (i = 0; i < x; i++) + tgta (), s += b[i] * c[i]; + return s; +} + +static double +fna4 (int x, double *p) +{ + double b[1024], c[1024], d[1024], s = 0; + int i; + fna1 (b, c, x); + fna1 (d + x, p + x, x); + #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \ + map(tofrom: s) + #pragma omp parallel for reduction(+:s) + for (i = 0; i < x; i++) + s += b[i] * c[i] + d[x + i] + p[x + i]; + return s; +} + +extern int other_main (void); + +int +other_main (void) +{ + double a = fna2 (128, 4, 6); + int b = tgtva; + double c = fna3 (61); + #pragma omp target update from(tgtva) + int d = tgtva; + double e[1024]; + double f = fna4 (64, e); + if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61 + || f != 8032.0) + abort (); + return 0; +}