LTO/WPA: Ensure that output_offload_tables only writes table once [PR116535]
When ltrans was written concurrently, e.g. via -flto=N (N > 1, assuming
sufficient partitions, e.g., via -flto-partition=max), output_offload_tables
wrote the output tables once per fork.
PR lto/116535
gcc/ChangeLog:
* omp-offload.h (offload_output_tables_p): New extern bool var.
* omp-offload.cc (offload_output_tables_p): Define it with value true.
* lto-cgraph.cc (output_offload_tables): Only output tables when
offload_output_tables_p is true.
gcc/lto/ChangeLog:
* lto.cc (stream_out_partitions_1): Set offload_output_tables_p to false
except for the first partition.
gcc/lto-cgraph.cc | 16 ++++------------
gcc/lto/lto.cc | 3 +++
gcc/omp-offload.cc | 2 ++
gcc/omp-offload.h | 1 +
4 files changed, 10 insertions(+), 12 deletions(-)
@@ -1081,8 +1081,10 @@ output_offload_tables (void)
{
bool output_requires = (flag_openmp
&& (omp_requires_mask & OMP_REQUIRES_TARGET_USED) != 0);
- if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars)
- && !output_requires)
+ if (!offload_output_tables_p
+ || (vec_safe_is_empty (offload_funcs)
+ && vec_safe_is_empty (offload_vars)
+ && !output_requires))
return;
struct lto_simple_output_block *ob
@@ -1139,16 +1141,6 @@ output_offload_tables (void)
streamer_write_uhwi_stream (ob->main_stream, 0);
lto_destroy_simple_output_block (ob);
-
- /* In WHOPR mode during the WPA stage the joint offload tables need to be
- streamed to one partition only. That's why we free offload_funcs and
- offload_vars after the first call of output_offload_tables. */
- if (flag_wpa)
- {
- vec_free (offload_funcs);
- vec_free (offload_vars);
- vec_free (offload_ind_funcs);
- }
}
/* Verify the partitioning of NODE. */
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see
#include "builtins.h"
#include "lto-common.h"
#include "opts-jobserver.h"
+#include "omp-offload.h"
/* Number of parallel tasks to run. */
static int lto_parallelism;
@@ -226,12 +227,14 @@ wait_for_child ()
static void
stream_out_partitions_1 (char *temp_filename, int blen, int min, int max)
{
+ offload_output_tables_p = (min == 0);
/* Write all the nodes in SET. */
for (int p = min; p < max; p ++)
{
sprintf (temp_filename + blen, "%u.o", p);
stream_out (temp_filename, ltrans_partitions[p]->encoder, p);
ltrans_partitions[p]->encoder = NULL;
+ offload_output_tables_p = false;
}
}
@@ -88,6 +88,8 @@ struct oacc_loop
/* Holds offload tables with decls. */
vec<tree, va_gc> *offload_funcs, *offload_vars, *offload_ind_funcs;
+bool offload_output_tables_p = true;
+
/* Return level at which oacc routine may spawn a partitioned loop, or
-1 if it is not a routine (i.e. is an offload fn). */
@@ -29,6 +29,7 @@ extern int oacc_fn_attrib_level (tree attr);
extern GTY(()) vec<tree, va_gc> *offload_funcs;
extern GTY(()) vec<tree, va_gc> *offload_vars;
extern GTY(()) vec<tree, va_gc> *offload_ind_funcs;
+extern bool offload_output_tables_p;
extern void omp_finish_file (void);
extern void omp_discover_implicit_declare_target (void);