Message ID | 20241106152722.2821586-2-ams@baylibre.com |
---|---|
State | New |
Headers | show |
Series | [1/4] openmp: Tune omp_max_vf for offload targets | expand |
On Wed, Nov 06, 2024 at 03:27:19PM +0000, Andrew Stubbs wrote: > If requested, return the vectorization factor appropriate for the offload > device, if any. > > This change gives a significant speedup in the BabelStream "dot" benchmark on > amdgcn. > > The omp_adjust_chunk_size usecase is set "false", for now, but I intend to > change that in a follow-up patch. > > Note that NVPTX SIMT offload does not use this code-path. > > gcc/ChangeLog: > > * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Set > omp_max_vf to offload == false. > * omp-expand.cc (omp_adjust_chunk_size): Likewise. > * omp-general.cc (omp_max_vf): Add "offload" parameter, and detect > amdgcn offload devices. > * omp-general.h (omp_max_vf): Likewise. > * omp-low.cc (lower_rec_simd_input_clauses): Pass offload state to > omp_max_vf. Ok. Jakub
On Wed, Nov 6, 2024 at 7:28 AM Andrew Stubbs <ams@baylibre.com> wrote: > > If requested, return the vectorization factor appropriate for the offload > device, if any. > > This change gives a significant speedup in the BabelStream "dot" benchmark on > amdgcn. > > The omp_adjust_chunk_size usecase is set "false", for now, but I intend to > change that in a follow-up patch. > > Note that NVPTX SIMT offload does not use this code-path. > > gcc/ChangeLog: > > * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Set > omp_max_vf to offload == false. > * omp-expand.cc (omp_adjust_chunk_size): Likewise. > * omp-general.cc (omp_max_vf): Add "offload" parameter, and detect > amdgcn offload devices. > * omp-general.h (omp_max_vf): Likewise. > * omp-low.cc (lower_rec_simd_input_clauses): Pass offload state to > omp_max_vf. > --- > gcc/gimple-loop-versioning.cc | 2 +- > gcc/omp-expand.cc | 2 +- > gcc/omp-general.cc | 17 +++++++++++++++-- > gcc/omp-general.h | 2 +- > gcc/omp-low.cc | 3 ++- > 5 files changed, 20 insertions(+), 6 deletions(-) > > diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc > index 107b0020024..2968c929d04 100644 > --- a/gcc/gimple-loop-versioning.cc > +++ b/gcc/gimple-loop-versioning.cc > @@ -554,7 +554,7 @@ loop_versioning::loop_versioning (function *fn) > handled efficiently by scalar code. omp_max_vf calculates the > maximum number of bytes in a vector, when such a value is relevant > to loop optimization. */ > - m_maximum_scale = estimated_poly_value (omp_max_vf ()); > + m_maximum_scale = estimated_poly_value (omp_max_vf (false)); > m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE); > } > > diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc > index b0b4ddf5dbc..907fd46a5b2 100644 > --- a/gcc/omp-expand.cc > +++ b/gcc/omp-expand.cc > @@ -212,7 +212,7 @@ omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) > if (!simd_schedule || integer_zerop (chunk_size)) > return chunk_size; > > - poly_uint64 vf = omp_max_vf (); > + poly_uint64 vf = omp_max_vf (false); > if (known_eq (vf, 1U)) > return chunk_size; > > diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc > index f74b9bf5e96..1ae575ee181 100644 > --- a/gcc/omp-general.cc > +++ b/gcc/omp-general.cc > @@ -987,10 +987,11 @@ find_combined_omp_for (tree *tp, int *walk_subtrees, void *data) > return NULL_TREE; > } > > -/* Return maximum possible vectorization factor for the target. */ > +/* Return maximum possible vectorization factor for the target, or for > + the OpenMP offload target if one exists. */ > > poly_uint64 > -omp_max_vf (void) > +omp_max_vf (bool offload) > { > if (!optimize > || optimize_debug > @@ -999,6 +1000,18 @@ omp_max_vf (void) > && OPTION_SET_P (flag_tree_loop_vectorize))) > return 1; > > + if (ENABLE_OFFLOADING && offload) > + { > + for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;) > + { > + if (startswith (c, "amdgcn")) > + return ordered_max (64, omp_max_vf (false)); This causes a bootstrap failure for me (and others) on x86_64-linux-gnu: In file included from ../../src-master/gcc/coretypes.h:497, from ../../src-master/gcc/omp-general.cc:25: ../../src-master/gcc/poly-int.h: In instantiation of ‘typename if_nonpoly<Cb, bool>::type maybe_lt(const poly_int<N, C>&, const Cb&) [with unsigned int N = 1; Ca = long unsigned int; Cb = int; typename if_nonpoly<Cb, bool>::type = bool]’: ../../src-master/gcc/poly-int.h:1440:7: required from ‘poly_int<N, typename poly_result<typename if_nonpoly<Ca>::type, Cb>::type> ordered_max(const Ca&, const poly_int<N, Cb>&) [with unsigned int N = 1; Ca = int; Cb = long unsigned int; typename poly_result<typename if_nonpoly<Ca>::type, Cb>::type = long unsigned int; typename if_nonpoly<Ca>::type = int]’ 1342 | #define maybe_gt(A, B) maybe_lt (B, A) | ~~~~~~~~~^~~~~~ ../../src-master/gcc/omp-general.cc:1008:25: required from here 1008 | return ordered_max (64, omp_max_vf (false)); | ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~ ../../src-master/gcc/poly-int.h:1317:22: error: comparison of integer expressions of different signedness: ‘const long unsigned int’ and ‘const int’ [-Werror=sign-compare] 1317 | return a.coeffs[0] < b; | ~~~~~~~~~~~~^~~ ../../src-master/gcc/poly-int.h: In instantiation of ‘typename if_nonpoly<Ca, bool>::type maybe_lt(const Ca&, const poly_int<N, Cb>&) [with unsigned int N = 1; Ca = int; Cb = long unsigned int; typename if_nonpoly<Ca, bool>::type = bool]’: ../../src-master/gcc/poly-int.h:1445:2: required from ‘poly_int<N, typename poly_result<typename if_nonpoly<Ca>::type, Cb>::type> ordered_max(const Ca&, const poly_int<N, Cb>&) [with unsigned int N = 1; Ca = int; Cb = long unsigned int; typename poly_result<typename if_nonpoly<Ca>::type, Cb>::type = long unsigned int; typename if_nonpoly<Ca>::type = int]’ 1342 | #define maybe_gt(A, B) maybe_lt (B, A) | ~~~~~~~~~^~~~~~ ../../src-master/gcc/omp-general.cc:1008:25: required from here 1008 | return ordered_max (64, omp_max_vf (false)); | ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~ ../../src-master/gcc/poly-int.h:1328:12: error: comparison of integer expressions of different signedness: ‘const int’ and ‘const long unsigned int’ [-Werror=sign-compare] 1328 | return a < b.coeffs[0]; | ~~^~~~~~~~~~~ Thanks, Andrew Pinski > + else if ((c = strchr (c, ':'))) > + c++; > + } > + /* Otherwise, fall through to host VF. */ > + } > + > auto_vector_modes modes; > targetm.vectorize.autovectorize_vector_modes (&modes, true); > if (!modes.is_empty ()) > diff --git a/gcc/omp-general.h b/gcc/omp-general.h > index f3778131626..70f78d2055b 100644 > --- a/gcc/omp-general.h > +++ b/gcc/omp-general.h > @@ -162,7 +162,7 @@ extern void omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, > struct omp_for_data_loop *loops); > extern gimple *omp_build_barrier (tree lhs); > extern tree find_combined_omp_for (tree *, int *, void *); > -extern poly_uint64 omp_max_vf (void); > +extern poly_uint64 omp_max_vf (bool); > extern int omp_max_simt_vf (void); > extern const char *omp_context_name_list_prop (tree); > extern void omp_construct_traits_to_codes (tree, int, enum tree_code *); > diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc > index 44c4310075b..70a2c108fbc 100644 > --- a/gcc/omp-low.cc > +++ b/gcc/omp-low.cc > @@ -4589,7 +4589,8 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, > { > if (known_eq (sctx->max_vf, 0U)) > { > - sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf (); > + sctx->max_vf = (sctx->is_simt ? omp_max_simt_vf () > + : omp_max_vf (omp_maybe_offloaded_ctx (ctx))); > if (maybe_gt (sctx->max_vf, 1U)) > { > tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt), > -- > 2.46.0 >
On Wed, Nov 06, 2024 at 09:38:21AM -0800, Andrew Pinski wrote: > > + for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;) > > + { > > + if (startswith (c, "amdgcn")) > > + return ordered_max (64, omp_max_vf (false)); > > This causes a bootstrap failure for me (and others) on x86_64-linux-gnu: > > In file included from ../../src-master/gcc/coretypes.h:497, > from ../../src-master/gcc/omp-general.cc:25: > ../../src-master/gcc/poly-int.h: In instantiation of ‘typename > if_nonpoly<Cb, bool>::type maybe_lt(const poly_int<N, C>&, const Cb&) > [with unsigned int N = 1; Ca = long unsigned int; Cb = int; typename > if_nonpoly<Cb, bool>::type = bool]’: > ../../src-master/gcc/poly-int.h:1440:7: required from ‘poly_int<N, > typename poly_result<typename if_nonpoly<Ca>::type, Cb>::type> > ordered_max(const Ca&, const poly_int<N, Cb>&) [with unsigned int N = > 1; Ca = int; Cb = long unsigned int; typename poly_result<typename > if_nonpoly<Ca>::type, Cb>::type = long unsigned int; typename > if_nonpoly<Ca>::type = int]’ > 1342 | #define maybe_gt(A, B) maybe_lt (B, A) > | ~~~~~~~~~^~~~~~ > ../../src-master/gcc/omp-general.cc:1008:25: required from here > 1008 | return ordered_max (64, omp_max_vf (false)); > | ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~ > ../../src-master/gcc/poly-int.h:1317:22: error: comparison of integer > expressions of different signedness: ‘const long unsigned int’ and > ‘const int’ [-Werror=sign-compare] > 1317 | return a.coeffs[0] < b; > | ~~~~~~~~~~~~^~~ > ../../src-master/gcc/poly-int.h: In instantiation of ‘typename > if_nonpoly<Ca, bool>::type maybe_lt(const Ca&, const poly_int<N, Cb>&) > [with unsigned int N = 1; Ca = int; Cb = long unsigned int; typename > if_nonpoly<Ca, bool>::type = bool]’: > ../../src-master/gcc/poly-int.h:1445:2: required from ‘poly_int<N, > typename poly_result<typename if_nonpoly<Ca>::type, Cb>::type> > ordered_max(const Ca&, const poly_int<N, Cb>&) [with unsigned int N = > 1; Ca = int; Cb = long unsigned int; typename poly_result<typename > if_nonpoly<Ca>::type, Cb>::type = long unsigned int; typename > if_nonpoly<Ca>::type = int]’ > 1342 | #define maybe_gt(A, B) maybe_lt (B, A) > | ~~~~~~~~~^~~~~~ > ../../src-master/gcc/omp-general.cc:1008:25: required from here > 1008 | return ordered_max (64, omp_max_vf (false)); > | ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~ > ../../src-master/gcc/poly-int.h:1328:12: error: comparison of integer > expressions of different signedness: ‘const int’ and ‘const long > unsigned int’ [-Werror=sign-compare] > 1328 | return a < b.coeffs[0]; > | ~~^~~~~~~~~~~ Guess we need ordered_max (uint64_t (64), omp_max_vf (false)) or something similar. Jakub
On 06/11/2024 17:38, Andrew Pinski wrote: >> + if (ENABLE_OFFLOADING && offload) >> + { >> + for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;) >> + { >> + if (startswith (c, "amdgcn")) >> + return ordered_max (64, omp_max_vf (false)); > > This causes a bootstrap failure for me (and others) on x86_64-linux-gnu: I'm not sure why I didn't see this. I'm testing the attached patch. Andrew
On Wed, Nov 06, 2024 at 05:53:53PM +0000, Andrew Stubbs wrote: > I'm not sure why I didn't see this. Was it bootstrap tested or just built without bootstrap + tested? Otherwise it is just a warning. > I'm testing the attached patch. If it makes it to stage3, this is ok for trunk. Just 64U would most likely work as well (at least I don't get any warnings in that case). > From 345eb9b795d9728733bd0e472529e259ce796ff6 Mon Sep 17 00:00:00 2001 > From: Andrew Stubbs <ams@baylibre.com> > Date: Wed, 6 Nov 2024 17:50:00 +0000 > Subject: [PATCH] openmp: Fix signed/unsigned warning > > My previous patch broke things when building with Werror. > > gcc/ChangeLog: > > * omp-general.cc (omp_max_vf): Cast the constant to poly_uint64. > --- > gcc/omp-general.cc | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc > index 1ae575ee181..72fb7f92ff7 100644 > --- a/gcc/omp-general.cc > +++ b/gcc/omp-general.cc > @@ -1005,7 +1005,7 @@ omp_max_vf (bool offload) > for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;) > { > if (startswith (c, "amdgcn")) > - return ordered_max (64, omp_max_vf (false)); > + return ordered_max (poly_uint64 (64), omp_max_vf (false)); > else if ((c = strchr (c, ':'))) > c++; > } Jakub
On 06/11/2024 17:59, Jakub Jelinek wrote: > On Wed, Nov 06, 2024 at 05:53:53PM +0000, Andrew Stubbs wrote: >> I'm not sure why I didn't see this. > > Was it bootstrap tested or just built without bootstrap + tested? > Otherwise it is just a warning. Apparently I forgot to rerun the bootstrap after making the changes you requested. Sorry, everyone. :-( >> I'm testing the attached patch. > > If it makes it to stage3, this is ok for trunk. > Just 64U would most likely work as well (at least I don't get any warnings > in that case). Thanks, pushed now. Andrew
diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc index 107b0020024..2968c929d04 100644 --- a/gcc/gimple-loop-versioning.cc +++ b/gcc/gimple-loop-versioning.cc @@ -554,7 +554,7 @@ loop_versioning::loop_versioning (function *fn) handled efficiently by scalar code. omp_max_vf calculates the maximum number of bytes in a vector, when such a value is relevant to loop optimization. */ - m_maximum_scale = estimated_poly_value (omp_max_vf ()); + m_maximum_scale = estimated_poly_value (omp_max_vf (false)); m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE); } diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc index b0b4ddf5dbc..907fd46a5b2 100644 --- a/gcc/omp-expand.cc +++ b/gcc/omp-expand.cc @@ -212,7 +212,7 @@ omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) if (!simd_schedule || integer_zerop (chunk_size)) return chunk_size; - poly_uint64 vf = omp_max_vf (); + poly_uint64 vf = omp_max_vf (false); if (known_eq (vf, 1U)) return chunk_size; diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc index f74b9bf5e96..1ae575ee181 100644 --- a/gcc/omp-general.cc +++ b/gcc/omp-general.cc @@ -987,10 +987,11 @@ find_combined_omp_for (tree *tp, int *walk_subtrees, void *data) return NULL_TREE; } -/* Return maximum possible vectorization factor for the target. */ +/* Return maximum possible vectorization factor for the target, or for + the OpenMP offload target if one exists. */ poly_uint64 -omp_max_vf (void) +omp_max_vf (bool offload) { if (!optimize || optimize_debug @@ -999,6 +1000,18 @@ omp_max_vf (void) && OPTION_SET_P (flag_tree_loop_vectorize))) return 1; + if (ENABLE_OFFLOADING && offload) + { + for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;) + { + if (startswith (c, "amdgcn")) + return ordered_max (64, omp_max_vf (false)); + else if ((c = strchr (c, ':'))) + c++; + } + /* Otherwise, fall through to host VF. */ + } + auto_vector_modes modes; targetm.vectorize.autovectorize_vector_modes (&modes, true); if (!modes.is_empty ()) diff --git a/gcc/omp-general.h b/gcc/omp-general.h index f3778131626..70f78d2055b 100644 --- a/gcc/omp-general.h +++ b/gcc/omp-general.h @@ -162,7 +162,7 @@ extern void omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, struct omp_for_data_loop *loops); extern gimple *omp_build_barrier (tree lhs); extern tree find_combined_omp_for (tree *, int *, void *); -extern poly_uint64 omp_max_vf (void); +extern poly_uint64 omp_max_vf (bool); extern int omp_max_simt_vf (void); extern const char *omp_context_name_list_prop (tree); extern void omp_construct_traits_to_codes (tree, int, enum tree_code *); diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index 44c4310075b..70a2c108fbc 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -4589,7 +4589,8 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, { if (known_eq (sctx->max_vf, 0U)) { - sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf (); + sctx->max_vf = (sctx->is_simt ? omp_max_simt_vf () + : omp_max_vf (omp_maybe_offloaded_ctx (ctx))); if (maybe_gt (sctx->max_vf, 1U)) { tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),