Message ID | 20230614004316.546426-1-xry111@xry111.site |
---|---|
State | New |
Headers | show |
Series | LoongArch: Set default alignment for functions and labels with -mtune | expand |
LGTM! Thanks! 在 2023/6/14 上午8:43, Xi Ruoyao 写道: > The LA464 micro-architecture is sensitive to alignment of code. The > Loongson team has benchmarked various combinations of function and label > alignment; the results [1] show that 16-byte label alignment together with > 32-byte function alignment gives the best results in terms of SPEC score. > > Add an -mtune-based table-driven mechanism to set the default of > -falign-{functions,labels}. As LA464 is the first (and, for now, the only) > uarch supported by GCC, the same setting is also used for > the "generic" -mtune=loongarch64. In the future we may use different > settings for LA{2,3,6}64 once we add support for them. > > Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk? > > gcc/ChangeLog: > > * config/loongarch/loongarch-tune.h (loongarch_align): New > struct. > * config/loongarch/loongarch-def.h (loongarch_cpu_align): New > array. > * config/loongarch/loongarch-def.c (loongarch_cpu_align): Define > the array. > * config/loongarch/loongarch.cc > (loongarch_option_override_internal): Set the value of > -falign-functions= if -falign-functions is enabled but no value > is given. Likewise for -falign-labels=. 
> --- > gcc/config/loongarch/loongarch-def.c | 12 ++++++++++++ > gcc/config/loongarch/loongarch-def.h | 1 + > gcc/config/loongarch/loongarch-tune.h | 8 ++++++++ > gcc/config/loongarch/loongarch.cc | 6 ++++++ > 4 files changed, 27 insertions(+) > > diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c > index fc4ebbefede..6729c857f7c 100644 > --- a/gcc/config/loongarch/loongarch-def.c > +++ b/gcc/config/loongarch/loongarch-def.c > @@ -72,6 +72,18 @@ loongarch_cpu_cache[N_TUNE_TYPES] = { > }, > }; > > +struct loongarch_align > +loongarch_cpu_align[N_TUNE_TYPES] = { > + [CPU_LOONGARCH64] = { > + .function = "32", > + .label = "16", > + }, > + [CPU_LA464] = { > + .function = "32", > + .label = "16", > + }, > +}; > + > /* The following properties cannot be looked up directly using "cpucfg". > So it is necessary to provide a default value for "unknown native" > tune targets (i.e. -mtune=native while PRID does not correspond to > diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h > index 778b1409956..fb8bb88eb52 100644 > --- a/gcc/config/loongarch/loongarch-def.h > +++ b/gcc/config/loongarch/loongarch-def.h > @@ -144,6 +144,7 @@ extern int loongarch_cpu_issue_rate[]; > extern int loongarch_cpu_multipass_dfa_lookahead[]; > > extern struct loongarch_cache loongarch_cpu_cache[]; > +extern struct loongarch_align loongarch_cpu_align[]; > extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[]; > > #ifdef __cplusplus > diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h > index ba31c4f08c3..5c03262daff 100644 > --- a/gcc/config/loongarch/loongarch-tune.h > +++ b/gcc/config/loongarch/loongarch-tune.h > @@ -48,4 +48,12 @@ struct loongarch_cache { > int simultaneous_prefetches; /* number of parallel prefetch */ > }; > > +/* Alignment for functions and labels for best performance. For new uarchs > + the value should be measured via benchmarking. 
See the documentation for > + -falign-functions and -falign-labels in invoke.texi for the format. */ > +struct loongarch_align { > + const char *function; /* default value for -falign-functions */ > + const char *label; /* default value for -falign-labels */ > +}; > + > #endif /* LOONGARCH_TUNE_H */ > diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc > index eb73d11b869..5b8b93eb24b 100644 > --- a/gcc/config/loongarch/loongarch.cc > +++ b/gcc/config/loongarch/loongarch.cc > @@ -6249,6 +6249,12 @@ loongarch_option_override_internal (struct gcc_options *opts) > && !opts->x_optimize_size) > opts->x_flag_prefetch_loop_arrays = 1; > > + if (opts->x_flag_align_functions && !opts->x_str_align_functions) > + opts->x_str_align_functions = loongarch_cpu_align[LARCH_ACTUAL_TUNE].function; > + > + if (opts->x_flag_align_labels && !opts->x_str_align_labels) > + opts->x_str_align_labels = loongarch_cpu_align[LARCH_ACTUAL_TUNE].label; > + > if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib) > error ("%qs cannot be used for compiling a shared library", > "-mdirect-extern-access");
Pushed r14-1839. On Thu, 2023-06-15 at 09:12 +0800, Lulu Cheng wrote: > LGTM! Thanks! > > 在 2023/6/14 上午8:43, Xi Ruoyao 写道: > > The LA464 micro-architecture is sensitive to alignment of code. The > > Loongson team has benchmarked various combinations of function and label > > alignment; the results [1] show that 16-byte label alignment together with > > 32-byte function alignment gives the best results in terms of SPEC score. > > > > Add an -mtune-based table-driven mechanism to set the default of > > -falign-{functions,labels}. As LA464 is the first (and, for now, the only) > > uarch supported by GCC, the same setting is also used for > > the "generic" -mtune=loongarch64. In the future we may use different > > settings for LA{2,3,6}64 once we add support for them. > > > > Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk? > > > > gcc/ChangeLog: > > > > * config/loongarch/loongarch-tune.h (loongarch_align): New > > struct. > > * config/loongarch/loongarch-def.h (loongarch_cpu_align): New > > array. > > * config/loongarch/loongarch-def.c (loongarch_cpu_align): Define > > the array. > > * config/loongarch/loongarch.cc > > (loongarch_option_override_internal): Set the value of > > -falign-functions= if -falign-functions is enabled but no value > > is given. Likewise for -falign-labels=. 
> > --- > > gcc/config/loongarch/loongarch-def.c | 12 ++++++++++++ > > gcc/config/loongarch/loongarch-def.h | 1 + > > gcc/config/loongarch/loongarch-tune.h | 8 ++++++++ > > gcc/config/loongarch/loongarch.cc | 6 ++++++ > > 4 files changed, 27 insertions(+) > > > > diff --git a/gcc/config/loongarch/loongarch-def.c > > b/gcc/config/loongarch/loongarch-def.c > > index fc4ebbefede..6729c857f7c 100644 > > --- a/gcc/config/loongarch/loongarch-def.c > > +++ b/gcc/config/loongarch/loongarch-def.c > > @@ -72,6 +72,18 @@ loongarch_cpu_cache[N_TUNE_TYPES] = { > > }, > > }; > > > > +struct loongarch_align > > +loongarch_cpu_align[N_TUNE_TYPES] = { > > + [CPU_LOONGARCH64] = { > > + .function = "32", > > + .label = "16", > > + }, > > + [CPU_LA464] = { > > + .function = "32", > > + .label = "16", > > + }, > > +}; > > + > > /* The following properties cannot be looked up directly using > > "cpucfg". > > So it is necessary to provide a default value for "unknown > > native" > > tune targets (i.e. -mtune=native while PRID does not correspond > > to > > diff --git a/gcc/config/loongarch/loongarch-def.h > > b/gcc/config/loongarch/loongarch-def.h > > index 778b1409956..fb8bb88eb52 100644 > > --- a/gcc/config/loongarch/loongarch-def.h > > +++ b/gcc/config/loongarch/loongarch-def.h > > @@ -144,6 +144,7 @@ extern int loongarch_cpu_issue_rate[]; > > extern int loongarch_cpu_multipass_dfa_lookahead[]; > > > > extern struct loongarch_cache loongarch_cpu_cache[]; > > +extern struct loongarch_align loongarch_cpu_align[]; > > extern struct loongarch_rtx_cost_data > > loongarch_cpu_rtx_cost_data[]; > > > > #ifdef __cplusplus > > diff --git a/gcc/config/loongarch/loongarch-tune.h > > b/gcc/config/loongarch/loongarch-tune.h > > index ba31c4f08c3..5c03262daff 100644 > > --- a/gcc/config/loongarch/loongarch-tune.h > > +++ b/gcc/config/loongarch/loongarch-tune.h > > @@ -48,4 +48,12 @@ struct loongarch_cache { > > int simultaneous_prefetches; /* number of parallel prefetch */ > > }; > > > > +/* 
Alignment for functions and labels for best performance. For > > new uarchs > > + the value should be measured via benchmarking. See the > > documentation for > > + -falign-functions and -falign-labels in invoke.texi for the > > format. */ > > +struct loongarch_align { > > + const char *function; /* default value for -falign- > > functions */ > > + const char *label; /* default value for -falign-labels */ > > +}; > > + > > #endif /* LOONGARCH_TUNE_H */ > > diff --git a/gcc/config/loongarch/loongarch.cc > > b/gcc/config/loongarch/loongarch.cc > > index eb73d11b869..5b8b93eb24b 100644 > > --- a/gcc/config/loongarch/loongarch.cc > > +++ b/gcc/config/loongarch/loongarch.cc > > @@ -6249,6 +6249,12 @@ loongarch_option_override_internal (struct > > gcc_options *opts) > > && !opts->x_optimize_size) > > opts->x_flag_prefetch_loop_arrays = 1; > > > > + if (opts->x_flag_align_functions && !opts->x_str_align_functions) > > + opts->x_str_align_functions = > > loongarch_cpu_align[LARCH_ACTUAL_TUNE].function; > > + > > + if (opts->x_flag_align_labels && !opts->x_str_align_labels) > > + opts->x_str_align_labels = > > loongarch_cpu_align[LARCH_ACTUAL_TUNE].label; > > + > > if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib) > > error ("%qs cannot be used for compiling a shared library", > > "-mdirect-extern-access"); >
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c index fc4ebbefede..6729c857f7c 100644 --- a/gcc/config/loongarch/loongarch-def.c +++ b/gcc/config/loongarch/loongarch-def.c @@ -72,6 +72,18 @@ loongarch_cpu_cache[N_TUNE_TYPES] = { }, }; +struct loongarch_align +loongarch_cpu_align[N_TUNE_TYPES] = { + [CPU_LOONGARCH64] = { + .function = "32", + .label = "16", + }, + [CPU_LA464] = { + .function = "32", + .label = "16", + }, +}; + /* The following properties cannot be looked up directly using "cpucfg". So it is necessary to provide a default value for "unknown native" tune targets (i.e. -mtune=native while PRID does not correspond to diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h index 778b1409956..fb8bb88eb52 100644 --- a/gcc/config/loongarch/loongarch-def.h +++ b/gcc/config/loongarch/loongarch-def.h @@ -144,6 +144,7 @@ extern int loongarch_cpu_issue_rate[]; extern int loongarch_cpu_multipass_dfa_lookahead[]; extern struct loongarch_cache loongarch_cpu_cache[]; +extern struct loongarch_align loongarch_cpu_align[]; extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[]; #ifdef __cplusplus diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h index ba31c4f08c3..5c03262daff 100644 --- a/gcc/config/loongarch/loongarch-tune.h +++ b/gcc/config/loongarch/loongarch-tune.h @@ -48,4 +48,12 @@ struct loongarch_cache { int simultaneous_prefetches; /* number of parallel prefetch */ }; +/* Alignment for functions and labels for best performance. For new uarchs + the value should be measured via benchmarking. See the documentation for + -falign-functions and -falign-labels in invoke.texi for the format. 
*/ +struct loongarch_align { + const char *function; /* default value for -falign-functions */ + const char *label; /* default value for -falign-labels */ +}; + #endif /* LOONGARCH_TUNE_H */ diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index eb73d11b869..5b8b93eb24b 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -6249,6 +6249,12 @@ loongarch_option_override_internal (struct gcc_options *opts) && !opts->x_optimize_size) opts->x_flag_prefetch_loop_arrays = 1; + if (opts->x_flag_align_functions && !opts->x_str_align_functions) + opts->x_str_align_functions = loongarch_cpu_align[LARCH_ACTUAL_TUNE].function; + + if (opts->x_flag_align_labels && !opts->x_str_align_labels) + opts->x_str_align_labels = loongarch_cpu_align[LARCH_ACTUAL_TUNE].label; + if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib) error ("%qs cannot be used for compiling a shared library", "-mdirect-extern-access");