===================================================================
@@ -35,7 +35,12 @@ static const struct default_options rs6000_option_
{ OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
/* Enable -fsched-pressure for first pass instruction scheduling. */
{ OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
- { OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 },
+ /* Enable -funroll-loops with -munroll-small-loops. */
+ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
+ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 },
+ /* Disable -fweb and -frename-registers. */
+ { OPT_LEVELS_ALL, OPT_fweb, NULL, 0 },
+ { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
{ OPT_LEVELS_NONE, 0, NULL, 0 }
};
===================================================================
@@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribut
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
+
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
@@ -4540,24 +4543,18 @@ rs6000_option_override_internal (bool global_init_
global_options.x_param_values,
global_options_set.x_param_values);
- /* unroll very small loops 2 time if no -funroll-loops. */
- if (!global_options_set.x_flag_unroll_loops
- && !global_options_set.x_flag_unroll_all_loops)
+ /* Explicit -funroll-loops or -funroll-all-loops turns -munroll-small-loops
+ off and -fweb and -frename-registers on, unless those are set explicitly. */
+ if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
+ || (global_options_set.x_flag_unroll_all_loops
+ && flag_unroll_all_loops))
{
- maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
- global_options.x_param_values,
- global_options_set.x_param_values);
-
- maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
- global_options.x_param_values,
- global_options_set.x_param_values);
-
- /* If fweb or frename-registers are not specificed in command-line,
- do not turn them on implicitly. */
+ if (!global_options_set.x_unroll_small_loops)
+ unroll_small_loops = 0;
if (!global_options_set.x_flag_web)
- global_options.x_flag_web = 0;
+ flag_web = 1;
if (!global_options_set.x_flag_rename_registers)
- global_options.x_flag_rename_registers = 0;
+ flag_rename_registers = 1;
}
/* If using typedef char *va_list, signal that
@@ -5101,6 +5098,25 @@ rs6000_destroy_cost_data (void *data)
free (data);
}
+/* Implement targetm.loop_unroll_adjust: adjust unroll count NUNROLL for LOOP. */
+
+static unsigned
+rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
+{
+ if (unroll_small_loops)
+ {
+ /* With -munroll-small-loops, a loop of at most 10 insns gets NUNROLL capped
+ at 2; for a larger loop 0 is returned. TODO: The 10 is hardcoded; we may
+ want to unroll very small loops more times (4 perhaps), and use a PARAM. */
+ if (loop->ninsns <= 10)
+ return MIN (2, nunroll);
+ else
+ return 0;
+ }
+
+ return nunroll;
+}
+
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
library with vectorized intrinsics. */
===================================================================
@@ -501,6 +501,10 @@ moptimize-swaps
Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
Analyze and remove doubleword swaps from VSX computations.
+munroll-small-loops
+Target Undocumented Var(unroll_small_loops) Init(0) Save
+Use conservative small loop unrolling.
+
mpower9-misc
Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags)
Use certain scalar instructions added in ISA 3.0.
===================================================================
@@ -1,9 +1,6 @@
/* PR tree-optimization/59643 */
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-pcom-details" } */
-/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */
-/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the
- loop of this case. */
void
foo (double *a, double *b, double *c, double d, double e, int n)
From: guojiufu <guojiufu@linux.ibm.com> Hi, Here is an updated patch incorporating the comments and suggestions from the previous review. This patch introduces a loop unroll adjust hook for powerpc, in which target-specific heuristic adjustments can be made. With this patch, -funroll-loops is enabled for small loops at -O2 and -O3. Compared with [PATCH V3], this patch is enhanced a little by adding an option, -munroll-small-loops, to guard the small-loop unrolling so that it works correctly with -flto. Successfully bootstrapped and regtested on powerpc64le. OK for trunk? Jiufu Guo BR. gcc/ 2019-11-06 Jiufu Guo <guojiufu@linux.ibm.com> PR tree-optimization/88760 * config/rs6000/rs6000.opt (-munroll-small-loops): New option. * common/config/rs6000/rs6000-common.c (rs6000_option_optimization_table) [OPT_LEVELS_2_PLUS_SPEED_ONLY]: Turn on -funroll-loops and -munroll-small-loops. [OPT_LEVELS_ALL]: Turn off -fweb and -frename-registers. * config/rs6000/rs6000.c (rs6000_option_override_internal): Remove setting of PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS. Turn off -munroll-small-loops, turn on -fweb and -frename-registers for explicit -funroll-loops. (TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook. (rs6000_loop_unroll_adjust): Define it. Use -munroll-small-loops. gcc/testsuite/ 2019-11-06 Jiufu Guo <guojiufu@linux.ibm.com> PR tree-optimization/88760 * gcc.dg/pr59643.c: Revert to r277550.