@@ -3620,7 +3620,8 @@ public:
/* opt_pass methods: */
bool gate (function *) final override
{
- return optimize && optimize_function_for_speed_p (cfun);
+ return TARGET_ALIGN_TIGHT_LOOPS
+ && optimize && optimize_function_for_speed_p (cfun);
}
unsigned int execute (function *) final override
@@ -466,6 +466,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
#define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
#define TARGET_SSE_MOVCC_USE_BLENDV \
ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
+#define TARGET_ALIGN_TIGHT_LOOPS \
+ ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
@@ -214,7 +214,7 @@ DEF_TUNE (X86_TUNE_SINGLE_POP, "single_pop", m_386 | m_486 | m_PENT
DEF_TUNE (X86_TUNE_DOUBLE_POP, "double_pop", m_PENT | m_LAKEMONT)
/*****************************************************************************/
-/* Branch predictor tuning */
+/* Branch predictor and front-end tuning */
/*****************************************************************************/
/* X86_TUNE_PAD_SHORT_FUNCTION: Make every function to be at least 4
@@ -235,6 +235,12 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_GOLDMONT
| m_GOLDMONT_PLUS | m_INTEL | m_ATHLON_K8 | m_AMDFAM10)
+/* X86_TUNE_ALIGN_TIGHT_LOOPS: For tight loops whose size is
+ smaller than prefetch_block, align them to ceil_log2 (loop_size). The tune
+ overrides -falign-loops=N. */
+DEF_TUNE (X86_TUNE_ALIGN_TIGHT_LOOPS, "align_tight_loops",
+ ~(m_ZHAOXIN | m_CASCADELAKE | m_SKYLAKE_AVX512))
+
/*****************************************************************************/
/* Integer instruction selection tuning */
/*****************************************************************************/