From 93f49b7739d87106988869ee9a5ebe441e0b56ab Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Tue, 12 Nov 2019 16:49:41 +0800
Subject: [PATCH] Split X86_TUNE_AVX128_OPTIMAL into X86_TUNE_AVX256_SPLIT_REGS
and X86_TUNE_AVX128_OPTIMAL.
Changelog
gcc/
PR target/92448
* config/i386/i386-expand.c (ix86_expand_set_or_cpymem):
Replace TARGET_AVX128_OPTIMAL with TARGET_AVX256_SPLIT_REGS.
* config/i386/i386.c (ix86_vec_cost): Ditto.
(ix86_reassociation_width): Ditto.
* config/i386/i386-options.c (ix86_option_override_internal):
Replace TARGET_AVX128_OPTIMAL with
ix86_tune_features[X86_TUNE_AVX128_OPTIMAL].
* config/i386/i386.h (TARGET_AVX256_SPLIT_REGS): New macro.
(TARGET_AVX128_OPTIMAL): Deleted.
* config/i386/x86-tune.def (X86_TUNE_AVX256_SPLIT_REGS): New
DEF_TUNE.
---
gcc/config/i386/i386-expand.c | 2 +-
gcc/config/i386/i386-options.c | 2 +-
gcc/config/i386/i386.c | 4 ++--
gcc/config/i386/i386.h | 4 ++--
gcc/config/i386/x86-tune.def | 4 ++++
5 files changed, 10 insertions(+), 6 deletions(-)
@@ -7348,7 +7348,7 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
&& optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
move_mode = wider_mode;
- if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
+ if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128)
move_mode = TImode;
/* Find the corresponding vector mode with the same size as MOVE_MODE.
@@ -2692,7 +2692,7 @@ ix86_option_override_internal (bool main_args_p,
/* Enable 128-bit AVX instruction generation
for the auto-vectorizer. */
- if (TARGET_AVX128_OPTIMAL
+ if (ix86_tune_features[X86_TUNE_AVX128_OPTIMAL]
&& (opts_set->x_prefer_vector_width_type == PVW_NONE))
opts->x_prefer_vector_width_type = PVW_AVX128;
@@ -18960,7 +18960,7 @@ ix86_vec_cost (machine_mode mode, int cost)
&& TARGET_SSE_SPLIT_REGS)
return cost * 2;
if (GET_MODE_BITSIZE (mode) > 128
- && TARGET_AVX128_OPTIMAL)
+ && TARGET_AVX256_SPLIT_REGS)
return cost * GET_MODE_BITSIZE (mode) / 128;
return cost;
}
@@ -21298,7 +21298,7 @@ ix86_reassociation_width (unsigned int op, machine_mode mode)
return 1;
/* Account for targets that splits wide vectors into multiple parts. */
- if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
+ if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
div = GET_MODE_BITSIZE (mode) / 128;
else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
div = GET_MODE_BITSIZE (mode) / 64;
@@ -578,8 +578,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR]
#define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \
ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
-#define TARGET_AVX128_OPTIMAL \
- ix86_tune_features[X86_TUNE_AVX128_OPTIMAL]
+#define TARGET_AVX256_SPLIT_REGS \
+ ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
#define TARGET_GENERAL_REGS_SSE_SPILL \
ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
@@ -453,6 +453,10 @@ DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL, "256_unaligned_load_optimal",
DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_store_optimal",
~(m_NEHALEM | m_SANDYBRIDGE | m_BDVER | m_ZNVER1 | m_GENERIC))
+/* X86_TUNE_AVX256_SPLIT_REGS: if true, AVX256 ops are split into two AVX128 ops. */
+DEF_TUNE (X86_TUNE_AVX256_SPLIT_REGS, "avx256_split_regs",m_BDVER | m_BTVER2
+ | m_ZNVER1)
+
/* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
the auto-vectorizer. */
DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
--
2.18.1