From 66549572467fe5dc5c4221e7857f3051d4f51554 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Mon, 24 Aug 2020 20:36:52 +0800
Subject: [PATCH] Retune mask <-> integer cost for non-AVX512
micro-architecture.
gcc/ChangeLog:
PR target/96744
* config/i386/x86-tune-costs.h (struct processor_costs):
Increase mask <-> integer cost for non-AVX512 targets to avoid
spilling GPRs to mask registers.  Also retune mask <-> integer and
mask_load/store costs for skylake_cost.
---
gcc/config/i386/x86-tune-costs.h | 88 ++++++++++++++++----------------
1 file changed, 44 insertions(+), 44 deletions(-)
@@ -58,8 +58,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
in 32,64,128,256 and 512-bit */
{3, 3, 3, 3, 3}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 3, 3, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ 3, 3, /* mask->integer and integer->mask moves */
{2, 2, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 2, 2}, /* cost if storing mask register
@@ -169,8 +169,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */
in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 3, 3, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ 3, 3, /* mask->integer and integer->mask moves */
{2, 4, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 4, 2}, /* cost if storing mask register
@@ -277,8 +277,8 @@ struct processor_costs i486_cost = { /* 486 specific costs */
in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 3, 3, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ 3, 3, /* mask->integer and integer->mask moves */
{2, 4, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 4, 2}, /* cost if storing mask register
@@ -387,8 +387,8 @@ struct processor_costs pentium_cost = {
in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 3, 3, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ 3, 3, /* mask->integer and integer->mask moves */
{2, 4, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 4, 2}, /* cost if storing mask register
@@ -488,8 +488,8 @@ struct processor_costs lakemont_cost = {
in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 3, 3, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ 3, 3, /* mask->integer and integer->mask moves */
{2, 4, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 4, 2}, /* cost if storing mask register
@@ -604,8 +604,8 @@ struct processor_costs pentiumpro_cost = {
in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 3, 3, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ 3, 3, /* mask->integer and integer->mask moves */
{4, 4, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 2, 2}, /* cost if storing mask register
@@ -711,8 +711,8 @@ struct processor_costs geode_cost = {
in 32,64,128,256 and 512-bit */
{2, 2, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 6, 6, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ 6, 6, /* mask->integer and integer->mask moves */
{2, 2, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 2, 2}, /* cost if storing mask register
@@ -818,8 +818,8 @@ struct processor_costs k6_cost = {
in 32,64,128,256 and 512-bit */
{2, 2, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 6, 6, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ 6, 6, /* mask->integer and integer->mask moves */
{4, 5, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 3, 2}, /* cost if storing mask register
@@ -931,8 +931,8 @@ struct processor_costs athlon_cost = {
in 32,64,128,256 and 512-bit */
{4, 4, 10, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 5, 5, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 5, 5, /* SSE->integer and integer->SSE moves */
+ 5, 5, /* mask->integer and integer->mask moves */
{3, 4, 3}, /* cost of loading mask register
in QImode, HImode, SImode. */
{3, 4, 3}, /* cost if storing mask register
@@ -1046,8 +1046,8 @@ struct processor_costs k8_cost = {
in 32,64,128,256 and 512-bit */
{4, 4, 10, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 5, 5, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 5, 5, /* SSE->integer and integer->SSE moves */
+ 5, 5, /* mask->integer and integer->mask moves */
{3, 4, 3}, /* cost of loading mask register
in QImode, HImode, SImode. */
{3, 4, 3}, /* cost if storing mask register
@@ -1165,8 +1165,8 @@ struct processor_costs amdfam10_cost = {
in 32,64,128,256 and 512-bit */
{4, 4, 5, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 3, 3, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ 3, 3, /* mask->integer and integer->mask moves */
{3, 4, 3}, /* cost of loading mask register
in QImode, HImode, SImode. */
{3, 4, 3}, /* cost if storing mask register
@@ -1295,7 +1295,7 @@ const struct processor_costs bdver_cost = {
{10, 10, 10, 40, 60}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
16, 20, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 16, 20, /* mask->integer and integer->mask moves */
{8, 8, 8}, /* cost of loading mask register
in QImode, HImode, SImode. */
{8, 8, 8}, /* cost if storing mask register
@@ -1431,8 +1431,8 @@ struct processor_costs znver1_cost = {
in 32,64,128,256 and 512-bit. */
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit. */
- 6, 6, /* SSE->integer and integer->SSE moves. */
- 2, 2, /* mask->integer and integer->mask moves */
+ 6, 6, /* SSE->integer and integer->SSE moves. */
+ 8, 8, /* mask->integer and integer->mask moves */
{6, 6, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{8, 8, 8}, /* cost if storing mask register
@@ -1587,7 +1587,7 @@ struct processor_costs znver2_cost = {
in 32,64,128,256 and 512-bit. */
6, 6, /* SSE->integer and integer->SSE
moves. */
- 2, 2, /* mask->integer and integer->mask moves */
+ 8, 8, /* mask->integer and integer->mask moves */
{6, 6, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{8, 8, 8}, /* cost if storing mask register
@@ -1726,11 +1726,11 @@ struct processor_costs skylake_cost = {
in 32,64,128,256 and 512-bit */
{8, 8, 8, 12, 24}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 6, 6, /* SSE->integer and integer->SSE moves */
- 4, 6, /* mask->integer and integer->mask moves */
- {6, 6, 6}, /* cost of loading mask register
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ 5, 5, /* mask->integer and integer->mask moves */
+ {8, 8, 8}, /* cost of loading mask register
in QImode, HImode, SImode. */
- {8, 8, 8}, /* cost if storing mask register
+ {6, 6, 6}, /* cost if storing mask register
in QImode, HImode, SImode. */
3, /* cost of moving mask register. */
/* End of register allocator costs. */
@@ -1841,7 +1841,7 @@ const struct processor_costs btver1_cost = {
{10, 10, 12, 48, 96}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
14, 14, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 14, 14, /* mask->integer and integer->mask moves */
{6, 8, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 8, 6}, /* cost if storing mask register
@@ -1951,7 +1951,7 @@ const struct processor_costs btver2_cost = {
{10, 10, 12, 48, 96}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
14, 14, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 14, 14, /* mask->integer and integer->mask moves */
{8, 8, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{8, 8, 6}, /* cost if storing mask register
@@ -2060,7 +2060,7 @@ struct processor_costs pentium4_cost = {
{16, 16, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
20, 12, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 20, 12, /* mask->integer and integer->mask moves */
{4, 5, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 3, 2}, /* cost if storing mask register
@@ -2172,7 +2172,7 @@ struct processor_costs nocona_cost = {
{12, 12, 12, 24, 48}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
20, 12, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 20, 12, /* mask->integer and integer->mask moves */
{4, 4, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{4, 4, 4}, /* cost if storing mask register
@@ -2281,8 +2281,8 @@ struct processor_costs atom_cost = {
in 32,64,128,256 and 512-bit */
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 8, 6, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 8, 6, /* SSE->integer and integer->SSE moves */
+ 8, 6, /* mask->integer and integer->mask moves */
{6, 6, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
@@ -2391,8 +2391,8 @@ struct processor_costs slm_cost = {
in 32,64,128,256 and 512-bit */
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 8, 6, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 8, 6, /* SSE->integer and integer->SSE moves */
+ 8, 6, /* mask->integer and integer->mask moves */
{8, 8, 8}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
@@ -2501,8 +2501,8 @@ struct processor_costs intel_cost = {
in 32,64,128,256 and 512-bit */
{6, 6, 6, 6, 6}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 4, 4, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 4, 4, /* SSE->integer and integer->SSE moves */
+ 4, 4, /* mask->integer and integer->mask moves */
{4, 4, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
@@ -2615,8 +2615,8 @@ struct processor_costs generic_cost = {
in 32,64,128,256 and 512-bit */
{6, 6, 6, 10, 15}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 6, 6, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ 6, 6, /* mask->integer and integer->mask moves */
{6, 6, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
@@ -2734,8 +2734,8 @@ struct processor_costs core_cost = {
in 32,64,128,256 and 512-bit */
{6, 6, 6, 6, 12}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- 6, 6, /* SSE->integer and integer->SSE moves */
- 2, 2, /* mask->integer and integer->mask moves */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ 6, 6, /* mask->integer and integer->mask moves */
{4, 4, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
--
2.18.1