diff mbox series

Retune mask <-> integer cost for non-AVX512 micro-architecture.

Message ID CAMZc-bxw3g5tdkHk39h1x4J1+9YikDH=R0Zj+1uBbkguTGogUA@mail.gmail.com
State New
Headers show
Series Retune mask <-> integer cost for non-AVX512 micro-architecture. | expand

Commit Message

Hongtao Liu Sept. 15, 2020, 3 a.m. UTC
Hi:
  This patch avoids spilling GPRs to mask registers on non-AVX512
micro-architectures and fixes the regression in PR96744.

  Bootstrap is ok, regression test for i386/x86-64 backend is ok.
  No big performance impact on SPEC2017.

gcc/ChangeLog:

        PR target/96744
        * config/i386/x86-tune-costs.h (struct processor_costs):
        Increase mask <-> integer cost for non-AVX512 targets to avoid
        spilling gprs to mask registers.  Also retune mask <-> integer and
        mask_load/store for skylake_cost.

Comments

Uros Bizjak Sept. 15, 2020, 8:17 a.m. UTC | #1
On Tue, Sep 15, 2020 at 4:59 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> Hi:
>   This patch would avoid spill gprs to mask registers for non-AVX512
> micro-architecture and fix regression in PR96744.
>
>   Bootstrap is ok, regression test for i386/x86-64 backend is ok.
>   No big performance impact on SPEC2017.
>
> gcc/ChangeLog:
>
>         PR taregt/96744
>         * config/i386/x86-tune-costs.h (struct processor_costs):
>         Increase mask <-> integer cost for non AVX512 target to avoid
>         spill gpr to mask. Also retune mask <-> integer and
>         mask_load/store for skylake_cost.

LGTM.

Thanks,
Uros.

>
> --
> BR,
> Hongtao
diff mbox series

Patch

From 66549572467fe5dc5c4221e7857f3051d4f51554 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Mon, 24 Aug 2020 20:36:52 +0800
Subject: [PATCH] Retune mask <->integer cost for non-AVX512
 micro-architecture.

gcc/ChangeLog:

	PR target/96744
	* config/i386/x86-tune-costs.h (struct processor_costs):
	Increase mask <-> integer cost for non-AVX512 targets to avoid
	spilling gprs to mask registers.  Also retune mask <-> integer and
	mask_load/store for skylake_cost.
---
 gcc/config/i386/x86-tune-costs.h | 88 ++++++++++++++++----------------
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index a782a9dd9e3..0ad4b28903c 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -58,8 +58,8 @@  struct processor_costs ix86_size_cost = {/* costs for tuning for size */
 					   in 32,64,128,256 and 512-bit */
   {3, 3, 3, 3, 3},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  3, 3,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  3, 3,				/* SSE->integer and integer->SSE moves */
+  3, 3,				/* mask->integer and integer->mask moves */
   {2, 2, 2},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {2, 2, 2},				/* cost if storing mask register
@@ -169,8 +169,8 @@  struct processor_costs i386_cost = {	/* 386 specific costs */
 					   in 32,64,128,256 and 512-bit */
   {4, 8, 16, 32, 64},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  3, 3,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  3, 3,				/* SSE->integer and integer->SSE moves */
+  3, 3,				/* mask->integer and integer->mask moves */
   {2, 4, 2},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {2, 4, 2},				/* cost if storing mask register
@@ -277,8 +277,8 @@  struct processor_costs i486_cost = {	/* 486 specific costs */
 					   in 32,64,128,256 and 512-bit */
   {4, 8, 16, 32, 64},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  3, 3,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  3, 3,				/* SSE->integer and integer->SSE moves */
+  3, 3,				/* mask->integer and integer->mask moves */
   {2, 4, 2},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {2, 4, 2},				/* cost if storing mask register
@@ -387,8 +387,8 @@  struct processor_costs pentium_cost = {
 					   in 32,64,128,256 and 512-bit */
   {4, 8, 16, 32, 64},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  3, 3,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  3, 3,				/* SSE->integer and integer->SSE moves */
+  3, 3,				/* mask->integer and integer->mask moves */
   {2, 4, 2},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {2, 4, 2},				/* cost if storing mask register
@@ -488,8 +488,8 @@  struct processor_costs lakemont_cost = {
 					   in 32,64,128,256 and 512-bit */
   {4, 8, 16, 32, 64},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  3, 3,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  3, 3,				/* SSE->integer and integer->SSE moves */
+  3, 3,				/* mask->integer and integer->mask moves */
   {2, 4, 2},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {2, 4, 2},				/* cost if storing mask register
@@ -604,8 +604,8 @@  struct processor_costs pentiumpro_cost = {
 					   in 32,64,128,256 and 512-bit */
   {4, 8, 16, 32, 64},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  3, 3,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  3, 3,				/* SSE->integer and integer->SSE moves */
+  3, 3,				/* mask->integer and integer->mask moves */
   {4, 4, 4},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {2, 2, 2},				/* cost if storing mask register
@@ -711,8 +711,8 @@  struct processor_costs geode_cost = {
 					   in 32,64,128,256 and 512-bit */
   {2, 2, 8, 16, 32},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  6, 6,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  6, 6,				/* SSE->integer and integer->SSE moves */
+  6, 6,				/* mask->integer and integer->mask moves */
   {2, 2, 2},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {2, 2, 2},				/* cost if storing mask register
@@ -818,8 +818,8 @@  struct processor_costs k6_cost = {
 					   in 32,64,128,256 and 512-bit */
   {2, 2, 8, 16, 32},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  6, 6,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  6, 6,				/* SSE->integer and integer->SSE moves */
+  6, 6,				/* mask->integer and integer->mask moves */
   {4, 5, 4},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {2, 3, 2},				/* cost if storing mask register
@@ -931,8 +931,8 @@  struct processor_costs athlon_cost = {
 					   in 32,64,128,256 and 512-bit */
   {4, 4, 10, 10, 20},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  5, 5,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  5, 5,				/* SSE->integer and integer->SSE moves */
+  5, 5,				/* mask->integer and integer->mask moves */
   {3, 4, 3},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {3, 4, 3},				/* cost if storing mask register
@@ -1046,8 +1046,8 @@  struct processor_costs k8_cost = {
 					   in 32,64,128,256 and 512-bit */
   {4, 4, 10, 10, 20},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  5, 5,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  5, 5,				/* SSE->integer and integer->SSE moves */
+  5, 5,				/* mask->integer and integer->mask moves */
   {3, 4, 3},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {3, 4, 3},				/* cost if storing mask register
@@ -1165,8 +1165,8 @@  struct processor_costs amdfam10_cost = {
 					   in 32,64,128,256 and 512-bit */
   {4, 4, 5, 10, 20},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  3, 3,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  3, 3,				/* SSE->integer and integer->SSE moves */
+  3, 3,				/* mask->integer and integer->mask moves */
   {3, 4, 3},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {3, 4, 3},				/* cost if storing mask register
@@ -1295,7 +1295,7 @@  const struct processor_costs bdver_cost = {
   {10, 10, 10, 40, 60},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
   16, 20,				/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  16, 20,				/* mask->integer and integer->mask moves */
   {8, 8, 8},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {8, 8, 8},				/* cost if storing mask register
@@ -1431,8 +1431,8 @@  struct processor_costs znver1_cost = {
 					   in 32,64,128,256 and 512-bit.  */
   {8, 8, 8, 16, 32},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit.  */
-  6, 6,					/* SSE->integer and integer->SSE moves.  */
-  2, 2,				/* mask->integer and integer->mask moves */
+  6, 6,				/* SSE->integer and integer->SSE moves.  */
+  8, 8,				/* mask->integer and integer->mask moves */
   {6, 6, 6},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {8, 8, 8},				/* cost if storing mask register
@@ -1587,7 +1587,7 @@  struct processor_costs znver2_cost = {
 					   in 32,64,128,256 and 512-bit.  */
   6, 6,					/* SSE->integer and integer->SSE
 					   moves.  */
-  2, 2,				/* mask->integer and integer->mask moves */
+  8, 8,				/* mask->integer and integer->mask moves */
   {6, 6, 6},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {8, 8, 8},				/* cost if storing mask register
@@ -1726,11 +1726,11 @@  struct processor_costs skylake_cost = {
 					   in 32,64,128,256 and 512-bit */
   {8, 8, 8, 12, 24},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  6, 6,					/* SSE->integer and integer->SSE moves */
-  4, 6,				/* mask->integer and integer->mask moves */
-  {6, 6, 6},				/* cost of loading mask register
+  6, 6,				/* SSE->integer and integer->SSE moves */
+  5, 5,				/* mask->integer and integer->mask moves */
+  {8, 8, 8},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
-  {8, 8, 8},				/* cost if storing mask register
+  {6, 6, 6},				/* cost if storing mask register
 					   in QImode, HImode, SImode.  */
   3,					/* cost of moving mask register.  */
   /* End of register allocator costs.  */
@@ -1841,7 +1841,7 @@  const struct processor_costs btver1_cost = {
   {10, 10, 12, 48, 96},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
   14, 14,				/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  14, 14,				/* mask->integer and integer->mask moves */
   {6, 8, 6},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {6, 8, 6},				/* cost if storing mask register
@@ -1951,7 +1951,7 @@  const struct processor_costs btver2_cost = {
   {10, 10, 12, 48, 96},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
   14, 14,				/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  14, 14,				/* mask->integer and integer->mask moves */
   {8, 8, 6},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {8, 8, 6},				/* cost if storing mask register
@@ -2060,7 +2060,7 @@  struct processor_costs pentium4_cost = {
   {16, 16, 16, 32, 64},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
   20, 12,				/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  20, 12,				/* mask->integer and integer->mask moves */
   {4, 5, 4},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {2, 3, 2},				/* cost if storing mask register
@@ -2172,7 +2172,7 @@  struct processor_costs nocona_cost = {
   {12, 12, 12, 24, 48},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
   20, 12,				/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  20, 12,				/* mask->integer and integer->mask moves */
   {4, 4, 4},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {4, 4, 4},				/* cost if storing mask register
@@ -2281,8 +2281,8 @@  struct processor_costs atom_cost = {
 					   in 32,64,128,256 and 512-bit */
   {8, 8, 8, 16, 32},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  8, 6,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  8, 6,				/* SSE->integer and integer->SSE moves */
+  8, 6,				/* mask->integer and integer->mask moves */
   {6, 6, 6},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {6, 6, 6},			/* cost if storing mask register
@@ -2391,8 +2391,8 @@  struct processor_costs slm_cost = {
 					   in 32,64,128,256 and 512-bit */
   {8, 8, 8, 16, 32},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  8, 6,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  8, 6,				/* SSE->integer and integer->SSE moves */
+  8, 6,				/* mask->integer and integer->mask moves */
   {8, 8, 8},			/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {6, 6, 6},			/* cost if storing mask register
@@ -2501,8 +2501,8 @@  struct processor_costs intel_cost = {
 					   in 32,64,128,256 and 512-bit */
   {6, 6, 6, 6, 6},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  4, 4,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  4, 4,				/* SSE->integer and integer->SSE moves */
+  4, 4,				/* mask->integer and integer->mask moves */
   {4, 4, 4},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {6, 6, 6},				/* cost if storing mask register
@@ -2615,8 +2615,8 @@  struct processor_costs generic_cost = {
 					   in 32,64,128,256 and 512-bit */
   {6, 6, 6, 10, 15},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  6, 6,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  6, 6,				/* SSE->integer and integer->SSE moves */
+  6, 6,				/* mask->integer and integer->mask moves */
   {6, 6, 6},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {6, 6, 6},			/* cost if storing mask register
@@ -2734,8 +2734,8 @@  struct processor_costs core_cost = {
 					   in 32,64,128,256 and 512-bit */
   {6, 6, 6, 6, 12},			/* cost of storing SSE registers
 					   in 32,64,128,256 and 512-bit */
-  6, 6,					/* SSE->integer and integer->SSE moves */
-  2, 2,				/* mask->integer and integer->mask moves */
+  6, 6,				/* SSE->integer and integer->SSE moves */
+  6, 6,				/* mask->integer and integer->mask moves */
   {4, 4, 4},				/* cost of loading mask register
 					   in QImode, HImode, SImode.  */
   {6, 6, 6},				/* cost if storing mask register
-- 
2.18.1