diff mbox series

[x86_64] Support wide immediate constants in STV.

Message ID 012901daeef6$5ffea3a0$1ffbeae0$@nextmovesoftware.com
State New
Headers show
Series [x86_64] Support wide immediate constants in STV. | expand

Commit Message

Roger Sayle Aug. 15, 2024, 9:34 a.m. UTC
As requested this patch is split out from my earlier submission.
This patch provides more accurate costs/gains for (wide) immediate
constants in STV, suitably adjusting the costs/gains when the highpart
and lowpart words are the same.  One minor complication is that the
middle-end assumes (when generating memset) that SSE constants will
be shared/amortized across multiple consecutive writes.  Hence to
avoid testsuite regressions, we add a heuristic that considers an immediate
constant to be very cheap, if that same immediate value occurs in the
previous instruction or in the following instruction.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2024-08-15  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386-features.cc (timode_immed_const_gain): New
        function to determine the gain/cost on a CONST_WIDE_INT.
        (local_duplicate_constant_p): Helper function to see if the
        same immediate constant appears in the previous or next insn.
        (timode_scalar_chain::compute_convert_gain): Fix whitespace.
        <case CONST_WIDE_INT>: Provide more accurate estimates using
        timode_immed_const_gain and local_duplicate_constant_p.
        <case AND>: Handle CONST_SCALAR_INT_P (src).


Thanks again,
Roger
--

Comments

Uros Bizjak Aug. 15, 2024, 10:11 a.m. UTC | #1
On Thu, Aug 15, 2024 at 11:34 AM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> As requested this patch is split out from my earlier submission.
> This patch provides more accurate costs/gains for (wide) immediate
> constants in STV, suitably adjusting the costs/gains when the highpart
> and lowpart words are the same.  One minor complication is that the
> middle-end assumes (when generating memset) that SSE constants will
> be shared/amortized across multiple consecutive writes.  Hence to
> avoid testsuite regressions, we add a heuristic that considers an immediate
> constant to be very cheap, if that same immediate value occurs in the
> previous instruction or in the following instruction.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?
>
>
> 2024-08-15  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         * config/i386/i386-features.cc (timode_immed_const_gain): New
>         function to determine the gain/cost on a CONST_WIDE_INT.
>         (local_duplicate_constant_p): Helper function to see if the
>         same immediate constant appears in the previous or next insn.
>         (timode_scalar_chain::compute_convert_gain): Fix whitespace.
>         <case CONST_WIDE_INT>: Provide more accurate estimates using
>         timode_immed_const_gain and local_duplicate_constant_p.
>         <case AND>: Handle CONST_SCALAR_INT_P (src).

LGTM.

Thanks,
Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c36d181..78564df 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -1503,6 +1503,53 @@  general_scalar_chain::convert_insn (rtx_insn *insn)
   df_insn_rescan (insn);
 }
 
+/* Return the STV gain for loading the immediate constant CST: two movabsq
+   (TImode) vs. vmovdqa (V1TImode) is neutral, bar the case below.  */
+
+static int
+timode_immed_const_gain (rtx cst)
+{
+  /* 2x movabsq ~ vmovdqa.  */
+  if (!CONST_WIDE_INT_P (cst) || CONST_WIDE_INT_NUNITS (cst) != 2)
+    return 0;
+  if (CONST_WIDE_INT_ELT (cst, 0) != CONST_WIDE_INT_ELT (cst, 1))
+    return 0;
+  /* Equal halves: movabsq vs. movabsq+vmovq+vpunpcklqdq.  */
+  if (optimize_insn_for_size_p ())
+    return -COSTS_N_BYTES (9);
+  return -COSTS_N_INSNS (2);
+}
+
+/* Return true if the insn immediately after INSN, or the one immediately
+   before it, is a single set whose destination has mode MODE and whose
+   source is rtx_equal_p to the constant CST.  Only the two neighbours
+   are examined; INSN itself is never inspected.  */
+
+static bool
+local_duplicate_constant_p (rtx_insn *insn, machine_mode mode, rtx cst)
+{
+  rtx_insn *next = NEXT_INSN (insn);
+  if (next)
+    {
+      rtx set = single_set (next);
+      if (set
+	  && GET_MODE (SET_DEST (set)) == mode
+	  && rtx_equal_p (SET_SRC (set), cst))
+	return true;
+    }
+
+  rtx_insn *prev = PREV_INSN (insn);
+  if (prev)
+    {
+      rtx set = single_set (prev);
+      if (set
+	  && GET_MODE (SET_DEST (set)) == mode
+	  && rtx_equal_p (SET_SRC (set), cst))
+	return true;
+    }
+  return false;
+}
+
 /* Compute a gain for chain conversion.  */
 
 int
@@ -1549,7 +1596,17 @@  timode_scalar_chain::compute_convert_gain ()
 	case CONST_INT:
 	  if (MEM_P (dst)
 	      && standard_sse_constant_p (src, V1TImode))
-	    igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (11) : 1;
+	    igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1;
+	  break;
+
+	case CONST_WIDE_INT:
+	  igain = local_duplicate_constant_p (insn, TImode, src)
+		  ? 0
+		  : timode_immed_const_gain (src);
+	  /* 2 x mov vs. vmovdqa.  */
+	  if (MEM_P (dst))
+	    igain += optimize_insn_for_size_p () ? COSTS_N_BYTES (3)
+						 : COSTS_N_INSNS (1);
 	  break;
 
 	case NOT:
@@ -1562,6 +1619,8 @@  timode_scalar_chain::compute_convert_gain ()
 	case IOR:
 	  if (!MEM_P (dst))
 	    igain = COSTS_N_INSNS (1);
+	  if (CONST_SCALAR_INT_P (XEXP (src, 1)))
+	    igain += timode_immed_const_gain (XEXP (src, 1));
 	  break;
 
 	case ASHIFT: