diff mbox series

[2/3] aarch64: Handle cost for vector add reduction

Message ID 20240828043331.3359171-2-quic_apinski@quicinc.com
State New
Headers show
Series [1/3] expand: Add debug dump on the cost for `popcount==1` expand | expand

Commit Message

Andrew Pinski Aug. 28, 2024, 4:33 a.m. UTC
While working on PR 114224 (popcount costs are not modeled), I noticed
that addv (vector reduction add) was not handled either. This patch adds
that handling to aarch64_rtx_costs. Some zero/sign extends are performed by
the reduction instructions themselves, so those need to be handled too.

gcc/ChangeLog:

	* config/aarch64/aarch64.cc (aarch64_rtx_addv_costs): New function.
	(aarch64_rtx_costs): For unspec_addv and unspec_faddv, call
	aarch64_rtx_addv_costs.
	For unspec_addv under a zero_extend, call aarch64_rtx_addv_costs.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/config/aarch64/aarch64.cc | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 40dacfcf2e7..7607b85e3cf 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -14097,6 +14097,31 @@  aarch64_abd_rtx_p (rtx x)
   return rtx_equal_p (maxop0, minop0) && rtx_equal_p (maxop1, minop1);
 }
 
+/* Add to *COST the cost of an ADDV/FADDV reduction whose operand is
+   OP0, costing for speed if SPEED is true.  Always returns true:
+   the total cost of the operation has now been calculated.  */
+static bool
+aarch64_rtx_addv_costs (rtx op0, int *cost, bool speed)
+{
+  const struct cpu_cost_table *extra_cost
+    = aarch64_tune_params.insn_extra_cost;
+
+  if (speed)
+    *cost += extra_cost->vect.alu;
+
+  /* An extend of OP0 is part of the instruction; cost only its operand.  */
+  if (GET_CODE (op0) == ZERO_EXTEND
+      || GET_CODE (op0) == SIGN_EXTEND)
+   {
+     *cost += rtx_cost (XEXP (op0, 0), GET_MODE (XEXP (op0, 0)),
+			UNSPEC, 0, speed);
+     return true;
+   }
+
+   *cost += rtx_cost (op0, GET_MODE (op0), UNSPEC, 0, speed);
+   return true;
+}
+
 /* Calculate the cost of calculating X, storing it in *COST.  Result
    is true if the total cost of the operation has now been calculated.  */
 static bool
@@ -14912,6 +14937,11 @@  cost_plus:
     case ZERO_EXTEND:
 
       op0 = XEXP (x, 0);
+      /* Addv with an implicit zero extend. */
+      if (GET_CODE (op0) == UNSPEC
+	  && XINT (op0, 1) == UNSPEC_ADDV)
+	return aarch64_rtx_addv_costs (XVECEXP (op0, 0, 0),
+				       cost, speed);
       /* If a value is written in SI mode, then zero extended to DI
 	 mode, the operation will in general be free as a write to
 	 a 'w' register implicitly zeroes the upper bits of an 'x'
@@ -15378,6 +15408,11 @@  cost_plus:
 
           return false;
         }
+      /* The vector integer/floating point add reduction instructions. */
+      if (XINT (x, 1) == UNSPEC_ADDV
+	  || XINT (x, 1) == UNSPEC_FADDV)
+	return aarch64_rtx_addv_costs (XVECEXP (x, 0, 0), cost, speed);
+
       break;
 
     case TRUNCATE: