diff mbox series

i386: Add integer nabs instructions [PR101044]

Message ID CAFULd4YK+akDXtxyckanFpifrvWF+AELcjHiNTrj0bEBiok9Xg@mail.gmail.com
State New
Headers show
Series i386: Add integer nabs instructions [PR101044] | expand

Commit Message

Uros Bizjak July 1, 2021, 9:23 a.m. UTC
The patch adds integer nabs "(NEG (ABS (...)))" instructions, adds STV
conversion for them and adjusts the STV cost calculations accordingly.  When
a CMOV instruction is used to implement abs, the sign is determined from the
preceding negation of the operand, and CMOVS is used to select between
the negated and the non-negated value.

To implement nabs, just reverse the condition and emit CMOVNS instead.

The STV costs are adjusted for the inherent NEG of the nabs insn. V2DI NEG is
a somewhat costly operation, since it is implemented as a load of zero
followed by a SUB insn. OTOH, integer nabs with its inherent NEG is relatively
cheap, so some STV chains become less profitable for conversion.

The patch rewrites the operand scanner in compute_convert_gain as a switch
statement and reorders the case labels in general_scalar_to_vector_candidate_p
to benefit from fallthroughs and to remove the special processing of
andnot in the latter function.

gcc/

2021-07-01  Uroš Bizjak  <ubizjak@gmail.com>

    PR target/101044
    * config/i386/i386.md (*nabs<dwi>2_doubleword):
    New insn_and_split pattern.
    (*nabs<dwi>2_1): Ditto.
    * config/i386/i386-features.c
    (general_scalar_chain::compute_convert_gain):
    Handle (NEG (ABS (...))) RTX.  Rewrite src code
    scanner as switch statement.
    (general_scalar_chain::convert_insn):
    Handle (NEG (ABS (...))) RTX.
    (general_scalar_to_vector_candidate_p):
    Detect (NEG (ABS (...))) RTX.  Reorder case statements
    for (AND (NOT (...)) ...) fallthrough.

gcc/testsuite/

2021-07-01  Uroš Bizjak  <ubizjak@gmail.com>

    PR target/101044
    * gcc.target/i386/pr101044.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
index a25769ae478..cbd430a2ecf 100644
--- a/gcc/config/i386/i386-features.c
+++ b/gcc/config/i386/i386-features.c
@@ -544,71 +544,83 @@  general_scalar_chain::compute_convert_gain ()
 	  += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
       else if (MEM_P (src) && REG_P (dst))
 	igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
-      else if (GET_CODE (src) == ASHIFT
-	       || GET_CODE (src) == ASHIFTRT
-	       || GET_CODE (src) == LSHIFTRT)
-	{
-	  if (m == 2)
-	    {
-	      if (INTVAL (XEXP (src, 1)) >= 32)
-		igain += ix86_cost->add;
-	      else
-		igain += ix86_cost->shift_const;
-	    }
+      else
+	switch (GET_CODE (src))
+	  {
+	  case ASHIFT:
+	  case ASHIFTRT:
+	  case LSHIFTRT:
+	    if (m == 2)
+	      {
+		if (INTVAL (XEXP (src, 1)) >= 32)
+		  igain += ix86_cost->add;
+		else
+		  igain += ix86_cost->shift_const;
+	      }
 
-	  igain += ix86_cost->shift_const - ix86_cost->sse_op;
+	    igain += ix86_cost->shift_const - ix86_cost->sse_op;
 
-	  if (CONST_INT_P (XEXP (src, 0)))
-	    igain -= vector_const_cost (XEXP (src, 0));
-	}
-      else if (GET_CODE (src) == PLUS
-	       || GET_CODE (src) == MINUS
-	       || GET_CODE (src) == IOR
-	       || GET_CODE (src) == XOR
-	       || GET_CODE (src) == AND)
-	{
-	  igain += m * ix86_cost->add - ix86_cost->sse_op;
-	  /* Additional gain for andnot for targets without BMI.  */
-	  if (GET_CODE (XEXP (src, 0)) == NOT
-	      && !TARGET_BMI)
-	    igain += m * ix86_cost->add;
-
-	  if (CONST_INT_P (XEXP (src, 0)))
-	    igain -= vector_const_cost (XEXP (src, 0));
-	  if (CONST_INT_P (XEXP (src, 1)))
-	    igain -= vector_const_cost (XEXP (src, 1));
-	}
-      else if (GET_CODE (src) == NEG
-	       || GET_CODE (src) == NOT)
-	igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
-      else if (GET_CODE (src) == ABS
-	       || GET_CODE (src) == SMAX
-	       || GET_CODE (src) == SMIN
-	       || GET_CODE (src) == UMAX
-	       || GET_CODE (src) == UMIN)
-	{
-	  /* We do not have any conditional move cost, estimate it as a
-	     reg-reg move.  Comparisons are costed as adds.  */
-	  igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
-	  /* Integer SSE ops are all costed the same.  */
-	  igain -= ix86_cost->sse_op;
-	}
-      else if (GET_CODE (src) == COMPARE)
-	{
-	  /* Assume comparison cost is the same.  */
-	}
-      else if (CONST_INT_P (src))
-	{
-	  if (REG_P (dst))
-	    /* DImode can be immediate for TARGET_64BIT and SImode always.  */
-	    igain += m * COSTS_N_INSNS (1);
-	  else if (MEM_P (dst))
-	    igain += (m * ix86_cost->int_store[2]
-		     - ix86_cost->sse_store[sse_cost_idx]);
-	  igain -= vector_const_cost (src);
-	}
-      else
-	gcc_unreachable ();
+	    if (CONST_INT_P (XEXP (src, 0)))
+	      igain -= vector_const_cost (XEXP (src, 0));
+	    break;
+
+	  case AND:
+	  case IOR:
+	  case XOR:
+	  case PLUS:
+	  case MINUS:
+	    igain += m * ix86_cost->add - ix86_cost->sse_op;
+	    /* Additional gain for andnot for targets without BMI.  */
+	    if (GET_CODE (XEXP (src, 0)) == NOT
+		&& !TARGET_BMI)
+	      igain += m * ix86_cost->add;
+
+	    if (CONST_INT_P (XEXP (src, 0)))
+	      igain -= vector_const_cost (XEXP (src, 0));
+	    if (CONST_INT_P (XEXP (src, 1)))
+	      igain -= vector_const_cost (XEXP (src, 1));
+	    break;
+
+	  case NEG:
+	  case NOT:
+	    igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);
+
+	    if (GET_CODE (XEXP (src, 0)) != ABS)
+	      {
+		igain += m * ix86_cost->add;
+		break;
+	      }
+	    /* FALLTHRU */
+
+	  case ABS:
+	  case SMAX:
+	  case SMIN:
+	  case UMAX:
+	  case UMIN:
+	    /* We do not have any conditional move cost, estimate it as a
+	       reg-reg move.  Comparisons are costed as adds.  */
+	    igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
+	    /* Integer SSE ops are all costed the same.  */
+	    igain -= ix86_cost->sse_op;
+	    break;
+
+	  case COMPARE:
+	    /* Assume comparison cost is the same.  */
+	    break;
+
+	  case CONST_INT:
+	    if (REG_P (dst))
+	      /* DImode can be immediate for TARGET_64BIT and SImode always.  */
+	      igain += m * COSTS_N_INSNS (1);
+	    else if (MEM_P (dst))
+	      igain += (m * ix86_cost->int_store[2]
+			- ix86_cost->sse_store[sse_cost_idx]);
+	    igain -= vector_const_cost (src);
+	    break;
+
+	  default:
+	    gcc_unreachable ();
+	  }
 
       if (igain != 0 && dump_file)
 	{
@@ -1009,7 +1021,19 @@  general_scalar_chain::convert_insn (rtx_insn *insn)
 
     case NEG:
       src = XEXP (src, 0);
-      convert_op (&src, insn);
+
+      if (GET_CODE (src) == ABS)
+	{
+	  src = XEXP (src, 0);
+	  convert_op (&src, insn);
+	  subreg = gen_reg_rtx (vmode);
+	  emit_insn_before (gen_rtx_SET (subreg,
+					 gen_rtx_ABS (vmode, src)), insn);
+	  src = subreg;
+	}
+      else
+	convert_op (&src, insn);
+
       subreg = gen_reg_rtx (vmode);
       emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
       src = gen_rtx_MINUS (vmode, subreg, src);
@@ -1042,9 +1066,10 @@  general_scalar_chain::convert_insn (rtx_insn *insn)
 
       gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
       subreg = gen_rtx_SUBREG (V2DImode, src, 0);
-      emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
-						    copy_rtx_if_shared (subreg),
-						    copy_rtx_if_shared (subreg)),
+      emit_insn_before (gen_vec_interleave_lowv2di
+			(copy_rtx_if_shared (subreg),
+			 copy_rtx_if_shared (subreg),
+			 copy_rtx_if_shared (subreg)),
 			insn);
       dst = gen_rtx_REG (CCmode, FLAGS_REG);
       src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
@@ -1400,11 +1425,11 @@  general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
 	return false;
       /* Fallthru.  */
 
-    case PLUS:
-    case MINUS:
+    case AND:
     case IOR:
     case XOR:
-    case AND:
+    case PLUS:
+    case MINUS:
       if (!REG_P (XEXP (src, 1))
 	  && !MEM_P (XEXP (src, 1))
 	  && !CONST_INT_P (XEXP (src, 1)))
@@ -1413,18 +1438,32 @@  general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
       if (GET_MODE (XEXP (src, 1)) != mode
 	  && !CONST_INT_P (XEXP (src, 1)))
 	return false;
+
+      /* Check for andnot case.  */
+      if (GET_CODE (src) != AND
+	  || GET_CODE (XEXP (src, 0)) != NOT)
+	break;
+
+      src = XEXP (src, 0);
+      /* FALLTHRU */
+
+    case NOT:
       break;
 
+    case NEG:
+      /* Check for nabs case.  */
+      if (GET_CODE (XEXP (src, 0)) != ABS)
+	break;
+
+      src = XEXP (src, 0);
+      /* FALLTHRU */
+
     case ABS:
       if ((mode == DImode && !TARGET_AVX512VL)
 	  || (mode == SImode && !TARGET_SSSE3))
 	return false;
       break;
 
-    case NEG:
-    case NOT:
-      break;
-
     case REG:
       return true;
 
@@ -1438,12 +1477,8 @@  general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
 
   if (!REG_P (XEXP (src, 0))
       && !MEM_P (XEXP (src, 0))
-      && !CONST_INT_P (XEXP (src, 0))
-      /* Check for andnot case.  */
-      && (GET_CODE (src) != AND
-	  || GET_CODE (XEXP (src, 0)) != NOT
-	  || !REG_P (XEXP (XEXP (src, 0), 0))))
-      return false;
+      && !CONST_INT_P (XEXP (src, 0)))
+    return false;
 
   if (GET_MODE (XEXP (src, 0)) != mode
       && !CONST_INT_P (XEXP (src, 0)))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9b619e2f78f..156c6a94989 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -10305,6 +10305,50 @@  (define_insn_and_split "*abs<dwi>2_doubleword"
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
 })
 
+(define_insn_and_split "*nabs<dwi>2_doubleword"
+  [(set (match_operand:<DWI> 0 "register_operand")
+	(neg:<DWI>
+	  (abs:<DWI>
+	    (match_operand:<DWI> 1 "general_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_CMOVE
+   && ix86_pre_reload_split ()"
+   "#"
+   "&& 1"
+  [(parallel
+    [(set (reg:CCC FLAGS_REG)
+	  (ne:CCC (match_dup 1) (const_int 0)))
+     (set (match_dup 2) (neg:DWIH (match_dup 1)))])
+   (parallel
+    [(set (match_dup 5)
+	  (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+				(match_dup 4))
+		     (const_int 0)))
+     (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (reg:CCGOC FLAGS_REG)
+	   (compare:CCGOC
+	     (neg:DWIH (match_dup 5))
+	     (const_int 0)))
+      (set (match_dup 5)
+	   (neg:DWIH (match_dup 5)))])
+   (set (match_dup 0)
+        (if_then_else:DWIH
+	  (lt (reg:CCGOC FLAGS_REG) (const_int 0))
+	  (match_dup 2)
+	  (match_dup 1)))
+   (set (match_dup 3)
+        (if_then_else:DWIH
+	  (lt (reg:CCGOC FLAGS_REG) (const_int 0))
+	  (match_dup 5)
+	  (match_dup 4)))]
+{
+  operands[1] = force_reg (<DWI>mode, operands[1]);
+  operands[2] = gen_reg_rtx (<DWI>mode);
+
+  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
+})
+
 (define_insn_and_split "*abs<mode>2_1"
   [(set (match_operand:SWI 0 "register_operand")
 	(abs:SWI
@@ -10332,6 +10376,34 @@  (define_insn_and_split "*abs<mode>2_1"
   operands[2] = gen_reg_rtx (<MODE>mode);
 })
 
+(define_insn_and_split "*nabs<mode>2_1"
+  [(set (match_operand:SWI 0 "register_operand")
+	(neg:SWI
+	  (abs:SWI
+	    (match_operand:SWI 1 "general_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_CMOVE
+   && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
+   && ix86_pre_reload_split ()"
+   "#"
+   "&& 1"
+  [(parallel
+     [(set (reg:CCGOC FLAGS_REG)
+	   (compare:CCGOC
+	     (neg:SWI (match_dup 1))
+	     (const_int 0)))
+      (set (match_dup 2)
+	   (neg:SWI (match_dup 1)))])
+   (set (match_dup 0)
+        (if_then_else:SWI
+	  (lt (reg:CCGOC FLAGS_REG) (const_int 0))
+	  (match_dup 2)
+	  (match_dup 1)))]
+{
+  operands[1] = force_reg (<MODE>mode, operands[1]);
+  operands[2] = gen_reg_rtx (<MODE>mode);
+})
+
 (define_expand "<code>tf2"
   [(set (match_operand:TF 0 "register_operand")
 	(absneg:TF (match_operand:TF 1 "register_operand")))]
diff --git a/gcc/testsuite/gcc.target/i386/pr101044.c b/gcc/testsuite/gcc.target/i386/pr101044.c
new file mode 100644
index 00000000000..03df86debb8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101044.c
@@ -0,0 +1,9 @@ 
+/* PR target/101044 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=generic" } */
+/* { dg-final { scan-assembler-times "neg" 1 } } */
+
+int foo (int x)
+{
+  return (x < 0) ? x : -x;
+}