diff mbox series

[2/2] Extend usdot_prodv*qi with vpmaddwd when AVXVNNI/AVX512VNNI is not available.

Message ID 20240428055819.2313356-2-hongtao.liu@intel.com
State New
Headers show
Series [1/2,x86] Support dot_prod optabs for 64-bit vector. | expand

Commit Message

liuhongt April 28, 2024, 5:58 a.m. UTC
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

	* config/i386/sse.md (usdot_prodv*qi): Extend to VI1_AVX512
	with vpmaddwd when avxvnni/avx512vnni is not available.
---
 gcc/config/i386/sse.md | 55 +++++++++++++++++++++++++++++++-----------
 1 file changed, 41 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1bf50726e83..f57f36ae380 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -29955,21 +29955,48 @@  (define_insn "vpshldv_<mode>_maskz_1"
 
 (define_expand "usdot_prod<mode>"
   [(match_operand:<ssedvecmode> 0 "register_operand")
-   (match_operand:VI1_AVX512VNNI 1 "register_operand")
-   (match_operand:VI1_AVX512VNNI 2 "register_operand")
+   (match_operand:VI1_AVX512 1 "register_operand")
+   (match_operand:VI1_AVX512 2 "register_operand")
    (match_operand:<ssedvecmode> 3 "register_operand")]
-  "((<MODE_SIZE> == 64 && TARGET_EVEX512)
-    || ((TARGET_AVX512VNNI && TARGET_AVX512VL)
-	|| TARGET_AVXVNNI))"
-{
-  operands[1] = lowpart_subreg (<ssedvecmode>mode,
-				force_reg (<MODE>mode, operands[1]),
-				<MODE>mode);
-  operands[2] = lowpart_subreg (<ssedvecmode>mode,
-				force_reg (<MODE>mode, operands[2]),
-				<MODE>mode);
-  emit_insn (gen_vpdpbusd_<ssedvecmodelower> (operands[0], operands[3],
-				  operands[1], operands[2]));
+  "TARGET_SSE2"
+{
+  if (<MODE_SIZE> == 64
+     ? TARGET_AVX512VNNI
+     : ((TARGET_AVX512VNNI && TARGET_AVX512VL) || TARGET_AVXVNNI))
+    {
+      operands[1] = lowpart_subreg (<ssedvecmode>mode,
+				    force_reg (<MODE>mode, operands[1]),
+				    <MODE>mode);
+      operands[2] = lowpart_subreg (<ssedvecmode>mode,
+				    force_reg (<MODE>mode, operands[2]),
+				    <MODE>mode);
+      emit_insn (gen_vpdpbusd_<ssedvecmodelower> (operands[0], operands[3],
+						  operands[1], operands[2]));
+    }
+  else
+    {
+      /* Emulate by widening both inputs and using sdot_prod (vpmaddwd).  */
+      rtx op1_lo = gen_reg_rtx (<sseunpackmode>mode);
+      rtx op1_hi = gen_reg_rtx (<sseunpackmode>mode);
+      rtx op2_lo = gen_reg_rtx (<sseunpackmode>mode);
+      rtx op2_hi = gen_reg_rtx (<sseunpackmode>mode);
+
+      emit_insn (gen_vec_unpacku_lo_<mode> (op1_lo, operands[1]));
+      emit_insn (gen_vec_unpacks_lo_<mode> (op2_lo, operands[2]));
+      emit_insn (gen_vec_unpacku_hi_<mode> (op1_hi, operands[1]));
+      emit_insn (gen_vec_unpacks_hi_<mode> (op2_hi, operands[2]));
+
+      rtx res1 = gen_reg_rtx (<ssedvecmode>mode);
+      rtx res2 = gen_reg_rtx (<ssedvecmode>mode);
+      rtx sum = gen_reg_rtx (<ssedvecmode>mode);
+
+      emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
+      emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+						    op2_lo, sum));
+      emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+						    op2_hi, operands[3]));
+      emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
+    }
   DONE;
 })