@@ -610,6 +610,10 @@
(define_mode_iterator VI1_AVX512VNNI
[(V64QI "TARGET_AVX512VNNI && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI])
+(define_mode_iterator VI1_AVX512VNNIBW ; QI operands of [su]dot_prod<mode>
+ [(V64QI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
+ (V32QI "TARGET_AVX2") V16QI])
+
(define_mode_iterator VI12_256_512_AVX512VL
[(V64QI "TARGET_EVEX512") (V32QI "TARGET_AVX512VL")
(V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL")])
@@ -627,6 +631,9 @@
[(V32HI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
(V16HI "TARGET_AVX2") V8HI])
+(define_mode_iterator VI2_AVX10_2 ; HI operands of usdot_prod/udot_prod<mode>
+ [(V32HI "TARGET_AVX10_2_512") V16HI V8HI])
+
(define_mode_iterator VI4_AVX
[(V8SI "TARGET_AVX") V4SI])
@@ -31232,12 +31239,13 @@
(define_expand "sdot_prod<mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
- (match_operand:VI1_AVX2 1 "register_operand")
- (match_operand:VI1_AVX2 2 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 1 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 2 "register_operand")
(match_operand:<ssedvecmode> 3 "register_operand")]
"TARGET_SSE2"
{
- if (TARGET_AVXVNNIINT8)
+ if ((<MODE_SIZE> == 64 && TARGET_AVX10_2_512)
+ || (<MODE_SIZE> < 64 && (TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256)))
{
operands[1] = lowpart_subreg (<ssedvecmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -31276,44 +31284,15 @@
DONE;
})
-(define_expand "sdot_prodv64qi"
- [(match_operand:V16SI 0 "register_operand")
- (match_operand:V64QI 1 "register_operand")
- (match_operand:V64QI 2 "register_operand")
- (match_operand:V16SI 3 "register_operand")]
- "(TARGET_AVX512VNNI || TARGET_AVX512BW) && TARGET_EVEX512"
-{
- /* Emulate with vpdpwssd. */
- rtx op1_lo = gen_reg_rtx (V32HImode);
- rtx op1_hi = gen_reg_rtx (V32HImode);
- rtx op2_lo = gen_reg_rtx (V32HImode);
- rtx op2_hi = gen_reg_rtx (V32HImode);
-
- emit_insn (gen_vec_unpacks_lo_v64qi (op1_lo, operands[1]));
- emit_insn (gen_vec_unpacks_lo_v64qi (op2_lo, operands[2]));
- emit_insn (gen_vec_unpacks_hi_v64qi (op1_hi, operands[1]));
- emit_insn (gen_vec_unpacks_hi_v64qi (op2_hi, operands[2]));
-
- rtx res1 = gen_reg_rtx (V16SImode);
- rtx res2 = gen_reg_rtx (V16SImode);
- rtx sum = gen_reg_rtx (V16SImode);
-
- emit_move_insn (sum, CONST0_RTX (V16SImode));
- emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
- emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
-
- emit_insn (gen_addv16si3 (operands[0], res1, res2));
- DONE;
-})
-
(define_expand "udot_prod<mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
- (match_operand:VI1_AVX2 1 "register_operand")
- (match_operand:VI1_AVX2 2 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 1 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 2 "register_operand")
(match_operand:<ssedvecmode> 3 "register_operand")]
"TARGET_SSE2"
{
- if (TARGET_AVXVNNIINT8)
+ if ((<MODE_SIZE> == 64 && TARGET_AVX10_2_512)
+ || (<MODE_SIZE> < 64 && (TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256)))
{
operands[1] = lowpart_subreg (<ssedvecmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -31352,36 +31331,6 @@
DONE;
})
-(define_expand "udot_prodv64qi"
- [(match_operand:V16SI 0 "register_operand")
- (match_operand:V64QI 1 "register_operand")
- (match_operand:V64QI 2 "register_operand")
- (match_operand:V16SI 3 "register_operand")]
- "(TARGET_AVX512VNNI || TARGET_AVX512BW) && TARGET_EVEX512"
-{
- /* Emulate with vpdpwssd. */
- rtx op1_lo = gen_reg_rtx (V32HImode);
- rtx op1_hi = gen_reg_rtx (V32HImode);
- rtx op2_lo = gen_reg_rtx (V32HImode);
- rtx op2_hi = gen_reg_rtx (V32HImode);
-
- emit_insn (gen_vec_unpacku_lo_v64qi (op1_lo, operands[1]));
- emit_insn (gen_vec_unpacku_lo_v64qi (op2_lo, operands[2]));
- emit_insn (gen_vec_unpacku_hi_v64qi (op1_hi, operands[1]));
- emit_insn (gen_vec_unpacku_hi_v64qi (op2_hi, operands[2]));
-
- rtx res1 = gen_reg_rtx (V16SImode);
- rtx res2 = gen_reg_rtx (V16SImode);
- rtx sum = gen_reg_rtx (V16SImode);
-
- emit_move_insn (sum, CONST0_RTX (V16SImode));
- emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
- emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
-
- emit_insn (gen_addv16si3 (operands[0], res1, res2));
- DONE;
-})
-
(define_insn "vpdp<vpdotprodtype>_<mode>"
[(set (match_operand:VI4_AVX 0 "register_operand" "=v")
(unspec:VI4_AVX
@@ -31757,10 +31706,10 @@
(define_expand "usdot_prod<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX2 1 "register_operand")
- (match_operand:VI2_AVX2 2 "register_operand")
+ (match_operand:VI2_AVX10_2 1 "register_operand")
+ (match_operand:VI2_AVX10_2 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
- "TARGET_AVXVNNIINT16"
+ "TARGET_AVXVNNIINT16 || TARGET_AVX10_2_256"
{
operands[1] = lowpart_subreg (<sseunpackmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -31775,10 +31724,10 @@
(define_expand "udot_prod<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX2 1 "register_operand")
- (match_operand:VI2_AVX2 2 "register_operand")
+ (match_operand:VI2_AVX10_2 1 "register_operand")
+ (match_operand:VI2_AVX10_2 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
- "TARGET_AVXVNNIINT16"
+ "TARGET_AVXVNNIINT16 || TARGET_AVX10_2_256"
{
operands[1] = lowpart_subreg (<sseunpackmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -2,19 +2,24 @@
/* { dg-options "-O2 -mavxvnniint16" } */
/* { dg-require-effective-target avxvnniint16 } */
+#ifndef AVX10_2
#define AVXVNNIINT16
+#endif
+
#ifndef CHECK
#define CHECK "avx-check.h"
#endif
+#include CHECK
+#include "vnniint16-auto-vectorize-1.c"
+
#ifndef TEST
#define TEST avx_test
#endif
-#include CHECK
-#include "vnniint16-auto-vectorize-1.c"
-
+#ifndef N
#define N 256
+#endif
short a_i16[N];
unsigned short b_u16[N], c_u16[N], d_u16[N];
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2 -O2" } */
+/* { dg-final { scan-assembler "vpdpwusd\t" } } */
+/* { dg-final { scan-assembler "vpdpwuud\t" } } */
+/* Compile the shared source below with the AVX10.2 options given above.  */
+#include "vnniint16-auto-vectorize-1.c"
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef CHECK
+#define CHECK "avx512f-check.h"
+#endif
+
+#define N 512
+/* Configuration macros set before pulling in the shared test body.  */
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#define AVX512F_LEN 512
+#include "vnniint16-auto-vectorize-2.c"
@@ -2,19 +2,25 @@
/* { dg-options "-O2 -mavxvnniint8" } */
/* { dg-require-effective-target avxvnniint8 } */
+#ifndef AVX10_2
#define AVXVNNIINT8
+#endif
+
#ifndef CHECK
#define CHECK "avx-check.h"
#endif
+#include CHECK
+#include "vnniint8-auto-vectorize-1.c"
+
#ifndef TEST
#define TEST avx_test
#endif
-#include CHECK
-#include "vnniint8-auto-vectorize-1.c"
-
+#ifndef N
#define N 256
+#endif
+
char a_i8[N], b_i8[N];
unsigned char c_u8[N], d_u8[N];
int i8_exp, i8_ref;
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2 -O2" } */
+/* { dg-final { scan-assembler "vpdpbssd\t" } } */
+/* { dg-final { scan-assembler "vpdpbuud\t" } } */
+/* Compile the shared source below with the AVX10.2 options given above.  */
+#include "vnniint8-auto-vectorize-1.c"
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef CHECK
+#define CHECK "avx512f-check.h"
+#endif
+
+#define N 512
+/* Configuration macros set before pulling in the shared test body.  */
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#define AVX512F_LEN 512
+#include "vnniint8-auto-vectorize-2.c"