@@ -6344,7 +6344,7 @@ (define_expand "usadv8qi"
DONE;
})
-(define_expand "usdot_prodv8qi"
+(define_expand "usdot_prodv2siv8qi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6363,7 +6363,7 @@ (define_expand "usdot_prodv8qi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3));
+ emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
}
else
@@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi"
emit_move_insn (op3, CONST0_RTX (V4SImode));
emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
/* vec_perm (op0, 2, 3, 0, 1); */
emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi"
DONE;
})
-(define_expand "sdot_prodv8qi"
+(define_expand "sdot_prodv2siv8qi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6406,7 +6406,7 @@ (define_expand "sdot_prodv8qi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
}
else
@@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi"
emit_move_insn (op3, CONST0_RTX (V4SImode));
emit_insn (gen_extendv8qiv8hi2 (op1, operands[1]));
emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
/* vec_perm (op0, 2, 3, 0, 1); */
emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi"
})
-(define_expand "udot_prodv8qi"
+(define_expand "udot_prodv2siv8qi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6450,7 +6450,7 @@ (define_expand "udot_prodv8qi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3));
+ emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
}
else
@@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi"
emit_move_insn (op3, CONST0_RTX (V4SImode));
emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2]));
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
/* vec_perm (op0, 2, 3, 0, 1); */
emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6476,7 +6476,7 @@ (define_expand "udot_prodv8qi"
})
-(define_expand "usdot_prodv4hi"
+(define_expand "usdot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand:V4HI 2 "register_operand")
@@ -6492,12 +6492,12 @@ (define_expand "usdot_prodv4hi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
DONE;
})
-(define_expand "udot_prodv4hi"
+(define_expand "udot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand:V4HI 2 "register_operand")
@@ -6513,12 +6513,12 @@ (define_expand "udot_prodv4hi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
DONE;
})
-(define_expand "sdot_prodv4hi"
+(define_expand "sdot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand:V4HI 2 "register_operand")
@@ -6534,7 +6534,7 @@ (define_expand "sdot_prodv4hi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
DONE;
})
@@ -16727,7 +16727,7 @@ (define_mode_attr SDOT_PMADD_SUF
(define_mode_attr SDOT_VPDP_SUF
[(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")])
-(define_expand "sdot_prod<mode>"
+(define_expand "sdot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI2_AVX512VNNIBW 1 "register_operand")
(match_operand:VI2_AVX512VNNIBW 2 "register_operand")
@@ -16762,7 +16762,7 @@ (define_expand "sdot_prod<mode>"
;; Normally we use widen_mul_even/odd, but combine can't quite get it all
;; back together when madd is available.
-(define_expand "sdot_prodv4si"
+(define_expand "sdot_prodv2div4si"
[(match_operand:V2DI 0 "register_operand")
(match_operand:V4SI 1 "register_operand")
(match_operand:V4SI 2 "register_operand")
@@ -30190,7 +30190,7 @@ (define_insn "vpshldv_<mode>_maskz_1"
[(set_attr ("prefix") ("evex"))
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "usdot_prod<mode>"
+(define_expand "usdot_prod<ssedvecmodelower><mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
(match_operand:VI1_AVX512 1 "register_operand")
(match_operand:VI1_AVX512 2 "register_operand")
@@ -30228,9 +30228,9 @@ (define_expand "usdot_prod<mode>"
rtx sum = gen_reg_rtx (<ssedvecmode>mode);
emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res1, op1_lo,
op2_lo, sum));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res2, op1_hi,
op2_hi, operands[3]));
emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
}
@@ -31049,7 +31049,7 @@ (define_int_attr vpdotprodtype
(UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
(UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
-(define_expand "sdot_prod<mode>"
+(define_expand "sdot_prod<ssedvecmodelower><mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
(match_operand:VI1_AVX2 1 "register_operand")
(match_operand:VI1_AVX2 2 "register_operand")
@@ -31085,9 +31085,9 @@ (define_expand "sdot_prod<mode>"
rtx sum = gen_reg_rtx (<ssedvecmode>mode);
emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res1, op1_lo,
op2_lo, sum));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res2, op1_hi,
op2_hi, operands[3]));
emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
}
@@ -31095,7 +31095,7 @@ (define_expand "sdot_prod<mode>"
DONE;
})
-(define_expand "sdot_prodv64qi"
+(define_expand "sdot_prodv16siv64qi"
[(match_operand:V16SI 0 "register_operand")
(match_operand:V64QI 1 "register_operand")
(match_operand:V64QI 2 "register_operand")
@@ -31118,14 +31118,14 @@ (define_expand "sdot_prodv64qi"
rtx sum = gen_reg_rtx (V16SImode);
emit_move_insn (sum, CONST0_RTX (V16SImode));
- emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
- emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
+ emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
+ emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, operands[3]));
emit_insn (gen_addv16si3 (operands[0], res1, res2));
DONE;
})
-(define_expand "udot_prod<mode>"
+(define_expand "udot_prod<ssedvecmodelower><mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
(match_operand:VI1_AVX2 1 "register_operand")
(match_operand:VI1_AVX2 2 "register_operand")
@@ -31161,9 +31161,9 @@ (define_expand "udot_prod<mode>"
rtx sum = gen_reg_rtx (<ssedvecmode>mode);
emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res1, op1_lo,
op2_lo, sum));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res2, op1_hi,
op2_hi, operands[3]));
emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
}
@@ -31171,7 +31171,7 @@ (define_expand "udot_prod<mode>"
DONE;
})
-(define_expand "udot_prodv64qi"
+(define_expand "udot_prodv16siv64qi"
[(match_operand:V16SI 0 "register_operand")
(match_operand:V64QI 1 "register_operand")
(match_operand:V64QI 2 "register_operand")
@@ -31194,8 +31194,8 @@ (define_expand "udot_prodv64qi"
rtx sum = gen_reg_rtx (V16SImode);
emit_move_insn (sum, CONST0_RTX (V16SImode));
- emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
- emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
+ emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
+ emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, operands[3]));
emit_insn (gen_addv16si3 (operands[0], res1, res2));
DONE;
@@ -31301,7 +31301,7 @@ (define_int_attr vpdpwprodtype
(UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds")
(UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")])
-(define_expand "usdot_prod<mode>"
+(define_expand "usdot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI2_AVX2 1 "register_operand")
(match_operand:VI2_AVX2 2 "register_operand")
@@ -31319,7 +31319,7 @@ (define_expand "usdot_prod<mode>"
DONE;
})
-(define_expand "udot_prod<mode>"
+(define_expand "udot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI2_AVX2 1 "register_operand")
(match_operand:VI2_AVX2 2 "register_operand")