Message ID | 20240710140602.1707875-6-victor.donascimento@arm.com |
---|---|
State | New |
Headers | show |
Series | Make `dot_prod' a convert-type optab | expand |
On Wed, Jul 10, 2024 at 10:10 PM Victor Do Nascimento <victor.donascimento@arm.com> wrote: > > Following the migration of the dot_prod optab from a direct to a > conversion-type optab, ensure all back-end patterns incorporate the > second machine mode into pattern names. The patch LGTM. BTW you can use existing <ssedvecmodelower> instead of new <fourwayacc> and <sseunpackmodelower> instead of <twowayacc> > > gcc/ChangeLog: > > * config/i386/mmx.md (usdot_prodv8qi): Deleted. > (usdot_prodv2siv8qi): New. > (sdot_prodv8qi): Deleted. > (sdot_prodv2siv8qi): New. > (udot_prodv8qi): Deleted. > (udot_prodv2siv8qi): New. > (usdot_prodv4hi): Deleted. > (usdot_prodv2siv4hi): New. > (udot_prodv4hi): Deleted. > (udot_prodv2siv4hi): New. > (sdot_prodv4hi): Deleted. > (sdot_prodv2siv4hi): New. > * config/i386/sse.md (fourwayacc): New. > (twowayacc): New. > (sdot_prod<mode>): Deleted. > (sdot_prod<twowayacc><mode>): New. > (sdot_prodv4si): Deleted. > (sdot_prodv2div4si): New. > (usdot_prod<mode>): Deleted. > (usdot_prod<fourwayacc><mode>): New. > (sdot_prod<mode>): Deleted. > (sdot_prod<fourwayacc><mode>): New. > (sdot_prodv64qi): Deleted. > (sdot_prodv16siv64qi): New. > (udot_prod<mode>): Deleted. > (udot_prod<fourwayacc><mode>): New. > (udot_prodv64qi): Deleted. > (udot_prodv16qiv64qi): New. > (usdot_prod<mode>): Deleted. > (usdot_prod<twowayacc><mode>): New. > (udot_prod<mode>): Deleted. > (udot_prod<twowayacc><mode>): New. 
> --- > gcc/config/i386/mmx.md | 30 +++++++++++++-------------- > gcc/config/i386/sse.md | 47 +++++++++++++++++++++++++----------------- > 2 files changed, 43 insertions(+), 34 deletions(-) > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index 94d3a6e5692..d78739b033d 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -6344,7 +6344,7 @@ (define_expand "usadv8qi" > DONE; > }) > > -(define_expand "usdot_prodv8qi" > +(define_expand "usdot_prodv2siv8qi" > [(match_operand:V2SI 0 "register_operand") > (match_operand:V8QI 1 "register_operand") > (match_operand:V8QI 2 "register_operand") > @@ -6363,7 +6363,7 @@ (define_expand "usdot_prodv8qi" > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > rtx op0 = gen_reg_rtx (V4SImode); > > - emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3)); > + emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3)); > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); > } > else > @@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi" > emit_move_insn (op3, CONST0_RTX (V4SImode)); > emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1])); > emit_insn (gen_extendv8qiv8hi2 (op2, operands[2])); > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); > > /* vec_perm (op0, 2, 3, 0, 1); */ > emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); > @@ -6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi" > DONE; > }) > > -(define_expand "sdot_prodv8qi" > +(define_expand "sdot_prodv2siv8qi" > [(match_operand:V2SI 0 "register_operand") > (match_operand:V8QI 1 "register_operand") > (match_operand:V8QI 2 "register_operand") > @@ -6406,7 +6406,7 @@ (define_expand "sdot_prodv8qi" > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > rtx op0 = gen_reg_rtx (V4SImode); > > - emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3)); > + emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3)); > emit_move_insn (operands[0], 
lowpart_subreg (V2SImode, op0, V4SImode)); > } > else > @@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi" > emit_move_insn (op3, CONST0_RTX (V4SImode)); > emit_insn (gen_extendv8qiv8hi2 (op1, operands[1])); > emit_insn (gen_extendv8qiv8hi2 (op2, operands[2])); > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); > > /* vec_perm (op0, 2, 3, 0, 1); */ > emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); > @@ -6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi" > > }) > > -(define_expand "udot_prodv8qi" > +(define_expand "udot_prodv2siv8qi" > [(match_operand:V2SI 0 "register_operand") > (match_operand:V8QI 1 "register_operand") > (match_operand:V8QI 2 "register_operand") > @@ -6450,7 +6450,7 @@ (define_expand "udot_prodv8qi" > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > rtx op0 = gen_reg_rtx (V4SImode); > > - emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3)); > + emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3)); > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); > } > else > @@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi" > emit_move_insn (op3, CONST0_RTX (V4SImode)); > emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1])); > emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2])); > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); > > /* vec_perm (op0, 2, 3, 0, 1); */ > emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); > @@ -6476,7 +6476,7 @@ (define_expand "udot_prodv8qi" > > }) > > -(define_expand "usdot_prodv4hi" > +(define_expand "usdot_prodv2siv4hi" > [(match_operand:V2SI 0 "register_operand") > (match_operand:V4HI 1 "register_operand") > (match_operand:V4HI 2 "register_operand") > @@ -6492,12 +6492,12 @@ (define_expand "usdot_prodv4hi" > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > rtx op0 = gen_reg_rtx (V4SImode); > > - emit_insn (gen_usdot_prodv8hi 
(op0, op1, op2, op3)); > + emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3)); > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); > DONE; > }) > > -(define_expand "udot_prodv4hi" > +(define_expand "udot_prodv2siv4hi" > [(match_operand:V2SI 0 "register_operand") > (match_operand:V4HI 1 "register_operand") > (match_operand:V4HI 2 "register_operand") > @@ -6513,12 +6513,12 @@ (define_expand "udot_prodv4hi" > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > rtx op0 = gen_reg_rtx (V4SImode); > > - emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3)); > + emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3)); > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); > DONE; > }) > > -(define_expand "sdot_prodv4hi" > +(define_expand "sdot_prodv2siv4hi" > [(match_operand:V2SI 0 "register_operand") > (match_operand:V4HI 1 "register_operand") > (match_operand:V4HI 2 "register_operand") > @@ -6534,7 +6534,7 @@ (define_expand "sdot_prodv4hi" > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > rtx op0 = gen_reg_rtx (V4SImode); > > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); > DONE; > }) > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index bda66d5e121..861b87bb50f 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -1195,6 +1195,15 @@ (define_mode_attr ssexmmmode > (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF") > (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")]) > > +;; Mapping of input type to 4-way accumulated type > +(define_mode_attr fourwayacc > + [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")]) > + > +;; Mapping of input type to 2-way accumulated type > +(define_mode_attr twowayacc > + [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si") > + (V32QI "v16hi") (V16QI "v8hi")]) > + > ;; Pointer size override for scalar modes (Intel asm dialect) > 
(define_mode_attr iptr > [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q") > @@ -16712,7 +16721,7 @@ (define_mode_attr SDOT_PMADD_SUF > (define_mode_attr SDOT_VPDP_SUF > [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")]) > > -(define_expand "sdot_prod<mode>" > +(define_expand "sdot_prod<twowayacc><mode>" > [(match_operand:<sseunpackmode> 0 "register_operand") > (match_operand:VI2_AVX512VNNIBW 1 "register_operand") > (match_operand:VI2_AVX512VNNIBW 2 "register_operand") > @@ -16747,7 +16756,7 @@ (define_expand "sdot_prod<mode>" > > ;; Normally we use widen_mul_even/odd, but combine can't quite get it all > ;; back together when madd is available. > -(define_expand "sdot_prodv4si" > +(define_expand "sdot_prodv2div4si" > [(match_operand:V2DI 0 "register_operand") > (match_operand:V4SI 1 "register_operand") > (match_operand:V4SI 2 "register_operand") > @@ -30290,7 +30299,7 @@ (define_insn "vpshldv_<mode>_maskz_1" > [(set_attr ("prefix") ("evex")) > (set_attr "mode" "<sseinsnmode>")]) > > -(define_expand "usdot_prod<mode>" > +(define_expand "usdot_prod<fourwayacc><mode>" > [(match_operand:<ssedvecmode> 0 "register_operand") > (match_operand:VI1_AVX512 1 "register_operand") > (match_operand:VI1_AVX512 2 "register_operand") > @@ -30328,9 +30337,9 @@ (define_expand "usdot_prod<mode>" > rtx sum = gen_reg_rtx (<ssedvecmode>mode); > > emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo, > op2_lo, sum)); > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi, > op2_hi, operands[3])); > emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); > } > @@ -31149,7 +31158,7 @@ (define_int_attr vpdotprodtype > (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds") > (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")]) > > -(define_expand "sdot_prod<mode>" > 
+(define_expand "sdot_prod<fourwayacc><mode>" > [(match_operand:<ssedvecmode> 0 "register_operand") > (match_operand:VI1_AVX2 1 "register_operand") > (match_operand:VI1_AVX2 2 "register_operand") > @@ -31185,9 +31194,9 @@ (define_expand "sdot_prod<mode>" > rtx sum = gen_reg_rtx (<ssedvecmode>mode); > > emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo, > op2_lo, sum)); > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi, > op2_hi, operands[3])); > emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); > } > @@ -31195,7 +31204,7 @@ (define_expand "sdot_prod<mode>" > DONE; > }) > > -(define_expand "sdot_prodv64qi" > +(define_expand "sdot_prodv16siv64qi" > [(match_operand:V16SI 0 "register_operand") > (match_operand:V64QI 1 "register_operand") > (match_operand:V64QI 2 "register_operand") > @@ -31218,14 +31227,14 @@ (define_expand "sdot_prodv64qi" > rtx sum = gen_reg_rtx (V16SImode); > > emit_move_insn (sum, CONST0_RTX (V16SImode)); > - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum)); > - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3])); > + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum)); > + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, operands[3])); > > emit_insn (gen_addv16si3 (operands[0], res1, res2)); > DONE; > }) > > -(define_expand "udot_prod<mode>" > +(define_expand "udot_prod<fourwayacc><mode>" > [(match_operand:<ssedvecmode> 0 "register_operand") > (match_operand:VI1_AVX2 1 "register_operand") > (match_operand:VI1_AVX2 2 "register_operand") > @@ -31261,9 +31270,9 @@ (define_expand "udot_prod<mode>" > rtx sum = gen_reg_rtx (<ssedvecmode>mode); > > emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, > 
+ emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo, > op2_lo, sum)); > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi, > op2_hi, operands[3])); > emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); > } > @@ -31271,7 +31280,7 @@ (define_expand "udot_prod<mode>" > DONE; > }) > > -(define_expand "udot_prodv64qi" > +(define_expand "udot_prodv16qiv64qi" > [(match_operand:V16SI 0 "register_operand") > (match_operand:V64QI 1 "register_operand") > (match_operand:V64QI 2 "register_operand") > @@ -31294,8 +31303,8 @@ (define_expand "udot_prodv64qi" > rtx sum = gen_reg_rtx (V16SImode); > > emit_move_insn (sum, CONST0_RTX (V16SImode)); > - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum)); > - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3])); > + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum)); > + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, operands[3])); > > emit_insn (gen_addv16si3 (operands[0], res1, res2)); > DONE; > @@ -31401,7 +31410,7 @@ (define_int_attr vpdpwprodtype > (UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds") > (UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")]) > > -(define_expand "usdot_prod<mode>" > +(define_expand "usdot_prod<twowayacc><mode>" > [(match_operand:<sseunpackmode> 0 "register_operand") > (match_operand:VI2_AVX2 1 "register_operand") > (match_operand:VI2_AVX2 2 "register_operand") > @@ -31419,7 +31428,7 @@ (define_expand "usdot_prod<mode>" > DONE; > }) > > -(define_expand "udot_prod<mode>" > +(define_expand "udot_prod<twowayacc><mode>" > [(match_operand:<sseunpackmode> 0 "register_operand") > (match_operand:VI2_AVX2 1 "register_operand") > (match_operand:VI2_AVX2 2 "register_operand") > -- > 2.34.1 >
> -----Original Message----- > From: Hongtao Liu <crazylht@gmail.com> > Sent: Thursday, July 11, 2024 9:45 AM > To: Victor Do Nascimento <victor.donascimento@arm.com> > Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com; > Richard.Earnshaw@arm.com > Subject: Re: [PATCH 05/10] i386: Fix dot_prod backend patterns for mmx and > sse targets > > On Wed, Jul 10, 2024 at 10:10 PM Victor Do Nascimento > <victor.donascimento@arm.com> wrote: > > > > Following the migration of the dot_prod optab from a direct to a > > conversion-type optab, ensure all back-end patterns incorporate the > > second machine mode into pattern names. > The patch LGTM. BTW you can use existing <ssedvecmodelower> instead of > new <fourwayacc> and <sseunpackmodelower> instead of <twowayacc> > > > > gcc/ChangeLog: > > > > * config/i386/mmx.md (usdot_prodv8qi): Deleted. > > (usdot_prodv2siv8qi): New. Hi Victor, I suppose all the patterns are renamed, rather than deleted and newly added, right? If that is the case, I suppose the log might be better and easier to understand if changed to something like: (old pattern): Renamed to ... (new pattern): this. Thx, Haochen > > (sdot_prodv8qi): Deleted. > > (sdot_prodv2siv8qi): New. > > (udot_prodv8qi): Deleted. > > (udot_prodv2siv8qi): New. > > (usdot_prodv4hi): Deleted. > > (usdot_prodv2siv4hi): New. > > (udot_prodv4hi): Deleted. > > (udot_prodv2siv4hi): New. > > (sdot_prodv4hi): Deleted. > > (sdot_prodv2siv4hi): New. > > * config/i386/sse.md (fourwayacc): New. > > (twowayacc): New. > > (sdot_prod<mode>): Deleted. > > (sdot_prod<twowayacc><mode>): New. > > (sdot_prodv4si): Deleted. > > (sdot_prodv2div4si): New. > > (usdot_prod<mode>): Deleted. > > (usdot_prod<fourwayacc><mode>): New. > > (sdot_prod<mode>): Deleted. > > (sdot_prod<fourwayacc><mode>): New. > > (sdot_prodv64qi): Deleted. > > (sdot_prodv16siv64qi): New. > > (udot_prod<mode>): Deleted. > > (udot_prod<fourwayacc><mode>): New. > > (udot_prodv64qi): Deleted. > > (udot_prodv16qiv64qi): New. 
> > (usdot_prod<mode>): Deleted. > > (usdot_prod<twowayacc><mode>): New. > > (udot_prod<mode>): Deleted. > > (udot_prod<twowayacc><mode>): New. > > --- > > gcc/config/i386/mmx.md | 30 +++++++++++++-------------- > > gcc/config/i386/sse.md | 47 +++++++++++++++++++++++++---------------- > - > > 2 files changed, 43 insertions(+), 34 deletions(-) > > > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index > > 94d3a6e5692..d78739b033d 100644 > > --- a/gcc/config/i386/mmx.md > > +++ b/gcc/config/i386/mmx.md > > @@ -6344,7 +6344,7 @@ (define_expand "usadv8qi" > > DONE; > > }) > > > > -(define_expand "usdot_prodv8qi" > > +(define_expand "usdot_prodv2siv8qi" > > [(match_operand:V2SI 0 "register_operand") > > (match_operand:V8QI 1 "register_operand") > > (match_operand:V8QI 2 "register_operand") @@ -6363,7 +6363,7 @@ > > (define_expand "usdot_prodv8qi" > > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > > rtx op0 = gen_reg_rtx (V4SImode); > > > > - emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3)); > > + emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3)); > > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, > V4SImode)); > > } > > else > > @@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi" > > emit_move_insn (op3, CONST0_RTX (V4SImode)); > > emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1])); > > emit_insn (gen_extendv8qiv8hi2 (op2, operands[2])); > > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); > > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); > > > > /* vec_perm (op0, 2, 3, 0, 1); */ > > emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ > > -6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi" > > DONE; > > }) > > > > -(define_expand "sdot_prodv8qi" > > +(define_expand "sdot_prodv2siv8qi" > > [(match_operand:V2SI 0 "register_operand") > > (match_operand:V8QI 1 "register_operand") > > (match_operand:V8QI 2 "register_operand") @@ -6406,7 +6406,7 @@ > > (define_expand "sdot_prodv8qi" > > rtx 
op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > > rtx op0 = gen_reg_rtx (V4SImode); > > > > - emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3)); > > + emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3)); > > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, > V4SImode)); > > } > > else > > @@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi" > > emit_move_insn (op3, CONST0_RTX (V4SImode)); > > emit_insn (gen_extendv8qiv8hi2 (op1, operands[1])); > > emit_insn (gen_extendv8qiv8hi2 (op2, operands[2])); > > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); > > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); > > > > /* vec_perm (op0, 2, 3, 0, 1); */ > > emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ > > -6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi" > > > > }) > > > > -(define_expand "udot_prodv8qi" > > +(define_expand "udot_prodv2siv8qi" > > [(match_operand:V2SI 0 "register_operand") > > (match_operand:V8QI 1 "register_operand") > > (match_operand:V8QI 2 "register_operand") @@ -6450,7 +6450,7 @@ > > (define_expand "udot_prodv8qi" > > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > > rtx op0 = gen_reg_rtx (V4SImode); > > > > - emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3)); > > + emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3)); > > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, > V4SImode)); > > } > > else > > @@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi" > > emit_move_insn (op3, CONST0_RTX (V4SImode)); > > emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1])); > > emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2])); > > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); > > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); > > > > /* vec_perm (op0, 2, 3, 0, 1); */ > > emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ > > -6476,7 +6476,7 @@ (define_expand "udot_prodv8qi" > > > > }) > > > > -(define_expand "usdot_prodv4hi" > > +(define_expand 
"usdot_prodv2siv4hi" > > [(match_operand:V2SI 0 "register_operand") > > (match_operand:V4HI 1 "register_operand") > > (match_operand:V4HI 2 "register_operand") @@ -6492,12 +6492,12 > @@ > > (define_expand "usdot_prodv4hi" > > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > > rtx op0 = gen_reg_rtx (V4SImode); > > > > - emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3)); > > + emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3)); > > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, > V4SImode)); > > DONE; > > }) > > > > -(define_expand "udot_prodv4hi" > > +(define_expand "udot_prodv2siv4hi" > > [(match_operand:V2SI 0 "register_operand") > > (match_operand:V4HI 1 "register_operand") > > (match_operand:V4HI 2 "register_operand") @@ -6513,12 +6513,12 > @@ > > (define_expand "udot_prodv4hi" > > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > > rtx op0 = gen_reg_rtx (V4SImode); > > > > - emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3)); > > + emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3)); > > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, > V4SImode)); > > DONE; > > }) > > > > -(define_expand "sdot_prodv4hi" > > +(define_expand "sdot_prodv2siv4hi" > > [(match_operand:V2SI 0 "register_operand") > > (match_operand:V4HI 1 "register_operand") > > (match_operand:V4HI 2 "register_operand") @@ -6534,7 +6534,7 @@ > > (define_expand "sdot_prodv4hi" > > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); > > rtx op0 = gen_reg_rtx (V4SImode); > > > > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); > > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); > > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, > V4SImode)); > > DONE; > > }) > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index > > bda66d5e121..861b87bb50f 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -1195,6 +1195,15 @@ (define_mode_attr ssexmmmode > > (V16SF "V4SF") (V8SF 
"V4SF") (V4SF "V4SF") > > (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")]) > > > > +;; Mapping of input type to 4-way accumulated type (define_mode_attr > > +fourwayacc > > + [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")]) > > + > > +;; Mapping of input type to 2-way accumulated type (define_mode_attr > > +twowayacc > > + [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si") > > + (V32QI "v16hi") (V16QI "v8hi")]) > > + > > ;; Pointer size override for scalar modes (Intel asm dialect) > > (define_mode_attr iptr > > [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q") @@ -16712,7 > > +16721,7 @@ (define_mode_attr SDOT_PMADD_SUF (define_mode_attr > > SDOT_VPDP_SUF > > [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")]) > > > > -(define_expand "sdot_prod<mode>" > > +(define_expand "sdot_prod<twowayacc><mode>" > > [(match_operand:<sseunpackmode> 0 "register_operand") > > (match_operand:VI2_AVX512VNNIBW 1 "register_operand") > > (match_operand:VI2_AVX512VNNIBW 2 "register_operand") @@ - > 16747,7 > > +16756,7 @@ (define_expand "sdot_prod<mode>" > > > > ;; Normally we use widen_mul_even/odd, but combine can't quite get it > > all ;; back together when madd is available. 
> > -(define_expand "sdot_prodv4si" > > +(define_expand "sdot_prodv2div4si" > > [(match_operand:V2DI 0 "register_operand") > > (match_operand:V4SI 1 "register_operand") > > (match_operand:V4SI 2 "register_operand") @@ -30290,7 +30299,7 @@ > > (define_insn "vpshldv_<mode>_maskz_1" > > [(set_attr ("prefix") ("evex")) > > (set_attr "mode" "<sseinsnmode>")]) > > > > -(define_expand "usdot_prod<mode>" > > +(define_expand "usdot_prod<fourwayacc><mode>" > > [(match_operand:<ssedvecmode> 0 "register_operand") > > (match_operand:VI1_AVX512 1 "register_operand") > > (match_operand:VI1_AVX512 2 "register_operand") @@ -30328,9 > > +30337,9 @@ (define_expand "usdot_prod<mode>" > > rtx sum = gen_reg_rtx (<ssedvecmode>mode); > > > > emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); > > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, > > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, > > + op1_lo, > > op2_lo, sum)); > > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, > > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, > > + op1_hi, > > op2_hi, operands[3])); > > emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); > > } > > @@ -31149,7 +31158,7 @@ (define_int_attr vpdotprodtype > > (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds") > > (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")]) > > > > -(define_expand "sdot_prod<mode>" > > +(define_expand "sdot_prod<fourwayacc><mode>" > > [(match_operand:<ssedvecmode> 0 "register_operand") > > (match_operand:VI1_AVX2 1 "register_operand") > > (match_operand:VI1_AVX2 2 "register_operand") @@ -31185,9 > +31194,9 > > @@ (define_expand "sdot_prod<mode>" > > rtx sum = gen_reg_rtx (<ssedvecmode>mode); > > > > emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); > > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, > > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, > > + op1_lo, > > op2_lo, sum)); > > - emit_insn 
(gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, > > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, > > + op1_hi, > > op2_hi, operands[3])); > > emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); > > } > > @@ -31195,7 +31204,7 @@ (define_expand "sdot_prod<mode>" > > DONE; > > }) > > > > -(define_expand "sdot_prodv64qi" > > +(define_expand "sdot_prodv16siv64qi" > > [(match_operand:V16SI 0 "register_operand") > > (match_operand:V64QI 1 "register_operand") > > (match_operand:V64QI 2 "register_operand") @@ -31218,14 +31227,14 > > @@ (define_expand "sdot_prodv64qi" > > rtx sum = gen_reg_rtx (V16SImode); > > > > emit_move_insn (sum, CONST0_RTX (V16SImode)); > > - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum)); > > - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3])); > > + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum)); > > + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, > > + operands[3])); > > > > emit_insn (gen_addv16si3 (operands[0], res1, res2)); > > DONE; > > }) > > > > -(define_expand "udot_prod<mode>" > > +(define_expand "udot_prod<fourwayacc><mode>" > > [(match_operand:<ssedvecmode> 0 "register_operand") > > (match_operand:VI1_AVX2 1 "register_operand") > > (match_operand:VI1_AVX2 2 "register_operand") @@ -31261,9 > +31270,9 > > @@ (define_expand "udot_prod<mode>" > > rtx sum = gen_reg_rtx (<ssedvecmode>mode); > > > > emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); > > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, > > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, > > + op1_lo, > > op2_lo, sum)); > > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, > > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, > > + op1_hi, > > op2_hi, operands[3])); > > emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); > > } > > @@ -31271,7 +31280,7 @@ (define_expand "udot_prod<mode>" > > DONE; > > }) > > > > 
-(define_expand "udot_prodv64qi" > > +(define_expand "udot_prodv16qiv64qi" > > [(match_operand:V16SI 0 "register_operand") > > (match_operand:V64QI 1 "register_operand") > > (match_operand:V64QI 2 "register_operand") @@ -31294,8 +31303,8 > @@ > > (define_expand "udot_prodv64qi" > > rtx sum = gen_reg_rtx (V16SImode); > > > > emit_move_insn (sum, CONST0_RTX (V16SImode)); > > - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum)); > > - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3])); > > + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum)); > > + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, > > + operands[3])); > > > > emit_insn (gen_addv16si3 (operands[0], res1, res2)); > > DONE; > > @@ -31401,7 +31410,7 @@ (define_int_attr vpdpwprodtype > > (UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds") > > (UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")]) > > > > -(define_expand "usdot_prod<mode>" > > +(define_expand "usdot_prod<twowayacc><mode>" > > [(match_operand:<sseunpackmode> 0 "register_operand") > > (match_operand:VI2_AVX2 1 "register_operand") > > (match_operand:VI2_AVX2 2 "register_operand") @@ -31419,7 > +31428,7 > > @@ (define_expand "usdot_prod<mode>" > > DONE; > > }) > > > > -(define_expand "udot_prod<mode>" > > +(define_expand "udot_prod<twowayacc><mode>" > > [(match_operand:<sseunpackmode> 0 "register_operand") > > (match_operand:VI2_AVX2 1 "register_operand") > > (match_operand:VI2_AVX2 2 "register_operand") > > -- > > 2.34.1 > > > > > -- > BR, > Hongtao
On 7/12/24 03:23, Jiang, Haochen wrote: >> -----Original Message----- >> From: Hongtao Liu <crazylht@gmail.com> >> Sent: Thursday, July 11, 2024 9:45 AM >> To: Victor Do Nascimento <victor.donascimento@arm.com> >> Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com; >> Richard.Earnshaw@arm.com >> Subject: Re: [PATCH 05/10] i386: Fix dot_prod backend patterns for mmx and >> sse targets >> >> On Wed, Jul 10, 2024 at 10:10 PM Victor Do Nascimento >> <victor.donascimento@arm.com> wrote: >>> >>> Following the migration of the dot_prod optab from a direct to a >>> conversion-type optab, ensure all back-end patterns incorporate the >>> second machine mode into pattern names. >> The patch LGTM. BTW you can use existing <ssedvecmodelower> instead of >> new <fourwayacc> and <sseunpackmodelower> instead of <twowayacc> >>> >>> gcc/ChangeLog: >>> >>> * config/i386/mmx.md (usdot_prodv8qi): Deleted. >>> (usdot_prodv2siv8qi): New. > > Hi Victor, > > I suppose all the patterns are renamed not deleted and new right? > If that is the case, I suppose the log might be better and easier to understand > if changed to something like: > > (old pattern): Renamed to ... > (new pattern): this. > > Thx, > Haochen You're right, it's a straightforward renaming. I will amend the changelogs as per your suggestion. Thanks for the tip! Victor >>> (sdot_prodv8qi): Deleted. >>> (sdot_prodv2siv8qi): New. >>> (udot_prodv8qi): Deleted. >>> (udot_prodv2siv8qi): New. >>> (usdot_prodv4hi): Deleted. >>> (usdot_prodv2siv4hi): New. >>> (udot_prodv4hi): Deleted. >>> (udot_prodv2siv4hi): New. >>> (sdot_prodv4hi): Deleted. >>> (sdot_prodv2siv4hi): New. >>> * config/i386/sse.md (fourwayacc): New. >>> (twowayacc): New. >>> (sdot_prod<mode>): Deleted. >>> (sdot_prod<twowayacc><mode>): New. >>> (sdot_prodv4si): Deleted. >>> (sdot_prodv2div4si): New. >>> (usdot_prod<mode>): Deleted. >>> (usdot_prod<fourwayacc><mode>): New. >>> (sdot_prod<mode>): Deleted. >>> (sdot_prod<fourwayacc><mode>): New. 
>>> (sdot_prodv64qi): Deleted. >>> (sdot_prodv16siv64qi): New. >>> (udot_prod<mode>): Deleted. >>> (udot_prod<fourwayacc><mode>): New. >>> (udot_prodv64qi): Deleted. >>> (udot_prodv16qiv64qi): New. >>> (usdot_prod<mode>): Deleted. >>> (usdot_prod<twowayacc><mode>): New. >>> (udot_prod<mode>): Deleted. >>> (udot_prod<twowayacc><mode>): New. >>> --- >>> gcc/config/i386/mmx.md | 30 +++++++++++++-------------- >>> gcc/config/i386/sse.md | 47 +++++++++++++++++++++++++---------------- >> - >>> 2 files changed, 43 insertions(+), 34 deletions(-) >>> >>> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index >>> 94d3a6e5692..d78739b033d 100644 >>> --- a/gcc/config/i386/mmx.md >>> +++ b/gcc/config/i386/mmx.md >>> @@ -6344,7 +6344,7 @@ (define_expand "usadv8qi" >>> DONE; >>> }) >>> >>> -(define_expand "usdot_prodv8qi" >>> +(define_expand "usdot_prodv2siv8qi" >>> [(match_operand:V2SI 0 "register_operand") >>> (match_operand:V8QI 1 "register_operand") >>> (match_operand:V8QI 2 "register_operand") @@ -6363,7 +6363,7 @@ >>> (define_expand "usdot_prodv8qi" >>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); >>> rtx op0 = gen_reg_rtx (V4SImode); >>> >>> - emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3)); >>> + emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3)); >>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, >> V4SImode)); >>> } >>> else >>> @@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi" >>> emit_move_insn (op3, CONST0_RTX (V4SImode)); >>> emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1])); >>> emit_insn (gen_extendv8qiv8hi2 (op2, operands[2])); >>> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); >>> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); >>> >>> /* vec_perm (op0, 2, 3, 0, 1); */ >>> emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ >>> -6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi" >>> DONE; >>> }) >>> >>> -(define_expand "sdot_prodv8qi" >>> +(define_expand "sdot_prodv2siv8qi" >>> 
[(match_operand:V2SI 0 "register_operand") >>> (match_operand:V8QI 1 "register_operand") >>> (match_operand:V8QI 2 "register_operand") @@ -6406,7 +6406,7 @@ >>> (define_expand "sdot_prodv8qi" >>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); >>> rtx op0 = gen_reg_rtx (V4SImode); >>> >>> - emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3)); >>> + emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3)); >>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, >> V4SImode)); >>> } >>> else >>> @@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi" >>> emit_move_insn (op3, CONST0_RTX (V4SImode)); >>> emit_insn (gen_extendv8qiv8hi2 (op1, operands[1])); >>> emit_insn (gen_extendv8qiv8hi2 (op2, operands[2])); >>> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); >>> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); >>> >>> /* vec_perm (op0, 2, 3, 0, 1); */ >>> emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ >>> -6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi" >>> >>> }) >>> >>> -(define_expand "udot_prodv8qi" >>> +(define_expand "udot_prodv2siv8qi" >>> [(match_operand:V2SI 0 "register_operand") >>> (match_operand:V8QI 1 "register_operand") >>> (match_operand:V8QI 2 "register_operand") @@ -6450,7 +6450,7 @@ >>> (define_expand "udot_prodv8qi" >>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); >>> rtx op0 = gen_reg_rtx (V4SImode); >>> >>> - emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3)); >>> + emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3)); >>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, >> V4SImode)); >>> } >>> else >>> @@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi" >>> emit_move_insn (op3, CONST0_RTX (V4SImode)); >>> emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1])); >>> emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2])); >>> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); >>> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); >>> >>> /* vec_perm (op0, 2, 3, 
0, 1); */ >>> emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ >>> -6476,7 +6476,7 @@ (define_expand "udot_prodv8qi" >>> >>> }) >>> >>> -(define_expand "usdot_prodv4hi" >>> +(define_expand "usdot_prodv2siv4hi" >>> [(match_operand:V2SI 0 "register_operand") >>> (match_operand:V4HI 1 "register_operand") >>> (match_operand:V4HI 2 "register_operand") @@ -6492,12 +6492,12 >> @@ >>> (define_expand "usdot_prodv4hi" >>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); >>> rtx op0 = gen_reg_rtx (V4SImode); >>> >>> - emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3)); >>> + emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3)); >>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, >> V4SImode)); >>> DONE; >>> }) >>> >>> -(define_expand "udot_prodv4hi" >>> +(define_expand "udot_prodv2siv4hi" >>> [(match_operand:V2SI 0 "register_operand") >>> (match_operand:V4HI 1 "register_operand") >>> (match_operand:V4HI 2 "register_operand") @@ -6513,12 +6513,12 >> @@ >>> (define_expand "udot_prodv4hi" >>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); >>> rtx op0 = gen_reg_rtx (V4SImode); >>> >>> - emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3)); >>> + emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3)); >>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, >> V4SImode)); >>> DONE; >>> }) >>> >>> -(define_expand "sdot_prodv4hi" >>> +(define_expand "sdot_prodv2siv4hi" >>> [(match_operand:V2SI 0 "register_operand") >>> (match_operand:V4HI 1 "register_operand") >>> (match_operand:V4HI 2 "register_operand") @@ -6534,7 +6534,7 @@ >>> (define_expand "sdot_prodv4hi" >>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); >>> rtx op0 = gen_reg_rtx (V4SImode); >>> >>> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); >>> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); >>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, >> V4SImode)); >>> DONE; >>> }) >>> diff --git a/gcc/config/i386/sse.md 
b/gcc/config/i386/sse.md index >>> bda66d5e121..861b87bb50f 100644 >>> --- a/gcc/config/i386/sse.md >>> +++ b/gcc/config/i386/sse.md >>> @@ -1195,6 +1195,15 @@ (define_mode_attr ssexmmmode >>> (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF") >>> (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")]) >>> >>> +;; Mapping of input type to 4-way accumulated type (define_mode_attr >>> +fourwayacc >>> + [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")]) >>> + >>> +;; Mapping of input type to 2-way accumulated type (define_mode_attr >>> +twowayacc >>> + [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si") >>> + (V32QI "v16hi") (V16QI "v8hi")]) >>> + >>> ;; Pointer size override for scalar modes (Intel asm dialect) >>> (define_mode_attr iptr >>> [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q") @@ -16712,7 >>> +16721,7 @@ (define_mode_attr SDOT_PMADD_SUF (define_mode_attr >>> SDOT_VPDP_SUF >>> [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")]) >>> >>> -(define_expand "sdot_prod<mode>" >>> +(define_expand "sdot_prod<twowayacc><mode>" >>> [(match_operand:<sseunpackmode> 0 "register_operand") >>> (match_operand:VI2_AVX512VNNIBW 1 "register_operand") >>> (match_operand:VI2_AVX512VNNIBW 2 "register_operand") @@ - >> 16747,7 >>> +16756,7 @@ (define_expand "sdot_prod<mode>" >>> >>> ;; Normally we use widen_mul_even/odd, but combine can't quite get it >>> all ;; back together when madd is available. 
>>> -(define_expand "sdot_prodv4si" >>> +(define_expand "sdot_prodv2div4si" >>> [(match_operand:V2DI 0 "register_operand") >>> (match_operand:V4SI 1 "register_operand") >>> (match_operand:V4SI 2 "register_operand") @@ -30290,7 +30299,7 @@ >>> (define_insn "vpshldv_<mode>_maskz_1" >>> [(set_attr ("prefix") ("evex")) >>> (set_attr "mode" "<sseinsnmode>")]) >>> >>> -(define_expand "usdot_prod<mode>" >>> +(define_expand "usdot_prod<fourwayacc><mode>" >>> [(match_operand:<ssedvecmode> 0 "register_operand") >>> (match_operand:VI1_AVX512 1 "register_operand") >>> (match_operand:VI1_AVX512 2 "register_operand") @@ -30328,9 >>> +30337,9 @@ (define_expand "usdot_prod<mode>" >>> rtx sum = gen_reg_rtx (<ssedvecmode>mode); >>> >>> emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); >>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, >>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, >>> + op1_lo, >>> op2_lo, sum)); >>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, >>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, >>> + op1_hi, >>> op2_hi, operands[3])); >>> emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); >>> } >>> @@ -31149,7 +31158,7 @@ (define_int_attr vpdotprodtype >>> (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds") >>> (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")]) >>> >>> -(define_expand "sdot_prod<mode>" >>> +(define_expand "sdot_prod<fourwayacc><mode>" >>> [(match_operand:<ssedvecmode> 0 "register_operand") >>> (match_operand:VI1_AVX2 1 "register_operand") >>> (match_operand:VI1_AVX2 2 "register_operand") @@ -31185,9 >> +31194,9 >>> @@ (define_expand "sdot_prod<mode>" >>> rtx sum = gen_reg_rtx (<ssedvecmode>mode); >>> >>> emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); >>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, >>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, >>> + op1_lo, >>> op2_lo, sum)); >>> - emit_insn 
(gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, >>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, >>> + op1_hi, >>> op2_hi, operands[3])); >>> emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); >>> } >>> @@ -31195,7 +31204,7 @@ (define_expand "sdot_prod<mode>" >>> DONE; >>> }) >>> >>> -(define_expand "sdot_prodv64qi" >>> +(define_expand "sdot_prodv16siv64qi" >>> [(match_operand:V16SI 0 "register_operand") >>> (match_operand:V64QI 1 "register_operand") >>> (match_operand:V64QI 2 "register_operand") @@ -31218,14 +31227,14 >>> @@ (define_expand "sdot_prodv64qi" >>> rtx sum = gen_reg_rtx (V16SImode); >>> >>> emit_move_insn (sum, CONST0_RTX (V16SImode)); >>> - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum)); >>> - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3])); >>> + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum)); >>> + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, >>> + operands[3])); >>> >>> emit_insn (gen_addv16si3 (operands[0], res1, res2)); >>> DONE; >>> }) >>> >>> -(define_expand "udot_prod<mode>" >>> +(define_expand "udot_prod<fourwayacc><mode>" >>> [(match_operand:<ssedvecmode> 0 "register_operand") >>> (match_operand:VI1_AVX2 1 "register_operand") >>> (match_operand:VI1_AVX2 2 "register_operand") @@ -31261,9 >> +31270,9 >>> @@ (define_expand "udot_prod<mode>" >>> rtx sum = gen_reg_rtx (<ssedvecmode>mode); >>> >>> emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); >>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, >>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, >>> + op1_lo, >>> op2_lo, sum)); >>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, >>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, >>> + op1_hi, >>> op2_hi, operands[3])); >>> emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); >>> } >>> @@ -31271,7 +31280,7 @@ (define_expand "udot_prod<mode>" >>> DONE; >>> }) >>> >>> 
-(define_expand "udot_prodv64qi" >>> +(define_expand "udot_prodv16siv64qi" >>> [(match_operand:V16SI 0 "register_operand") >>> (match_operand:V64QI 1 "register_operand") >>> (match_operand:V64QI 2 "register_operand") @@ -31294,8 +31303,8 >> @@ >>> (define_expand "udot_prodv64qi" >>> rtx sum = gen_reg_rtx (V16SImode); >>> >>> emit_move_insn (sum, CONST0_RTX (V16SImode)); >>> - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum)); >>> - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3])); >>> + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum)); >>> + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, >>> + operands[3])); >>> >>> emit_insn (gen_addv16si3 (operands[0], res1, res2)); >>> DONE; >>> @@ -31401,7 +31410,7 @@ (define_int_attr vpdpwprodtype >>> (UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds") >>> (UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")]) >>> >>> -(define_expand "usdot_prod<mode>" >>> +(define_expand "usdot_prod<twowayacc><mode>" >>> [(match_operand:<sseunpackmode> 0 "register_operand") >>> (match_operand:VI2_AVX2 1 "register_operand") >>> (match_operand:VI2_AVX2 2 "register_operand") @@ -31419,7 >> +31428,7 >>> @@ (define_expand "usdot_prod<mode>" >>> DONE; >>> }) >>> >>> -(define_expand "udot_prod<mode>" >>> +(define_expand "udot_prod<twowayacc><mode>" >>> [(match_operand:<sseunpackmode> 0 "register_operand") >>> (match_operand:VI2_AVX2 1 "register_operand") >>> (match_operand:VI2_AVX2 2 "register_operand") >>> -- >>> 2.34.1 >>> >> >> >> -- >> BR, >> Hongtao
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 94d3a6e5692..d78739b033d 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -6344,7 +6344,7 @@ (define_expand "usadv8qi" DONE; }) -(define_expand "usdot_prodv8qi" +(define_expand "usdot_prodv2siv8qi" [(match_operand:V2SI 0 "register_operand") (match_operand:V8QI 1 "register_operand") (match_operand:V8QI 2 "register_operand") @@ -6363,7 +6363,7 @@ (define_expand "usdot_prodv8qi" rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3)); + emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); } else @@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi" emit_move_insn (op3, CONST0_RTX (V4SImode)); emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1])); emit_insn (gen_extendv8qiv8hi2 (op2, operands[2])); - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); /* vec_perm (op0, 2, 3, 0, 1); */ emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ -6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi" DONE; }) -(define_expand "sdot_prodv8qi" +(define_expand "sdot_prodv2siv8qi" [(match_operand:V2SI 0 "register_operand") (match_operand:V8QI 1 "register_operand") (match_operand:V8QI 2 "register_operand") @@ -6406,7 +6406,7 @@ (define_expand "sdot_prodv8qi" rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3)); + emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); } else @@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi" emit_move_insn (op3, CONST0_RTX (V4SImode)); emit_insn (gen_extendv8qiv8hi2 (op1, operands[1])); emit_insn (gen_extendv8qiv8hi2 (op2, operands[2])); - emit_insn (gen_sdot_prodv8hi 
(op0, op1, op2, op3)); + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); /* vec_perm (op0, 2, 3, 0, 1); */ emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ -6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi" }) -(define_expand "udot_prodv8qi" +(define_expand "udot_prodv2siv8qi" [(match_operand:V2SI 0 "register_operand") (match_operand:V8QI 1 "register_operand") (match_operand:V8QI 2 "register_operand") @@ -6450,7 +6450,7 @@ (define_expand "udot_prodv8qi" rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3)); + emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); } else @@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi" emit_move_insn (op3, CONST0_RTX (V4SImode)); emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1])); emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2])); - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); /* vec_perm (op0, 2, 3, 0, 1); */ emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ -6476,7 +6476,7 @@ (define_expand "udot_prodv8qi" }) -(define_expand "usdot_prodv4hi" +(define_expand "usdot_prodv2siv4hi" [(match_operand:V2SI 0 "register_operand") (match_operand:V4HI 1 "register_operand") (match_operand:V4HI 2 "register_operand") @@ -6492,12 +6492,12 @@ (define_expand "usdot_prodv4hi" rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3)); + emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); DONE; }) -(define_expand "udot_prodv4hi" +(define_expand "udot_prodv2siv4hi" [(match_operand:V2SI 0 "register_operand") (match_operand:V4HI 1 "register_operand") (match_operand:V4HI 2 "register_operand") @@ -6513,12 +6513,12 @@ 
(define_expand "udot_prodv4hi" rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3)); + emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); DONE; }) -(define_expand "sdot_prodv4hi" +(define_expand "sdot_prodv2siv4hi" [(match_operand:V2SI 0 "register_operand") (match_operand:V4HI 1 "register_operand") (match_operand:V4HI 2 "register_operand") @@ -6534,7 +6534,7 @@ (define_expand "sdot_prodv4hi" rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); DONE; }) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index bda66d5e121..861b87bb50f 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1195,6 +1195,15 @@ (define_mode_attr ssexmmmode (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF") (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")]) +;; Mapping of input type to 4-way accumulated type +(define_mode_attr fourwayacc + [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")]) + +;; Mapping of input type to 2-way accumulated type +(define_mode_attr twowayacc + [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si") + (V32QI "v16hi") (V16QI "v8hi")]) + ;; Pointer size override for scalar modes (Intel asm dialect) (define_mode_attr iptr [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q") @@ -16712,7 +16721,7 @@ (define_mode_attr SDOT_PMADD_SUF (define_mode_attr SDOT_VPDP_SUF [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")]) -(define_expand "sdot_prod<mode>" +(define_expand "sdot_prod<twowayacc><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") (match_operand:VI2_AVX512VNNIBW 1 "register_operand") (match_operand:VI2_AVX512VNNIBW 2 "register_operand") @@ -16747,7 +16756,7 @@ (define_expand 
"sdot_prod<mode>" ;; Normally we use widen_mul_even/odd, but combine can't quite get it all ;; back together when madd is available. -(define_expand "sdot_prodv4si" +(define_expand "sdot_prodv2div4si" [(match_operand:V2DI 0 "register_operand") (match_operand:V4SI 1 "register_operand") (match_operand:V4SI 2 "register_operand") @@ -30290,7 +30299,7 @@ (define_insn "vpshldv_<mode>_maskz_1" [(set_attr ("prefix") ("evex")) (set_attr "mode" "<sseinsnmode>")]) -(define_expand "usdot_prod<mode>" +(define_expand "usdot_prod<fourwayacc><mode>" [(match_operand:<ssedvecmode> 0 "register_operand") (match_operand:VI1_AVX512 1 "register_operand") (match_operand:VI1_AVX512 2 "register_operand") @@ -30328,9 +30337,9 @@ (define_expand "usdot_prod<mode>" rtx sum = gen_reg_rtx (<ssedvecmode>mode); emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo, op2_lo, sum)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi, op2_hi, operands[3])); emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); } @@ -31149,7 +31158,7 @@ (define_int_attr vpdotprodtype (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds") (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")]) -(define_expand "sdot_prod<mode>" +(define_expand "sdot_prod<fourwayacc><mode>" [(match_operand:<ssedvecmode> 0 "register_operand") (match_operand:VI1_AVX2 1 "register_operand") (match_operand:VI1_AVX2 2 "register_operand") @@ -31185,9 +31194,9 @@ (define_expand "sdot_prod<mode>" rtx sum = gen_reg_rtx (<ssedvecmode>mode); emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo, op2_lo, sum)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, + emit_insn 
(gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi, op2_hi, operands[3])); emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); } @@ -31195,7 +31204,7 @@ (define_expand "sdot_prod<mode>" DONE; }) -(define_expand "sdot_prodv64qi" +(define_expand "sdot_prodv16siv64qi" [(match_operand:V16SI 0 "register_operand") (match_operand:V64QI 1 "register_operand") (match_operand:V64QI 2 "register_operand") @@ -31218,14 +31227,14 @@ (define_expand "sdot_prodv64qi" rtx sum = gen_reg_rtx (V16SImode); emit_move_insn (sum, CONST0_RTX (V16SImode)); - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum)); - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3])); + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum)); + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, operands[3])); emit_insn (gen_addv16si3 (operands[0], res1, res2)); DONE; }) -(define_expand "udot_prod<mode>" +(define_expand "udot_prod<fourwayacc><mode>" [(match_operand:<ssedvecmode> 0 "register_operand") (match_operand:VI1_AVX2 1 "register_operand") (match_operand:VI1_AVX2 2 "register_operand") @@ -31261,9 +31270,9 @@ (define_expand "udot_prod<mode>" rtx sum = gen_reg_rtx (<ssedvecmode>mode); emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo, op2_lo, sum)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi, op2_hi, operands[3])); emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); } @@ -31271,7 +31280,7 @@ (define_expand "udot_prod<mode>" DONE; }) -(define_expand "udot_prodv64qi" +(define_expand "udot_prodv16siv64qi" [(match_operand:V16SI 0 "register_operand") (match_operand:V64QI 1 "register_operand") (match_operand:V64QI 2 "register_operand") @@ -31294,8 +31303,8 @@ (define_expand "udot_prodv64qi" rtx sum = gen_reg_rtx 
(V16SImode); emit_move_insn (sum, CONST0_RTX (V16SImode)); - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum)); - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3])); + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum)); + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, operands[3])); emit_insn (gen_addv16si3 (operands[0], res1, res2)); DONE; @@ -31401,7 +31410,7 @@ (define_int_attr vpdpwprodtype (UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds") (UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")]) -(define_expand "usdot_prod<mode>" +(define_expand "usdot_prod<twowayacc><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") (match_operand:VI2_AVX2 1 "register_operand") (match_operand:VI2_AVX2 2 "register_operand") @@ -31419,7 +31428,7 @@ (define_expand "usdot_prod<mode>" DONE; }) -(define_expand "udot_prod<mode>" +(define_expand "udot_prod<twowayacc><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") (match_operand:VI2_AVX2 1 "register_operand") (match_operand:VI2_AVX2 2 "register_operand")