Message ID | 20241102125828.29183-8-rdapp.gcc@gmail.com |
---|---|
State | New |
Headers | show |
Series | Rebased: Add maskload else operand. | expand |
On Sat, Nov 2, 2024 at 8:58 PM Robin Dapp <rdapp.gcc@gmail.com> wrote: > > From: Robin Dapp <rdapp@ventanamicro.com> > > This patch adds a zero else operand to masked loads, in particular the > masked gather load builtins that are used for gather vectorization. > > gcc/ChangeLog: > > * config/i386/i386-expand.cc (ix86_expand_special_args_builtin): > Add else-operand handling. > (ix86_expand_builtin): Ditto. > * config/i386/predicates.md (vcvtne2ps2bf_parallel): New > predicate. > (maskload_else_operand): Ditto. > * config/i386/sse.md: Use predicate. > --- > gcc/config/i386/i386-expand.cc | 26 ++++++-- > gcc/config/i386/predicates.md | 4 ++ > gcc/config/i386/sse.md | 112 +++++++++++++++++++++------------ > 3 files changed, 97 insertions(+), 45 deletions(-) > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index 0de0e842731..6c61f9f87c2 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -12995,10 +12995,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, > { > tree arg; > rtx pat, op; > - unsigned int i, nargs, arg_adjust, memory; > + unsigned int i, nargs, arg_adjust, memory = -1; > unsigned int constant = 100; > bool aligned_mem = false; > - rtx xops[4]; > + rtx xops[4] = {}; > + bool add_els = false; > enum insn_code icode = d->icode; > const struct insn_data_d *insn_p = &insn_data[icode]; > machine_mode tmode = insn_p->operand[0].mode; > @@ -13125,6 +13126,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, > case V4DI_FTYPE_PCV4DI_V4DI: > case V4SI_FTYPE_PCV4SI_V4SI: > case V2DI_FTYPE_PCV2DI_V2DI: > + /* Two actual args but an additional else operand. */ > + add_els = true; > + /* Fallthru. 
*/ > case VOID_FTYPE_INT_INT64: > nargs = 2; > klass = load; > @@ -13397,6 +13401,12 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, > xops[i]= op; > } > > + if (add_els) > + { > + xops[i] = CONST0_RTX (GET_MODE (xops[0])); > + nargs++; > + } > + > switch (nargs) > { > case 0: > @@ -13653,7 +13663,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, > enum insn_code icode, icode2; > tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); > tree arg0, arg1, arg2, arg3, arg4; > - rtx op0, op1, op2, op3, op4, pat, pat2, insn; > + rtx op0, op1, op2, op3, op4, opels, pat, pat2, insn; > machine_mode mode0, mode1, mode2, mode3, mode4; > unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); > HOST_WIDE_INT bisa, bisa2; > @@ -15560,12 +15570,15 @@ rdseed_step: > op3 = copy_to_reg (op3); > op3 = lowpart_subreg (mode3, op3, GET_MODE (op3)); > } > + > if (!insn_data[icode].operand[5].predicate (op4, mode4)) > { > - error ("the last argument must be scale 1, 2, 4, 8"); > - return const0_rtx; > + error ("the last argument must be scale 1, 2, 4, 8"); > + return const0_rtx; > } > > + opels = CONST0_RTX (GET_MODE (subtarget)); > + > /* Optimize. If mask is known to have all high bits set, > replace op0 with pc_rtx to signal that the instruction > overwrites the whole destination and doesn't use its > @@ -15634,7 +15647,8 @@ rdseed_step: > } > } > > - pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); > + pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4, opels); > + > if (! 
pat) > return const0_rtx; > emit_insn (pat); > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > index 053312bbe27..7c7d8f61f11 100644 > --- a/gcc/config/i386/predicates.md > +++ b/gcc/config/i386/predicates.md > @@ -2346,3 +2346,7 @@ (define_predicate "apx_evex_add_memory_operand" > > return true; > }) > + > +(define_predicate "maskload_else_operand" > + (and (match_code "const_int,const_vector") > + (match_test "op == CONST0_RTX (GET_MODE (op))"))) > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 36f8567b66f..41c1badbc00 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -28632,7 +28632,7 @@ (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>" > (set_attr "btver2_decode" "vector") > (set_attr "mode" "<sseinsnmode>")]) > > -(define_expand "maskload<mode><sseintvecmodelower>" > +(define_expand "maskload<mode><sseintvecmodelower>_1" > [(set (match_operand:V48_128_256 0 "register_operand") > (unspec:V48_128_256 > [(match_operand:<sseintvecmode> 2 "register_operand") > @@ -28640,13 +28640,28 @@ (define_expand "maskload<mode><sseintvecmodelower>" > UNSPEC_MASKMOV))] > "TARGET_AVX") > > +(define_expand "maskload<mode><sseintvecmodelower>" > + [(set (match_operand:V48_128_256 0 "register_operand") > + (unspec:V48_128_256 > + [(match_operand:<sseintvecmode> 2 "register_operand") > + (match_operand:V48_128_256 1 "memory_operand") > + (match_operand:V48_128_256 3 "const0_operand")] > + UNSPEC_MASKMOV))] > + "TARGET_AVX" > +{ > + emit_insn (gen_maskload<mode><sseintvecmodelower>_1 (operands[0], > + operands[1], > + operands[2])); > + DONE; > +}) > + > (define_expand "maskload<mode><avx512fmaskmodelower>" > [(set (match_operand:V48_AVX512VL 0 "register_operand") > (vec_merge:V48_AVX512VL > (unspec:V48_AVX512VL > [(match_operand:V48_AVX512VL 1 "memory_operand")] > UNSPEC_MASKLOAD) > - (match_dup 0) > + (match_operand:V48_AVX512VL 3 "const0_operand") > (match_operand:<avx512fmaskmode> 2 
"register_operand")))] > "TARGET_AVX512F") > > @@ -28656,8 +28671,9 @@ (define_expand "maskload<mode><avx512fmaskmodelower>" > (unspec:VI12HFBF_AVX512VL > [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")] > UNSPEC_MASKLOAD) > - (match_dup 0) > - (match_operand:<avx512fmaskmode> 2 "register_operand")))] > + (match_operand:VI12HFBF_AVX512VL 3 "const0_operand") > + (match_operand:<avx512fmaskmode> 2 "register_operand"))) > + ] > "TARGET_AVX512BW") > > (define_expand "maskstore<mode><sseintvecmodelower>" > @@ -29223,20 +29239,22 @@ (define_expand "avx2_gathersi<mode>" > (unspec:VEC_GATHER_MODE > [(match_operand:VEC_GATHER_MODE 1 "register_operand") > (mem:<ssescalarmode> > - (match_par_dup 6 > + (match_par_dup 7 > [(match_operand 2 "vsib_address_operand") > (match_operand:<VEC_GATHER_IDXSI> > 3 "register_operand") > - (match_operand:SI 5 "const1248_operand ")])) > + (match_operand:SI 5 "const1248_operand ") > + (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")])) > (mem:BLK (scratch)) > (match_operand:VEC_GATHER_MODE 4 "register_operand")] > UNSPEC_GATHER)) > - (clobber (match_scratch:VEC_GATHER_MODE 7))])] > + (clobber (match_scratch:VEC_GATHER_MODE 8))])] > "TARGET_AVX2" > { > - operands[6] > - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], > - operands[5]), UNSPEC_VSIBADDR); > + operands[7] > + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], > + operands[5], operands[6]), > + UNSPEC_VSIBADDR); > }) > > (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>" > @@ -29247,7 +29265,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>" > [(unspec:P > [(match_operand:P 3 "vsib_address_operand" "jb") > (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x") > - (match_operand:SI 6 "const1248_operand")] > + (match_operand:SI 6 "const1248_operand") > + (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")] > UNSPEC_VSIBADDR)]) > (mem:BLK (scratch)) > (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")] > @@ -29268,7 
+29287,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2" > [(unspec:P > [(match_operand:P 2 "vsib_address_operand" "jb") > (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x") > - (match_operand:SI 5 "const1248_operand")] > + (match_operand:SI 5 "const1248_operand") > + (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")] > UNSPEC_VSIBADDR)]) > (mem:BLK (scratch)) > (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")] > @@ -29286,20 +29306,22 @@ (define_expand "avx2_gatherdi<mode>" > (unspec:VEC_GATHER_MODE > [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") > (mem:<ssescalarmode> > - (match_par_dup 6 > + (match_par_dup 7 > [(match_operand 2 "vsib_address_operand") > (match_operand:<VEC_GATHER_IDXDI> > 3 "register_operand") > - (match_operand:SI 5 "const1248_operand ")])) > + (match_operand:SI 5 "const1248_operand ") > + (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")])) > (mem:BLK (scratch)) > (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")] > UNSPEC_GATHER)) > - (clobber (match_scratch:VEC_GATHER_MODE 7))])] > + (clobber (match_scratch:VEC_GATHER_MODE 8))])] > "TARGET_AVX2" > { > - operands[6] > - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], > - operands[5]), UNSPEC_VSIBADDR); > + operands[7] > + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], > + operands[5], operands[6]), > + UNSPEC_VSIBADDR); > }) > x86 doesn't define mask_gather_loadmn, so I think you can drop this and all the related changes, and only keep the patch I gave you in [1]. Sorry I didn't make that clear last time.
[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-October/666814.html > (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>" > @@ -29310,7 +29332,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>" > [(unspec:P > [(match_operand:P 3 "vsib_address_operand" "jb") > (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") > - (match_operand:SI 6 "const1248_operand")] > + (match_operand:SI 6 "const1248_operand") > + (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")] > UNSPEC_VSIBADDR)]) > (mem:BLK (scratch)) > (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")] > @@ -29331,7 +29354,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2" > [(unspec:P > [(match_operand:P 2 "vsib_address_operand" "jb") > (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") > - (match_operand:SI 5 "const1248_operand")] > + (match_operand:SI 5 "const1248_operand") > + (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")] > UNSPEC_VSIBADDR)]) > (mem:BLK (scratch)) > (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] > @@ -29357,7 +29381,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_3" > [(unspec:P > [(match_operand:P 3 "vsib_address_operand" "jb") > (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") > - (match_operand:SI 6 "const1248_operand")] > + (match_operand:SI 6 "const1248_operand") > + (match_operand:VI4F_256 8 "maskload_else_operand")] > UNSPEC_VSIBADDR)]) > (mem:BLK (scratch)) > (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")] > @@ -29381,7 +29406,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_4" > [(unspec:P > [(match_operand:P 2 "vsib_address_operand" "jb") > (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") > - (match_operand:SI 5 "const1248_operand")] > + (match_operand:SI 5 "const1248_operand") > + (match_operand:VI4F_256 7 "maskload_else_operand")] > UNSPEC_VSIBADDR)]) > (mem:BLK (scratch)) > (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] > @@ -29402,17 +29428,19 @@ 
(define_expand "<avx512>_gathersi<mode>" > [(match_operand:VI48F 1 "register_operand") > (match_operand:<avx512fmaskmode> 4 "register_operand") > (mem:<ssescalarmode> > - (match_par_dup 6 > + (match_par_dup 7 > [(match_operand 2 "vsib_address_operand") > (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand") > - (match_operand:SI 5 "const1248_operand")]))] > + (match_operand:SI 5 "const1248_operand") > + (match_operand:VI48F 6 "maskload_else_operand")]))] > UNSPEC_GATHER)) > - (clobber (match_scratch:<avx512fmaskmode> 7))])] > + (clobber (match_scratch:<avx512fmaskmode> 8))])] > "TARGET_AVX512F" > { > - operands[6] > - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], > - operands[5]), UNSPEC_VSIBADDR); > + operands[7] > + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], > + operands[5], operands[6]), > + UNSPEC_VSIBADDR); > }) > > (define_insn "*avx512f_gathersi<VI48F:mode>" > @@ -29424,7 +29452,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>" > [(unspec:P > [(match_operand:P 4 "vsib_address_operand" "Tv") > (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v") > - (match_operand:SI 5 "const1248_operand")] > + (match_operand:SI 5 "const1248_operand") > + (match_operand:VI48F 8 "maskload_else_operand")] > UNSPEC_VSIBADDR)])] > UNSPEC_GATHER)) > (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))] > @@ -29445,7 +29474,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>_2" > [(unspec:P > [(match_operand:P 3 "vsib_address_operand" "Tv") > (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v") > - (match_operand:SI 4 "const1248_operand")] > + (match_operand:SI 4 "const1248_operand") > + (match_operand:VI48F 7 "maskload_else_operand")] > UNSPEC_VSIBADDR)])] > UNSPEC_GATHER)) > (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))] > @@ -29464,17 +29494,19 @@ (define_expand "<avx512>_gatherdi<mode>" > [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") > (match_operand:QI 4 "register_operand") > (mem:<ssescalarmode> > - 
(match_par_dup 6 > + (match_par_dup 7 > [(match_operand 2 "vsib_address_operand") > (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand") > - (match_operand:SI 5 "const1248_operand")]))] > + (match_operand:SI 5 "const1248_operand") > + (match_operand:VI48F 6 "maskload_else_operand")]))] > UNSPEC_GATHER)) > - (clobber (match_scratch:QI 7))])] > + (clobber (match_scratch:QI 8))])] > "TARGET_AVX512F" > { > - operands[6] > - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], > - operands[5]), UNSPEC_VSIBADDR); > + operands[7] > + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], > + operands[5], operands[6]), > + UNSPEC_VSIBADDR); > }) > > (define_insn "*avx512f_gatherdi<VI48F:mode>" > @@ -29486,7 +29518,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>" > [(unspec:P > [(match_operand:P 4 "vsib_address_operand" "Tv") > (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v") > - (match_operand:SI 5 "const1248_operand")] > + (match_operand:SI 5 "const1248_operand") > + (match_operand:VI48F 8 "maskload_else_operand")] > UNSPEC_VSIBADDR)])] > UNSPEC_GATHER)) > (clobber (match_scratch:QI 2 "=&Yk"))] > @@ -29507,7 +29540,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>_2" > [(unspec:P > [(match_operand:P 3 "vsib_address_operand" "Tv") > (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v") > - (match_operand:SI 4 "const1248_operand")] > + (match_operand:SI 4 "const1248_operand") > + (match_operand:VI48F 7 "maskload_else_operand")] > UNSPEC_VSIBADDR)])] > UNSPEC_GATHER)) > (clobber (match_scratch:QI 1 "=&Yk"))] > @@ -29544,7 +29578,7 @@ (define_expand "<avx512>_scattersi<mode>" > operands[5] > = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2], > operands[4], operands[1]), > - UNSPEC_VSIBADDR); > + UNSPEC_VSIBADDR); > }) > > (define_insn "*avx512f_scattersi<VI48F:mode>" > -- > 2.47.0 >
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 0de0e842731..6c61f9f87c2 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -12995,10 +12995,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, { tree arg; rtx pat, op; - unsigned int i, nargs, arg_adjust, memory; + unsigned int i, nargs, arg_adjust, memory = -1; unsigned int constant = 100; bool aligned_mem = false; - rtx xops[4]; + rtx xops[4] = {}; + bool add_els = false; enum insn_code icode = d->icode; const struct insn_data_d *insn_p = &insn_data[icode]; machine_mode tmode = insn_p->operand[0].mode; @@ -13125,6 +13126,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case V4DI_FTYPE_PCV4DI_V4DI: case V4SI_FTYPE_PCV4SI_V4SI: case V2DI_FTYPE_PCV2DI_V2DI: + /* Two actual args but an additional else operand. */ + add_els = true; + /* Fallthru. */ case VOID_FTYPE_INT_INT64: nargs = 2; klass = load; @@ -13397,6 +13401,12 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, xops[i]= op; } + if (add_els) + { + xops[i] = CONST0_RTX (GET_MODE (xops[0])); + nargs++; + } + switch (nargs) { case 0: @@ -13653,7 +13663,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, enum insn_code icode, icode2; tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); tree arg0, arg1, arg2, arg3, arg4; - rtx op0, op1, op2, op3, op4, pat, pat2, insn; + rtx op0, op1, op2, op3, op4, opels, pat, pat2, insn; machine_mode mode0, mode1, mode2, mode3, mode4; unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); HOST_WIDE_INT bisa, bisa2; @@ -15560,12 +15570,15 @@ rdseed_step: op3 = copy_to_reg (op3); op3 = lowpart_subreg (mode3, op3, GET_MODE (op3)); } + if (!insn_data[icode].operand[5].predicate (op4, mode4)) { - error ("the last argument must be scale 1, 2, 4, 8"); - return const0_rtx; + error ("the last argument must be scale 1, 2, 4, 8"); + return const0_rtx; } + opels = CONST0_RTX (GET_MODE 
(subtarget)); + /* Optimize. If mask is known to have all high bits set, replace op0 with pc_rtx to signal that the instruction overwrites the whole destination and doesn't use its @@ -15634,7 +15647,8 @@ rdseed_step: } } - pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); + pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4, opels); + if (! pat) return const0_rtx; emit_insn (pat); diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 053312bbe27..7c7d8f61f11 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -2346,3 +2346,7 @@ (define_predicate "apx_evex_add_memory_operand" return true; }) + +(define_predicate "maskload_else_operand" + (and (match_code "const_int,const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 36f8567b66f..41c1badbc00 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -28632,7 +28632,7 @@ (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>" (set_attr "btver2_decode" "vector") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "maskload<mode><sseintvecmodelower>" +(define_expand "maskload<mode><sseintvecmodelower>_1" [(set (match_operand:V48_128_256 0 "register_operand") (unspec:V48_128_256 [(match_operand:<sseintvecmode> 2 "register_operand") @@ -28640,13 +28640,28 @@ (define_expand "maskload<mode><sseintvecmodelower>" UNSPEC_MASKMOV))] "TARGET_AVX") +(define_expand "maskload<mode><sseintvecmodelower>" + [(set (match_operand:V48_128_256 0 "register_operand") + (unspec:V48_128_256 + [(match_operand:<sseintvecmode> 2 "register_operand") + (match_operand:V48_128_256 1 "memory_operand") + (match_operand:V48_128_256 3 "const0_operand")] + UNSPEC_MASKMOV))] + "TARGET_AVX" +{ + emit_insn (gen_maskload<mode><sseintvecmodelower>_1 (operands[0], + operands[1], + operands[2])); + DONE; +}) + (define_expand "maskload<mode><avx512fmaskmodelower>" [(set 
(match_operand:V48_AVX512VL 0 "register_operand") (vec_merge:V48_AVX512VL (unspec:V48_AVX512VL [(match_operand:V48_AVX512VL 1 "memory_operand")] UNSPEC_MASKLOAD) - (match_dup 0) + (match_operand:V48_AVX512VL 3 "const0_operand") (match_operand:<avx512fmaskmode> 2 "register_operand")))] "TARGET_AVX512F") @@ -28656,8 +28671,9 @@ (define_expand "maskload<mode><avx512fmaskmodelower>" (unspec:VI12HFBF_AVX512VL [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")] UNSPEC_MASKLOAD) - (match_dup 0) - (match_operand:<avx512fmaskmode> 2 "register_operand")))] + (match_operand:VI12HFBF_AVX512VL 3 "const0_operand") + (match_operand:<avx512fmaskmode> 2 "register_operand"))) + ] "TARGET_AVX512BW") (define_expand "maskstore<mode><sseintvecmodelower>" @@ -29223,20 +29239,22 @@ (define_expand "avx2_gathersi<mode>" (unspec:VEC_GATHER_MODE [(match_operand:VEC_GATHER_MODE 1 "register_operand") (mem:<ssescalarmode> - (match_par_dup 6 + (match_par_dup 7 [(match_operand 2 "vsib_address_operand") (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand") - (match_operand:SI 5 "const1248_operand ")])) + (match_operand:SI 5 "const1248_operand ") + (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")])) (mem:BLK (scratch)) (match_operand:VEC_GATHER_MODE 4 "register_operand")] UNSPEC_GATHER)) - (clobber (match_scratch:VEC_GATHER_MODE 7))])] + (clobber (match_scratch:VEC_GATHER_MODE 8))])] "TARGET_AVX2" { - operands[6] - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], - operands[5]), UNSPEC_VSIBADDR); + operands[7] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], + operands[5], operands[6]), + UNSPEC_VSIBADDR); }) (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>" @@ -29247,7 +29265,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>" [(unspec:P [(match_operand:P 3 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x") - (match_operand:SI 6 "const1248_operand")] + (match_operand:SI 6 "const1248_operand") + 
(match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")] @@ -29268,7 +29287,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2" [(unspec:P [(match_operand:P 2 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x") - (match_operand:SI 5 "const1248_operand")] + (match_operand:SI 5 "const1248_operand") + (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")] @@ -29286,20 +29306,22 @@ (define_expand "avx2_gatherdi<mode>" (unspec:VEC_GATHER_MODE [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") (mem:<ssescalarmode> - (match_par_dup 6 + (match_par_dup 7 [(match_operand 2 "vsib_address_operand") (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand") - (match_operand:SI 5 "const1248_operand ")])) + (match_operand:SI 5 "const1248_operand ") + (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")])) (mem:BLK (scratch)) (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")] UNSPEC_GATHER)) - (clobber (match_scratch:VEC_GATHER_MODE 7))])] + (clobber (match_scratch:VEC_GATHER_MODE 8))])] "TARGET_AVX2" { - operands[6] - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], - operands[5]), UNSPEC_VSIBADDR); + operands[7] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], + operands[5], operands[6]), + UNSPEC_VSIBADDR); }) (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>" @@ -29310,7 +29332,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>" [(unspec:P [(match_operand:P 3 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") - (match_operand:SI 6 "const1248_operand")] + (match_operand:SI 6 "const1248_operand") + (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" 
"1")] @@ -29331,7 +29354,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2" [(unspec:P [(match_operand:P 2 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") - (match_operand:SI 5 "const1248_operand")] + (match_operand:SI 5 "const1248_operand") + (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] @@ -29357,7 +29381,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_3" [(unspec:P [(match_operand:P 3 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") - (match_operand:SI 6 "const1248_operand")] + (match_operand:SI 6 "const1248_operand") + (match_operand:VI4F_256 8 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")] @@ -29381,7 +29406,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_4" [(unspec:P [(match_operand:P 2 "vsib_address_operand" "jb") (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") - (match_operand:SI 5 "const1248_operand")] + (match_operand:SI 5 "const1248_operand") + (match_operand:VI4F_256 7 "maskload_else_operand")] UNSPEC_VSIBADDR)]) (mem:BLK (scratch)) (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] @@ -29402,17 +29428,19 @@ (define_expand "<avx512>_gathersi<mode>" [(match_operand:VI48F 1 "register_operand") (match_operand:<avx512fmaskmode> 4 "register_operand") (mem:<ssescalarmode> - (match_par_dup 6 + (match_par_dup 7 [(match_operand 2 "vsib_address_operand") (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand") - (match_operand:SI 5 "const1248_operand")]))] + (match_operand:SI 5 "const1248_operand") + (match_operand:VI48F 6 "maskload_else_operand")]))] UNSPEC_GATHER)) - (clobber (match_scratch:<avx512fmaskmode> 7))])] + (clobber (match_scratch:<avx512fmaskmode> 8))])] "TARGET_AVX512F" { - operands[6] - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], 
operands[3], - operands[5]), UNSPEC_VSIBADDR); + operands[7] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], + operands[5], operands[6]), + UNSPEC_VSIBADDR); }) (define_insn "*avx512f_gathersi<VI48F:mode>" @@ -29424,7 +29452,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>" [(unspec:P [(match_operand:P 4 "vsib_address_operand" "Tv") (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v") - (match_operand:SI 5 "const1248_operand")] + (match_operand:SI 5 "const1248_operand") + (match_operand:VI48F 8 "maskload_else_operand")] UNSPEC_VSIBADDR)])] UNSPEC_GATHER)) (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))] @@ -29445,7 +29474,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>_2" [(unspec:P [(match_operand:P 3 "vsib_address_operand" "Tv") (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v") - (match_operand:SI 4 "const1248_operand")] + (match_operand:SI 4 "const1248_operand") + (match_operand:VI48F 7 "maskload_else_operand")] UNSPEC_VSIBADDR)])] UNSPEC_GATHER)) (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))] @@ -29464,17 +29494,19 @@ (define_expand "<avx512>_gatherdi<mode>" [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") (match_operand:QI 4 "register_operand") (mem:<ssescalarmode> - (match_par_dup 6 + (match_par_dup 7 [(match_operand 2 "vsib_address_operand") (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand") - (match_operand:SI 5 "const1248_operand")]))] + (match_operand:SI 5 "const1248_operand") + (match_operand:VI48F 6 "maskload_else_operand")]))] UNSPEC_GATHER)) - (clobber (match_scratch:QI 7))])] + (clobber (match_scratch:QI 8))])] "TARGET_AVX512F" { - operands[6] - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], - operands[5]), UNSPEC_VSIBADDR); + operands[7] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3], + operands[5], operands[6]), + UNSPEC_VSIBADDR); }) (define_insn "*avx512f_gatherdi<VI48F:mode>" @@ -29486,7 +29518,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>" 
[(unspec:P [(match_operand:P 4 "vsib_address_operand" "Tv") (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v") - (match_operand:SI 5 "const1248_operand")] + (match_operand:SI 5 "const1248_operand") + (match_operand:VI48F 8 "maskload_else_operand")] UNSPEC_VSIBADDR)])] UNSPEC_GATHER)) (clobber (match_scratch:QI 2 "=&Yk"))] @@ -29507,7 +29540,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>_2" [(unspec:P [(match_operand:P 3 "vsib_address_operand" "Tv") (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v") - (match_operand:SI 4 "const1248_operand")] + (match_operand:SI 4 "const1248_operand") + (match_operand:VI48F 7 "maskload_else_operand")] UNSPEC_VSIBADDR)])] UNSPEC_GATHER)) (clobber (match_scratch:QI 1 "=&Yk"))] @@ -29544,7 +29578,7 @@ (define_expand "<avx512>_scattersi<mode>" operands[5] = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2], operands[4], operands[1]), - UNSPEC_VSIBADDR); + UNSPEC_VSIBADDR); }) (define_insn "*avx512f_scattersi<VI48F:mode>"
From: Robin Dapp <rdapp@ventanamicro.com> This patch adds a zero else operand to masked loads, in particular the masked gather load builtins that are used for gather vectorization. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_special_args_builtin): Add else-operand handling. (ix86_expand_builtin): Ditto. * config/i386/predicates.md (maskload_else_operand): New predicate. * config/i386/sse.md: Use predicate. --- gcc/config/i386/i386-expand.cc | 26 ++++++-- gcc/config/i386/predicates.md | 4 ++ gcc/config/i386/sse.md | 112 +++++++++++++++++++++------------ 3 files changed, 97 insertions(+), 45 deletions(-)