@@ -12462,10 +12462,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
{
tree arg;
rtx pat, op;
- unsigned int i, nargs, arg_adjust, memory;
+ unsigned int i, nargs, arg_adjust, memory = -1;
unsigned int constant = 100;
bool aligned_mem = false;
- rtx xops[4];
+ rtx xops[4] = {};
+ bool add_els = false;
enum insn_code icode = d->icode;
const struct insn_data_d *insn_p = &insn_data[icode];
machine_mode tmode = insn_p->operand[0].mode;
@@ -12592,6 +12593,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case V4DI_FTYPE_PCV4DI_V4DI:
case V4SI_FTYPE_PCV4SI_V4SI:
case V2DI_FTYPE_PCV2DI_V2DI:
+ /* Two actual args but an additional else operand. */
+ add_els = true;
+ /* Fallthru. */
case VOID_FTYPE_INT_INT64:
nargs = 2;
klass = load;
@@ -12864,6 +12868,12 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
xops[i]= op;
}
+ if (add_els)
+ {
+ xops[i] = CONST0_RTX (GET_MODE (xops[0]));
+ nargs++;
+ }
+
switch (nargs)
{
case 0:
@@ -13113,10 +13123,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
size_t i;
enum insn_code icode, icode2;
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- tree arg0, arg1, arg2, arg3, arg4;
- rtx op0, op1, op2, op3, op4, pat, pat2, insn;
- machine_mode mode0, mode1, mode2, mode3, mode4;
+ tree arg0, arg1, arg2, arg3, arg4, arg5;
+ rtx op0, op1, op2, op3, op4, op5, opels, pat, pat2, insn;
+ machine_mode mode0, mode1, mode2, mode3, mode4, mode5;
unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
+ bool has_else_op = false;
HOST_WIDE_INT bisa, bisa2;
/* For CPU builtins that can be folded, fold first and expand the fold. */
@@ -14919,6 +14930,7 @@ rdseed_step:
arg2 = CALL_EXPR_ARG (exp, 2);
arg3 = CALL_EXPR_ARG (exp, 3);
arg4 = CALL_EXPR_ARG (exp, 4);
+ has_else_op = call_expr_nargs (exp) == 6;
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
@@ -15021,10 +15033,38 @@ rdseed_step:
op3 = copy_to_reg (op3);
op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
}
- if (!insn_data[icode].operand[5].predicate (op4, mode4))
+ /* The vectorizer only adds an else operand for real masks. */
+ if (has_else_op)
+ {
+ if (op4 != CONST0_RTX (GET_MODE (subtarget)))
+ {
+ error ("the else operand must be 0");
+ return const0_rtx;
+ }
+ else
+ {
+ arg5 = CALL_EXPR_ARG (exp, 5);
+ op5 = expand_normal (arg5);
+ /* Arg order differs from operand order: the else value is arg 4 and the scale arg 5, while the insn takes the scale as operand 5 and the else value as operand 6. */
+ mode5 = insn_data[icode].operand[5].mode;
+ if (!insn_data[icode].operand[5].predicate (op5, mode5))
+ {
+ error ("the last argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
+ }
+ }
+ opels = op4;
+ op4 = op5;
+ mode4 = mode5;
+ }
+ else
{
- error ("the last argument must be scale 1, 2, 4, 8");
- return const0_rtx;
+ if (!insn_data[icode].operand[5].predicate (op4, mode4))
+ {
+ error ("the last argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
+ }
+ opels = CONST0_RTX (GET_MODE (subtarget));
}
/* Optimize. If mask is known to have all high bits set,
@@ -15095,7 +15135,8 @@ rdseed_step:
}
}
- pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
+ pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4, opels);
+
if (! pat)
return const0_rtx;
emit_insn (pat);
@@ -2332,3 +2332,18 @@ (define_predicate "apx_ndd_add_memory_operand"
return true;
})
+
+;; Check that element i of the CONST_VECTOR equals 2 * i + 1 (i.e. 1, 3, 5, ...)
+(define_predicate "vcvtne2ps2bf_parallel"
+ (and (match_code "const_vector")
+ (match_code "const_int" "a"))
+{
+ for (int i = 0; i < XVECLEN (op, 0); ++i)
+ if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1))
+ return false;
+ return true;
+})
+
+(define_predicate "maskload_else_operand"
+ (and (match_code "const_int,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
@@ -1487,7 +1487,8 @@ (define_expand "<avx512>_load<mode>_mask"
}
else if (MEM_P (operands[1]))
operands[1] = gen_rtx_UNSPEC (<MODE>mode,
- gen_rtvec(1, operands[1]),
+ gen_rtvec(2, operands[1],
+ CONST0_RTX (<MODE>mode)),
UNSPEC_MASKLOAD);
})
@@ -1495,7 +1496,8 @@ (define_insn "*<avx512>_load<mode>_mask"
[(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
(vec_merge:V48_AVX512VL
(unspec:V48_AVX512VL
- [(match_operand:V48_AVX512VL 1 "memory_operand" "m")]
+ [(match_operand:V48_AVX512VL 1 "memory_operand" "m")
+ (match_operand:V48_AVX512VL 4 "maskload_else_operand")]
UNSPEC_MASKLOAD)
(match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
@@ -1523,7 +1525,8 @@ (define_insn "*<avx512>_load<mode>_mask"
(define_insn_and_split "*<avx512>_load<mode>"
[(set (match_operand:V48_AVX512VL 0 "register_operand")
(unspec:V48_AVX512VL
- [(match_operand:V48_AVX512VL 1 "memory_operand")]
+ [(match_operand:V48_AVX512VL 1 "memory_operand")
+ (match_operand:V48_AVX512VL 2 "maskload_else_operand")]
UNSPEC_MASKLOAD))]
"TARGET_AVX512F"
"#"
@@ -1545,7 +1548,8 @@ (define_expand "<avx512>_load<mode>_mask"
}
else if (MEM_P (operands[1]))
operands[1] = gen_rtx_UNSPEC (<MODE>mode,
- gen_rtvec(1, operands[1]),
+ gen_rtvec(2, operands[1],
+ CONST0_RTX (<MODE>mode)),
UNSPEC_MASKLOAD);
})
@@ -1554,7 +1558,8 @@ (define_insn "*<avx512>_load<mode>_mask"
[(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI12HFBF_AVX512VL
(unspec:VI12HFBF_AVX512VL
- [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
+ [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")
+ (match_operand:VI12HFBF_AVX512VL 4 "maskload_else_operand")]
UNSPEC_MASKLOAD)
(match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand" "0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
@@ -1567,7 +1572,8 @@ (define_insn "*<avx512>_load<mode>_mask"
(define_insn_and_split "*<avx512>_load<mode>"
[(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
(unspec:VI12HFBF_AVX512VL
- [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
+ [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")
+ (match_operand:VI12HFBF_AVX512VL 2 "maskload_else_operand")]
UNSPEC_MASKLOAD))]
"TARGET_AVX512BW"
"#"
@@ -28440,7 +28446,8 @@ (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:V48_128_256 0 "register_operand" "=x")
(unspec:V48_128_256
[(match_operand:<sseintvecmode> 2 "register_operand" "x")
- (match_operand:V48_128_256 1 "memory_operand" "jm")]
+ (match_operand:V48_128_256 1 "memory_operand" "jm")
+ (match_operand:V48_128_256 3 "maskload_else_operand")]
UNSPEC_MASKMOV))]
"TARGET_AVX"
{
@@ -28481,7 +28488,8 @@ (define_expand "maskload<mode><sseintvecmodelower>"
[(set (match_operand:V48_128_256 0 "register_operand")
(unspec:V48_128_256
[(match_operand:<sseintvecmode> 2 "register_operand")
- (match_operand:V48_128_256 1 "memory_operand")]
+ (match_operand:V48_128_256 1 "memory_operand")
+ (match_operand:V48_128_256 3 "maskload_else_operand")]
UNSPEC_MASKMOV))]
"TARGET_AVX")
@@ -28489,20 +28497,24 @@ (define_expand "maskload<mode><avx512fmaskmodelower>"
[(set (match_operand:V48_AVX512VL 0 "register_operand")
(vec_merge:V48_AVX512VL
(unspec:V48_AVX512VL
- [(match_operand:V48_AVX512VL 1 "memory_operand")]
+ [(match_operand:V48_AVX512VL 1 "memory_operand")
+ (match_operand:V48_AVX512VL 3 "maskload_else_operand")]
UNSPEC_MASKLOAD)
(match_dup 0)
- (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+ (match_operand:<avx512fmaskmode> 2 "register_operand")))
+ ]
"TARGET_AVX512F")
(define_expand "maskload<mode><avx512fmaskmodelower>"
[(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand")
(vec_merge:VI12HFBF_AVX512VL
(unspec:VI12HFBF_AVX512VL
- [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
+ [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")
+ (match_operand:VI12HFBF_AVX512VL 3 "maskload_else_operand")]
UNSPEC_MASKLOAD)
(match_dup 0)
- (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+ (match_operand:<avx512fmaskmode> 2 "register_operand")))
+ ]
"TARGET_AVX512BW")
(define_expand "maskstore<mode><sseintvecmodelower>"
@@ -29067,20 +29079,22 @@ (define_expand "avx2_gathersi<mode>"
(unspec:VEC_GATHER_MODE
[(match_operand:VEC_GATHER_MODE 1 "register_operand")
(mem:<ssescalarmode>
- (match_par_dup 6
+ (match_par_dup 7
[(match_operand 2 "vsib_address_operand")
(match_operand:<VEC_GATHER_IDXSI>
3 "register_operand")
- (match_operand:SI 5 "const1248_operand ")]))
+ (match_operand:SI 5 "const1248_operand ")
+ (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")]))
(mem:BLK (scratch))
(match_operand:VEC_GATHER_MODE 4 "register_operand")]
UNSPEC_GATHER))
- (clobber (match_scratch:VEC_GATHER_MODE 7))])]
+ (clobber (match_scratch:VEC_GATHER_MODE 8))])]
"TARGET_AVX2"
{
- operands[6]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
- operands[5]), UNSPEC_VSIBADDR);
+ operands[7]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+ operands[5], operands[6]),
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
@@ -29091,7 +29105,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
- (match_operand:SI 6 "const1248_operand")]
+ (match_operand:SI 6 "const1248_operand")
+ (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
@@ -29112,7 +29127,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
[(unspec:P
[(match_operand:P 2 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
- (match_operand:SI 5 "const1248_operand")]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
@@ -29130,20 +29146,22 @@ (define_expand "avx2_gatherdi<mode>"
(unspec:VEC_GATHER_MODE
[(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
(mem:<ssescalarmode>
- (match_par_dup 6
+ (match_par_dup 7
[(match_operand 2 "vsib_address_operand")
(match_operand:<VEC_GATHER_IDXDI>
3 "register_operand")
- (match_operand:SI 5 "const1248_operand ")]))
+ (match_operand:SI 5 "const1248_operand ")
+ (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")]))
(mem:BLK (scratch))
(match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
UNSPEC_GATHER))
- (clobber (match_scratch:VEC_GATHER_MODE 7))])]
+ (clobber (match_scratch:VEC_GATHER_MODE 8))])]
"TARGET_AVX2"
{
- operands[6]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
- operands[5]), UNSPEC_VSIBADDR);
+ operands[7]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+ operands[5], operands[6]),
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
@@ -29154,7 +29172,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
- (match_operand:SI 6 "const1248_operand")]
+ (match_operand:SI 6 "const1248_operand")
+ (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
@@ -29175,7 +29194,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
[(unspec:P
[(match_operand:P 2 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
- (match_operand:SI 5 "const1248_operand")]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
@@ -29201,7 +29221,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
- (match_operand:SI 6 "const1248_operand")]
+ (match_operand:SI 6 "const1248_operand")
+ (match_operand:VI4F_256 8 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
@@ -29225,7 +29246,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
[(unspec:P
[(match_operand:P 2 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
- (match_operand:SI 5 "const1248_operand")]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VI4F_256 7 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
@@ -29246,17 +29268,19 @@ (define_expand "<avx512>_gathersi<mode>"
[(match_operand:VI48F 1 "register_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")
(mem:<ssescalarmode>
- (match_par_dup 6
+ (match_par_dup 7
[(match_operand 2 "vsib_address_operand")
(match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
- (match_operand:SI 5 "const1248_operand")]))]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VI48F 6 "maskload_else_operand")]))]
UNSPEC_GATHER))
- (clobber (match_scratch:<avx512fmaskmode> 7))])]
+ (clobber (match_scratch:<avx512fmaskmode> 8))])]
"TARGET_AVX512F"
{
- operands[6]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
- operands[5]), UNSPEC_VSIBADDR);
+ operands[7]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+ operands[5], operands[6]),
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx512f_gathersi<VI48F:mode>"
@@ -29268,7 +29292,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>"
[(unspec:P
[(match_operand:P 4 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
- (match_operand:SI 5 "const1248_operand")]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VI48F 8 "maskload_else_operand")]
UNSPEC_VSIBADDR)])]
UNSPEC_GATHER))
(clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
@@ -29289,7 +29314,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>_2"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
- (match_operand:SI 4 "const1248_operand")]
+ (match_operand:SI 4 "const1248_operand")
+ (match_operand:VI48F 7 "maskload_else_operand")]
UNSPEC_VSIBADDR)])]
UNSPEC_GATHER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
@@ -29308,17 +29334,19 @@ (define_expand "<avx512>_gatherdi<mode>"
[(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
(match_operand:QI 4 "register_operand")
(mem:<ssescalarmode>
- (match_par_dup 6
+ (match_par_dup 7
[(match_operand 2 "vsib_address_operand")
(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
- (match_operand:SI 5 "const1248_operand")]))]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VI48F 6 "maskload_else_operand")]))]
UNSPEC_GATHER))
- (clobber (match_scratch:QI 7))])]
+ (clobber (match_scratch:QI 8))])]
"TARGET_AVX512F"
{
- operands[6]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
- operands[5]), UNSPEC_VSIBADDR);
+ operands[7]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+ operands[5], operands[6]),
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx512f_gatherdi<VI48F:mode>"
@@ -29330,7 +29358,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>"
[(unspec:P
[(match_operand:P 4 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
- (match_operand:SI 5 "const1248_operand")]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VI48F 8 "maskload_else_operand")]
UNSPEC_VSIBADDR)])]
UNSPEC_GATHER))
(clobber (match_scratch:QI 2 "=&Yk"))]
@@ -29351,7 +29380,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
- (match_operand:SI 4 "const1248_operand")]
+ (match_operand:SI 4 "const1248_operand")
+ (match_operand:VI48F 7 "maskload_else_operand")]
UNSPEC_VSIBADDR)])]
UNSPEC_GATHER))
(clobber (match_scratch:QI 1 "=&Yk"))]
@@ -29388,7 +29418,7 @@ (define_expand "<avx512>_scattersi<mode>"
operands[5]
= gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
operands[4], operands[1]),
- UNSPEC_VSIBADDR);
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx512f_scattersi<VI48F:mode>"