@@ -15204,17 +15204,28 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
rtvec v;
switch (mode)
{
- case SImode:
+ case V4SImode:
gcc_assert (vect);
v = gen_rtvec (4, value, value, value, value);
return gen_rtx_CONST_VECTOR (V4SImode, v);
- case DImode:
+ case V2DImode:
gcc_assert (vect);
v = gen_rtvec (2, value, value);
return gen_rtx_CONST_VECTOR (V2DImode, v);
- case SFmode:
+ case V8SFmode:
+ if (vect)
+ v = gen_rtvec (8, value, value, value, value,
+ value, value, value, value);
+ else
+ v = gen_rtvec (8, value, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+ return gen_rtx_CONST_VECTOR (V8SFmode, v);
+
+ case V4SFmode:
if (vect)
v = gen_rtvec (4, value, value, value, value);
else
@@ -15222,7 +15233,15 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
CONST0_RTX (SFmode), CONST0_RTX (SFmode));
return gen_rtx_CONST_VECTOR (V4SFmode, v);
- case DFmode:
+ case V4DFmode:
+ if (vect)
+ v = gen_rtvec (4, value, value, value, value);
+ else
+ v = gen_rtvec (4, value, CONST0_RTX (DFmode),
+ CONST0_RTX (DFmode), CONST0_RTX (DFmode));
+ return gen_rtx_CONST_VECTOR (V4DFmode, v);
+
+ case V2DFmode:
if (vect)
v = gen_rtvec (2, value, value);
else
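
With this change ix86_build_const_vector takes the vector mode itself instead of the element mode: VECT true broadcasts VALUE to every element, VECT false puts VALUE in element 0 and zeros the rest. A standalone sketch of that contract, with plain doubles standing in for the rtx elements (illustrative only, not GCC code):

#include <stdio.h>

/* VECT broadcasts VALUE; !VECT keeps it in element 0 only.  */
static void
build_const_vector (int nelts, int vect, double value, double *out)
{
  int i;
  for (i = 0; i < nelts; i++)
    out[i] = (vect || i == 0) ? value : 0.0;
}

int
main (void)
{
  double v[8];
  int i;
  build_const_vector (8, 0, 1.5, v);   /* V8SFmode, vect == false */
  for (i = 0; i < 8; i++)
    printf ("%g ", v[i]);              /* 1.5 0 0 0 0 0 0 0 */
  printf ("\n");
  return 0;
}
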
@@ -15252,17 +15271,21 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
/* Find the sign bit, sign extended to 2*HWI. */
switch (mode)
{
- case SImode:
- case SFmode:
+ case V4SImode:
+ case V8SFmode:
+ case V4SFmode:
+ vec_mode = mode;
+ mode = GET_MODE_INNER (mode);
imode = SImode;
- vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
lo = 0x80000000, hi = lo < 0;
break;
- case DImode:
- case DFmode:
+ case V2DImode:
+ case V4DFmode:
+ case V2DFmode:
+ vec_mode = mode;
+ mode = GET_MODE_INNER (mode);
imode = DImode;
- vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
if (HOST_BITS_PER_WIDE_INT >= 64)
lo = (HOST_WIDE_INT)1 << shift, hi = -1;
else
@@ -15316,7 +15339,7 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
if (vec_mode == VOIDmode)
return force_reg (mode, mask);
- v = ix86_build_const_vector (mode, vect, mask);
+ v = ix86_build_const_vector (vec_mode, vect, mask);
return force_reg (vec_mode, v);
}
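
The constant built here is just the per-element sign bit (0x80000000 for SFmode lanes, 1 << 63 for DFmode), optionally inverted into an abs mask. A scalar model of why this one constant covers both NEG and ABS, assuming IEEE formats (my sketch, not GCC code):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static float
neg_sf (float x)
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  u ^= 0x80000000u;             /* XOR with the mask flips the sign */
  memcpy (&x, &u, sizeof x);
  return x;
}

static float
abs_sf (float x)
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  u &= ~0x80000000u;            /* AND with the inverted mask clears it */
  memcpy (&x, &u, sizeof x);
  return x;
}

int
main (void)
{
  printf ("%g %g\n", neg_sf (2.5f), abs_sf (-2.5f));   /* -2.5 2.5 */
  return 0;
}
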
@@ -15329,22 +15352,25 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
rtx mask, set, use, clob, dst, src;
bool use_sse = false;
bool vector_mode = VECTOR_MODE_P (mode);
- enum machine_mode elt_mode = mode;
+ enum machine_mode vmode = mode;
if (vector_mode)
- {
- elt_mode = GET_MODE_INNER (mode);
- use_sse = true;
- }
+ use_sse = true;
else if (mode == TFmode)
use_sse = true;
else if (TARGET_SSE_MATH)
- use_sse = SSE_FLOAT_MODE_P (mode);
+ {
+ use_sse = SSE_FLOAT_MODE_P (mode);
+ if (mode == SFmode)
+ vmode = V4SFmode;
+ else if (mode == DFmode)
+ vmode = V2DFmode;
+ }
/* NEG and ABS performed with SSE use bitwise mask operations.
Create the appropriate mask now. */
if (use_sse)
- mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
+ mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
else
mask = NULL_RTX;
@@ -15378,7 +15404,7 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
void
ix86_expand_copysign (rtx operands[])
{
- enum machine_mode mode;
+ enum machine_mode mode, vmode;
rtx dest, op0, op1, mask, nmask;
dest = operands[0];
@@ -15387,6 +15413,13 @@ ix86_expand_copysign (rtx operands[])
mode = GET_MODE (dest);
+ if (mode == SFmode)
+ vmode = V4SFmode;
+ else if (mode == DFmode)
+ vmode = V2DFmode;
+ else
+ vmode = mode;
+
if (GET_CODE (op0) == CONST_DOUBLE)
{
rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
@@ -15396,15 +15429,11 @@ ix86_expand_copysign (rtx operands[])
if (mode == SFmode || mode == DFmode)
{
- enum machine_mode vmode;
-
- vmode = mode == SFmode ? V4SFmode : V2DFmode;
-
if (op0 == CONST0_RTX (mode))
op0 = CONST0_RTX (vmode);
else
{
- rtx v = ix86_build_const_vector (mode, false, op0);
+ rtx v = ix86_build_const_vector (vmode, false, op0);
op0 = force_reg (vmode, v);
}
@@ -15412,7 +15441,7 @@ ix86_expand_copysign (rtx operands[])
else if (op0 != CONST0_RTX (mode))
op0 = force_reg (mode, op0);
- mask = ix86_build_signbit_mask (mode, 0, 0);
+ mask = ix86_build_signbit_mask (vmode, 0, 0);
if (mode == SFmode)
copysign_insn = gen_copysignsf3_const;
@@ -15427,8 +15456,8 @@ ix86_expand_copysign (rtx operands[])
{
rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
- nmask = ix86_build_signbit_mask (mode, 0, 1);
- mask = ix86_build_signbit_mask (mode, 0, 0);
+ nmask = ix86_build_signbit_mask (vmode, 0, 1);
+ mask = ix86_build_signbit_mask (vmode, 0, 0);
if (mode == SFmode)
copysign_insn = gen_copysignsf3_var;
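
Both mask variants feed the same merge: clear the sign of op0 with nmask, take only the sign of op1 with mask, and OR the halves. A scalar DFmode model of the identity the expander implements (illustrative sketch):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static double
copysign_df (double x, double y)
{
  uint64_t ux, uy, mask = (uint64_t) 1 << 63;   /* the signbit mask */
  memcpy (&ux, &x, sizeof ux);
  memcpy (&uy, &y, sizeof uy);
  ux = (ux & ~mask) | (uy & mask);   /* nmask half | mask half */
  memcpy (&x, &ux, sizeof x);
  return x;
}

int
main (void)
{
  printf ("%g\n", copysign_df (2.0, -3.0));   /* -2 */
  return 0;
}
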
@@ -17335,8 +17364,7 @@ ix86_expand_int_vcond (rtx operands[])
/* Subtract (-(INT MAX) - 1) from both operands to make
them signed. */
- mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
- true, false);
+ mask = ix86_build_signbit_mask (mode, true, false);
gen_sub3 = (mode == V4SImode
? gen_subv4si3 : gen_subv2di3);
t1 = gen_reg_rtx (mode);
@@ -22157,6 +22185,8 @@ enum ix86_builtins
/* Vectorizer support builtins. */
IX86_BUILTIN_CPYSGNPS,
IX86_BUILTIN_CPYSGNPD,
+ IX86_BUILTIN_CPYSGNPS256,
+ IX86_BUILTIN_CPYSGNPD256,
IX86_BUILTIN_CVTUDQ2PS,
@@ -23294,6 +23324,9 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+
{ OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
/* F16C */
@@ -25480,15 +25513,23 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
switch (fn)
{
case BUILT_IN_SQRT:
- if (out_mode == DFmode && out_n == 2
- && in_mode == DFmode && in_n == 2)
- return ix86_builtins[IX86_BUILTIN_SQRTPD];
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_SQRTPD];
+ else if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_SQRTPD256];
+ }
break;
case BUILT_IN_SQRTF:
- if (out_mode == SFmode && out_n == 4
- && in_mode == SFmode && in_n == 4)
- return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
+ }
break;
case BUILT_IN_LRINT:
@@ -25498,21 +25539,33 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
break;
case BUILT_IN_LRINTF:
- if (out_mode == SImode && out_n == 4
- && in_mode == SFmode && in_n == 4)
- return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
+ if (out_mode == SImode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
+ }
break;
case BUILT_IN_COPYSIGN:
- if (out_mode == DFmode && out_n == 2
- && in_mode == DFmode && in_n == 2)
- return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
+ else if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
+ }
break;
case BUILT_IN_COPYSIGNF:
- if (out_mode == SFmode && out_n == 4
- && in_mode == SFmode && in_n == 4)
- return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
+ }
break;
default:
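
For reference, a source loop this hook now lets the vectorizer widen to 256 bits; built with something like -O3 -mavx, BUILT_IN_COPYSIGN should be queried with out_n == in_n == 4 and resolve to IX86_BUILTIN_CPYSGNPD256 (illustrative, the exact options depend on the rest of the compiler):

#include <math.h>

void
vcopysign (double *restrict r, const double *restrict a,
           const double *restrict b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    r[i] = copysign (a[i], b[i]);
}
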
@@ -25835,6 +25888,9 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
case IX86_BUILTIN_SQRTPS_NR:
return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
+ case IX86_BUILTIN_SQRTPS_NR256:
+ return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
+
default:
return NULL_TREE;
}
@@ -29377,7 +29433,7 @@ void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
if (VECTOR_MODE_P (mode))
- two = ix86_build_const_vector (SFmode, true, two);
+ two = ix86_build_const_vector (mode, true, two);
two = force_reg (mode, two);
@@ -29424,8 +29480,8 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
if (VECTOR_MODE_P (mode))
{
- mthree = ix86_build_const_vector (SFmode, true, mthree);
- mhalf = ix86_build_const_vector (SFmode, true, mhalf);
+ mthree = ix86_build_const_vector (mode, true, mthree);
+ mhalf = ix86_build_const_vector (mode, true, mhalf);
}
/* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
@@ -29570,7 +29626,16 @@ ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
rtx sgn = gen_reg_rtx (mode);
if (mask == NULL_RTX)
{
- mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
+ enum machine_mode vmode;
+
+ if (mode == SFmode)
+ vmode = V4SFmode;
+ else if (mode == DFmode)
+ vmode = V2DFmode;
+ else
+ vmode = mode;
+
+ mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
if (!VECTOR_MODE_P (mode))
{
/* We need to generate a scalar mode mask in this case. */
@@ -29594,11 +29659,17 @@ ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
- enum machine_mode mode = GET_MODE (op0);
+ enum machine_mode vmode, mode = GET_MODE (op0);
rtx xa, mask;
xa = gen_reg_rtx (mode);
- mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
+ if (mode == SFmode)
+ vmode = V4SFmode;
+ else if (mode == DFmode)
+ vmode = V2DFmode;
+ else
+ vmode = mode;
+ mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
if (!VECTOR_MODE_P (mode))
{
/* We need to generate a scalar mode mask in this case. */
@@ -30941,7 +31012,7 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
- rtx t1, t2, t3, t4;
+ rtx t1, t2, t3;
switch (d->vmode)
{
@@ -30963,34 +31034,34 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
case V8SFmode:
{
- static const unsigned char perm1[8] = { 0, 2, 1, 3, 5, 6, 5, 7 };
- static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
- static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
+ int mask = odd ? 0xdd : 0x88;
t1 = gen_reg_rtx (V8SFmode);
t2 = gen_reg_rtx (V8SFmode);
t3 = gen_reg_rtx (V8SFmode);
- t4 = gen_reg_rtx (V8SFmode);
/* Shuffle within the 128-bit lanes to produce:
- { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
- expand_vselect (t1, d->op0, perm1, 8);
- expand_vselect (t2, d->op1, perm1, 8);
+ { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
+ emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
+ GEN_INT (mask)));
+
+ /* Shuffle the lanes around to produce:
+ { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
+ emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
+ GEN_INT (0x3)));
+
+ /* Shuffle within the 128-bit lanes to produce:
+ { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
+ emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
+
+ /* Shuffle within the 128-bit lanes to produce:
+ { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
+ emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
/* Shuffle the lanes around to produce:
- { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
- emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
- emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
-
- /* Now a vpermil2p will produce the result required. */
- /* ??? The vpermil2p requires a vector constant. Another option
- is a unpck[lh]ps to merge the two vectors to produce
- { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
- vpermilps to get the elements into the final order. */
- d->op0 = t3;
- d->op1 = t4;
- memcpy (d->perm, odd ? permo: perme, 8);
- expand_vec_perm_vpermil (d);
+ { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
+ emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
+ GEN_INT (0x20)));
}
break;
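
The lane comments above can be checked mechanically: model vshufps (per 128-bit lane) and vperm2f128 on element indices and run the even sequence with mask 0x88. The sketch below uses a separate t4 where the patch reuses t2, since the C model reads and writes through pointers (assumed instruction semantics, illustrative only):

#include <stdio.h>

/* vshufps: per 128-bit lane, two elements from the first source,
   two from the second, all selected by the immediate.  */
static void
shufps256 (const int *a, const int *b, int imm, int *d)
{
  int lane;
  for (lane = 0; lane < 8; lane += 4)
    {
      d[lane + 0] = a[lane + ((imm >> 0) & 3)];
      d[lane + 1] = a[lane + ((imm >> 2) & 3)];
      d[lane + 2] = b[lane + ((imm >> 4) & 3)];
      d[lane + 3] = b[lane + ((imm >> 6) & 3)];
    }
}

/* vperm2f128: each result lane is one of the four source lanes.  */
static void
vperm2f128 (const int *a, const int *b, int imm, int *d)
{
  const int *src[4] = { a, a + 4, b, b + 4 };
  int i;
  for (i = 0; i < 4; i++)
    {
      d[i]     = src[imm & 3][i];
      d[i + 4] = src[(imm >> 4) & 3][i];
    }
}

int
main (void)
{
  int op0[8], op1[8], t1[8], t2[8], t3[8], t4[8], r[8], i;
  for (i = 0; i < 8; i++)
    op0[i] = i, op1[i] = 8 + i;

  shufps256 (op0, op1, 0x88, t1);       /* 0 2 8 a 4 6 c e */
  vperm2f128 (t1, t1, 0x3, t2);         /* 4 6 c e 0 2 8 a */
  shufps256 (t1, t2, 0x44, t3);         /* 0 2 4 6 4 6 0 2 */
  shufps256 (t1, t2, 0xee, t4);         /* 8 a c e c e 8 a */
  vperm2f128 (t3, t4, 0x20, r);         /* 0 2 4 6 8 a c e */

  for (i = 0; i < 8; i++)
    printf ("%x ", r[i]);
  printf ("\n");
  return 0;
}
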
@@ -4518,7 +4518,7 @@
real_ldexp (&TWO31r, &dconst1, 31);
two31 = const_double_from_real_value (TWO31r, mode);
- two31 = ix86_build_const_vector (mode, true, two31);
+ two31 = ix86_build_const_vector (vecmode, true, two31);
operands[2] = force_reg (vecmode, two31);
})
@@ -504,6 +504,14 @@
; define patterns for other modes that would expand to several insns.
(define_expand "storent<mode>"
+ [(set (match_operand:AVX256MODEF2P 0 "memory_operand" "")
+ (unspec:AVX256MODEF2P
+ [(match_operand:AVX256MODEF2P 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "")
+
+(define_expand "storent<mode>"
[(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
(unspec:SSEMODEF2P
[(match_operand:SSEMODEF2P 1 "register_operand" "")]
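
The new expander gives the vectorizer a 256-bit non-temporal store (vmovntps/vmovntpd). Roughly the same operation from user code, as a sketch assuming a 32-byte-aligned dst and n a multiple of 8; compile with -mavx:

#include <stddef.h>
#include <immintrin.h>

void
fill_nt (float *dst, float v, size_t n)
{
  __m256 x = _mm256_set1_ps (v);
  size_t i;
  for (i = 0; i < n; i += 8)
    _mm256_stream_ps (dst + i, x);      /* vmovntps: bypass the cache */
  _mm_sfence ();                        /* order the NT stores */
}
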
@@ -540,6 +548,13 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_expand "<code><mode>2"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (absneg:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "register_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_expand "<code><mode>2"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(absneg:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "")))]
@@ -1385,6 +1400,20 @@
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
+(define_expand "reduc_splus_v8sf"
+ [(match_operand:V8SF 0 "register_operand" "")
+ (match_operand:V8SF 1 "register_operand" "")]
+ "TARGET_AVX"
+{
+ rtx tmp = gen_reg_rtx (V8SFmode);
+ rtx tmp2 = gen_reg_rtx (V8SFmode);
+  emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
+  emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
+  emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
+  emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
+ DONE;
+})
+
(define_expand "reduc_splus_v4sf"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")]
@@ -1401,6 +1429,19 @@
DONE;
})
+(define_expand "reduc_splus_v4df"
+ [(match_operand:V4DF 0 "register_operand" "")
+ (match_operand:V4DF 1 "register_operand" "")]
+ "TARGET_AVX"
+{
+  rtx tmp = gen_reg_rtx (V4DFmode);
+  rtx tmp2 = gen_reg_rtx (V4DFmode);
+  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
+  emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
+  emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
+ DONE;
+})
+
(define_expand "reduc_splus_v2df"
[(match_operand:V2DF 0 "register_operand" "")
(match_operand:V2DF 1 "register_operand" "")]
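
vhaddps and vhaddpd add only within 128-bit lanes, so repeated hadds alone never combine the two halves of a ymm register; after the hadds each lane holds its own partial sum, and the vperm2f128/add pair in the expanders above finishes the reduction. A numeric model under that assumed hadd semantics:

#include <stdio.h>

/* Assumed vhaddps semantics, per 128-bit lane.  */
static void
haddv8sf (const float *a, const float *b, float *d)
{
  float t[8];
  int lane, i;
  for (lane = 0; lane < 8; lane += 4)
    {
      t[lane + 0] = a[lane + 0] + a[lane + 1];
      t[lane + 1] = a[lane + 2] + a[lane + 3];
      t[lane + 2] = b[lane + 0] + b[lane + 1];
      t[lane + 3] = b[lane + 2] + b[lane + 3];
    }
  for (i = 0; i < 8; i++)
    d[i] = t[i];
}

int
main (void)
{
  float x[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }, t1[8], t2[8], r[8];
  int i;
  haddv8sf (x, x, t1);
  haddv8sf (t1, t1, t2);     /* t2 = { 10 10 10 10 26 26 26 26 } */
  /* vperm2f128 (..., 1) swaps the lanes; the final add combines them.  */
  for (i = 0; i < 8; i++)
    r[i] = t2[i] + t2[(i + 4) % 8];
  printf ("%g\n", r[0]);     /* 36 = 1 + 2 + ... + 8 */
  return 0;
}
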
@@ -1655,6 +1694,24 @@
(define_expand "copysign<mode>3"
[(set (match_dup 4)
+ (and:AVX256MODEF2P
+ (not:AVX256MODEF2P (match_dup 3))
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")))
+ (set (match_dup 5)
+ (and:AVX256MODEF2P (match_dup 3)
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))
+ (set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (ior:AVX256MODEF2P (match_dup 4) (match_dup 5)))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+{
+ operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
+
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = gen_reg_rtx (<MODE>mode);
+})
+
+(define_expand "copysign<mode>3"
+ [(set (match_dup 4)
(and:SSEMODEF2P
(not:SSEMODEF2P (match_dup 3))
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
@@ -1665,7 +1722,7 @@
(ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
{
- operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
+ operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
operands[4] = gen_reg_rtx (<MODE>mode);
operands[5] = gen_reg_rtx (<MODE>mode);
@@ -2662,7 +2719,8 @@
x = const_double_from_real_value (TWO32r, SFmode);
operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
- operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
+ operands[4] = force_reg (V4SFmode,
+ ix86_build_const_vector (V4SFmode, 1, x));
for (i = 5; i < 8; i++)
operands[i] = gen_reg_rtx (V4SFmode);
@@ -2897,6 +2955,18 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
+(define_insn "*avx_cvtdq2pd256_2"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float:V4DF
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vcvtdq2pd\t{%x1, %0|%0, %x1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
(define_insn "sse2_cvtdq2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(float:V2DF
@@ -3077,6 +3147,18 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
+(define_insn "*avx_cvtps2pd256_2"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vcvtps2pd\t{%x1, %0|%0, %x1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
(define_insn "sse2_cvtps2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(float_extend:V2DF
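
The new *_2 patterns match a conversion applied to the low half of a 256-bit source, which is exactly the form the vec_unpacks_lo_* expanders below generate. In intrinsics terms the V8SF case is roughly this (sketch):

#include <immintrin.h>

__m256d
unpacks_lo_v8sf (__m256 x)
{
  /* vcvtps2pd with an xmm source: widen the low four floats.  */
  return _mm256_cvtps_pd (_mm256_castps256_ps128 (x));
}
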
@@ -3111,6 +3193,22 @@
operands[2] = gen_reg_rtx (V4SFmode);
})
+(define_expand "vec_unpacks_hi_v8sf"
+ [(set (match_dup 2)
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)])))
+ (set (match_operand:V4DF 0 "register_operand" "")
+ (float_extend:V4DF
+ (match_dup 2)))]
+ "TARGET_AVX"
+{
+ operands[2] = gen_reg_rtx (V4SFmode);
+})
+
(define_expand "vec_unpacks_lo_v4sf"
[(set (match_operand:V2DF 0 "register_operand" "")
(float_extend:V2DF
@@ -3119,6 +3217,14 @@
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_SSE2")
+(define_expand "vec_unpacks_lo_v8sf"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX")
+
(define_expand "vec_unpacks_float_hi_v8hi"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V8HI 1 "register_operand" "")]
@@ -3191,6 +3297,28 @@
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_SSE2")
+(define_expand "vec_unpacks_float_hi_v8si"
+ [(set (match_dup 2)
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)])))
+ (set (match_operand:V4DF 0 "register_operand" "")
+ (float:V4DF
+ (match_dup 2)))]
+ "TARGET_AVX"
+ "operands[2] = gen_reg_rtx (V4SImode);")
+
+(define_expand "vec_unpacks_float_lo_v8si"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (float:V4DF
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX")
+
(define_expand "vec_unpacku_float_hi_v4si"
[(set (match_dup 5)
(vec_select:V4SI
@@ -3220,7 +3348,8 @@
x = const_double_from_real_value (TWO32r, DFmode);
operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
- operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
+ operands[4] = force_reg (V2DFmode,
+ ix86_build_const_vector (V2DFmode, 1, x));
operands[5] = gen_reg_rtx (V4SImode);
@@ -3250,12 +3379,30 @@
x = const_double_from_real_value (TWO32r, DFmode);
operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
- operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
+ operands[4] = force_reg (V2DFmode,
+ ix86_build_const_vector (V2DFmode, 1, x));
for (i = 5; i < 8; i++)
operands[i] = gen_reg_rtx (V2DFmode);
})
+(define_expand "vec_pack_trunc_v4df"
+ [(set (match_dup 3)
+ (float_truncate:V4SF
+ (match_operand:V4DF 1 "nonimmediate_operand" "")))
+ (set (match_dup 4)
+ (float_truncate:V4SF
+ (match_operand:V4DF 2 "nonimmediate_operand" "")))
+ (set (match_operand:V8SF 0 "register_operand" "")
+ (vec_concat:V8SF
+ (match_dup 3)
+ (match_dup 4)))]
+ "TARGET_AVX"
+{
+ operands[3] = gen_reg_rtx (V4SFmode);
+ operands[4] = gen_reg_rtx (V4SFmode);
+})
+
(define_expand "vec_pack_trunc_v2df"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V2DF 1 "nonimmediate_operand" "")
@@ -3448,6 +3595,41 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4SF")])
+(define_expand "vec_interleave_highv8sf"
+ [(set (match_dup 3)
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)])))
+ (set (match_dup 4)
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_dup 1)
+ (match_dup 2))
+ (parallel [(const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))
+ (set (match_operand:V8SF 0 "register_operand" "")
+ (vec_concat:V8SF
+ (vec_select:V4SF
+ (match_dup 3)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (vec_select:V4SF
+ (match_dup 4)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX"
+{
+ operands[3] = gen_reg_rtx (V8SFmode);
+ operands[4] = gen_reg_rtx (V8SFmode);
+})
+
(define_insn "vec_interleave_highv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_select:V4SF
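
AVX vunpcklps/vunpckhps interleave within each 128-bit lane, so a true cross-lane interleave needs the extra step the expander performs: compute both per-lane interleaves, then concatenate the wanted halves. The same dataflow in intrinsics (sketch):

#include <immintrin.h>

__m256
interleave_high_v8sf (__m256 a, __m256 b)
{
  __m256 lo = _mm256_unpacklo_ps (a, b);  /* { 0 8 1 9 | 4 c 5 d } */
  __m256 hi = _mm256_unpackhi_ps (a, b);  /* { 2 a 3 b | 6 e 7 f } */
  /* Take the high lane of each: { 4 c 5 d 6 e 7 f }.  */
  return _mm256_permute2f128_ps (lo, hi, 0x31);
}
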
@@ -3492,6 +3674,41 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4SF")])
+(define_expand "vec_interleave_lowv8sf"
+ [(set (match_dup 3)
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)])))
+ (set (match_dup 4)
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_dup 1)
+ (match_dup 2))
+ (parallel [(const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))
+ (set (match_operand:V8SF 0 "register_operand" "")
+ (vec_concat:V8SF
+ (vec_select:V4SF
+ (match_dup 3)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (vec_select:V4SF
+ (match_dup 4)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+{
+ operands[3] = gen_reg_rtx (V8SFmode);
+ operands[4] = gen_reg_rtx (V8SFmode);
+})
+
(define_insn "vec_interleave_lowv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_select:V4SF
@@ -4360,6 +4577,17 @@
})
(define_expand "vec_extract<mode>"
+ [(match_operand:<avxscalarmode> 0 "register_operand" "")
+ (match_operand:AVX256MODEF2P 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extract<mode>"
[(match_operand:<ssescalarmode> 0 "register_operand" "")
(match_operand:SSEMODE 1 "register_operand" "")
(match_operand 2 "const_int_operand" "")]
@@ -4391,6 +4619,35 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
+(define_expand "vec_interleave_highv4df"
+ [(set (match_dup 3)
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))
+ (set (match_dup 4)
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_dup 1)
+ (match_dup 2))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))
+ (set (match_operand:V4DF 0 "register_operand" "")
+ (vec_concat:V4DF
+ (vec_select:V2DF
+ (match_dup 3)
+ (parallel [(const_int 2) (const_int 3)]))
+ (vec_select:V2DF
+ (match_dup 4)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+{
+ operands[3] = gen_reg_rtx (V4DFmode);
+ operands[4] = gen_reg_rtx (V4DFmode);
+})
+
(define_expand "vec_interleave_highv2df"
[(set (match_operand:V2DF 0 "register_operand" "")
(vec_select:V2DF
@@ -4498,6 +4756,35 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
+(define_expand "vec_interleave_lowv4df"
+ [(set (match_dup 3)
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))
+ (set (match_dup 4)
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_dup 1)
+ (match_dup 2))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))
+ (set (match_operand:V4DF 0 "register_operand" "")
+ (vec_concat:V4DF
+ (vec_select:V2DF
+ (match_dup 3)
+ (parallel [(const_int 0) (const_int 1)]))
+ (vec_select:V2DF
+ (match_dup 4)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_AVX"
+{
+ operands[3] = gen_reg_rtx (V4DFmode);
+ operands[4] = gen_reg_rtx (V4DFmode);
+})
+
(define_expand "vec_interleave_lowv2df"
[(set (match_operand:V2DF 0 "register_operand" "")
(vec_select:V2DF
@@ -12079,3 +12366,67 @@
[(set_attr "type" "ssecvt")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
+
+(define_insn "*vec_concat_lo_<mode>_avx"
+ [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE4P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)]))
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_AVX"
+ "vperm2f128\t{$0x20, %2, %1, %0|%0, %1, %2, 0x20}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*vec_concat_hi_<mode>_avx"
+ [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE4P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x")
+ (parallel [(const_int 2) (const_int 3)]))
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vperm2f128\t{$0x31, %2, %1, %0|%0, %1, %2, 0x31}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*vec_concat_lo_<mode>_avx"
+ [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE8P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vperm2f128\t{$0x20, %2, %1, %0|%0, %1, %2, 0x20}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*vec_concat_hi_<mode>_avx"
+ [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE8P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX"
+ "vperm2f128\t{$0x31, %2, %1, %0|%0, %1, %2, 00x31}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])