===================================================================
@@ -70,7 +70,32 @@
(define_mode_iterator VI24_128 [V8HI V4SI])
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
+;; Mapping from float mode to required SSE level
+(define_mode_attr sse
+ [(SF "sse") (DF "sse2")
+ (V4SF "sse") (V2DF "sse2")
+ (V8SF "avx") (V4DF "avx")])
+
+(define_mode_attr sse2
+ [(V16QI "sse2") (V32QI "avx")
+ (V2DI "sse2") (V4DI "avx")])
+
+(define_mode_attr sse3
+ [(V16QI "sse3") (V32QI "avx")])
+
+(define_mode_attr sse4_1
+ [(V4SF "sse4_1") (V2DF "sse4_1")
+ (V8SF "avx") (V4DF "avx")])
+
+;; Pack/unpack vector modes
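+;; <sseunpackmode> is the mode with elements twice as wide, produced by
+;; unpacking; <ssepackmode> is the mode with elements half as wide,
+;; produced by packing.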
+(define_mode_attr sseunpackmode
+ [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
+
+(define_mode_attr ssepackmode
+ [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
+
;; Instruction suffix for sign and zero extensions.
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
@@ -126,23 +151,6 @@
(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
-;; Mapping from float mode to required SSE level
-(define_mode_attr sse
- [(SF "sse") (DF "sse2")
- (V4SF "sse") (V2DF "sse2")
- (V8SF "avx") (V4DF "avx")])
-
-(define_mode_attr sse2
- [(V16QI "sse2") (V32QI "avx")
- (V2DI "sse2") (V4DI "avx")])
-
-(define_mode_attr sse3
- [(V16QI "sse3") (V32QI "avx")])
-
-(define_mode_attr sse4_1
- [(V4SF "sse4_1") (V2DF "sse4_1")
- (V8SF "avx") (V4DF "avx")])
-
;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
@@ -5856,42 +5864,18 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_expand "vec_pack_trunc_v8hi"
- [(match_operand:V16QI 0 "register_operand" "")
- (match_operand:V8HI 1 "register_operand" "")
- (match_operand:V8HI 2 "register_operand" "")]
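+;; Pack two vectors of wide integer elements into one vector with elements
+;; of half the width, keeping the low half of each element (the
+;; even-positioned narrow elements on little-endian).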
+(define_expand "vec_pack_trunc_<mode>"
+ [(match_operand:<ssepackmode> 0 "register_operand" "")
+ (match_operand:VI248_128 1 "register_operand" "")
+ (match_operand:VI248_128 2 "register_operand" "")]
"TARGET_SSE2"
{
- rtx op1 = gen_lowpart (V16QImode, operands[1]);
- rtx op2 = gen_lowpart (V16QImode, operands[2]);
+ rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
+ rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
DONE;
})
-(define_expand "vec_pack_trunc_v4si"
- [(match_operand:V8HI 0 "register_operand" "")
- (match_operand:V4SI 1 "register_operand" "")
- (match_operand:V4SI 2 "register_operand" "")]
- "TARGET_SSE2"
-{
- rtx op1 = gen_lowpart (V8HImode, operands[1]);
- rtx op2 = gen_lowpart (V8HImode, operands[2]);
- ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
- DONE;
-})
-
-(define_expand "vec_pack_trunc_v2di"
- [(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V2DI 1 "register_operand" "")
- (match_operand:V2DI 2 "register_operand" "")]
- "TARGET_SSE2"
-{
- rtx op1 = gen_lowpart (V4SImode, operands[1]);
- rtx op2 = gen_lowpart (V4SImode, operands[2]);
- ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
- DONE;
-})
-
(define_insn "sse2_packsswb"
[(set (match_operand:V16QI 0 "register_operand" "=x,x")
(vec_concat:V16QI
@@ -6767,150 +6751,30 @@
(set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
(set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
-(define_expand "vec_unpacku_hi_v16qi"
- [(match_operand:V8HI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")]
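+;; ix86_expand_sse_unpack now chooses between the SSE2 interleave and the
+;; SSE4.1 pmovsx/pmovzx forms itself, so one TARGET_SSE2 expander per
+;; operation, iterated over the modes, is enough.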
+(define_expand "vec_unpacku_hi_<mode>"
+ [(match_operand:<sseunpackmode> 0 "register_operand" "")
+ (match_operand:VI124_128 1 "register_operand" "")]
"TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, true);
- else
- ix86_expand_sse_unpack (operands, true, true);
- DONE;
-})
+ "ix86_expand_sse_unpack (operands, true, true); DONE;")
-(define_expand "vec_unpacks_hi_v16qi"
- [(match_operand:V8HI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")]
+(define_expand "vec_unpacks_hi_<mode>"
+ [(match_operand:<sseunpackmode> 0 "register_operand" "")
+ (match_operand:VI124_128 1 "register_operand" "")]
"TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, true);
- else
- ix86_expand_sse_unpack (operands, false, true);
- DONE;
-})
+ "ix86_expand_sse_unpack (operands, false, true); DONE;")
-(define_expand "vec_unpacku_lo_v16qi"
- [(match_operand:V8HI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")]
+(define_expand "vec_unpacku_lo_<mode>"
+ [(match_operand:<sseunpackmode> 0 "register_operand" "")
+ (match_operand:VI124_128 1 "register_operand" "")]
"TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, false);
- else
- ix86_expand_sse_unpack (operands, true, false);
- DONE;
-})
+ "ix86_expand_sse_unpack (operands, true, false); DONE;")
-(define_expand "vec_unpacks_lo_v16qi"
- [(match_operand:V8HI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")]
+(define_expand "vec_unpacks_lo_<mode>"
+ [(match_operand:<sseunpackmode> 0 "register_operand" "")
+ (match_operand:VI124_128 1 "register_operand" "")]
"TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, false);
- else
- ix86_expand_sse_unpack (operands, false, false);
- DONE;
-})
+ "ix86_expand_sse_unpack (operands, false, false); DONE;")
-(define_expand "vec_unpacku_hi_v8hi"
- [(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V8HI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, true);
- else
- ix86_expand_sse_unpack (operands, true, true);
- DONE;
-})
-
-(define_expand "vec_unpacks_hi_v8hi"
- [(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V8HI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, true);
- else
- ix86_expand_sse_unpack (operands, false, true);
- DONE;
-})
-
-(define_expand "vec_unpacku_lo_v8hi"
- [(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V8HI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, false);
- else
- ix86_expand_sse_unpack (operands, true, false);
- DONE;
-})
-
-(define_expand "vec_unpacks_lo_v8hi"
- [(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V8HI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, false);
- else
- ix86_expand_sse_unpack (operands, false, false);
- DONE;
-})
-
-(define_expand "vec_unpacku_hi_v4si"
- [(match_operand:V2DI 0 "register_operand" "")
- (match_operand:V4SI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, true);
- else
- ix86_expand_sse_unpack (operands, true, true);
- DONE;
-})
-
-(define_expand "vec_unpacks_hi_v4si"
- [(match_operand:V2DI 0 "register_operand" "")
- (match_operand:V4SI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, true);
- else
- ix86_expand_sse_unpack (operands, false, true);
- DONE;
-})
-
-(define_expand "vec_unpacku_lo_v4si"
- [(match_operand:V2DI 0 "register_operand" "")
- (match_operand:V4SI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, false);
- else
- ix86_expand_sse_unpack (operands, true, false);
- DONE;
-})
-
-(define_expand "vec_unpacks_lo_v4si"
- [(match_operand:V2DI 0 "register_operand" "")
- (match_operand:V4SI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, false);
- else
- ix86_expand_sse_unpack (operands, false, false);
- DONE;
-})
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous
@@ -10062,7 +9926,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn_and_split "vec_dup<mode>"
+(define_insn "vec_dup<mode>"
[(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
(vec_duplicate:AVX256MODE24P
(match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
@@ -10070,15 +9934,20 @@
"@
vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
#"
- "&& reload_completed && REG_P (operands[1])"
- [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
- (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
- "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
+(define_split
+ [(set (match_operand:AVX256MODE24P 0 "register_operand" "")
+ (vec_duplicate:AVX256MODE24P
+ (match_operand:<avxscalarmode> 1 "register_operand" "")))]
+ "TARGET_AVX && reload_completed"
+ [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
+ (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));")
+
(define_insn "avx_vbroadcastf128_<mode>"
[(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
(vec_concat:AVX256MODE
===================================================================
@@ -114,7 +114,6 @@
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
extern void ix86_expand_sse_unpack (rtx[], bool, bool);
-extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
extern bool ix86_expand_int_addcc (rtx[]);
extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void ix86_split_call_vzeroupper (rtx, rtx);
===================================================================
@@ -19100,91 +19100,87 @@
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
enum machine_mode imode = GET_MODE (operands[1]);
- rtx (*unpack)(rtx, rtx, rtx);
- rtx se, dest;
+ rtx tmp, dest;
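+ /* With SSE4.1, extend the low half directly with pmovsx/pmovzx (for the
+ high-part variants the upper half is first shifted down); otherwise
+ interleave the elements with zero or with their sign bits.  */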
- switch (imode)
+ if (TARGET_SSE4_1)
{
- case V16QImode:
+ rtx (*unpack)(rtx, rtx);
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+ else
+ unpack = gen_sse4_1_sign_extendv8qiv8hi2;
+ break;
+ case V8HImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv4hiv4si2;
+ else
+ unpack = gen_sse4_1_sign_extendv4hiv4si2;
+ break;
+ case V4SImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv2siv2di2;
+ else
+ unpack = gen_sse4_1_sign_extendv2siv2di2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
if (high_p)
- unpack = gen_vec_interleave_highv16qi;
+ {
+ /* Shift higher 8 bytes to lower 8 bytes. */
+ tmp = gen_reg_rtx (imode);
+ emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
+ gen_lowpart (V1TImode, operands[1]),
+ GEN_INT (64)));
+ }
else
- unpack = gen_vec_interleave_lowv16qi;
- break;
- case V8HImode:
- if (high_p)
- unpack = gen_vec_interleave_highv8hi;
- else
- unpack = gen_vec_interleave_lowv8hi;
- break;
- case V4SImode:
- if (high_p)
- unpack = gen_vec_interleave_highv4si;
- else
- unpack = gen_vec_interleave_lowv4si;
- break;
- default:
- gcc_unreachable ();
- }
+ tmp = operands[1];
- dest = gen_lowpart (imode, operands[0]);
-
- if (unsigned_p)
- se = force_reg (imode, CONST0_RTX (imode));
+ emit_insn (unpack (operands[0], tmp));
+ }
else
- se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
- operands[1], pc_rtx, pc_rtx);
+ {
+ rtx (*unpack)(rtx, rtx, rtx);
- emit_insn (unpack (dest, operands[1], se));
-}
+ switch (imode)
+ {
+ case V16QImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv16qi;
+ else
+ unpack = gen_vec_interleave_lowv16qi;
+ break;
+ case V8HImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv8hi;
+ else
+ unpack = gen_vec_interleave_lowv8hi;
+ break;
+ case V4SImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv4si;
+ else
+ unpack = gen_vec_interleave_lowv4si;
+ break;
+ default:
+ gcc_unreachable ();
+ }
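+ /* The interleave patterns operate in the source mode, so view the
+ wider destination in that mode.  */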
-/* This function performs the same task as ix86_expand_sse_unpack,
- but with SSE4.1 instructions. */
+ dest = gen_lowpart (imode, operands[0]);
-void
-ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
-{
- enum machine_mode imode = GET_MODE (operands[1]);
- rtx (*unpack)(rtx, rtx);
- rtx src, dest;
-
- switch (imode)
- {
- case V16QImode:
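+ /* Interleave with zero for zero extension, or with a vector of copies
+ of the sign bit (0 > x) for sign extension.  */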
if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+ tmp = force_reg (imode, CONST0_RTX (imode));
else
- unpack = gen_sse4_1_sign_extendv8qiv8hi2;
- break;
- case V8HImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv4hiv4si2;
- else
- unpack = gen_sse4_1_sign_extendv4hiv4si2;
- break;
- case V4SImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv2siv2di2;
- else
- unpack = gen_sse4_1_sign_extendv2siv2di2;
- break;
- default:
- gcc_unreachable ();
- }
+ tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+ operands[1], pc_rtx, pc_rtx);
- dest = operands[0];
- if (high_p)
- {
- /* Shift higher 8 bytes to lower 8 bytes. */
- src = gen_reg_rtx (imode);
- emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
- gen_lowpart (V1TImode, operands[1]),
- GEN_INT (64)));
+ emit_insn (unpack (dest, operands[1], tmp));
}
- else
- src = operands[1];
-
- emit_insn (unpack (dest, src));
}
/* Expand conditional increment or decrement using adb/sbb instructions.