From patchwork Sat Apr 16 12:53:38 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Uros Bizjak X-Patchwork-Id: 91485 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id EE92AB6FE6 for ; Sat, 16 Apr 2011 22:54:03 +1000 (EST) Received: (qmail 6120 invoked by alias); 16 Apr 2011 12:53:58 -0000 Received: (qmail 6105 invoked by uid 22791); 16 Apr 2011 12:53:56 -0000 X-SWARE-Spam-Status: No, hits=-2.1 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, RFC_ABUSE_POST, TW_AV, TW_VX, TW_ZJ, T_TO_NO_BRKTS_FREEMAIL X-Spam-Check-By: sourceware.org Received: from mail-pv0-f175.google.com (HELO mail-pv0-f175.google.com) (74.125.83.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Sat, 16 Apr 2011 12:53:39 +0000 Received: by pvc30 with SMTP id 30so1749358pvc.20 for ; Sat, 16 Apr 2011 05:53:38 -0700 (PDT) MIME-Version: 1.0 Received: by 10.142.191.3 with SMTP id o3mr1544807wff.59.1302958418711; Sat, 16 Apr 2011 05:53:38 -0700 (PDT) Received: by 10.142.87.14 with HTTP; Sat, 16 Apr 2011 05:53:38 -0700 (PDT) Date: Sat, 16 Apr 2011 14:53:38 +0200 Message-ID: Subject: [PATCH, i386]: Macroize and simplify vector integer pack/unpack patterns. From: Uros Bizjak To: gcc-patches@gcc.gnu.org Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Hello! 2011-04-16 Uros Bizjak * config/i386/sse.md (sseunpackmode): New mode attribute. (ssepackmode): Ditto. (vec_pack_trunc_<mode>): Macroize expander from vec_pack_trunc_{v8hi,v4si,v2di} using VI248_128 mode iterator. 
(vec_unpacks_lo_<mode>): Macroize expander from vec_unpacks_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator. (vec_unpacks_hi_<mode>): Macroize expander from vec_unpacks_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator. (vec_unpacku_lo_<mode>): Macroize expander from vec_unpacku_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator. (vec_unpacku_hi_<mode>): Macroize expander from vec_unpacku_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator. * config/i386/i386.c (ix86_expand_sse_unpack): Merge with ix86_expand_sse4_unpack. * config/i386/i386-protos.h (ix86_expand_sse4_unpack): Remove. Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}. Committed to SVN mainline. Uros. Index: sse.md =================================================================== --- sse.md (revision 172580) +++ sse.md (working copy) @@ -70,7 +70,32 @@ (define_mode_iterator VI24_128 [V8HI V4SI]) (define_mode_iterator VI248_128 [V8HI V4SI V2DI]) +;; Mapping from float mode to required SSE level +(define_mode_attr sse + [(SF "sse") (DF "sse2") + (V4SF "sse") (V2DF "sse2") + (V8SF "avx") (V4DF "avx")]) +(define_mode_attr sse2 + [(V16QI "sse2") (V32QI "avx") + (V2DI "sse2") (V4DI "avx")]) + +(define_mode_attr sse3 + [(V16QI "sse3") (V32QI "avx")]) + +(define_mode_attr sse4_1 + [(V4SF "sse4_1") (V2DF "sse4_1") + (V8SF "avx") (V4DF "avx")]) + +;; Pack/unpack vector modes +(define_mode_attr sseunpackmode + [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) + +(define_mode_attr ssepackmode + [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")]) + + + ;; Instruction suffix for sign and zero extensions. 
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) @@ -126,23 +151,6 @@ (V2DF "TARGET_SSE") (V4SF "TARGET_SSE") (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")]) -;; Mapping from float mode to required SSE level -(define_mode_attr sse - [(SF "sse") (DF "sse2") - (V4SF "sse") (V2DF "sse2") - (V8SF "avx") (V4DF "avx")]) - -(define_mode_attr sse2 - [(V16QI "sse2") (V32QI "avx") - (V2DI "sse2") (V4DI "avx")]) - -(define_mode_attr sse3 - [(V16QI "sse3") (V32QI "avx")]) - -(define_mode_attr sse4_1 - [(V4SF "sse4_1") (V2DF "sse4_1") - (V8SF "avx") (V4DF "avx")]) - ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) @@ -5856,42 +5864,18 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_expand "vec_pack_trunc_v8hi" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")] +(define_expand "vec_pack_trunc_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI248_128 1 "register_operand" "") + (match_operand:VI248_128 2 "register_operand" "")] "TARGET_SSE2" { - rtx op1 = gen_lowpart (V16QImode, operands[1]); - rtx op2 = gen_lowpart (V16QImode, operands[2]); + rtx op1 = gen_lowpart (mode, operands[1]); + rtx op2 = gen_lowpart (mode, operands[2]); ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); DONE; }) -(define_expand "vec_pack_trunc_v4si" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "register_operand" "")] - "TARGET_SSE2" -{ - rtx op1 = gen_lowpart (V8HImode, operands[1]); - rtx op2 = gen_lowpart (V8HImode, operands[2]); - ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); - DONE; -}) - -(define_expand "vec_pack_trunc_v2di" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "register_operand" "")] 
- "TARGET_SSE2" -{ - rtx op1 = gen_lowpart (V4SImode, operands[1]); - rtx op2 = gen_lowpart (V4SImode, operands[2]); - ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); - DONE; -}) - (define_insn "sse2_packsswb" [(set (match_operand:V16QI 0 "register_operand" "=x,x") (vec_concat:V16QI @@ -6767,150 +6751,30 @@ (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex") (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")]) -(define_expand "vec_unpacku_hi_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] +(define_expand "vec_unpacku_hi_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, true); - else - ix86_expand_sse_unpack (operands, true, true); - DONE; -}) + "ix86_expand_sse_unpack (operands, true, true); DONE;") -(define_expand "vec_unpacks_hi_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] +(define_expand "vec_unpacks_hi_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, true); - else - ix86_expand_sse_unpack (operands, false, true); - DONE; -}) + "ix86_expand_sse_unpack (operands, false, true); DONE;") -(define_expand "vec_unpacku_lo_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "")] +(define_expand "vec_unpacku_lo_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, false); - else - ix86_expand_sse_unpack (operands, true, false); - DONE; -}) + "ix86_expand_sse_unpack (operands, true, false); DONE;") -(define_expand "vec_unpacks_lo_v16qi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:V16QI 1 
"register_operand" "")] +(define_expand "vec_unpacks_lo_" + [(match_operand: 0 "register_operand" "") + (match_operand:VI124_128 1 "register_operand" "")] "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, false); - else - ix86_expand_sse_unpack (operands, false, false); - DONE; -}) + "ix86_expand_sse_unpack (operands, false, false); DONE;") -(define_expand "vec_unpacku_hi_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, true); - else - ix86_expand_sse_unpack (operands, true, true); - DONE; -}) - -(define_expand "vec_unpacks_hi_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, true); - else - ix86_expand_sse_unpack (operands, false, true); - DONE; -}) - -(define_expand "vec_unpacku_lo_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, false); - else - ix86_expand_sse_unpack (operands, true, false); - DONE; -}) - -(define_expand "vec_unpacks_lo_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, false); - else - ix86_expand_sse_unpack (operands, false, false); - DONE; -}) - -(define_expand "vec_unpacku_hi_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, true); - else - ix86_expand_sse_unpack (operands, true, true); - DONE; -}) - -(define_expand "vec_unpacks_hi_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] - 
"TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, true); - else - ix86_expand_sse_unpack (operands, false, true); - DONE; -}) - -(define_expand "vec_unpacku_lo_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, true, false); - else - ix86_expand_sse_unpack (operands, true, false); - DONE; -}) - -(define_expand "vec_unpacks_lo_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "")] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_expand_sse4_unpack (operands, false, false); - else - ix86_expand_sse_unpack (operands, false, false); - DONE; -}) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Miscellaneous @@ -10062,7 +9926,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn_and_split "vec_dup" +(define_insn "vec_dup" [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x") (vec_duplicate:AVX256MODE24P (match_operand: 1 "nonimmediate_operand" "m,?x")))] @@ -10070,15 +9934,20 @@ "@ vbroadcast\t{%1, %0|%0, %1} #" - "&& reload_completed && REG_P (operands[1])" - [(set (match_dup 2) (vec_duplicate: (match_dup 1))) - (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))] - "operands[2] = gen_rtx_REG (mode, REGNO (operands[0]));" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) +(define_split + [(set (match_operand:AVX256MODE24P 0 "register_operand" "") + (vec_duplicate:AVX256MODE24P + (match_operand: 1 "register_operand" "")))] + "TARGET_AVX && reload_completed" + [(set (match_dup 2) (vec_duplicate: (match_dup 1))) + (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))] + "operands[2] = gen_rtx_REG (mode, REGNO (operands[0]));") + (define_insn "avx_vbroadcastf128_" [(set (match_operand:AVX256MODE 0 "register_operand" 
"=x,x,x") (vec_concat:AVX256MODE Index: i386-protos.h =================================================================== --- i386-protos.h (revision 172580) +++ i386-protos.h (working copy) @@ -114,7 +114,6 @@ extern bool ix86_expand_fp_vcond (rtx[]); extern bool ix86_expand_int_vcond (rtx[]); extern void ix86_expand_sse_unpack (rtx[], bool, bool); -extern void ix86_expand_sse4_unpack (rtx[], bool, bool); extern bool ix86_expand_int_addcc (rtx[]); extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); extern void ix86_split_call_vzeroupper (rtx, rtx); Index: i386.c =================================================================== --- i386.c (revision 172580) +++ i386.c (working copy) @@ -19100,91 +19100,87 @@ ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p) { enum machine_mode imode = GET_MODE (operands[1]); - rtx (*unpack)(rtx, rtx, rtx); - rtx se, dest; + rtx tmp, dest; - switch (imode) + if (TARGET_SSE4_1) { - case V16QImode: + rtx (*unpack)(rtx, rtx); + + switch (imode) + { + case V16QImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv8qiv8hi2; + else + unpack = gen_sse4_1_sign_extendv8qiv8hi2; + break; + case V8HImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv4hiv4si2; + else + unpack = gen_sse4_1_sign_extendv4hiv4si2; + break; + case V4SImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv2siv2di2; + else + unpack = gen_sse4_1_sign_extendv2siv2di2; + break; + default: + gcc_unreachable (); + } + if (high_p) - unpack = gen_vec_interleave_highv16qi; + { + /* Shift higher 8 bytes to lower 8 bytes. 
*/ + tmp = gen_reg_rtx (imode); + emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp), + gen_lowpart (V1TImode, operands[1]), + GEN_INT (64))); + } else - unpack = gen_vec_interleave_lowv16qi; - break; - case V8HImode: - if (high_p) - unpack = gen_vec_interleave_highv8hi; - else - unpack = gen_vec_interleave_lowv8hi; - break; - case V4SImode: - if (high_p) - unpack = gen_vec_interleave_highv4si; - else - unpack = gen_vec_interleave_lowv4si; - break; - default: - gcc_unreachable (); - } + tmp = operands[1]; - dest = gen_lowpart (imode, operands[0]); - - if (unsigned_p) - se = force_reg (imode, CONST0_RTX (imode)); + emit_insn (unpack (operands[0], tmp)); + } else - se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), - operands[1], pc_rtx, pc_rtx); + { + rtx (*unpack)(rtx, rtx, rtx); - emit_insn (unpack (dest, operands[1], se)); -} + switch (imode) + { + case V16QImode: + if (high_p) + unpack = gen_vec_interleave_highv16qi; + else + unpack = gen_vec_interleave_lowv16qi; + break; + case V8HImode: + if (high_p) + unpack = gen_vec_interleave_highv8hi; + else + unpack = gen_vec_interleave_lowv8hi; + break; + case V4SImode: + if (high_p) + unpack = gen_vec_interleave_highv4si; + else + unpack = gen_vec_interleave_lowv4si; + break; + default: + gcc_unreachable (); + } -/* This function performs the same task as ix86_expand_sse_unpack, - but with SSE4.1 instructions. 
*/ + dest = gen_lowpart (imode, operands[0]); -void -ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) -{ - enum machine_mode imode = GET_MODE (operands[1]); - rtx (*unpack)(rtx, rtx); - rtx src, dest; - - switch (imode) - { - case V16QImode: if (unsigned_p) - unpack = gen_sse4_1_zero_extendv8qiv8hi2; + tmp = force_reg (imode, CONST0_RTX (imode)); else - unpack = gen_sse4_1_sign_extendv8qiv8hi2; - break; - case V8HImode: - if (unsigned_p) - unpack = gen_sse4_1_zero_extendv4hiv4si2; - else - unpack = gen_sse4_1_sign_extendv4hiv4si2; - break; - case V4SImode: - if (unsigned_p) - unpack = gen_sse4_1_zero_extendv2siv2di2; - else - unpack = gen_sse4_1_sign_extendv2siv2di2; - break; - default: - gcc_unreachable (); - } + tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), + operands[1], pc_rtx, pc_rtx); - dest = operands[0]; - if (high_p) - { - /* Shift higher 8 bytes to lower 8 bytes. */ - src = gen_reg_rtx (imode); - emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src), - gen_lowpart (V1TImode, operands[1]), - GEN_INT (64))); + emit_insn (unpack (dest, operands[1], tmp)); } - else - src = operands[1]; - - emit_insn (unpack (dest, src)); } /* Expand conditional increment or decrement using adb/sbb instructions.