[ia64,rfa] vector pattern improvements

Message ID	1295384783.21802.207.camel@hpsje.cup.hp.com
State	New
Headers	show Return-Path: <gcc-patches-return-283223-incoming=patchwork.ozlabs.org@gcc.gnu.org> Subject: Re: [ia64, rfa] vector pattern improvements From: Steve Ellcey <sje@cup.hp.com> Reply-To: sje@cup.hp.com To: Richard Henderson <rth@redhat.com> Cc: gcc-patches@gcc.gnu.org In-Reply-To: <4D2BB6B7.60409@redhat.com> References: <201101061725.p06HPCX21825@lucas.cup.hp.com> <4D260FB1.80800@redhat.com> <1294345320.21802.38.camel@hpsje.cup.hp.com> <4D279B39.2090404@redhat.com> <1294686965.21802.78.camel@hpsje.cup.hp.com> <4D2B6079.5040904@redhat.com> <1294691079.21802.82.camel@hpsje.cup.hp.com> <4D2B6C5E.6090203@redhat.com> <1294701243.21802.132.camel@hpsje.cup.hp.com> <4D2B9908.3020901@redhat.com> <1294707439.21802.134.camel@hpsje.cup.hp.com> <4D2BB6B7.60409@redhat.com> Content-Type: multipart/mixed; boundary="=-7DydpiSIXKVPEILGlJW6" Date: Tue, 18 Jan 2011 13:06:23 -0800 Message-ID: <1295384783.21802.207.camel@hpsje.cup.hp.com> Mime-Version: 1.0 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org

Index: config/ia64/ia64.c =================================================================== --- config/ia64/ia64.c (revision 168941) +++ config/ia64/ia64.c (working copy) @@ -2007,7 +2007,10 @@ ia64_expand_unpack (rtx operands[3], boo gcc_assert (!neg); } - emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x)); + if (TARGET_BIG_ENDIAN) + emit_insn (gen (gen_lowpart (mode, operands[0]), x, operands[1])); + else + emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x)); } /* Emit an integral vector widening sum operations. */ @@ -2058,8 +2061,16 @@ ia64_expand_widen_sum (rtx operands[3], h = gen_reg_rtx (wmode); s = gen_reg_rtx (wmode); - emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x)); - emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x)); + if (TARGET_BIG_ENDIAN) + { + emit_insn (unpack_l (gen_lowpart (mode, l), x, operands[1])); + emit_insn (unpack_h (gen_lowpart (mode, h), x, operands[1])); + } + else + { + emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x)); + emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x)); + } emit_insn (plus (s, l, operands[2])); emit_insn (plus (operands[0], h, s)); } @@ -2082,7 +2093,10 @@ ia64_expand_widen_mul_v4hi (rtx operands emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16))); interl = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi; - emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h)); + if (TARGET_BIG_ENDIAN) + emit_insn (interl (gen_lowpart (V4HImode, operands[0]), h, l)); + else + emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h)); } /* Emit a signed or unsigned V8QI dot product operation. */ @@ -2115,14 +2129,28 @@ ia64_expand_dot_prod_v8qi (rtx operands[ h1 = gen_reg_rtx (V4HImode); h2 = gen_reg_rtx (V4HImode); - emit_insn (gen_vec_interleave_lowv8qi - (gen_lowpart (V8QImode, l1), operands[1], x1)); - emit_insn (gen_vec_interleave_lowv8qi - (gen_lowpart (V8QImode, l2), operands[2], x2)); - emit_insn (gen_vec_interleave_highv8qi - (gen_lowpart (V8QImode, h1), operands[1], x1)); - emit_insn (gen_vec_interleave_highv8qi - (gen_lowpart (V8QImode, h2), operands[2], x2)); + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_interleave_lowv8qi + (gen_lowpart (V8QImode, l1), x1, operands[1])); + emit_insn (gen_vec_interleave_lowv8qi + (gen_lowpart (V8QImode, l2), x2, operands[2])); + emit_insn (gen_vec_interleave_highv8qi + (gen_lowpart (V8QImode, h1), x1, operands[1])); + emit_insn (gen_vec_interleave_highv8qi + (gen_lowpart (V8QImode, h2), x2, operands[2])); + } + else + { + emit_insn (gen_vec_interleave_lowv8qi + (gen_lowpart (V8QImode, l1), operands[1], x1)); + emit_insn (gen_vec_interleave_lowv8qi + (gen_lowpart (V8QImode, l2), operands[2], x2)); + emit_insn (gen_vec_interleave_highv8qi + (gen_lowpart (V8QImode, h1), operands[1], x1)); + emit_insn (gen_vec_interleave_highv8qi + (gen_lowpart (V8QImode, h2), operands[2], x2)); + } p1 = gen_reg_rtx (V2SImode); p2 = gen_reg_rtx (V2SImode); Index: config/ia64/vect.md =================================================================== --- config/ia64/vect.md (revision 168941) +++ config/ia64/vect.md (working copy) @@ -370,7 +370,7 @@ (define_expand "mulv2si3" [(set (match_operand:V2SI 0 "gr_register_operand" "") (mult:V2SI (match_operand:V2SI 1 "gr_register_operand" "r") (match_operand:V2SI 2 "gr_register_operand" "r")))] - "" + "!TARGET_BIG_ENDIAN" { rtx t0, t1, t2, t3, t4, t5, t6, t7, x; rtx op1h = gen_lowpart (V4HImode, operands[1]); @@ -709,7 +709,13 @@ (define_insn "vec_interleave_lowv8qi" (const_int 2) (const_int 10) (const_int 3) (const_int 11)])))] "" - "unpack1.l %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack1.l %0 = %r1, %r2"; + else + return "%,unpack1.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "vec_interleave_highv8qi" @@ -723,7 +729,13 @@ (define_insn "vec_interleave_highv8qi" (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))] "" - "unpack1.h %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack1.h %0 = %r1, %r2"; + else + return "%,unpack1.h %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "mix1_r" @@ -857,7 +869,10 @@ (define_expand "vec_extract_evenv8qi" "" { rtx temp = gen_reg_rtx (V8QImode); - emit_insn (gen_mix1_r (temp, operands[1], operands[2])); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_mix1_l (temp, operands[2], operands[1])); + else + emit_insn (gen_mix1_r (temp, operands[1], operands[2])); emit_insn (gen_mux1_alt (operands[0], temp)); DONE; }) @@ -869,7 +884,10 @@ (define_expand "vec_extract_oddv8qi" "" { rtx temp = gen_reg_rtx (V8QImode); - emit_insn (gen_mix1_l (temp, operands[1], operands[2])); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_mix1_r (temp, operands[2], operands[1])); + else + emit_insn (gen_mix1_l (temp, operands[1], operands[2])); emit_insn (gen_mux1_alt (operands[0], temp)); DONE; }) @@ -885,7 +903,13 @@ (define_insn "vec_interleave_lowv4hi" (const_int 1) (const_int 5)])))] "" - "unpack2.l %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack2.l %0 = %r1, %r2"; + else + return "%,unpack2.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "vec_interleave_highv4hi" @@ -899,7 +923,13 @@ (define_insn "vec_interleave_highv4hi" (const_int 3) (const_int 7)])))] "" - "unpack2.h %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack2.h %0 = %r1, %r2"; + else + return "%,unpack2.h %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "mix2_r" @@ -958,13 +988,13 @@ (define_expand "vec_extract_evenodd_help (const_int 2) (const_int 1) (const_int 3)])))] - "") + "!TARGET_BIG_ENDIAN") (define_expand "vec_extract_evenv4hi" [(match_operand:V4HI 0 "gr_register_operand") (match_operand:V4HI 1 "gr_reg_or_0_operand") (match_operand:V4HI 2 "gr_reg_or_0_operand")] - "" + "!TARGET_BIG_ENDIAN" { rtx temp = gen_reg_rtx (V4HImode); emit_insn (gen_mix2_r (temp, operands[1], operands[2])); @@ -976,7 +1006,7 @@ (define_expand "vec_extract_oddv4hi" [(match_operand:V4HI 0 "gr_register_operand") (match_operand:V4HI 1 "gr_reg_or_0_operand") (match_operand:V4HI 2 "gr_reg_or_0_operand")] - "" + "!TARGET_BIG_ENDIAN" { rtx temp = gen_reg_rtx (V4HImode); emit_insn (gen_mix2_l (temp, operands[1], operands[2])); @@ -1002,7 +1032,13 @@ (define_insn "vec_interleave_lowv2si" (parallel [(const_int 0) (const_int 2)])))] "" - "unpack4.l %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack4.h %0 = %r1, %r2"; + else + return "%,unpack4.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) ;; Note that mix4.l performs the exact same operation. @@ -1015,14 +1051,20 @@ (define_insn "vec_interleave_highv2si" (parallel [(const_int 1) (const_int 3)])))] "" - "unpack4.h %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack4.l %0 = %r1, %r2"; + else + return "%,unpack4.h %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) (define_expand "vec_extract_evenv2si" [(match_operand:V2SI 0 "gr_register_operand" "") (match_operand:V2SI 1 "gr_register_operand" "") (match_operand:V2SI 2 "gr_register_operand" "")] - "" + "!TARGET_BIG_ENDIAN" { emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1], operands[2])); @@ -1033,7 +1075,7 @@ (define_expand "vec_extract_oddv2si" [(match_operand:V2SI 0 "gr_register_operand" "") (match_operand:V2SI 1 "gr_register_operand" "") (match_operand:V2SI 2 "gr_register_operand" "")] - "" + "!TARGET_BIG_ENDIAN" { emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1], operands[2])); @@ -1397,7 +1439,7 @@ (define_expand "vec_extract_evenv2sf" [(match_operand:V2SF 0 "gr_register_operand" "") (match_operand:V2SF 1 "gr_register_operand" "") (match_operand:V2SF 2 "gr_register_operand" "")] - "" + "!TARGET_BIG_ENDIAN" { emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1], operands[2])); @@ -1408,7 +1450,7 @@ (define_expand "vec_extract_oddv2sf" [(match_operand:V2SF 0 "gr_register_operand" "") (match_operand:V2SF 1 "gr_register_operand" "") (match_operand:V2SF 2 "gr_register_operand" "")] - "" + "!TARGET_BIG_ENDIAN" { emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1], operands[2])); @@ -1540,7 +1582,7 @@ (define_expand "vec_pack_trunc_v4hi" [(match_operand:V8QI 0 "gr_register_operand" "") (match_operand:V4HI 1 "gr_register_operand" "") (match_operand:V4HI 2 "gr_register_operand" "")] - "" + "!TARGET_BIG_ENDIAN" { rtx op1 = gen_lowpart(V8QImode, operands[1]); rtx op2 = gen_lowpart(V8QImode, operands[2]); @@ -1552,7 +1594,7 @@ (define_expand "vec_pack_trunc_v2si" [(match_operand:V4HI 0 "gr_register_operand" "") (match_operand:V2SI 1 "gr_register_operand" "") (match_operand:V2SI 2 "gr_register_operand" "")] - "" + "!TARGET_BIG_ENDIAN" { rtx op1 = gen_lowpart(V4HImode, operands[1]); rtx op2 = gen_lowpart(V4HImode, operands[2]);

[ia64,rfa] vector pattern improvements

Commit Message

Comments

Patch