From patchwork Wed Dec 21 17:00:18 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 132694 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id EAE27B70C8 for ; Thu, 22 Dec 2011 04:02:49 +1100 (EST) Received: (qmail 30175 invoked by alias); 21 Dec 2011 17:01:33 -0000 Received: (qmail 29515 invoked by uid 22791); 21 Dec 2011 17:01:14 -0000 X-SWARE-Spam-Status: No, hits=-2.2 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW X-Spam-Check-By: sourceware.org Received: from mail-qy0-f175.google.com (HELO mail-qy0-f175.google.com) (209.85.216.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Wed, 21 Dec 2011 17:00:44 +0000 Received: by qcqw6 with SMTP id w6so4507919qcq.20 for ; Wed, 21 Dec 2011 09:00:44 -0800 (PST) Received: by 10.229.78.66 with SMTP id j2mr2854168qck.130.1324486843462; Wed, 21 Dec 2011 09:00:43 -0800 (PST) Received: from anchor.twiddle.home.com ([173.160.232.49]) by mx.google.com with ESMTPS id q14sm11657094qap.4.2011.12.21.09.00.42 (version=TLSv1/SSLv3 cipher=OTHER); Wed, 21 Dec 2011 09:00:43 -0800 (PST) From: Richard Henderson To: rdsandiford@googlemail.com Cc: mingjie.xing@gmail.com, gcc-patches@gcc.gnu.org Subject: [PATCH 06/10] mips: Improve support for vec_init. 
Date: Wed, 21 Dec 2011 09:00:18 -0800 Message-Id: <1324486822-18225-7-git-send-email-rth@redhat.com> In-Reply-To: <1324486822-18225-1-git-send-email-rth@redhat.com> References: <1324486822-18225-1-git-send-email-rth@redhat.com> X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org --- gcc/config/mips/loongson.md | 26 +++++ gcc/config/mips/mips-ps-3d.md | 14 +-- gcc/config/mips/mips.c | 226 ++++++++++++++++++++++++++++++++++++----- 3 files changed, 233 insertions(+), 33 deletions(-) diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md index 8404bf0..c80a45a 100644 --- a/gcc/config/mips/loongson.md +++ b/gcc/config/mips/loongson.md @@ -25,6 +25,7 @@ UNSPEC_LOONGSON_PCMPGT UNSPEC_LOONGSON_PEXTR UNSPEC_LOONGSON_PINSRH + UNSPEC_LOONGSON_VINIT UNSPEC_LOONGSON_PMADD UNSPEC_LOONGSON_PMOVMSK UNSPEC_LOONGSON_PMULHU @@ -83,6 +84,9 @@ ;; but with twice as many elements. (define_mode_attr V_squash_double [(V2SI "V4HI") (V4HI "V8QI")]) +;; Given a vector type T, the inner mode. +(define_mode_attr V_inner [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) + ;; The Loongson instruction suffixes corresponding to the conversions ;; specified by V_half_width. (define_mode_attr V_squash_double_suffix [(V2SI "wh") (V4HI "hb")]) @@ -119,6 +123,28 @@ DONE; }) +;; Helper for vec_init. Initialize element 0 of the output from the input. +;; All other elements are undefined. +(define_insn "loongson_vec_init1_<mode>" + [(set (match_operand:VHB 0 "register_operand" "=f") + (unspec:VHB [(truncate:<V_inner> + (match_operand:DI 1 "reg_or_0_operand" "Jd"))] + UNSPEC_LOONGSON_VINIT))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "dmtc1\t%z1,%0" + [(set_attr "move_type" "mtc") + (set_attr "mode" "DI")]) + +;; Helper for vec_initv2si. 
+(define_insn "*vec_concatv2si" + [(set (match_operand:V2SI 0 "register_operand" "=f") + (vec_concat:V2SI + (match_operand:SI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpcklwd\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + ;; Instruction patterns for SIMD instructions. ;; Pack with signed saturation. diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md index fbbb7b0..7c3fe85 100644 --- a/gcc/config/mips/mips-ps-3d.md +++ b/gcc/config/mips/mips-ps-3d.md @@ -259,13 +259,11 @@ (match_operand:V2SF 1 "")] "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" { - rtx op0 = force_reg (SFmode, XVECEXP (operands[1], 0, 0)); - rtx op1 = force_reg (SFmode, XVECEXP (operands[1], 0, 1)); - emit_insn (gen_vec_initv2sf_internal (operands[0], op0, op1)); + mips_expand_vector_init (operands[0], operands[1]); DONE; }) -(define_insn "vec_initv2sf_internal" +(define_insn "vec_concatv2sf" [(set (match_operand:V2SF 0 "register_operand" "=f") (vec_concat:V2SF (match_operand:SF 1 "register_operand" "f") @@ -315,7 +313,7 @@ /* We don't have an insert instruction, so we duplicate the float, and then use a PUL instruction. 
*/ rtx temp = gen_reg_rtx (V2SFmode); - emit_insn (gen_mips_cvt_ps_s (temp, operands[1], operands[1])); + emit_insn (gen_vec_concatv2sf (temp, operands[1], operands[1])); operands[1] = temp; operands[3] = GEN_INT (1 - INTVAL (operands[2]) + 2); }) @@ -328,11 +326,9 @@ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" { if (BYTES_BIG_ENDIAN) - emit_insn (gen_vec_initv2sf_internal (operands[0], operands[1], - operands[2])); + emit_insn (gen_vec_concatv2sf (operands[0], operands[1], operands[2])); else - emit_insn (gen_vec_initv2sf_internal (operands[0], operands[2], - operands[1])); + emit_insn (gen_vec_concatv2sf (operands[0], operands[2], operands[1])); DONE; }) diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index b3a3ad0..45b8454 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -15932,30 +15932,6 @@ mips_conditional_register_usage (void) } } -/* Initialize vector TARGET to VALS. */ - -void -mips_expand_vector_init (rtx target, rtx vals) -{ - enum machine_mode mode; - enum machine_mode inner; - unsigned int i, n_elts; - rtx mem; - - mode = GET_MODE (target); - inner = GET_MODE_INNER (mode); - n_elts = GET_MODE_NUNITS (mode); - - gcc_assert (VECTOR_MODE_P (mode)); - - mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); - for (i = 0; i < n_elts; i++) - emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)), - XVECEXP (vals, 0, i)); - - emit_move_insn (target, mem); -} - /* When generating MIPS16 code, we want to allocate $24 (T_REG) before other registers for instructions for which it is possible. This encourages the compiler to use CMP in cases where an XOR would @@ -16475,6 +16451,48 @@ mips_expand_vpc_loongson_pshufh (struct expand_vec_perm_d *d) return true; } +/* Recognize broadcast patterns for the Loongson. 
*/ + +static bool +mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d) +{ + unsigned i, elt; + rtx t0, t1; + + if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS)) + return false; + /* Note that we've already matched V2SI via punpck and V4HI via pshufh. */ + if (d->vmode != V8QImode) + return false; + if (!d->one_vector_p) + return false; + + elt = d->perm[0]; + for (i = 1; i < 8; ++i) + if (d->perm[i] != elt) + return false; + + if (d->testing_p) + return true; + + /* With one interleave we put two of the desired element adjacent. */ + t0 = gen_reg_rtx (V8QImode); + if (elt < 4) + emit_insn (gen_loongson_punpcklbh (t0, d->op0, d->op0)); + else + emit_insn (gen_loongson_punpckhbh (t0, d->op0, d->op0)); + + /* Shuffle that one HImode element into all locations. */ + elt &= 3; + elt *= 0x55; + t1 = gen_reg_rtx (V4HImode); + emit_insn (gen_loongson_pshufh (t1, gen_lowpart (V4HImode, t0), + force_reg (SImode, GEN_INT (elt)))); + + emit_move_insn (d->target, gen_lowpart (V8QImode, t1)); + return true; +} + static bool mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) { @@ -16506,6 +16524,8 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return true; if (mips_expand_vpc_loongson_pshufh (d)) return true; + if (mips_expand_vpc_loongson_bcast (d)) + return true; return false; } @@ -16656,6 +16676,164 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest)); } + +/* A subroutine of mips_expand_vector_init, match constant vector elements. */ + +static inline bool +mips_constant_elt_p (rtx x) +{ + return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE; +} + +/* A subroutine of mips_expand_vector_init, expand via broadcast. 
*/ + +static void +mips_expand_vi_broadcast (enum machine_mode vmode, rtx target, rtx elt) +{ + struct expand_vec_perm_d d; + rtx t1; + bool ok; + + if (elt != const0_rtx) + elt = force_reg (GET_MODE_INNER (vmode), elt); + if (REG_P (elt)) + elt = gen_lowpart (DImode, elt); + + t1 = gen_reg_rtx (vmode); + switch (vmode) + { + case V8QImode: + emit_insn (gen_loongson_vec_init1_v8qi (t1, elt)); + break; + case V4HImode: + emit_insn (gen_loongson_vec_init1_v4hi (t1, elt)); + break; + default: + gcc_unreachable (); + } + + memset (&d, 0, sizeof(d)); + d.target = target; + d.op0 = t1; + d.op1 = t1; + d.vmode = vmode; + d.nelt = GET_MODE_NUNITS (vmode); + d.one_vector_p = true; + + ok = mips_expand_vec_perm_const_1 (&d); + gcc_assert (ok); +} + +/* A subroutine of mips_expand_vector_init, replacing all of the non-constant + elements of VALS with zeros, copy the constant vector to TARGET. */ + +static void +mips_expand_vi_constant (enum machine_mode vmode, unsigned nelt, + rtx target, rtx vals) +{ + rtvec vec = shallow_copy_rtvec (XVEC (vals, 0)); + unsigned i; + + for (i = 0; i < nelt; ++i) + { + if (!mips_constant_elt_p (RTVEC_ELT (vec, i))) + RTVEC_ELT (vec, i) = const0_rtx; + } + + emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec)); +} + + +/* A subroutine of mips_expand_vector_init, expand via pinsrh. */ + +static void +mips_expand_vi_loongson_one_pinsrh (rtx target, rtx vals, unsigned one_var) +{ + mips_expand_vi_constant (V4HImode, 4, target, vals); + + emit_insn (gen_vec_setv4hi (target, target, XVECEXP (vals, 0, one_var), + GEN_INT (one_var))); +} + +/* A subroutine of mips_expand_vector_init, expand anything via memory. 
*/ + +static void +mips_expand_vi_general (enum machine_mode vmode, enum machine_mode imode, + unsigned nelt, unsigned nvar, rtx target, rtx vals) +{ + rtx mem = assign_stack_temp (vmode, GET_MODE_SIZE (vmode), 0); + unsigned int i, isize = GET_MODE_SIZE (imode); + + if (nvar < nelt) + mips_expand_vi_constant (vmode, nelt, mem, vals); + + for (i = 0; i < nelt; ++i) + { + rtx x = XVECEXP (vals, 0, i); + if (!mips_constant_elt_p (x)) + emit_move_insn (adjust_address (mem, imode, i * isize), x); + } + + emit_move_insn (target, mem); +} + +/* Expand a vector initialization. */ + +void +mips_expand_vector_init (rtx target, rtx vals) +{ + enum machine_mode vmode = GET_MODE (target); + enum machine_mode imode = GET_MODE_INNER (vmode); + unsigned i, nelt = GET_MODE_NUNITS (vmode); + unsigned nvar = 0, one_var = -1u; + bool all_same = true; + rtx x; + + for (i = 0; i < nelt; ++i) + { + x = XVECEXP (vals, 0, i); + if (!mips_constant_elt_p (x)) + nvar++, one_var = i; + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + /* Load constants from the pool, or whatever's handy. */ + if (nvar == 0) + { + emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0))); + return; + } + + /* For two-part initialization, always use CONCAT. */ + if (nelt == 2) + { + rtx op0 = force_reg (imode, XVECEXP (vals, 0, 0)); + rtx op1 = force_reg (imode, XVECEXP (vals, 0, 1)); + x = gen_rtx_VEC_CONCAT (vmode, op0, op1); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); + return; + } + + /* Loongson is the only cpu with vectors with more elements. */ + gcc_assert (TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS); + + /* If all values are identical, broadcast the value. */ + if (all_same) + { + mips_expand_vi_broadcast (vmode, target, XVECEXP (vals, 0, 0)); + return; + } + + /* If exactly one element of a V4HImode vector is non-constant, use PINSRH. 
*/ + if (nvar == 1 && vmode == V4HImode) + { + mips_expand_vi_loongson_one_pinsrh (target, vals, one_var); + return; + } + + mips_expand_vi_general (vmode, imode, nelt, nvar, target, vals); +} /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP