From patchwork Wed Dec 21 17:00:18 2011
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Richard Henderson <rth@redhat.com>
X-Patchwork-Id: 132694
Return-Path: 
 <gcc-patches-return-310372-incoming=patchwork.ozlabs.org@gcc.gnu.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Received: from sourceware.org (server1.sourceware.org [209.132.180.131])
	by ozlabs.org (Postfix) with SMTP id EAE27B70C8
	for <incoming@patchwork.ozlabs.org>;
	Thu, 22 Dec 2011 04:02:49 +1100 (EST)
Received: (qmail 30175 invoked by alias); 21 Dec 2011 17:01:33 -0000
Received: (qmail 29515 invoked by uid 22791); 21 Dec 2011 17:01:14 -0000
X-SWARE-Spam-Status: No, hits=-2.2 required=5.0	tests=AWL, BAYES_00,
	DKIM_SIGNED, DKIM_VALID, FREEMAIL_ENVFROM_END_DIGIT,
	FREEMAIL_FROM, RCVD_IN_DNSWL_LOW
X-Spam-Check-By: sourceware.org
Received: from mail-qy0-f175.google.com (HELO mail-qy0-f175.google.com)
	(209.85.216.175) by sourceware.org (qpsmtpd/0.43rc1) with
	ESMTP; Wed, 21 Dec 2011 17:00:44 +0000
Received: by qcqw6 with SMTP id w6so4507919qcq.20 for
	<gcc-patches@gcc.gnu.org>; Wed, 21 Dec 2011 09:00:44 -0800 (PST)
Received: by 10.229.78.66 with SMTP id j2mr2854168qck.130.1324486843462;
	Wed, 21 Dec 2011 09:00:43 -0800 (PST)
Received: from anchor.twiddle.home.com ([173.160.232.49]) by mx.google.com
	with ESMTPS id q14sm11657094qap.4.2011.12.21.09.00.42
	(version=TLSv1/SSLv3 cipher=OTHER);
	Wed, 21 Dec 2011 09:00:43 -0800 (PST)
From: Richard Henderson <rth@redhat.com>
To: rdsandiford@googlemail.com
Cc: mingjie.xing@gmail.com,	gcc-patches@gcc.gnu.org
Subject: [PATCH 06/10] mips: Improve support for vec_init.
Date: Wed, 21 Dec 2011 09:00:18 -0800
Message-Id: <1324486822-18225-7-git-send-email-rth@redhat.com>
In-Reply-To: <1324486822-18225-1-git-send-email-rth@redhat.com>
References: <1324486822-18225-1-git-send-email-rth@redhat.com>
X-IsSubscribed: yes
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Unsubscribe: 
 <mailto:gcc-patches-unsubscribe-incoming=patchwork.ozlabs.org@gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Delivered-To: mailing list gcc-patches@gcc.gnu.org

---
 gcc/config/mips/loongson.md   |   26 +++++
 gcc/config/mips/mips-ps-3d.md |   14 +--
 gcc/config/mips/mips.c        |  226 ++++++++++++++++++++++++++++++++++++-----
 3 files changed, 233 insertions(+), 33 deletions(-)
diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md
index 8404bf0..c80a45a 100644
--- a/gcc/config/mips/loongson.md
+++ b/gcc/config/mips/loongson.md
@@ -25,6 +25,7 @@
   UNSPEC_LOONGSON_PCMPGT
   UNSPEC_LOONGSON_PEXTR
   UNSPEC_LOONGSON_PINSRH
+  UNSPEC_LOONGSON_VINIT
   UNSPEC_LOONGSON_PMADD
   UNSPEC_LOONGSON_PMOVMSK
   UNSPEC_LOONGSON_PMULHU
@@ -83,6 +84,9 @@
 ;; but with twice as many elements.
 (define_mode_attr V_squash_double [(V2SI "V4HI") (V4HI "V8QI")])
 
+;; Given a vector type T, the inner mode.
+(define_mode_attr V_inner [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
+
 ;; The Loongson instruction suffixes corresponding to the conversions
 ;; specified by V_half_width.
 (define_mode_attr V_squash_double_suffix [(V2SI "wh") (V4HI "hb")])
@@ -119,6 +123,28 @@
   DONE;
 })
 
+;; Helper for vec_init.  Initialize element 0 of the output from the input.
+;; All other elements are undefined.
+(define_insn "loongson_vec_init1_<mode>"
+  [(set (match_operand:VHB 0 "register_operand" "=f")
+	(unspec:VHB [(truncate:<V_inner>
+		       (match_operand:DI 1 "reg_or_0_operand" "Jd"))]
+		    UNSPEC_LOONGSON_VINIT))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "dmtc1\t%z1,%0"
+  [(set_attr "move_type" "mtc")
+   (set_attr "mode" "DI")])
+
+;; Helper for vec_initv2si.
+(define_insn "*vec_concatv2si"
+  [(set (match_operand:V2SI 0 "register_operand" "=f")
+	(vec_concat:V2SI
+	  (match_operand:SI 1 "register_operand" "f")
+	  (match_operand:SI 2 "register_operand" "f")))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "punpcklwd\t%0,%1,%2"
+  [(set_attr "type" "fdiv")])
+
 ;; Instruction patterns for SIMD instructions.
 
 ;; Pack with signed saturation.
diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md
index fbbb7b0..7c3fe85 100644
--- a/gcc/config/mips/mips-ps-3d.md
+++ b/gcc/config/mips/mips-ps-3d.md
@@ -259,13 +259,11 @@
    (match_operand:V2SF 1 "")]
   "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
 {
-  rtx op0 = force_reg (SFmode, XVECEXP (operands[1], 0, 0));
-  rtx op1 = force_reg (SFmode, XVECEXP (operands[1], 0, 1));
-  emit_insn (gen_vec_initv2sf_internal (operands[0], op0, op1));
+  mips_expand_vector_init (operands[0], operands[1]);
   DONE;
 })
 
-(define_insn "vec_initv2sf_internal"
+(define_insn "vec_concatv2sf"
   [(set (match_operand:V2SF 0 "register_operand" "=f")
 	(vec_concat:V2SF
 	 (match_operand:SF 1 "register_operand" "f")
@@ -315,7 +313,7 @@
   /* We don't have an insert instruction, so we duplicate the float, and
      then use a PUL instruction.  */
   rtx temp = gen_reg_rtx (V2SFmode);
-  emit_insn (gen_mips_cvt_ps_s (temp, operands[1], operands[1]));
+  emit_insn (gen_vec_concatv2sf (temp, operands[1], operands[1]));
   operands[1] = temp;
   operands[3] = GEN_INT (1 - INTVAL (operands[2]) + 2);
 })
@@ -328,11 +326,9 @@
   "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
 {
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_vec_initv2sf_internal (operands[0], operands[1],
-	       operands[2]));
+    emit_insn (gen_vec_concatv2sf (operands[0], operands[1], operands[2]));
   else
-    emit_insn (gen_vec_initv2sf_internal (operands[0], operands[2],
-	       operands[1]));
+    emit_insn (gen_vec_concatv2sf (operands[0], operands[2], operands[1]));
   DONE;
 })
 
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index b3a3ad0..45b8454 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -15932,30 +15932,6 @@ mips_conditional_register_usage (void)
     }
 }
 
-/* Initialize vector TARGET to VALS.  */
-
-void
-mips_expand_vector_init (rtx target, rtx vals)
-{
-  enum machine_mode mode;
-  enum machine_mode inner;
-  unsigned int i, n_elts;
-  rtx mem;
-
-  mode = GET_MODE (target);
-  inner = GET_MODE_INNER (mode);
-  n_elts = GET_MODE_NUNITS (mode);
-
-  gcc_assert (VECTOR_MODE_P (mode));
-
-  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
-  for (i = 0; i < n_elts; i++)
-    emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
-                    XVECEXP (vals, 0, i));
-
-  emit_move_insn (target, mem);
-}
-
 /* When generating MIPS16 code, we want to allocate $24 (T_REG) before
    other registers for instructions for which it is possible.  This
    encourages the compiler to use CMP in cases where an XOR would
@@ -16475,6 +16451,48 @@ mips_expand_vpc_loongson_pshufh (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Recognize broadcast patterns for the Loongson.  */
+
+static bool
+mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d)
+{
+  unsigned i, elt;
+  rtx t0, t1;
+
+  if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
+    return false;
+  /* Note that we've already matched V2SI via punpck and V4HI via pshufh.  */
+  if (d->vmode != V8QImode)
+    return false;
+  if (!d->one_vector_p)
+    return false;
+
+  elt = d->perm[0];
+  for (i = 1; i < 8; ++i)
+    if (d->perm[i] != elt)
+      return false;
+
+  if (d->testing_p)
+    return true;
+
+  /* With one interleave we put two of the desired element adjacent.  */
+  t0 = gen_reg_rtx (V8QImode);
+  if (elt < 4)
+    emit_insn (gen_loongson_punpcklbh (t0, d->op0, d->op0));
+  else
+    emit_insn (gen_loongson_punpckhbh (t0, d->op0, d->op0));
+
+  /* Shuffle that one HImode element into all locations.  */
+  elt &= 3;
+  elt *= 0x55;
+  t1 = gen_reg_rtx (V4HImode);
+  emit_insn (gen_loongson_pshufh (t1, gen_lowpart (V4HImode, t0),
+				  force_reg (SImode, GEN_INT (elt))));
+
+  emit_move_insn (d->target, gen_lowpart (V8QImode, t1));
+  return true;
+}
+
 static bool
 mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 {
@@ -16506,6 +16524,8 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
     return true;
   if (mips_expand_vpc_loongson_pshufh (d))
     return true;
+  if (mips_expand_vpc_loongson_bcast (d))
+    return true;
   return false;
 }
 
@@ -16656,6 +16676,164 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
 
   emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest));
 }
+
+/* A subroutine of mips_expand_vec_init, match constant vector elements.  */
+
+static inline bool
+mips_constant_elt_p (rtx x)
+{
+  return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE;
+}
+
+/* A subroutine of mips_expand_vec_init, expand via broadcast.  */
+
+static void
+mips_expand_vi_broadcast (enum machine_mode vmode, rtx target, rtx elt)
+{
+  struct expand_vec_perm_d d;
+  rtx t1;
+  bool ok;
+
+  if (elt != const0_rtx)
+    elt = force_reg (GET_MODE_INNER (vmode), elt);
+  if (REG_P (elt))
+    elt = gen_lowpart (DImode, elt);
+
+  t1 = gen_reg_rtx (vmode);
+  switch (vmode)
+    {
+    case V8QImode:
+      emit_insn (gen_loongson_vec_init1_v8qi (t1, elt));
+      break;
+    case V4HImode:
+      emit_insn (gen_loongson_vec_init1_v4hi (t1, elt));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  memset (&d, 0, sizeof(d));
+  d.target = target;
+  d.op0 = t1;
+  d.op1 = t1;
+  d.vmode = vmode;
+  d.nelt = GET_MODE_NUNITS (vmode);
+  d.one_vector_p = true;
+
+  ok = mips_expand_vec_perm_const_1 (&d);
+  gcc_assert (ok);
+}
+
+/* A subroutine of mips_expand_vec_init, replacing all of the non-constant
+   elements of VALS with zeros, copy the constant vector to TARGET.  */
+
+static void
+mips_expand_vi_constant (enum machine_mode vmode, unsigned nelt,
+			 rtx target, rtx vals)
+{
+  rtvec vec = shallow_copy_rtvec (XVEC (vals, 0));
+  unsigned i;
+
+  for (i = 0; i < nelt; ++i)
+    {
+      if (!mips_constant_elt_p (RTVEC_ELT (vec, i)))
+	RTVEC_ELT (vec, i) = const0_rtx;
+    }
+
+  emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec));
+}
+
+
+/* A subroutine of mips_expand_vec_init, expand via pinsrh.  */
+
+static void
+mips_expand_vi_loongson_one_pinsrh (rtx target, rtx vals, unsigned one_var)
+{
+  mips_expand_vi_constant (V4HImode, 4, target, vals);
+
+  emit_insn (gen_vec_setv4hi (target, target, XVECEXP (vals, 0, one_var),
+			      GEN_INT (one_var)));
+}
+
+/* A subroutine of mips_expand_vec_init, expand anything via memory.  */
+
+static void
+mips_expand_vi_general (enum machine_mode vmode, enum machine_mode imode,
+			unsigned nelt, unsigned nvar, rtx target, rtx vals)
+{
+  rtx mem = assign_stack_temp (vmode, GET_MODE_SIZE (vmode), 0);
+  unsigned int i, isize = GET_MODE_SIZE (imode);
+
+  if (nvar < nelt)
+    mips_expand_vi_constant (vmode, nelt, mem, vals);
+
+  for (i = 0; i < nelt; ++i)
+    {
+      rtx x = XVECEXP (vals, 0, i);
+      if (!mips_constant_elt_p (x))
+	emit_move_insn (adjust_address (mem, imode, i * isize), x);
+    }
+
+  emit_move_insn (target, mem);
+}
+
+/* Expand a vector initialization.  */
+
+void
+mips_expand_vector_init (rtx target, rtx vals)
+{
+  enum machine_mode vmode = GET_MODE (target);
+  enum machine_mode imode = GET_MODE_INNER (vmode);
+  unsigned i, nelt = GET_MODE_NUNITS (vmode);
+  unsigned nvar = 0, one_var = -1u;
+  bool all_same = true;
+  rtx x;
+
+  for (i = 0; i < nelt; ++i)
+    {
+      x = XVECEXP (vals, 0, i);
+      if (!mips_constant_elt_p (x))
+	nvar++, one_var = i;
+      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+	all_same = false;
+    }
+
+  /* Load constants from the pool, or whatever's handy.  */
+  if (nvar == 0)
+    {
+      emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)));
+      return;
+    }
+
+  /* For two-part initialization, always use CONCAT.  */
+  if (nelt == 2)
+    {
+      rtx op0 = force_reg (imode, XVECEXP (vals, 0, 0));
+      rtx op1 = force_reg (imode, XVECEXP (vals, 0, 1));
+      x = gen_rtx_VEC_CONCAT (vmode, op0, op1);
+      emit_insn (gen_rtx_SET (VOIDmode, target, x));
+      return;
+    }
+
+  /* Loongson is the only cpu with vectors with more elements.  */
+  gcc_assert (TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS);
+
+  /* If all values are identical, broadcast the value.  */
+  if (all_same)
+    {
+      mips_expand_vi_broadcast (vmode, target, XVECEXP (vals, 0, 0));
+      return;
+    }
+
+  /* If we've only got one non-variable V4HImode, use PINSRH.  */
+  if (nvar == 1 && vmode == V4HImode)
+    {
+      mips_expand_vi_loongson_one_pinsrh (target, vals, one_var);
+      return;
+    }
+
+  mips_expand_vi_general (vmode, imode, nelt, nvar, target, vals);
+}
 
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP