From patchwork Sat Dec 19 21:07:24 2009
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Richard Henderson
X-Patchwork-Id: 41476
Message-Id: <89475f246e119011401b13909dc3433d3f592933.1261256927.git.rth@twiddle.net>
From: Richard Henderson
Date: Sat, 19 Dec 2009 13:07:24 -0800
To: qemu-devel@nongnu.org
Cc: blauwirbel@gmail.com
Subject: [Qemu-devel] [PATCH 6/6] tcg-sparc: Implement setcond, movcond, setcond2.

For the time being, the movcond opcodes are protected by MOVCOND_PATCH_APPLIED.
But since setcond largely depends on movcond, it didn't seem worthwhile to
spend too much effort splitting this into two patches.  Hopefully we can
simply resolve the issue of what to do with movcond before having to
implement setcond without it.
---
 tcg/sparc/tcg-target.c |  253 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 253 insertions(+), 0 deletions(-)

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 8f094e5..6686ce2 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -191,6 +191,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define INSN_RS2(x)  (x)
 #define INSN_ASI(x)  ((x) << 5)
 
+#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
+#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
 #define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
 #define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
 #define INSN_OFF22(x) (((x) >> 2) & 0x3fffff)
@@ -214,6 +216,17 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define COND_VC    0xf
 #define BA         (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2))
 
+#define MOVCC_ICC  (1 << 18)
+#define MOVCC_XCC  (1 << 18 | 1 << 12)
+
+#define MRCOND_E   0x1
+#define MRCOND_LE  0x2
+#define MRCOND_L   0x3
+#define MRCOND_NE  0x5
+#define MRCOND_G   0x6
+#define MRCOND_GE  0x7
+#define INSN_MRCOND(c) ((c) << 10)
+
 #define ARITH_ADD  (INSN_OP(2) | INSN_OP3(0x00))
 #define ARITH_AND  (INSN_OP(2) | INSN_OP3(0x01))
 #define ARITH_OR   (INSN_OP(2) | INSN_OP3(0x02))
@@ -228,7 +241,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f))
 #define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09))
 #define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
+#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
 #define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
+#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
 
 #define SHIFT_SLL  (INSN_OP(2) | INSN_OP3(0x25))
 #define SHIFT_SRL  (INSN_OP(2) | INSN_OP3(0x26))
@@ -554,6 +569,205 @@ static void tcg_out_brcond2_i32(TCGContext *s, int cond,
 }
 #endif
 
+#if defined(__sparc_v9__) || defined(__sparc_v8plus__)
+static const uint8_t tcg_cond_to_mrcond[10] = {
+    [TCG_COND_EQ] = MRCOND_E,
+    [TCG_COND_NE] = MRCOND_NE,
+    [TCG_COND_LT] = MRCOND_L,
+    [TCG_COND_GE] = MRCOND_GE,
+    [TCG_COND_LE] = MRCOND_LE,
+    [TCG_COND_GT] = MRCOND_G
+};
+#endif
+
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg d,
+                            TCGArg c1, TCGArg c2, int c2const,
+                            TCGArg vt, int vtconst, TCGArg vf, int vfconst,
+                            int i64 __attribute__((unused)))
+{
+    TCGArg t;
+
+    if (vtconst == vfconst && vt == vf) {
+        if (vtconst) {
+            tcg_out_movi_imm13(s, d, vt);
+        } else if (d != vt) {
+            tcg_out_mov(s, d, vt);
+        }
+        return;
+    }
+
+#if defined(__sparc_v9__) || defined(__sparc_v8plus__)
+    /* Use MOVR when possible.  We have constrained the constants to IMM11
+       for the sake of MOVCC, but MOVR only takes IMM10.  Also, we cannot
+       overwrite the condition with our initial store to D.  Any attempt to
+       work around these extra conditions is just as much work as falling
+       back to MOVCC.  */
+    if (c2 == 0 && tcg_cond_to_mrcond[cond] && d != c1) {
+        int use_movr = 1, do_swap = 0;
+
+        if (vtconst) {
+            if (!check_fit_tl(vt, 10)) {
+                do_swap = 1;
+                if (vfconst) {
+                    use_movr = check_fit_tl(vt, 10);
+                } else {
+                    use_movr = (d != vf);
+                }
+            }
+        } else if (d == vt) {
+            do_swap = 1;
+            if (vfconst) {
+                use_movr = check_fit_tl(vt, 10);
+            }
+        }
+        if (use_movr) {
+            if (do_swap) {
+                cond = tcg_invert_cond(cond);
+                t = vt, vt = vf, vf = t;
+                t = vtconst, vtconst = vfconst, vfconst = t;
+            }
+            if (vfconst) {
+                tcg_out_movi_imm13(s, d, vf);
+            } else if (d != vf) {
+                tcg_out_mov(s, d, vf);
+            }
+            tcg_out32(s, ARITH_MOVR | INSN_RD(d) | INSN_RS1(c1)
+                      | INSN_MRCOND(tcg_cond_to_mrcond[cond])
+                      | (vtconst ? INSN_IMM10(vt) : INSN_RS2(vt)));
+            return;
+        }
+    }
+
+    tcg_out_cmp(s, c1, c2, c2const);
+
+    if (!vtconst && vt == d) {
+        cond = tcg_invert_cond(cond);
+        vt = vf, vf = d;
+        vtconst = vfconst, vfconst = 0;
+    }
+    if (vfconst) {
+        tcg_out_movi_imm13(s, d, vf);
+    } else if (d != vf) {
+        tcg_out_mov(s, d, vf);
+    }
+    tcg_out32(s, ARITH_MOVCC | INSN_RD(d)
+              | INSN_RS1(tcg_cond_to_bcond[cond])
+              | (i64 ? MOVCC_XCC : MOVCC_ICC)
+              | (vtconst ? INSN_IMM11(vt) : INSN_RS2(vt)));
+#else
+    t = gen_new_label();
+
+    tcg_out_cmp(s, c1, c2, c2const);
+
+    /* Note we set the annul bit to avoid clobbering D on the false path.  */
+    tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), t);
+
+    if (vtconst) {
+        tcg_out_movi_imm13(s, d, vt);
+    } else if (d != vt) {
+        tcg_out_mov(s, d, vt);
+    } else {
+        tcg_out_nop(s);
+    }
+
+    if (vfconst) {
+        tcg_out_movi_imm13(s, d, vf);
+    } else if (d != vf) {
+        tcg_out_mov(s, d, vf);
+    }
+
+    tcg_out_label(s, t, (tcg_target_long)s->code_ptr);
+#endif
+}
+
+static void tcg_out_setcond_i32(TCGContext *s, int cond, TCGArg d,
+                                TCGArg c1, TCGArg c2, int c2const)
+{
+    TCGArg t;
+
+    /* For 32-bit comparisons, we can play games with ADDX/SUBX in order
+       to get the correct value into the register.  Don't go beyond this
+       because the movcond fallback is only 4 insns.  */
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+        if (c2 != 0) {
+            if (c2const) {
+                tcg_out_arithi(s, d, c1, c2, ARITH_XOR);
+            } else {
+                tcg_out_arith(s, d, c1, c2, ARITH_XOR);
+            }
+        }
+        c1 = TCG_REG_G0, c2 = d, c2const = 0;
+        cond = (cond == TCG_COND_EQ ? TCG_COND_LEU : TCG_COND_LTU);
+        break;
+
+    case TCG_COND_GTU:
+    case TCG_COND_GEU:
+        if (c2const && c2 != 0) {
+            tcg_out_movi_imm13(s, TCG_REG_I5, c2);
+            c2 = TCG_REG_I5;
+        }
+        t = c1, c1 = c2, c2 = t, c2const = 0;
+        cond = (cond == TCG_COND_GTU ? TCG_COND_LTU : TCG_COND_LEU);
+        break;
+
+    case TCG_COND_LTU:
+    case TCG_COND_LEU:
+        break;
+
+    default:
+        tcg_out_movcond(s, cond, d, c1, c2, c2const, 1, 1, 0, 1, 0);
+        return;
+    }
+
+    tcg_out_arith(s, TCG_REG_G0, c1, c2, ARITH_SUBCC);
+    if (cond == TCG_COND_LTU) {
+        tcg_out_arithi(s, d, TCG_REG_G0, 0, ARITH_ADDX);
+    } else {
+        tcg_out_arithi(s, d, TCG_REG_G0, -1, ARITH_SUBX);
+    }
+}
+
+#if TCG_TARGET_REG_BITS == 32
+static void tcg_out_setcond2_i32(TCGContext *s, int cond, TCGArg d,
+                                 TCGArg al, TCGArg ah, TCGArg bl, int blconst,
+                                 TCGArg bh, int bhconst)
+{
+    TCGArg scratch = TCG_REG_I5;
+    int label;
+
+    if (d != al && d != ah && (blconst || d != bl) && (bhconst || d != bh)) {
+        scratch = d;
+    }
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        tcg_out_setcond_i32(s, TCG_COND_EQ, scratch, al, bl, blconst);
+        tcg_out_movcond(s, TCG_COND_NE, scratch, ah, bh, bhconst,
+                        0, 1, scratch, 0, 0);
+        break;
+    case TCG_COND_NE:
+        tcg_out_setcond_i32(s, TCG_COND_NE, scratch, al, bl, blconst);
+        tcg_out_movcond(s, TCG_COND_NE, scratch, ah, bh, bhconst,
+                        1, 1, scratch, 0, 0);
+        break;
+
+    default:
+        label = gen_new_label();
+        tcg_out_movi_imm13(s, scratch, 1);
+        tcg_out_brcond2_i32(s, cond, al, ah, bl, blconst, bh, bhconst, label);
+        tcg_out_movi_imm13(s, scratch, 0);
+        tcg_out_label(s, label, (tcg_target_long)s->code_ptr);
+        break;
+    }
+
+    if (d != scratch) {
+        tcg_out_mov(s, d, scratch);
+    }
+}
+#endif
+
 /* Generate global QEMU prologue and epilogue code */
 void tcg_target_qemu_prologue(TCGContext *s)
 {
@@ -1122,12 +1336,27 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_brcond_i32(s, args[2], args[0], args[1], const_args[1],
                            args[3]);
         break;
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond_i32(s, args[3], args[0], args[1],
+                            args[2], const_args[2]);
+        break;
+#ifdef MOVCOND_PATCH_APPLIED
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        const_args[2], args[3], const_args[3],
+                        args[4], const_args[4], 0);
+        break;
+#endif
 #if TCG_TARGET_REG_BITS == 32
     case INDEX_op_brcond2_i32:
         tcg_out_brcond2_i32(s, args[4], args[0], args[1], args[2],
                             const_args[2], args[3], const_args[3],
                             args[5]);
         break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2_i32(s, args[5], args[0], args[1], args[2],
+                             args[3], const_args[3], args[4], const_args[4]);
+        break;
 #endif
 
     case INDEX_op_qemu_ld8u:
@@ -1194,6 +1423,17 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_brcond_i64(s, args[2], args[0], args[1], const_args[1],
                            args[3]);
         break;
+    case INDEX_op_setcond_i64:
+        tcg_out_movcond(s, args[3], args[0], args[1], args[2],
+                        const_args[2], 1, 1, 0, 1, 1);
+        break;
+#ifdef MOVCOND_PATCH_APPLIED
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        const_args[2], args[3], const_args[3],
+                        args[4], const_args[4], 1);
+        break;
+#endif
     case INDEX_op_qemu_ld64:
         tcg_out_qemu_ld(s, args, 3);
         break;
@@ -1248,8 +1488,17 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_sar_i32, { "r", "r", "rJ" } },
 
     { INDEX_op_brcond_i32, { "r", "rJ" } },
+    { INDEX_op_setcond_i32, { "r", "r", "rJ" } },
+#ifdef MOVCOND_PATCH_APPLIED
+#if defined(__sparc_v9__) || defined(__sparc_v8plus__)
+    { INDEX_op_movcond_i32, { "r", "r", "rJ", "rI", "rI" } },
+#else
+    { INDEX_op_movcond_i32, { "r", "r", "rJ", "rJ", "rJ" } },
+#endif
+#endif
 #if TCG_TARGET_REG_BITS == 32
     { INDEX_op_brcond2_i32, { "r", "r", "rJ", "rJ" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "rJ", "rJ" } },
 #endif
 
     { INDEX_op_qemu_ld8u, { "r", "L" } },
@@ -1294,6 +1543,10 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_sar_i64, { "r", "r", "rJ" } },
 
     { INDEX_op_brcond_i64, { "r", "rJ" } },
+    { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
+#ifdef MOVCOND_PATCH_APPLIED
+    { INDEX_op_movcond_i64, { "r", "r", "rJ", "rI", "rI" } },
+#endif
 #endif
 
     { -1 },
 };
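
A note on the setcond_i32 lowering above (not part of the patch): after
"subcc c1, c2, %g0" the SPARC carry bit holds the unsigned borrow, i.e.
C = (c1 <u c2), so a following "addx %g0, 0, d" materializes C itself while
"subx %g0, -1, d" materializes 1 - C.  A minimal C model of the two tails,
with illustrative helper names only:

    #include <stdint.h>

    /* Value left in d by "subcc c1, c2, %g0; addx %g0, 0, d".  */
    static uint32_t setcond_addx_model(uint32_t c1, uint32_t c2)
    {
        uint32_t carry = (c1 < c2);   /* borrow out of the subcc */
        return 0 + 0 + carry;         /* 1 iff c1 <u c2 */
    }

    /* Value left in d by "subcc c1, c2, %g0; subx %g0, -1, d".  */
    static uint32_t setcond_subx_model(uint32_t c1, uint32_t c2)
    {
        uint32_t carry = (c1 < c2);   /* borrow out of the subcc */
        return 1 - carry;             /* 0 - (-1) - C */
    }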
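For the 32-bit host case, the EQ/NE arms of tcg_out_setcond2_i32 build the
result from the low halves first and then let a movcond on the high halves
overwrite it.  In plain C, with an illustrative function name only, the EQ
arm computes:

    #include <stdint.h>

    /* setcond2 EQ: set r from the low halves, then force it to 0 when the
       high halves differ (the NE arm forces it to 1 instead).  */
    static uint32_t setcond2_eq_model(uint32_t al, uint32_t ah,
                                      uint32_t bl, uint32_t bh)
    {
        uint32_t r = (al == bl);   /* tcg_out_setcond_i32(EQ, ...) */
        if (ah != bh) {            /* tcg_out_movcond(NE, ...) */
            r = 0;
        }
        return r;
    }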