diff mbox

tcg conditional set/move, round 3

Message ID 4B2BF650.80902@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Dec. 18, 2009, 9:38 p.m. UTC
On 12/18/2009 03:37 AM, Laurent Desnogues wrote:
>>   tcg: Generic support for conditional set and conditional move.
>
> Needs cosmetics changes.

Fixed, attachment 1.

>>   tcg-x86_64: Implement setcond and movcond.
>
> Some cosmetics and comments, but overall good.

Fixed, attachment 2.

>>   tcg-i386: Implement small forward branches.
>
> I think this contains a bug.

Fixed, attachment 3.  I've added an abort to patch_reloc to verify that 
the relocation is in range.  I've propagated the "small" flag to all of 
the branch functions so that...

>>   tcg-i386: Simplify brcond2.
>
> I don't like the rewrite of brcond2.

... this patch is dropped.

>>   tcg-i386: Implement setcond, movcond, setcond2.
>
> Not yet reviewed.

Fixed, attachment 4.  Similar changes to the amd64 patch.


r~
commit b89e4fc848abd936554ce48b7f9cf23d27ed9eb5
Author: Richard Henderson <rth@twiddle.net>
Date:   Fri Dec 18 11:02:06 2009 -0800

    tcg: Generic support for conditional set and conditional move.
    
    Defines setcond and movcond for implementing conditional moves at
    the tcg opcode level.  64-bit-on-32-bit is expanded via a setcond2
    primitive plus other operations.
    
    Signed-off-by: Richard Henderson <rth@twiddle.net>
commit eb326c39503ff265dc83e8792e87e6308d9dfd71
Author: Richard Henderson <rth@twiddle.net>
Date:   Fri Dec 18 11:51:35 2009 -0800

    tcg-x86_64: Implement setcond and movcond.
    
    Implement conditional moves in the x86_64 backend.
    
    Signed-off-by: Richard Henderson <rth@twiddle.net>

diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
index 2339091..1f86946 100644
--- a/tcg/x86_64/tcg-target.c
+++ b/tcg/x86_64/tcg-target.c
@@ -491,9 +491,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
     }
 }
 
-static void tcg_out_brcond(TCGContext *s, int cond, 
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index, int rexw)
+static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1,
+                         TCGArg arg2, int const_arg2, int rexw)
 {
     if (const_arg2) {
         if (arg2 == 0) {
@@ -508,9 +507,51 @@ static void tcg_out_brcond(TCGContext *s, int cond,
     } else {
         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
     }
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond,
+                           TCGArg arg1, TCGArg arg2, int const_arg2,
+                           int label_index, int rexw)
+{
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
     tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
 }
 
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg dest,
+                            TCGArg arg1, TCGArg arg2, int const_arg2, int rexw)
+{
+    int use_xor = (dest != arg1 && (const_arg2 || dest != arg2));
+
+    if (use_xor)
+        tcg_out_movi(s, TCG_TYPE_I32, dest, 0);
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
+    /* setcc */
+    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, 0, dest);
+    if (!use_xor)
+        tgen_arithi32(s, ARITH_AND, dest, 0xff);
+}
+
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg dest,
+                            TCGArg cmp1, TCGArg cmp2, int const_cmp2,
+                            TCGArg vtrue, TCGArg vfalse, int rexw)
+{
+    if (vtrue == vfalse) {
+        tcg_out_mov(s, dest, vtrue);
+        return;
+    }
+    if (dest == vtrue) {
+        cond = tcg_invert_cond(cond);
+        vtrue = vfalse;
+        vfalse = dest;
+    }
+
+    tcg_out_cond(s, cond, cmp1, cmp2, const_cmp2, rexw);
+    if (dest != vfalse)
+        tcg_out_mov(s, dest, vfalse);
+    /* cmovcc */
+    tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw, dest, vtrue);
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -1197,6 +1238,24 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_modrm(s, 0x8b, args[0], args[1]);
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+                        const_args[2], 0);
+        break;
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+                        const_args[2], P_REXW);
+        break;
+
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        const_args[2], args[3], args[4], 0);
+        break;
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        const_args[2], args[3], args[4], P_REXW);
+        break;
+
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
         break;
@@ -1376,6 +1435,12 @@ static const TCGTargetOpDef x86_64_op_defs[] = {
     { INDEX_op_ext16u_i64, { "r", "r"} },
     { INDEX_op_ext32u_i64, { "r", "r"} },
 
+    { INDEX_op_setcond_i32, { "r", "r", "ri" } },
+    { INDEX_op_setcond_i64, { "r", "r", "re" } },
+
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } },
+    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "r" } },
+
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L" } },
commit 798a4a5bfc491fef81cf968cd0039ad60f34366a
Author: Richard Henderson <rth@twiddle.net>
Date:   Fri Dec 18 13:01:30 2009 -0800

    tcg-i386: Implement small forward branches.
    
    There are places, like brcond2, where we know that the destination
    of a forward branch will be within 127 bytes.
    
    Add the R_386_PC8 relocation type to support this.  Add a flag to
    tcg_out_jxx and tcg_out_brcond* to enable it.  Set the flag in the
    brcond2 label_next branches; pass along the input flag otherwise.
    
    Signed-off-by: Richard Henderson <rth@twiddle.net>

diff --git a/elf.h b/elf.h
index 11674d7..c84c8ab 100644
--- a/elf.h
+++ b/elf.h
@@ -243,6 +243,8 @@ typedef struct {
 #define R_386_GOTOFF	9
 #define R_386_GOTPC	10
 #define R_386_NUM	11
+/* Not a dynamic reloc, so not included in R_386_NUM.  Used in TCG.  */
+#define R_386_PC8	23
 
 #define R_MIPS_NONE		0
 #define R_MIPS_16		1
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 972b102..48c9bc8 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -61,6 +61,12 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     case R_386_PC32:
         *(uint32_t *)code_ptr = value - (long)code_ptr;
         break;
+    case R_386_PC8:
+        value -= (long)code_ptr;
+        if (value != (int8_t)value)
+            tcg_abort();
+        *(uint8_t *)code_ptr = value;
+        break;
     default:
         tcg_abort();
     }
@@ -305,7 +311,8 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
         tgen_arithi(s, ARITH_ADD, reg, val, 0);
 }
 
-static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
+/* Use SMALL != 0 to force a short forward branch.  */
+static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
     int32_t val, val1;
     TCGLabel *l = &s->labels[label_index];
@@ -320,6 +327,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
                 tcg_out8(s, 0x70 + opc);
             tcg_out8(s, val1);
         } else {
+            if (small)
+                tcg_abort();
             if (opc == -1) {
                 tcg_out8(s, 0xe9);
                 tcg_out32(s, val - 5);
@@ -329,6 +338,14 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
                 tcg_out32(s, val - 6);
             }
         }
+    } else if (small) {
+        if (opc == -1) {
+            tcg_out8(s, 0xeb);
+        } else {
+            tcg_out8(s, 0x70 + opc);
+        }
+        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
+        s->code_ptr += 1;
     } else {
         if (opc == -1) {
             tcg_out8(s, 0xe9);
@@ -343,7 +360,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
 
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index)
+                           int label_index, int small)
 {
     if (const_arg2) {
         if (arg2 == 0) {
@@ -355,64 +372,84 @@ static void tcg_out_brcond(TCGContext *s, int cond,
     } else {
         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
     }
-    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
 }
 
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s,
-                            const TCGArg *args, const int *const_args)
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+                            const int *const_args, int small)
 {
     int label_next;
     label_next = gen_new_label();
     switch(args[4]) {
     case TCG_COND_EQ:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
-        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
+        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
+                       label_next, small);
+        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3],
+                       args[5], small);
         break;
     case TCG_COND_NE:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
-        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
+        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
+                       args[5], small);
+        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3],
+                       args[5], small);
         break;
     case TCG_COND_LT:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_LE:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_GT:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_GE:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_LTU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_LEU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_GTU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_GEU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     default:
         tcg_abort();
@@ -913,7 +950,7 @@ static inline void tcg_out_op(TCGContext *s, int opc,
         }
         break;
     case INDEX_op_br:
-        tcg_out_jxx(s, JCC_JMP, args[0]);
+        tcg_out_jxx(s, JCC_JMP, args[0], 0);
         break;
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
@@ -1044,10 +1081,11 @@ static inline void tcg_out_op(TCGContext *s, int opc,
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
     case INDEX_op_brcond_i32:
-        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+                       args[3], 0);
         break;
     case INDEX_op_brcond2_i32:
-        tcg_out_brcond2(s, args, const_args);
+        tcg_out_brcond2(s, args, const_args, 0);
         break;
 
     case INDEX_op_bswap16_i32:
commit b2c6cc4c5efa898b53653a35dd957dc653513a29
Author: Richard Henderson <rth@twiddle.net>
Date:   Fri Dec 18 13:29:53 2009 -0800

    tcg-i386: Implement setcond, movcond, setcond2.
    
    Implement conditional moves in the i386 backend.
    
    Signed-off-by: Richard Henderson <rth@twiddle.net>

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 48c9bc8..cec0663 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -358,9 +358,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
     }
 }
 
-static void tcg_out_brcond(TCGContext *s, int cond, 
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index, int small)
+static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1,
+			 TCGArg arg2, int const_arg2)
 {
     if (const_arg2) {
         if (arg2 == 0) {
@@ -372,6 +371,13 @@ static void tcg_out_brcond(TCGContext *s, int cond,
     } else {
         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
     }
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond, 
+                           TCGArg arg1, TCGArg arg2, int const_arg2,
+                           int label_index, int small)
+{
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2);
     tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
 }
 
@@ -457,6 +463,168 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
     tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
 }
 
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+                            TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+    int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2));
+
+    if (use_xor)
+        tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+    /* setcc */
+    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, arg0);
+    if (!use_xor)
+        tgen_arithi(s, ARITH_AND, arg0, 0xff, 0);
+}
+
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+                             const int *const_args)
+{
+    TCGArg new_args[6];
+    int label_true, label_over;
+
+    memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+    if (args[0] == args[1] || args[0] == args[2]
+        || (!const_args[3] && args[0] == args[3])
+        || (!const_args[4] && args[0] == args[4])) {
+        /* When the destination overlaps with one of the argument
+           registers, don't do anything tricky.  */
+        label_true = gen_new_label();
+        label_over = gen_new_label();
+
+        new_args[5] = label_true;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+        tcg_out_jxx(s, JCC_JMP, label_over, 1);
+        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    } else {
+        /* When the destination does not overlap one of the arguments,
+           clear the destination first, jump if cond false, and emit an
+           increment in the true case.  This results in smaller code.  */
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+        label_over = gen_new_label();
+        new_args[4] = tcg_invert_cond(new_args[4]);
+        new_args[5] = label_over;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    }
+}
+
+static inline int have_cmov(void)
+{
+#ifdef __i686__
+    /* Compiler options say that cmov is available.  */
+    return 1;
+#else
+    /* ??? Use cpuid or something and figure out what's running.  */
+    return 0;
+#endif
+}
+
+static void tcg_out_movcond(TCGContext *s, const TCGArg *args,
+                            const int *const_args)
+{
+    int vtc, vfc, cond, use_cmov = 0, do_swap = 0;
+    TCGArg d, vt, vf;
+
+    d = args[0];
+    vt = args[3];
+    vf = args[4];
+    vtc = const_args[3];
+    vfc = const_args[4];
+
+    /* ??? The jcc code path below assumes that one mov insn must be skipped.
+       Rather than complicate the code below, make sure to simplify the
+       conditional move here.  */
+    if (vtc == vfc && vt == vf) {
+        if (vtc)
+            tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+        else
+            tcg_out_mov(s, d, vt);
+        return;
+    }
+
+    cond = args[5];
+
+    /* If both arguments are constants, we *could* do all the funny bits that
+       gcc does with sbc, masks, etc.  There's likely no point.  Just use the
+       jcc version in this case.  We also have to be careful about clobbering
+       inputs when trying to move constants into position.  */
+
+    if (have_cmov()) {
+        use_cmov = 1;
+        if (vtc) {
+            if (vfc || d == vf)
+                use_cmov = 0;
+            else
+                do_swap = 1;
+        } else if (d == vt) {
+            if (vfc)
+                use_cmov = 0;
+            else
+                do_swap = 1;
+        }
+    }
+
+    if (!use_cmov) {
+        /* We're going to follow the lead of cmov and set D=VF first,
+           which means inverting the condition upon which we jump.  */
+        cond = tcg_invert_cond(cond);
+
+        /* Don't allow the move we jump over to be a nop.  */
+        do_swap = (!vtc && d == vt);
+    }
+
+    if (do_swap) {
+        TCGArg t;
+        cond = tcg_invert_cond(cond);
+        t = vf, vf = vt, vt = t;
+        t = vfc, vfc = vtc, vtc = t;
+    }
+
+    /* If possible, set D=0 before the compare, so that we can use XOR.  */
+    if (vfc && vf == 0 && d != args[1] && (const_args[2] || d != args[2])) {
+        tcg_out_movi(s, TCG_TYPE_I32, d, 0);
+        vf = d, vfc = 0;
+    }
+
+    tcg_out_cond(s, cond, args[1], args[2], const_args[2]);
+
+    if (vfc) {
+        /* Force the use of "mov $0, d" to avoid clobbering flags.  */
+        tcg_out8(s, 0xb8 + d);
+        tcg_out32(s, vf);
+    } else {
+        tcg_out_mov(s, d, vf);
+    }
+
+    if (use_cmov) {
+        if (vtc)
+            tcg_abort();
+        /* cmovcc */
+        tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT, d, vt);
+    } else {
+        int label_next = gen_new_label();
+
+        tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_next, 1);
+        if (vtc)
+            tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+        else
+            tcg_out_mov(s, d, vt);
+
+        tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+    }
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -1118,6 +1286,16 @@ static inline void tcg_out_op(TCGContext *s, int opc,
         tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+        break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args, const_args);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args, const_args);
+        break;
+
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
         break;
@@ -1206,6 +1384,10 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_ext8u_i32, { "r", "q"} },
     { INDEX_op_ext16u_i32, { "r", "r"} },
 
+    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "ri", "ri" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },

Comments

Laurent Desnogues Dec. 19, 2009, 11:40 a.m. UTC | #1
On Fri, Dec 18, 2009 at 10:38 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 12/18/2009 03:37 AM, Laurent Desnogues wrote:
>>>
>>>  tcg: Generic support for conditional set and conditional move.
>>
>> Needs cosmetics changes.
>
> Fixed, attachment 1.
>
>>>  tcg-x86_64: Implement setcond and movcond.
>>
>> Some cosmetics and comments, but overall good.
>
> Fixed, attachment 2.
>
>>>  tcg-i386: Implement small forward branches.
>>
>> I think this contains a bug.
>
> Fixed, attachment 3.  I've added an abort to patch_reloc to verify that the
> relocation is in range.  I've propagated the "small" flag to all of the
> branch functions so that...
>
>>>  tcg-i386: Simplify brcond2.
>>
>> I don't like the rewrite of brcond2.
>
> ... this patch is dropped.
>
>>>  tcg-i386: Implement setcond, movcond, setcond2.
>>
>> Not yet reviewed.
>
> Fixed, attachment 4.  Similar changes to the amd64 patch.

Everything looks good to me, though for i386 I'd not bet
my life it's 100% correct.  OTOH I think this is good enough
to go into mainline, so that people can start adding support
in the front-ends.

BTW for compiling to 32-bit on a 64-bit x86, the configure
script is broken as it checks gcc before having set -m32,
so passing -march=i686 will make it fail (in the end it means
I could not convince QEMU to use cmov :-).

Thanks,

Laurent

Acked-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Andreas Färber Dec. 19, 2009, 12:09 p.m. UTC | #2
Hello,

Am 18.12.2009 um 22:38 schrieb Richard Henderson:

> <commit-cmov-1.txt><commit-cmov-amd64.txt><commit-cmov-i386- 
> jmps.txt><commit-cmov-i386.txt>

Please send patches inline (and one patch per mail, like your original  
series), then a) you get more review comments and b) we can apply the  
patches with git-am (including your description) for testing.
You can use git-send-email's --in-reply-to= to make them appear as  
reply in this thread.

Thanks,

Andreas
Aurelien Jarno Dec. 19, 2009, 1:03 p.m. UTC | #3
On Fri, Dec 18, 2009 at 01:38:24PM -0800, Richard Henderson wrote:
> On 12/18/2009 03:37 AM, Laurent Desnogues wrote:
>>>   tcg: Generic support for conditional set and conditional move.
>>
>> Needs cosmetics changes.
>
> Fixed, attachment 1.
>
>>>   tcg-x86_64: Implement setcond and movcond.
>>
>> Some cosmetics and comments, but overall good.
>
> Fixed, attachment 2.
>
>>>   tcg-i386: Implement small forward branches.
>>
>> I think this contains a bug.
>
> Fixed, attachment 3.  I've added an abort to patch_reloc to verify that  
> the relocation is in range.  I've propagated the "small" flag to all of  
> the branch functions so that...
>
>>>   tcg-i386: Simplify brcond2.
>>
>> I don't like the rewrite of brcond2.
>
> ... this patch is dropped.
>
>>>   tcg-i386: Implement setcond, movcond, setcond2.
>>
>> Not yet reviewed.
>
> Fixed, attachment 4.  Similar changes to the amd64 patch.
>


Could you please send the patches inline instead. It makes them easier
to comment. 

Please find my comments here:
- I am fine with the setcond instruction
- For the movcond instruction, is there a real use for vtrue and vfalse
  values? Most CPU actually implement a version with one value.
  Implementing it with two values moves complexity within the arch
  specific tcg code.
- Do we really want to make movcond mandatory? It can be easily
  implemented using setcond, and, or instructions.
- The cmov instruction is not supported on all i386 CPU. While it is
  unlikely that someone runs qemu-system on such a CPU, it is not
  improbable that someone runs qemu-user on such a CPU. We should
  probably provide an alternative code and a check in configure (e.g.
  trying to compile asm inline code containing a cmov instruction).
- I am not sure using xor and followed by setcc *conditionally* instead
  of a movzb after setcc is a good idea. The two instructions are 
  supposed to be executed at the same speed, and time spent in code
  generation is not negligeable.
- Pay attention to the coding style, there are a few cases of if 
  without {}.

A final comment, would it be possible to split setcond and movcond in
different patches? setcond looks ready to go, there are probably some
more concerns/comments about movcond.
Aurelien Jarno Dec. 19, 2009, 1:32 p.m. UTC | #4
On Sat, Dec 19, 2009 at 02:03:46PM +0100, Aurelien Jarno wrote:
> On Fri, Dec 18, 2009 at 01:38:24PM -0800, Richard Henderson wrote:
> > On 12/18/2009 03:37 AM, Laurent Desnogues wrote:
> >>>   tcg: Generic support for conditional set and conditional move.
> >>
> >> Needs cosmetics changes.
> >
> > Fixed, attachment 1.
> >
> >>>   tcg-x86_64: Implement setcond and movcond.
> >>
> >> Some cosmetics and comments, but overall good.
> >
> > Fixed, attachment 2.
> >
> >>>   tcg-i386: Implement small forward branches.
> >>
> >> I think this contains a bug.
> >
> > Fixed, attachment 3.  I've added an abort to patch_reloc to verify that  
> > the relocation is in range.  I've propagated the "small" flag to all of  
> > the branch functions so that...
> >
> >>>   tcg-i386: Simplify brcond2.
> >>
> >> I don't like the rewrite of brcond2.
> >
> > ... this patch is dropped.
> >
> >>>   tcg-i386: Implement setcond, movcond, setcond2.
> >>
> >> Not yet reviewed.
> >
> > Fixed, attachment 4.  Similar changes to the amd64 patch.
> >
> 
> 
> Could you please send the patches inline instead. It makes them easier
> to comment. 
> 
> Please find my comments here:
> - I am fine with the setcond instruction
> - For the movcond instruction, is there a real use for vtrue and vfalse
>   values? Most CPU actually implement a version with one value.
>   Implementing it with two values moves complexity within the arch
>   specific tcg code.
> - Do we really want to make movcond mandatory? It can be easily
>   implemented using setcond, and, or instructions.
> - The cmov instruction is not supported on all i386 CPU. While it is
>   unlikely that someone runs qemu-system on such a CPU, it is not
>   improbable that someone runs qemu-user on such a CPU. We should
>   probably provide an alternative code and a check in configure (e.g.
>   trying to compile asm inline code containing a cmov instruction).
 
Forget about that, I read the i386 patch to quickly.
Richard Henderson Dec. 19, 2009, 4:09 p.m. UTC | #5
On 12/19/2009 03:40 AM, Laurent Desnogues wrote:
> BTW for compiling to 32-bit on a 64-bit x86, the configure
> script is broken as it checks gcc before having set -m32,
> so passing -march=i686 will make it fail (in the end it means
> I could not convince QEMU to use cmov :-).

One of my servers has a 32-bit install.  I got tired of remembering the 
moderately long command-line required to configure GCC for a 32-bit 
build on a 64-bit host.  :-)


r~
Richard Henderson Dec. 19, 2009, 4:19 p.m. UTC | #6
On 12/19/2009 05:03 AM, Aurelien Jarno wrote:
> - For the movcond instruction, is there a real use for vtrue and vfalse
>    values? Most CPU actually implement a version with one value.
>    Implementing it with two values moves complexity within the arch
>    specific tcg code.

The reason I added both is that rather than force TCG to insert extra 
moves in order to always match VFALSE with DEST, I could have the 
backend invert the condition if VTRUE happened to match DEST already.

I suppose it would be possible to tweek the TCG register allocator to 
understand such commutative operations.  That seemed harder, but perhaps 
it really isn't considering that inversion code has to be replicated 
across all the targets.  It's certainly something to think about.

> - Do we really want to make movcond mandatory? It can be easily
>    implemented using setcond, and, or instructions.

I think so.  In that every host does have something useful to emit 
that's better than the generic fallback, and we'll quickly have 
implementations for each.

> - I am not sure using xor and followed by setcc *conditionally* instead
>    of a movzb after setcc is a good idea. The two instructions are
>    supposed to be executed at the same speed, and time spent in code
>    generation is not negligeable.

I can certainly remove that bit if you insist.

> - Pay attention to the coding style, there are a few cases of if
>    without {}.

Ok.  I try to remember, but it's at odds with my usual style, so please 
understand momentary lapses.

> A final comment, would it be possible to split setcond and movcond in
> different patches? setcond looks ready to go, there are probably some
> more concerns/comments about movcond.

Ok.


r~
Aurelien Jarno Dec. 19, 2009, 11:02 p.m. UTC | #7
On Sat, Dec 19, 2009 at 08:19:32AM -0800, Richard Henderson wrote:
> On 12/19/2009 05:03 AM, Aurelien Jarno wrote:
> >- For the movcond instruction, is there a real use for vtrue and vfalse
> >   values? Most CPU actually implement a version with one value.
> >   Implementing it with two values moves complexity within the arch
> >   specific tcg code.
> 
> The reason I added both is that rather than force TCG to insert
> extra moves in order to always match VFALSE with DEST, I could have
> the backend invert the condition if VTRUE happened to match DEST
> already.
> 
> I suppose it would be possible to tweek the TCG register allocator
> to understand such commutative operations.  That seemed harder, but
> perhaps it really isn't considering that inversion code has to be
> replicated across all the targets.  It's certainly something to
> think about.
> 

My main concerns here is that we are introducing a complex code here
that has to be multiplied by the number of TCG targets we have. On the
other hand I am not sure there are a lot of use cases for the different
architectures we support.

In addition I am reserved to make such bug changes given I am not sure 
it will result in a speed gain. All the previous setcond implementations
have been stopped because there was no measurable gain.
diff mbox

Patch

diff --git a/tcg/README b/tcg/README
index e672258..6163abe 100644
--- a/tcg/README
+++ b/tcg/README
@@ -152,6 +152,11 @@  Conditional jump if t0 cond t1 is true. cond can be:
     TCG_COND_LEU /* unsigned */
     TCG_COND_GTU /* unsigned */
 
+* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+
+Similar to brcond, except that the 64-bit values T0 and T1
+are formed from two 32-bit arguments.
+
 ********* Arithmetic
 
 * add_i32/i64 t0, t1, t2
@@ -282,6 +287,25 @@  order bytes must be set to zero.
 Indicate that the value of t0 won't be used later. It is useful to
 force dead code elimination.
 
+********* Conditional moves
+
+* setcond_i32/i64 cond, dest, t1, t2
+
+dest = (t1 cond t2)
+
+Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+
+* movcond_i32/i64 cond, dest, c1, c2, vtrue, vfalse
+
+dest = (c1 cond c2 ? vtrue : vfalse)
+
+Set DEST to VTRUE if (C1 cond C2) is true, otherwise set to VFALSE.
+
+* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+
+Similar to setcond, except that the 64-bit values T1 and T2 are
+formed from two 32-bit arguments.  The result is a 32-bit value.
+
 ********* Type conversions
 
 * ext_i32_i64 t0, t1
@@ -375,7 +399,7 @@  The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
 
 On a 32 bit target, all 64 bit operations are converted to 32 bits. A
 few specific operations must be implemented to allow it (see add2_i32,
-sub2_i32, brcond2_i32).
+sub2_i32, brcond2_i32, setcond2_i32).
 
 Floating point operations are not supported in this version. A
 previous incarnation of the code generator had full support of them,
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index faf2e8b..f43ed16 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -280,6 +280,32 @@  static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
     *gen_opparam_ptr++ = GET_TCGV_I64(arg6);
 }
 
+static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+                                    TCGv_i32 arg3, TCGv_i32 arg4,
+                                    TCGv_i32 arg5, TCGArg arg6)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+    *gen_opparam_ptr++ = arg6;
+}
+
+static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+                                    TCGv_i64 arg3, TCGv_i64 arg4,
+                                    TCGv_i64 arg5, TCGArg arg6)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+    *gen_opparam_ptr++ = arg6;
+}
+
 static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                      TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5,
                                      TCGArg arg6)
@@ -1795,6 +1821,67 @@  static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret,
+                                       TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
+                                       TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#if TCG_TARGET_REG_BITS == 64
+    tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+#else
+    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+                     TCGV_LOW(arg1), TCGV_HIGH(arg1),
+                     TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+#endif
+}
+
+static inline void tcg_gen_movcond_i32(int cond, TCGv_i32 ret,
+                                       TCGv_i32 cmp1, TCGv_i32 cmp2,
+                                       TCGv_i32 op_t, TCGv_i32 op_f)
+{
+    if (TCGV_EQUAL_I32(op_t, op_f)) {
+        tcg_gen_mov_i32(ret, op_t);
+        return;
+    }
+    tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, cmp1, cmp2, op_t, op_f, cond);
+}
+
+static inline void tcg_gen_movcond_i64(int cond, TCGv_i64 ret,
+                                       TCGv_i64 cmp1, TCGv_i64 cmp2,
+                                       TCGv_i64 op_t, TCGv_i64 op_f)
+{
+    if (TCGV_EQUAL_I64(op_t, op_f)) {
+        tcg_gen_mov_i64(ret, op_t);
+        return;
+    }
+#if TCG_TARGET_REG_BITS == 64
+    tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, cmp1, cmp2, op_t, op_f, cond);
+#else
+    {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 zero = tcg_const_i32(0);
+
+        tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+                         TCGV_LOW(cmp1), TCGV_HIGH(cmp1),
+                         TCGV_LOW(cmp2), TCGV_HIGH(cmp2), cond);
+
+        /* ??? We could perhaps conditionally define a movcond2_i32.  */
+        tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, zero,
+                            TCGV_LOW(op_t), TCGV_LOW(op_f));
+        tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, zero,
+                            TCGV_HIGH(op_t), TCGV_HIGH(op_f));
+
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(zero);
+    }
+#endif
+}
+
 /***************************************/
 /* QEMU specific operations. Their type depend on the QEMU CPU
    type. */
@@ -2067,6 +2154,8 @@  static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_sari_tl tcg_gen_sari_i64
 #define tcg_gen_brcond_tl tcg_gen_brcond_i64
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
 #define tcg_gen_mul_tl tcg_gen_mul_i64
 #define tcg_gen_muli_tl tcg_gen_muli_i64
 #define tcg_gen_div_tl tcg_gen_div_i64
@@ -2137,6 +2226,8 @@  static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_sari_tl tcg_gen_sari_i32
 #define tcg_gen_brcond_tl tcg_gen_brcond_i32
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
 #define tcg_gen_mul_tl tcg_gen_mul_i32
 #define tcg_gen_muli_tl tcg_gen_muli_i32
 #define tcg_gen_div_tl tcg_gen_div_i32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index b7f3fd7..086968c 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -42,6 +42,8 @@  DEF2(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 
 DEF2(mov_i32, 1, 1, 0, 0)
 DEF2(movi_i32, 1, 0, 1, 0)
+DEF2(setcond_i32, 1, 2, 1, 0)
+DEF2(movcond_i32, 1, 4, 1, 0)
 /* load/store */
 DEF2(ld8u_i32, 1, 1, 1, 0)
 DEF2(ld8s_i32, 1, 1, 1, 0)
@@ -82,6 +84,7 @@  DEF2(add2_i32, 2, 4, 0, 0)
 DEF2(sub2_i32, 2, 4, 0, 0)
 DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF2(mulu2_i32, 2, 2, 0, 0)
+DEF2(setcond2_i32, 1, 4, 1, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext8s_i32
 DEF2(ext8s_i32, 1, 1, 0, 0)
@@ -111,6 +114,8 @@  DEF2(neg_i32, 1, 1, 0, 0)
 #if TCG_TARGET_REG_BITS == 64
 DEF2(mov_i64, 1, 1, 0, 0)
 DEF2(movi_i64, 1, 0, 1, 0)
+DEF2(setcond_i64, 1, 2, 1, 0)
+DEF2(movcond_i64, 1, 4, 1, 0)
 /* load/store */
 DEF2(ld8u_i64, 1, 1, 1, 0)
 DEF2(ld8s_i64, 1, 1, 1, 0)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 3c0e296..f7ea727 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -670,6 +670,7 @@  void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
 }
 #endif
 
+
 static void tcg_reg_alloc_start(TCGContext *s)
 {
     int i;
@@ -888,21 +889,31 @@  void tcg_dump_ops(TCGContext *s, FILE *outfile)
                 fprintf(outfile, "%s",
                         tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
             }
-            if (c == INDEX_op_brcond_i32
+            switch (c) {
+            case INDEX_op_brcond_i32:
+#if TCG_TARGET_REG_BITS == 32
+            case INDEX_op_brcond2_i32:
+#elif TCG_TARGET_REG_BITS == 64
+            case INDEX_op_brcond_i64:
+#endif
+            case INDEX_op_setcond_i32:
+            case INDEX_op_movcond_i32:
 #if TCG_TARGET_REG_BITS == 32
-                || c == INDEX_op_brcond2_i32
+            case INDEX_op_setcond2_i32:
 #elif TCG_TARGET_REG_BITS == 64
-                || c == INDEX_op_brcond_i64
+            case INDEX_op_setcond_i64:
+            case INDEX_op_movcond_i64:
 #endif
-                ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
                 else
                     fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
                 i = 1;
-            }
-            else
+                break;
+            default:
                 i = 0;
+                break;
+            }
             for(; i < nb_cargs; i++) {
                 if (k != 0)
                     fprintf(outfile, ",");