Message ID | 8f9e39de79e59163fb9a31b9e8433adf5c97d6ea.1261260692.git.rth@twiddle.net |
---|---|
State | New |
Headers | show |
On Sat, Dec 19, 2009 at 10:38:01PM +0000, Richard Henderson wrote: > --- > tcg/mips/tcg-target.c | 177 +++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 177 insertions(+), 0 deletions(-) While this code is surely highly optimized, it is not easily readable. I think dropping support for constant argument as it is currently done in brcond/brcond2 would help to make it more readable. I'll work on that in the next days. > diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c > index 8fcb5c9..2a2913d 100644 > --- a/tcg/mips/tcg-target.c > +++ b/tcg/mips/tcg-target.c > @@ -274,6 +274,8 @@ enum { > OPC_BEQ = 0x04 << 26, > OPC_BNE = 0x05 << 26, > OPC_ADDIU = 0x09 << 26, > + OPC_SLTI = 0x0A << 26, > + OPC_SLTIU = 0x0B << 26, > OPC_ANDI = 0x0C << 26, > OPC_ORI = 0x0D << 26, > OPC_XORI = 0x0E << 26, > @@ -583,6 +585,170 @@ static void tcg_out_brcond2(TCGContext *s, int cond, int arg1, > reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr); > } > > +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0, > + TCGArg arg1, TCGArg arg2, int const_arg2) > +{ > + int do_swap = 0, do_inv = 0; > + > + switch (cond) { > + case TCG_COND_EQ: > + case TCG_COND_NE: > + /* Both of these forms require comparisons against zero. */ > + if (arg2 != 0) { > + if (const_arg2) > + tcg_out_opc_imm(s, OPC_XORI, arg0, arg1, arg2); > + else > + tcg_out_opc_reg(s, OPC_XOR, arg0, arg1, arg2); > + arg1 = arg0; > + } > + break; > + > + case TCG_COND_GT: > + case TCG_COND_GTU: > + /* A > B --> B < A */ > + do_swap = 1; > + break; > + > + case TCG_COND_GE: > + case TCG_COND_GEU: > + /* A >= B --> !(A < B) */ > + cond = tcg_invert_cond(cond); > + do_inv = 1; > + break; > + > + case TCG_COND_LE: > + case TCG_COND_LEU: > + if (const_arg2 && arg2 < 32767) { > + /* A <= B --> A < B+1, given that B+1 doesn't overflow. */ > + arg2++; > + cond = (cond == TCG_COND_LE ? 
TCG_COND_LT : TCG_COND_LTU); > + } else { > + /* A <= B --> B >= A --> !(B < A) */ > + do_swap = do_inv = 1; > + } > + break; > + } > + > + if (do_swap) { > + TCGArg t; > + > + /* Since we allow constants in arg2, we must load (non-zero) > + constants into AT. */ > + if (const_arg2 && arg2 != 0) { > + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, arg2); > + arg2 = TCG_REG_AT; > + } > + t = arg1, arg1 = arg2, arg2 = arg1; > + const_arg2 = 0; > + cond = tcg_swap_cond(cond); > + } > + > + switch (cond) { > + case TCG_COND_EQ: > + /* X == 0 --> (unsigned)X < 1. */ > + tcg_out_opc_imm(s, OPC_SLTIU, arg0, arg1, 1); > + break; > + > + case TCG_COND_NE: > + /* X != 0 --> 0 < (unsigned)X. */ > + tcg_out_opc_reg(s, OPC_SLTU, arg0, TCG_REG_ZERO, arg1); > + break; > + > + case TCG_COND_LT: > + if (const_arg2) > + tcg_out_opc_imm(s, OPC_SLTI, arg0, arg1, arg2); > + else > + tcg_out_opc_imm(s, OPC_SLT, arg0, arg1, arg2); > + break; > + > + case TCG_COND_LTU: > + if (const_arg2) > + tcg_out_opc_imm(s, OPC_SLTIU, arg0, arg1, arg2); > + else > + tcg_out_opc_imm(s, OPC_SLTU, arg0, arg1, arg2); > + break; > + > + default: > + tcg_abort (); > + } > + > + if (do_inv) { > + tcg_out_opc_imm(s, OPC_XORI, arg0, arg0, 1); > + } > +} > + > +static void tcg_out_setcond2(TCGContext *s, int cond, int dest, > + int al, int ah, int bl, int bh, > + int blconst, int bhconst) > +{ > + int cl, ch; > + > + /* If we were to implement this function with brcond2 and two sets, > + we'd use 6-8 insns, including nops in the delay slots. Here we > + generate (excluding possible constant loads, which brcond does > + not support) a minimum of 3 insns and a maximum of 5 insns. 
*/ > + > + switch (cond) { > + case TCG_COND_NE: > + /* (ah != bh || al != bl) */ > + tcg_out_setcond(s, TCG_COND_NE, TCG_REG_T0, al, bl, blconst); > + tcg_out_setcond(s, TCG_COND_NE, dest, ah, bh, bhconst); > + tcg_out_opc_reg(s, OPC_OR, dest, dest, TCG_REG_T0); > + return; > + > + case TCG_COND_EQ: > + /* (ah == bh && al == bl) */ > + ch = -1; > + cl = TCG_COND_EQ; > + break; > + > + case TCG_COND_LT: > + /* (ah < bh || (ah == bh && al < bl)) */ > + ch = TCG_COND_LT; > + cl = TCG_COND_LTU; > + break; > + case TCG_COND_LTU: > + ch = (bh == 0 ? -1 : TCG_COND_LTU); > + cl = TCG_COND_LTU; > + break; > + > + case TCG_COND_LE: > + /* (ah < bh || (ah == bh && al <= bl) */ > + ch = TCG_COND_LT, cl = TCG_COND_LEU; > + break; > + case TCG_COND_LEU: > + ch = (bh == 0 ? -1 : TCG_COND_LTU); > + cl = TCG_COND_LEU; > + break; > + > + case TCG_COND_GT: > + case TCG_COND_GTU: > + /* (ah > bh || (ah == bh && al > bl)) */ > + ch = cond, cl = TCG_COND_GTU; > + break; > + > + case TCG_COND_GE: > + /* (ah > bh || (ah == bh && al >= bl)) */ > + ch = TCG_COND_GT, cl = TCG_COND_GEU; > + break; > + case TCG_COND_GEU: > + ch = TCG_COND_GTU, cl = TCG_COND_GEU; > + break; > + > + default: > + tcg_abort (); > + } > + > + tcg_out_setcond(s, cl, TCG_REG_AT, al, bl, blconst); > + tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_T0, ah, bh, bhconst); > + tcg_out_opc_reg(s, OPC_AND, (ch == -1 ? 
dest : TCG_REG_T0), > + TCG_REG_T0, TCG_REG_AT); > + if (ch != -1) { > + tcg_out_setcond(s, ch, dest, ah, bh, bhconst); > + tcg_out_opc_reg(s, OPC_OR, dest, dest, TCG_REG_T0); > + } > +} > + > #if defined(CONFIG_SOFTMMU) > > #include "../../softmmu_defs.h" > @@ -1155,6 +1321,14 @@ static inline void tcg_out_op(TCGContext *s, int opc, > tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]); > break; > > + case INDEX_op_setcond_i32: > + tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]); > + break; > + case INDEX_op_setcond2_i32: > + tcg_out_setcond2(s, args[5], args[0], args[1], args[2], > + args[3], args[4], const_args[3], const_args[4]); > + break; > + > case INDEX_op_qemu_ld8u: > tcg_out_qemu_ld(s, args, 0); > break; > @@ -1233,6 +1407,9 @@ static const TCGTargetOpDef mips_op_defs[] = { > { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } }, > { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, > > + { INDEX_op_setcond_i32, { "r", "r", "rJ" } }, > + { INDEX_op_setcond2_i32, { "r", "r", "r", "rJ", "rJ" } }, > + > #if TARGET_LONG_BITS == 32 > { INDEX_op_qemu_ld8u, { "L", "lZ" } }, > { INDEX_op_qemu_ld8s, { "L", "lZ" } }, > -- > 1.6.5.2 > > > >
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 8fcb5c9..2a2913d 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -274,6 +274,8 @@ enum {
     OPC_BEQ = 0x04 << 26,
     OPC_BNE = 0x05 << 26,
     OPC_ADDIU = 0x09 << 26,
+    OPC_SLTI = 0x0A << 26,
+    OPC_SLTIU = 0x0B << 26,
     OPC_ANDI = 0x0C << 26,
     OPC_ORI = 0x0D << 26,
     OPC_XORI = 0x0E << 26,
@@ -583,6 +585,204 @@ static void tcg_out_brcond2(TCGContext *s, int cond, int arg1,
     reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr);
 }
 
+/*
+ * Emit code that sets register ARG0 to 1 if "ARG1 <cond> ARG2" holds and
+ * to 0 otherwise.  ARG1 is a register; ARG2 is a register, or an immediate
+ * when CONST_ARG2 is non-zero.
+ *
+ * Every condition is reduced either to a comparison against zero (EQ/NE)
+ * or to a "set on less than", optionally with the operands swapped and/or
+ * the result inverted.  TCG_REG_AT is clobbered when an immediate has to
+ * be moved into a register for a swapped comparison.
+ */
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+                            TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+    int do_swap = 0, do_inv = 0;
+
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+        /* Both of these forms require comparisons against zero.
+           NOTE(review): XORI is assumed to zero-extend its 16-bit
+           immediate; if the "J" constraint can yield a negative constant,
+           the XOR below would not produce zero on equality -- confirm.  */
+        if (arg2 != 0) {
+            if (const_arg2)
+                tcg_out_opc_imm(s, OPC_XORI, arg0, arg1, arg2);
+            else
+                tcg_out_opc_reg(s, OPC_XOR, arg0, arg1, arg2);
+            arg1 = arg0;
+        }
+        break;
+
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        /* A > B --> B < A */
+        do_swap = 1;
+        break;
+
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+        /* A >= B --> !(A < B) */
+        cond = tcg_invert_cond(cond);
+        do_inv = 1;
+        break;
+
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+        if (const_arg2 && arg2 < 32767) {
+            /* A <= B --> A < B+1, given that B+1 doesn't overflow.  */
+            arg2++;
+            cond = (cond == TCG_COND_LE ? TCG_COND_LT : TCG_COND_LTU);
+        } else {
+            /* A <= B --> !(A > B) --> !(B < A) */
+            cond = tcg_invert_cond(cond);
+            do_swap = do_inv = 1;
+        }
+        break;
+    }
+
+    if (do_swap) {
+        TCGArg t;
+
+        /* Since we allow constants in arg2, we must load (non-zero)
+           constants into AT.  */
+        if (const_arg2 && arg2 != 0) {
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, arg2);
+            arg2 = TCG_REG_AT;
+        }
+        t = arg1;
+        arg1 = arg2;
+        arg2 = t;
+        const_arg2 = 0;
+        cond = tcg_swap_cond(cond);
+    }
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        /* X == 0 --> (unsigned)X < 1.  */
+        tcg_out_opc_imm(s, OPC_SLTIU, arg0, arg1, 1);
+        break;
+
+    case TCG_COND_NE:
+        /* X != 0 --> 0 < (unsigned)X.  */
+        tcg_out_opc_reg(s, OPC_SLTU, arg0, TCG_REG_ZERO, arg1);
+        break;
+
+    case TCG_COND_LT:
+        /* The register form must go through the register encoder.  */
+        if (const_arg2)
+            tcg_out_opc_imm(s, OPC_SLTI, arg0, arg1, arg2);
+        else
+            tcg_out_opc_reg(s, OPC_SLT, arg0, arg1, arg2);
+        break;
+
+    case TCG_COND_LTU:
+        /* Likewise: SLTU takes registers, SLTIU an immediate.  */
+        if (const_arg2)
+            tcg_out_opc_imm(s, OPC_SLTIU, arg0, arg1, arg2);
+        else
+            tcg_out_opc_reg(s, OPC_SLTU, arg0, arg1, arg2);
+        break;
+
+    default:
+        tcg_abort ();
+    }
+
+    if (do_inv) {
+        /* The result is 0 or 1, so XOR with 1 negates it.  */
+        tcg_out_opc_imm(s, OPC_XORI, arg0, arg0, 1);
+    }
+}
+
+/*
+ * Emit code that sets register DEST to the result (0 or 1) of comparing
+ * the two-word values AH:AL and BH:BL under COND.  BL and BH are
+ * immediates when BLCONST and BHCONST, respectively, are non-zero.
+ *
+ * The result is assembled from single-word tcg_out_setcond() results;
+ * TCG_REG_AT and TCG_REG_T0 are used as scratch registers.
+ */
+static void tcg_out_setcond2(TCGContext *s, int cond, int dest,
+                             int al, int ah, int bl, int bh,
+                             int blconst, int bhconst)
+{
+    int cl, ch;
+
+    /* If we were to implement this function with brcond2 and two sets,
+       we'd use 6-8 insns, including nops in the delay slots.  Here we
+       generate (excluding possible constant loads, which brcond does
+       not support) a minimum of 3 insns and a maximum of 7 insns.  */
+
+    switch (cond) {
+    case TCG_COND_NE:
+        /* (ah != bh || al != bl) */
+        tcg_out_setcond(s, TCG_COND_NE, TCG_REG_T0, al, bl, blconst);
+        tcg_out_setcond(s, TCG_COND_NE, dest, ah, bh, bhconst);
+        tcg_out_opc_reg(s, OPC_OR, dest, dest, TCG_REG_T0);
+        return;
+
+    case TCG_COND_EQ:
+        /* (ah == bh && al == bl) */
+        ch = -1;
+        cl = TCG_COND_EQ;
+        break;
+
+    case TCG_COND_LT:
+        /* (ah < bh || (ah == bh && al < bl)) */
+        ch = TCG_COND_LT;
+        cl = TCG_COND_LTU;
+        break;
+    case TCG_COND_LTU:
+        /* bh == 0 is taken to mean a zero high word (presumably constant 0
+           or register number 0 -- confirm); unsigned "ah < 0" can never
+           hold, so no separate high-word test is emitted (ch == -1).  */
+        ch = (bh == 0 ? -1 : TCG_COND_LTU);
+        cl = TCG_COND_LTU;
+        break;
+
+    case TCG_COND_LE:
+        /* (ah < bh || (ah == bh && al <= bl)) */
+        ch = TCG_COND_LT, cl = TCG_COND_LEU;
+        break;
+    case TCG_COND_LEU:
+        /* Same shortcut as for LTU.  */
+        ch = (bh == 0 ? -1 : TCG_COND_LTU);
+        cl = TCG_COND_LEU;
+        break;
+
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        /* (ah > bh || (ah == bh && al > bl)) */
+        ch = cond, cl = TCG_COND_GTU;
+        break;
+
+    case TCG_COND_GE:
+        /* (ah > bh || (ah == bh && al >= bl)) */
+        ch = TCG_COND_GT, cl = TCG_COND_GEU;
+        break;
+    case TCG_COND_GEU:
+        ch = TCG_COND_GTU, cl = TCG_COND_GEU;
+        break;
+
+    default:
+        tcg_abort ();
+    }
+
+    /* AT = low-word test, T0 = (ah == bh).  Their AND is the "high words
+       equal" half of the result, and the whole result when no separate
+       high-word test is needed (ch == -1).  */
+    tcg_out_setcond(s, cl, TCG_REG_AT, al, bl, blconst);
+    tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_T0, ah, bh, bhconst);
+    tcg_out_opc_reg(s, OPC_AND, (ch == -1 ? dest : TCG_REG_T0),
+                    TCG_REG_T0, TCG_REG_AT);
+    if (ch != -1) {
+        tcg_out_setcond(s, ch, dest, ah, bh, bhconst);
+        tcg_out_opc_reg(s, OPC_OR, dest, dest, TCG_REG_T0);
+    }
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -1155,6 +1355,14 @@ static inline void tcg_out_op(TCGContext *s, int opc,
         tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]);
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args[5], args[0], args[1], args[2],
+                         args[3], args[4], const_args[3], const_args[4]);
+        break;
+
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
         break;
@@ -1233,6 +1441,9 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
     { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } },
 
+    { INDEX_op_setcond_i32, { "r", "r", "rJ" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "rJ", "rJ" } },
+
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "L", "lZ" } },
     { INDEX_op_qemu_ld8s, { "L", "lZ" } },