@@ -114,6 +114,8 @@ typedef enum X86CPUIDFeature {
X86_FEAT_CLWB,
X86_FEAT_CMOV,
X86_FEAT_CMPCCXADD,
+ X86_FEAT_CX8,
+ X86_FEAT_CX16,
X86_FEAT_F16C,
X86_FEAT_FMA,
X86_FEAT_FSGSBASE,
@@ -2298,104 +2298,6 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
}
-static void gen_cmpxchg8b(DisasContext *s, X86DecodedInsn *decode)
-{
- TCGv_i64 cmp, val, old;
- TCGv Z;
-
- gen_lea_modrm(s, decode);
-
- cmp = tcg_temp_new_i64();
- val = tcg_temp_new_i64();
- old = tcg_temp_new_i64();
-
- /* Construct the comparison values from the register pair. */
- tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
- tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
-
- /* Only require atomic with LOCK; non-parallel handled in generator. */
- if (s->prefix & PREFIX_LOCK) {
- tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
- } else {
- tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
- s->mem_index, MO_TEUQ);
- }
-
- /* Set tmp0 to match the required value of Z. */
- tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
- Z = tcg_temp_new();
- tcg_gen_trunc_i64_tl(Z, cmp);
-
- /*
- * Extract the result values for the register pair.
- * For 32-bit, we may do this unconditionally, because on success (Z=1),
- * the old value matches the previous value in EDX:EAX. For x86_64,
- * the store must be conditional, because we must leave the source
- * registers unchanged on success, and zero-extend the writeback
- * on failure (Z=0).
- */
- if (TARGET_LONG_BITS == 32) {
- tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
- } else {
- TCGv zero = tcg_constant_tl(0);
-
- tcg_gen_extr_i64_tl(s->T0, s->T1, old);
- tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
- s->T0, cpu_regs[R_EAX]);
- tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero,
- s->T1, cpu_regs[R_EDX]);
- }
-
- /* Update Z. */
- gen_compute_eflags(s);
- tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1);
-}
-
-#ifdef TARGET_X86_64
-static void gen_cmpxchg16b(DisasContext *s, X86DecodedInsn *decode)
-{
- MemOp mop = MO_TE | MO_128 | MO_ALIGN;
- TCGv_i64 t0, t1;
- TCGv_i128 cmp, val;
-
- gen_lea_modrm(s, decode);
-
- cmp = tcg_temp_new_i128();
- val = tcg_temp_new_i128();
- tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
- tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
-
- /* Only require atomic with LOCK; non-parallel handled in generator. */
- if (s->prefix & PREFIX_LOCK) {
- tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
- } else {
- tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
- }
-
- tcg_gen_extr_i128_i64(s->T0, s->T1, val);
-
- /* Determine success after the fact. */
- t0 = tcg_temp_new_i64();
- t1 = tcg_temp_new_i64();
- tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
- tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
- tcg_gen_or_i64(t0, t0, t1);
-
- /* Update Z. */
- gen_compute_eflags(s);
- tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
- tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
-
- /*
- * Extract the result values for the register pair. We may do this
- * unconditionally, because on success (Z=1), the old value matches
- * the previous value in RDX:RAX.
- */
- tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
- tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
-}
-#endif
-
#include "emit.c.inc"
static void gen_x87(DisasContext *s, X86DecodedInsn *decode)
@@ -2971,29 +2873,10 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
/* now check op code */
switch (b) {
- case 0x1c7: /* cmpxchg8b */
+ case 0x1c7: /* RDSEED, RDPID with f3 prefix */
mod = (modrm >> 6) & 3;
switch ((modrm >> 3) & 7) {
- case 1: /* CMPXCHG8, CMPXCHG16 */
- if (mod == 3) {
- goto illegal_op;
- }
-#ifdef TARGET_X86_64
- if (dflag == MO_64) {
- if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
- goto illegal_op;
- }
- gen_cmpxchg16b(s, decode);
- break;
- }
-#endif
- if (!(s->cpuid_features & CPUID_CX8)) {
- goto illegal_op;
- }
- gen_cmpxchg8b(s, decode);
- break;
-
- case 7: /* RDSEED, RDPID with f3 prefix */
+ case 7:
if (mod != 3 ||
(s->prefix & (PREFIX_LOCK | PREFIX_REPNZ))) {
goto illegal_op;
@@ -288,6 +288,25 @@ static void decode_group8(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
}
}
+static void decode_group9(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry legacy = X86_OP_ENTRY0(multi0F); /* unconverted */
+    static const X86OpEntry cx8 =
+        X86_OP_ENTRY1(CMPXCHG8B,  M,q,  lock p_00 cpuid(CX8));
+    static const X86OpEntry cx16 =
+        X86_OP_ENTRY1(CMPXCHG16B, M,dq, lock p_00 cpuid(CX16));
+
+    int modrm = get_modrm(s, env);
+    int mod = modrm >> 6;
+    int reg = (modrm >> 3) & 7;
+
+    if (mod == 3) {
+        *entry = legacy;                 /* register operand: gen_multi0F */
+    } else if (reg == 1) {
+        *entry = REX_W(s) ? cx16 : cx8;  /* memory /1: REX.W picks 16B */
+    }
+}
+
static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
static const X86OpEntry group15_reg[8] = {
@@ -1203,7 +1222,7 @@ static const X86OpEntry opcodes_0F[256] = {
[0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66),
[0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66),
[0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66),
- [0xc7] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted */
+ [0xc7] = X86_OP_GROUP0(group9),
[0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2),
[0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
@@ -2241,8 +2260,12 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
return (s->cpuid_features & CPUID_CMOV);
case X86_FEAT_CLFLUSH:
return (s->cpuid_features & CPUID_CLFLUSH);
+ case X86_FEAT_CX8:
+ return (s->cpuid_features & CPUID_CX8);
case X86_FEAT_FXSR:
return (s->cpuid_features & CPUID_FXSR);
+ case X86_FEAT_CX16:
+ return (s->cpuid_ext_features & CPUID_EXT_CX16);
case X86_FEAT_F16C:
return (s->cpuid_ext_features & CPUID_EXT_F16C);
case X86_FEAT_FMA:
@@ -2722,15 +2745,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
break;
}
- /*
- * hack for old decoder: 0F C7 has both instructions that accept LOCK
- * and instructions that don't, but also needs X86_SPECIAL_NoLoadEA.
- * Keep this here until CMPXCHG8B/CMPXCHG16B is separated from the
- * other unconverted opcodes.
- */
- if (decode.e.gen == gen_multi0F) {
- accept_lock = true;
- }
if ((s->prefix & PREFIX_LOCK) && !accept_lock) {
goto illegal_op;
}
@@ -1782,6 +1782,102 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
decode->cc_op = CC_OP_SUBB + ot;
}
+static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode)
+{
+#ifdef TARGET_X86_64
+ MemOp mop = MO_TE | MO_128 | MO_ALIGN;
+ TCGv_i64 t0, t1;
+ TCGv_i128 cmp, val;
+ /* Emit CMPXCHG16B on s->A0: expect RDX:RAX, store RCX:RBX if it matches. */
+ cmp = tcg_temp_new_i128();
+ val = tcg_temp_new_i128();
+ tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+ tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+ /* Only require atomic with LOCK; non-parallel handled in generator. */
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
+ } else {
+ tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
+ }
+
+ tcg_gen_extr_i128_i64(s->T0, s->T1, val); /* T1:T0 = value read back */
+
+ /* Determine success after the fact: t0 == 0 iff T1:T0 equals RDX:RAX. */
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
+ tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ /* Update Z: compute the flags, then deposit (t0 == 0) at CC_Z in cc_src. */
+ gen_compute_eflags(s);
+ tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
+ tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
+
+ /*
+ * Extract the result values for the register pair. We may do this
+ * unconditionally, because on success (Z=1), the old value matches
+ * the previous value in RDX:RAX.
+ */
+ tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
+ tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
+#else
+ abort(); /* NOTE(review): presumably unreachable without REX.W -- confirm */
+#endif
+}
+
+static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode)
+{
+ TCGv_i64 cmp, val, old;
+ TCGv Z;
+ /* Emit CMPXCHG8B on s->A0: expect EDX:EAX, store ECX:EBX if it matches. */
+ cmp = tcg_temp_new_i64();
+ val = tcg_temp_new_i64();
+ old = tcg_temp_new_i64();
+
+ /* Construct the 64-bit compare and store values from the register pairs. */
+ tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+ tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+ /* Only require atomic with LOCK; non-parallel handled in generator. */
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
+ } else {
+ tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
+ s->mem_index, MO_TEUQ);
+ }
+
+ /* Compute the required value of Z in cmp (old == cmp), then narrow to Z. */
+ tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
+ Z = tcg_temp_new();
+ tcg_gen_trunc_i64_tl(Z, cmp);
+
+ /*
+ * Extract the result values for the register pair.
+ * For 32-bit, we may do this unconditionally, because on success (Z=1),
+ * the old value matches the previous value in EDX:EAX. For x86_64,
+ * the store must be conditional, because we must leave the source
+ * registers unchanged on success, and zero-extend the writeback
+ * on failure (Z=0).
+ */
+ if (TARGET_LONG_BITS == 32) {
+ tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
+ } else {
+ TCGv zero = tcg_constant_tl(0);
+
+ tcg_gen_extr_i64_tl(s->T0, s->T1, old);
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
+ s->T0, cpu_regs[R_EAX]);
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero,
+ s->T1, cpu_regs[R_EDX]);
+ }
+
+ /* Update Z: compute the flags, then deposit Z at the CC_Z bit of cc_src. */
+ gen_compute_eflags(s);
+ tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1);
+}
+
static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
This moves the last LOCK-enabled instructions to the new decoder. It is now possible to assume that gen_multi0F is called only after checking that PREFIX_LOCK was not specified. The gen_cmpxchg8b and gen_cmpxchg16b functions even have the correct prototype already; the only thing that needs to be done is removing the gen_lea_modrm() call. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/tcg/decode-new.h | 2 + target/i386/tcg/translate.c | 121 +------------------------------ target/i386/tcg/decode-new.c.inc | 34 ++++++--- target/i386/tcg/emit.c.inc | 96 ++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 129 deletions(-)