@@ -197,6 +197,41 @@ static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
return false;
}
+static void emulate_vsyscall_sysexit(CPUX86State *env)
+{
+    /*
+     * Emulate the pop and ret instructions after the sysenter instruction
+     * in the vsyscall page: pop EBP, EDX and ECX, then "ret" into EIP.
+     * Any sysenter returns there, because sysenter does not save the old
+     * EIP!
+     *
+     * Registers are updated one pop at a time, so a fault part-way through
+     * leaves the earlier pops applied and ESP pointing at the slot that
+     * could not be read; that address is reported with the SIGSEGV.
+     */
+    abi_ulong word;
+
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->regs[R_EBP] = word;
+    /* Step by the width actually read by get_user_ual(). */
+    env->regs[R_ESP] += sizeof(word);
+
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->regs[R_EDX] = word;
+    env->regs[R_ESP] += sizeof(word);
+
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->regs[R_ECX] = word;
+    env->regs[R_ESP] += sizeof(word);
+
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->eip = word;
+    env->regs[R_ESP] += sizeof(word);
+    return;
+
+segv:
+    /* Every access above is a user-mode stack *read*: do not set W. */
+    env->error_code = PG_ERROR_U_MASK;
+    force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, env->regs[R_ESP]);
+}
+
void cpu_loop(CPUX86State *env)
{
CPUState *cs = env_cpu(env);
@@ -213,6 +248,7 @@ void cpu_loop(CPUX86State *env)
case 0x80:
#ifdef TARGET_ABI32
case EXCP_SYSCALL:
+ case EXCP_SYSENTER:
#endif
/* linux syscall from int $0x80 */
ret = do_syscall(env,
@@ -226,12 +262,18 @@ void cpu_loop(CPUX86State *env)
0, 0);
if (ret == -QEMU_ERESTARTSYS) {
env->eip -= 2;
- } else if (ret != -QEMU_ESIGRETURN) {
+ break;
+ }
+ if (ret != -QEMU_ESIGRETURN) {
env->regs[R_EAX] = ret;
}
+ if (trapnr == EXCP_SYSENTER) {
+ emulate_vsyscall_sysexit(env);
+ }
break;
#ifndef TARGET_ABI32
case EXCP_SYSCALL:
+ case EXCP_SYSENTER:
/* linux syscall from syscall instruction */
ret = do_syscall(env,
env->regs[R_EAX],
@@ -244,9 +286,14 @@ void cpu_loop(CPUX86State *env)
0, 0);
if (ret == -QEMU_ERESTARTSYS) {
env->eip -= 2;
- } else if (ret != -QEMU_ESIGRETURN) {
+ break;
+ }
+ if (ret != -QEMU_ESIGRETURN) {
env->regs[R_EAX] = ret;
}
+ if (trapnr == EXCP_SYSENTER) {
+ emulate_vsyscall_sysexit(env);
+ }
break;
#endif
#ifdef TARGET_X86_64
@@ -614,11 +614,18 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
CPUID_PAT | CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | \
CPUID_PAE | CPUID_SEP | CPUID_APIC)
+/*
+ * CPUID_SEP (SYSENTER/SYSEXIT) is advertised only when CONFIG_SOFTMMU or
+ * CONFIG_LINUX_USER is defined; any other build gets 0 here.  CPUID_SEP
+ * must therefore not appear in the unconditional part of TCG_FEATURES,
+ * otherwise this conditional has no effect.
+ */
+#if defined CONFIG_SOFTMMU || defined CONFIG_LINUX_USER
+#define TCG_NOBSD_FEATURES CPUID_SEP
+#else
+#define TCG_NOBSD_FEATURES 0
+#endif
+
#define TCG_FEATURES (CPUID_FP87 | CPUID_PSE | CPUID_TSC | CPUID_MSR | \
- CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \
+ CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | \
CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \
- CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE)
+ CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE | \
+ TCG_NOBSD_FEATURES)
/* partly implemented:
CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */
/* missing:
@@ -1185,6 +1185,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define EXCP_VMEXIT 0x100 /* only for system emulation */
#define EXCP_SYSCALL 0x101 /* only for user emulation */
#define EXCP_VSYSCALL 0x102 /* only for user emulation */
+#define EXCP_SYSENTER 0x103 /* SYSENTER executed; only for user emulation */
/* i386-specific interrupt pending bits. */
#define CPU_INTERRUPT_POLL CPU_INTERRUPT_TGT_EXT_1
@@ -49,7 +49,7 @@ DEF_HELPER_FLAGS_3(set_dr, TCG_CALL_NO_WG, void, env, int, tl)
DEF_HELPER_FLAGS_2(get_dr, TCG_CALL_NO_WG, tl, env, int)
#endif /* !CONFIG_USER_ONLY */
-DEF_HELPER_1(sysenter, void, env)
+DEF_HELPER_2(sysenter, void, env, int)
DEF_HELPER_2(sysexit, void, env, int)
DEF_HELPER_2(syscall, void, env, int)
#ifdef TARGET_X86_64
@@ -2147,39 +2147,6 @@ void helper_lret_protected(CPUX86State *env, int shift, int addend)
helper_ret_protected(env, shift, 0, addend, GETPC());
}
-void helper_sysenter(CPUX86State *env)
-{
- if (env->sysenter_cs == 0) {
- raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
- }
- env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
-
-#ifdef TARGET_X86_64
- if (env->hflags & HF_LMA_MASK) {
- cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
- 0, 0xffffffff,
- DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
- DESC_S_MASK |
- DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
- DESC_L_MASK);
- } else
-#endif
- {
- cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
- 0, 0xffffffff,
- DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
- DESC_S_MASK |
- DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
- }
- cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
- 0, 0xffffffff,
- DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
- DESC_S_MASK |
- DESC_W_MASK | DESC_A_MASK);
- env->regs[R_ESP] = env->sysenter_esp;
- env->eip = env->sysenter_eip;
-}
-
void helper_sysexit(CPUX86State *env, int dflag)
{
int cpl;
@@ -215,3 +215,36 @@ void helper_check_io(CPUX86State *env, uint32_t addr, uint32_t size)
raise_exception_err_ra(env, EXCP0D_GPF, 0, retaddr);
}
}
+
+/*
+ * SYSENTER for system emulation.
+ *
+ * Raises #GP(0) when env->sysenter_cs is zero.  Otherwise clears VM, IF
+ * and RF in eflags, loads CS from env->sysenter_cs and SS from
+ * env->sysenter_cs + 8 (each masked with 0xfffc, base 0, limit
+ * 0xffffffff), and continues at env->sysenter_eip with ESP set to
+ * env->sysenter_esp.  On TARGET_X86_64 the CS descriptor additionally
+ * gets DESC_L_MASK when HF_LMA_MASK is set in env->hflags.
+ *
+ * next_eip_addend is not used here.
+ */
+void helper_sysenter(CPUX86State *env, int next_eip_addend)
+{
+ if (env->sysenter_cs == 0) {
+ raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
+ }
+ env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
+
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
+ DESC_L_MASK);
+ } else
+#endif
+ {
+ cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+ }
+ cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK |
+ DESC_W_MASK | DESC_A_MASK);
+ env->regs[R_ESP] = env->sysenter_esp;
+ env->eip = env->sysenter_eip;
+}
@@ -5667,7 +5667,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
if (!PE(s)) {
gen_exception_gpf(s);
} else {
- gen_helper_sysenter(cpu_env);
+ gen_helper_sysenter(cpu_env, cur_insn_len_i32(s));
s->base.is_jmp = DISAS_EOB_ONLY;
}
break;
@@ -36,6 +36,22 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
cpu_loop_exit(cs);
}
+void helper_sysenter(CPUX86State *env, int next_eip_addend)
+{
+    CPUState *cpu = env_cpu(env);
+
+    /*
+     * Address of the instruction after SYSENTER; used for ERESTARTSYS.
+     * NOTE(review): confirm that the user-mode interrupt path copies this
+     * into env->eip for EXCP_SYSENTER -- the "env->eip -= 2" restart in
+     * the run loop depends on it.
+     */
+    env->exception_next_eip = env->eip + next_eip_addend;
+    env->exception_is_int = 0;
+
+    /*
+     * Leave the translated code with EXCP_SYSENTER pending.  sysenter
+     * returns to the landing pad of the vDSO, which pops ebp/edx/ecx
+     * before executing a "ret".
+     */
+    cpu->exception_index = EXCP_SYSENTER;
+    cpu_loop_exit(cpu);
+}
+
/*
* fake user mode interrupt. is_int is TRUE if coming from the int
* instruction. next_eip is the env->eip value AFTER the interrupt
TCG reports the SEP feature (SYSENTER/SYSEXIT) in user-mode emulation, but
does not plumb it into the linux-user run loop.  Split the helper into
system-emulation and user-mode-emulation cases and implement the latter.

SYSENTER is not well designed as a kernel-mode entry instruction: it does
not save the old EIP, and therefore Linux always makes it return to the
vsyscall page.  Because QEMU does not provide the _contents_ of the
vsyscall page, the instructions executed after SYSEXIT have to be emulated
by hand, up to and including the first RET.

Some corner cases, such as restarting the system call after the system
call has rewritten the SYSENTER instruction, are not emulated correctly.
On Linux, the system call restart uses the SYSENTER call in the vsyscall
page, while on QEMU it uses the emulated program's instruction.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 linux-user/i386/cpu_loop.c          | 51 +++++++++++++++++++++++++++--
 target/i386/cpu.c                   |  9 ++++-
 target/i386/cpu.h                   |  1 +
 target/i386/helper.h                |  2 +-
 target/i386/tcg/seg_helper.c        | 33 -------------------
 target/i386/tcg/sysemu/seg_helper.c | 33 +++++++++++++++++++
 target/i386/tcg/translate.c         |  2 +-
 target/i386/tcg/user/seg_helper.c   | 16 +++++++++
 8 files changed, 109 insertions(+), 38 deletions(-)