Message ID | 52B0A069.8060807@siemens.com |
---|---|
State | New |
Headers | show |
Il 17/12/2013 20:05, Jan Kiszka ha scritto: > If the guest is running in nested mode on system reset, clearing the > feature MSR signals the kernel to leave this mode. Recent kernels > processes this properly, but leave the VCPU state undefined behind. It > is the job of userspace to bring it to a proper shape. Therefore, write > this specific MSR first so that no state transfer gets lost. > > This allows to cleanly reset a guest with VMX in use. > > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > --- > target-i386/kvm.c | 32 ++++++++++++++++++++++++++++---- > 1 file changed, 28 insertions(+), 4 deletions(-) > > diff --git a/target-i386/kvm.c b/target-i386/kvm.c > index 1188482..ec51447 100644 > --- a/target-i386/kvm.c > +++ b/target-i386/kvm.c > @@ -1104,6 +1104,25 @@ static int kvm_put_tscdeadline_msr(X86CPU *cpu) > return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); > } > > +/* > + * Provide a separate write service for the feature control MSR in order to > + * kick the VCPU out of VMXON or even guest mode on reset. This has to be done > + * before writing any other state because forcibly leaving nested mode > + * invalidates the VCPU state. 
> + */ > +static int kvm_put_msr_feature_control(X86CPU *cpu) > +{ > + struct { > + struct kvm_msrs info; > + struct kvm_msr_entry entry; > + } msr_data; > + > + kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL, > + cpu->env.msr_ia32_feature_control); > + msr_data.info.nmsrs = 1; > + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); > +} > + > static int kvm_put_msrs(X86CPU *cpu, int level) > { > CPUX86State *env = &cpu->env; > @@ -1204,10 +1223,8 @@ static int kvm_put_msrs(X86CPU *cpu, int level) > if (cpu->hyperv_vapic) { > kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0); > } > - if (has_msr_feature_control) { > - kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL, > - env->msr_ia32_feature_control); > - } > + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see > + * kvm_put_msr_feature_control. */ > } > if (env->mcg_cap) { > int i; > @@ -1801,6 +1818,13 @@ int kvm_arch_put_registers(CPUState *cpu, int level) > > assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); > > + if (level >= KVM_PUT_RESET_STATE && has_msr_feature_control) { > + ret = kvm_put_msr_feature_control(x86_cpu); > + if (ret < 0) { > + return ret; > + } > + } > + > ret = kvm_getput_regs(x86_cpu, 1); > if (ret < 0) { > return ret; > Applied, thanks!
diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 1188482..ec51447 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1104,6 +1104,25 @@ static int kvm_put_tscdeadline_msr(X86CPU *cpu) return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); } +/* + * Provide a separate write service for the feature control MSR in order to + * kick the VCPU out of VMXON or even guest mode on reset. This has to be done + * before writing any other state because forcibly leaving nested mode + * invalidates the VCPU state. + */ +static int kvm_put_msr_feature_control(X86CPU *cpu) +{ + struct { + struct kvm_msrs info; + struct kvm_msr_entry entry; + } msr_data; + + kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL, + cpu->env.msr_ia32_feature_control); + msr_data.info.nmsrs = 1; + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); +} + static int kvm_put_msrs(X86CPU *cpu, int level) { CPUX86State *env = &cpu->env; @@ -1204,10 +1223,8 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (cpu->hyperv_vapic) { kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0); } - if (has_msr_feature_control) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL, - env->msr_ia32_feature_control); - } + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. */ } if (env->mcg_cap) { int i; @@ -1801,6 +1818,13 @@ int kvm_arch_put_registers(CPUState *cpu, int level) assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + if (level >= KVM_PUT_RESET_STATE && has_msr_feature_control) { + ret = kvm_put_msr_feature_control(x86_cpu); + if (ret < 0) { + return ret; + } + } + ret = kvm_getput_regs(x86_cpu, 1); if (ret < 0) { return ret;
If the guest is running in nested mode on system reset, clearing the feature MSR signals the kernel to leave this mode. Recent kernels process this properly, but leave the VCPU state undefined. It is the job of userspace to bring it into a proper shape. Therefore, write this specific MSR first so that no state transfer gets lost. This allows cleanly resetting a guest with VMX in use. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> --- target-i386/kvm.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-)