Message ID | 4F4AF1FB.6000903@cn.fujitsu.com |
---|---|
State | New |
Headers | show |
On 2012-02-27 04:01, Wen Congyang wrote: > We can know the guest is paniced when the guest runs on xen. > But we do not have such feature on kvm. This patch implemnts > this feature, and the implementation is the same as xen: > register panic notifier, and call hypercall when the guest > is paniced. > > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > --- > arch/x86/kernel/kvm.c | 12 ++++++++++++ > arch/x86/kvm/svm.c | 8 ++++++-- > arch/x86/kvm/vmx.c | 8 ++++++-- > arch/x86/kvm/x86.c | 13 +++++++++++-- > include/linux/kvm.h | 1 + > include/linux/kvm_para.h | 1 + > 6 files changed, 37 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index f0c6fd6..b928d1d 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { > .notifier_call = kvm_pv_reboot_notify, > }; > > +static int > +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) > +{ > + kvm_hypercall0(KVM_HC_GUEST_PANIC); > + return NOTIFY_DONE; > +} > + > +static struct notifier_block kvm_pv_panic_nb = { > + .notifier_call = kvm_pv_panic_notify, > +}; > + You should split up host and guest-side changes. > static u64 kvm_steal_clock(int cpu) > { > u64 steal; > @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) > > paravirt_ops_setup(); > register_reboot_notifier(&kvm_pv_reboot_nb); > + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); > for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) > spin_lock_init(&async_pf_sleepers[i].lock); > if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 0b7690e..38b4705 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) > > static int vmmcall_interception(struct vcpu_svm *svm) > { > + int ret; > + > svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; > skip_emulated_instruction(&svm->vcpu); > - kvm_emulate_hypercall(&svm->vcpu); > - return 1; > + ret = kvm_emulate_hypercall(&svm->vcpu); > + > + /* Ignore the error? */ > + return ret == 0 ? 0 : 1; Why can't kvm_emulate_hypercall return the right value? > } > > static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 66147ca..1b57ebb 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu) > > static int handle_vmcall(struct kvm_vcpu *vcpu) > { > + int ret; > + > skip_emulated_instruction(vcpu); > - kvm_emulate_hypercall(vcpu); > - return 1; > + ret = kvm_emulate_hypercall(vcpu); > + > + /* Ignore the error? */ > + return ret == 0 ? 0 : 1; > } > > static int handle_invd(struct kvm_vcpu *vcpu) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index c9d99e5..3fc2853 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > u64 param, ingpa, outgpa, ret; > uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; > bool fast, longmode; > - int cs_db, cs_l; > + int cs_db, cs_l, r = 1; > > /* > * hypercall generates UD from non zero cpl and real mode > @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: > kvm_vcpu_on_spin(vcpu); > break; > + case KVM_HC_GUEST_PANIC: > + vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC; > + r = 0; > + break; That's the wrong place. This is a KVM hypercall, not a HyperV one. > default: > res = HV_STATUS_INVALID_HYPERCALL_CODE; > break; > @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); > } > > - return 1; > + return r; > } > > int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) > @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) > case KVM_HC_VAPIC_POLL_IRQ: > ret = 0; > break; > + case KVM_HC_GUEST_PANIC: > + ret = 0; > + vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC; > + r = 0; > + break; > default: > ret = -KVM_ENOSYS; > break; > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > index acbe429..8f0e31b 100644 > --- a/include/linux/kvm.h > +++ b/include/linux/kvm.h > @@ -163,6 +163,7 @@ struct kvm_pit_config { > #define KVM_EXIT_OSI 18 > #define KVM_EXIT_PAPR_HCALL 19 > #define KVM_EXIT_S390_UCONTROL 20 > +#define KVM_EXIT_GUEST_PANIC 21 > > /* For KVM_EXIT_INTERNAL_ERROR */ > #define KVM_INTERNAL_ERROR_EMULATION 1 > diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h > index ff476dd..cf94023 100644 > --- a/include/linux/kvm_para.h > +++ b/include/linux/kvm_para.h > @@ -19,6 +19,7 @@ > #define KVM_HC_MMU_OP 2 > #define KVM_HC_FEATURES 3 > #define KVM_HC_PPC_MAP_MAGIC_PAGE 4 > +#define KVM_HC_GUEST_PANIC 5 > > /* > * hypercalls use architecture specific Jan
At 02/27/2012 11:08 PM, Jan Kiszka Wrote: > On 2012-02-27 04:01, Wen Congyang wrote: >> We can know the guest is paniced when the guest runs on xen. >> But we do not have such feature on kvm. This patch implemnts >> this feature, and the implementation is the same as xen: >> register panic notifier, and call hypercall when the guest >> is paniced. >> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >> --- >> arch/x86/kernel/kvm.c | 12 ++++++++++++ >> arch/x86/kvm/svm.c | 8 ++++++-- >> arch/x86/kvm/vmx.c | 8 ++++++-- >> arch/x86/kvm/x86.c | 13 +++++++++++-- >> include/linux/kvm.h | 1 + >> include/linux/kvm_para.h | 1 + >> 6 files changed, 37 insertions(+), 6 deletions(-) >> >> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c >> index f0c6fd6..b928d1d 100644 >> --- a/arch/x86/kernel/kvm.c >> +++ b/arch/x86/kernel/kvm.c >> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { >> .notifier_call = kvm_pv_reboot_notify, >> }; >> >> +static int >> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) >> +{ >> + kvm_hypercall0(KVM_HC_GUEST_PANIC); >> + return NOTIFY_DONE; >> +} >> + >> +static struct notifier_block kvm_pv_panic_nb = { >> + .notifier_call = kvm_pv_panic_notify, >> +}; >> + > > You should split up host and guest-side changes. OK > >> static u64 kvm_steal_clock(int cpu) >> { >> u64 steal; >> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) >> >> paravirt_ops_setup(); >> register_reboot_notifier(&kvm_pv_reboot_nb); >> + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); >> for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) >> spin_lock_init(&async_pf_sleepers[i].lock); >> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) >> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >> index 0b7690e..38b4705 100644 >> --- a/arch/x86/kvm/svm.c >> +++ b/arch/x86/kvm/svm.c >> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) >> >> static int vmmcall_interception(struct vcpu_svm *svm) >> { >> + int ret; >> + >> svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; >> skip_emulated_instruction(&svm->vcpu); >> - kvm_emulate_hypercall(&svm->vcpu); >> - return 1; >> + ret = kvm_emulate_hypercall(&svm->vcpu); >> + >> + /* Ignore the error? */ >> + return ret == 0 ? 0 : 1; > > Why can't kvm_emulate_hypercall return the right value? Because before this patch, kvm always ignores the error. After rereading the code, kvm_emulate_hypercall() will return -KVM_EPERM when vcpu's CPL is not 0. I think we should deal with this exception in kvm_emulate_hypercall(), and return 1. But I donot know how to do it. kvm_queue_exception(vcpu, UD_VECTOR)? > >> } >> >> static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 66147ca..1b57ebb 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu) >> >> static int handle_vmcall(struct kvm_vcpu *vcpu) >> { >> + int ret; >> + >> skip_emulated_instruction(vcpu); >> - kvm_emulate_hypercall(vcpu); >> - return 1; >> + ret = kvm_emulate_hypercall(vcpu); >> + >> + /* Ignore the error? */ >> + return ret == 0 ? 0 : 1; >> } >> >> static int handle_invd(struct kvm_vcpu *vcpu) >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index c9d99e5..3fc2853 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) >> u64 param, ingpa, outgpa, ret; >> uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; >> bool fast, longmode; >> - int cs_db, cs_l; >> + int cs_db, cs_l, r = 1; >> >> /* >> * hypercall generates UD from non zero cpl and real mode >> @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) >> case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: >> kvm_vcpu_on_spin(vcpu); >> break; >> + case KVM_HC_GUEST_PANIC: >> + vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC; >> + r = 0; >> + break; > > That's the wrong place. This is a KVM hypercall, not a HyperV one. OK, I will remove it. Thanks Wen Congyang > >> default: >> res = HV_STATUS_INVALID_HYPERCALL_CODE; >> break; >> @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) >> kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); >> } >> >> - return 1; >> + return r; >> } >> >> int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) >> @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) >> case KVM_HC_VAPIC_POLL_IRQ: >> ret = 0; >> break; >> + case KVM_HC_GUEST_PANIC: >> + ret = 0; >> + vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC; >> + r = 0; >> + break; >> default: >> ret = -KVM_ENOSYS; >> break; >> diff --git a/include/linux/kvm.h b/include/linux/kvm.h >> index acbe429..8f0e31b 100644 >> --- a/include/linux/kvm.h >> +++ b/include/linux/kvm.h >> @@ -163,6 +163,7 @@ struct kvm_pit_config { >> #define KVM_EXIT_OSI 18 >> #define KVM_EXIT_PAPR_HCALL 19 >> #define KVM_EXIT_S390_UCONTROL 20 >> +#define KVM_EXIT_GUEST_PANIC 21 >> >> /* For KVM_EXIT_INTERNAL_ERROR */ >> #define KVM_INTERNAL_ERROR_EMULATION 1 >> diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h >> index ff476dd..cf94023 100644 >> --- a/include/linux/kvm_para.h >> +++ b/include/linux/kvm_para.h >> @@ -19,6 +19,7 @@ >> #define KVM_HC_MMU_OP 2 >> #define KVM_HC_FEATURES 3 >> #define KVM_HC_PPC_MAP_MAGIC_PAGE 4 >> +#define KVM_HC_GUEST_PANIC 5 >> >> /* >> * hypercalls use architecture specific > > Jan >
At 02/28/2012 01:26 PM, Wen Congyang Wrote: > At 02/27/2012 11:08 PM, Jan Kiszka Wrote: >> On 2012-02-27 04:01, Wen Congyang wrote: >>> We can know the guest is paniced when the guest runs on xen. >>> But we do not have such feature on kvm. This patch implemnts >>> this feature, and the implementation is the same as xen: >>> register panic notifier, and call hypercall when the guest >>> is paniced. >>> >>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >>> --- >>> arch/x86/kernel/kvm.c | 12 ++++++++++++ >>> arch/x86/kvm/svm.c | 8 ++++++-- >>> arch/x86/kvm/vmx.c | 8 ++++++-- >>> arch/x86/kvm/x86.c | 13 +++++++++++-- >>> include/linux/kvm.h | 1 + >>> include/linux/kvm_para.h | 1 + >>> 6 files changed, 37 insertions(+), 6 deletions(-) >>> >>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c >>> index f0c6fd6..b928d1d 100644 >>> --- a/arch/x86/kernel/kvm.c >>> +++ b/arch/x86/kernel/kvm.c >>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { >>> .notifier_call = kvm_pv_reboot_notify, >>> }; >>> >>> +static int >>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) >>> +{ >>> + kvm_hypercall0(KVM_HC_GUEST_PANIC); >>> + return NOTIFY_DONE; >>> +} >>> + >>> +static struct notifier_block kvm_pv_panic_nb = { >>> + .notifier_call = kvm_pv_panic_notify, >>> +}; >>> + >> >> You should split up host and guest-side changes. > > OK > >> >>> static u64 kvm_steal_clock(int cpu) >>> { >>> u64 steal; >>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) >>> >>> paravirt_ops_setup(); >>> register_reboot_notifier(&kvm_pv_reboot_nb); >>> + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); >>> for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) >>> spin_lock_init(&async_pf_sleepers[i].lock); >>> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >>> index 0b7690e..38b4705 100644 >>> --- a/arch/x86/kvm/svm.c >>> +++ b/arch/x86/kvm/svm.c >>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) >>> >>> static int vmmcall_interception(struct vcpu_svm *svm) >>> { >>> + int ret; >>> + >>> svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; >>> skip_emulated_instruction(&svm->vcpu); >>> - kvm_emulate_hypercall(&svm->vcpu); >>> - return 1; >>> + ret = kvm_emulate_hypercall(&svm->vcpu); >>> + >>> + /* Ignore the error? */ >>> + return ret == 0 ? 0 : 1; >> >> Why can't kvm_emulate_hypercall return the right value? > > Because before this patch, kvm always ignores the error. > > After rereading the code, kvm_emulate_hypercall() will return -KVM_EPERM Sorry for my miss. kvm_emulate_hypercall() does not return -KVM_EPERM. > when vcpu's CPL is not 0. I think we should deal with this exception > in kvm_emulate_hypercall(), and return 1. But I donot know > how to do it. kvm_queue_exception(vcpu, UD_VECTOR)? > >> >>> } >>> >>> static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>> index 66147ca..1b57ebb 100644 >>> --- a/arch/x86/kvm/vmx.c >>> +++ b/arch/x86/kvm/vmx.c >>> @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu) >>> >>> static int handle_vmcall(struct kvm_vcpu *vcpu) >>> { >>> + int ret; >>> + >>> skip_emulated_instruction(vcpu); >>> - kvm_emulate_hypercall(vcpu); >>> - return 1; >>> + ret = kvm_emulate_hypercall(vcpu); >>> + >>> + /* Ignore the error? */ >>> + return ret == 0 ? 0 : 1; >>> } >>> >>> static int handle_invd(struct kvm_vcpu *vcpu) >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>> index c9d99e5..3fc2853 100644 >>> --- a/arch/x86/kvm/x86.c >>> +++ b/arch/x86/kvm/x86.c >>> @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) >>> u64 param, ingpa, outgpa, ret; >>> uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; >>> bool fast, longmode; >>> - int cs_db, cs_l; >>> + int cs_db, cs_l, r = 1; >>> >>> /* >>> * hypercall generates UD from non zero cpl and real mode >>> @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) >>> case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: >>> kvm_vcpu_on_spin(vcpu); >>> break; >>> + case KVM_HC_GUEST_PANIC: >>> + vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC; >>> + r = 0; >>> + break; >> >> That's the wrong place. This is a KVM hypercall, not a HyperV one. > > OK, I will remove it. > > Thanks > Wen Congyang > >> >>> default: >>> res = HV_STATUS_INVALID_HYPERCALL_CODE; >>> break; >>> @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) >>> kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); >>> } >>> >>> - return 1; >>> + return r; >>> } >>> >>> int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) >>> @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) >>> case KVM_HC_VAPIC_POLL_IRQ: >>> ret = 0; >>> break; >>> + case KVM_HC_GUEST_PANIC: >>> + ret = 0; >>> + vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC; >>> + r = 0; >>> + break; >>> default: >>> ret = -KVM_ENOSYS; >>> break; >>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h >>> index acbe429..8f0e31b 100644 >>> --- a/include/linux/kvm.h >>> +++ b/include/linux/kvm.h >>> @@ -163,6 +163,7 @@ struct kvm_pit_config { >>> #define KVM_EXIT_OSI 18 >>> #define KVM_EXIT_PAPR_HCALL 19 >>> #define KVM_EXIT_S390_UCONTROL 20 >>> +#define KVM_EXIT_GUEST_PANIC 21 >>> >>> /* For KVM_EXIT_INTERNAL_ERROR */ >>> #define KVM_INTERNAL_ERROR_EMULATION 1 >>> diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h >>> index ff476dd..cf94023 100644 >>> --- a/include/linux/kvm_para.h >>> +++ b/include/linux/kvm_para.h >>> @@ -19,6 +19,7 @@ >>> #define KVM_HC_MMU_OP 2 >>> #define KVM_HC_FEATURES 3 >>> #define KVM_HC_PPC_MAP_MAGIC_PAGE 4 >>> +#define KVM_HC_GUEST_PANIC 5 >>> >>> /* >>> * hypercalls use architecture specific >> >> Jan >> > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ >
At 02/27/2012 11:08 PM, Jan Kiszka Wrote: > On 2012-02-27 04:01, Wen Congyang wrote: >> We can know the guest is paniced when the guest runs on xen. >> But we do not have such feature on kvm. This patch implemnts >> this feature, and the implementation is the same as xen: >> register panic notifier, and call hypercall when the guest >> is paniced. >> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >> --- >> arch/x86/kernel/kvm.c | 12 ++++++++++++ >> arch/x86/kvm/svm.c | 8 ++++++-- >> arch/x86/kvm/vmx.c | 8 ++++++-- >> arch/x86/kvm/x86.c | 13 +++++++++++-- >> include/linux/kvm.h | 1 + >> include/linux/kvm_para.h | 1 + >> 6 files changed, 37 insertions(+), 6 deletions(-) >> >> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c >> index f0c6fd6..b928d1d 100644 >> --- a/arch/x86/kernel/kvm.c >> +++ b/arch/x86/kernel/kvm.c >> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { >> .notifier_call = kvm_pv_reboot_notify, >> }; >> >> +static int >> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) >> +{ >> + kvm_hypercall0(KVM_HC_GUEST_PANIC); >> + return NOTIFY_DONE; >> +} >> + >> +static struct notifier_block kvm_pv_panic_nb = { >> + .notifier_call = kvm_pv_panic_notify, >> +}; >> + > > You should split up host and guest-side changes. > >> static u64 kvm_steal_clock(int cpu) >> { >> u64 steal; >> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) >> >> paravirt_ops_setup(); >> register_reboot_notifier(&kvm_pv_reboot_nb); >> + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); >> for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) >> spin_lock_init(&async_pf_sleepers[i].lock); >> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) >> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >> index 0b7690e..38b4705 100644 >> --- a/arch/x86/kvm/svm.c >> +++ b/arch/x86/kvm/svm.c >> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) >> >> static int vmmcall_interception(struct vcpu_svm *svm) >> { >> + int ret; >> + >> svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; >> skip_emulated_instruction(&svm->vcpu); >> - kvm_emulate_hypercall(&svm->vcpu); >> - return 1; >> + ret = kvm_emulate_hypercall(&svm->vcpu); >> + >> + /* Ignore the error? */ >> + return ret == 0 ? 0 : 1; > > Why can't kvm_emulate_hypercall return the right value? kvm_emulate_hypercall() will call kvm_hv_hypercall(), and kvm_hv_hypercall() will return 0 when vcpu's CPL > 0. If vcpu's CPL > 0, does kvm need to exit and tell it to qemu? Thanks Wen Congyang > >> } >> >> static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 66147ca..1b57ebb 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu) >> >> static int handle_vmcall(struct kvm_vcpu *vcpu) >> { >> + int ret; >> + >> skip_emulated_instruction(vcpu); >> - kvm_emulate_hypercall(vcpu); >> - return 1; >> + ret = kvm_emulate_hypercall(vcpu); >> + >> + /* Ignore the error? */ >> + return ret == 0 ? 0 : 1; >> } >> >> static int handle_invd(struct kvm_vcpu *vcpu) >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index c9d99e5..3fc2853 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) >> u64 param, ingpa, outgpa, ret; >> uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; >> bool fast, longmode; >> - int cs_db, cs_l; >> + int cs_db, cs_l, r = 1; >> >> /* >> * hypercall generates UD from non zero cpl and real mode >> @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) >> case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: >> kvm_vcpu_on_spin(vcpu); >> break; >> + case KVM_HC_GUEST_PANIC: >> + vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC; >> + r = 0; >> + break; > > That's the wrong place. This is a KVM hypercall, not a HyperV one. > >> default: >> res = HV_STATUS_INVALID_HYPERCALL_CODE; >> break; >> @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) >> kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); >> } >> >> - return 1; >> + return r; >> } >> >> int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) >> @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) >> case KVM_HC_VAPIC_POLL_IRQ: >> ret = 0; >> break; >> + case KVM_HC_GUEST_PANIC: >> + ret = 0; >> + vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC; >> + r = 0; >> + break; >> default: >> ret = -KVM_ENOSYS; >> break; >> diff --git a/include/linux/kvm.h b/include/linux/kvm.h >> index acbe429..8f0e31b 100644 >> --- a/include/linux/kvm.h >> +++ b/include/linux/kvm.h >> @@ -163,6 +163,7 @@ struct kvm_pit_config { >> #define KVM_EXIT_OSI 18 >> #define KVM_EXIT_PAPR_HCALL 19 >> #define KVM_EXIT_S390_UCONTROL 20 >> +#define KVM_EXIT_GUEST_PANIC 21 >> >> /* For KVM_EXIT_INTERNAL_ERROR */ >> #define KVM_INTERNAL_ERROR_EMULATION 1 >> diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h >> index ff476dd..cf94023 100644 >> --- a/include/linux/kvm_para.h >> +++ b/include/linux/kvm_para.h >> @@ -19,6 +19,7 @@ >> #define KVM_HC_MMU_OP 2 >> #define KVM_HC_FEATURES 3 >> #define KVM_HC_PPC_MAP_MAGIC_PAGE 4 >> +#define KVM_HC_GUEST_PANIC 5 >> >> /* >> * hypercalls use architecture specific > > Jan >
On 2012-02-28 09:23, Wen Congyang wrote: > At 02/27/2012 11:08 PM, Jan Kiszka Wrote: >> On 2012-02-27 04:01, Wen Congyang wrote: >>> We can know the guest is paniced when the guest runs on xen. >>> But we do not have such feature on kvm. This patch implemnts >>> this feature, and the implementation is the same as xen: >>> register panic notifier, and call hypercall when the guest >>> is paniced. >>> >>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >>> --- >>> arch/x86/kernel/kvm.c | 12 ++++++++++++ >>> arch/x86/kvm/svm.c | 8 ++++++-- >>> arch/x86/kvm/vmx.c | 8 ++++++-- >>> arch/x86/kvm/x86.c | 13 +++++++++++-- >>> include/linux/kvm.h | 1 + >>> include/linux/kvm_para.h | 1 + >>> 6 files changed, 37 insertions(+), 6 deletions(-) >>> >>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c >>> index f0c6fd6..b928d1d 100644 >>> --- a/arch/x86/kernel/kvm.c >>> +++ b/arch/x86/kernel/kvm.c >>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { >>> .notifier_call = kvm_pv_reboot_notify, >>> }; >>> >>> +static int >>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) >>> +{ >>> + kvm_hypercall0(KVM_HC_GUEST_PANIC); >>> + return NOTIFY_DONE; >>> +} >>> + >>> +static struct notifier_block kvm_pv_panic_nb = { >>> + .notifier_call = kvm_pv_panic_notify, >>> +}; >>> + >> >> You should split up host and guest-side changes. >> >>> static u64 kvm_steal_clock(int cpu) >>> { >>> u64 steal; >>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) >>> >>> paravirt_ops_setup(); >>> register_reboot_notifier(&kvm_pv_reboot_nb); >>> + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); >>> for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) >>> spin_lock_init(&async_pf_sleepers[i].lock); >>> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >>> index 0b7690e..38b4705 100644 >>> --- a/arch/x86/kvm/svm.c >>> +++ b/arch/x86/kvm/svm.c >>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) >>> >>> static int vmmcall_interception(struct vcpu_svm *svm) >>> { >>> + int ret; >>> + >>> svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; >>> skip_emulated_instruction(&svm->vcpu); >>> - kvm_emulate_hypercall(&svm->vcpu); >>> - return 1; >>> + ret = kvm_emulate_hypercall(&svm->vcpu); >>> + >>> + /* Ignore the error? */ >>> + return ret == 0 ? 0 : 1; >> >> Why can't kvm_emulate_hypercall return the right value? > > kvm_emulate_hypercall() will call kvm_hv_hypercall(), and > kvm_hv_hypercall() will return 0 when vcpu's CPL > 0. > If vcpu's CPL > 0, does kvm need to exit and tell it to > qemu? No, there is currently no exit to userspace due to hypercalls, neither of HV nor KVM kind. The point is that the return code of kvm_emulate_hypercall is unused so far, so you can easily redefine it to encode continue vs. exit to userspace. Once someone has different needs, this could still be refactored again. Jan
At 02/28/2012 05:34 PM, Jan Kiszka Wrote: > On 2012-02-28 09:23, Wen Congyang wrote: >> At 02/27/2012 11:08 PM, Jan Kiszka Wrote: >>> On 2012-02-27 04:01, Wen Congyang wrote: >>>> We can know the guest is paniced when the guest runs on xen. >>>> But we do not have such feature on kvm. This patch implemnts >>>> this feature, and the implementation is the same as xen: >>>> register panic notifier, and call hypercall when the guest >>>> is paniced. >>>> >>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >>>> --- >>>> arch/x86/kernel/kvm.c | 12 ++++++++++++ >>>> arch/x86/kvm/svm.c | 8 ++++++-- >>>> arch/x86/kvm/vmx.c | 8 ++++++-- >>>> arch/x86/kvm/x86.c | 13 +++++++++++-- >>>> include/linux/kvm.h | 1 + >>>> include/linux/kvm_para.h | 1 + >>>> 6 files changed, 37 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c >>>> index f0c6fd6..b928d1d 100644 >>>> --- a/arch/x86/kernel/kvm.c >>>> +++ b/arch/x86/kernel/kvm.c >>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { >>>> .notifier_call = kvm_pv_reboot_notify, >>>> }; >>>> >>>> +static int >>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) >>>> +{ >>>> + kvm_hypercall0(KVM_HC_GUEST_PANIC); >>>> + return NOTIFY_DONE; >>>> +} >>>> + >>>> +static struct notifier_block kvm_pv_panic_nb = { >>>> + .notifier_call = kvm_pv_panic_notify, >>>> +}; >>>> + >>> >>> You should split up host and guest-side changes. >>> >>>> static u64 kvm_steal_clock(int cpu) >>>> { >>>> u64 steal; >>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) >>>> >>>> paravirt_ops_setup(); >>>> register_reboot_notifier(&kvm_pv_reboot_nb); >>>> + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); >>>> for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) >>>> spin_lock_init(&async_pf_sleepers[i].lock); >>>> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) >>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >>>> index 0b7690e..38b4705 100644 >>>> --- a/arch/x86/kvm/svm.c >>>> +++ b/arch/x86/kvm/svm.c >>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) >>>> >>>> static int vmmcall_interception(struct vcpu_svm *svm) >>>> { >>>> + int ret; >>>> + >>>> svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; >>>> skip_emulated_instruction(&svm->vcpu); >>>> - kvm_emulate_hypercall(&svm->vcpu); >>>> - return 1; >>>> + ret = kvm_emulate_hypercall(&svm->vcpu); >>>> + >>>> + /* Ignore the error? */ >>>> + return ret == 0 ? 0 : 1; >>> >>> Why can't kvm_emulate_hypercall return the right value? >> >> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and >> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0. >> If vcpu's CPL > 0, does kvm need to exit and tell it to >> qemu? > > No, there is currently no exit to userspace due to hypercalls, neither > of HV nor KVM kind. > > The point is that the return code of kvm_emulate_hypercall is unused so > far, so you can easily redefine it to encode continue vs. exit to > userspace. Once someone has different needs, this could still be > refactored again. So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's CPL > 0? Thanks Wen Congyang > > Jan >
On 2012-02-28 10:42, Wen Congyang wrote: > At 02/28/2012 05:34 PM, Jan Kiszka Wrote: >> On 2012-02-28 09:23, Wen Congyang wrote: >>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote: >>>> On 2012-02-27 04:01, Wen Congyang wrote: >>>>> We can know the guest is paniced when the guest runs on xen. >>>>> But we do not have such feature on kvm. This patch implemnts >>>>> this feature, and the implementation is the same as xen: >>>>> register panic notifier, and call hypercall when the guest >>>>> is paniced. >>>>> >>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >>>>> --- >>>>> arch/x86/kernel/kvm.c | 12 ++++++++++++ >>>>> arch/x86/kvm/svm.c | 8 ++++++-- >>>>> arch/x86/kvm/vmx.c | 8 ++++++-- >>>>> arch/x86/kvm/x86.c | 13 +++++++++++-- >>>>> include/linux/kvm.h | 1 + >>>>> include/linux/kvm_para.h | 1 + >>>>> 6 files changed, 37 insertions(+), 6 deletions(-) >>>>> >>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c >>>>> index f0c6fd6..b928d1d 100644 >>>>> --- a/arch/x86/kernel/kvm.c >>>>> +++ b/arch/x86/kernel/kvm.c >>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { >>>>> .notifier_call = kvm_pv_reboot_notify, >>>>> }; >>>>> >>>>> +static int >>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) >>>>> +{ >>>>> + kvm_hypercall0(KVM_HC_GUEST_PANIC); >>>>> + return NOTIFY_DONE; >>>>> +} >>>>> + >>>>> +static struct notifier_block kvm_pv_panic_nb = { >>>>> + .notifier_call = kvm_pv_panic_notify, >>>>> +}; >>>>> + >>>> >>>> You should split up host and guest-side changes. >>>> >>>>> static u64 kvm_steal_clock(int cpu) >>>>> { >>>>> u64 steal; >>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) >>>>> >>>>> paravirt_ops_setup(); >>>>> register_reboot_notifier(&kvm_pv_reboot_nb); >>>>> + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); >>>>> for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) >>>>> spin_lock_init(&async_pf_sleepers[i].lock); >>>>> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) >>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >>>>> index 0b7690e..38b4705 100644 >>>>> --- a/arch/x86/kvm/svm.c >>>>> +++ b/arch/x86/kvm/svm.c >>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) >>>>> >>>>> static int vmmcall_interception(struct vcpu_svm *svm) >>>>> { >>>>> + int ret; >>>>> + >>>>> svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; >>>>> skip_emulated_instruction(&svm->vcpu); >>>>> - kvm_emulate_hypercall(&svm->vcpu); >>>>> - return 1; >>>>> + ret = kvm_emulate_hypercall(&svm->vcpu); >>>>> + >>>>> + /* Ignore the error? */ >>>>> + return ret == 0 ? 0 : 1; >>>> >>>> Why can't kvm_emulate_hypercall return the right value? >>> >>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and >>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0. >>> If vcpu's CPL > 0, does kvm need to exit and tell it to >>> qemu? >> >> No, there is currently no exit to userspace due to hypercalls, neither >> of HV nor KVM kind. >> >> The point is that the return code of kvm_emulate_hypercall is unused so >> far, so you can easily redefine it to encode continue vs. exit to >> userspace. Once someone has different needs, this could still be >> refactored again. > > So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's > CPL > 0? Yes, change it to encode what vendor modules need to return to their callers. Jan
On Tue, Feb 28, 2012 at 11:19:47AM +0100, Jan Kiszka wrote: > On 2012-02-28 10:42, Wen Congyang wrote: > > At 02/28/2012 05:34 PM, Jan Kiszka Wrote: > >> On 2012-02-28 09:23, Wen Congyang wrote: > >>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote: > >>>> On 2012-02-27 04:01, Wen Congyang wrote: > >>>>> We can know the guest is paniced when the guest runs on xen. > >>>>> But we do not have such feature on kvm. This patch implemnts > >>>>> this feature, and the implementation is the same as xen: > >>>>> register panic notifier, and call hypercall when the guest > >>>>> is paniced. > >>>>> > >>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > >>>>> --- > >>>>> arch/x86/kernel/kvm.c | 12 ++++++++++++ > >>>>> arch/x86/kvm/svm.c | 8 ++++++-- > >>>>> arch/x86/kvm/vmx.c | 8 ++++++-- > >>>>> arch/x86/kvm/x86.c | 13 +++++++++++-- > >>>>> include/linux/kvm.h | 1 + > >>>>> include/linux/kvm_para.h | 1 + > >>>>> 6 files changed, 37 insertions(+), 6 deletions(-) > >>>>> > >>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > >>>>> index f0c6fd6..b928d1d 100644 > >>>>> --- a/arch/x86/kernel/kvm.c > >>>>> +++ b/arch/x86/kernel/kvm.c > >>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { > >>>>> .notifier_call = kvm_pv_reboot_notify, > >>>>> }; > >>>>> > >>>>> +static int > >>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) > >>>>> +{ > >>>>> + kvm_hypercall0(KVM_HC_GUEST_PANIC); > >>>>> + return NOTIFY_DONE; > >>>>> +} > >>>>> + > >>>>> +static struct notifier_block kvm_pv_panic_nb = { > >>>>> + .notifier_call = kvm_pv_panic_notify, > >>>>> +}; > >>>>> + > >>>> > >>>> You should split up host and guest-side changes. > >>>> > >>>>> static u64 kvm_steal_clock(int cpu) > >>>>> { > >>>>> u64 steal; > >>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) > >>>>> > >>>>> paravirt_ops_setup(); > >>>>> register_reboot_notifier(&kvm_pv_reboot_nb); > >>>>> + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); > >>>>> for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) > >>>>> spin_lock_init(&async_pf_sleepers[i].lock); > >>>>> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) > >>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > >>>>> index 0b7690e..38b4705 100644 > >>>>> --- a/arch/x86/kvm/svm.c > >>>>> +++ b/arch/x86/kvm/svm.c > >>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) > >>>>> > >>>>> static int vmmcall_interception(struct vcpu_svm *svm) > >>>>> { > >>>>> + int ret; > >>>>> + > >>>>> svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; > >>>>> skip_emulated_instruction(&svm->vcpu); > >>>>> - kvm_emulate_hypercall(&svm->vcpu); > >>>>> - return 1; > >>>>> + ret = kvm_emulate_hypercall(&svm->vcpu); > >>>>> + > >>>>> + /* Ignore the error? */ > >>>>> + return ret == 0 ? 0 : 1; > >>>> > >>>> Why can't kvm_emulate_hypercall return the right value? > >>> > >>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and > >>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0. > >>> If vcpu's CPL > 0, does kvm need to exit and tell it to > >>> qemu? > >> > >> No, there is currently no exit to userspace due to hypercalls, neither > >> of HV nor KVM kind. > >> > >> The point is that the return code of kvm_emulate_hypercall is unused so > >> far, so you can easily redefine it to encode continue vs. exit to > >> userspace. Once someone has different needs, this could still be > >> refactored again. > > > > So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's > > CPL > 0? > > Yes, change it to encode what vendor modules need to return to their > callers. > Better introduce new request flag and set it in your hypercall emulation. See how triple fault is handled. -- Gleb.
On 02/27/2012 05:01 AM, Wen Congyang wrote: > We can know the guest is paniced when the guest runs on xen. > But we do not have such feature on kvm. This patch implemnts > this feature, and the implementation is the same as xen: > register panic notifier, and call hypercall when the guest > is paniced. What's the motivation for this? "Xen does this" is insufficient.
At 02/28/2012 06:45 PM, Gleb Natapov Wrote: > On Tue, Feb 28, 2012 at 11:19:47AM +0100, Jan Kiszka wrote: >> On 2012-02-28 10:42, Wen Congyang wrote: >>> At 02/28/2012 05:34 PM, Jan Kiszka Wrote: >>>> On 2012-02-28 09:23, Wen Congyang wrote: >>>>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote: >>>>>> On 2012-02-27 04:01, Wen Congyang wrote: >>>>>>> We can know the guest is paniced when the guest runs on xen. >>>>>>> But we do not have such feature on kvm. This patch implemnts >>>>>>> this feature, and the implementation is the same as xen: >>>>>>> register panic notifier, and call hypercall when the guest >>>>>>> is paniced. >>>>>>> >>>>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >>>>>>> --- >>>>>>> arch/x86/kernel/kvm.c | 12 ++++++++++++ >>>>>>> arch/x86/kvm/svm.c | 8 ++++++-- >>>>>>> arch/x86/kvm/vmx.c | 8 ++++++-- >>>>>>> arch/x86/kvm/x86.c | 13 +++++++++++-- >>>>>>> include/linux/kvm.h | 1 + >>>>>>> include/linux/kvm_para.h | 1 + >>>>>>> 6 files changed, 37 insertions(+), 6 deletions(-) >>>>>>> >>>>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c >>>>>>> index f0c6fd6..b928d1d 100644 >>>>>>> --- a/arch/x86/kernel/kvm.c >>>>>>> +++ b/arch/x86/kernel/kvm.c >>>>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { >>>>>>> .notifier_call = kvm_pv_reboot_notify, >>>>>>> }; >>>>>>> >>>>>>> +static int >>>>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) >>>>>>> +{ >>>>>>> + kvm_hypercall0(KVM_HC_GUEST_PANIC); >>>>>>> + return NOTIFY_DONE; >>>>>>> +} >>>>>>> + >>>>>>> +static struct notifier_block kvm_pv_panic_nb = { >>>>>>> + .notifier_call = kvm_pv_panic_notify, >>>>>>> +}; >>>>>>> + >>>>>> >>>>>> You should split up host and guest-side changes. >>>>>> >>>>>>> static u64 kvm_steal_clock(int cpu) >>>>>>> { >>>>>>> u64 steal; >>>>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) >>>>>>> >>>>>>> paravirt_ops_setup(); >>>>>>> register_reboot_notifier(&kvm_pv_reboot_nb); >>>>>>> + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); >>>>>>> for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) >>>>>>> spin_lock_init(&async_pf_sleepers[i].lock); >>>>>>> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) >>>>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >>>>>>> index 0b7690e..38b4705 100644 >>>>>>> --- a/arch/x86/kvm/svm.c >>>>>>> +++ b/arch/x86/kvm/svm.c >>>>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) >>>>>>> >>>>>>> static int vmmcall_interception(struct vcpu_svm *svm) >>>>>>> { >>>>>>> + int ret; >>>>>>> + >>>>>>> svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; >>>>>>> skip_emulated_instruction(&svm->vcpu); >>>>>>> - kvm_emulate_hypercall(&svm->vcpu); >>>>>>> - return 1; >>>>>>> + ret = kvm_emulate_hypercall(&svm->vcpu); >>>>>>> + >>>>>>> + /* Ignore the error? */ >>>>>>> + return ret == 0 ? 0 : 1; >>>>>> >>>>>> Why can't kvm_emulate_hypercall return the right value? >>>>> >>>>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and >>>>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0. >>>>> If vcpu's CPL > 0, does kvm need to exit and tell it to >>>>> qemu? >>>> >>>> No, there is currently no exit to userspace due to hypercalls, neither >>>> of HV nor KVM kind. >>>> >>>> The point is that the return code of kvm_emulate_hypercall is unused so >>>> far, so you can easily redefine it to encode continue vs. exit to >>>> userspace. Once someone has different needs, this could still be >>>> refactored again. >>> >>> So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's >>> CPL > 0? >> >> Yes, change it to encode what vendor modules need to return to their >> callers. >> > Better introduce new request flag and set it in your hypercall emulation. See > how triple fault is handled. triple fault sets KVM_EXIT_SHUTDOWN and exits to userspace. Do you mean introduce a new value(like KVM_EXIT_SHUTDOWN)? Thanks Wen Congyang > > -- > Gleb. > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ >
At 02/28/2012 07:23 PM, Avi Kivity Wrote: > On 02/27/2012 05:01 AM, Wen Congyang wrote: >> We can know the guest is paniced when the guest runs on xen. >> But we do not have such feature on kvm. This patch implemnts >> this feature, and the implementation is the same as xen: >> register panic notifier, and call hypercall when the guest >> is paniced. > > What's the motivation for this? "Xen does this" is insufficient. Another purpose is: management app(for example: libvirt) can do auto dump when the guest is crashed. If management app does not do auto dump, the guest's user can do dump by hand if he sees the guest is paniced. I am thinking about another status: dumping. This status tells the guest's user that the guest is paniced, and the OS's dump function is working. These two status can tell the guest's user whether the guest is pancied, and what should he do if the guest is paniced. Thanks Wen Congyang
On Wed, Feb 29, 2012 at 09:08:52AM +0800, Wen Congyang wrote: > At 02/28/2012 06:45 PM, Gleb Natapov Wrote: > > On Tue, Feb 28, 2012 at 11:19:47AM +0100, Jan Kiszka wrote: > >> On 2012-02-28 10:42, Wen Congyang wrote: > >>> At 02/28/2012 05:34 PM, Jan Kiszka Wrote: > >>>> On 2012-02-28 09:23, Wen Congyang wrote: > >>>>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote: > >>>>>> On 2012-02-27 04:01, Wen Congyang wrote: > >>>>>>> We can know the guest is paniced when the guest runs on xen. > >>>>>>> But we do not have such feature on kvm. This patch implemnts > >>>>>>> this feature, and the implementation is the same as xen: > >>>>>>> register panic notifier, and call hypercall when the guest > >>>>>>> is paniced. > >>>>>>> > >>>>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > >>>>>>> --- > >>>>>>> arch/x86/kernel/kvm.c | 12 ++++++++++++ > >>>>>>> arch/x86/kvm/svm.c | 8 ++++++-- > >>>>>>> arch/x86/kvm/vmx.c | 8 ++++++-- > >>>>>>> arch/x86/kvm/x86.c | 13 +++++++++++-- > >>>>>>> include/linux/kvm.h | 1 + > >>>>>>> include/linux/kvm_para.h | 1 + > >>>>>>> 6 files changed, 37 insertions(+), 6 deletions(-) > >>>>>>> > >>>>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > >>>>>>> index f0c6fd6..b928d1d 100644 > >>>>>>> --- a/arch/x86/kernel/kvm.c > >>>>>>> +++ b/arch/x86/kernel/kvm.c > >>>>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { > >>>>>>> .notifier_call = kvm_pv_reboot_notify, > >>>>>>> }; > >>>>>>> > >>>>>>> +static int > >>>>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) > >>>>>>> +{ > >>>>>>> + kvm_hypercall0(KVM_HC_GUEST_PANIC); > >>>>>>> + return NOTIFY_DONE; > >>>>>>> +} > >>>>>>> + > >>>>>>> +static struct notifier_block kvm_pv_panic_nb = { > >>>>>>> + .notifier_call = kvm_pv_panic_notify, > >>>>>>> +}; > >>>>>>> + > >>>>>> > >>>>>> You should split up host and guest-side changes. > >>>>>> > >>>>>>> static u64 kvm_steal_clock(int cpu) > >>>>>>> { > >>>>>>> u64 steal; > >>>>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) > >>>>>>> > >>>>>>> paravirt_ops_setup(); > >>>>>>> register_reboot_notifier(&kvm_pv_reboot_nb); > >>>>>>> + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); > >>>>>>> for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) > >>>>>>> spin_lock_init(&async_pf_sleepers[i].lock); > >>>>>>> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) > >>>>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > >>>>>>> index 0b7690e..38b4705 100644 > >>>>>>> --- a/arch/x86/kvm/svm.c > >>>>>>> +++ b/arch/x86/kvm/svm.c > >>>>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) > >>>>>>> > >>>>>>> static int vmmcall_interception(struct vcpu_svm *svm) > >>>>>>> { > >>>>>>> + int ret; > >>>>>>> + > >>>>>>> svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; > >>>>>>> skip_emulated_instruction(&svm->vcpu); > >>>>>>> - kvm_emulate_hypercall(&svm->vcpu); > >>>>>>> - return 1; > >>>>>>> + ret = kvm_emulate_hypercall(&svm->vcpu); > >>>>>>> + > >>>>>>> + /* Ignore the error? */ > >>>>>>> + return ret == 0 ? 0 : 1; > >>>>>> > >>>>>> Why can't kvm_emulate_hypercall return the right value? > >>>>> > >>>>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and > >>>>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0. > >>>>> If vcpu's CPL > 0, does kvm need to exit and tell it to > >>>>> qemu? > >>>> > >>>> No, there is currently no exit to userspace due to hypercalls, neither > >>>> of HV nor KVM kind. > >>>> > >>>> The point is that the return code of kvm_emulate_hypercall is unused so > >>>> far, so you can easily redefine it to encode continue vs. exit to > >>>> userspace. Once someone has different needs, this could still be > >>>> refactored again. > >>> > >>> So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's > >>> CPL > 0? > >> > >> Yes, change it to encode what vendor modules need to return to their > >> callers. > >> > > Better introduce new request flag and set it in your hypercall emulation. See > > how triple fault is handled. > > triple fault sets KVM_EXIT_SHUTDOWN and exits to userspace. Do you mean introduce > a new value(like KVM_EXIT_SHUTDOWN)? > I mean introduce new request bit (like KVM_REQ_TRIPLE_FAULT) and set it in your hypercall if exit to userspace is needed instead of changing return values. -- Gleb.
On 02/29/2012 03:29 AM, Wen Congyang wrote: > At 02/28/2012 07:23 PM, Avi Kivity Wrote: > > On 02/27/2012 05:01 AM, Wen Congyang wrote: > >> We can know the guest is paniced when the guest runs on xen. > >> But we do not have such feature on kvm. This patch implemnts > >> this feature, and the implementation is the same as xen: > >> register panic notifier, and call hypercall when the guest > >> is paniced. > > > > What's the motivation for this? "Xen does this" is insufficient. > > Another purpose is: management app(for example: libvirt) can do auto > dump when the guest is crashed. If management app does not do auto > dump, the guest's user can do dump by hand if he sees the guest is > paniced. > > I am thinking about another status: dumping. This status tells > the guest's user that the guest is paniced, and the OS's dump function > is working. > > These two status can tell the guest's user whether the guest is pancied, > and what should he do if the guest is paniced. > How about using a virtio-serial channel for this? You can transfer any amount of information (including the dump itself).
On Wed, Feb 29, 2012 at 11:49:58AM +0200, Avi Kivity wrote: > On 02/29/2012 03:29 AM, Wen Congyang wrote: > > At 02/28/2012 07:23 PM, Avi Kivity Wrote: > > > On 02/27/2012 05:01 AM, Wen Congyang wrote: > > >> We can know the guest is paniced when the guest runs on xen. > > >> But we do not have such feature on kvm. This patch implemnts > > >> this feature, and the implementation is the same as xen: > > >> register panic notifier, and call hypercall when the guest > > >> is paniced. > > > > > > What's the motivation for this? "Xen does this" is insufficient. > > > > Another purpose is: management app(for example: libvirt) can do auto > > dump when the guest is crashed. If management app does not do auto > > dump, the guest's user can do dump by hand if he sees the guest is > > paniced. > > > > I am thinking about another status: dumping. This status tells > > the guest's user that the guest is paniced, and the OS's dump function > > is working. > > > > These two status can tell the guest's user whether the guest is pancied, > > and what should he do if the guest is paniced. > > > > How about using a virtio-serial channel for this? You can transfer any > amount of information (including the dump itself). > Isn't it unreliable after the guest panicked? Having special kdump kernel that transfers dump to a host via virtio-serial channel though sounds interesting. May be that's what you mean. -- Gleb.
On Wed, Feb 29, 2012 at 11:49:58AM +0200, Avi Kivity wrote: > On 02/29/2012 03:29 AM, Wen Congyang wrote: > > At 02/28/2012 07:23 PM, Avi Kivity Wrote: > > > On 02/27/2012 05:01 AM, Wen Congyang wrote: > > >> We can know the guest is paniced when the guest runs on xen. > > >> But we do not have such feature on kvm. This patch implemnts > > >> this feature, and the implementation is the same as xen: > > >> register panic notifier, and call hypercall when the guest > > >> is paniced. > > > > > > What's the motivation for this? "Xen does this" is insufficient. > > > > Another purpose is: management app(for example: libvirt) can do auto > > dump when the guest is crashed. If management app does not do auto > > dump, the guest's user can do dump by hand if he sees the guest is > > paniced. > > > > I am thinking about another status: dumping. This status tells > > the guest's user that the guest is paniced, and the OS's dump function > > is working. > > > > These two status can tell the guest's user whether the guest is pancied, > > and what should he do if the guest is paniced. > > > > How about using a virtio-serial channel for this? You can transfer any > amount of information (including the dump itself). When the guest OS has crashed, any dumps will be done from the host OS using libvirt's core dump mechanism. The guest OS isn't involved and is likely too dead to be of any use anyway. Likewise it is quite probably too dead to work a virtio-serial channel or any similarly complex device. We're really just after the simplest possible notification that the guest kernel has paniced. Regards, Daniel
On 02/29/2012 11:55 AM, Gleb Natapov wrote: > > > > > > > How about using a virtio-serial channel for this? You can transfer any > > amount of information (including the dump itself). > > > Isn't it unreliable after the guest panicked? So is calling hypercalls, or dumping, or writing to the screen. Of course calling a hypercall is simpler and so is more reliable. > Having special kdump > kernel that transfers dump to a host via virtio-serial channel though > sounds interesting. May be that's what you mean. Yes. The "panic, starting dump" signal should be initiated by the panicking kernel though, in case the dump fails.
On 02/29/2012 11:58 AM, Daniel P. Berrange wrote: > > > > How about using a virtio-serial channel for this? You can transfer any > > amount of information (including the dump itself). > > When the guest OS has crashed, any dumps will be done from the host > OS using libvirt's core dump mechanism. The guest OS isn't involved > and is likely too dead to be of any use anyway. Likewise it is > quite probably too dead to work a virtio-serial channel or any > similarly complex device. We're really just after the simplest > possible notification that the guest kernel has paniced. If it's alive enough to panic, it's alive enough to kexec its kdump kernel. After that it can do anything. Guest-internal dumps are more useful IMO that host-initiated dumps. In a cloud, the host-initiated dump is left on the host, outside the reach of the guest admin, outside the guest image where all the symbols are, and sometimes not even on the same host if a live migration occurred. It's more useful in small setups, or if the problem is in the hypervisor, not the guest.
On Wed, Feb 29, 2012 at 12:00:41PM +0200, Avi Kivity wrote: > On 02/29/2012 11:55 AM, Gleb Natapov wrote: > > > > > > > > > > How about using a virtio-serial channel for this? You can transfer any > > > amount of information (including the dump itself). > > > > > Isn't it unreliable after the guest panicked? > > So is calling hypercalls, or dumping, or writing to the screen. Of > course calling a hypercall is simpler and so is more reliable. > Yes, crash can be so severe that it is not even detected by a kernel itself, so not OOPS message even printed. But in most cases if kernel is functional enough to print OOPS it is functional enough to call single hypercall instruction. > > Having special kdump > > kernel that transfers dump to a host via virtio-serial channel though > > sounds interesting. May be that's what you mean. > > Yes. The "panic, starting dump" signal should be initiated by the > panicking kernel though, in case the dump fails. > Then panic hypercall sounds like a reasonable solution. -- Gleb.
At 02/29/2012 05:36 PM, Gleb Natapov Wrote: > On Wed, Feb 29, 2012 at 09:08:52AM +0800, Wen Congyang wrote: >> At 02/28/2012 06:45 PM, Gleb Natapov Wrote: >>> On Tue, Feb 28, 2012 at 11:19:47AM +0100, Jan Kiszka wrote: >>>> On 2012-02-28 10:42, Wen Congyang wrote: >>>>> At 02/28/2012 05:34 PM, Jan Kiszka Wrote: >>>>>> On 2012-02-28 09:23, Wen Congyang wrote: >>>>>>> At 02/27/2012 11:08 PM, Jan Kiszka Wrote: >>>>>>>> On 2012-02-27 04:01, Wen Congyang wrote: >>>>>>>>> We can know the guest is paniced when the guest runs on xen. >>>>>>>>> But we do not have such feature on kvm. This patch implemnts >>>>>>>>> this feature, and the implementation is the same as xen: >>>>>>>>> register panic notifier, and call hypercall when the guest >>>>>>>>> is paniced. >>>>>>>>> >>>>>>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >>>>>>>>> --- >>>>>>>>> arch/x86/kernel/kvm.c | 12 ++++++++++++ >>>>>>>>> arch/x86/kvm/svm.c | 8 ++++++-- >>>>>>>>> arch/x86/kvm/vmx.c | 8 ++++++-- >>>>>>>>> arch/x86/kvm/x86.c | 13 +++++++++++-- >>>>>>>>> include/linux/kvm.h | 1 + >>>>>>>>> include/linux/kvm_para.h | 1 + >>>>>>>>> 6 files changed, 37 insertions(+), 6 deletions(-) >>>>>>>>> >>>>>>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c >>>>>>>>> index f0c6fd6..b928d1d 100644 >>>>>>>>> --- a/arch/x86/kernel/kvm.c >>>>>>>>> +++ b/arch/x86/kernel/kvm.c >>>>>>>>> @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { >>>>>>>>> .notifier_call = kvm_pv_reboot_notify, >>>>>>>>> }; >>>>>>>>> >>>>>>>>> +static int >>>>>>>>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) >>>>>>>>> +{ >>>>>>>>> + kvm_hypercall0(KVM_HC_GUEST_PANIC); >>>>>>>>> + return NOTIFY_DONE; >>>>>>>>> +} >>>>>>>>> + >>>>>>>>> +static struct notifier_block kvm_pv_panic_nb = { >>>>>>>>> + .notifier_call = kvm_pv_panic_notify, >>>>>>>>> +}; >>>>>>>>> + >>>>>>>> >>>>>>>> You should split up host and guest-side changes. >>>>>>>> >>>>>>>>> static u64 kvm_steal_clock(int cpu) >>>>>>>>> { >>>>>>>>> u64 steal; >>>>>>>>> @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) >>>>>>>>> >>>>>>>>> paravirt_ops_setup(); >>>>>>>>> register_reboot_notifier(&kvm_pv_reboot_nb); >>>>>>>>> + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); >>>>>>>>> for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) >>>>>>>>> spin_lock_init(&async_pf_sleepers[i].lock); >>>>>>>>> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) >>>>>>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >>>>>>>>> index 0b7690e..38b4705 100644 >>>>>>>>> --- a/arch/x86/kvm/svm.c >>>>>>>>> +++ b/arch/x86/kvm/svm.c >>>>>>>>> @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) >>>>>>>>> >>>>>>>>> static int vmmcall_interception(struct vcpu_svm *svm) >>>>>>>>> { >>>>>>>>> + int ret; >>>>>>>>> + >>>>>>>>> svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; >>>>>>>>> skip_emulated_instruction(&svm->vcpu); >>>>>>>>> - kvm_emulate_hypercall(&svm->vcpu); >>>>>>>>> - return 1; >>>>>>>>> + ret = kvm_emulate_hypercall(&svm->vcpu); >>>>>>>>> + >>>>>>>>> + /* Ignore the error? */ >>>>>>>>> + return ret == 0 ? 0 : 1; >>>>>>>> >>>>>>>> Why can't kvm_emulate_hypercall return the right value? >>>>>>> >>>>>>> kvm_emulate_hypercall() will call kvm_hv_hypercall(), and >>>>>>> kvm_hv_hypercall() will return 0 when vcpu's CPL > 0. >>>>>>> If vcpu's CPL > 0, does kvm need to exit and tell it to >>>>>>> qemu? >>>>>> >>>>>> No, there is currently no exit to userspace due to hypercalls, neither >>>>>> of HV nor KVM kind. >>>>>> >>>>>> The point is that the return code of kvm_emulate_hypercall is unused so >>>>>> far, so you can easily redefine it to encode continue vs. exit to >>>>>> userspace. Once someone has different needs, this could still be >>>>>> refactored again. >>>>> >>>>> So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's >>>>> CPL > 0? >>>> >>>> Yes, change it to encode what vendor modules need to return to their >>>> callers. >>>> >>> Better introduce new request flag and set it in your hypercall emulation. See >>> how triple fault is handled. >> >> triple fault sets KVM_EXIT_SHUTDOWN and exits to userspace. Do you mean introduce >> a new value(like KVM_EXIT_SHUTDOWN)? >> > I mean introduce new request bit (like KVM_REQ_TRIPLE_FAULT) and set it > in your hypercall if exit to userspace is needed instead of changing > return values. I donot notice it. Thanks for you suggestion. I will modify my patch. Thanks Wen Congyang > > -- > Gleb. >
On 02/29/2012 12:05 PM, Gleb Natapov wrote: > On Wed, Feb 29, 2012 at 12:00:41PM +0200, Avi Kivity wrote: > > On 02/29/2012 11:55 AM, Gleb Natapov wrote: > > > > > > > > > > > > > How about using a virtio-serial channel for this? You can transfer any > > > > amount of information (including the dump itself). > > > > > > > Isn't it unreliable after the guest panicked? > > > > So is calling hypercalls, or dumping, or writing to the screen. Of > > course calling a hypercall is simpler and so is more reliable. > > > Yes, crash can be so severe that it is not even detected by a kernel > itself, so not OOPS message even printed. But in most cases if kernel is > functional enough to print OOPS it is functional enough to call single > hypercall instruction. Why not print the oops to virtio-serial? Or even just a regular serial port? That's what bare metal does. > > > Having special kdump > > > kernel that transfers dump to a host via virtio-serial channel though > > > sounds interesting. May be that's what you mean. > > > > Yes. The "panic, starting dump" signal should be initiated by the > > panicking kernel though, in case the dump fails. > > > Then panic hypercall sounds like a reasonable solution. It is, but I'm trying to see if we can get away with doing nothing.
At 02/29/2012 06:08 PM, Avi Kivity Wrote: > On 02/29/2012 12:05 PM, Gleb Natapov wrote: >> On Wed, Feb 29, 2012 at 12:00:41PM +0200, Avi Kivity wrote: >>> On 02/29/2012 11:55 AM, Gleb Natapov wrote: >>>>>> >>>>> >>>>> How about using a virtio-serial channel for this? You can transfer any >>>>> amount of information (including the dump itself). >>>>> >>>> Isn't it unreliable after the guest panicked? >>> >>> So is calling hypercalls, or dumping, or writing to the screen. Of >>> course calling a hypercall is simpler and so is more reliable. >>> >> Yes, crash can be so severe that it is not even detected by a kernel >> itself, so not OOPS message even printed. But in most cases if kernel is >> functional enough to print OOPS it is functional enough to call single >> hypercall instruction. > > Why not print the oops to virtio-serial? Or even just a regular serial > port? That's what bare metal does. If virtio-serial's driver has bug or the guest doesn't have such device... > >>>> Having special kdump >>>> kernel that transfers dump to a host via virtio-serial channel though >>>> sounds interesting. May be that's what you mean. >>> >>> Yes. The "panic, starting dump" signal should be initiated by the >>> panicking kernel though, in case the dump fails. >>> >> Then panic hypercall sounds like a reasonable solution. > > It is, but I'm trying to see if we can get away with doing nothing. > If we have a reliable way with doing nothing, it is better. But I donot find such way. Thanks Wen Congyang
On Wed, Feb 29, 2012 at 12:05:32PM +0200, Avi Kivity wrote: > On 02/29/2012 11:58 AM, Daniel P. Berrange wrote: > > > > > > How about using a virtio-serial channel for this? You can transfer any > > > amount of information (including the dump itself). > > > > When the guest OS has crashed, any dumps will be done from the host > > OS using libvirt's core dump mechanism. The guest OS isn't involved > > and is likely too dead to be of any use anyway. Likewise it is > > quite probably too dead to work a virtio-serial channel or any > > similarly complex device. We're really just after the simplest > > possible notification that the guest kernel has paniced. > > If it's alive enough to panic, it's alive enough to kexec its kdump > kernel. After that it can do anything. > > Guest-internal dumps are more useful IMO that host-initiated dumps. In > a cloud, the host-initiated dump is left on the host, outside the reach > of the guest admin, outside the guest image where all the symbols are, > and sometimes not even on the same host if a live migration occurred. > It's more useful in small setups, or if the problem is in the > hypervisor, not the guest. I don't think guest vs host dumps should be considered mutually exclusive, they both have pluses+minuses. Configuring kexec+kdump requires non-negligable guest admin configuration work before it's usable, and this work is guest OS specific, if it is possible at all. A permanent panic notifier that's built in the kernel by default requires zero guest admin config, and can allow host admin to automate collection of dumps across all their hosts/guests. The KVM hypercall notification is fairly trivially ported to any OS kernel, by comparison with a full virtio + virtio-serial impl. Regards, Daniel
At 02/29/2012 06:05 PM, Avi Kivity Wrote: > On 02/29/2012 11:58 AM, Daniel P. Berrange wrote: >>> >>> How about using a virtio-serial channel for this? You can transfer any >>> amount of information (including the dump itself). >> >> When the guest OS has crashed, any dumps will be done from the host >> OS using libvirt's core dump mechanism. The guest OS isn't involved >> and is likely too dead to be of any use anyway. Likewise it is >> quite probably too dead to work a virtio-serial channel or any >> similarly complex device. We're really just after the simplest >> possible notification that the guest kernel has paniced. > > If it's alive enough to panic, it's alive enough to kexec its kdump > kernel. After that it can do anything. > > Guest-internal dumps are more useful IMO that host-initiated dumps. In Yes, guest-internal dump is better than host dump. But the user may not start guest-internal dump or guest-internal dump failed. So we need the following feature: 1. If the guest-internal dump does not work, the guest's status is 'crashed'. And then the user does the host dump. 2. If the guest-internal dump is working, the guest's status should be 'dumping'. The user see this status and know the guest has paniced, and the guest-internal dump is working. Thanks Wen Congyang > a cloud, the host-initiated dump is left on the host, outside the reach > of the guest admin, outside the guest image where all the symbols are, > and sometimes not even on the same host if a live migration occurred. > It's more useful in small setups, or if the problem is in the > hypervisor, not the guest. >
On 02/29/2012 12:17 PM, Wen Congyang wrote: > >>> > >> Yes, crash can be so severe that it is not even detected by a kernel > >> itself, so not OOPS message even printed. But in most cases if kernel is > >> functional enough to print OOPS it is functional enough to call single > >> hypercall instruction. > > > > Why not print the oops to virtio-serial? Or even just a regular serial > > port? That's what bare metal does. > > If virtio-serial's driver has bug or the guest doesn't have such device... We have the same issue with the hypercall; and virtio-serial is available on many deployed versions. > > > >>>> Having special kdump > >>>> kernel that transfers dump to a host via virtio-serial channel though > >>>> sounds interesting. May be that's what you mean. > >>> > >>> Yes. The "panic, starting dump" signal should be initiated by the > >>> panicking kernel though, in case the dump fails. > >>> > >> Then panic hypercall sounds like a reasonable solution. > > > > It is, but I'm trying to see if we can get away with doing nothing. > > > > If we have a reliable way with doing nothing, it is better. But I donot > find such way. We won't have a 100% reliable way. But I think a variant of the driver that doesn't use interrupts, or just using the ordinary serial driver, should be reliable enough.
On 02/29/2012 12:19 PM, Daniel P. Berrange wrote: > On Wed, Feb 29, 2012 at 12:05:32PM +0200, Avi Kivity wrote: > > On 02/29/2012 11:58 AM, Daniel P. Berrange wrote: > > > > > > > > How about using a virtio-serial channel for this? You can transfer any > > > > amount of information (including the dump itself). > > > > > > When the guest OS has crashed, any dumps will be done from the host > > > OS using libvirt's core dump mechanism. The guest OS isn't involved > > > and is likely too dead to be of any use anyway. Likewise it is > > > quite probably too dead to work a virtio-serial channel or any > > > similarly complex device. We're really just after the simplest > > > possible notification that the guest kernel has paniced. > > > > If it's alive enough to panic, it's alive enough to kexec its kdump > > kernel. After that it can do anything. > > > > Guest-internal dumps are more useful IMO that host-initiated dumps. In > > a cloud, the host-initiated dump is left on the host, outside the reach > > of the guest admin, outside the guest image where all the symbols are, > > and sometimes not even on the same host if a live migration occurred. > > It's more useful in small setups, or if the problem is in the > > hypervisor, not the guest. > > I don't think guest vs host dumps should be considered mutually exclusive, > they both have pluses+minuses. True. > Configuring kexec+kdump requires non-negligable guest admin configuration > work before it's usable, and this work is guest OS specific, if it is possible > at all. I think it's on by default on Windows and requires installing a package on Linux, which may be part of the default configuration on many distros. > A permanent panic notifier that's built in the kernel by default > requires zero guest admin config, and can allow host admin to automate > collection of dumps across all their hosts/guests. The KVM hypercall > notification is fairly trivially ported to any OS kernel, by comparison > with a full virtio + virtio-serial impl. That's the path of least resistance. But it's not necessarily the best path. We end up with a wide set of disconnected ABIs instead of a narrow set that is more flexible.
On Wed, Feb 29, 2012 at 12:08:19PM +0200, Avi Kivity wrote: > On 02/29/2012 12:05 PM, Gleb Natapov wrote: > > On Wed, Feb 29, 2012 at 12:00:41PM +0200, Avi Kivity wrote: > > > On 02/29/2012 11:55 AM, Gleb Natapov wrote: > > > > > > > > > > > > > > > > How about using a virtio-serial channel for this? You can transfer any > > > > > amount of information (including the dump itself). > > > > > > > > > Isn't it unreliable after the guest panicked? > > > > > > So is calling hypercalls, or dumping, or writing to the screen. Of > > > course calling a hypercall is simpler and so is more reliable. > > > > > Yes, crash can be so severe that it is not even detected by a kernel > > itself, so not OOPS message even printed. But in most cases if kernel is > > functional enough to print OOPS it is functional enough to call single > > hypercall instruction. > > Why not print the oops to virtio-serial? Or even just a regular serial > port? That's what bare metal does. > The more interface is complex the more chances it will fail during panic. Regular serial is likely more reliable than virtio-serial. Hypercall is likely more reliable than uart. On serial user can fake panic notification. Can this be problematic? > > > > Having special kdump > > > > kernel that transfers dump to a host via virtio-serial channel though > > > > sounds interesting. May be that's what you mean. > > > > > > Yes. The "panic, starting dump" signal should be initiated by the > > > panicking kernel though, in case the dump fails. > > > > > Then panic hypercall sounds like a reasonable solution. > > It is, but I'm trying to see if we can get away with doing nothing. > Fair enough. -- Gleb.
On 02/29/2012 12:31 PM, Wen Congyang wrote: > At 02/29/2012 06:05 PM, Avi Kivity Wrote: > > On 02/29/2012 11:58 AM, Daniel P. Berrange wrote: > >>> > >>> How about using a virtio-serial channel for this? You can transfer any > >>> amount of information (including the dump itself). > >> > >> When the guest OS has crashed, any dumps will be done from the host > >> OS using libvirt's core dump mechanism. The guest OS isn't involved > >> and is likely too dead to be of any use anyway. Likewise it is > >> quite probably too dead to work a virtio-serial channel or any > >> similarly complex device. We're really just after the simplest > >> possible notification that the guest kernel has paniced. > > > > If it's alive enough to panic, it's alive enough to kexec its kdump > > kernel. After that it can do anything. > > > > Guest-internal dumps are more useful IMO that host-initiated dumps. In > > Yes, guest-internal dump is better than host dump. But the user may not > start guest-internal dump or guest-internal dump failed. So we need the > following feature: > 1. If the guest-internal dump does not work, the guest's status is 'crashed'. > And then the user does the host dump. > 2. If the guest-internal dump is working, the guest's status should be > 'dumping'. The user see this status and know the guest has paniced, and > the guest-internal dump is working. I agree. There is room for host dump, and we do want notification about what the guest is doing. The question is whether we should reuse virtio-serial for guest-host communication in this case. It's more complicated, but allows us to avoid touching the hypervisor.
On 02/29/2012 12:44 PM, Gleb Natapov wrote: > > > > > > > Yes, crash can be so severe that it is not even detected by a kernel > > > itself, so not OOPS message even printed. But in most cases if kernel is > > > functional enough to print OOPS it is functional enough to call single > > > hypercall instruction. > > > > Why not print the oops to virtio-serial? Or even just a regular serial > > port? That's what bare metal does. > > > The more interface is complex the more chances it will fail during > panic. Regular serial is likely more reliable than virtio-serial. > Hypercall is likely more reliable than uart. On serial user can > fake panic notification. The serial device is under control of the kernel, so the user can only access it if the kernel so allows. > Can this be problematic? From the point of view of the guest, yes, it can generate support calls in fill up the admin's dump quota. From the point of view of the host, there's no difference between the guest's admin and a random user.
On Wed, Feb 29, 2012 at 12:48:57PM +0200, Avi Kivity wrote: > On 02/29/2012 12:44 PM, Gleb Natapov wrote: > > > > > > > > > Yes, crash can be so severe that it is not even detected by a kernel > > > > itself, so not OOPS message even printed. But in most cases if kernel is > > > > functional enough to print OOPS it is functional enough to call single > > > > hypercall instruction. > > > > > > Why not print the oops to virtio-serial? Or even just a regular serial > > > port? That's what bare metal does. > > > > > The more interface is complex the more chances it will fail during > > panic. Regular serial is likely more reliable than virtio-serial. > > Hypercall is likely more reliable than uart. On serial user can > > fake panic notification. > > The serial device is under control of the kernel, so the user can only > access it if the kernel so allows. > Yes, but if we will hijack UART for panic notification VM user will not be able to use it for anything else. virtio-serial have many channels, but AFAIK it does not work at early stages of boot process. -- Gleb.
At 02/29/2012 06:39 PM, Avi Kivity Wrote: > On 02/29/2012 12:17 PM, Wen Congyang wrote: >>>>> >>>> Yes, crash can be so severe that it is not even detected by a kernel >>>> itself, so not OOPS message even printed. But in most cases if kernel is >>>> functional enough to print OOPS it is functional enough to call single >>>> hypercall instruction. >>> >>> Why not print the oops to virtio-serial? Or even just a regular serial >>> port? That's what bare metal does. >> >> If virtio-serial's driver has bug or the guest doesn't have such device... > > We have the same issue with the hypercall; and virtio-serial is > available on many deployed versions. How to know whether a guest has virtio-serial? Thanks Wen Congyang > >>> >>>>>> Having special kdump >>>>>> kernel that transfers dump to a host via virtio-serial channel though >>>>>> sounds interesting. May be that's what you mean. >>>>> >>>>> Yes. The "panic, starting dump" signal should be initiated by the >>>>> panicking kernel though, in case the dump fails. >>>>> >>>> Then panic hypercall sounds like a reasonable solution. >>> >>> It is, but I'm trying to see if we can get away with doing nothing. >>> >> >> If we have a reliable way with doing nothing, it is better. But I donot >> find such way. > > We won't have a 100% reliable way. But I think a variant of the driver > that doesn't use interrupts, or just using the ordinary serial driver, > should be reliable enough. >
At 02/29/2012 06:39 PM, Avi Kivity Wrote: > On 02/29/2012 12:17 PM, Wen Congyang wrote: >>>>> >>>> Yes, crash can be so severe that it is not even detected by a kernel >>>> itself, so not OOPS message even printed. But in most cases if kernel is >>>> functional enough to print OOPS it is functional enough to call single >>>> hypercall instruction. >>> >>> Why not print the oops to virtio-serial? Or even just a regular serial >>> port? That's what bare metal does. >> >> If virtio-serial's driver has bug or the guest doesn't have such device... > > We have the same issue with the hypercall; and virtio-serial is > available on many deployed versions. virtio-serial is available, but it is an optional device. If the guest does not have this device, the guest cannot tell the host that is is paniced. So I still prefer to touch the hypervisor. Thanks Wen Congyang > >>> >>>>>> Having special kdump >>>>>> kernel that transfers dump to a host via virtio-serial channel though >>>>>> sounds interesting. May be that's what you mean. >>>>> >>>>> Yes. The "panic, starting dump" signal should be initiated by the >>>>> panicking kernel though, in case the dump fails. >>>>> >>>> Then panic hypercall sounds like a reasonable solution. >>> >>> It is, but I'm trying to see if we can get away with doing nothing. >>> >> >> If we have a reliable way with doing nothing, it is better. But I donot >> find such way. > > We won't have a 100% reliable way. But I think a variant of the driver > that doesn't use interrupts, or just using the ordinary serial driver, > should be reliable enough. >
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6..b928d1d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { .notifier_call = kvm_pv_reboot_notify, }; +static int +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + kvm_hypercall0(KVM_HC_GUEST_PANIC); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_panic_nb = { + .notifier_call = kvm_pv_panic_notify, +}; + static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) paravirt_ops_setup(); register_reboot_notifier(&kvm_pv_reboot_nb); + atomic_notifier_chain_register(&panic_notifier_list, &kvm_pv_panic_nb); for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) spin_lock_init(&async_pf_sleepers[i].lock); if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0b7690e..38b4705 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) static int vmmcall_interception(struct vcpu_svm *svm) { + int ret; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); - kvm_emulate_hypercall(&svm->vcpu); - return 1; + ret = kvm_emulate_hypercall(&svm->vcpu); + + /* Ignore the error? */ + return ret == 0 ? 0 : 1; } static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 66147ca..1b57ebb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu) static int handle_vmcall(struct kvm_vcpu *vcpu) { + int ret; + skip_emulated_instruction(vcpu); - kvm_emulate_hypercall(vcpu); - return 1; + ret = kvm_emulate_hypercall(vcpu); + + /* Ignore the error? */ + return ret == 0 ? 0 : 1; } static int handle_invd(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c9d99e5..3fc2853 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) u64 param, ingpa, outgpa, ret; uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; bool fast, longmode; - int cs_db, cs_l; + int cs_db, cs_l, r = 1; /* * hypercall generates UD from non zero cpl and real mode @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: kvm_vcpu_on_spin(vcpu); break; + case KVM_HC_GUEST_PANIC: + vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC; + r = 0; + break; default: res = HV_STATUS_INVALID_HYPERCALL_CODE; break; @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); } - return 1; + return r; } int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_VAPIC_POLL_IRQ: ret = 0; break; + case KVM_HC_GUEST_PANIC: + ret = 0; + vcpu->run->exit_reason = KVM_EXIT_GUEST_PANIC; + r = 0; + break; default: ret = -KVM_ENOSYS; break; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index acbe429..8f0e31b 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -163,6 +163,7 @@ struct kvm_pit_config { #define KVM_EXIT_OSI 18 #define KVM_EXIT_PAPR_HCALL 19 #define KVM_EXIT_S390_UCONTROL 20 +#define KVM_EXIT_GUEST_PANIC 21 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index ff476dd..cf94023 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -19,6 +19,7 @@ #define KVM_HC_MMU_OP 2 #define KVM_HC_FEATURES 3 #define KVM_HC_PPC_MAP_MAGIC_PAGE 4 +#define KVM_HC_GUEST_PANIC 5 /* * hypercalls use architecture specific
We can know the guest is paniced when the guest runs on xen. But we do not have such feature on kvm. This patch implemnts this feature, and the implementation is the same as xen: register panic notifier, and call hypercall when the guest is paniced. Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> --- arch/x86/kernel/kvm.c | 12 ++++++++++++ arch/x86/kvm/svm.c | 8 ++++++-- arch/x86/kvm/vmx.c | 8 ++++++-- arch/x86/kvm/x86.c | 13 +++++++++++-- include/linux/kvm.h | 1 + include/linux/kvm_para.h | 1 + 6 files changed, 37 insertions(+), 6 deletions(-)