Message ID | 20221102231911.3107438-9-seanjc@google.com |
---|---|
State | Accepted |
Headers | show |
Series | KVM: Rework kvm_init() and hardware enabling | expand |
On Wed, Nov 02, 2022 at 11:18:35PM +0000, Sean Christopherson wrote: > Now that kvm_arch_hardware_setup() is called immediately after > kvm_arch_init(), fold the guts of kvm_arch_hardware_(un)setup() into > kvm_arch_{init,exit}() as a step towards dropping one of the hooks. > > To avoid having to unwind various setup, e.g registration of several > notifiers, slot in the vendor hardware setup before the registration of > said notifiers and callbacks. Introducing a functional change while > moving code is less than ideal, but the alternative is adding a pile of > unwinding code, which is much more error prone, e.g. several attempts to > move the setup code verbatim all introduced bugs. > > Add a comment to document that kvm_ops_update() is effectively the point > of no return, e.g. it sets the kvm_x86_ops.hardware_enable canary and so > needs to be unwound. > > Signed-off-by: Sean Christopherson <seanjc@google.com> > --- > arch/x86/kvm/x86.c | 121 +++++++++++++++++++++++---------------------- > 1 file changed, 63 insertions(+), 58 deletions(-) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 9a7702b1c563..80ee580a9cd4 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -9252,6 +9252,24 @@ static struct notifier_block pvclock_gtod_notifier = { > }; > #endif > > +static inline void kvm_ops_update(struct kvm_x86_init_ops *ops) > +{ > + memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); > + > +#define __KVM_X86_OP(func) \ > + static_call_update(kvm_x86_##func, kvm_x86_ops.func); > +#define KVM_X86_OP(func) \ > + WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func) > +#define KVM_X86_OP_OPTIONAL __KVM_X86_OP > +#define KVM_X86_OP_OPTIONAL_RET0(func) \ > + static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \ > + (void *)__static_call_return0); > +#include <asm/kvm-x86-ops.h> > +#undef __KVM_X86_OP > + > + kvm_pmu_ops_update(ops->pmu_ops); > +} > + > int kvm_arch_init(void *opaque) > { > struct kvm_x86_init_ops *ops = opaque; > @@ -9325,6 +9343,24 @@ int kvm_arch_init(void *opaque) > kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0; > } > > + rdmsrl_safe(MSR_EFER, &host_efer); > + > + if (boot_cpu_has(X86_FEATURE_XSAVES)) > + rdmsrl(MSR_IA32_XSS, host_xss); > + > + kvm_init_pmu_capability(); > + > + r = ops->hardware_setup(); > + if (r != 0) > + goto out_mmu_exit; The failure case of ops->hardware_setup() is unwound by kvm_arch_exit() before this patch, do we need to keep that old behavior ? > + > + /* > + * Point of no return! DO NOT add error paths below this point unless > + * absolutely necessary, as most operations from this point forward > + * require unwinding. > + */ > + kvm_ops_update(ops); > + > kvm_timer_init(); > > if (pi_inject_timer == -1) > @@ -9336,8 +9372,32 @@ int kvm_arch_init(void *opaque) > set_hv_tscchange_cb(kvm_hyperv_tsc_notifier); > #endif > > + kvm_register_perf_callbacks(ops->handle_intel_pt_intr); > + > + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) > + kvm_caps.supported_xss = 0; > + > +#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) > + cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); > +#undef __kvm_cpu_cap_has > + > + if (kvm_caps.has_tsc_control) { > + /* > + * Make sure the user can only configure tsc_khz values that > + * fit into a signed integer. > + * A min value is not calculated because it will always > + * be 1 on all machines. > + */ > + u64 max = min(0x7fffffffULL, > + __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz)); > + kvm_caps.max_guest_tsc_khz = max; > + } > + kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits; > + kvm_init_msr_list(); > return 0; > > +out_mmu_exit: > + kvm_mmu_vendor_module_exit(); > out_free_percpu: > free_percpu(user_return_msrs); > out_free_x86_emulator_cache: > @@ -9347,6 +9407,8 @@ int kvm_arch_init(void *opaque) > > void kvm_arch_exit(void) > { > + kvm_unregister_perf_callbacks(); > + > #ifdef CONFIG_X86_64 > if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) > clear_hv_tscchange_cb(); > @@ -9362,6 +9424,7 @@ void kvm_arch_exit(void) > irq_work_sync(&pvclock_irq_work); > cancel_work_sync(&pvclock_gtod_work); > #endif > + static_call(kvm_x86_hardware_unsetup)(); > kvm_x86_ops.hardware_enable = NULL; > kvm_mmu_vendor_module_exit(); > free_percpu(user_return_msrs); > @@ -11922,72 +11985,14 @@ void kvm_arch_hardware_disable(void) > drop_user_return_notifiers(); > } > > -static inline void kvm_ops_update(struct kvm_x86_init_ops *ops) > -{ > - memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); > - > -#define __KVM_X86_OP(func) \ > - static_call_update(kvm_x86_##func, kvm_x86_ops.func); > -#define KVM_X86_OP(func) \ > - WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func) > -#define KVM_X86_OP_OPTIONAL __KVM_X86_OP > -#define KVM_X86_OP_OPTIONAL_RET0(func) \ > - static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \ > - (void *)__static_call_return0); > -#include <asm/kvm-x86-ops.h> > -#undef __KVM_X86_OP > - > - kvm_pmu_ops_update(ops->pmu_ops); > -} > - > int kvm_arch_hardware_setup(void *opaque) > { > - struct kvm_x86_init_ops *ops = opaque; > - int r; > - > - rdmsrl_safe(MSR_EFER, &host_efer); > - > - if (boot_cpu_has(X86_FEATURE_XSAVES)) > - rdmsrl(MSR_IA32_XSS, host_xss); > - > - kvm_init_pmu_capability(); > - > - r = ops->hardware_setup(); > - if (r != 0) > - return r; > - > - kvm_ops_update(ops); > - > - kvm_register_perf_callbacks(ops->handle_intel_pt_intr); > - > - if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) > - kvm_caps.supported_xss = 0; > - > -#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) > - cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); > -#undef __kvm_cpu_cap_has > - > - if (kvm_caps.has_tsc_control) { > - /* > - * Make sure the user can only configure tsc_khz values that > - * fit into a signed integer. > - * A min value is not calculated because it will always > - * be 1 on all machines. > - */ > - u64 max = min(0x7fffffffULL, > - __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz)); > - kvm_caps.max_guest_tsc_khz = max; > - } > - kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits; > - kvm_init_msr_list(); > return 0; > } > > void kvm_arch_hardware_unsetup(void) > { > - kvm_unregister_perf_callbacks(); > > - static_call(kvm_x86_hardware_unsetup)(); > } > > int kvm_arch_check_processor_compat(void *opaque) > -- > 2.38.1.431.g37b22c650d-goog >
On Fri, Nov 04, 2022, Yuan Yao wrote: > On Wed, Nov 02, 2022 at 11:18:35PM +0000, Sean Christopherson wrote: > > To avoid having to unwind various setup, e.g registration of several > > notifiers, slot in the vendor hardware setup before the registration of > > said notifiers and callbacks. Introducing a functional change while > > moving code is less than ideal, but the alternative is adding a pile of > > unwinding code, which is much more error prone, e.g. several attempts to > > move the setup code verbatim all introduced bugs. ... > > @@ -9325,6 +9343,24 @@ int kvm_arch_init(void *opaque) > > kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0; > > } > > > > + rdmsrl_safe(MSR_EFER, &host_efer); > > + > > + if (boot_cpu_has(X86_FEATURE_XSAVES)) > > + rdmsrl(MSR_IA32_XSS, host_xss); > > + > > + kvm_init_pmu_capability(); > > + > > + r = ops->hardware_setup(); > > + if (r != 0) > > + goto out_mmu_exit; > > The failure case of ops->hardware_setup() is unwound > by kvm_arch_exit() before this patch, do we need to > keep that old behavior ? As called out in the changelog, the call to ops->hardware_setup() was deliberately slotted in before the call to kvm_timer_init() so that kvm_arch_init() wouldn't need to unwind more stuff if harware_setup() fails. > > + /* > > + * Point of no return! DO NOT add error paths below this point unless > > + * absolutely necessary, as most operations from this point forward > > + * require unwinding. > > + */ > > + kvm_ops_update(ops); > > + > > kvm_timer_init(); > > > > if (pi_inject_timer == -1) > > @@ -9336,8 +9372,32 @@ int kvm_arch_init(void *opaque) > > set_hv_tscchange_cb(kvm_hyperv_tsc_notifier); > > #endif > > > > + kvm_register_perf_callbacks(ops->handle_intel_pt_intr); > > + > > + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) > > + kvm_caps.supported_xss = 0; > > + > > +#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) > > + cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); > > +#undef __kvm_cpu_cap_has > > + > > + if (kvm_caps.has_tsc_control) { > > + /* > > + * Make sure the user can only configure tsc_khz values that > > + * fit into a signed integer. > > + * A min value is not calculated because it will always > > + * be 1 on all machines. > > + */ > > + u64 max = min(0x7fffffffULL, > > + __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz)); > > + kvm_caps.max_guest_tsc_khz = max; > > + } > > + kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits; > > + kvm_init_msr_list(); > > return 0; > > > > +out_mmu_exit: > > + kvm_mmu_vendor_module_exit(); > > out_free_percpu: > > free_percpu(user_return_msrs); > > out_free_x86_emulator_cache:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9a7702b1c563..80ee580a9cd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9252,6 +9252,24 @@ static struct notifier_block pvclock_gtod_notifier = { }; #endif +static inline void kvm_ops_update(struct kvm_x86_init_ops *ops) +{ + memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); + +#define __KVM_X86_OP(func) \ + static_call_update(kvm_x86_##func, kvm_x86_ops.func); +#define KVM_X86_OP(func) \ + WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func) +#define KVM_X86_OP_OPTIONAL __KVM_X86_OP +#define KVM_X86_OP_OPTIONAL_RET0(func) \ + static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \ + (void *)__static_call_return0); +#include <asm/kvm-x86-ops.h> +#undef __KVM_X86_OP + + kvm_pmu_ops_update(ops->pmu_ops); +} + int kvm_arch_init(void *opaque) { struct kvm_x86_init_ops *ops = opaque; @@ -9325,6 +9343,24 @@ int kvm_arch_init(void *opaque) kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0; } + rdmsrl_safe(MSR_EFER, &host_efer); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + rdmsrl(MSR_IA32_XSS, host_xss); + + kvm_init_pmu_capability(); + + r = ops->hardware_setup(); + if (r != 0) + goto out_mmu_exit; + + /* + * Point of no return! DO NOT add error paths below this point unless + * absolutely necessary, as most operations from this point forward + * require unwinding. + */ + kvm_ops_update(ops); + kvm_timer_init(); if (pi_inject_timer == -1) @@ -9336,8 +9372,32 @@ int kvm_arch_init(void *opaque) set_hv_tscchange_cb(kvm_hyperv_tsc_notifier); #endif + kvm_register_perf_callbacks(ops->handle_intel_pt_intr); + + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) + kvm_caps.supported_xss = 0; + +#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) + cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); +#undef __kvm_cpu_cap_has + + if (kvm_caps.has_tsc_control) { + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated because it will always + * be 1 on all machines. + */ + u64 max = min(0x7fffffffULL, + __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz)); + kvm_caps.max_guest_tsc_khz = max; + } + kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits; + kvm_init_msr_list(); return 0; +out_mmu_exit: + kvm_mmu_vendor_module_exit(); out_free_percpu: free_percpu(user_return_msrs); out_free_x86_emulator_cache: @@ -9347,6 +9407,8 @@ int kvm_arch_init(void *opaque) void kvm_arch_exit(void) { + kvm_unregister_perf_callbacks(); + #ifdef CONFIG_X86_64 if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) clear_hv_tscchange_cb(); @@ -9362,6 +9424,7 @@ void kvm_arch_exit(void) irq_work_sync(&pvclock_irq_work); cancel_work_sync(&pvclock_gtod_work); #endif + static_call(kvm_x86_hardware_unsetup)(); kvm_x86_ops.hardware_enable = NULL; kvm_mmu_vendor_module_exit(); free_percpu(user_return_msrs); @@ -11922,72 +11985,14 @@ void kvm_arch_hardware_disable(void) drop_user_return_notifiers(); } -static inline void kvm_ops_update(struct kvm_x86_init_ops *ops) -{ - memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); - -#define __KVM_X86_OP(func) \ - static_call_update(kvm_x86_##func, kvm_x86_ops.func); -#define KVM_X86_OP(func) \ - WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func) -#define KVM_X86_OP_OPTIONAL __KVM_X86_OP -#define KVM_X86_OP_OPTIONAL_RET0(func) \ - static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \ - (void *)__static_call_return0); -#include <asm/kvm-x86-ops.h> -#undef __KVM_X86_OP - - kvm_pmu_ops_update(ops->pmu_ops); -} - int kvm_arch_hardware_setup(void *opaque) { - struct kvm_x86_init_ops *ops = opaque; - int r; - - rdmsrl_safe(MSR_EFER, &host_efer); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - rdmsrl(MSR_IA32_XSS, host_xss); - - kvm_init_pmu_capability(); - - r = ops->hardware_setup(); - if (r != 0) - return r; - - kvm_ops_update(ops); - - kvm_register_perf_callbacks(ops->handle_intel_pt_intr); - - if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) - kvm_caps.supported_xss = 0; - -#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) - cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); -#undef __kvm_cpu_cap_has - - if (kvm_caps.has_tsc_control) { - /* - * Make sure the user can only configure tsc_khz values that - * fit into a signed integer. - * A min value is not calculated because it will always - * be 1 on all machines. - */ - u64 max = min(0x7fffffffULL, - __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz)); - kvm_caps.max_guest_tsc_khz = max; - } - kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits; - kvm_init_msr_list(); return 0; } void kvm_arch_hardware_unsetup(void) { - kvm_unregister_perf_callbacks(); - static_call(kvm_x86_hardware_unsetup)(); } int kvm_arch_check_processor_compat(void *opaque)
Now that kvm_arch_hardware_setup() is called immediately after kvm_arch_init(), fold the guts of kvm_arch_hardware_(un)setup() into kvm_arch_{init,exit}() as a step towards dropping one of the hooks. To avoid having to unwind various setup, e.g registration of several notifiers, slot in the vendor hardware setup before the registration of said notifiers and callbacks. Introducing a functional change while moving code is less than ideal, but the alternative is adding a pile of unwinding code, which is much more error prone, e.g. several attempts to move the setup code verbatim all introduced bugs. Add a comment to document that kvm_ops_update() is effectively the point of no return, e.g. it sets the kvm_x86_ops.hardware_enable canary and so needs to be unwound. Signed-off-by: Sean Christopherson <seanjc@google.com> --- arch/x86/kvm/x86.c | 121 +++++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 58 deletions(-)