Message ID | 20221102231911.3107438-38-seanjc@google.com |
---|---|
State | Accepted |
Headers | show |
Series | KVM: Rework kvm_init() and hardware enabling | expand |
On Wed, 2022-11-02 at 23:19 +0000, Sean Christopherson wrote: > From: Chao Gao <chao.gao@intel.com> > > The CPU STARTING section doesn't allow callbacks to fail. Move KVM's > hotplug callback to ONLINE section so that it can abort onlining a > CPU in > certain cases to avoid potentially breaking VMs running on existing > CPUs. > For example, when KVM fails to enable hardware virtualization on the > hotplugged CPU. > > Place KVM's hotplug state before CPUHP_AP_SCHED_WAIT_EMPTY as it > ensures > when offlining a CPU, all user tasks and non-pinned kernel tasks have > left > the CPU, i.e. there cannot be a vCPU task around. So, it is safe for > KVM's > CPU offline callback to disable hardware virtualization at that > point. > Likewise, KVM's online callback can enable hardware virtualization > before > any vCPU task gets a chance to run on hotplugged CPUs. > > Rename KVM's CPU hotplug callbacks accordingly. > > Suggested-by: Thomas Gleixner <tglx@linutronix.de> > Signed-off-by: Chao Gao <chao.gao@intel.com> > Reviewed-by: Sean Christopherson <seanjc@google.com> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> > Reviewed-by: Yuan Yao <yuan.yao@intel.com> > Signed-off-by: Sean Christopherson <seanjc@google.com> > --- > include/linux/cpuhotplug.h | 2 +- > virt/kvm/kvm_main.c | 30 ++++++++++++++++++++++-------- > 2 files changed, 23 insertions(+), 9 deletions(-) > > diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h > index 7337414e4947..de45be38dd27 100644 > --- a/include/linux/cpuhotplug.h > +++ b/include/linux/cpuhotplug.h > @@ -185,7 +185,6 @@ enum cpuhp_state { > CPUHP_AP_CSKY_TIMER_STARTING, > CPUHP_AP_TI_GP_TIMER_STARTING, > CPUHP_AP_HYPERV_TIMER_STARTING, > - CPUHP_AP_KVM_STARTING, > /* Must be the last timer callback */ > CPUHP_AP_DUMMY_TIMER_STARTING, > CPUHP_AP_ARM_XEN_STARTING, > @@ -200,6 +199,7 @@ enum cpuhp_state { > > /* Online section invoked on the hotplugged CPU from the > hotplug thread */ > CPUHP_AP_ONLINE_IDLE, > + CPUHP_AP_KVM_ONLINE, > CPUHP_AP_SCHED_WAIT_EMPTY, > CPUHP_AP_SMPBOOT_THREADS, > CPUHP_AP_X86_VDSO_VMA_ONLINE, > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index dd13af9f06d5..fd9e39c85549 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -5026,13 +5026,27 @@ static void hardware_enable_nolock(void > *junk) > } > } > > -static int kvm_starting_cpu(unsigned int cpu) > +static int kvm_online_cpu(unsigned int cpu) > { > + int ret = 0; > + > raw_spin_lock(&kvm_count_lock); > - if (kvm_usage_count) > + /* > + * Abort the CPU online process if hardware virtualization > cannot > + * be enabled. Otherwise running VMs would encounter > unrecoverable > + * errors when scheduled to this CPU. > + */ > + if (kvm_usage_count) { > + WARN_ON_ONCE(atomic_read(&hardware_enable_failed)); > + > hardware_enable_nolock(NULL); > + if (atomic_read(&hardware_enable_failed)) { > + atomic_set(&hardware_enable_failed, 0); I see other places using this hardware_enable_failed with atomic_inc(), should here use atomic_dec() instead of straightly set to 0? Though here is embraced by spin_lock, hardware_enable_nolock() can be invoked in other places in parallel? Fortunately in the end of this patch set, global hardware_enable_failed is get rid of. > + ret = -EIO; > + } > + } > raw_spin_unlock(&kvm_count_lock); > - return 0; > + return ret; > } > > static void hardware_disable_nolock(void *junk) > @@ -5045,7 +5059,7 @@ static void hardware_disable_nolock(void *junk) > kvm_arch_hardware_disable(); > } > > -static int kvm_dying_cpu(unsigned int cpu) > +static int kvm_offline_cpu(unsigned int cpu) > { > raw_spin_lock(&kvm_count_lock); > if (kvm_usage_count) > @@ -5822,8 +5836,8 @@ int kvm_init(unsigned vcpu_size, unsigned > vcpu_align, struct module *module) > if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) > return -ENOMEM; > > - r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, > "kvm/cpu:starting", > - kvm_starting_cpu, kvm_dying_cpu); > + r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, > "kvm/cpu:online", > + kvm_online_cpu, kvm_offline_cpu); > if (r) > goto out_free_2; > register_reboot_notifier(&kvm_reboot_notifier); > @@ -5897,7 +5911,7 @@ int kvm_init(unsigned vcpu_size, unsigned > vcpu_align, struct module *module) > kmem_cache_destroy(kvm_vcpu_cache); > out_free_3: > unregister_reboot_notifier(&kvm_reboot_notifier); > - cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING); > + cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); > out_free_2: > free_cpumask_var(cpus_hardware_enabled); > return r; > @@ -5923,7 +5937,7 @@ void kvm_exit(void) > kvm_async_pf_deinit(); > unregister_syscore_ops(&kvm_syscore_ops); > unregister_reboot_notifier(&kvm_reboot_notifier); > - cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING); > + cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); > on_each_cpu(hardware_disable_nolock, NULL, 1); > kvm_irqfd_exit(); > free_cpumask_var(cpus_hardware_enabled);
On Thu, Nov 10, 2022, Robert Hoo wrote: > > -static int kvm_starting_cpu(unsigned int cpu) > > +static int kvm_online_cpu(unsigned int cpu) > > { > > + int ret = 0; > > + > > raw_spin_lock(&kvm_count_lock); > > - if (kvm_usage_count) > > + /* > > + * Abort the CPU online process if hardware virtualization > > cannot > > + * be enabled. Otherwise running VMs would encounter > > unrecoverable > > + * errors when scheduled to this CPU. > > + */ > > + if (kvm_usage_count) { > > + WARN_ON_ONCE(atomic_read(&hardware_enable_failed)); > > + > > hardware_enable_nolock(NULL); > > + if (atomic_read(&hardware_enable_failed)) { > > + atomic_set(&hardware_enable_failed, 0); > > I see other places using this hardware_enable_failed with atomic_inc(), > should here use atomic_dec() instead of straightly set to 0? Meh, both options are flawed. E.g. if hardware_enable_failed was left dangling (the WARN above), then atomic_dec() won't remedy the problem and KVM will reject onlining CPUs indefinitely. Forcing the atomic back to '0' will remedy that particular issue, but could lead to problems if there are other bugs. > Though here is embraced by spin_lock, hardware_enable_nolock() can be > invoked in other places in parallel? Only because of a KVM bug, which gets fixed in the next patch: KVM: Disable CPU hotplug during hardware enabling
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 7337414e4947..de45be38dd27 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -185,7 +185,6 @@ enum cpuhp_state { CPUHP_AP_CSKY_TIMER_STARTING, CPUHP_AP_TI_GP_TIMER_STARTING, CPUHP_AP_HYPERV_TIMER_STARTING, - CPUHP_AP_KVM_STARTING, /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, @@ -200,6 +199,7 @@ enum cpuhp_state { /* Online section invoked on the hotplugged CPU from the hotplug thread */ CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dd13af9f06d5..fd9e39c85549 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5026,13 +5026,27 @@ static void hardware_enable_nolock(void *junk) } } -static int kvm_starting_cpu(unsigned int cpu) +static int kvm_online_cpu(unsigned int cpu) { + int ret = 0; + raw_spin_lock(&kvm_count_lock); - if (kvm_usage_count) + /* + * Abort the CPU online process if hardware virtualization cannot + * be enabled. Otherwise running VMs would encounter unrecoverable + * errors when scheduled to this CPU. + */ + if (kvm_usage_count) { + WARN_ON_ONCE(atomic_read(&hardware_enable_failed)); + hardware_enable_nolock(NULL); + if (atomic_read(&hardware_enable_failed)) { + atomic_set(&hardware_enable_failed, 0); + ret = -EIO; + } + } raw_spin_unlock(&kvm_count_lock); - return 0; + return ret; } static void hardware_disable_nolock(void *junk) @@ -5045,7 +5059,7 @@ static void hardware_disable_nolock(void *junk) kvm_arch_hardware_disable(); } -static int kvm_dying_cpu(unsigned int cpu) +static int kvm_offline_cpu(unsigned int cpu) { raw_spin_lock(&kvm_count_lock); if (kvm_usage_count) @@ -5822,8 +5836,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) return -ENOMEM; - r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting", - kvm_starting_cpu, kvm_dying_cpu); + r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online", + kvm_online_cpu, kvm_offline_cpu); if (r) goto out_free_2; register_reboot_notifier(&kvm_reboot_notifier); @@ -5897,7 +5911,7 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) kmem_cache_destroy(kvm_vcpu_cache); out_free_3: unregister_reboot_notifier(&kvm_reboot_notifier); - cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING); + cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); out_free_2: free_cpumask_var(cpus_hardware_enabled); return r; @@ -5923,7 +5937,7 @@ void kvm_exit(void) kvm_async_pf_deinit(); unregister_syscore_ops(&kvm_syscore_ops); unregister_reboot_notifier(&kvm_reboot_notifier); - cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING); + cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); on_each_cpu(hardware_disable_nolock, NULL, 1); kvm_irqfd_exit(); free_cpumask_var(cpus_hardware_enabled);