--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -129,6 +129,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
+extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
@@ -606,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
@@ -638,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
+static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+ { return 0; }
#endif
#ifdef CONFIG_KVM_XIVE
@@ -672,6 +676,7 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{
@@ -713,6 +718,7 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -171,6 +171,12 @@ void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
}
EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
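+/*
+ * Deliver a system call (0xc00) interrupt to the guest kernel, e.g. to
+ * reflect a syscall taken from guest userspace back into the guest.
+ */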
+void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
+{
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_syscall);
+
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
/* might as well deliver this straight away */
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -899,6 +899,10 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
* H_SUCCESS if the source vcore wasn't idle (e.g. if it may
* have useful work to do and should not confer) so we don't
* recheck that here.
+ *
+ * In the P9 case, with a single vcpu per vcore, the real-mode
+ * handler is not called, but no other threads can be in the
+ * source vcore anyway.
*/
spin_lock(&vcore->lock);
@@ -1142,12 +1146,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
}
/*
- * Handle H_CEDE in the nested virtualization case where we haven't
- * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
+ * handlers in book3s_hv_rmhandlers.S.
+ *
* This has to be done early, not in kvmppc_pseries_do_hcall(), so
* that the cede logic in kvmppc_run_single_vcpu() works properly.
*/
-static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
{
vcpu->arch.shregs.msr |= MSR_EE;
vcpu->arch.ceded = 1;
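For context, the guest-side trigger for this path is the PAPR H_CEDE hcall; a
minimal guest-kernel sketch, assuming the standard pseries plpar_hcall_norets()
wrapper (the token value comes from arch/powerpc/include/asm/hvcall.h):

	/* guest idle, sketch: sleep until the next interrupt or an H_PROD */
	long rc = plpar_hcall_norets(H_CEDE);	/* H_CEDE == 0x1E0 */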
@@ -1400,13 +1405,29 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
}
case BOOK3S_INTERRUPT_SYSCALL:
{
- /* hcall - punt to userspace */
int i;
- /* hypercall with MSR_PR has already been handled in rmode,
- * and never reaches here.
- */
+ if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * Guest userspace executed sc 1. This can only be
+ * reached by the P9 path because the old path
+ * handles this case in realmode hcall handlers.
+ *
+ * Radix guests cannot run PR KVM or nested HV hash
+ * guests (which might run PR KVM), so this is always
+ * a privilege fault. Send a program check to the
+ * guest kernel.
+ */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ r = RESUME_GUEST;
+ break;
+ }
+ /*
+ * hcall - gather args and set exit_reason. This will next be
+ * handled by kvmppc_pseries_do_hcall(), which may be able to
+ * deal with it and resume the guest, or may punt to userspace.
+ */
run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
for (i = 0; i < 9; ++i)
run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
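For reference, the fields gathered here form the KVM_EXIT_PAPR_HCALL layout in
the shared kvm_run structure (include/uapi/linux/kvm.h); userspace fills in
ret, which is copied back to r3 when the vcpu resumes:

	/* KVM_EXIT_PAPR_HCALL */
	struct {
		__u64 nr;	/* hcall token, taken from r3 */
		__u64 ret;	/* return status, copied back to r3 */
		__u64 args[9];	/* hcall arguments, from r4..r12 */
	} papr_hcall;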
@@ -3664,6 +3685,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
return trap;
}
+static inline bool hcall_is_xics(unsigned long req)
+{
+ return req == H_EOI || req == H_CPPR || req == H_IPI ||
+ req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
+}
+
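For reference, the tokens tested by hcall_is_xics() are the PAPR XICS
interrupt-controller hcalls, defined in arch/powerpc/include/asm/hvcall.h:

	#define H_EOI		0x64
	#define H_CPPR		0x68
	#define H_IPI		0x6c
	#define H_IPOLL		0x70
	#define H_XIRR		0x74
	#define H_XIRR_X	0x2FC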
/*
* Virtual-mode guest entry for POWER9 and later when the host and
* guest are both using the radix MMU. The LPIDR has already been set.
@@ -3787,15 +3814,36 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
- kvmppc_nested_cede(vcpu);
+ kvmppc_cede(vcpu);
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
kvmppc_xive_push_vcpu(vcpu);
trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+ !(vcpu->arch.shregs.msr & MSR_PR)) {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /* H_CEDE has to be handled now, not later */
+ if (req == H_CEDE) {
+ kvmppc_cede(vcpu);
+ kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
+ kvmppc_set_gpr(vcpu, 3, 0);
+ trap = 0;
+
+ /* XICS hcalls must be handled before xive is pulled */
+ } else if (hcall_is_xics(req)) {
+ int ret;
+
+ ret = kvmppc_xive_xics_hcall(vcpu, req);
+ if (ret != H_TOO_HARD) {
+ kvmppc_set_gpr(vcpu, 3, ret);
+ trap = 0;
+ }
+ }
+ }
kvmppc_xive_pull_vcpu(vcpu);
-
}
vcpu->arch.slb_max = 0;
@@ -4461,8 +4509,17 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
else
r = kvmppc_run_vcpu(vcpu);
- if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
- !(vcpu->arch.shregs.msr & MSR_PR)) {
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+ if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * These should have been caught and reflected
+ * into the guest by now. Final sanity check:
+ * don't allow userspace to execute hcalls in
+ * the hypervisor.
+ */
+ r = RESUME_GUEST;
+ continue;
+ }
trace_kvm_hcall_enter(vcpu);
r = kvmppc_pseries_do_hcall(vcpu);
trace_kvm_hcall_exit(vcpu, r);
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1397,9 +1397,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mr r4,r9
bge fast_guest_return
2:
+ /* If we came in through the P9 short path, no real mode hcalls */
+ lwz r0, STACK_SLOT_SHORT_PATH(r1)
+ cmpwi r0, 0
+ bne no_try_real
/* See if this is an hcall we can handle in real mode */
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
beq hcall_try_real_mode
+no_try_real:
/* Hypervisor doorbell - exit only if host IPI flag set */
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -158,6 +158,40 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
+void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+{
+ void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+
+ if (!esc_vaddr)
+ return;
+
+ /* we are using XIVE with single escalation */
+
+ if (vcpu->arch.xive_esc_on) {
+ /*
+ * If we still have a pending escalation, abort the cede,
+ * and we must set PQ to 10 rather than 00 so that we don't
+ * potentially end up with two entries for the escalation
+ * interrupt in the XIVE interrupt queue. In that case
+ * we also don't want to set xive_esc_on to 1 here in
+ * case we race with xive_esc_irq().
+ */
+ vcpu->arch.ceded = 0;
+ /*
+ * The escalation interrupts are special as we don't EOI them.
+ * There is no need to use the load-after-store ordering offset
+ * to set PQ to 10 as we won't use StoreEOI.
+ */
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
+ } else {
+ vcpu->arch.xive_esc_on = true;
+ mb();
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
+ }
+ mb();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
+
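The ESB "special load" offsets used above, and the load-after-store ordering
offset the comment mentions, are defined in arch/powerpc/include/asm/xive-regs.h:

	#define XIVE_ESB_LD_ST_MO	0x40	/* load-after-store ordering */
	#define XIVE_ESB_SET_PQ_00	0xc00	/* load: set PQ = 00 (enabled) */
	#define XIVE_ESB_SET_PQ_10	0xe00	/* load: set PQ = 10 (masked) */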
/*
* This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page
@@ -2106,6 +2140,36 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
return 0;
}
+int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ /* The VM should have configured XICS mode before doing XICS hcalls. */
+ if (!kvmppc_xics_enabled(vcpu))
+ return H_TOO_HARD;
+
+ switch (req) {
+ case H_XIRR:
+ return xive_vm_h_xirr(vcpu);
+ case H_CPPR:
+ return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_EOI:
+ return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_IPI:
+ return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ case H_IPOLL:
+ return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_XIRR_X:
+ xive_vm_h_xirr(vcpu);
+ kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
+ return H_SUCCESS;
+ }
+
+ return H_UNSUPPORTED;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
+
int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
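For context, a guest reaches this dispatcher by issuing XICS hcalls; e.g. the
guest's ICP backend raises an IPI roughly as follows (a sketch modeled on
icp_hv_set_qirr() in arch/powerpc/sysdev/xics/icp-hv.c, where hw_cpu is
assumed to be the target's hardware cpu id and IPI_PRIORITY the guest's IPI
priority constant):

	/* guest side, sketch: set the target's MFRR to the IPI priority */
	long rc = plpar_hcall_norets(H_IPI, hw_cpu, IPI_PRIORITY);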