Message ID | 20210726035036.739609-18-npiggin@gmail.com |
---|---|
State | New |
Series | [v1,01/55] KVM: PPC: Book3S HV: Remove TM emulation from POWER7/8 path |
> On 26-Jul-2021, at 9:19 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
>
> Implement the P9 path PMU save/restore code in C, and remove the
> POWER9/10 code from the P7/8 path assembly.
>
> -449 cycles (8533) POWER9 virt-mode NULL hcall
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
[...]
> +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
> +{
> +	if (!(mmcr0 & MMCR0_FC))
> +		goto do_freeze;
> +	if (mmcra & MMCRA_SAMPLE_ENABLE)
> +		goto do_freeze;
> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +		if (!(mmcr0 & MMCR0_PMCCEXT))
> +			goto do_freeze;
> +		if (!(mmcra & MMCRA_BHRB_DISABLE))
> +			goto do_freeze;
> +	}
> +	return;
> +
> +do_freeze:
> +	mmcr0 = MMCR0_FC;
> +	mmcra = 0;
> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +		mmcr0 |= MMCR0_PMCCEXT;
> +		mmcra = MMCRA_BHRB_DISABLE;
> +	}
> +
> +	mtspr(SPRN_MMCR0, mmcr0);
> +	mtspr(SPRN_MMCRA, mmcra);
> +	isync();
> +}
> +

Hi Nick,

After freezing the PMU, do we need to clear "pmcregs_in_use" as well?

Also, can't we unconditionally apply the MMCR0/MMCRA freeze settings here? Do we need the if conditions for FC/PMCCEXT/BHRB?

Thanks
Athira
Excerpts from Athira Rajeev's message of August 9, 2021 1:03 pm:
>
>> On 26-Jul-2021, at 9:19 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
>>
>> +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
>> +{
>> +	if (!(mmcr0 & MMCR0_FC))
>> +		goto do_freeze;
>> +	if (mmcra & MMCRA_SAMPLE_ENABLE)
>> +		goto do_freeze;
>> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
>> +		if (!(mmcr0 & MMCR0_PMCCEXT))
>> +			goto do_freeze;
>> +		if (!(mmcra & MMCRA_BHRB_DISABLE))
>> +			goto do_freeze;
>> +	}
>> +	return;
>> +
>> +do_freeze:
>> +	mmcr0 = MMCR0_FC;
>> +	mmcra = 0;
>> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
>> +		mmcr0 |= MMCR0_PMCCEXT;
>> +		mmcra = MMCRA_BHRB_DISABLE;
>> +	}
>> +
>> +	mtspr(SPRN_MMCR0, mmcr0);
>> +	mtspr(SPRN_MMCRA, mmcra);
>> +	isync();
>> +}
>> +
> Hi Nick,
>
> After freezing the PMU, do we need to clear "pmcregs_in_use" as well?

Not until we save the values out of the registers. pmcregs_in_use = 0
means our hypervisor is free to clear our PMU register contents.

> Also, can't we unconditionally apply the MMCR0/MMCRA freeze settings here?
> Do we need the if conditions for FC/PMCCEXT/BHRB?

I think it's possible, but pretty minimal advantage. I would prefer to
set them the way perf does for now. If we can move this code into perf/
it should become easier for you to tweak things.

Thanks,
Nick
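To make the ordering Nick describes concrete, here is a minimal sketch of the exit-time sequence (illustrative only, not part of the patch; guest_pmu_exit_order() is a hypothetical wrapper, while save_p9_guest_pmu(), kvmhv_on_pseries(), get_lppaca() and ppc_get_pmu_inuse() all appear in the diff):

/*
 * Illustrative sketch, not part of the patch: when KVM itself runs under
 * a hypervisor, the L0 host may clobber the PMU registers as soon as
 * pmcregs_in_use is cleared, so the guest values must be saved first.
 */
static void guest_pmu_exit_order(struct kvm_vcpu *vcpu)
{
	/* 1. Freeze the counters and copy them into the vcpu struct. */
	save_p9_guest_pmu(vcpu);

	/* 2. Only now is it safe to tell our hypervisor the PMU is idle. */
	if (kvmhv_on_pseries()) {
		barrier();
		get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
		barrier();
	}
}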
> On 13-Aug-2021, at 9:54 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
>
> Excerpts from Athira Rajeev's message of August 9, 2021 1:03 pm:
>>
>>> On 26-Jul-2021, at 9:19 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
>>>
>>> +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
>>> +{
[...]
>>> +	isync();
>>> +}
>>> +
>> Hi Nick,
>>
>> After freezing the PMU, do we need to clear "pmcregs_in_use" as well?
>
> Not until we save the values out of the registers. pmcregs_in_use = 0
> means our hypervisor is free to clear our PMU register contents.
>
>> Also, can't we unconditionally apply the MMCR0/MMCRA freeze settings here?
>> Do we need the if conditions for FC/PMCCEXT/BHRB?
>
> I think it's possible, but pretty minimal advantage. I would prefer to
> set them the way perf does for now.

Sure Nick, other changes look good to me.

Reviewed-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>

Thanks
Athira

> If we can move this code into perf/
> it should become easier for you to tweak things.
>
> Thanks,
> Nick
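For comparison, the unconditional variant Athira asks about would look roughly like the sketch below (hypothetical, not what the patch does; the patch's freeze_pmu() keeps the checks so the written values match what perf programs):

/* Hypothetical alternative only: always rewrite the freeze values. */
static void freeze_pmu_unconditional(void)
{
	unsigned long mmcr0 = MMCR0_FC;
	unsigned long mmcra = 0;

	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
		mmcr0 |= MMCR0_PMCCEXT;
		mmcra = MMCRA_BHRB_DISABLE;
	}

	mtspr(SPRN_MMCR0, mmcr0);
	mtspr(SPRN_MMCRA, mmcra);
	isync();
}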
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 222823861a67..41b8a1e1144a 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -141,11 +141,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
 				bool preserve_nv) { }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-void kvmhv_save_host_pmu(void);
-void kvmhv_load_host_pmu(void);
-void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
-void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-
 void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
 
 long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2eef708c4354..d20b579ddcdf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3735,6 +3735,188 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	trace_kvmppc_run_core(vc, 1);
 }
 
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+	unsigned long dscr;
+	unsigned long tidr;
+	unsigned long iamr;
+	unsigned long amr;
+	unsigned long fscr;
+
+	unsigned int pmc1;
+	unsigned int pmc2;
+	unsigned int pmc3;
+	unsigned int pmc4;
+	unsigned int pmc5;
+	unsigned int pmc6;
+	unsigned long mmcr0;
+	unsigned long mmcr1;
+	unsigned long mmcr2;
+	unsigned long mmcr3;
+	unsigned long mmcra;
+	unsigned long siar;
+	unsigned long sier1;
+	unsigned long sier2;
+	unsigned long sier3;
+	unsigned long sdar;
+};
+
+static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
+{
+	if (!(mmcr0 & MMCR0_FC))
+		goto do_freeze;
+	if (mmcra & MMCRA_SAMPLE_ENABLE)
+		goto do_freeze;
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		if (!(mmcr0 & MMCR0_PMCCEXT))
+			goto do_freeze;
+		if (!(mmcra & MMCRA_BHRB_DISABLE))
+			goto do_freeze;
+	}
+	return;
+
+do_freeze:
+	mmcr0 = MMCR0_FC;
+	mmcra = 0;
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		mmcr0 |= MMCR0_PMCCEXT;
+		mmcra = MMCRA_BHRB_DISABLE;
+	}
+
+	mtspr(SPRN_MMCR0, mmcr0);
+	mtspr(SPRN_MMCRA, mmcra);
+	isync();
+}
+
+static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+{
+	if (ppc_get_pmu_inuse()) {
+		/*
+		 * It might be better to put PMU handling (at least for the
+		 * host) in the perf subsystem because it knows more about what
+		 * is being used.
+		 */
+
+		/* POWER9, POWER10 do not implement HPMC or SPMC */
+
+		host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
+		host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
+
+		freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
+
+		host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
+		host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
+		host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
+		host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
+		host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
+		host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
+		host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
+		host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
+		host_os_sprs->sdar = mfspr(SPRN_SDAR);
+		host_os_sprs->siar = mfspr(SPRN_SIAR);
+		host_os_sprs->sier1 = mfspr(SPRN_SIER);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
+			host_os_sprs->sier2 = mfspr(SPRN_SIER2);
+			host_os_sprs->sier3 = mfspr(SPRN_SIER3);
+		}
+	}
+}
+
+static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
+{
+	mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
+	mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
+	mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
+	mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
+	mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
+	mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
+	mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
+	mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
+	mtspr(SPRN_SDAR, vcpu->arch.sdar);
+	mtspr(SPRN_SIAR, vcpu->arch.siar);
+	mtspr(SPRN_SIER, vcpu->arch.sier[0]);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]);
+		mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
+		mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
+	}
+
+	/* Set MMCRA then MMCR0 last */
+	mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
+	mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
+	/* No isync necessary because we're starting counters */
+}
+
+static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
+{
+	struct lppaca *lp;
+	int save_pmu = 1;
+
+	lp = vcpu->arch.vpa.pinned_addr;
+	if (lp)
+		save_pmu = lp->pmcregs_in_use;
+
+	if (save_pmu) {
+		vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
+		vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
+
+		freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
+
+		vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
+		vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
+		vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
+		vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
+		vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
+		vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
+		vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
+		vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
+		vcpu->arch.sdar = mfspr(SPRN_SDAR);
+		vcpu->arch.siar = mfspr(SPRN_SIAR);
+		vcpu->arch.sier[0] = mfspr(SPRN_SIER);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
+			vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
+			vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
+		}
+	} else {
+		freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
+	}
+}
+
+static void load_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+{
+	if (ppc_get_pmu_inuse()) {
+		mtspr(SPRN_PMC1, host_os_sprs->pmc1);
+		mtspr(SPRN_PMC2, host_os_sprs->pmc2);
+		mtspr(SPRN_PMC3, host_os_sprs->pmc3);
+		mtspr(SPRN_PMC4, host_os_sprs->pmc4);
+		mtspr(SPRN_PMC5, host_os_sprs->pmc5);
+		mtspr(SPRN_PMC6, host_os_sprs->pmc6);
+		mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
+		mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
+		mtspr(SPRN_SDAR, host_os_sprs->sdar);
+		mtspr(SPRN_SIAR, host_os_sprs->siar);
+		mtspr(SPRN_SIER, host_os_sprs->sier1);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
+			mtspr(SPRN_SIER2, host_os_sprs->sier2);
+			mtspr(SPRN_SIER3, host_os_sprs->sier3);
+		}
+
+		/* Set MMCRA then MMCR0 last */
+		mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
+		mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
+		isync();
+	}
+}
+
 static void load_spr_state(struct kvm_vcpu *vcpu)
 {
 	mtspr(SPRN_DSCR, vcpu->arch.dscr);
@@ -3777,17 +3959,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
 	vcpu->arch.dscr = mfspr(SPRN_DSCR);
 }
 
-/*
- * Privileged (non-hypervisor) host registers to save.
- */
-struct p9_host_os_sprs {
-	unsigned long dscr;
-	unsigned long tidr;
-	unsigned long iamr;
-	unsigned long amr;
-	unsigned long fscr;
-};
-
 static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
 {
 	host_os_sprs->dscr = mfspr(SPRN_DSCR);
@@ -3835,7 +4006,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	struct p9_host_os_sprs host_os_sprs;
 	s64 dec;
 	u64 tb, next_timer;
-	int trap, save_pmu;
+	int trap;
 
 	WARN_ON_ONCE(vcpu->arch.ceded);
 
@@ -3848,7 +4019,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	save_p9_host_os_sprs(&host_os_sprs);
 
-	kvmhv_save_host_pmu();		/* saves it to PACA kvm_hstate */
+	save_p9_host_pmu(&host_os_sprs);
 
 	kvmppc_subcore_enter_guest();
 
@@ -3878,7 +4049,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		barrier();
 	}
 #endif
-	kvmhv_load_guest_pmu(vcpu);
+	load_p9_guest_pmu(vcpu);
 
 	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
 	load_fp_state(&vcpu->arch.fp);
@@ -4000,16 +4171,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
 		kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
 
-	save_pmu = 1;
 	if (vcpu->arch.vpa.pinned_addr) {
 		struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
 		u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
 		lp->yield_count = cpu_to_be32(yield_count);
 		vcpu->arch.vpa.dirty = 1;
-		save_pmu = lp->pmcregs_in_use;
 	}
 
-	kvmhv_save_guest_pmu(vcpu, save_pmu);
+	save_p9_guest_pmu(vcpu);
 #ifdef CONFIG_PPC_PSERIES
 	if (kvmhv_on_pseries()) {
 		barrier();
@@ -4025,7 +4194,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
 
-	kvmhv_load_host_pmu();
+	load_p9_host_pmu(&host_os_sprs);
 
 	kvmppc_subcore_exit_guest();
 
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 4444f83cb133..59d89e4b154a 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtlr	r0
 	blr
 
-_GLOBAL(kvmhv_save_host_pmu)
+/*
+ * void kvmhv_save_host_pmu(void)
+ */
+kvmhv_save_host_pmu:
 BEGIN_FTR_SECTION
 	/* Work around P8 PMAE bug */
 	li	r3, -1
@@ -138,14 +141,6 @@ BEGIN_FTR_SECTION
 	std	r8, HSTATE_MMCR2(r13)
 	std	r9, HSTATE_SIER(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	mfspr	r5, SPRN_MMCR3
-	mfspr	r6, SPRN_SIER2
-	mfspr	r7, SPRN_SIER3
-	std	r5, HSTATE_MMCR3(r13)
-	std	r6, HSTATE_SIER2(r13)
-	std	r7, HSTATE_SIER3(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	mfspr	r3, SPRN_PMC1
 	mfspr	r5, SPRN_PMC2
 	mfspr	r6, SPRN_PMC3
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9021052f1579..551ce223b40c 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2738,10 +2738,11 @@ kvmppc_msr_interrupt:
 	blr
 
 /*
+ * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
+ *
  * Load up guest PMU state.  R3 points to the vcpu struct.
  */
-_GLOBAL(kvmhv_load_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
+kvmhv_load_guest_pmu:
 	mr	r4, r3
 	mflr	r0
 	li	r3, 1
@@ -2775,27 +2776,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
 	mtspr	SPRN_MMCRA, r6
 	mtspr	SPRN_SIAR, r7
 	mtspr	SPRN_SDAR, r8
-BEGIN_FTR_SECTION
-	ld	r5, VCPU_MMCR + 24(r4)
-	ld	r6, VCPU_SIER + 8(r4)
-	ld	r7, VCPU_SIER + 16(r4)
-	mtspr	SPRN_MMCR3, r5
-	mtspr	SPRN_SIER2, r6
-	mtspr	SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 BEGIN_FTR_SECTION
 	ld	r5, VCPU_MMCR + 16(r4)
 	ld	r6, VCPU_SIER(r4)
 	mtspr	SPRN_MMCR2, r5
 	mtspr	SPRN_SIER, r6
-BEGIN_FTR_SECTION_NESTED(96)
 	lwz	r7, VCPU_PMC + 24(r4)
 	lwz	r8, VCPU_PMC + 28(r4)
 	ld	r9, VCPU_MMCRS(r4)
 	mtspr	SPRN_SPMC1, r7
 	mtspr	SPRN_SPMC2, r8
 	mtspr	SPRN_MMCRS, r9
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_MMCR0, r3
 	isync
@@ -2803,10 +2794,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	blr
 
 /*
+ * void kvmhv_load_host_pmu(void)
+ *
  * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
  */
-_GLOBAL(kvmhv_load_host_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
+kvmhv_load_host_pmu:
 	mflr	r0
 	lbz	r4, PACA_PMCINUSE(r13)	/* is the host using the PMU? */
 	cmpwi	r4, 0
@@ -2844,25 +2836,18 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_MMCR2, r8
 	mtspr	SPRN_SIER, r9
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	ld	r5, HSTATE_MMCR3(r13)
-	ld	r6, HSTATE_SIER2(r13)
-	ld	r7, HSTATE_SIER3(r13)
-	mtspr	SPRN_MMCR3, r5
-	mtspr	SPRN_SIER2, r6
-	mtspr	SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	mtspr	SPRN_MMCR0, r3
 	isync
 	mtlr	r0
 23:	blr
 
 /*
+ * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
+ *
  * Save guest PMU state into the vcpu struct.
  * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
  */
-_GLOBAL(kvmhv_save_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
+kvmhv_save_guest_pmu:
 	mr	r9, r3
 	mr	r8, r4
 BEGIN_FTR_SECTION
@@ -2911,14 +2896,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 BEGIN_FTR_SECTION
 	std	r10, VCPU_MMCR + 16(r9)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	mfspr	r5, SPRN_MMCR3
-	mfspr	r6, SPRN_SIER2
-	mfspr	r7, SPRN_SIER3
-	std	r5, VCPU_MMCR + 24(r9)
-	std	r6, VCPU_SIER + 8(r9)
-	std	r7, VCPU_SIER + 16(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	std	r7, VCPU_SIAR(r9)
 	std	r8, VCPU_SDAR(r9)
 	mfspr	r3, SPRN_PMC1
@@ -2936,7 +2913,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_SIER
 	std	r5, VCPU_SIER(r9)
-BEGIN_FTR_SECTION_NESTED(96)
 	mfspr	r6, SPRN_SPMC1
 	mfspr	r7, SPRN_SPMC2
 	mfspr	r8, SPRN_MMCRS
@@ -2945,7 +2921,6 @@ BEGIN_FTR_SECTION_NESTED(96)
 	std	r8, VCPU_MMCRS(r9)
 	lis	r4, 0x8000
 	mtspr	SPRN_MMCRS, r4
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 22:	blr
Implement the P9 path PMU save/restore code in C, and remove the
POWER9/10 code from the P7/8 path assembly.

-449 cycles (8533) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/asm-prototypes.h |   5 -
 arch/powerpc/kvm/book3s_hv.c              | 205 ++++++++++++++++++++--
 arch/powerpc/kvm/book3s_hv_interrupts.S   |  13 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  43 +----
 4 files changed, 200 insertions(+), 66 deletions(-)