diff mbox series

[RFC,11/43] KVM: PPC: Book3S HV P9: Implement PMU save/restore in C

Message ID 20210622105736.633352-12-npiggin@gmail.com
State New
Headers show
Series KVM: PPC: Book3S HV P9: entry/exit optimisations round 1 | expand

Commit Message

Nicholas Piggin June 22, 2021, 10:57 a.m. UTC
Implement the P9 path PMU save/restore code in C, and remove the
POWER9/10 code from the P7/8 path assembly.

-449 cycles (8533) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/asm-prototypes.h |   5 -
 arch/powerpc/kvm/book3s_hv.c              | 205 ++++++++++++++++++++--
 arch/powerpc/kvm/book3s_hv_interrupts.S   |  13 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  43 +----
 4 files changed, 200 insertions(+), 66 deletions(-)

Comments

Athira Rajeev July 10, 2021, 2:47 a.m. UTC | #1
> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
> 
> Implement the P9 path PMU save/restore code in C, and remove the
> POWER9/10 code from the P7/8 path assembly.
> 
> -449 cycles (8533) POWER9 virt-mode NULL hcall
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/include/asm/asm-prototypes.h |   5 -
> arch/powerpc/kvm/book3s_hv.c              | 205 ++++++++++++++++++++--
> arch/powerpc/kvm/book3s_hv_interrupts.S   |  13 +-
> arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  43 +----
> 4 files changed, 200 insertions(+), 66 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
> index 02ee6f5ac9fe..928db8ef9a5a 100644
> --- a/arch/powerpc/include/asm/asm-prototypes.h
> +++ b/arch/powerpc/include/asm/asm-prototypes.h
> @@ -136,11 +136,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
> 					bool preserve_nv) { }
> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
> 
> -void kvmhv_save_host_pmu(void);
> -void kvmhv_load_host_pmu(void);
> -void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
> -void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
> -
> void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
> 
> long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index f7349d150828..b1b94b3563b7 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -3635,6 +3635,188 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
> 	trace_kvmppc_run_core(vc, 1);
> }
> 
> +/*
> + * Privileged (non-hypervisor) host registers to save.
> + */
> +struct p9_host_os_sprs {
> +	unsigned long dscr;
> +	unsigned long tidr;
> +	unsigned long iamr;
> +	unsigned long amr;
> +	unsigned long fscr;
> +
> +	unsigned int pmc1;
> +	unsigned int pmc2;
> +	unsigned int pmc3;
> +	unsigned int pmc4;
> +	unsigned int pmc5;
> +	unsigned int pmc6;
> +	unsigned long mmcr0;
> +	unsigned long mmcr1;
> +	unsigned long mmcr2;
> +	unsigned long mmcr3;
> +	unsigned long mmcra;
> +	unsigned long siar;
> +	unsigned long sier1;
> +	unsigned long sier2;
> +	unsigned long sier3;
> +	unsigned long sdar;
> +};
> +
> +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
> +{
> +	if (!(mmcr0 & MMCR0_FC))
> +		goto do_freeze;
> +	if (mmcra & MMCRA_SAMPLE_ENABLE)
> +		goto do_freeze;
> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +		if (!(mmcr0 & MMCR0_PMCCEXT))
> +			goto do_freeze;
> +		if (!(mmcra & MMCRA_BHRB_DISABLE))
> +			goto do_freeze;
> +	}
> +	return;


Hi Nick

When freezing the PMU, do we need to also set pmcregs_in_use to zero ?

Also, why we need these above conditions like MMCRA_SAMPLE_ENABLE,  MMCR0_PMCCEXT checks also before freezing ?

> +
> +do_freeze:
> +	mmcr0 = MMCR0_FC;
> +	mmcra = 0;
> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +		mmcr0 |= MMCR0_PMCCEXT;
> +		mmcra = MMCRA_BHRB_DISABLE;
> +	}
> +
> +	mtspr(SPRN_MMCR0, mmcr0);
> +	mtspr(SPRN_MMCRA, mmcra);
> +	isync();
> +}
> +
> +static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
> +{
> +	if (ppc_get_pmu_inuse()) {
> +		/*
> +		 * It might be better to put PMU handling (at least for the
> +		 * host) in the perf subsystem because it knows more about what
> +		 * is being used.
> +		 */
> +
> +		/* POWER9, POWER10 do not implement HPMC or SPMC */
> +
> +		host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
> +		host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
> +
> +		freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
> +
> +		host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
> +		host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
> +		host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
> +		host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
> +		host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
> +		host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
> +		host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
> +		host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
> +		host_os_sprs->sdar = mfspr(SPRN_SDAR);
> +		host_os_sprs->siar = mfspr(SPRN_SIAR);
> +		host_os_sprs->sier1 = mfspr(SPRN_SIER);
> +
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
> +			host_os_sprs->sier2 = mfspr(SPRN_SIER2);
> +			host_os_sprs->sier3 = mfspr(SPRN_SIER3);
> +		}
> +	}
> +}
> +
> +static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
> +{
> +	mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
> +	mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
> +	mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
> +	mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
> +	mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
> +	mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
> +	mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
> +	mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
> +	mtspr(SPRN_SDAR, vcpu->arch.sdar);
> +	mtspr(SPRN_SIAR, vcpu->arch.siar);
> +	mtspr(SPRN_SIER, vcpu->arch.sier[0]);
> +
> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +		mtspr(SPRN_MMCR3, vcpu->arch.mmcr[4]);
> +		mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
> +		mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
> +	}
> +
> +	/* Set MMCRA then MMCR0 last */
> +	mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
> +	mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
> +	/* No isync necessary because we're starting counters */
> +}
> +
> +static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
> +{
> +	struct lppaca *lp;
> +	int save_pmu = 1;
> +
> +	lp = vcpu->arch.vpa.pinned_addr;
> +	if (lp)
> +		save_pmu = lp->pmcregs_in_use;
> +
> +	if (save_pmu) {
> +		vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
> +		vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
> +
> +		freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
> +
> +		vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
> +		vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
> +		vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
> +		vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
> +		vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
> +		vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
> +		vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
> +		vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
> +		vcpu->arch.sdar = mfspr(SPRN_SDAR);
> +		vcpu->arch.siar = mfspr(SPRN_SIAR);
> +		vcpu->arch.sier[0] = mfspr(SPRN_SIER);
> +
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
> +			vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
> +			vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
> +		}
> +	} else {
> +		freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
> +	}
> +}
> +
> +static void load_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
> +{
> +	if (ppc_get_pmu_inuse()) {
> +		mtspr(SPRN_PMC1, host_os_sprs->pmc1);
> +		mtspr(SPRN_PMC2, host_os_sprs->pmc2);
> +		mtspr(SPRN_PMC3, host_os_sprs->pmc3);
> +		mtspr(SPRN_PMC4, host_os_sprs->pmc4);
> +		mtspr(SPRN_PMC5, host_os_sprs->pmc5);
> +		mtspr(SPRN_PMC6, host_os_sprs->pmc6);
> +		mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
> +		mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
> +		mtspr(SPRN_SDAR, host_os_sprs->sdar);
> +		mtspr(SPRN_SIAR, host_os_sprs->siar);
> +		mtspr(SPRN_SIER, host_os_sprs->sier1);
> +
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
> +			mtspr(SPRN_SIER2, host_os_sprs->sier2);
> +			mtspr(SPRN_SIER3, host_os_sprs->sier3);
> +		}
> +
> +		/* Set MMCRA then MMCR0 last */
> +		mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
> +		mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
> +		isync();
> +	}
> +}
> +
> static void load_spr_state(struct kvm_vcpu *vcpu)
> {
> 	mtspr(SPRN_DSCR, vcpu->arch.dscr);
> @@ -3677,17 +3859,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
> 	vcpu->arch.dscr = mfspr(SPRN_DSCR);
> }
> 
> -/*
> - * Privileged (non-hypervisor) host registers to save.
> - */
> -struct p9_host_os_sprs {
> -	unsigned long dscr;
> -	unsigned long tidr;
> -	unsigned long iamr;
> -	unsigned long amr;
> -	unsigned long fscr;
> -};
> -
> static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
> {
> 	host_os_sprs->dscr = mfspr(SPRN_DSCR);
> @@ -3735,7 +3906,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> 	struct p9_host_os_sprs host_os_sprs;
> 	s64 dec;
> 	u64 tb, next_timer;
> -	int trap, save_pmu;
> +	int trap;
> 
> 	WARN_ON_ONCE(vcpu->arch.ceded);
> 
> @@ -3748,7 +3919,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> 
> 	save_p9_host_os_sprs(&host_os_sprs);
> 
> -	kvmhv_save_host_pmu();		/* saves it to PACA kvm_hstate */
> +	save_p9_host_pmu(&host_os_sprs);
> 
> 	kvmppc_subcore_enter_guest();
> 
> @@ -3776,7 +3947,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> 		}
> 	}
> #endif
> -	kvmhv_load_guest_pmu(vcpu);
> +	load_p9_guest_pmu(vcpu);
> 
> 	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
> 	load_fp_state(&vcpu->arch.fp);
> @@ -3898,16 +4069,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> 	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
> 		kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
> 
> -	save_pmu = 1;
> 	if (vcpu->arch.vpa.pinned_addr) {
> 		struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
> 		u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
> 		lp->yield_count = cpu_to_be32(yield_count);
> 		vcpu->arch.vpa.dirty = 1;
> -		save_pmu = lp->pmcregs_in_use;
> 	}
> 
> -	kvmhv_save_guest_pmu(vcpu, save_pmu);
> +	save_p9_guest_pmu(vcpu);
> #ifdef CONFIG_PPC_PSERIES
> 	if (kvmhv_on_pseries())
> 		get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
> @@ -3920,7 +4089,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> 
> 	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
> 
> -	kvmhv_load_host_pmu();
> +	load_p9_host_pmu(&host_os_sprs);
> 
> 	kvmppc_subcore_exit_guest();
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
> index 4444f83cb133..59d89e4b154a 100644
> --- a/arch/powerpc/kvm/book3s_hv_interrupts.S
> +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
> @@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
> 	mtlr	r0
> 	blr
> 
> -_GLOBAL(kvmhv_save_host_pmu)
> +/*
> + * void kvmhv_save_host_pmu(void)
> + */
> +kvmhv_save_host_pmu:
> BEGIN_FTR_SECTION
> 	/* Work around P8 PMAE bug */
> 	li	r3, -1
> @@ -138,14 +141,6 @@ BEGIN_FTR_SECTION
> 	std	r8, HSTATE_MMCR2(r13)
> 	std	r9, HSTATE_SIER(r13)
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> -BEGIN_FTR_SECTION
> -	mfspr	r5, SPRN_MMCR3
> -	mfspr	r6, SPRN_SIER2
> -	mfspr	r7, SPRN_SIER3
> -	std	r5, HSTATE_MMCR3(r13)
> -	std	r6, HSTATE_SIER2(r13)
> -	std	r7, HSTATE_SIER3(r13)
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
> 	mfspr	r3, SPRN_PMC1
> 	mfspr	r5, SPRN_PMC2
> 	mfspr	r6, SPRN_PMC3
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 007f87b97184..0eb06734bc26 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -2780,10 +2780,11 @@ kvmppc_msr_interrupt:
> 	blr
> 
> /*
> + * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
> + *
>  * Load up guest PMU state.  R3 points to the vcpu struct.
>  */
> -_GLOBAL(kvmhv_load_guest_pmu)
> -EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
> +kvmhv_load_guest_pmu:
> 	mr	r4, r3
> 	mflr	r0
> 	li	r3, 1
> @@ -2817,27 +2818,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
> 	mtspr	SPRN_MMCRA, r6
> 	mtspr	SPRN_SIAR, r7
> 	mtspr	SPRN_SDAR, r8
> -BEGIN_FTR_SECTION
> -	ld      r5, VCPU_MMCR + 24(r4)
> -	ld      r6, VCPU_SIER + 8(r4)
> -	ld      r7, VCPU_SIER + 16(r4)
> -	mtspr   SPRN_MMCR3, r5
> -	mtspr   SPRN_SIER2, r6
> -	mtspr   SPRN_SIER3, r7
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
> BEGIN_FTR_SECTION
> 	ld	r5, VCPU_MMCR + 16(r4)
> 	ld	r6, VCPU_SIER(r4)
> 	mtspr	SPRN_MMCR2, r5
> 	mtspr	SPRN_SIER, r6
> -BEGIN_FTR_SECTION_NESTED(96)
> 	lwz	r7, VCPU_PMC + 24(r4)
> 	lwz	r8, VCPU_PMC + 28(r4)
> 	ld	r9, VCPU_MMCRS(r4)
> 	mtspr	SPRN_SPMC1, r7
> 	mtspr	SPRN_SPMC2, r8
> 	mtspr	SPRN_MMCRS, r9
> -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> 	mtspr	SPRN_MMCR0, r3
> 	isync
> @@ -2845,10 +2836,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> 	blr
> 
> /*
> + * void kvmhv_load_host_pmu(void)
> + *
>  * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
>  */
> -_GLOBAL(kvmhv_load_host_pmu)
> -EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
> +kvmhv_load_host_pmu:
> 	mflr	r0
> 	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
> 	cmpwi	r4, 0
> @@ -2886,25 +2878,18 @@ BEGIN_FTR_SECTION
> 	mtspr	SPRN_MMCR2, r8
> 	mtspr	SPRN_SIER, r9
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> -BEGIN_FTR_SECTION
> -	ld      r5, HSTATE_MMCR3(r13)
> -	ld      r6, HSTATE_SIER2(r13)
> -	ld      r7, HSTATE_SIER3(r13)
> -	mtspr   SPRN_MMCR3, r5
> -	mtspr   SPRN_SIER2, r6
> -	mtspr   SPRN_SIER3, r7
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
> 	mtspr	SPRN_MMCR0, r3
> 	isync
> 	mtlr	r0
> 23:	blr
> 
> /*
> + * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
> + *
>  * Save guest PMU state into the vcpu struct.
>  * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
>  */
> -_GLOBAL(kvmhv_save_guest_pmu)
> -EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
> +kvmhv_save_guest_pmu:
> 	mr	r9, r3
> 	mr	r8, r4
> BEGIN_FTR_SECTION
> @@ -2953,14 +2938,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> BEGIN_FTR_SECTION
> 	std	r10, VCPU_MMCR + 16(r9)
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> -BEGIN_FTR_SECTION
> -	mfspr   r5, SPRN_MMCR3
> -	mfspr   r6, SPRN_SIER2
> -	mfspr   r7, SPRN_SIER3
> -	std     r5, VCPU_MMCR + 24(r9)
> -	std     r6, VCPU_SIER + 8(r9)
> -	std     r7, VCPU_SIER + 16(r9)
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
> 	std	r7, VCPU_SIAR(r9)
> 	std	r8, VCPU_SDAR(r9)
> 	mfspr	r3, SPRN_PMC1
> @@ -2978,7 +2955,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
> BEGIN_FTR_SECTION
> 	mfspr	r5, SPRN_SIER
> 	std	r5, VCPU_SIER(r9)
> -BEGIN_FTR_SECTION_NESTED(96)
> 	mfspr	r6, SPRN_SPMC1
> 	mfspr	r7, SPRN_SPMC2
> 	mfspr	r8, SPRN_MMCRS
> @@ -2987,7 +2963,6 @@ BEGIN_FTR_SECTION_NESTED(96)
> 	std	r8, VCPU_MMCRS(r9)
> 	lis	r4, 0x8000
> 	mtspr	SPRN_MMCRS, r4
> -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> 22:	blr
> 
> -- 
> 2.23.0
>
Nicholas Piggin July 12, 2021, 2:49 a.m. UTC | #2
Excerpts from Athira Rajeev's message of July 10, 2021 12:47 pm:
> 
> 
>> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
>> 
>> Implement the P9 path PMU save/restore code in C, and remove the
>> POWER9/10 code from the P7/8 path assembly.
>> 
>> -449 cycles (8533) POWER9 virt-mode NULL hcall
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> arch/powerpc/include/asm/asm-prototypes.h |   5 -
>> arch/powerpc/kvm/book3s_hv.c              | 205 ++++++++++++++++++++--
>> arch/powerpc/kvm/book3s_hv_interrupts.S   |  13 +-
>> arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  43 +----
>> 4 files changed, 200 insertions(+), 66 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
>> index 02ee6f5ac9fe..928db8ef9a5a 100644
>> --- a/arch/powerpc/include/asm/asm-prototypes.h
>> +++ b/arch/powerpc/include/asm/asm-prototypes.h
>> @@ -136,11 +136,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
>> 					bool preserve_nv) { }
>> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
>> 
>> -void kvmhv_save_host_pmu(void);
>> -void kvmhv_load_host_pmu(void);
>> -void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
>> -void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
>> -
>> void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
>> 
>> long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index f7349d150828..b1b94b3563b7 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -3635,6 +3635,188 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>> 	trace_kvmppc_run_core(vc, 1);
>> }
>> 
>> +/*
>> + * Privileged (non-hypervisor) host registers to save.
>> + */
>> +struct p9_host_os_sprs {
>> +	unsigned long dscr;
>> +	unsigned long tidr;
>> +	unsigned long iamr;
>> +	unsigned long amr;
>> +	unsigned long fscr;
>> +
>> +	unsigned int pmc1;
>> +	unsigned int pmc2;
>> +	unsigned int pmc3;
>> +	unsigned int pmc4;
>> +	unsigned int pmc5;
>> +	unsigned int pmc6;
>> +	unsigned long mmcr0;
>> +	unsigned long mmcr1;
>> +	unsigned long mmcr2;
>> +	unsigned long mmcr3;
>> +	unsigned long mmcra;
>> +	unsigned long siar;
>> +	unsigned long sier1;
>> +	unsigned long sier2;
>> +	unsigned long sier3;
>> +	unsigned long sdar;
>> +};
>> +
>> +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
>> +{
>> +	if (!(mmcr0 & MMCR0_FC))
>> +		goto do_freeze;
>> +	if (mmcra & MMCRA_SAMPLE_ENABLE)
>> +		goto do_freeze;
>> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
>> +		if (!(mmcr0 & MMCR0_PMCCEXT))
>> +			goto do_freeze;
>> +		if (!(mmcra & MMCRA_BHRB_DISABLE))
>> +			goto do_freeze;
>> +	}
>> +	return;
> 
> 
> Hi Nick
> 
> When freezing the PMU, do we need to also set pmcregs_in_use to zero ?

Not immediately, we still need to save out the values of the PMU 
registers. If we clear pmcregs_in_use, then our hypervisor can discard 
the contents of those registers at any time.

> Also, why we need these above conditions like MMCRA_SAMPLE_ENABLE,  MMCR0_PMCCEXT checks also before freezing ?

Basically just because that's the condition we wnat to set the PMU to 
before entering the guest if the guest does not have its own PMU 
registers in use.

I'm not entirely sure this is correct / optimal for perf though, so we
should double check that. I think some of this stuff should be wrapped 
up and put into perf/ subdirectory as I've said before. KVM shouldn't 
need to know about exactly how PMU is to be set up and managed by
perf, we should just be able to ask perf to save/restore/switch state.

Thanks,
Nick
Athira Rajeev July 12, 2021, 2:07 p.m. UTC | #3
> On 12-Jul-2021, at 8:19 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
> 
> Excerpts from Athira Rajeev's message of July 10, 2021 12:47 pm:
>> 
>> 
>>> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
>>> 
>>> Implement the P9 path PMU save/restore code in C, and remove the
>>> POWER9/10 code from the P7/8 path assembly.
>>> 
>>> -449 cycles (8533) POWER9 virt-mode NULL hcall
>>> 
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>> arch/powerpc/include/asm/asm-prototypes.h |   5 -
>>> arch/powerpc/kvm/book3s_hv.c              | 205 ++++++++++++++++++++--
>>> arch/powerpc/kvm/book3s_hv_interrupts.S   |  13 +-
>>> arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  43 +----
>>> 4 files changed, 200 insertions(+), 66 deletions(-)
>>> 
>>> diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
>>> index 02ee6f5ac9fe..928db8ef9a5a 100644
>>> --- a/arch/powerpc/include/asm/asm-prototypes.h
>>> +++ b/arch/powerpc/include/asm/asm-prototypes.h
>>> @@ -136,11 +136,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
>>> 					bool preserve_nv) { }
>>> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
>>> 
>>> -void kvmhv_save_host_pmu(void);
>>> -void kvmhv_load_host_pmu(void);
>>> -void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
>>> -void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
>>> -
>>> void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
>>> 
>>> long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>> index f7349d150828..b1b94b3563b7 100644
>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>> @@ -3635,6 +3635,188 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>>> 	trace_kvmppc_run_core(vc, 1);
>>> }
>>> 
>>> +/*
>>> + * Privileged (non-hypervisor) host registers to save.
>>> + */
>>> +struct p9_host_os_sprs {
>>> +	unsigned long dscr;
>>> +	unsigned long tidr;
>>> +	unsigned long iamr;
>>> +	unsigned long amr;
>>> +	unsigned long fscr;
>>> +
>>> +	unsigned int pmc1;
>>> +	unsigned int pmc2;
>>> +	unsigned int pmc3;
>>> +	unsigned int pmc4;
>>> +	unsigned int pmc5;
>>> +	unsigned int pmc6;
>>> +	unsigned long mmcr0;
>>> +	unsigned long mmcr1;
>>> +	unsigned long mmcr2;
>>> +	unsigned long mmcr3;
>>> +	unsigned long mmcra;
>>> +	unsigned long siar;
>>> +	unsigned long sier1;
>>> +	unsigned long sier2;
>>> +	unsigned long sier3;
>>> +	unsigned long sdar;
>>> +};
>>> +
>>> +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
>>> +{
>>> +	if (!(mmcr0 & MMCR0_FC))
>>> +		goto do_freeze;
>>> +	if (mmcra & MMCRA_SAMPLE_ENABLE)
>>> +		goto do_freeze;
>>> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
>>> +		if (!(mmcr0 & MMCR0_PMCCEXT))
>>> +			goto do_freeze;
>>> +		if (!(mmcra & MMCRA_BHRB_DISABLE))
>>> +			goto do_freeze;
>>> +	}
>>> +	return;
>> 
>> 
>> Hi Nick
>> 
>> When freezing the PMU, do we need to also set pmcregs_in_use to zero ?
> 
> Not immediately, we still need to save out the values of the PMU 
> registers. If we clear pmcregs_in_use, then our hypervisor can discard 
> the contents of those registers at any time.
> 
>> Also, why we need these above conditions like MMCRA_SAMPLE_ENABLE,  MMCR0_PMCCEXT checks also before freezing ?
> 
> Basically just because that's the condition we wnat to set the PMU to 
> before entering the guest if the guest does not have its own PMU 
> registers in use.
> 
> I'm not entirely sure this is correct / optimal for perf though, so we
> should double check that. I think some of this stuff should be wrapped 
> up and put into perf/ subdirectory as I've said before. KVM shouldn't 
> need to know about exactly how PMU is to be set up and managed by
> perf, we should just be able to ask perf to save/restore/switch state.

Hi Nick,

Agree to your point. It makes sense that some of these perf code should be moved under perf/ directory.

Athira
> 
> Thanks,
> Nick
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 02ee6f5ac9fe..928db8ef9a5a 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -136,11 +136,6 @@  static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
 					bool preserve_nv) { }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-void kvmhv_save_host_pmu(void);
-void kvmhv_load_host_pmu(void);
-void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
-void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-
 void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
 
 long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f7349d150828..b1b94b3563b7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3635,6 +3635,188 @@  static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	trace_kvmppc_run_core(vc, 1);
 }
 
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+	unsigned long dscr;
+	unsigned long tidr;
+	unsigned long iamr;
+	unsigned long amr;
+	unsigned long fscr;
+
+	unsigned int pmc1;
+	unsigned int pmc2;
+	unsigned int pmc3;
+	unsigned int pmc4;
+	unsigned int pmc5;
+	unsigned int pmc6;
+	unsigned long mmcr0;
+	unsigned long mmcr1;
+	unsigned long mmcr2;
+	unsigned long mmcr3;
+	unsigned long mmcra;
+	unsigned long siar;
+	unsigned long sier1;
+	unsigned long sier2;
+	unsigned long sier3;
+	unsigned long sdar;
+};
+
+static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
+{
+	if (!(mmcr0 & MMCR0_FC))
+		goto do_freeze;
+	if (mmcra & MMCRA_SAMPLE_ENABLE)
+		goto do_freeze;
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		if (!(mmcr0 & MMCR0_PMCCEXT))
+			goto do_freeze;
+		if (!(mmcra & MMCRA_BHRB_DISABLE))
+			goto do_freeze;
+	}
+	return;
+
+do_freeze:
+	mmcr0 = MMCR0_FC;
+	mmcra = 0;
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		mmcr0 |= MMCR0_PMCCEXT;
+		mmcra = MMCRA_BHRB_DISABLE;
+	}
+
+	mtspr(SPRN_MMCR0, mmcr0);
+	mtspr(SPRN_MMCRA, mmcra);
+	isync();
+}
+
+static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+{
+	if (ppc_get_pmu_inuse()) {
+		/*
+		 * It might be better to put PMU handling (at least for the
+		 * host) in the perf subsystem because it knows more about what
+		 * is being used.
+		 */
+
+		/* POWER9, POWER10 do not implement HPMC or SPMC */
+
+		host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
+		host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
+
+		freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
+
+		host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
+		host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
+		host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
+		host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
+		host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
+		host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
+		host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
+		host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
+		host_os_sprs->sdar = mfspr(SPRN_SDAR);
+		host_os_sprs->siar = mfspr(SPRN_SIAR);
+		host_os_sprs->sier1 = mfspr(SPRN_SIER);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
+			host_os_sprs->sier2 = mfspr(SPRN_SIER2);
+			host_os_sprs->sier3 = mfspr(SPRN_SIER3);
+		}
+	}
+}
+
+static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
+{
+	mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
+	mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
+	mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
+	mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
+	mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
+	mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
+	mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
+	mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
+	mtspr(SPRN_SDAR, vcpu->arch.sdar);
+	mtspr(SPRN_SIAR, vcpu->arch.siar);
+	mtspr(SPRN_SIER, vcpu->arch.sier[0]);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		mtspr(SPRN_MMCR3, vcpu->arch.mmcr[4]);
+		mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
+		mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
+	}
+
+	/* Set MMCRA then MMCR0 last */
+	mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
+	mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
+	/* No isync necessary because we're starting counters */
+}
+
+static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
+{
+	struct lppaca *lp;
+	int save_pmu = 1;
+
+	lp = vcpu->arch.vpa.pinned_addr;
+	if (lp)
+		save_pmu = lp->pmcregs_in_use;
+
+	if (save_pmu) {
+		vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
+		vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
+
+		freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
+
+		vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
+		vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
+		vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
+		vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
+		vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
+		vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
+		vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
+		vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
+		vcpu->arch.sdar = mfspr(SPRN_SDAR);
+		vcpu->arch.siar = mfspr(SPRN_SIAR);
+		vcpu->arch.sier[0] = mfspr(SPRN_SIER);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
+			vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
+			vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
+		}
+	} else {
+		freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
+	}
+}
+
+static void load_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+{
+	if (ppc_get_pmu_inuse()) {
+		mtspr(SPRN_PMC1, host_os_sprs->pmc1);
+		mtspr(SPRN_PMC2, host_os_sprs->pmc2);
+		mtspr(SPRN_PMC3, host_os_sprs->pmc3);
+		mtspr(SPRN_PMC4, host_os_sprs->pmc4);
+		mtspr(SPRN_PMC5, host_os_sprs->pmc5);
+		mtspr(SPRN_PMC6, host_os_sprs->pmc6);
+		mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
+		mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
+		mtspr(SPRN_SDAR, host_os_sprs->sdar);
+		mtspr(SPRN_SIAR, host_os_sprs->siar);
+		mtspr(SPRN_SIER, host_os_sprs->sier1);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
+			mtspr(SPRN_SIER2, host_os_sprs->sier2);
+			mtspr(SPRN_SIER3, host_os_sprs->sier3);
+		}
+
+		/* Set MMCRA then MMCR0 last */
+		mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
+		mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
+		isync();
+	}
+}
+
 static void load_spr_state(struct kvm_vcpu *vcpu)
 {
 	mtspr(SPRN_DSCR, vcpu->arch.dscr);
@@ -3677,17 +3859,6 @@  static void store_spr_state(struct kvm_vcpu *vcpu)
 	vcpu->arch.dscr = mfspr(SPRN_DSCR);
 }
 
-/*
- * Privileged (non-hypervisor) host registers to save.
- */
-struct p9_host_os_sprs {
-	unsigned long dscr;
-	unsigned long tidr;
-	unsigned long iamr;
-	unsigned long amr;
-	unsigned long fscr;
-};
-
 static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
 {
 	host_os_sprs->dscr = mfspr(SPRN_DSCR);
@@ -3735,7 +3906,7 @@  static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	struct p9_host_os_sprs host_os_sprs;
 	s64 dec;
 	u64 tb, next_timer;
-	int trap, save_pmu;
+	int trap;
 
 	WARN_ON_ONCE(vcpu->arch.ceded);
 
@@ -3748,7 +3919,7 @@  static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	save_p9_host_os_sprs(&host_os_sprs);
 
-	kvmhv_save_host_pmu();		/* saves it to PACA kvm_hstate */
+	save_p9_host_pmu(&host_os_sprs);
 
 	kvmppc_subcore_enter_guest();
 
@@ -3776,7 +3947,7 @@  static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		}
 	}
 #endif
-	kvmhv_load_guest_pmu(vcpu);
+	load_p9_guest_pmu(vcpu);
 
 	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
 	load_fp_state(&vcpu->arch.fp);
@@ -3898,16 +4069,14 @@  static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
 		kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
 
-	save_pmu = 1;
 	if (vcpu->arch.vpa.pinned_addr) {
 		struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
 		u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
 		lp->yield_count = cpu_to_be32(yield_count);
 		vcpu->arch.vpa.dirty = 1;
-		save_pmu = lp->pmcregs_in_use;
 	}
 
-	kvmhv_save_guest_pmu(vcpu, save_pmu);
+	save_p9_guest_pmu(vcpu);
 #ifdef CONFIG_PPC_PSERIES
 	if (kvmhv_on_pseries())
 		get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
@@ -3920,7 +4089,7 @@  static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
 
-	kvmhv_load_host_pmu();
+	load_p9_host_pmu(&host_os_sprs);
 
 	kvmppc_subcore_exit_guest();
 
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 4444f83cb133..59d89e4b154a 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -104,7 +104,10 @@  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtlr	r0
 	blr
 
-_GLOBAL(kvmhv_save_host_pmu)
+/*
+ * void kvmhv_save_host_pmu(void)
+ */
+kvmhv_save_host_pmu:
 BEGIN_FTR_SECTION
 	/* Work around P8 PMAE bug */
 	li	r3, -1
@@ -138,14 +141,6 @@  BEGIN_FTR_SECTION
 	std	r8, HSTATE_MMCR2(r13)
 	std	r9, HSTATE_SIER(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	mfspr	r5, SPRN_MMCR3
-	mfspr	r6, SPRN_SIER2
-	mfspr	r7, SPRN_SIER3
-	std	r5, HSTATE_MMCR3(r13)
-	std	r6, HSTATE_SIER2(r13)
-	std	r7, HSTATE_SIER3(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	mfspr	r3, SPRN_PMC1
 	mfspr	r5, SPRN_PMC2
 	mfspr	r6, SPRN_PMC3
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 007f87b97184..0eb06734bc26 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2780,10 +2780,11 @@  kvmppc_msr_interrupt:
 	blr
 
 /*
+ * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
+ *
  * Load up guest PMU state.  R3 points to the vcpu struct.
  */
-_GLOBAL(kvmhv_load_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
+kvmhv_load_guest_pmu:
 	mr	r4, r3
 	mflr	r0
 	li	r3, 1
@@ -2817,27 +2818,17 @@  END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
 	mtspr	SPRN_MMCRA, r6
 	mtspr	SPRN_SIAR, r7
 	mtspr	SPRN_SDAR, r8
-BEGIN_FTR_SECTION
-	ld      r5, VCPU_MMCR + 24(r4)
-	ld      r6, VCPU_SIER + 8(r4)
-	ld      r7, VCPU_SIER + 16(r4)
-	mtspr   SPRN_MMCR3, r5
-	mtspr   SPRN_SIER2, r6
-	mtspr   SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 BEGIN_FTR_SECTION
 	ld	r5, VCPU_MMCR + 16(r4)
 	ld	r6, VCPU_SIER(r4)
 	mtspr	SPRN_MMCR2, r5
 	mtspr	SPRN_SIER, r6
-BEGIN_FTR_SECTION_NESTED(96)
 	lwz	r7, VCPU_PMC + 24(r4)
 	lwz	r8, VCPU_PMC + 28(r4)
 	ld	r9, VCPU_MMCRS(r4)
 	mtspr	SPRN_SPMC1, r7
 	mtspr	SPRN_SPMC2, r8
 	mtspr	SPRN_MMCRS, r9
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_MMCR0, r3
 	isync
@@ -2845,10 +2836,11 @@  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	blr
 
 /*
+ * void kvmhv_load_host_pmu(void)
+ *
  * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
  */
-_GLOBAL(kvmhv_load_host_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
+kvmhv_load_host_pmu:
 	mflr	r0
 	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
 	cmpwi	r4, 0
@@ -2886,25 +2878,18 @@  BEGIN_FTR_SECTION
 	mtspr	SPRN_MMCR2, r8
 	mtspr	SPRN_SIER, r9
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	ld      r5, HSTATE_MMCR3(r13)
-	ld      r6, HSTATE_SIER2(r13)
-	ld      r7, HSTATE_SIER3(r13)
-	mtspr   SPRN_MMCR3, r5
-	mtspr   SPRN_SIER2, r6
-	mtspr   SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	mtspr	SPRN_MMCR0, r3
 	isync
 	mtlr	r0
 23:	blr
 
 /*
+ * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
+ *
  * Save guest PMU state into the vcpu struct.
  * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
  */
-_GLOBAL(kvmhv_save_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
+kvmhv_save_guest_pmu:
 	mr	r9, r3
 	mr	r8, r4
 BEGIN_FTR_SECTION
@@ -2953,14 +2938,6 @@  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 BEGIN_FTR_SECTION
 	std	r10, VCPU_MMCR + 16(r9)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	mfspr   r5, SPRN_MMCR3
-	mfspr   r6, SPRN_SIER2
-	mfspr   r7, SPRN_SIER3
-	std     r5, VCPU_MMCR + 24(r9)
-	std     r6, VCPU_SIER + 8(r9)
-	std     r7, VCPU_SIER + 16(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	std	r7, VCPU_SIAR(r9)
 	std	r8, VCPU_SDAR(r9)
 	mfspr	r3, SPRN_PMC1
@@ -2978,7 +2955,6 @@  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_SIER
 	std	r5, VCPU_SIER(r9)
-BEGIN_FTR_SECTION_NESTED(96)
 	mfspr	r6, SPRN_SPMC1
 	mfspr	r7, SPRN_SPMC2
 	mfspr	r8, SPRN_MMCRS
@@ -2987,7 +2963,6 @@  BEGIN_FTR_SECTION_NESTED(96)
 	std	r8, VCPU_MMCRS(r9)
 	lis	r4, 0x8000
 	mtspr	SPRN_MMCRS, r4
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 22:	blr