Message ID | 20240224073359.1025835-1-npiggin@gmail.com |
---|---|
State | New |
Headers | show |
Series | spapr: avoid overhead of finding vhyp class in critical operations | expand |
On Sat, Feb 24, 2024 at 05:33:59PM +1000, Nicholas Piggin wrote: > PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like > interrupts and TLB misses and is quite costly. Running the > kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG > TLB and spends a lot of time in TLB and page table walking code. The > test takes 67 seconds to complete with a lot of time being spent in > code related to finding the vhyp class: > > 12.01% [.] g_str_hash > 8.94% [.] g_hash_table_lookup > 8.06% [.] object_class_dynamic_cast > 6.21% [.] address_space_ldq > 4.94% [.] __strcmp_avx2 > 4.28% [.] tlb_set_page_full > 4.08% [.] address_space_translate_internal > 3.17% [.] object_class_dynamic_cast_assert > 2.84% [.] ppc_radix64_xlate > > Keep a pointer to the class and avoid this lookup. This reduces the > execution time to 40 seconds. > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> > --- > This feels a bit ugly, but the performance problem of looking up the > class in fast paths can't be ignored. Is there a "nicer" way to get the > same result? Not one I'm aware of, unfortunately. 
> > Thanks, > Nick > > target/ppc/cpu.h | 3 ++- > target/ppc/mmu-book3s-v3.h | 4 +--- > hw/ppc/pegasos2.c | 1 + > target/ppc/cpu_init.c | 9 +++------ > target/ppc/excp_helper.c | 16 ++++------------ > target/ppc/kvm.c | 4 +--- > target/ppc/mmu-hash64.c | 16 ++++------------ > target/ppc/mmu-radix64.c | 4 +--- > 8 files changed, 17 insertions(+), 40 deletions(-) > > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h > index ec14574d14..eb85d9aa71 100644 > --- a/target/ppc/cpu.h > +++ b/target/ppc/cpu.h > @@ -1437,6 +1437,7 @@ struct ArchCPU { > int vcpu_id; > uint32_t compat_pvr; > PPCVirtualHypervisor *vhyp; > + PPCVirtualHypervisorClass *vhyp_class; > void *machine_data; > int32_t node_id; /* NUMA node this CPU belongs to */ > PPCHash64Options *hash64_opts; > @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass, > > static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu) > { > - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu); > + return cpu->vhyp_class->cpu_in_nested(cpu); > } > #endif /* CONFIG_USER_ONLY */ > > diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h > index 674377a19e..f3f7993958 100644 > --- a/target/ppc/mmu-book3s-v3.h > +++ b/target/ppc/mmu-book3s-v3.h > @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) > uint64_t base; > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - return vhc->hpt_mask(cpu->vhyp); > + return cpu->vhyp_class->hpt_mask(cpu->vhyp); > } > if (cpu->env.mmu_model == POWERPC_MMU_3_00) { > ppc_v3_pate_t pate; > diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c > index 04d6decb2b..c22e8b336d 100644 > --- a/hw/ppc/pegasos2.c > +++ b/hw/ppc/pegasos2.c > @@ -400,6 +400,7 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) > machine->fdt = fdt; > > pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine); > + pm->cpu->vhyp_class = 
PPC_VIRTUAL_HYPERVISOR_GET_CLASS(pm->cpu->vhyp); > } > > enum pegasos2_rtas_tokens { > diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c > index 9bccddb350..63d0094024 100644 > --- a/target/ppc/cpu_init.c > +++ b/target/ppc/cpu_init.c > @@ -6631,6 +6631,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) > CPUPPCState *env = &cpu->env; > > cpu->vhyp = vhyp; > + cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp); > > /* > * With a virtual hypervisor mode we never allow the CPU to go > @@ -7224,9 +7225,7 @@ static void ppc_cpu_exec_enter(CPUState *cs) > PowerPCCPU *cpu = POWERPC_CPU(cs); > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->cpu_exec_enter(cpu->vhyp, cpu); > + cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu); > } > } > > @@ -7235,9 +7234,7 @@ static void ppc_cpu_exec_exit(CPUState *cs) > PowerPCCPU *cpu = POWERPC_CPU(cs); > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->cpu_exec_exit(cpu->vhyp, cpu); > + cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu); > } > } > #endif /* CONFIG_TCG */ > diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c > index 98952de267..445350488c 100644 > --- a/target/ppc/excp_helper.c > +++ b/target/ppc/excp_helper.c > @@ -840,9 +840,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp) > * HV mode, we need to keep hypercall support. > */ > if (lev == 1 && cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->hypercall(cpu->vhyp, cpu); > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); > powerpc_reset_excp_state(cpu); > return; > } > @@ -1012,9 +1010,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp) > * HV mode, we need to keep hypercall support. 
> */ > if (lev == 1 && cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->hypercall(cpu->vhyp, cpu); > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); > powerpc_reset_excp_state(cpu); > return; > } > @@ -1534,9 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) > > /* "PAPR mode" built-in hypercall emulation */ > if (lev == 1 && books_vhyp_handles_hcall(cpu)) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->hypercall(cpu->vhyp, cpu); > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); > powerpc_reset_excp_state(cpu); > return; > } > @@ -1677,10 +1671,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) > } > > if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */ > - vhc->deliver_hv_excp(cpu, excp); > + cpu->vhyp_class->deliver_hv_excp(cpu, excp); > > powerpc_reset_excp_state(cpu); > > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c > index 26fa9d0575..5b5b96ab6b 100644 > --- a/target/ppc/kvm.c > +++ b/target/ppc/kvm.c > @@ -862,9 +862,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu) > sregs.pvr = env->spr[SPR_PVR]; > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); > + sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp); > } else { > sregs.u.s.sdr1 = env->spr[SPR_SDR1]; > } > diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c > index d645c0bb94..196b4b2a48 100644 > --- a/target/ppc/mmu-hash64.c > +++ b/target/ppc/mmu-hash64.c > @@ -516,9 +516,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, > const ppc_hash_pte64_t *hptes; > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - 
PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - return vhc->map_hptes(cpu->vhyp, ptex, n); > + return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n); > } > base = ppc_hash64_hpt_base(cpu); > > @@ -538,9 +536,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, > hwaddr ptex, int n) > { > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n); > + cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n); > return; > } > > @@ -820,9 +816,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) > hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->hpte_set_r(cpu->vhyp, ptex, pte1); > + cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1); > return; > } > base = ppc_hash64_hpt_base(cpu); > @@ -837,9 +831,7 @@ static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) > hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->hpte_set_c(cpu->vhyp, ptex, pte1); > + cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1); > return; > } > base = ppc_hash64_hpt_base(cpu); > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c > index 5823e039e6..496ba87a95 100644 > --- a/target/ppc/mmu-radix64.c > +++ b/target/ppc/mmu-radix64.c > @@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, > > /* Get Partition Table */ > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc; > - vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) { > + if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) { > if (guest_visible) { > ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr, > DSISR_R_BADCONFIG);
Hi Nick, One minor comment below: On 2/24/24 13:03, Nicholas Piggin wrote: > PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like > interrupts and TLB misses and is quite costly. Running the > kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG > TLB and spends a lot of time in TLB and page table walking code. The > test takes 67 seconds to complete with a lot of time being spent in > code related to finding the vhyp class: > > 12.01% [.] g_str_hash > 8.94% [.] g_hash_table_lookup > 8.06% [.] object_class_dynamic_cast > 6.21% [.] address_space_ldq > 4.94% [.] __strcmp_avx2 > 4.28% [.] tlb_set_page_full > 4.08% [.] address_space_translate_internal > 3.17% [.] object_class_dynamic_cast_assert > 2.84% [.] ppc_radix64_xlate > > Keep a pointer to the class and avoid this lookup. This reduces the > execution time to 40 seconds. > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> > --- > This feels a bit ugly, but the performance problem of looking up the > class in fast paths can't be ignored. Is there a "nicer" way to get the > same result? 
> > Thanks, > Nick > > target/ppc/cpu.h | 3 ++- > target/ppc/mmu-book3s-v3.h | 4 +--- > hw/ppc/pegasos2.c | 1 + > target/ppc/cpu_init.c | 9 +++------ > target/ppc/excp_helper.c | 16 ++++------------ > target/ppc/kvm.c | 4 +--- > target/ppc/mmu-hash64.c | 16 ++++------------ > target/ppc/mmu-radix64.c | 4 +--- > 8 files changed, 17 insertions(+), 40 deletions(-) > > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h > index ec14574d14..eb85d9aa71 100644 > --- a/target/ppc/cpu.h > +++ b/target/ppc/cpu.h > @@ -1437,6 +1437,7 @@ struct ArchCPU { > int vcpu_id; > uint32_t compat_pvr; > PPCVirtualHypervisor *vhyp; > + PPCVirtualHypervisorClass *vhyp_class; > void *machine_data; > int32_t node_id; /* NUMA node this CPU belongs to */ > PPCHash64Options *hash64_opts; > @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass, > > static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu) > { > - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu); > + return cpu->vhyp_class->cpu_in_nested(cpu); > } > #endif /* CONFIG_USER_ONLY */ > > diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h > index 674377a19e..f3f7993958 100644 > --- a/target/ppc/mmu-book3s-v3.h > +++ b/target/ppc/mmu-book3s-v3.h > @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) > uint64_t base; > > if (cpu->vhyp) { All the checks for cpu->vhyp needs to be changed to check for cpu->vhyp_class now, for all such instances. 
With that, Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - return vhc->hpt_mask(cpu->vhyp); > + return cpu->vhyp_class->hpt_mask(cpu->vhyp); > } > if (cpu->env.mmu_model == POWERPC_MMU_3_00) { > ppc_v3_pate_t pate; > diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c > index 04d6decb2b..c22e8b336d 100644 > --- a/hw/ppc/pegasos2.c > +++ b/hw/ppc/pegasos2.c > @@ -400,6 +400,7 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) > machine->fdt = fdt; > > pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine); > + pm->cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(pm->cpu->vhyp); > } > > enum pegasos2_rtas_tokens { > diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c > index 9bccddb350..63d0094024 100644 > --- a/target/ppc/cpu_init.c > +++ b/target/ppc/cpu_init.c > @@ -6631,6 +6631,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) > CPUPPCState *env = &cpu->env; > > cpu->vhyp = vhyp; > + cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp); > > /* > * With a virtual hypervisor mode we never allow the CPU to go > @@ -7224,9 +7225,7 @@ static void ppc_cpu_exec_enter(CPUState *cs) > PowerPCCPU *cpu = POWERPC_CPU(cs); > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->cpu_exec_enter(cpu->vhyp, cpu); > + cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu); > } > } > > @@ -7235,9 +7234,7 @@ static void ppc_cpu_exec_exit(CPUState *cs) > PowerPCCPU *cpu = POWERPC_CPU(cs); > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->cpu_exec_exit(cpu->vhyp, cpu); > + cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu); > } > } > #endif /* CONFIG_TCG */ > diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c > index 98952de267..445350488c 100644 > --- a/target/ppc/excp_helper.c > +++ 
b/target/ppc/excp_helper.c > @@ -840,9 +840,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp) > * HV mode, we need to keep hypercall support. > */ > if (lev == 1 && cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->hypercall(cpu->vhyp, cpu); > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); > powerpc_reset_excp_state(cpu); > return; > } > @@ -1012,9 +1010,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp) > * HV mode, we need to keep hypercall support. > */ > if (lev == 1 && cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->hypercall(cpu->vhyp, cpu); > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); > powerpc_reset_excp_state(cpu); > return; > } > @@ -1534,9 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) > > /* "PAPR mode" built-in hypercall emulation */ > if (lev == 1 && books_vhyp_handles_hcall(cpu)) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->hypercall(cpu->vhyp, cpu); > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); > powerpc_reset_excp_state(cpu); > return; > } > @@ -1677,10 +1671,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) > } > > if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */ > - vhc->deliver_hv_excp(cpu, excp); > + cpu->vhyp_class->deliver_hv_excp(cpu, excp); > > powerpc_reset_excp_state(cpu); > > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c > index 26fa9d0575..5b5b96ab6b 100644 > --- a/target/ppc/kvm.c > +++ b/target/ppc/kvm.c > @@ -862,9 +862,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu) > sregs.pvr = env->spr[SPR_PVR]; > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - sregs.u.s.sdr1 = 
vhc->encode_hpt_for_kvm_pr(cpu->vhyp); > + sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp); > } else { > sregs.u.s.sdr1 = env->spr[SPR_SDR1]; > } > diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c > index d645c0bb94..196b4b2a48 100644 > --- a/target/ppc/mmu-hash64.c > +++ b/target/ppc/mmu-hash64.c > @@ -516,9 +516,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, > const ppc_hash_pte64_t *hptes; > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - return vhc->map_hptes(cpu->vhyp, ptex, n); > + return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n); > } > base = ppc_hash64_hpt_base(cpu); > > @@ -538,9 +536,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, > hwaddr ptex, int n) > { > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n); > + cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n); > return; > } > > @@ -820,9 +816,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) > hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->hpte_set_r(cpu->vhyp, ptex, pte1); > + cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1); > return; > } > base = ppc_hash64_hpt_base(cpu); > @@ -837,9 +831,7 @@ static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) > hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; > > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc = > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - vhc->hpte_set_c(cpu->vhyp, ptex, pte1); > + cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1); > return; > } > base = ppc_hash64_hpt_base(cpu); > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c > index 5823e039e6..496ba87a95 100644 > 
--- a/target/ppc/mmu-radix64.c > +++ b/target/ppc/mmu-radix64.c > @@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, > > /* Get Partition Table */ > if (cpu->vhyp) { > - PPCVirtualHypervisorClass *vhc; > - vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > - if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) { > + if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) { > if (guest_visible) { > ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr, > DSISR_R_BADCONFIG);
On Tue Mar 12, 2024 at 4:38 PM AEST, Harsh Prateek Bora wrote: > Hi Nick, > > One minor comment below: > > On 2/24/24 13:03, Nicholas Piggin wrote: > > PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like > > interrupts and TLB misses and is quite costly. Running the > > kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG > > TLB and spends a lot of time in TLB and page table walking code. The > > test takes 67 seconds to complete with a lot of time being spent in > > code related to finding the vhyp class: > > > > 12.01% [.] g_str_hash > > 8.94% [.] g_hash_table_lookup > > 8.06% [.] object_class_dynamic_cast > > 6.21% [.] address_space_ldq > > 4.94% [.] __strcmp_avx2 > > 4.28% [.] tlb_set_page_full > > 4.08% [.] address_space_translate_internal > > 3.17% [.] object_class_dynamic_cast_assert > > 2.84% [.] ppc_radix64_xlate > > > > Keep a pointer to the class and avoid this lookup. This reduces the > > execution time to 40 seconds. > > > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> > > --- > > This feels a bit ugly, but the performance problem of looking up the > > class in fast paths can't be ignored. Is there a "nicer" way to get the > > same result? 
> > > > Thanks, > > Nick > > > > target/ppc/cpu.h | 3 ++- > > target/ppc/mmu-book3s-v3.h | 4 +--- > > hw/ppc/pegasos2.c | 1 + > > target/ppc/cpu_init.c | 9 +++------ > > target/ppc/excp_helper.c | 16 ++++------------ > > target/ppc/kvm.c | 4 +--- > > target/ppc/mmu-hash64.c | 16 ++++------------ > > target/ppc/mmu-radix64.c | 4 +--- > > 8 files changed, 17 insertions(+), 40 deletions(-) > > > > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h > > index ec14574d14..eb85d9aa71 100644 > > --- a/target/ppc/cpu.h > > +++ b/target/ppc/cpu.h > > @@ -1437,6 +1437,7 @@ struct ArchCPU { > > int vcpu_id; > > uint32_t compat_pvr; > > PPCVirtualHypervisor *vhyp; > > + PPCVirtualHypervisorClass *vhyp_class; > > void *machine_data; > > int32_t node_id; /* NUMA node this CPU belongs to */ > > PPCHash64Options *hash64_opts; > > @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass, > > > > static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu) > > { > > - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu); > > + return cpu->vhyp_class->cpu_in_nested(cpu); > > } > > #endif /* CONFIG_USER_ONLY */ > > > > diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h > > index 674377a19e..f3f7993958 100644 > > --- a/target/ppc/mmu-book3s-v3.h > > +++ b/target/ppc/mmu-book3s-v3.h > > @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) > > uint64_t base; > > > > if (cpu->vhyp) { > > All the checks for cpu->vhyp needs to be changed to check for > cpu->vhyp_class now, for all such instances. It wasn't supposed to, because vhyp != NULL implies vhyp_class != NULL. It's supposed to be an equivalent transformation just changing the lookup function. Okay to leave it as is? 
Thanks, Nick > > With that, > > Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> > > > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - return vhc->hpt_mask(cpu->vhyp); > > + return cpu->vhyp_class->hpt_mask(cpu->vhyp); > > } > > if (cpu->env.mmu_model == POWERPC_MMU_3_00) { > > ppc_v3_pate_t pate; > > diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c > > index 04d6decb2b..c22e8b336d 100644 > > --- a/hw/ppc/pegasos2.c > > +++ b/hw/ppc/pegasos2.c > > @@ -400,6 +400,7 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) > > machine->fdt = fdt; > > > > pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine); > > + pm->cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(pm->cpu->vhyp); > > } > > > > enum pegasos2_rtas_tokens { > > diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c > > index 9bccddb350..63d0094024 100644 > > --- a/target/ppc/cpu_init.c > > +++ b/target/ppc/cpu_init.c > > @@ -6631,6 +6631,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) > > CPUPPCState *env = &cpu->env; > > > > cpu->vhyp = vhyp; > > + cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp); > > > > /* > > * With a virtual hypervisor mode we never allow the CPU to go > > @@ -7224,9 +7225,7 @@ static void ppc_cpu_exec_enter(CPUState *cs) > > PowerPCCPU *cpu = POWERPC_CPU(cs); > > > > if (cpu->vhyp) { > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - vhc->cpu_exec_enter(cpu->vhyp, cpu); > > + cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu); > > } > > } > > > > @@ -7235,9 +7234,7 @@ static void ppc_cpu_exec_exit(CPUState *cs) > > PowerPCCPU *cpu = POWERPC_CPU(cs); > > > > if (cpu->vhyp) { > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - vhc->cpu_exec_exit(cpu->vhyp, cpu); > > + cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu); > > } > > } > > #endif /* CONFIG_TCG */ > > diff --git 
a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c > > index 98952de267..445350488c 100644 > > --- a/target/ppc/excp_helper.c > > +++ b/target/ppc/excp_helper.c > > @@ -840,9 +840,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp) > > * HV mode, we need to keep hypercall support. > > */ > > if (lev == 1 && cpu->vhyp) { > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - vhc->hypercall(cpu->vhyp, cpu); > > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); > > powerpc_reset_excp_state(cpu); > > return; > > } > > @@ -1012,9 +1010,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp) > > * HV mode, we need to keep hypercall support. > > */ > > if (lev == 1 && cpu->vhyp) { > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - vhc->hypercall(cpu->vhyp, cpu); > > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); > > powerpc_reset_excp_state(cpu); > > return; > > } > > @@ -1534,9 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) > > > > /* "PAPR mode" built-in hypercall emulation */ > > if (lev == 1 && books_vhyp_handles_hcall(cpu)) { > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - vhc->hypercall(cpu->vhyp, cpu); > > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); > > powerpc_reset_excp_state(cpu); > > return; > > } > > @@ -1677,10 +1671,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) > > } > > > > if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) { > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */ > > - vhc->deliver_hv_excp(cpu, excp); > > + cpu->vhyp_class->deliver_hv_excp(cpu, excp); > > > > powerpc_reset_excp_state(cpu); > > > > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c > > index 26fa9d0575..5b5b96ab6b 100644 > > --- a/target/ppc/kvm.c > > +++ b/target/ppc/kvm.c 
> > @@ -862,9 +862,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu) > > sregs.pvr = env->spr[SPR_PVR]; > > > > if (cpu->vhyp) { > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); > > + sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp); > > } else { > > sregs.u.s.sdr1 = env->spr[SPR_SDR1]; > > } > > diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c > > index d645c0bb94..196b4b2a48 100644 > > --- a/target/ppc/mmu-hash64.c > > +++ b/target/ppc/mmu-hash64.c > > @@ -516,9 +516,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, > > const ppc_hash_pte64_t *hptes; > > > > if (cpu->vhyp) { > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - return vhc->map_hptes(cpu->vhyp, ptex, n); > > + return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n); > > } > > base = ppc_hash64_hpt_base(cpu); > > > > @@ -538,9 +536,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, > > hwaddr ptex, int n) > > { > > if (cpu->vhyp) { > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n); > > + cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n); > > return; > > } > > > > @@ -820,9 +816,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) > > hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; > > > > if (cpu->vhyp) { > > - PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - vhc->hpte_set_r(cpu->vhyp, ptex, pte1); > > + cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1); > > return; > > } > > base = ppc_hash64_hpt_base(cpu); > > @@ -837,9 +831,7 @@ static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) > > hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; > > > > if (cpu->vhyp) { > > - 
PPCVirtualHypervisorClass *vhc = > > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - vhc->hpte_set_c(cpu->vhyp, ptex, pte1); > > + cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1); > > return; > > } > > base = ppc_hash64_hpt_base(cpu); > > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c > > index 5823e039e6..496ba87a95 100644 > > --- a/target/ppc/mmu-radix64.c > > +++ b/target/ppc/mmu-radix64.c > > @@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, > > > > /* Get Partition Table */ > > if (cpu->vhyp) { > > - PPCVirtualHypervisorClass *vhc; > > - vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > > - if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) { > > + if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) { > > if (guest_visible) { > > ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr, > > DSISR_R_BADCONFIG);
On 3/12/24 14:18, Nicholas Piggin wrote: > On Tue Mar 12, 2024 at 4:38 PM AEST, Harsh Prateek Bora wrote: >> Hi Nick, >> >> One minor comment below: >> >> On 2/24/24 13:03, Nicholas Piggin wrote: >>> PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like >>> interrupts and TLB misses and is quite costly. Running the >>> kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG >>> TLB and spends a lot of time in TLB and page table walking code. The >>> test takes 67 seconds to complete with a lot of time being spent in >>> code related to finding the vhyp class: >>> >>> 12.01% [.] g_str_hash >>> 8.94% [.] g_hash_table_lookup >>> 8.06% [.] object_class_dynamic_cast >>> 6.21% [.] address_space_ldq >>> 4.94% [.] __strcmp_avx2 >>> 4.28% [.] tlb_set_page_full >>> 4.08% [.] address_space_translate_internal >>> 3.17% [.] object_class_dynamic_cast_assert >>> 2.84% [.] ppc_radix64_xlate >>> >>> Keep a pointer to the class and avoid this lookup. This reduces the >>> execution time to 40 seconds. >>> >>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> >>> --- >>> This feels a bit ugly, but the performance problem of looking up the >>> class in fast paths can't be ignored. Is there a "nicer" way to get the >>> same result? 
>>> >>> Thanks, >>> Nick >>> >>> target/ppc/cpu.h | 3 ++- >>> target/ppc/mmu-book3s-v3.h | 4 +--- >>> hw/ppc/pegasos2.c | 1 + >>> target/ppc/cpu_init.c | 9 +++------ >>> target/ppc/excp_helper.c | 16 ++++------------ >>> target/ppc/kvm.c | 4 +--- >>> target/ppc/mmu-hash64.c | 16 ++++------------ >>> target/ppc/mmu-radix64.c | 4 +--- >>> 8 files changed, 17 insertions(+), 40 deletions(-) >>> >>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h >>> index ec14574d14..eb85d9aa71 100644 >>> --- a/target/ppc/cpu.h >>> +++ b/target/ppc/cpu.h >>> @@ -1437,6 +1437,7 @@ struct ArchCPU { >>> int vcpu_id; >>> uint32_t compat_pvr; >>> PPCVirtualHypervisor *vhyp; >>> + PPCVirtualHypervisorClass *vhyp_class; >>> void *machine_data; >>> int32_t node_id; /* NUMA node this CPU belongs to */ >>> PPCHash64Options *hash64_opts; >>> @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass, >>> >>> static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu) >>> { >>> - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu); >>> + return cpu->vhyp_class->cpu_in_nested(cpu); >>> } >>> #endif /* CONFIG_USER_ONLY */ >>> >>> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h >>> index 674377a19e..f3f7993958 100644 >>> --- a/target/ppc/mmu-book3s-v3.h >>> +++ b/target/ppc/mmu-book3s-v3.h >>> @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) >>> uint64_t base; >>> >>> if (cpu->vhyp) { >> >> All the checks for cpu->vhyp needs to be changed to check for >> cpu->vhyp_class now, for all such instances. > > It wasn't supposed to, because vhyp != NULL implies vhyp_class != NULL. > It's supposed to be an equivalent transformation just changing the > lookup function. I agree, but not just it appears a bit odd, my only worry is if a future change cause vhyp_class to be NULL before the control reaches here, this check wont really serve the purpose. 
Anyways, not a mandatory requirement for now, so I shall leave it to your choice. regards, Harsh > > Okay to leave it as is? > > Thanks, > Nick > >> >> With that, >> >> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> >> >> >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - return vhc->hpt_mask(cpu->vhyp); >>> + return cpu->vhyp_class->hpt_mask(cpu->vhyp); >>> } >>> if (cpu->env.mmu_model == POWERPC_MMU_3_00) { >>> ppc_v3_pate_t pate; >>> diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c >>> index 04d6decb2b..c22e8b336d 100644 >>> --- a/hw/ppc/pegasos2.c >>> +++ b/hw/ppc/pegasos2.c >>> @@ -400,6 +400,7 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) >>> machine->fdt = fdt; >>> >>> pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine); >>> + pm->cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(pm->cpu->vhyp); >>> } >>> >>> enum pegasos2_rtas_tokens { >>> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c >>> index 9bccddb350..63d0094024 100644 >>> --- a/target/ppc/cpu_init.c >>> +++ b/target/ppc/cpu_init.c >>> @@ -6631,6 +6631,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) >>> CPUPPCState *env = &cpu->env; >>> >>> cpu->vhyp = vhyp; >>> + cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp); >>> >>> /* >>> * With a virtual hypervisor mode we never allow the CPU to go >>> @@ -7224,9 +7225,7 @@ static void ppc_cpu_exec_enter(CPUState *cs) >>> PowerPCCPU *cpu = POWERPC_CPU(cs); >>> >>> if (cpu->vhyp) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - vhc->cpu_exec_enter(cpu->vhyp, cpu); >>> + cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu); >>> } >>> } >>> >>> @@ -7235,9 +7234,7 @@ static void ppc_cpu_exec_exit(CPUState *cs) >>> PowerPCCPU *cpu = POWERPC_CPU(cs); >>> >>> if (cpu->vhyp) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - 
vhc->cpu_exec_exit(cpu->vhyp, cpu); >>> + cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu); >>> } >>> } >>> #endif /* CONFIG_TCG */ >>> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c >>> index 98952de267..445350488c 100644 >>> --- a/target/ppc/excp_helper.c >>> +++ b/target/ppc/excp_helper.c >>> @@ -840,9 +840,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp) >>> * HV mode, we need to keep hypercall support. >>> */ >>> if (lev == 1 && cpu->vhyp) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - vhc->hypercall(cpu->vhyp, cpu); >>> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); >>> powerpc_reset_excp_state(cpu); >>> return; >>> } >>> @@ -1012,9 +1010,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp) >>> * HV mode, we need to keep hypercall support. >>> */ >>> if (lev == 1 && cpu->vhyp) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - vhc->hypercall(cpu->vhyp, cpu); >>> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); >>> powerpc_reset_excp_state(cpu); >>> return; >>> } >>> @@ -1534,9 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) >>> >>> /* "PAPR mode" built-in hypercall emulation */ >>> if (lev == 1 && books_vhyp_handles_hcall(cpu)) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - vhc->hypercall(cpu->vhyp, cpu); >>> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); >>> powerpc_reset_excp_state(cpu); >>> return; >>> } >>> @@ -1677,10 +1671,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) >>> } >>> >>> if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */ >>> - vhc->deliver_hv_excp(cpu, excp); >>> + cpu->vhyp_class->deliver_hv_excp(cpu, excp); >>> >>> powerpc_reset_excp_state(cpu); >>> 
>>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c >>> index 26fa9d0575..5b5b96ab6b 100644 >>> --- a/target/ppc/kvm.c >>> +++ b/target/ppc/kvm.c >>> @@ -862,9 +862,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu) >>> sregs.pvr = env->spr[SPR_PVR]; >>> >>> if (cpu->vhyp) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); >>> + sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp); >>> } else { >>> sregs.u.s.sdr1 = env->spr[SPR_SDR1]; >>> } >>> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c >>> index d645c0bb94..196b4b2a48 100644 >>> --- a/target/ppc/mmu-hash64.c >>> +++ b/target/ppc/mmu-hash64.c >>> @@ -516,9 +516,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, >>> const ppc_hash_pte64_t *hptes; >>> >>> if (cpu->vhyp) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - return vhc->map_hptes(cpu->vhyp, ptex, n); >>> + return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n); >>> } >>> base = ppc_hash64_hpt_base(cpu); >>> >>> @@ -538,9 +536,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, >>> hwaddr ptex, int n) >>> { >>> if (cpu->vhyp) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n); >>> + cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n); >>> return; >>> } >>> >>> @@ -820,9 +816,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) >>> hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; >>> >>> if (cpu->vhyp) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - vhc->hpte_set_r(cpu->vhyp, ptex, pte1); >>> + cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1); >>> return; >>> } >>> base = ppc_hash64_hpt_base(cpu); >>> @@ -837,9 +831,7 @@ static void 
ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) >>> hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; >>> >>> if (cpu->vhyp) { >>> - PPCVirtualHypervisorClass *vhc = >>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - vhc->hpte_set_c(cpu->vhyp, ptex, pte1); >>> + cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1); >>> return; >>> } >>> base = ppc_hash64_hpt_base(cpu); >>> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c >>> index 5823e039e6..496ba87a95 100644 >>> --- a/target/ppc/mmu-radix64.c >>> +++ b/target/ppc/mmu-radix64.c >>> @@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, >>> >>> /* Get Partition Table */ >>> if (cpu->vhyp) { >>> - PPCVirtualHypervisorClass *vhc; >>> - vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); >>> - if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) { >>> + if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) { >>> if (guest_visible) { >>> ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr, >>> DSISR_R_BADCONFIG); > >
On Tue Mar 12, 2024 at 6:56 PM AEST, Harsh Prateek Bora wrote: > > > On 3/12/24 14:18, Nicholas Piggin wrote: > > On Tue Mar 12, 2024 at 4:38 PM AEST, Harsh Prateek Bora wrote: > >> Hi Nick, > >> > >> One minor comment below: > >> > >> On 2/24/24 13:03, Nicholas Piggin wrote: > >>> PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like > >>> interrupts and TLB misses and is quite costly. Running the > >>> kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG > >>> TLB and spends a lot of time in TLB and page table walking code. The > >>> test takes 67 seconds to complete with a lot of time being spent in > >>> code related to finding the vhyp class: > >>> > >>> 12.01% [.] g_str_hash > >>> 8.94% [.] g_hash_table_lookup > >>> 8.06% [.] object_class_dynamic_cast > >>> 6.21% [.] address_space_ldq > >>> 4.94% [.] __strcmp_avx2 > >>> 4.28% [.] tlb_set_page_full > >>> 4.08% [.] address_space_translate_internal > >>> 3.17% [.] object_class_dynamic_cast_assert > >>> 2.84% [.] ppc_radix64_xlate > >>> > >>> Keep a pointer to the class and avoid this lookup. This reduces the > >>> execution time to 40 seconds. > >>> > >>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> > >>> --- > >>> This feels a bit ugly, but the performance problem of looking up the > >>> class in fast paths can't be ignored. Is there a "nicer" way to get the > >>> same result? 
> >>> > >>> Thanks, > >>> Nick > >>> > >>> target/ppc/cpu.h | 3 ++- > >>> target/ppc/mmu-book3s-v3.h | 4 +--- > >>> hw/ppc/pegasos2.c | 1 + > >>> target/ppc/cpu_init.c | 9 +++------ > >>> target/ppc/excp_helper.c | 16 ++++------------ > >>> target/ppc/kvm.c | 4 +--- > >>> target/ppc/mmu-hash64.c | 16 ++++------------ > >>> target/ppc/mmu-radix64.c | 4 +--- > >>> 8 files changed, 17 insertions(+), 40 deletions(-) > >>> > >>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h > >>> index ec14574d14..eb85d9aa71 100644 > >>> --- a/target/ppc/cpu.h > >>> +++ b/target/ppc/cpu.h > >>> @@ -1437,6 +1437,7 @@ struct ArchCPU { > >>> int vcpu_id; > >>> uint32_t compat_pvr; > >>> PPCVirtualHypervisor *vhyp; > >>> + PPCVirtualHypervisorClass *vhyp_class; > >>> void *machine_data; > >>> int32_t node_id; /* NUMA node this CPU belongs to */ > >>> PPCHash64Options *hash64_opts; > >>> @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass, > >>> > >>> static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu) > >>> { > >>> - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu); > >>> + return cpu->vhyp_class->cpu_in_nested(cpu); > >>> } > >>> #endif /* CONFIG_USER_ONLY */ > >>> > >>> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h > >>> index 674377a19e..f3f7993958 100644 > >>> --- a/target/ppc/mmu-book3s-v3.h > >>> +++ b/target/ppc/mmu-book3s-v3.h > >>> @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) > >>> uint64_t base; > >>> > >>> if (cpu->vhyp) { > >> > >> All the checks for cpu->vhyp needs to be changed to check for > >> cpu->vhyp_class now, for all such instances. > > > > It wasn't supposed to, because vhyp != NULL implies vhyp_class != NULL. > > It's supposed to be an equivalent transformation just changing the > > lookup function. 
> > I agree, but it's not just that it appears a bit odd; my only worry is if a future > change causes vhyp_class to be NULL before the control reaches here, this > check won't really serve the purpose. Anyway, not a mandatory > requirement for now, so I shall leave it to your choice. It does look like it should be the other way around without the context of s/PPC_V_H_G_C(cpu->vhyp)/cpu->vhyp_class/. We'll never have vhyp != NULL && vhyp_class == NULL though, so it should be okay. Thanks, Nick
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index ec14574d14..eb85d9aa71 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1437,6 +1437,7 @@ struct ArchCPU { int vcpu_id; uint32_t compat_pvr; PPCVirtualHypervisor *vhyp; + PPCVirtualHypervisorClass *vhyp_class; void *machine_data; int32_t node_id; /* NUMA node this CPU belongs to */ PPCHash64Options *hash64_opts; @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass, static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu) { - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu); + return cpu->vhyp_class->cpu_in_nested(cpu); } #endif /* CONFIG_USER_ONLY */ diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h index 674377a19e..f3f7993958 100644 --- a/target/ppc/mmu-book3s-v3.h +++ b/target/ppc/mmu-book3s-v3.h @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) uint64_t base; if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - return vhc->hpt_mask(cpu->vhyp); + return cpu->vhyp_class->hpt_mask(cpu->vhyp); } if (cpu->env.mmu_model == POWERPC_MMU_3_00) { ppc_v3_pate_t pate; diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index 04d6decb2b..c22e8b336d 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -400,6 +400,7 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) machine->fdt = fdt; pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine); + pm->cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(pm->cpu->vhyp); } enum pegasos2_rtas_tokens { diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 9bccddb350..63d0094024 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -6631,6 +6631,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) CPUPPCState *env = &cpu->env; cpu->vhyp = vhyp; + cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp); /* * With a virtual hypervisor mode we never allow 
the CPU to go @@ -7224,9 +7225,7 @@ static void ppc_cpu_exec_enter(CPUState *cs) PowerPCCPU *cpu = POWERPC_CPU(cs); if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->cpu_exec_enter(cpu->vhyp, cpu); + cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu); } } @@ -7235,9 +7234,7 @@ static void ppc_cpu_exec_exit(CPUState *cs) PowerPCCPU *cpu = POWERPC_CPU(cs); if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->cpu_exec_exit(cpu->vhyp, cpu); + cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu); } } #endif /* CONFIG_TCG */ diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 98952de267..445350488c 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -840,9 +840,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp) * HV mode, we need to keep hypercall support. */ if (lev == 1 && cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->hypercall(cpu->vhyp, cpu); + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); powerpc_reset_excp_state(cpu); return; } @@ -1012,9 +1010,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp) * HV mode, we need to keep hypercall support. 
*/ if (lev == 1 && cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->hypercall(cpu->vhyp, cpu); + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); powerpc_reset_excp_state(cpu); return; } @@ -1534,9 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) /* "PAPR mode" built-in hypercall emulation */ if (lev == 1 && books_vhyp_handles_hcall(cpu)) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->hypercall(cpu->vhyp, cpu); + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); powerpc_reset_excp_state(cpu); return; } @@ -1677,10 +1671,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) } if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */ - vhc->deliver_hv_excp(cpu, excp); + cpu->vhyp_class->deliver_hv_excp(cpu, excp); powerpc_reset_excp_state(cpu); diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 26fa9d0575..5b5b96ab6b 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -862,9 +862,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu) sregs.pvr = env->spr[SPR_PVR]; if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); + sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp); } else { sregs.u.s.sdr1 = env->spr[SPR_SDR1]; } diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index d645c0bb94..196b4b2a48 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -516,9 +516,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes; if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - return vhc->map_hptes(cpu->vhyp, ptex, n); + return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n); 
} base = ppc_hash64_hpt_base(cpu); @@ -538,9 +536,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, hwaddr ptex, int n) { if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n); + cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n); return; } @@ -820,9 +816,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->hpte_set_r(cpu->vhyp, ptex, pte1); + cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1); return; } base = ppc_hash64_hpt_base(cpu); @@ -837,9 +831,7 @@ static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->hpte_set_c(cpu->vhyp, ptex, pte1); + cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1); return; } base = ppc_hash64_hpt_base(cpu); diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 5823e039e6..496ba87a95 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, /* Get Partition Table */ if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc; - vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) { + if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) { if (guest_visible) { ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr, DSISR_R_BADCONFIG);
PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like interrupts and TLB misses and is quite costly. Running the kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG TLB and spends a lot of time in TLB and page table walking code. The test takes 67 seconds to complete with a lot of time being spent in code related to finding the vhyp class: 12.01% [.] g_str_hash 8.94% [.] g_hash_table_lookup 8.06% [.] object_class_dynamic_cast 6.21% [.] address_space_ldq 4.94% [.] __strcmp_avx2 4.28% [.] tlb_set_page_full 4.08% [.] address_space_translate_internal 3.17% [.] object_class_dynamic_cast_assert 2.84% [.] ppc_radix64_xlate Keep a pointer to the class and avoid this lookup. This reduces the execution time to 40 seconds. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> --- This feels a bit ugly, but the performance problem of looking up the class in fast paths can't be ignored. Is there a "nicer" way to get the same result? Thanks, Nick target/ppc/cpu.h | 3 ++- target/ppc/mmu-book3s-v3.h | 4 +--- hw/ppc/pegasos2.c | 1 + target/ppc/cpu_init.c | 9 +++------ target/ppc/excp_helper.c | 16 ++++------------ target/ppc/kvm.c | 4 +--- target/ppc/mmu-hash64.c | 16 ++++------------ target/ppc/mmu-radix64.c | 4 +--- 8 files changed, 17 insertions(+), 40 deletions(-)