Message ID | 3b3cce4a39e0debbf6fa29d4a2ead3014898cb93.1716763435.git.balaton@eik.bme.hu |
---|---|
State | New |
Headers | show |
Series | Remaining MMU clean up patches | expand |
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote: > Two of these are not used anywhere and the other two are used only > once and can be inlined and removed from the header. I'd prefer to put these in the .c file. Probably calculating the base once would generate marginally better code since it would not have to keep reloading it (since there is a barrier there it can't cache the value). Thanks, Nick > > Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> > --- > target/ppc/mmu-hash32.c | 5 +++-- > target/ppc/mmu-hash32.h | 32 -------------------------------- > 2 files changed, 3 insertions(+), 34 deletions(-) > > diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c > index a2c0ac05d2..7a6a674f8a 100644 > --- a/target/ppc/mmu-hash32.c > +++ b/target/ppc/mmu-hash32.c > @@ -206,17 +206,18 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off, > { > hwaddr pte_offset = pteg_off; > target_ulong pte0, pte1; > + hwaddr base = ppc_hash32_hpt_base(cpu); > int i; > > for (i = 0; i < HPTES_PER_GROUP; i++) { > - pte0 = ppc_hash32_load_hpte0(cpu, pte_offset); > + pte0 = ldl_phys(CPU(cpu)->as, base + pte_offset); > /* > * pte0 contains the valid bit and must be read before pte1, > * otherwise we might see an old pte1 with a new valid bit and > * thus an inconsistent hpte value > */ > smp_rmb(); > - pte1 = ppc_hash32_load_hpte1(cpu, pte_offset); > + pte1 = ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2); > > if ((pte0 & HPTE32_V_VALID) > && (secondary == !!(pte0 & HPTE32_V_SECONDARY)) > diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h > index 2838de031c..4db55fb0a0 100644 > --- a/target/ppc/mmu-hash32.h > +++ b/target/ppc/mmu-hash32.h > @@ -69,38 +69,6 @@ static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu) > return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF; > } > > -static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu, > - hwaddr pte_offset) > -{ > - target_ulong base = 
ppc_hash32_hpt_base(cpu); > - > - return ldl_phys(CPU(cpu)->as, base + pte_offset); > -} > - > -static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu, > - hwaddr pte_offset) > -{ > - target_ulong base = ppc_hash32_hpt_base(cpu); > - > - return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2); > -} > - > -static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu, > - hwaddr pte_offset, target_ulong pte0) > -{ > - target_ulong base = ppc_hash32_hpt_base(cpu); > - > - stl_phys(CPU(cpu)->as, base + pte_offset, pte0); > -} > - > -static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu, > - hwaddr pte_offset, target_ulong pte1) > -{ > - target_ulong base = ppc_hash32_hpt_base(cpu); > - > - stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); > -} > - > static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash) > { > return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);
On Thu, 4 Jul 2024, Nicholas Piggin wrote: > On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote: >> Two of these are not used anywhere and the other two are used only >> once and can be inlined and removed from the header. > > I'd prefer to put these in the .c file. Probably calculating the > base once would generate marginally better code since it would not > have to keep reloading it (since there is a barrier there it can't > cache the value). These aren't even used anywhere but one function and they are inefficient becuase they would call ppc_hash32_hpt_base() on each call. Next patch even removes base and calculates pte_addr once before the loop, then it's quite straight forward what these read from guest memory even without inline functions. I see no reason to keep these inline functions. > Thanks, > Nick > >> >> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> >> --- >> target/ppc/mmu-hash32.c | 5 +++-- >> target/ppc/mmu-hash32.h | 32 -------------------------------- >> 2 files changed, 3 insertions(+), 34 deletions(-) >> >> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c >> index a2c0ac05d2..7a6a674f8a 100644 >> --- a/target/ppc/mmu-hash32.c >> +++ b/target/ppc/mmu-hash32.c >> @@ -206,17 +206,18 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off, >> { >> hwaddr pte_offset = pteg_off; >> target_ulong pte0, pte1; >> + hwaddr base = ppc_hash32_hpt_base(cpu); >> int i; >> >> for (i = 0; i < HPTES_PER_GROUP; i++) { >> - pte0 = ppc_hash32_load_hpte0(cpu, pte_offset); >> + pte0 = ldl_phys(CPU(cpu)->as, base + pte_offset); >> /* >> * pte0 contains the valid bit and must be read before pte1, >> * otherwise we might see an old pte1 with a new valid bit and >> * thus an inconsistent hpte value >> */ >> smp_rmb(); >> - pte1 = ppc_hash32_load_hpte1(cpu, pte_offset); >> + pte1 = ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2); >> >> if ((pte0 & HPTE32_V_VALID) >> && (secondary == !!(pte0 & HPTE32_V_SECONDARY)) 
>> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h >> index 2838de031c..4db55fb0a0 100644 >> --- a/target/ppc/mmu-hash32.h >> +++ b/target/ppc/mmu-hash32.h >> @@ -69,38 +69,6 @@ static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu) >> return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF; >> } >> >> -static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu, >> - hwaddr pte_offset) >> -{ >> - target_ulong base = ppc_hash32_hpt_base(cpu); >> - >> - return ldl_phys(CPU(cpu)->as, base + pte_offset); >> -} >> - >> -static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu, >> - hwaddr pte_offset) >> -{ >> - target_ulong base = ppc_hash32_hpt_base(cpu); >> - >> - return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2); >> -} >> - >> -static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu, >> - hwaddr pte_offset, target_ulong pte0) >> -{ >> - target_ulong base = ppc_hash32_hpt_base(cpu); >> - >> - stl_phys(CPU(cpu)->as, base + pte_offset, pte0); >> -} >> - >> -static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu, >> - hwaddr pte_offset, target_ulong pte1) >> -{ >> - target_ulong base = ppc_hash32_hpt_base(cpu); >> - >> - stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); >> -} >> - >> static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash) >> { >> return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu); > >
On Sun Jul 7, 2024 at 6:18 AM AEST, BALATON Zoltan wrote: > On Thu, 4 Jul 2024, Nicholas Piggin wrote: > > On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote: > >> Two of these are not used anywhere and the other two are used only > >> once and can be inlined and removed from the header. > > > > I'd prefer to put these in the .c file. Probably calculating the > > base once would generate marginally better code since it would not > > have to keep reloading it (since there is a barrier there it can't > > cache the value). > > These aren't even used anywhere but one function and they are inefficient > becuase they would call ppc_hash32_hpt_base() on each call. Next patch > even removes base and calculates pte_addr once before the loop, then it's > quite straight forward what these read from guest memory even without > inline functions. I see no reason to keep these inline functions. Make them take the hash base instead of cpu if that's the performance issue to solve. And open coded access can always be converted to use it. Thanks, Nick
On Mon, 8 Jul 2024, Nicholas Piggin wrote: > On Sun Jul 7, 2024 at 6:18 AM AEST, BALATON Zoltan wrote: >> On Thu, 4 Jul 2024, Nicholas Piggin wrote: >>> On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote: >>>> Two of these are not used anywhere and the other two are used only >>>> once and can be inlined and removed from the header. >>> >>> I'd prefer to put these in the .c file. Probably calculating the >>> base once would generate marginally better code since it would not >>> have to keep reloading it (since there is a barrier there it can't >>> cache the value). >> >> These aren't even used anywhere but one function and they are inefficient >> becuase they would call ppc_hash32_hpt_base() on each call. Next patch >> even removes base and calculates pte_addr once before the loop, then it's >> quite straight forward what these read from guest memory even without >> inline functions. I see no reason to keep these inline functions. > > Make them take the hash base instead of cpu if that's the performance > issue to solve. And open coded access can always be converted to use > it. If you look at the next patch you can see the base calculation is gone and it does pte_addr = ppc_hash32_hpt_base(cpu) + pteg_off once at the beginning before the loop, then the function you want to make is just: pte0 = ldl_phys(CPU(cpu)->as, pte_addr). I don't think it's worth making it a separate function. The other one pte1 = ldl_phys(CPU(cpu)->as, pte_addr + HASH_PTE_SIZE_32 / 2) still has some calculation left but that's pretty straightforward. Maybe we could add a macro for HASH_PTE_SIZE_32 / 2 like HASH_PTE1_OFFS or something but I don't think that or having separate functions for this would add any more clarity, just unnecessary complication. Your one-line helpers would only be used by ppc_hash32_pteg_search which is already a helper to get pte values so open coding it within this function is OK in my opinion. There are no other places your helper functions would be needed.
Regards, BALATON Zoltan
diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index a2c0ac05d2..7a6a674f8a 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -206,17 +206,18 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off, { hwaddr pte_offset = pteg_off; target_ulong pte0, pte1; + hwaddr base = ppc_hash32_hpt_base(cpu); int i; for (i = 0; i < HPTES_PER_GROUP; i++) { - pte0 = ppc_hash32_load_hpte0(cpu, pte_offset); + pte0 = ldl_phys(CPU(cpu)->as, base + pte_offset); /* * pte0 contains the valid bit and must be read before pte1, * otherwise we might see an old pte1 with a new valid bit and * thus an inconsistent hpte value */ smp_rmb(); - pte1 = ppc_hash32_load_hpte1(cpu, pte_offset); + pte1 = ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2); if ((pte0 & HPTE32_V_VALID) && (secondary == !!(pte0 & HPTE32_V_SECONDARY)) diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h index 2838de031c..4db55fb0a0 100644 --- a/target/ppc/mmu-hash32.h +++ b/target/ppc/mmu-hash32.h @@ -69,38 +69,6 @@ static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu) return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF; } -static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu, - hwaddr pte_offset) -{ - target_ulong base = ppc_hash32_hpt_base(cpu); - - return ldl_phys(CPU(cpu)->as, base + pte_offset); -} - -static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu, - hwaddr pte_offset) -{ - target_ulong base = ppc_hash32_hpt_base(cpu); - - return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2); -} - -static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu, - hwaddr pte_offset, target_ulong pte0) -{ - target_ulong base = ppc_hash32_hpt_base(cpu); - - stl_phys(CPU(cpu)->as, base + pte_offset, pte0); -} - -static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu, - hwaddr pte_offset, target_ulong pte1) -{ - target_ulong base = ppc_hash32_hpt_base(cpu); - - stl_phys(CPU(cpu)->as, base + 
pte_offset + HASH_PTE_SIZE_32 / 2, pte1); -} - static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash) { return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);
Two of these are not used anywhere and the other two are used only once and can be inlined and removed from the header. Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> --- target/ppc/mmu-hash32.c | 5 +++-- target/ppc/mmu-hash32.h | 32 -------------------------------- 2 files changed, 3 insertions(+), 34 deletions(-)