Message ID | 1469571686-7284-26-git-send-email-benh@kernel.crashing.org |
---|---|
State | New |
Headers | show |
On Wed, Jul 27, 2016 at 08:21:20AM +1000, Benjamin Herrenschmidt wrote: > Use tlb_vaddr_to_host to do a fast path single translate for > the whole cache line. Also make the reservation check match > the entire range. > > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> > --- > target-ppc/mem_helper.c | 46 +++++++++++++++++++++++++--------------------- > target-ppc/translate.c | 11 ++++------- > 2 files changed, 29 insertions(+), 28 deletions(-) > > diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c > index 92a594c..6548715 100644 > --- a/target-ppc/mem_helper.c > +++ b/target-ppc/mem_helper.c > @@ -141,35 +141,39 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb, > } > } > > -static void do_dcbz(CPUPPCState *env, target_ulong addr, int dcache_line_size, > - uintptr_t raddr) > +void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode) > { > - int i; > - > - addr &= ~(dcache_line_size - 1); > - for (i = 0; i < dcache_line_size; i += 4) { > - cpu_stl_data_ra(env, addr + i, 0, raddr); > - } > - if (env->reserve_addr == addr) { > - env->reserve_addr = (target_ulong)-1ULL; > - } > -} > - > -void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t is_dcbzl) > -{ > - int dcbz_size = env->dcache_line_size; > + target_ulong mask, dcbz_size = env->dcache_line_size; > + uint32_t i; > + void *haddr; > > #if defined(TARGET_PPC64) > - if (!is_dcbzl && > - (env->excp_model == POWERPC_EXCP_970) && > - ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) { > + /* Check for dcbz vs dcbzl on 970 */ > + if (env->excp_model == POWERPC_EXCP_970 && > + !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) { > dcbz_size = 32; > } > #endif > > - /* XXX add e500mc support */ > + /* Align address */ > + mask = ~(dcbz_size - 1); > + addr &= mask; > + > + /* Check reservation */ > + if ((env->reserve_addr & mask) == (addr & mask)) { > + env->reserve_addr = (target_ulong)-1ULL; > + } > > - do_dcbz(env, addr, dcbz_size, GETPC()); > + /* Try fast path translate */ > + haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, env->dmmu_idx); It worries me slightly that this doesn't take any length to verify. I guess it's ok in practice, because memory blocks will always be at least cache line size aligned. > + if (haddr) { > + memset(haddr, 0, dcbz_size); > + } else { > + /* Slow path */ > + for (i = 0; i < dcbz_size; i += 8) { > + cpu_stq_data_ra(env, addr + i, 0, GETPC()); > + } > + } > } > > void helper_icbi(CPUPPCState *env, target_ulong addr) > diff --git a/target-ppc/translate.c b/target-ppc/translate.c > index 57a891b..5288e02 100644 > --- a/target-ppc/translate.c > +++ b/target-ppc/translate.c > @@ -3851,18 +3851,15 @@ static void gen_dcbtls(DisasContext *ctx) > static void gen_dcbz(DisasContext *ctx) > { > TCGv tcgv_addr; > - TCGv_i32 tcgv_is_dcbzl; > - int is_dcbzl = ctx->opcode & 0x00200000 ? 1 : 0; > + TCGv_i32 tcgv_op; > > gen_set_access_type(ctx, ACCESS_CACHE); > tcgv_addr = tcg_temp_new(); > - tcgv_is_dcbzl = tcg_const_i32(is_dcbzl); > - > + tcgv_op = tcg_const_i32(ctx->opcode & 0x03FF000); > gen_addr_reg_index(ctx, tcgv_addr); > - gen_helper_dcbz(cpu_env, tcgv_addr, tcgv_is_dcbzl); > - > + gen_helper_dcbz(cpu_env, tcgv_addr, tcgv_op); > tcg_temp_free(tcgv_addr); > - tcg_temp_free_i32(tcgv_is_dcbzl); > + tcg_temp_free_i32(tcgv_op); > } > > /* dst / dstt */
On Wed, 2016-07-27 at 12:36 +1000, David Gibson wrote: > > - do_dcbz(env, addr, dcbz_size, GETPC()); > > + /* Try fast path translate */ > > + haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, env->dmmu_idx); > > It worries me slightly that this doesn't take any length to verify. I > guess it's ok in practice, because memory blocks will always be at > least cache line size aligned. It's safe ;-) The translate returns a qemu page size address which is always 4K. We don't need to verify because we just aligned the address to the cache block size which is always smaller than 4k. So we can't possibly be crossing a page boundary. (grep for tlb_vaddr_to_host in target-s390 for other examples of use of tlb_vaddr_to_host). Cheers, Ben.
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c index 92a594c..6548715 100644 --- a/target-ppc/mem_helper.c +++ b/target-ppc/mem_helper.c @@ -141,35 +141,39 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb, } } -static void do_dcbz(CPUPPCState *env, target_ulong addr, int dcache_line_size, - uintptr_t raddr) +void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode) { - int i; - - addr &= ~(dcache_line_size - 1); - for (i = 0; i < dcache_line_size; i += 4) { - cpu_stl_data_ra(env, addr + i, 0, raddr); - } - if (env->reserve_addr == addr) { - env->reserve_addr = (target_ulong)-1ULL; - } -} - -void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t is_dcbzl) -{ - int dcbz_size = env->dcache_line_size; + target_ulong mask, dcbz_size = env->dcache_line_size; + uint32_t i; + void *haddr; #if defined(TARGET_PPC64) - if (!is_dcbzl && - (env->excp_model == POWERPC_EXCP_970) && - ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) { + /* Check for dcbz vs dcbzl on 970 */ + if (env->excp_model == POWERPC_EXCP_970 && + !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) { dcbz_size = 32; } #endif - /* XXX add e500mc support */ + /* Align address */ + mask = ~(dcbz_size - 1); + addr &= mask; + + /* Check reservation */ + if ((env->reserve_addr & mask) == (addr & mask)) { + env->reserve_addr = (target_ulong)-1ULL; + } - do_dcbz(env, addr, dcbz_size, GETPC()); + /* Try fast path translate */ + haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, env->dmmu_idx); + if (haddr) { + memset(haddr, 0, dcbz_size); + } else { + /* Slow path */ + for (i = 0; i < dcbz_size; i += 8) { + cpu_stq_data_ra(env, addr + i, 0, GETPC()); + } + } } void helper_icbi(CPUPPCState *env, target_ulong addr) diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 57a891b..5288e02 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -3851,18 +3851,15 @@ static void gen_dcbtls(DisasContext *ctx) static void gen_dcbz(DisasContext *ctx) { TCGv tcgv_addr; - TCGv_i32 tcgv_is_dcbzl; - int is_dcbzl = ctx->opcode & 0x00200000 ? 1 : 0; + TCGv_i32 tcgv_op; gen_set_access_type(ctx, ACCESS_CACHE); tcgv_addr = tcg_temp_new(); - tcgv_is_dcbzl = tcg_const_i32(is_dcbzl); - + tcgv_op = tcg_const_i32(ctx->opcode & 0x03FF000); gen_addr_reg_index(ctx, tcgv_addr); - gen_helper_dcbz(cpu_env, tcgv_addr, tcgv_is_dcbzl); - + gen_helper_dcbz(cpu_env, tcgv_addr, tcgv_op); tcg_temp_free(tcgv_addr); - tcg_temp_free_i32(tcgv_is_dcbzl); + tcg_temp_free_i32(tcgv_op); } /* dst / dstt */
Use tlb_vaddr_to_host to do a fast path single translate for the whole cache line. Also make the reservation check match the entire range. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> --- target-ppc/mem_helper.c | 46 +++++++++++++++++++++++++--------------------- target-ppc/translate.c | 11 ++++------- 2 files changed, 29 insertions(+), 28 deletions(-)