diff mbox

[26/32] ppc: Speed up dcbz

Message ID 1469571686-7284-26-git-send-email-benh@kernel.crashing.org
State New
Headers show

Commit Message

Benjamin Herrenschmidt July 26, 2016, 10:21 p.m. UTC
Use tlb_vaddr_to_host to do a fast path single translate for
the whole cache line. Also make the reservation check match
the entire range.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/mem_helper.c | 46 +++++++++++++++++++++++++---------------------
 target-ppc/translate.c  | 11 ++++-------
 2 files changed, 29 insertions(+), 28 deletions(-)

Comments

David Gibson July 27, 2016, 2:36 a.m. UTC | #1
On Wed, Jul 27, 2016 at 08:21:20AM +1000, Benjamin Herrenschmidt wrote:
> Use tlb_vaddr_to_host to do a fast path single translate for
> the whole cache line. Also make the reservation check match
> the entire range.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target-ppc/mem_helper.c | 46 +++++++++++++++++++++++++---------------------
>  target-ppc/translate.c  | 11 ++++-------
>  2 files changed, 29 insertions(+), 28 deletions(-)
> 
> diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
> index 92a594c..6548715 100644
> --- a/target-ppc/mem_helper.c
> +++ b/target-ppc/mem_helper.c
> @@ -141,35 +141,39 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
>      }
>  }
>  
> -static void do_dcbz(CPUPPCState *env, target_ulong addr, int dcache_line_size,
> -                    uintptr_t raddr)
> +void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode)
>  {
> -    int i;
> -
> -    addr &= ~(dcache_line_size - 1);
> -    for (i = 0; i < dcache_line_size; i += 4) {
> -        cpu_stl_data_ra(env, addr + i, 0, raddr);
> -    }
> -    if (env->reserve_addr == addr) {
> -        env->reserve_addr = (target_ulong)-1ULL;
> -    }
> -}
> -
> -void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t is_dcbzl)
> -{
> -    int dcbz_size = env->dcache_line_size;
> +    target_ulong mask, dcbz_size = env->dcache_line_size;
> +    uint32_t i;
> +    void *haddr;
>  
>  #if defined(TARGET_PPC64)
> -    if (!is_dcbzl &&
> -        (env->excp_model == POWERPC_EXCP_970) &&
> -        ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
> +    /* Check for dcbz vs dcbzl on 970 */
> +    if (env->excp_model == POWERPC_EXCP_970 &&
> +        !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
>          dcbz_size = 32;
>      }
>  #endif
>  
> -    /* XXX add e500mc support */
> +    /* Align address */
> +    mask = ~(dcbz_size - 1);
> +    addr &= mask;
> +
> +    /* Check reservation */
> +    if ((env->reserve_addr & mask) == (addr & mask))  {
> +        env->reserve_addr = (target_ulong)-1ULL;
> +    }
>  
> -    do_dcbz(env, addr, dcbz_size, GETPC());
> +    /* Try fast path translate */
> +    haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, env->dmmu_idx);

It worries me slightly that this doesn't take any length to verify.  I
guess it's ok in practice, because memory blocks will always be at
least cache line size aligned.

> +    if (haddr) {
> +        memset(haddr, 0, dcbz_size);
> +    } else {
> +        /* Slow path */
> +        for (i = 0; i < dcbz_size; i += 8) {
> +            cpu_stq_data_ra(env, addr + i, 0, GETPC());
> +        }
> +    }
>  }
>  
>  void helper_icbi(CPUPPCState *env, target_ulong addr)
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 57a891b..5288e02 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -3851,18 +3851,15 @@ static void gen_dcbtls(DisasContext *ctx)
>  static void gen_dcbz(DisasContext *ctx)
>  {
>      TCGv tcgv_addr;
> -    TCGv_i32 tcgv_is_dcbzl;
> -    int is_dcbzl = ctx->opcode & 0x00200000 ? 1 : 0;
> +    TCGv_i32 tcgv_op;
>  
>      gen_set_access_type(ctx, ACCESS_CACHE);
>      tcgv_addr = tcg_temp_new();
> -    tcgv_is_dcbzl = tcg_const_i32(is_dcbzl);
> -
> +    tcgv_op = tcg_const_i32(ctx->opcode & 0x03FF000);
>      gen_addr_reg_index(ctx, tcgv_addr);
> -    gen_helper_dcbz(cpu_env, tcgv_addr, tcgv_is_dcbzl);
> -
> +    gen_helper_dcbz(cpu_env, tcgv_addr, tcgv_op);
>      tcg_temp_free(tcgv_addr);
> -    tcg_temp_free_i32(tcgv_is_dcbzl);
> +    tcg_temp_free_i32(tcgv_op);
>  }
>  
>  /* dst / dstt */
Benjamin Herrenschmidt July 27, 2016, 4:02 a.m. UTC | #2
On Wed, 2016-07-27 at 12:36 +1000, David Gibson wrote:
> > -    do_dcbz(env, addr, dcbz_size, GETPC());
> > +    /* Try fast path translate */
> > +    haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, env->dmmu_idx);
> 
> It worries me slightly that this doesn't take any length to verify.  I
> guess it's ok in practice, because memory blocks will always be at
> least cache line size aligned.

It's safe ;-)

The translate returns a qemu page size address which is always 4K.

We don't need to verify  because we just aligned the address to the
cache block size which is always smaller than 4k. So we can't
possibly be crossing a page boundary.

(grep for tlb_vaddr_to_host in target-s390 for other examples of use
of tlb_vaddr_to_host).

Cheers,
Ben.
diff mbox

Patch

diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 92a594c..6548715 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -141,35 +141,39 @@  void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
     }
 }
 
-static void do_dcbz(CPUPPCState *env, target_ulong addr, int dcache_line_size,
-                    uintptr_t raddr)
+void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode)
 {
-    int i;
-
-    addr &= ~(dcache_line_size - 1);
-    for (i = 0; i < dcache_line_size; i += 4) {
-        cpu_stl_data_ra(env, addr + i, 0, raddr);
-    }
-    if (env->reserve_addr == addr) {
-        env->reserve_addr = (target_ulong)-1ULL;
-    }
-}
-
-void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t is_dcbzl)
-{
-    int dcbz_size = env->dcache_line_size;
+    target_ulong mask, dcbz_size = env->dcache_line_size;
+    uint32_t i;
+    void *haddr;
 
 #if defined(TARGET_PPC64)
-    if (!is_dcbzl &&
-        (env->excp_model == POWERPC_EXCP_970) &&
-        ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
+    /* Check for dcbz vs dcbzl on 970 */
+    if (env->excp_model == POWERPC_EXCP_970 &&
+        !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
         dcbz_size = 32;
     }
 #endif
 
-    /* XXX add e500mc support */
+    /* Align address */
+    mask = ~(dcbz_size - 1);
+    addr &= mask;
+
+    /* Check reservation */
+    if ((env->reserve_addr & mask) == (addr & mask))  {
+        env->reserve_addr = (target_ulong)-1ULL;
+    }
 
-    do_dcbz(env, addr, dcbz_size, GETPC());
+    /* Try fast path translate */
+    haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, env->dmmu_idx);
+    if (haddr) {
+        memset(haddr, 0, dcbz_size);
+    } else {
+        /* Slow path */
+        for (i = 0; i < dcbz_size; i += 8) {
+            cpu_stq_data_ra(env, addr + i, 0, GETPC());
+        }
+    }
 }
 
 void helper_icbi(CPUPPCState *env, target_ulong addr)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 57a891b..5288e02 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3851,18 +3851,15 @@  static void gen_dcbtls(DisasContext *ctx)
 static void gen_dcbz(DisasContext *ctx)
 {
     TCGv tcgv_addr;
-    TCGv_i32 tcgv_is_dcbzl;
-    int is_dcbzl = ctx->opcode & 0x00200000 ? 1 : 0;
+    TCGv_i32 tcgv_op;
 
     gen_set_access_type(ctx, ACCESS_CACHE);
     tcgv_addr = tcg_temp_new();
-    tcgv_is_dcbzl = tcg_const_i32(is_dcbzl);
-
+    tcgv_op = tcg_const_i32(ctx->opcode & 0x03FF000);
     gen_addr_reg_index(ctx, tcgv_addr);
-    gen_helper_dcbz(cpu_env, tcgv_addr, tcgv_is_dcbzl);
-
+    gen_helper_dcbz(cpu_env, tcgv_addr, tcgv_op);
     tcg_temp_free(tcgv_addr);
-    tcg_temp_free_i32(tcgv_is_dcbzl);
+    tcg_temp_free_i32(tcgv_op);
 }
 
 /* dst / dstt */