diff mbox series

[v9,06/10] target/ppc: enable PMU instruction count

Message ID 20211201151734.654994-7-danielhb413@gmail.com
State Accepted, archived
Headers show
Series PMU-EBB support for PPC64 TCG | expand

Commit Message

Daniel Henrique Barboza Dec. 1, 2021, 3:17 p.m. UTC
The PMU is already counting cycles by calculating time elapsed in
nanoseconds. Counting instructions is a different matter and requires
another approach.

This patch adds the capability of counting completed instructions (Perf
event PM_INST_CMPL) by adding the number of instructions translated in
each translation block to the counters right before exiting the block.

A new pmu_count_insns() helper in translate.c was added to do that.
After verifying that the PMU is counting instructions, call
helper_insns_inc(). This new helper from power8-pmu.c will add the
instructions to the relevant counters. It'll also be responsible for
triggering counter negative overflows as it is already being done with
cycles.

To verify whether the PMU is counting instructions or not, a new hflags
bit named 'HFLAGS_INSN_CNT' is introduced. This flag will match the internal
state of the PMU. We'll be using this flag to avoid calling
helper_insns_inc() when we do not have a valid instruction event being
sampled.

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 target/ppc/cpu.h                 |  1 +
 target/ppc/helper.h              |  1 +
 target/ppc/helper_regs.c         |  7 ++++
 target/ppc/power8-pmu-regs.c.inc |  6 +++
 target/ppc/power8-pmu.c          | 67 +++++++++++++++++++++++++++++++-
 target/ppc/power8-pmu.h          |  1 +
 target/ppc/translate.c           | 64 ++++++++++++++++++++++++++++++
 7 files changed, 146 insertions(+), 1 deletion(-)

Comments

David Gibson Dec. 2, 2021, 2:42 a.m. UTC | #1
On Wed, Dec 01, 2021 at 12:17:30PM -0300, Daniel Henrique Barboza wrote:
> The PMU is already counting cycles by calculating time elapsed in
> nanoseconds. Counting instructions is a different matter and requires
> another approach.
> 
> This patch adds the capability of counting completed instructions (Perf
> event PM_INST_CMPL) by counting the amount of instructions translated in
> each translation block right before exiting it.
> 
> A new pmu_count_insns() helper in translation.c was added to do that.
> After verifying that the PMU is counting instructions, call
> helper_insns_inc(). This new helper from power8-pmu.c will add the
> instructions to the relevant counters. It'll also be responsible for
> triggering counter negative overflows as it is already being done with
> cycles.
> 
> To verify whether the PMU is counting instructions or now, a new hflags
> named 'HFLAGS_INSN_CNT' is introduced. This flag will match the internal
> state of the PMU. We're be using this flag to avoid calling
> helper_insn_inc() when we do not have a valid instruction event being
> sampled.
> 
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target/ppc/cpu.h                 |  1 +
>  target/ppc/helper.h              |  1 +
>  target/ppc/helper_regs.c         |  7 ++++
>  target/ppc/power8-pmu-regs.c.inc |  6 +++
>  target/ppc/power8-pmu.c          | 67 +++++++++++++++++++++++++++++++-
>  target/ppc/power8-pmu.h          |  1 +
>  target/ppc/translate.c           | 64 ++++++++++++++++++++++++++++++
>  7 files changed, 146 insertions(+), 1 deletion(-)
> 
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index f562d5b933..28a185fb25 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -655,6 +655,7 @@ enum {
>      HFLAGS_PR = 14,  /* MSR_PR */
>      HFLAGS_PMCC0 = 15,  /* MMCR0 PMCC bit 0 */
>      HFLAGS_PMCC1 = 16,  /* MMCR0 PMCC bit 1 */
> +    HFLAGS_INSN_CNT = 17, /* PMU instruction count enabled */
>      HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
>      HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
>  
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index 94b4690375..d8a23e054a 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -24,6 +24,7 @@ DEF_HELPER_2(store_mmcr0, void, env, tl)
>  DEF_HELPER_2(store_mmcr1, void, env, tl)
>  DEF_HELPER_3(store_pmc, void, env, i32, i64)
>  DEF_HELPER_2(read_pmc, tl, env, i32)
> +DEF_HELPER_2(insns_inc, void, env, i32)
>  #endif
>  DEF_HELPER_1(check_tlb_flush_local, void, env)
>  DEF_HELPER_1(check_tlb_flush_global, void, env)
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index 99562edd57..b847928842 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -23,6 +23,7 @@
>  #include "exec/exec-all.h"
>  #include "sysemu/kvm.h"
>  #include "helper_regs.h"
> +#include "power8-pmu.h"
>  
>  /* Swap temporary saved registers with GPRs */
>  void hreg_swap_gpr_tgpr(CPUPPCState *env)
> @@ -121,6 +122,12 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
>          hflags |= 1 << HFLAGS_HV;
>      }
>  
> +#if defined(TARGET_PPC64)
> +    if (pmu_insn_cnt_enabled(env)) {
> +        hflags |= 1 << HFLAGS_INSN_CNT;
> +    }
> +#endif
> +
>      /*
>       * This is our encoding for server processors. The architecture
>       * specifies that there is no such thing as userspace with
> diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
> index 25b13ad564..2bab6cece7 100644
> --- a/target/ppc/power8-pmu-regs.c.inc
> +++ b/target/ppc/power8-pmu-regs.c.inc
> @@ -113,6 +113,12 @@ static void write_MMCR0_common(DisasContext *ctx, TCGv val)
>       */
>      gen_icount_io_start(ctx);
>      gen_helper_store_mmcr0(cpu_env, val);
> +
> +    /*
> +     * End the translation block because MMCR0 writes can change
> +     * ctx->pmu_insn_cnt.
> +     */
> +    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
>  }
>  
>  void spr_write_MMCR0_ureg(DisasContext *ctx, int sprn, int gprn)
> diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
> index 399234a2fc..e163ba5640 100644
> --- a/target/ppc/power8-pmu.c
> +++ b/target/ppc/power8-pmu.c
> @@ -112,6 +112,54 @@ static PMUEventType pmc_get_event(CPUPPCState *env, int sprn)
>      return evt_type;
>  }
>  
> +bool pmu_insn_cnt_enabled(CPUPPCState *env)
> +{
> +    int sprn;
> +
> +    for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
> +        if (pmc_get_event(env, sprn) == PMU_EVENT_INSTRUCTIONS) {
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
> +
> +static bool pmu_increment_insns(CPUPPCState *env, uint32_t num_insns)
> +{
> +    bool overflow_triggered = false;
> +    int sprn;
> +
> +    /* PMC6 never counts instructions */
> +    for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
> +        if (pmc_get_event(env, sprn) != PMU_EVENT_INSTRUCTIONS) {
> +            continue;
> +        }
> +
> +        env->spr[sprn] += num_insns;
> +
> +        if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL &&
> +            pmc_has_overflow_enabled(env, sprn)) {
> +
> +            overflow_triggered = true;
> +
> +            /*
> +             * The real PMU will always trigger a counter overflow with
> +             * PMC_COUNTER_NEGATIVE_VAL. We don't have an easy way to
> +             * do that since we're counting block of instructions at
> +             * the end of each translation block, and we're probably
> +             * passing this value at this point.
> +             *
> +             * Let's write PMC_COUNTER_NEGATIVE_VAL to the overflowed
> +             * counter to simulate what the real hardware would do.
> +             */
> +            env->spr[sprn] = PMC_COUNTER_NEGATIVE_VAL;
> +        }
> +    }
> +
> +    return overflow_triggered;
> +}
> +
>  static void pmu_update_cycles(CPUPPCState *env)
>  {
>      uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> @@ -195,7 +243,7 @@ void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
>  
>      env->spr[SPR_POWER_MMCR0] = value;
>  
> -    /* MMCR0 writes can change HFLAGS_PMCCCLEAR */
> +    /* MMCR0 writes can change HFLAGS_PMCCCLEAR and HFLAGS_INSN_CNT */
>      hreg_compute_hflags(env);
>  
>      /* Update cycle overflow timers with the current MMCR0 state */
> @@ -207,6 +255,9 @@ void helper_store_mmcr1(CPUPPCState *env, uint64_t value)
>      pmu_update_cycles(env);
>  
>      env->spr[SPR_POWER_MMCR1] = value;
> +
> +    /* MMCR1 writes can change HFLAGS_INSN_CNT */
> +    hreg_compute_hflags(env);
>  }
>  
>  target_ulong helper_read_pmc(CPUPPCState *env, uint32_t sprn)
> @@ -237,6 +288,20 @@ static void fire_PMC_interrupt(PowerPCCPU *cpu)
>      return;
>  }
>  
> +/* This helper assumes that the PMC is running. */
> +void helper_insns_inc(CPUPPCState *env, uint32_t num_insns)
> +{
> +    bool overflow_triggered;
> +    PowerPCCPU *cpu;
> +
> +    overflow_triggered = pmu_increment_insns(env, num_insns);
> +
> +    if (overflow_triggered) {
> +        cpu = env_archcpu(env);
> +        fire_PMC_interrupt(cpu);
> +    }
> +}
> +
>  static void cpu_ppc_pmu_timer_cb(void *opaque)
>  {
>      PowerPCCPU *cpu = opaque;
> diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h
> index 49a813a443..3ee4b4cda5 100644
> --- a/target/ppc/power8-pmu.h
> +++ b/target/ppc/power8-pmu.h
> @@ -21,5 +21,6 @@
>  #include "qemu/main-loop.h"
>  
>  void cpu_ppc_pmu_init(CPUPPCState *env);
> +bool pmu_insn_cnt_enabled(CPUPPCState *env);
>  
>  #endif
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 9960df6e18..896b916021 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -177,6 +177,7 @@ struct DisasContext {
>      bool hr;
>      bool mmcr0_pmcc0;
>      bool mmcr0_pmcc1;
> +    bool pmu_insn_cnt;
>      ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
>      int singlestep_enabled;
>      uint32_t flags;
> @@ -4170,6 +4171,49 @@ static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
>  #endif
>  }
>  
> +#if defined(TARGET_PPC64)
> +static void pmu_count_insns(DisasContext *ctx)
> +{
> +    /*
> +     * Do not bother calling the helper if the PMU isn't counting
> +     * instructions.
> +     */
> +    if (!ctx->pmu_insn_cnt) {
> +        return;
> +    }
> +
> + #if !defined(CONFIG_USER_ONLY)
> +    /*
> +     * The PMU insns_inc() helper stops the internal PMU timer if a
> +     * counter overflows happens. In that case, if the guest is
> +     * running with icount and we do not handle it beforehand,
> +     * the helper can trigger a 'bad icount read'.
> +     */
> +    gen_icount_io_start(ctx);
> +
> +    gen_helper_insns_inc(cpu_env, tcg_constant_i32(ctx->base.num_insns));
> +#else
> +    /*
> +     * User mode can read (but not write) PMC5 and start/stop
> +     * the PMU via MMCR0_FC. In this case just increment
> +     * PMC5 with base.num_insns.
> +     */
> +    TCGv t0 = tcg_temp_new();
> +
> +    gen_load_spr(t0, SPR_POWER_PMC5);
> +    tcg_gen_addi_tl(t0, t0, ctx->base.num_insns);
> +    gen_store_spr(SPR_POWER_PMC5, t0);
> +
> +    tcg_temp_free(t0);
> +#endif /* #if !defined(CONFIG_USER_ONLY) */
> +}
> +#else
> +static void pmu_count_insns(DisasContext *ctx)
> +{
> +    return;
> +}
> +#endif /* #if defined(TARGET_PPC64) */
> +
>  static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
>  {
>      return translator_use_goto_tb(&ctx->base, dest);
> @@ -4180,6 +4224,14 @@ static void gen_lookup_and_goto_ptr(DisasContext *ctx)
>      if (unlikely(ctx->singlestep_enabled)) {
>          gen_debug_exception(ctx);
>      } else {
> +        /*
> +         * tcg_gen_lookup_and_goto_ptr will exit the TB if
> +         * CF_NO_GOTO_PTR is set. Count insns now.
> +         */
> +        if (ctx->base.tb->flags & CF_NO_GOTO_PTR) {
> +            pmu_count_insns(ctx);
> +        }
> +
>          tcg_gen_lookup_and_goto_ptr();
>      }
>  }
> @@ -4191,6 +4243,7 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
>          dest = (uint32_t) dest;
>      }
>      if (use_goto_tb(ctx, dest)) {
> +        pmu_count_insns(ctx);
>          tcg_gen_goto_tb(n);
>          tcg_gen_movi_tl(cpu_nip, dest & ~3);
>          tcg_gen_exit_tb(ctx->base.tb, n);
> @@ -8458,6 +8511,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>      ctx->hr = (hflags >> HFLAGS_HR) & 1;
>      ctx->mmcr0_pmcc0 = (hflags >> HFLAGS_PMCC0) & 1;
>      ctx->mmcr0_pmcc1 = (hflags >> HFLAGS_PMCC1) & 1;
> +    ctx->pmu_insn_cnt = (hflags >> HFLAGS_INSN_CNT) & 1;
>  
>      ctx->singlestep_enabled = 0;
>      if ((hflags >> HFLAGS_SE) & 1) {
> @@ -8564,6 +8618,7 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>      switch (is_jmp) {
>      case DISAS_TOO_MANY:
>          if (use_goto_tb(ctx, nip)) {
> +            pmu_count_insns(ctx);
>              tcg_gen_goto_tb(0);
>              gen_update_nip(ctx, nip);
>              tcg_gen_exit_tb(ctx->base.tb, 0);
> @@ -8574,6 +8629,14 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>          gen_update_nip(ctx, nip);
>          /* fall through */
>      case DISAS_CHAIN:
> +        /*
> +         * tcg_gen_lookup_and_goto_ptr will exit the TB if
> +         * CF_NO_GOTO_PTR is set. Count insns now.
> +         */
> +        if (ctx->base.tb->flags & CF_NO_GOTO_PTR) {
> +            pmu_count_insns(ctx);
> +        }
> +
>          tcg_gen_lookup_and_goto_ptr();
>          break;
>  
> @@ -8581,6 +8644,7 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>          gen_update_nip(ctx, nip);
>          /* fall through */
>      case DISAS_EXIT:
> +        pmu_count_insns(ctx);
>          tcg_gen_exit_tb(NULL, 0);
>          break;
>
diff mbox series

Patch

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index f562d5b933..28a185fb25 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -655,6 +655,7 @@  enum {
     HFLAGS_PR = 14,  /* MSR_PR */
     HFLAGS_PMCC0 = 15,  /* MMCR0 PMCC bit 0 */
     HFLAGS_PMCC1 = 16,  /* MMCR0 PMCC bit 1 */
+    HFLAGS_INSN_CNT = 17, /* PMU instruction count enabled */
     HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
     HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 94b4690375..d8a23e054a 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -24,6 +24,7 @@  DEF_HELPER_2(store_mmcr0, void, env, tl)
 DEF_HELPER_2(store_mmcr1, void, env, tl)
 DEF_HELPER_3(store_pmc, void, env, i32, i64)
 DEF_HELPER_2(read_pmc, tl, env, i32)
+DEF_HELPER_2(insns_inc, void, env, i32)
 #endif
 DEF_HELPER_1(check_tlb_flush_local, void, env)
 DEF_HELPER_1(check_tlb_flush_global, void, env)
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index 99562edd57..b847928842 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -23,6 +23,7 @@ 
 #include "exec/exec-all.h"
 #include "sysemu/kvm.h"
 #include "helper_regs.h"
+#include "power8-pmu.h"
 
 /* Swap temporary saved registers with GPRs */
 void hreg_swap_gpr_tgpr(CPUPPCState *env)
@@ -121,6 +122,12 @@  static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
         hflags |= 1 << HFLAGS_HV;
     }
 
+#if defined(TARGET_PPC64)
+    if (pmu_insn_cnt_enabled(env)) {
+        hflags |= 1 << HFLAGS_INSN_CNT;
+    }
+#endif
+
     /*
      * This is our encoding for server processors. The architecture
      * specifies that there is no such thing as userspace with
diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
index 25b13ad564..2bab6cece7 100644
--- a/target/ppc/power8-pmu-regs.c.inc
+++ b/target/ppc/power8-pmu-regs.c.inc
@@ -113,6 +113,12 @@  static void write_MMCR0_common(DisasContext *ctx, TCGv val)
      */
     gen_icount_io_start(ctx);
     gen_helper_store_mmcr0(cpu_env, val);
+
+    /*
+     * End the translation block because MMCR0 writes can change
+     * ctx->pmu_insn_cnt.
+     */
+    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
 }
 
 void spr_write_MMCR0_ureg(DisasContext *ctx, int sprn, int gprn)
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index 399234a2fc..e163ba5640 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -112,6 +112,54 @@  static PMUEventType pmc_get_event(CPUPPCState *env, int sprn)
     return evt_type;
 }
 
+bool pmu_insn_cnt_enabled(CPUPPCState *env)
+{
+    int sprn;
+
+    for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
+        if (pmc_get_event(env, sprn) == PMU_EVENT_INSTRUCTIONS) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static bool pmu_increment_insns(CPUPPCState *env, uint32_t num_insns)
+{
+    bool overflow_triggered = false;
+    int sprn;
+
+    /* PMC6 never counts instructions */
+    for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
+        if (pmc_get_event(env, sprn) != PMU_EVENT_INSTRUCTIONS) {
+            continue;
+        }
+
+        env->spr[sprn] += num_insns;
+
+        if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL &&
+            pmc_has_overflow_enabled(env, sprn)) {
+
+            overflow_triggered = true;
+
+            /*
+             * The real PMU will always trigger a counter overflow with
+             * PMC_COUNTER_NEGATIVE_VAL. We don't have an easy way to
+             * do that since we're counting block of instructions at
+             * the end of each translation block, and we're probably
+             * passing this value at this point.
+             *
+             * Let's write PMC_COUNTER_NEGATIVE_VAL to the overflowed
+             * counter to simulate what the real hardware would do.
+             */
+            env->spr[sprn] = PMC_COUNTER_NEGATIVE_VAL;
+        }
+    }
+
+    return overflow_triggered;
+}
+
 static void pmu_update_cycles(CPUPPCState *env)
 {
     uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
@@ -195,7 +243,7 @@  void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
 
     env->spr[SPR_POWER_MMCR0] = value;
 
-    /* MMCR0 writes can change HFLAGS_PMCCCLEAR */
+    /* MMCR0 writes can change HFLAGS_PMCCCLEAR and HFLAGS_INSN_CNT */
     hreg_compute_hflags(env);
 
     /* Update cycle overflow timers with the current MMCR0 state */
@@ -207,6 +255,9 @@  void helper_store_mmcr1(CPUPPCState *env, uint64_t value)
     pmu_update_cycles(env);
 
     env->spr[SPR_POWER_MMCR1] = value;
+
+    /* MMCR1 writes can change HFLAGS_INSN_CNT */
+    hreg_compute_hflags(env);
 }
 
 target_ulong helper_read_pmc(CPUPPCState *env, uint32_t sprn)
@@ -237,6 +288,20 @@  static void fire_PMC_interrupt(PowerPCCPU *cpu)
     return;
 }
 
+/* This helper assumes that the PMC is running. */
+void helper_insns_inc(CPUPPCState *env, uint32_t num_insns)
+{
+    bool overflow_triggered;
+    PowerPCCPU *cpu;
+
+    overflow_triggered = pmu_increment_insns(env, num_insns);
+
+    if (overflow_triggered) {
+        cpu = env_archcpu(env);
+        fire_PMC_interrupt(cpu);
+    }
+}
+
 static void cpu_ppc_pmu_timer_cb(void *opaque)
 {
     PowerPCCPU *cpu = opaque;
diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h
index 49a813a443..3ee4b4cda5 100644
--- a/target/ppc/power8-pmu.h
+++ b/target/ppc/power8-pmu.h
@@ -21,5 +21,6 @@ 
 #include "qemu/main-loop.h"
 
 void cpu_ppc_pmu_init(CPUPPCState *env);
+bool pmu_insn_cnt_enabled(CPUPPCState *env);
 
 #endif
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 9960df6e18..896b916021 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -177,6 +177,7 @@  struct DisasContext {
     bool hr;
     bool mmcr0_pmcc0;
     bool mmcr0_pmcc1;
+    bool pmu_insn_cnt;
     ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
     int singlestep_enabled;
     uint32_t flags;
@@ -4170,6 +4171,49 @@  static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
 #endif
 }
 
+#if defined(TARGET_PPC64)
+static void pmu_count_insns(DisasContext *ctx)
+{
+    /*
+     * Do not bother calling the helper if the PMU isn't counting
+     * instructions.
+     */
+    if (!ctx->pmu_insn_cnt) {
+        return;
+    }
+
+ #if !defined(CONFIG_USER_ONLY)
+    /*
+     * The PMU insns_inc() helper stops the internal PMU timer if a
+     * counter overflows happens. In that case, if the guest is
+     * running with icount and we do not handle it beforehand,
+     * the helper can trigger a 'bad icount read'.
+     */
+    gen_icount_io_start(ctx);
+
+    gen_helper_insns_inc(cpu_env, tcg_constant_i32(ctx->base.num_insns));
+#else
+    /*
+     * User mode can read (but not write) PMC5 and start/stop
+     * the PMU via MMCR0_FC. In this case just increment
+     * PMC5 with base.num_insns.
+     */
+    TCGv t0 = tcg_temp_new();
+
+    gen_load_spr(t0, SPR_POWER_PMC5);
+    tcg_gen_addi_tl(t0, t0, ctx->base.num_insns);
+    gen_store_spr(SPR_POWER_PMC5, t0);
+
+    tcg_temp_free(t0);
+#endif /* #if !defined(CONFIG_USER_ONLY) */
+}
+#else
+static void pmu_count_insns(DisasContext *ctx)
+{
+    return;
+}
+#endif /* #if defined(TARGET_PPC64) */
+
 static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
 {
     return translator_use_goto_tb(&ctx->base, dest);
@@ -4180,6 +4224,14 @@  static void gen_lookup_and_goto_ptr(DisasContext *ctx)
     if (unlikely(ctx->singlestep_enabled)) {
         gen_debug_exception(ctx);
     } else {
+        /*
+         * tcg_gen_lookup_and_goto_ptr will exit the TB if
+         * CF_NO_GOTO_PTR is set. Count insns now.
+         */
+        if (ctx->base.tb->flags & CF_NO_GOTO_PTR) {
+            pmu_count_insns(ctx);
+        }
+
         tcg_gen_lookup_and_goto_ptr();
     }
 }
@@ -4191,6 +4243,7 @@  static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
         dest = (uint32_t) dest;
     }
     if (use_goto_tb(ctx, dest)) {
+        pmu_count_insns(ctx);
         tcg_gen_goto_tb(n);
         tcg_gen_movi_tl(cpu_nip, dest & ~3);
         tcg_gen_exit_tb(ctx->base.tb, n);
@@ -8458,6 +8511,7 @@  static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->hr = (hflags >> HFLAGS_HR) & 1;
     ctx->mmcr0_pmcc0 = (hflags >> HFLAGS_PMCC0) & 1;
     ctx->mmcr0_pmcc1 = (hflags >> HFLAGS_PMCC1) & 1;
+    ctx->pmu_insn_cnt = (hflags >> HFLAGS_INSN_CNT) & 1;
 
     ctx->singlestep_enabled = 0;
     if ((hflags >> HFLAGS_SE) & 1) {
@@ -8564,6 +8618,7 @@  static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
     switch (is_jmp) {
     case DISAS_TOO_MANY:
         if (use_goto_tb(ctx, nip)) {
+            pmu_count_insns(ctx);
             tcg_gen_goto_tb(0);
             gen_update_nip(ctx, nip);
             tcg_gen_exit_tb(ctx->base.tb, 0);
@@ -8574,6 +8629,14 @@  static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
         gen_update_nip(ctx, nip);
         /* fall through */
     case DISAS_CHAIN:
+        /*
+         * tcg_gen_lookup_and_goto_ptr will exit the TB if
+         * CF_NO_GOTO_PTR is set. Count insns now.
+         */
+        if (ctx->base.tb->flags & CF_NO_GOTO_PTR) {
+            pmu_count_insns(ctx);
+        }
+
         tcg_gen_lookup_and_goto_ptr();
         break;
 
@@ -8581,6 +8644,7 @@  static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
         gen_update_nip(ctx, nip);
         /* fall through */
     case DISAS_EXIT:
+        pmu_count_insns(ctx);
         tcg_gen_exit_tb(NULL, 0);
         break;