Message ID | 20240403080452.1007601-13-atishp@rivosinc.com
State | Changes Requested
Series | RISC-V SBI v2.0 PMU improvements and Perf sampling in KVM guest
On Wed, Apr 03, 2024 at 01:04:41AM -0700, Atish Patra wrote:
> The PMU snapshot function minimizes the number of traps taken when the
> guest configures or accesses the hpmcounters. If the snapshot feature
> is enabled, the hypervisor updates the shared memory with the counter
> data and the state of overflowed counters. The guest can then simply
> read the shared memory instead of going through the hypervisor's
> trap-and-emulate path.
>
> This patch doesn't implement counter overflow yet.
>
> Reviewed-by: Anup Patel <anup@brainfault.org>
> Signed-off-by: Atish Patra <atishp@rivosinc.com>
> ---
>  arch/riscv/include/asm/kvm_vcpu_pmu.h |   7 ++
>  arch/riscv/kvm/vcpu_pmu.c             | 121 +++++++++++++++++++++++++-
>  arch/riscv/kvm/vcpu_sbi_pmu.c         |   3 +
>  3 files changed, 130 insertions(+), 1 deletion(-)
>
[...]
> +int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
> +					  unsigned long saddr_high, unsigned long flags,
> +					  struct kvm_vcpu_sbi_return *retdata)
> +{
[...]
> +	saddr = saddr_low;
> +
> +	if (saddr_high != 0) {
> +		if (IS_ENABLED(CONFIG_32BIT))
> +			saddr |= ((gpa_t)saddr << 32);

saddr |= ((gpa_t)saddr_high << 32)

> +		else
> +			sbiret = SBI_ERR_INVALID_ADDRESS;
> +		goto out;
> +	}
> +

Thanks,
drew
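(Illustrative aside, not part of the patch: this is roughly what the guest side of the interface being emulated here looks like. A guest kernel registers a per-hart snapshot page with the SBI implementation, in this case KVM, through the same SBI_EXT_PMU_SNAPSHOT_SET_SHMEM call the new handler services. The sketch assumes the kernel's generic sbi_ecall() helper; the pmu_snapshot_setup() name, the single static buffer, and the minimal error handling are illustrative only.)

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <asm/page.h>
#include <asm/sbi.h>

static void *snapshot_buf;

static int pmu_snapshot_setup(void)
{
	struct sbiret ret;
	phys_addr_t pa;

	/* The snapshot area is one zeroed, naturally aligned 4 KiB page. */
	snapshot_buf = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	if (!snapshot_buf)
		return -ENOMEM;

	pa = __pa(snapshot_buf);
	/* a0/a1 carry the low/high halves of the physical address, a2 is flags. */
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
			lower_32_bits(pa), upper_32_bits(pa), 0, 0, 0, 0);

	return ret.error ? sbi_err_map_linux_errno(ret.error) : 0;
}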
On 4/5/24 04:23, Andrew Jones wrote:
> On Wed, Apr 03, 2024 at 01:04:41AM -0700, Atish Patra wrote:
[...]
>> +	saddr = saddr_low;
>> +
>> +	if (saddr_high != 0) {
>> +		if (IS_ENABLED(CONFIG_32BIT))
>> +			saddr |= ((gpa_t)saddr << 32);
>
> saddr |= ((gpa_t)saddr_high << 32)
>

Oops. Thanks for catching it. Fixed.

>> +		else
>> +			sbiret = SBI_ERR_INVALID_ADDRESS;
>> +		goto out;
>> +	}
>> +
>
> Thanks,
> drew
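(For reference, the hunk with only Drew's correction applied; everything else is exactly as posted:)

	saddr = saddr_low;

	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT))
			/* fold in the high half passed via a1, not saddr itself */
			saddr |= ((gpa_t)saddr_high << 32);
		else
			sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}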
diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h
index 395518a1664e..77a1fc4d203d 100644
--- a/arch/riscv/include/asm/kvm_vcpu_pmu.h
+++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h
@@ -50,6 +50,10 @@ struct kvm_pmu {
 	bool init_done;
 	/* Bit map of all the virtual counter used */
 	DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+	/* The address of the counter snapshot area (guest physical address) */
+	gpa_t snapshot_addr;
+	/* The actual data of the snapshot */
+	struct riscv_pmu_snapshot_data *sdata;
 };
 
 #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu_context)
@@ -85,6 +89,9 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
 int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
 				struct kvm_vcpu_sbi_return *retdata);
 void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
+					  unsigned long saddr_high, unsigned long flags,
+					  struct kvm_vcpu_sbi_return *retdata);
 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu);
 
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 2d9929bbc2c8..f706c688b338 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -14,6 +14,7 @@
 #include <asm/csr.h>
 #include <asm/kvm_vcpu_sbi.h>
 #include <asm/kvm_vcpu_pmu.h>
+#include <asm/sbi.h>
 #include <linux/bitops.h>
 
 #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
@@ -311,6 +312,80 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
 	return ret;
 }
 
+static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
+
+	if (kvpmu->sdata) {
+		if (kvpmu->snapshot_addr != INVALID_GPA) {
+			memset(kvpmu->sdata, 0, snapshot_area_size);
+			kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr,
+					     kvpmu->sdata, snapshot_area_size);
+		} else {
+			pr_warn("snapshot address invalid\n");
+		}
+		kfree(kvpmu->sdata);
+		kvpmu->sdata = NULL;
+	}
+	kvpmu->snapshot_addr = INVALID_GPA;
+}
+
+int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
+					  unsigned long saddr_high, unsigned long flags,
+					  struct kvm_vcpu_sbi_return *retdata)
+{
+	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
+	int sbiret = 0;
+	gpa_t saddr;
+	unsigned long hva;
+	bool writable;
+
+	if (!kvpmu || flags) {
+		sbiret = SBI_ERR_INVALID_PARAM;
+		goto out;
+	}
+
+	if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
+		kvm_pmu_clear_snapshot_area(vcpu);
+		return 0;
+	}
+
+	saddr = saddr_low;
+
+	if (saddr_high != 0) {
+		if (IS_ENABLED(CONFIG_32BIT))
+			saddr |= ((gpa_t)saddr << 32);
+		else
+			sbiret = SBI_ERR_INVALID_ADDRESS;
+		goto out;
+	}
+
+	hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
+	if (kvm_is_error_hva(hva) || !writable) {
+		sbiret = SBI_ERR_INVALID_ADDRESS;
+		goto out;
+	}
+
+	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
+	if (!kvpmu->sdata)
+		return -ENOMEM;
+
+	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
+		kfree(kvpmu->sdata);
+		sbiret = SBI_ERR_FAILURE;
+		goto out;
+	}
+
+	kvpmu->snapshot_addr = saddr;
+
+out:
+	retdata->err_val = sbiret;
+
+	return 0;
+}
+
 int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
 				struct kvm_vcpu_sbi_return *retdata)
 {
@@ -344,20 +419,38 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 	int i, pmc_index, sbiret = 0;
 	struct kvm_pmc *pmc;
 	int fevent_code;
+	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;
 
 	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
 		sbiret = SBI_ERR_INVALID_PARAM;
 		goto out;
 	}
 
+	if (snap_flag_set) {
+		if (kvpmu->snapshot_addr == INVALID_GPA) {
+			sbiret = SBI_ERR_NO_SHMEM;
+			goto out;
+		}
+		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
+					sizeof(struct riscv_pmu_snapshot_data))) {
+			pr_warn("Unable to read snapshot shared memory while starting counters\n");
+			sbiret = SBI_ERR_FAILURE;
+			goto out;
+		}
+	}
 	/* Start the counters that have been configured and requested by the guest */
 	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
 		pmc_index = i + ctr_base;
 		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
 			continue;
 		pmc = &kvpmu->pmc[pmc_index];
-		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE)
+		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
 			pmc->counter_val = ival;
+		} else if (snap_flag_set) {
+			/* The counter index in the snapshot are relative to the counter base */
+			pmc->counter_val = kvpmu->sdata->ctr_values[i];
+		}
+
 		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
 			fevent_code = get_event_code(pmc->event_idx);
 			if (fevent_code >= SBI_PMU_FW_MAX) {
@@ -398,14 +491,22 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 {
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	int i, pmc_index, sbiret = 0;
+	u64 enabled, running;
 	struct kvm_pmc *pmc;
 	int fevent_code;
+	bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
+	bool shmem_needs_update = false;
 
 	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
 		sbiret = SBI_ERR_INVALID_PARAM;
 		goto out;
 	}
 
+	if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
+		sbiret = SBI_ERR_NO_SHMEM;
+		goto out;
+	}
+
 	/* Stop the counters that have been configured and requested by the guest */
 	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
 		pmc_index = i + ctr_base;
@@ -438,12 +539,28 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 		} else {
 			sbiret = SBI_ERR_INVALID_PARAM;
 		}
+
+		if (snap_flag_set && !sbiret) {
+			if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
+				pmc->counter_val = kvpmu->fw_event[fevent_code].value;
+			else if (pmc->perf_event)
+				pmc->counter_val += perf_event_read_value(pmc->perf_event,
+									  &enabled, &running);
+			/* TODO: Add counter overflow support when sscofpmf support is added */
+			kvpmu->sdata->ctr_values[i] = pmc->counter_val;
+			shmem_needs_update = true;
+		}
+
 		if (flags & SBI_PMU_STOP_FLAG_RESET) {
 			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
 			clear_bit(pmc_index, kvpmu->pmc_in_use);
 		}
 	}
 
+	if (shmem_needs_update)
+		kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
+				     sizeof(struct riscv_pmu_snapshot_data));
+
 out:
 	retdata->err_val = sbiret;
 
@@ -566,6 +683,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
 	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
 	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
 	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
+	kvpmu->snapshot_addr = INVALID_GPA;
 
 	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
 		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
@@ -625,6 +743,7 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
 	}
 	bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
 	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
+	kvm_pmu_clear_snapshot_area(vcpu);
 }
 
 void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c
index e1633606c98b..d3e7625fb2d2 100644
--- a/arch/riscv/kvm/vcpu_sbi_pmu.c
+++ b/arch/riscv/kvm/vcpu_sbi_pmu.c
@@ -64,6 +64,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	case SBI_EXT_PMU_COUNTER_FW_READ:
 		ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata);
 		break;
+	case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM:
+		ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata);
+		break;
 	default:
 		retdata->err_val = SBI_ERR_NOT_SUPPORTED;
 	}
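(Illustrative aside: the guest-visible contract the handlers above implement. The snapshot area follows the SBI v2.0 PMU snapshot layout, an overflow bitmap plus 64 counter slots padded to 4096 bytes; the struct below is assumed to match the riscv_pmu_snapshot_data definition added elsewhere in this series, and the accessor is a hypothetical example of how a guest reads a stopped counter without trapping.)

#include <linux/compiler.h>
#include <linux/types.h>

struct riscv_pmu_snapshot_data {
	u64 ctr_overflow_mask;	/* overflow status; not yet populated by this patch */
	u64 ctr_values[64];	/* counter values written back by the hypervisor */
	u64 reserved[447];	/* pads the area to 4096 bytes */
};

/* After SBI_PMU_COUNTER_STOP with the TAKE_SNAPSHOT flag, a stopped counter's
 * value is read from the shared page instead of via a trapped CSR access or
 * FW_READ call. Per the comment in the start path above, the slots are indexed
 * relative to the counter base passed to the SBI call.
 */
static u64 pmu_snapshot_read(struct riscv_pmu_snapshot_data *sdata, unsigned int slot)
{
	return READ_ONCE(sdata->ctr_values[slot]);
}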