Message ID: 1426844400-12017-14-git-send-email-paulus@samba.org
State: New, archived
On 20.03.15 10:39, Paul Mackerras wrote:
> This reads the timebase at various points in the real-mode guest
> entry/exit code and uses that to accumulate total, minimum and
> maximum time spent in those parts of the code. Currently these
> times are accumulated per vcpu in 5 parts of the code:
>
> * rm_entry - time taken from the start of kvmppc_hv_entry() until
>   just before entering the guest.
> * rm_intr - time from when we take a hypervisor interrupt in the
>   guest until we either re-enter the guest or decide to exit to the
>   host. This includes time spent handling hcalls in real mode.
> * rm_exit - time from when we decide to exit the guest until the
>   return from kvmppc_hv_entry().
> * guest - time spent in the guest
> * cede - time spent napping in real mode due to an H_CEDE hcall
>   while other threads in the same vcore are active.
>
> These times are exposed in debugfs in a directory per vcpu that
> contains a file called "timings". This file contains one line for
> each of the 5 timings above, with the name followed by a colon and
> 4 numbers, which are the count (number of times the code has been
> executed), the total time, the minimum time, and the maximum time,
> all in nanoseconds.
>
> Signed-off-by: Paul Mackerras <paulus@samba.org>

Have you measured the additional overhead this brings?

> [...]
>
> +	/* Create a debugfs directory for the vcpu */
> +	sprintf(buf, "vcpu%u", id);

Please make sure to use snprintf and friends rather than assume that
you happened to choose the buffer correctly sized.

Alex
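For illustration, the bounded pattern Alex is asking for could look like the sketch below; it is illustrative, not the actual follow-up change.

	char buf[16];

	/*
	 * snprintf() truncates rather than overruns: at most
	 * sizeof(buf) - 1 characters are written, plus the NUL.
	 */
	snprintf(buf, sizeof(buf), "vcpu%u", id);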
On Fri, Mar 20, 2015 at 12:15:15PM +0100, Alexander Graf wrote:
> On 20.03.15 10:39, Paul Mackerras wrote:
> > This reads the timebase at various points in the real-mode guest
> > entry/exit code and uses that to accumulate total, minimum and
> > maximum time spent in those parts of the code. [...]
> >
> > Signed-off-by: Paul Mackerras <paulus@samba.org>
>
> Have you measured the additional overhead this brings?

I haven't - in fact I did this patch so I could measure the overhead
or improvement from other changes I did, but it doesn't measure its
own overhead, of course.  I guess I need a workload that does a
defined number of guest entries and exits and measure how fast it runs
with and without the patch (maybe something like H_SET_MODE in a
loop).  I'll figure something out and post the results.

Paul.
On 20.03.15 12:25, Paul Mackerras wrote:
> On Fri, Mar 20, 2015 at 12:15:15PM +0100, Alexander Graf wrote:
>> On 20.03.15 10:39, Paul Mackerras wrote:
>>> This reads the timebase at various points in the real-mode guest
>>> entry/exit code and uses that to accumulate total, minimum and
>>> maximum time spent in those parts of the code. [...]
>>
>> Have you measured the additional overhead this brings?
>
> I haven't - in fact I did this patch so I could measure the overhead
> or improvement from other changes I did, but it doesn't measure its
> own overhead, of course.  I guess I need a workload that does a
> defined number of guest entries and exits and measure how fast it runs
> with and without the patch (maybe something like H_SET_MODE in a
> loop).  I'll figure something out and post the results.

Yeah, just measure the number of exits you can handle for a simple
hcall. If there is measurable overhead, it's probably a good idea to
move the statistics gathering into #ifdef paths for DEBUGFS or maybe
even a separate EXIT_TIMING config option as we have it for booke.

Alex
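For illustration, a config option in the spirit of the booke KVM_EXIT_TIMING option might be sketched roughly as below; the symbol name, dependencies and help text are assumptions, not part of this series.

config KVM_BOOK3S_HV_EXIT_TIMING
	bool "Detailed timing for hypervisor real-mode code"
	depends on KVM_BOOK3S_64_HV && DEBUG_FS
	help
	  Accumulate time spent in the real-mode guest entry, exit and
	  interrupt handling code, per vcpu, and expose the results in
	  debugfs.

	  If unsure, say N.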
On Fri, Mar 20, 2015 at 12:35:43PM +0100, Alexander Graf wrote:
> On 20.03.15 12:25, Paul Mackerras wrote:
> > On Fri, Mar 20, 2015 at 12:15:15PM +0100, Alexander Graf wrote:
> >> Have you measured the additional overhead this brings?
> >
> > I haven't - in fact I did this patch so I could measure the overhead
> > or improvement from other changes I did, but it doesn't measure its
> > own overhead, of course.  I guess I need a workload that does a
> > defined number of guest entries and exits and measure how fast it runs
> > with and without the patch (maybe something like H_SET_MODE in a
> > loop).  I'll figure something out and post the results.
>
> Yeah, just measure the number of exits you can handle for a simple
> hcall. If there is measurable overhead, it's probably a good idea to
> move the statistics gathering into #ifdef paths for DEBUGFS or maybe
> even a separate EXIT_TIMING config option as we have it for booke.

For a 1-vcpu guest on POWER8, it adds 29ns to the time for an hcall
that is handled in real mode (H_SET_DABR), which is 25%.  It adds
43ns to the time for an hcall that is handled in the host kernel in
virtual mode (H_PROD), which is 1.2%.  I'll add a config option.

Paul.
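A measurement along these lines can be made with a simple harness such as the hypothetical sketch below, run once on a patched and once on an unpatched host; do_hcall() is a stand-in for whatever guest-side operation forces one exit (e.g. an H_SET_DABR hcall), and the difference between the two means is the instrumentation overhead.

#include <stdio.h>
#include <time.h>

/* Hypothetical stand-in for an operation that causes one guest exit. */
static void do_hcall(void)
{
	/* e.g. issue H_SET_DABR from guest context */
}

int main(void)
{
	struct timespec t0, t1;
	const long n = 1000000;
	long i;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < n; ++i)
		do_hcall();
	clock_gettime(CLOCK_MONOTONIC, &t1);

	printf("%.1f ns per call\n",
	       ((t1.tv_sec - t0.tv_sec) * 1e9 +
		(t1.tv_nsec - t0.tv_nsec)) / n);
	return 0;
}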
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index f1d0bbc..286c0ce 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -369,6 +369,14 @@ struct kvmppc_slb {
 	u8	base_page_size;	/* MMU_PAGE_xxx */
 };
 
+/* Struct used to accumulate timing information in HV real mode code */
+struct kvmhv_tb_accumulator {
+	u64	seqcount;	/* used to synchronize access, also count * 2 */
+	u64	tb_total;	/* total time in timebase ticks */
+	u64	tb_min;		/* min time */
+	u64	tb_max;		/* max time */
+};
+
 # ifdef CONFIG_PPC_FSL_BOOK3E
 #define KVMPPC_BOOKE_IAC_NUM	2
 #define KVMPPC_BOOKE_DAC_NUM	2
@@ -656,6 +664,17 @@ struct kvm_vcpu_arch {
 	u64	busy_preempt;
 
 	u32 emul_inst;
+
+	struct kvmhv_tb_accumulator *cur_activity;	/* What we're timing */
+	u64	cur_tb_start;			/* when it started */
+	struct kvmhv_tb_accumulator rm_entry;	/* real-mode entry code */
+	struct kvmhv_tb_accumulator rm_intr;	/* real-mode intr handling */
+	struct kvmhv_tb_accumulator rm_exit;	/* real-mode exit code */
+	struct kvmhv_tb_accumulator guest_time;	/* guest execution */
+	struct kvmhv_tb_accumulator cede_time;	/* time napping inside guest */
+
+	struct dentry *debugfs_dir;
+	struct dentry *debugfs_timings;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 03cbada..10fc784 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -211,5 +211,8 @@ extern void secondary_cpu_time_init(void);
 
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
+/* Convert timebase ticks to nanoseconds */
+unsigned long long tb_to_ns(unsigned long long tb_ticks);
+
 #endif /* __KERNEL__ */
 #endif /* __POWERPC_TIME_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4717859..ec9f59c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -458,6 +458,17 @@ int main(void)
 	DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
 	DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
 	DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
+	DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry));
+	DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr));
+	DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit));
+	DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time));
+	DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time));
+	DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity));
+	DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, arch.cur_tb_start));
+	DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount));
+	DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total));
+	DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min));
+	DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max));
 #endif
 	DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
 	DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 2d7b33f..56f4484 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -608,6 +608,12 @@ void arch_suspend_enable_irqs(void)
 }
 #endif
 
+unsigned long long tb_to_ns(unsigned long long ticks)
+{
+	return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
+}
+EXPORT_SYMBOL_GPL(tb_to_ns);
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  *
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6631b53..8517c33 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1411,6 +1411,127 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 	return vcore;
 }
 
+static struct debugfs_timings_element {
+	const char *name;
+	size_t offset;
+} timings[] = {
+	{"rm_entry",	offsetof(struct kvm_vcpu, arch.rm_entry)},
+	{"rm_intr",	offsetof(struct kvm_vcpu, arch.rm_intr)},
+	{"rm_exit",	offsetof(struct kvm_vcpu, arch.rm_exit)},
+	{"guest",	offsetof(struct kvm_vcpu, arch.guest_time)},
+	{"cede",	offsetof(struct kvm_vcpu, arch.cede_time)},
+};
+
+#define N_TIMINGS	(sizeof(timings) / sizeof(timings[0]))
+
+struct debugfs_timings_state {
+	struct kvm_vcpu	*vcpu;
+	unsigned int	buflen;
+	char		buf[N_TIMINGS * 100];
+};
+
+static int debugfs_timings_open(struct inode *inode, struct file *file)
+{
+	struct kvm_vcpu *vcpu = inode->i_private;
+	struct debugfs_timings_state *p;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	kvm_get_kvm(vcpu->kvm);
+	p->vcpu = vcpu;
+	file->private_data = p;
+
+	return nonseekable_open(inode, file);
+}
+
+static int debugfs_timings_release(struct inode *inode, struct file *file)
+{
+	struct debugfs_timings_state *p = file->private_data;
+
+	kvm_put_kvm(p->vcpu->kvm);
+	kfree(p);
+	return 0;
+}
+
+static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
+				    size_t len, loff_t *ppos)
+{
+	struct debugfs_timings_state *p = file->private_data;
+	struct kvm_vcpu *vcpu = p->vcpu;
+	char *s;
+	struct kvmhv_tb_accumulator tb;
+	u64 count;
+	loff_t pos;
+	ssize_t n;
+	int i, loops;
+	bool ok;
+
+	if (!p->buflen) {
+		s = p->buf;
+		for (i = 0; i < N_TIMINGS; ++i) {
+			struct kvmhv_tb_accumulator *acc;
+
+			acc = (struct kvmhv_tb_accumulator *)
+				((unsigned long)vcpu + timings[i].offset);
+			ok = false;
+			for (loops = 0; loops < 1000; ++loops) {
+				count = acc->seqcount;
+				if (!(count & 1)) {
+					smp_rmb();
+					tb = *acc;
+					smp_rmb();
+					if (count == acc->seqcount) {
+						ok = true;
+						break;
+					}
+				}
+				udelay(1);
+			}
+			if (!ok)
+				sprintf(s, "%s: stuck\n", timings[i].name);
+			else
+				sprintf(s, "%s: %llu %llu %llu %llu\n",
+					timings[i].name, count / 2,
+					tb_to_ns(tb.tb_total),
+					tb_to_ns(tb.tb_min),
+					tb_to_ns(tb.tb_max));
+			s += strlen(s);
+		}
+		p->buflen = s - p->buf;
+	}
+
+	pos = *ppos;
+	if (pos >= p->buflen)
+		return 0;
+	if (len > p->buflen - pos)
+		len = p->buflen - pos;
+	n = copy_to_user(buf, p->buf + pos, len);
+	if (n) {
+		if (n == len)
+			return -EFAULT;
+		len -= n;
+	}
+	*ppos = pos + len;
+	return len;
+}
+
+static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
+				     size_t len, loff_t *ppos)
+{
+	return -EACCES;
+}
+
+static const struct file_operations debugfs_timings_ops = {
+	.owner	 = THIS_MODULE,
+	.open	 = debugfs_timings_open,
+	.release = debugfs_timings_release,
+	.read	 = debugfs_timings_read,
+	.write	 = debugfs_timings_write,
+	.llseek	 = generic_file_llseek,
+};
+
 static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 						   unsigned int id)
 {
@@ -1418,6 +1539,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	int err = -EINVAL;
 	int core;
 	struct kvmppc_vcore *vcore;
+	char buf[16];
 
 	core = id / threads_per_subcore;
 	if (core >= KVM_MAX_VCORES)
@@ -1480,6 +1602,19 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	kvmppc_sanity_check(vcpu);
 
+	/* Create a debugfs directory for the vcpu */
+	sprintf(buf, "vcpu%u", id);
+	if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir)) {
+		vcpu->arch.debugfs_dir = debugfs_create_dir(buf,
+						kvm->arch.debugfs_dir);
+		if (!IS_ERR_OR_NULL(vcpu->arch.debugfs_dir)) {
+			vcpu->arch.debugfs_timings =
+				debugfs_create_file("timings", 0444,
+					vcpu->arch.debugfs_dir,
+					vcpu, &debugfs_timings_ops);
+		}
+	}
+
 	return vcpu;
 
 free_vcpu:
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0814ca1..d71ae2f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -225,9 +225,14 @@ kvm_novcpu_wakeup:
 	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	cmpdi	r4, 0
-	bne	kvmppc_got_guest
+	beq	kvmppc_primary_no_guest
+
+	addi	r3, r4, VCPU_TB_RMENTRY
+	bl	kvmhv_start_timing
+	b	kvmppc_got_guest
 
 kvm_novcpu_exit:
+	ld	r4, HSTATE_KVM_VCPU(r13)
 	b	hdec_soon
 
 /*
@@ -376,6 +381,12 @@ kvmppc_hv_entry:
 	li	r6, KVM_GUEST_MODE_HOST_HV
 	stb	r6, HSTATE_IN_GUEST(r13)
 
+	/* Store initial timestamp */
+	cmpdi	r4, 0
+	beq	1f
+	addi	r3, r4, VCPU_TB_RMENTRY
+	bl	kvmhv_start_timing
+1:
 	/* Clear out SLB */
 	li	r6,0
 	slbmte	r6,r6
@@ -880,6 +891,10 @@ fast_guest_return:
 	li	r9, KVM_GUEST_MODE_GUEST_HV
 	stb	r9, HSTATE_IN_GUEST(r13)
 
+	/* Accumulate timing */
+	addi	r3, r4, VCPU_TB_GUEST
+	bl	kvmhv_accumulate_time
+
 	/* Enter guest */
 
 BEGIN_FTR_SECTION
@@ -917,6 +932,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	hrfid
 	b	.
 
+secondary_too_late:
+	cmpdi	r4, 0
+	beq	11f
+	addi	r3, r4, VCPU_TB_RMEXIT
+	bl	kvmhv_accumulate_time
+11:	b	kvmhv_switch_to_host
+
+hdec_soon:
+	cmpdi	r4, 0
+	beq	12f
+	addi	r3, r4, VCPU_TB_RMEXIT
+	bl	kvmhv_accumulate_time
+12:	b	kvmhv_do_exit
+
 /******************************************************************************
  *                                                                            *
  *                               Exit code                                    *
@@ -1002,6 +1031,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	stw	r12,VCPU_TRAP(r9)
 
+	addi	r3, r9, VCPU_TB_RMINTR
+	mr	r4, r9
+	bl	kvmhv_accumulate_time
+	ld	r5, VCPU_GPR(R5)(r9)
+	ld	r6, VCPU_GPR(R6)(r9)
+	ld	r7, VCPU_GPR(R7)(r9)
+	ld	r8, VCPU_GPR(R8)(r9)
+
 	/* Save HEIR (HV emulation assist reg) in emul_inst
 	   if this is an HEI (HV emulation interrupt, e40) */
 	li	r3,KVM_INST_FETCH_FAILED
@@ -1073,6 +1110,9 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	beq	machine_check_realmode
 mc_cont:
+	addi	r3, r9, VCPU_TB_RMEXIT
+	mr	r4, r9
+	bl	kvmhv_accumulate_time
 
 	/* Save guest CTRL register, set runlatch to 1 */
 6:	mfspr	r6,SPRN_CTRLF
@@ -1417,7 +1457,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	slbia
 	ptesync
 
-hdec_soon:			/* r12 = trap, r13 = paca */
+kvmhv_do_exit:			/* r12 = trap, r13 = paca */
 	/*
 	 * POWER7/POWER8 guest -> host partition switch code.
 	 * We don't have to lock against tlbies but we do
@@ -1476,7 +1516,7 @@ hdec_soon:			/* r12 = trap, r13 = paca */
 	addi	r6,r6,PACA_SIZE
 	bne	42b
 
-secondary_too_late:
+kvmhv_switch_to_host:
 	/* Secondary threads wait for primary to do partition switch */
 43:	ld	r5,HSTATE_KVM_VCORE(r13)
 	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
@@ -1562,6 +1602,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 1:	addi	r8,r8,16
 	.endr
 
+	/* Finish timing, if we have a vcpu */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	cmpdi	r4, 0
+	li	r3, 0
+	beq	2f
+	bl	kvmhv_accumulate_time
+2:
 	/* Unset guest mode */
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)
@@ -2069,6 +2116,10 @@ _GLOBAL(kvmppc_h_cede)
 	/* save FP state */
 	bl	kvmppc_save_fp
 
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	addi	r3, r4, VCPU_TB_CEDE
+	bl	kvmhv_accumulate_time
+
 	/*
 	 * Take a nap until a decrementer or external or doobell interrupt
 	 * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
@@ -2109,6 +2160,9 @@ kvm_end_cede:
 	/* Woken by external or decrementer interrupt */
 	ld	r1, HSTATE_HOST_R1(r13)
 
+	addi	r3, r4, VCPU_TB_RMINTR
+	bl	kvmhv_accumulate_time
+
 	/* load up FP state */
 	bl	kvmppc_load_fp
 
@@ -2429,3 +2483,48 @@ kvmppc_fix_pmao:
 	mtspr	SPRN_PMC6, r3
 	isync
 	blr
+
+/*
+ * Start timing an activity
+ * r3 = pointer to time accumulation struct, r4 = vcpu
+ */
+kvmhv_start_timing:
+	mftb	r5
+	std	r3, VCPU_CUR_ACTIVITY(r4)
+	std	r5, VCPU_ACTIVITY_START(r4)
+	blr
+
+/*
+ * Accumulate time to one activity and start another.
+ * r3 = pointer to new time accumulation struct, r4 = vcpu
+ */
+kvmhv_accumulate_time:
+	ld	r5, VCPU_CUR_ACTIVITY(r4)
+	ld	r6, VCPU_ACTIVITY_START(r4)
+	std	r3, VCPU_CUR_ACTIVITY(r4)
+	mftb	r7
+	std	r7, VCPU_ACTIVITY_START(r4)
+	cmpdi	r5, 0
+	beqlr
+	subf	r3, r6, r7
+	ld	r8, TAS_SEQCOUNT(r5)
+	cmpdi	r8, 0
+	addi	r8, r8, 1
+	std	r8, TAS_SEQCOUNT(r5)
+	lwsync
+	ld	r7, TAS_TOTAL(r5)
+	add	r7, r7, r3
+	std	r7, TAS_TOTAL(r5)
+	ld	r6, TAS_MIN(r5)
+	ld	r7, TAS_MAX(r5)
+	beq	3f
+	cmpd	r3, r6
+	bge	1f
+3:	std	r3, TAS_MIN(r5)
+1:	cmpd	r3, r7
+	ble	2f
+	std	r3, TAS_MAX(r5)
+2:	lwsync
+	addi	r8, r8, 1
+	std	r8, TAS_SEQCOUNT(r5)
+	blr
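The seqcount field ties the real-mode assembly writer above to the debugfs reader: the writer bumps the count to an odd value before updating, and back to even afterwards, while the reader retries until it sees a stable even count. A minimal C sketch of the same protocol (kernel context assumed; the function names tb_accumulate and tb_snapshot are illustrative, not from the patch — the writer mirrors kvmhv_accumulate_time, where lwsync plays the role of smp_wmb()):

/* Writer side: one writer per vcpu, runs in real mode. */
static void tb_accumulate(struct kvmhv_tb_accumulator *acc, u64 delta)
{
	bool first = (acc->seqcount == 0);	/* very first sample? */

	acc->seqcount++;		/* odd: update in progress */
	smp_wmb();
	acc->tb_total += delta;
	if (first || delta < acc->tb_min)
		acc->tb_min = delta;
	if (delta > acc->tb_max)
		acc->tb_max = delta;
	smp_wmb();
	acc->seqcount++;		/* even again: update complete */
}

/* Reader side: retry until a stable even count is observed.
 * The debugfs code additionally bounds this loop at 1000 tries
 * and reports "stuck" if it never succeeds. */
static struct kvmhv_tb_accumulator
tb_snapshot(struct kvmhv_tb_accumulator *acc)
{
	struct kvmhv_tb_accumulator snap;
	u64 seq;

	do {
		seq = acc->seqcount;
		smp_rmb();
		snap = *acc;		/* copy out the whole struct */
		smp_rmb();
	} while ((seq & 1) || seq != acc->seqcount);
	return snap;
}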
This reads the timebase at various points in the real-mode guest
entry/exit code and uses that to accumulate total, minimum and
maximum time spent in those parts of the code. Currently these
times are accumulated per vcpu in 5 parts of the code:

* rm_entry - time taken from the start of kvmppc_hv_entry() until
  just before entering the guest.
* rm_intr - time from when we take a hypervisor interrupt in the
  guest until we either re-enter the guest or decide to exit to the
  host. This includes time spent handling hcalls in real mode.
* rm_exit - time from when we decide to exit the guest until the
  return from kvmppc_hv_entry().
* guest - time spent in the guest
* cede - time spent napping in real mode due to an H_CEDE hcall
  while other threads in the same vcore are active.

These times are exposed in debugfs in a directory per vcpu that
contains a file called "timings". This file contains one line for
each of the 5 timings above, with the name followed by a colon and
4 numbers, which are the count (number of times the code has been
executed), the total time, the minimum time, and the maximum time,
all in nanoseconds.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_host.h     |  19 +++++
 arch/powerpc/include/asm/time.h         |   3 +
 arch/powerpc/kernel/asm-offsets.c       |  11 +++
 arch/powerpc/kernel/time.c              |   6 ++
 arch/powerpc/kvm/book3s_hv.c            | 135 ++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 105 ++++++++++++++++++++++++-
 6 files changed, 276 insertions(+), 3 deletions(-)
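Given that format, a userspace consumer could be as simple as the hypothetical sketch below, which prints the count and mean time per invocation for each line; the path to the vcpu's "timings" file is passed on the command line, since the debugfs directory name depends on the VM instance.

#include <stdio.h>

int main(int argc, char **argv)
{
	/* argv[1]: path to a vcpu's "timings" file under debugfs */
	FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
	char name[32];
	unsigned long long count, total, min, max;

	if (!f)
		return 1;
	while (fscanf(f, " %31[^:]: %llu %llu %llu %llu",
		      name, &count, &total, &min, &max) == 5)
		printf("%-10s %llu calls, avg %llu ns (min %llu, max %llu)\n",
		       name, count, count ? total / count : 0, min, max);
	fclose(f);
	return 0;
}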