@@ -369,6 +369,14 @@ struct kvmppc_slb {
u8 base_page_size; /* MMU_PAGE_xxx */
};
+/* Struct used to accumulate timing information in HV real mode code */
+struct kvmhv_tb_accumulator {
+ u64 seqcount; /* used to synchronize access, also count * 2 */
+ u64 tb_total; /* total time in timebase ticks */
+ u64 tb_min; /* min time */
+ u64 tb_max; /* max time */
+};
+
# ifdef CONFIG_PPC_FSL_BOOK3E
#define KVMPPC_BOOKE_IAC_NUM 2
#define KVMPPC_BOOKE_DAC_NUM 2
@@ -657,6 +665,19 @@ struct kvm_vcpu_arch {
u32 emul_inst;
#endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */
+ u64 cur_tb_start; /* when it started */
+ struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */
+ struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */
+ struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */
+ struct kvmhv_tb_accumulator guest_time; /* guest execution */
+ struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */
+
+ struct dentry *debugfs_dir;
+ struct dentry *debugfs_timings;
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
};
#define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
@@ -211,5 +211,8 @@ extern void secondary_cpu_time_init(void);
DECLARE_PER_CPU(u64, decrementers_next_tb);
+/* Convert timebase ticks to nanoseconds */
+unsigned long long tb_to_ns(unsigned long long tb_ticks);
+
#endif /* __KERNEL__ */
#endif /* __POWERPC_TIME_H */
@@ -459,6 +459,19 @@ int main(void)
DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry));
+ DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr));
+ DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit));
+ DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time));
+ DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time));
+ DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity));
+ DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, arch.cur_tb_start));
+ DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount));
+ DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total));
+ DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min));
+ DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max));
+#endif
DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
@@ -608,6 +608,12 @@ void arch_suspend_enable_irqs(void)
}
#endif
+unsigned long long tb_to_ns(unsigned long long ticks)
+{
+ return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
+}
+EXPORT_SYMBOL_GPL(tb_to_ns);
+
/*
* Scheduler clock - returns current time in nanosec units.
*
@@ -110,6 +110,20 @@ config KVM_BOOK3S_64_PR
processor, including emulating 32-bit processors on a 64-bit
host.
+config KVM_BOOK3S_HV_EXIT_TIMING
+ bool "Detailed timing for hypervisor real-mode code"
+ depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
+ ---help---
+ Calculate time taken for each vcpu in the real-mode guest entry,
+ exit, and interrupt handling code, plus time spent in the guest
+ and in nap mode due to idle (cede) while other threads are still
+ in the guest. The total, minimum and maximum times in nanoseconds
+ together with the number of executions are reported in debugfs in
+ kvm/vm#/vcpu#/timings. The overhead is of the order of 30 - 40
+ ns per exit on POWER8.
+
+ If unsure, say N.
+
config KVM_BOOKE_HV
bool
@@ -1423,6 +1423,154 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
return vcore;
}
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static struct debugfs_timings_element {
+ const char *name;
+ size_t offset;
+} timings[] = {
+ {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)},
+ {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)},
+ {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)},
+ {"guest", offsetof(struct kvm_vcpu, arch.guest_time)},
+ {"cede", offsetof(struct kvm_vcpu, arch.cede_time)},
+};
+
+#define N_TIMINGS (sizeof(timings) / sizeof(timings[0]))
+
+struct debugfs_timings_state {
+ struct kvm_vcpu *vcpu;
+ unsigned int buflen;
+ char buf[N_TIMINGS * 100];
+};
+
+static int debugfs_timings_open(struct inode *inode, struct file *file)
+{
+ struct kvm_vcpu *vcpu = inode->i_private;
+ struct debugfs_timings_state *p;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ kvm_get_kvm(vcpu->kvm);
+ p->vcpu = vcpu;
+ file->private_data = p;
+
+ return nonseekable_open(inode, file);
+}
+
+static int debugfs_timings_release(struct inode *inode, struct file *file)
+{
+ struct debugfs_timings_state *p = file->private_data;
+
+ kvm_put_kvm(p->vcpu->kvm);
+ kfree(p);
+ return 0;
+}
+
+static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct debugfs_timings_state *p = file->private_data;
+ struct kvm_vcpu *vcpu = p->vcpu;
+ char *s, *buf_end;
+ struct kvmhv_tb_accumulator tb;
+ u64 count;
+ loff_t pos;
+ ssize_t n;
+ int i, loops;
+ bool ok;
+
+ if (!p->buflen) {
+ s = p->buf;
+ buf_end = s + sizeof(p->buf);
+ for (i = 0; i < N_TIMINGS; ++i) {
+ struct kvmhv_tb_accumulator *acc;
+
+ acc = (struct kvmhv_tb_accumulator *)
+ ((unsigned long)vcpu + timings[i].offset);
+ ok = false;
+ for (loops = 0; loops < 1000; ++loops) {
+ count = acc->seqcount;
+ if (!(count & 1)) {
+ smp_rmb();
+ tb = *acc;
+ smp_rmb();
+ if (count == acc->seqcount) {
+ ok = true;
+ break;
+ }
+ }
+ udelay(1);
+ }
+ if (!ok)
+ snprintf(s, buf_end - s, "%s: stuck\n",
+ timings[i].name);
+ else
+ snprintf(s, buf_end - s,
+ "%s: %llu %llu %llu %llu\n",
+ timings[i].name, count / 2,
+ tb_to_ns(tb.tb_total),
+ tb_to_ns(tb.tb_min),
+ tb_to_ns(tb.tb_max));
+ s += strlen(s);
+ }
+ p->buflen = s - p->buf;
+ }
+
+ pos = *ppos;
+ if (pos >= p->buflen)
+ return 0;
+ if (len > p->buflen - pos)
+ len = p->buflen - pos;
+ n = copy_to_user(buf, p->buf + pos, len);
+ if (n) {
+ if (n == len)
+ return -EFAULT;
+ len -= n;
+ }
+ *ppos = pos + len;
+ return len;
+}
+
+static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return -EACCES;
+}
+
+static const struct file_operations debugfs_timings_ops = {
+ .owner = THIS_MODULE,
+ .open = debugfs_timings_open,
+ .release = debugfs_timings_release,
+ .read = debugfs_timings_read,
+ .write = debugfs_timings_write,
+ .llseek = generic_file_llseek,
+};
+
+/* Create a debugfs directory for the vcpu */
+static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+{
+ char buf[16];
+ struct kvm *kvm = vcpu->kvm;
+
+ snprintf(buf, sizeof(buf), "vcpu%u", id);
+ if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
+ return;
+ vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
+ if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir))
+ return;
+ vcpu->arch.debugfs_timings =
+ debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir,
+ vcpu, &debugfs_timings_ops);
+}
+
+#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+{
+}
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+
static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
unsigned int id)
{
@@ -1492,6 +1640,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
+ debugfs_vcpu_init(vcpu, id);
+
return vcpu;
free_vcpu:
@@ -225,7 +225,13 @@ kvm_novcpu_wakeup:
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
ld r4, HSTATE_KVM_VCPU(r13)
cmpdi r4, 0
- bne kvmppc_got_guest
+ beq kvmppc_primary_no_guest
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ addi r3, r4, VCPU_TB_RMENTRY
+ bl kvmhv_start_timing
+#endif
+ b kvmppc_got_guest
kvm_novcpu_exit:
b hdec_soon
@@ -376,6 +382,14 @@ kvmppc_hv_entry:
li r6, KVM_GUEST_MODE_HOST_HV
stb r6, HSTATE_IN_GUEST(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ /* Store initial timestamp */
+ cmpdi r4, 0
+ beq 1f
+ addi r3, r4, VCPU_TB_RMENTRY
+ bl kvmhv_start_timing
+1:
+#endif
/* Clear out SLB */
li r6,0
slbmte r6,r6
@@ -880,6 +894,12 @@ fast_guest_return:
li r9, KVM_GUEST_MODE_GUEST_HV
stb r9, HSTATE_IN_GUEST(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ /* Accumulate timing */
+ addi r3, r4, VCPU_TB_GUEST
+ bl kvmhv_accumulate_time
+#endif
+
/* Enter guest */
BEGIN_FTR_SECTION
@@ -917,6 +937,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
hrfid
b .
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+secondary_too_late:
+ cmpdi r4, 0
+ beq 11f
+ addi r3, r4, VCPU_TB_RMEXIT
+ bl kvmhv_accumulate_time
+11: b kvmhv_switch_to_host
+
+hdec_soon:
+ ld r4, HSTATE_KVM_VCPU(r13)
+ cmpdi r4, 0
+ beq 12f
+ addi r3, r4, VCPU_TB_RMEXIT
+ bl kvmhv_accumulate_time
+12: b kvmhv_do_exit
+#endif
+
/******************************************************************************
* *
* Exit code *
@@ -1002,6 +1039,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
stw r12,VCPU_TRAP(r9)
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ addi r3, r9, VCPU_TB_RMINTR
+ mr r4, r9
+ bl kvmhv_accumulate_time
+ ld r5, VCPU_GPR(R5)(r9)
+ ld r6, VCPU_GPR(R6)(r9)
+ ld r7, VCPU_GPR(R7)(r9)
+ ld r8, VCPU_GPR(R8)(r9)
+#endif
+
/* Save HEIR (HV emulation assist reg) in emul_inst
if this is an HEI (HV emulation interrupt, e40) */
li r3,KVM_INST_FETCH_FAILED
@@ -1073,6 +1120,11 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
beq machine_check_realmode
mc_cont:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ addi r3, r9, VCPU_TB_RMEXIT
+ mr r4, r9
+ bl kvmhv_accumulate_time
+#endif
/* Save guest CTRL register, set runlatch to 1 */
6: mfspr r6,SPRN_CTRLF
@@ -1417,7 +1469,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
slbia
ptesync
-hdec_soon: /* r12 = trap, r13 = paca */
+#ifndef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+hdec_soon:
+#endif
+kvmhv_do_exit: /* r12 = trap, r13 = paca */
/*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
@@ -1476,7 +1531,10 @@ hdec_soon: /* r12 = trap, r13 = paca */
addi r6,r6,PACA_SIZE
bne 42b
+#ifndef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
secondary_too_late:
+#endif
+kvmhv_switch_to_host:
/* Secondary threads wait for primary to do partition switch */
43: ld r5,HSTATE_KVM_VCORE(r13)
ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
@@ -1562,6 +1620,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1: addi r8,r8,16
.endr
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ /* Finish timing, if we have a vcpu */
+ ld r4, HSTATE_KVM_VCPU(r13)
+ cmpdi r4, 0
+ li r3, 0
+ beq 2f
+ bl kvmhv_accumulate_time
+2:
+#endif
/* Unset guest mode */
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
@@ -2069,6 +2136,12 @@ _GLOBAL(kvmppc_h_cede)
/* save FP state */
bl kvmppc_save_fp
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ ld r4, HSTATE_KVM_VCPU(r13)
+ addi r3, r4, VCPU_TB_CEDE
+ bl kvmhv_accumulate_time
+#endif
+
/*
* Take a nap until a decrementer or external or doobell interrupt
* occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
@@ -2109,6 +2182,11 @@ kvm_end_cede:
/* Woken by external or decrementer interrupt */
ld r1, HSTATE_HOST_R1(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ addi r3, r4, VCPU_TB_RMINTR
+ bl kvmhv_accumulate_time
+#endif
+
/* load up FP state */
bl kvmppc_load_fp
@@ -2429,3 +2507,62 @@ kvmppc_fix_pmao:
mtspr SPRN_PMC6, r3
isync
blr
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+/*
+ * Start timing an activity
+ * r3 = pointer to time accumulation struct, r4 = vcpu
+ */
+kvmhv_start_timing:
+ ld r5, HSTATE_KVM_VCORE(r13)
+ lbz r6, VCORE_IN_GUEST(r5)
+ cmpwi r6, 0
+ beq 5f /* if in guest, need to */
+ ld r6, VCORE_TB_OFFSET(r5) /* subtract timebase offset */
+5: mftb r5
+ subf r5, r6, r5
+ std r3, VCPU_CUR_ACTIVITY(r4)
+ std r5, VCPU_ACTIVITY_START(r4)
+ blr
+
+/*
+ * Accumulate time to one activity and start another.
+ * r3 = pointer to new time accumulation struct, r4 = vcpu
+ */
+kvmhv_accumulate_time:
+ ld r5, HSTATE_KVM_VCORE(r13)
+ lbz r8, VCORE_IN_GUEST(r5)
+ cmpwi r8, 0
+ beq 4f /* if in guest, need to */
+ ld r8, VCORE_TB_OFFSET(r5) /* subtract timebase offset */
+4: ld r5, VCPU_CUR_ACTIVITY(r4)
+ ld r6, VCPU_ACTIVITY_START(r4)
+ std r3, VCPU_CUR_ACTIVITY(r4)
+ mftb r7
+ subf r7, r8, r7
+ std r7, VCPU_ACTIVITY_START(r4)
+ cmpdi r5, 0
+ beqlr
+ subf r3, r6, r7
+ ld r8, TAS_SEQCOUNT(r5)
+ cmpdi r8, 0
+ addi r8, r8, 1
+ std r8, TAS_SEQCOUNT(r5)
+ lwsync
+ ld r7, TAS_TOTAL(r5)
+ add r7, r7, r3
+ std r7, TAS_TOTAL(r5)
+ ld r6, TAS_MIN(r5)
+ ld r7, TAS_MAX(r5)
+ beq 3f
+ cmpd r3, r6
+ bge 1f
+3: std r3, TAS_MIN(r5)
+1: cmpd r3, r7
+ ble 2f
+ std r3, TAS_MAX(r5)
+2: lwsync
+ addi r8, r8, 1
+ std r8, TAS_SEQCOUNT(r5)
+ blr
+#endif