@@ -12,4 +12,42 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
struct timespec *ts);
void pvclock_resume(void);
+/*
+ * Scale a 64-bit delta by shifting it, then multiplying by a 32-bit
+ * fraction, yielding a 64-bit result.
+ */
+static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+ u64 product;
+#ifdef __i386__
+ u32 tmp1, tmp2;
+#endif
+
+ if (shift < 0)
+ delta >>= -shift;
+ else
+ delta <<= shift;
+
+#ifdef __i386__
+ __asm__ (
+ "mul %5 ; "
+ "mov %4,%%eax ; "
+ "mov %%edx,%4 ; "
+ "mul %5 ; "
+ "xor %5,%5 ; "
+ "add %4,%%eax ; "
+ "adc %5,%%edx ; "
+ : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+ : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif defined(__x86_64__)
+ __asm__ (
+ "mul %%rdx ; shrd $32,%%rdx,%%rax"
+ : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+ return product;
+}
+
#endif /* _ASM_X86_PVCLOCK_H */
@@ -74,7 +74,8 @@ static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
{
u64 delta = native_read_tsc() - shadow->tsc_timestamp;
- return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+ return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
+ shadow->tsc_shift);
}
/*
--
1.7.0.4
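
For reference, the following is a standalone user-space sketch of what
pvclock_scale_delta() computes: shift the delta, then keep bits [95:32] of
its product with the 32-bit fraction. It is not the kernel code; it leans on
the GCC/Clang unsigned __int128 extension instead of the hand-written
mul/shrd sequences, and the 2.5 GHz figure is only an illustrative TSC rate.

/* Reference sketch (not kernel code) of pvclock_scale_delta():
 * ((delta shifted) * mul_frac) >> 32, done with __int128 arithmetic.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t scale_delta_ref(uint64_t delta, uint32_t mul_frac, int shift)
{
	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

	/* 64x32 -> 96-bit product; keep bits [95:32]. */
	return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
}

int main(void)
{
	/* Illustrative 2.5 GHz TSC: one cycle is 0.4 ns, so ~0.4 * 2^32. */
	uint32_t mul_frac = 1717986918;

	/* One second's worth of cycles should scale to roughly 1e9 ns. */
	printf("%llu\n",
	       (unsigned long long)scale_delta_ref(2500000000ULL, mul_frac, 0));
	return 0;
}
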
From 06bd89325e6441c130c8dc71859867f6d68d617c Mon Sep 17 00:00:00 2001
From: Serge E. Hallyn <serge.hallyn@canonical.com>
Date: Wed, 2 Mar 2011 20:05:34 +0000
Subject: [PATCH 2/3] KVM: x86: Fix a possible backwards warp of kvmclock
Kernel time, which advances in discrete steps, may progress much slower
than the TSC. As a result, when kvmclock is adjusted to a new base, the
apparent time to the guest, which runs at a much higher, nsec-scaled
rate based on the current TSC, may have already been observed to have
a larger value (kernel_ns + scaled tsc) than the value to which we are
setting it (kernel_ns + 0).
We must instead compute the clock as potentially observed by the guest
and use that as kernel_ns, to make sure it does not go backwards.
Signed-off-by: Zachary Amsden <zamsden@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
BugLink: http://bugs.launchpad.net/bugs/714335
(backported from commit 1d5f066e0b63271b67eac6d3752f8aa96adcbddb)
Signed-off-by: Serge E. Hallyn <serge.hallyn@canonical.com>
Reviewed-by: Stefan Bader <stefan.bader@canonical.com>
---
arch/x86/include/asm/kvm_host.h | 3 ++
arch/x86/kvm/x86.c | 47 ++++++++++++++++++++++++++++++++++++--
2 files changed, 47 insertions(+), 3 deletions(-)
@@ -357,6 +357,9 @@ struct kvm_vcpu_arch {
struct page *time_page;
bool singlestep; /* guest is single stepped by KVM */
+ u64 last_guest_tsc;
+ u64 last_kernel_ns;
+
bool nmi_pending;
bool nmi_injected;
@@ -47,6 +47,7 @@
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
+#include <asm/pvclock.h>
#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS \
@@ -633,6 +634,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
struct kvm_vcpu_arch *vcpu = &v->arch;
void *shared_kaddr;
unsigned long this_tsc_khz;
+ s64 kernel_ns, max_kernel_ns;
+ u64 tsc_timestamp;
if ((!vcpu->time_page))
return;
@@ -646,15 +649,51 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
- kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
+ kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
ktime_get_ts(&ts);
monotonic_to_bootbased(&ts);
+ kernel_ns = timespec_to_ns(&ts);
local_irq_restore(flags);
+ /*
+ * Time as measured by the TSC may go backwards when resetting the base
+ * tsc_timestamp. The reason for this is that the TSC resolution is
+ * higher than the resolution of the other clock scales. Thus, many
+ * possible measurements of the TSC correspond to one measurement of any
+ * other clock, and so a spread of values is possible. This is not a
+ * problem for the computation of the nanosecond clock; with TSC rates
+ * around 1 GHz, there can only be a few cycles which correspond to one
+ * nanosecond value, and any path through this code will inevitably
+ * take longer than that. However, with the kernel_ns value itself,
+ * the precision may be much lower, down to HZ granularity. If the
+ * first sampling of TSC against kernel_ns ends in the low part of the
+ * range, and the second in the high end of the range, we can get:
+ *
+ * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
+ *
+ * As the sampling errors potentially range in the thousands of cycles,
+ * it is possible such a time value has already been observed by the
+ * guest. To protect against this, we must compute the system time as
+ * observed by the guest and ensure the new system time is greater.
+ */
+ max_kernel_ns = 0;
+ if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) {
+ max_kernel_ns = vcpu->last_guest_tsc -
+ vcpu->hv_clock.tsc_timestamp;
+ max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
+ vcpu->hv_clock.tsc_to_system_mul,
+ vcpu->hv_clock.tsc_shift);
+ max_kernel_ns += vcpu->last_kernel_ns;
+ }
+
+ if (max_kernel_ns > kernel_ns)
+ kernel_ns = max_kernel_ns;
+
/* With all the info we got, fill in the values */
- vcpu->hv_clock.system_time = ts.tv_nsec +
- (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
+ vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
+ vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
+ vcpu->last_kernel_ns = kernel_ns;
/*
* The interface expects us to write an even number signaling that the
@@ -3695,6 +3734,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_x86_ops->prepare_guest_switch(vcpu);
kvm_load_guest_fpu(vcpu);
+ kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
+
local_irq_disable();
clear_bit(KVM_REQ_KICK, &vcpu->requests);
--
1.7.0.4
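
To see what the max_kernel_ns guard above protects against, here is a
minimal user-space sketch of the clamp. The scale() helper, the variable
names and all numbers are illustrative stand-ins for pvclock_scale_delta()
and the vcpu's cached hv_clock fields, not the kernel's actual values, and
it again assumes the GCC/Clang __int128 extension.

/* Minimal sketch (not kernel code) of the backwards-warp clamp. */
#include <stdint.h>
#include <stdio.h>

/* (delta * mul_frac) >> 32, as pvclock_scale_delta() with shift == 0. */
static uint64_t scale(uint64_t delta, uint32_t mul_frac)
{
	return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
}

int main(void)
{
	uint32_t mul_frac       = 1717986918;    /* ~0.4 ns/cycle (2.5 GHz)     */
	uint64_t old_tsc_base   = 1000000000ULL; /* tsc_timestamp of last update */
	uint64_t last_guest_tsc = 1000025000ULL; /* newest TSC the guest saw     */
	int64_t  last_kernel_ns = 5000000000LL;  /* system_time of last update   */
	int64_t  kernel_ns      = 5000002000LL;  /* coarse new kernel time (ns)  */

	/* Time the guest may already have observed under the old parameters. */
	int64_t max_kernel_ns = last_kernel_ns +
		(int64_t)scale(last_guest_tsc - old_tsc_base, mul_frac);

	/* Never publish a smaller base, or kvmclock would warp backwards. */
	if (max_kernel_ns > kernel_ns)
		kernel_ns = max_kernel_ns;

	printf("new system_time base: %lld ns\n", (long long)kernel_ns);
	return 0;
}
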
From 36cf90d9adc1fcdd98eeb89b97948882fbd2d1db Mon Sep 17 00:00:00 2001
From: Serge E. Hallyn <serge.hallyn@canonical.com>
Date: Wed, 2 Mar 2011 20:07:48 +0000
Subject: [PATCH 3/3] KVM: x86: Fix kvmclock bug
If preempted after kvmclock values are updated, but before hardware
virtualization is entered, the last TSC value as read by the guest is
never set. It underflows the next time kvmclock is updated if there
has not yet been a successful entry/exit into hardware virtualization.
Fix this by simply setting last_guest_tsc to the newly read TSC value so
that any computed nsec advance of kvmclock is nulled.
Signed-off-by: Zachary Amsden <zamsden@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
BugLink: http://bugs.launchpad.net/bugs/714335
(backported from commit 28e4639adf0c9f26f6bb56149b7ab547bf33bb95)
Signed-off-by: Serge E. Hallyn <serge.hallyn@canonical.com>
Reviewed-by: Stefan Bader <stefan.bader@canonical.com>
---
arch/x86/kvm/x86.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
@@ -694,6 +694,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_kernel_ns = kernel_ns;
+ vcpu->last_guest_tsc = tsc_timestamp;
/*
* The interface expects us to write an even number signaling that the