diff mbox

[RESEND,1/2] kvmclock: use a light weight interface to update env->tsc.

Message ID 1446106927-15490-2-git-send-email-liang.z.li@intel.com
State New
Headers show

Commit Message

Li, Liang Z Oct. 29, 2015, 8:22 a.m. UTC
Commit 317b0a6d8 fixed an issue caused by an outdated env->tsc value,
but the fix leads to cpu_synchronize_all_states() being called twice during
live migration. cpu_synchronize_all_states() takes about 130us for a VM
with 4 vcpus, which is a bit expensive.

This patch uses a lightweight interface to update the env->tsc value; it
only takes about 20us to update env->tsc.

Signed-off-by: Liang Li <liang.z.li@intel.com>
---
 hw/i386/kvm/clock.c    | 26 +++++++++-----------------
 target-i386/kvm.c      | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 target-i386/kvm_i386.h |  1 +
 3 files changed, 56 insertions(+), 17 deletions(-)

Comments

Paolo Bonzini Nov. 2, 2015, 3:04 p.m. UTC | #1
On 29/10/2015 09:22, Liang Li wrote:
> +int kvm_get_tsc(CPUState *cs)
> +{
> +    X86CPU *cpu = X86_CPU(cs);
> +    CPUX86State *env = &cpu->env;
> +    struct {
> +        struct kvm_msrs info;
> +        struct kvm_msr_entry entries[1];
> +    } msr_data;
> +    struct kvm_msr_entry *msrs = msr_data.entries;
> +    int ret, i, n;
> +
> +    n = 0;
> +
> +    if (!env->tsc_valid) {
> +        msrs[n++].index = MSR_IA32_TSC;
> +        env->tsc_valid = !runstate_is_running();
> +    }
> +
> +    if (n == 0) {
> +        return 0;
> +    }
> +
> +    msr_data.info = (struct kvm_msrs) {
> +        .nmsrs = n,
> +    };
> +
> +    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    for (i = 0; i < ret; i++) {
> +        uint32_t index = msrs[i].index;
> +        switch (index) {
> +        case MSR_IA32_TSC:
> +            env->tsc = msrs[i].data;
> +            break;
> +        default:
> +            break;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +

This can be simplified a bit:

/*
 * Refresh env->tsc for one vCPU by reading only MSR_IA32_TSC with
 * KVM_GET_MSRS.
 *
 * If env->tsc_valid is already set, the cached value is kept and no ioctl
 * is issued.
 *
 * Returns 0 on success (including the "already valid" case), or the
 * negative value returned by kvm_vcpu_ioctl() on failure.
 *
 * NOTE(review): this issues a vCPU ioctl, so it is expected to be called
 * from the vCPU's own thread (e.g. via run_on_cpu) -- confirm at call sites.
 */
int kvm_get_tsc(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    /*
     * Designated initializers zero every field that is not named, so no
     * uninitialized stack bytes (padding, reserved fields, data) are handed
     * to the kernel.
     */
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[1];
    } msr_data = {
        .info.nmsrs = 1,
        .entries[0].index = MSR_IA32_TSC,
    };
    int ret;

    if (env->tsc_valid) {
        return 0;
    }

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
    if (ret < 0) {
        return ret;
    }

    /*
     * A non-negative return is treated as the number of entries that were
     * actually read.  Only consume entries[0] when it was filled in, and
     * only then mark the cached TSC as valid, so a failed or empty read
     * never leaves a stale value flagged as current.  The value is cached
     * only while the VM is not running, since a running guest's TSC keeps
     * advancing.
     */
    if (ret > 0) {
        env->tsc = msr_data.entries[0].data;
        env->tsc_valid = !runstate_is_running();
    }

    return 0;
}

> 
> +        CPU_FOREACH(cpu) {
> +            ret = kvm_get_tsc(cpu);
> +            if (ret < 0) {
> +                fprintf(stderr, "KVM_GET_MSRS failed: %s\n", strerror(ret));
> +                abort();
> +                return;
> +            }
> +        }


This should be run in the appropriate thread using run_on_cpu.  VCPU
ioctls should only be invoked from the VCPU thread.  So you should
introduce a new function kvm_synchronize_all_tsc() or something like that.

Otherwise, the idea behind the patches is fine.  Thanks!

Paolo
Li, Liang Z Nov. 2, 2015, 3:11 p.m. UTC | #2
> This can be simplified a bit:
> 
> int kvm_get_tsc(CPUState *cs)
> {
>     X86CPU *cpu = X86_CPU(cs);
>     CPUX86State *env = &cpu->env;
>     struct {
>         struct kvm_msrs info;
>         struct kvm_msr_entry entries[1];
>     } msr_data;
>     int ret;
> 
>     if (env->tsc_valid) {
>         return 0;
>     }
> 
>     msr_data.info.nmsrs = 1;
>     msr_data.entries[0].index = MSR_IA32_TSC;
>     env->tsc_valid = !runstate_is_running();
> 
>     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
>     if (ret < 0) {
>         return ret;
>     }
> 
>     env->tsc = msr_data.entries[0].data;
>     return 0;
> }
> 
> >
> > +        CPU_FOREACH(cpu) {
> > +            ret = kvm_get_tsc(cpu);
> > +            if (ret < 0) {
> > +                fprintf(stderr, "KVM_GET_MSRS failed: %s\n", strerror(ret));
> > +                abort();
> > +                return;
> > +            }
> > +        }
> 
> 
> This should be run in the appropriate thread using run_on_cpu.  VCPU ioctls
> should only be invoked from the VCPU thread.  So you should introduce a new
> function kvm_synchronize_all_tsc() or something like that.
> 
> Otherwise, the idea behind the patches is fine.  Thanks!
> 
> Paolo

Thanks for your comments, I will send the v3.

Liang
diff mbox

Patch

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index efdf165..2e69cdb 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -17,7 +17,7 @@ 
 #include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
-#include "sysemu/cpus.h"
+#include "kvm_i386.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
 
@@ -125,22 +125,14 @@  static void kvmclock_vm_state_change(void *opaque, int running,
             return;
         }
 
-        cpu_synchronize_all_states();
-        /* In theory, the cpu_synchronize_all_states() call above wouldn't
-         * affect the rest of the code, as the VCPU state inside CPUState
-         * is supposed to always match the VCPU state on the kernel side.
-         *
-         * In practice, calling cpu_synchronize_state() too soon will load the
-         * kernel-side APIC state into X86CPU.apic_state too early, APIC state
-         * won't be reloaded later because CPUState.vcpu_dirty==true, and
-         * outdated APIC state may be migrated to another host.
-         *
-         * The real fix would be to make sure outdated APIC state is read
-         * from the kernel again when necessary. While this is not fixed, we
-         * need the cpu_clean_all_dirty() call below.
-         */
-        cpu_clean_all_dirty();
-
+        CPU_FOREACH(cpu) {
+            ret = kvm_get_tsc(cpu);
+            if (ret < 0) {
+                fprintf(stderr, "KVM_GET_MSRS failed: %s\n", strerror(ret));
+                abort();
+                return;
+            }
+        }
         ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
         if (ret < 0) {
             fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 64046cb..eae90e1 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -111,6 +111,52 @@  bool kvm_allows_irq0_override(void)
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
 }
 
+int kvm_get_tsc(CPUState *cs)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+    struct {
+        struct kvm_msrs info;
+        struct kvm_msr_entry entries[1];
+    } msr_data;
+    struct kvm_msr_entry *msrs = msr_data.entries;
+    int ret, i, n;
+
+    n = 0;
+
+    if (!env->tsc_valid) {
+        msrs[n++].index = MSR_IA32_TSC;
+        env->tsc_valid = !runstate_is_running();
+    }
+
+    if (n == 0) {
+        return 0;
+    }
+
+    msr_data.info = (struct kvm_msrs) {
+        .nmsrs = n,
+    };
+
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
+    if (ret < 0) {
+        return ret;
+    }
+
+    for (i = 0; i < ret; i++) {
+        uint32_t index = msrs[i].index;
+        switch (index) {
+        case MSR_IA32_TSC:
+            env->tsc = msrs[i].data;
+            break;
+        default:
+            break;
+        }
+    }
+
+    return 0;
+}
+
+
 static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
 {
     struct kvm_cpuid2 *cpuid;
diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h
index e557e94..141fab5 100644
--- a/target-i386/kvm_i386.h
+++ b/target-i386/kvm_i386.h
@@ -17,6 +17,7 @@  bool kvm_allows_irq0_override(void);
 bool kvm_has_smm(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);
 void kvm_arch_do_init_vcpu(X86CPU *cs);
+int  kvm_get_tsc(CPUState *cs);
 
 int kvm_device_pci_assign(KVMState *s, PCIHostDeviceAddress *dev_addr,
                           uint32_t flags, uint32_t *dev_id);