@@ -139,7 +139,7 @@ typedef struct CPUWatchpoint {
struct qemu_work_item;
struct KVMCPUState {
- pthread_t thread;
+ pthread_t *thread;
int signalled;
struct qemu_work_item *queued_work_first, *queued_work_last;
int regs_modified;
@@ -781,7 +781,7 @@ void qemu_system_cpu_hot_add(int cpu, int state)
CPUState *env;
if (state && !qemu_get_cpu(cpu)) {
- env = pc_new_cpu(model);
+ pc_new_cpu(model, &env);
if (!env) {
fprintf(stderr, "cpu %d creation failed\n", cpu);
return;
@@ -1013,29 +1013,26 @@ int cpu_is_bsp(CPUState *env)
return env->cpuid_apic_id == 0;
}
-CPUState *pc_new_cpu(const char *cpu_model)
+void pc_new_cpu(const char *cpu_model, CPUState **env)
{
- CPUState *env;
-
- env = cpu_init(cpu_model);
- if (!env) {
+ *env = cpu_init(cpu_model);
+ if (!*env) {
fprintf(stderr, "Unable to find x86 CPU definition\n");
exit(1);
}
- env->kvm_cpu_state.regs_modified = 1;
- if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
- env->cpuid_apic_id = env->cpu_index;
+ (*env)->kvm_cpu_state.regs_modified = 1;
+ if (((*env)->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
+ (*env)->cpuid_apic_id = (*env)->cpu_index;
/* APIC reset callback resets cpu */
- apic_init(env);
+ apic_init(*env);
} else {
- qemu_register_reset((QEMUResetHandler*)cpu_reset, env);
+ qemu_register_reset((QEMUResetHandler*)cpu_reset, *env);
}
/* kvm needs this to run after the apic is initialized. Otherwise,
* it can access invalid state and crash.
*/
- qemu_init_vcpu(env);
- return env;
+ qemu_init_vcpu(*env);
}
/* PC hardware initialisation */
@@ -1055,7 +1052,6 @@ static void pc_init1(ram_addr_t ram_size,
PCIBus *pci_bus;
ISADevice *isa_dev;
int piix3_devfn = -1;
- CPUState *env;
qemu_irq *cpu_irq;
qemu_irq *isa_irq;
qemu_irq *i8259;
@@ -1086,8 +1082,10 @@ static void pc_init1(ram_addr_t ram_size,
if (kvm_enabled()) {
kvm_set_boot_cpu_id(0);
}
+
for (i = 0; i < smp_cpus; i++) {
- env = pc_new_cpu(cpu_model);
+// pc_new_cpu(cpu_model, &env);
+ kvm_init_vcpu(NULL);
}
vmport_init();
@@ -100,7 +100,7 @@ extern int fd_bootchk;
void ioport_set_a20(int enable);
int ioport_get_a20(void);
-CPUState *pc_new_cpu(const char *cpu_model);
+void pc_new_cpu(const char *cpu_model, CPUState **env);
/* acpi.c */
extern int acpi_enabled;
@@ -160,6 +160,11 @@ int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes,
return 0;
}
+void kvm_arch_create_vcpu(const char *model, CPUState **env)
+{
+ pc_new_cpu("qemu64", env);
+}
+
#ifdef KVM_EXIT_TPR_ACCESS
static int kvm_handle_tpr_access(CPUState *env)
@@ -436,10 +436,18 @@ void kvm_disable_pit_creation(kvm_context_t kvm)
kvm->no_pit_creation = 1;
}
-static void kvm_create_vcpu(CPUState *env, int id)
+
+static void kvm_create_vcpu(CPUState **_env)
{
long mmap_size;
int r;
+ int id;
+ CPUState *env;
+
+ kvm_arch_create_vcpu("qemu64", _env);
+ env = *(CPUState **)_env;
+
+ id = env->cpu_index;
r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
if (r < 0) {
@@ -1549,11 +1557,13 @@ static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
struct qemu_work_item wi;
+ static int remotes;
if (env == current_env) {
func(data);
return;
}
+ printf("remotes: %d\n", remotes++);
wi.func = func;
wi.data = data;
@@ -1565,7 +1575,7 @@ static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
wi.next = NULL;
wi.done = false;
- pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
+ pthread_kill(*env->kvm_cpu_state.thread, SIG_IPI);
while (!wi.done)
qemu_cond_wait(&qemu_work_cond);
}
@@ -1616,8 +1626,8 @@ void kvm_update_interrupt_request(CPUState *env)
if (signal) {
env->kvm_cpu_state.signalled = 1;
- if (env->kvm_cpu_state.thread)
- pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
+ if (*env->kvm_cpu_state.thread)
+ pthread_kill(*env->kvm_cpu_state.thread, SIG_IPI);
}
}
}
@@ -1840,7 +1850,7 @@ static void pause_all_threads(void)
while (penv) {
if (penv != cpu_single_env) {
penv->stop = 1;
- pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
+ pthread_kill(*penv->kvm_cpu_state.thread, SIG_IPI);
} else {
penv->stop = 0;
penv->stopped = 1;
@@ -1862,7 +1872,7 @@ static void resume_all_threads(void)
while (penv) {
penv->stop = 0;
penv->stopped = 0;
- pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
+ pthread_kill(*penv->kvm_cpu_state.thread, SIG_IPI);
penv = (CPUState *) penv->next_cpu;
}
}
@@ -1934,19 +1944,23 @@ static int kvm_main_loop_cpu(CPUState *env)
return 0;
}
+extern void pc_new_cpu(const char *c, CPUState **env);
+
static void *ap_main_loop(void *_env)
{
- CPUState *env = _env;
+ CPUState *env;
sigset_t signals;
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
struct ioperm_data *data = NULL;
#endif
- current_env = env;
- env->thread_id = kvm_get_thread_id();
sigfillset(&signals);
sigprocmask(SIG_BLOCK, &signals, NULL);
- kvm_create_vcpu(env, env->cpu_index);
+ kvm_create_vcpu(&env);
+
+ *(CPUState **)_env = env;
+ current_env = env;
+ env->thread_id = kvm_get_thread_id();
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
/* do ioperm for io ports of assigned devices */
@@ -1981,12 +1995,16 @@ static void *ap_main_loop(void *_env)
return NULL;
}
-void kvm_init_vcpu(CPUState *env)
+void kvm_init_vcpu(CPUState *_env)
{
- pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
+ CPUState *env = NULL;
+ pthread_t *thread = qemu_malloc(sizeof(*thread));
+
+ pthread_create(thread, NULL, ap_main_loop, &env);
- while (env->created == 0)
+ while (env == NULL)
qemu_cond_wait(&qemu_vcpu_cond);
+ env->kvm_cpu_state.thread = thread;
}
int kvm_vcpu_inited(CPUState *env)
@@ -87,6 +87,8 @@ int kvm_alloc_userspace_memory(kvm_context_t kvm, unsigned long memory,
int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes,
void **vm_mem);
+void kvm_arch_create_vcpu(const char *model, CPUState **env);
+
int kvm_arch_run(CPUState *env);
@@ -3570,8 +3570,6 @@ void qemu_init_vcpu(void *_env)
{
CPUState *env = _env;
- if (kvm_enabled())
- kvm_init_vcpu(env);
env->nr_cores = smp_cores;
env->nr_threads = smp_threads;
return;
Right now, we issue cpu creation from the i/o thread, and then spawn a thread from inside that code. Over the last months, a lot of subtle bugs were reported, usually arising from the very fragile ordering of that initialization. I propose we rethink that a little. This is a patch that has received basic testing only, and I'd like to hear opinions on the overall direction. The idea is to spawn the new thread as early as possible. The first direct benefits I can identify are that we no longer have to rely on on_vcpu-like schemes for issuing vcpu ioctls, since we are already on the right thread. Apic creation has far fewer spots for race conditions as well. I am implementing this in qemu-kvm first, since the benefits can be demonstrated a bit better there (since we already support smp). Let me know what you think. Signed-off-by: Glauber Costa <glommer@redhat.com> CC: Marcelo Tosatti <mtosatti@redhat.com> CC: Avi Kivity <avi@redhat.com> CC: Jan Kiszka <jan.kiszka@siemens.com> CC: Anthony Liguori <aliguori@us.ibm.com> --- cpu-defs.h | 2 +- hw/acpi.c | 2 +- hw/pc.c | 26 ++++++++++++-------------- hw/pc.h | 2 +- qemu-kvm-x86.c | 5 +++++ qemu-kvm.c | 44 +++++++++++++++++++++++++++++++------------- qemu-kvm.h | 2 ++ vl.c | 2 -- 8 files changed, 53 insertions(+), 32 deletions(-)