Message ID | 1254953315-5761-5-git-send-email-glommer@redhat.com |
---|---|
State | Changes Requested |
Headers | show |
Glauber Costa wrote: > This patch provides kvm with an in-kernel apic. We are currently not enabling it. > The code is heavily based on what's in qemu-kvm.git. > > Signed-off-by: Glauber Costa <glommer@redhat.com> > --- > hw/apic.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++++++-- > kvm.h | 3 + > target-i386/kvm.c | 18 +++++++ > 3 files changed, 152 insertions(+), 4 deletions(-) > > diff --git a/hw/apic.c b/hw/apic.c > index c89008e..5635607 100644 > --- a/hw/apic.c > +++ b/hw/apic.c > @@ -299,7 +299,11 @@ void cpu_set_apic_base(CPUState *env, uint64_t val) > #endif > if (!s) > return; > - s->apicbase = (val & 0xfffff000) | > + > + if (kvm_enabled() && kvm_irqchip_in_kernel()) > + s->apicbase = val; > + else > + s->apicbase = (val & 0xfffff000) | > (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE)); > /* if disabled, cannot be enabled again */ > if (!(val & MSR_IA32_APICBASE_ENABLE)) { > @@ -497,6 +501,13 @@ void apic_init_reset(CPUState *env) > s->wait_for_sipi = 1; > > env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP); > + > +#ifdef KVM_CAP_MP_STATE > + if (kvm_enabled() && kvm_irqchip_in_kernel()) > + env->mp_state > + = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE; > +#endif > I don't think CAP_MP_STATE should be treated as an optional feature. > +static int kvm_kernel_lapic_load_from_user(APICState *s) > +{ > + int r = 0; > +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386) > + struct kvm_lapic_state apic; > + struct kvm_lapic_state *klapic = &apic; > + int i; > + > + if (!(kvm_enabled() && kvm_irqchip_in_kernel())) > + return 0; > + > + memset(klapic, 0, sizeof apic); > + kapic_set_reg(klapic, 0x2, s->id << 24); > + kapic_set_reg(klapic, 0x8, s->tpr); > + kapic_set_reg(klapic, 0xd, s->log_dest << 24); > + kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff); > + kapic_set_reg(klapic, 0xf, s->spurious_vec); > + for (i = 0; i < 8; i++) { > + kapic_set_reg(klapic, 0x10 + i, s->isr[i]); > + kapic_set_reg(klapic, 0x18 + i, s->tmr[i]); > + kapic_set_reg(klapic, 0x20 + i, s->irr[i]); > + } > + kapic_set_reg(klapic, 0x28, s->esr); > + kapic_set_reg(klapic, 0x30, s->icr[0]); > + kapic_set_reg(klapic, 0x31, s->icr[1]); > + for (i = 0; i < APIC_LVT_NB; i++) > + kapic_set_reg(klapic, 0x32 + i, s->lvt[i]); > + kapic_set_reg(klapic, 0x38, s->initial_count); > + kapic_set_reg(klapic, 0x3e, s->divide_conf); > + > + r = kvm_set_lapic(s->cpu_env, klapic); > +#endif > + return r; > +} > You should probably just setup VMState such that it directly saves kvm_lapic_state and then have the pre/post functions call the kernel ioctls to sync it. There's not a whole lot of point switching the state between two different structures. > static const VMStateDescription vmstate_apic = { > .name = "apic", > .version_id = 3, > .minimum_version_id = 3, > .minimum_version_id_old = 1, > .load_state_old = apic_load_old, > + .pre_save = apic_pre_save, > + .post_load = apic_post_load, > .fields = (VMStateField []) { > VMSTATE_UINT32(apicbase, APICState), > VMSTATE_UINT8(id, APICState), > @@ -933,9 +1052,8 @@ static const VMStateDescription vmstate_apic = { > } > }; > Same applies here as ioapic. Should be a separate device.
On 10/08/2009 03:55 PM, Anthony Liguori wrote: > Glauber Costa wrote: >> This patch provides kvm with an in-kernel apic. We are currently not >> enabling it. >> The code is heavily based on what's in qemu-kvm.git. >> >> Signed-off-by: Glauber Costa <glommer@redhat.com> >> --- >> hw/apic.c | 135 >> +++++++++++++++++++++++++++++++++++++++++++++++++++-- >> kvm.h | 3 + >> target-i386/kvm.c | 18 +++++++ >> 3 files changed, 152 insertions(+), 4 deletions(-) >> >> diff --git a/hw/apic.c b/hw/apic.c >> index c89008e..5635607 100644 >> --- a/hw/apic.c >> +++ b/hw/apic.c >> @@ -299,7 +299,11 @@ void cpu_set_apic_base(CPUState *env, uint64_t val) >> #endif >> if (!s) >> return; >> - s->apicbase = (val & 0xfffff000) | >> + >> + if (kvm_enabled() && kvm_irqchip_in_kernel()) >> + s->apicbase = val; >> + else >> + s->apicbase = (val & 0xfffff000) | >> (s->apicbase & (MSR_IA32_APICBASE_BSP | >> MSR_IA32_APICBASE_ENABLE)); >> /* if disabled, cannot be enabled again */ >> if (!(val & MSR_IA32_APICBASE_ENABLE)) { >> @@ -497,6 +501,13 @@ void apic_init_reset(CPUState *env) >> s->wait_for_sipi = 1; >> >> env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP); >> + >> +#ifdef KVM_CAP_MP_STATE >> + if (kvm_enabled() && kvm_irqchip_in_kernel()) >> + env->mp_state >> + = env->halted ? KVM_MP_STATE_UNINITIALIZED : >> KVM_MP_STATE_RUNNABLE; >> +#endif > > I don't think CAP_MP_STATE should be treated as an optional feature. > >> +static int kvm_kernel_lapic_load_from_user(APICState *s) >> +{ >> + int r = 0; >> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386) >> + struct kvm_lapic_state apic; >> + struct kvm_lapic_state *klapic = &apic; >> + int i; >> + >> + if (!(kvm_enabled() && kvm_irqchip_in_kernel())) >> + return 0; >> + >> + memset(klapic, 0, sizeof apic); >> + kapic_set_reg(klapic, 0x2, s->id << 24); >> + kapic_set_reg(klapic, 0x8, s->tpr); >> + kapic_set_reg(klapic, 0xd, s->log_dest << 24); >> + kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff); >> + kapic_set_reg(klapic, 0xf, s->spurious_vec); >> + for (i = 0; i < 8; i++) { >> + kapic_set_reg(klapic, 0x10 + i, s->isr[i]); >> + kapic_set_reg(klapic, 0x18 + i, s->tmr[i]); >> + kapic_set_reg(klapic, 0x20 + i, s->irr[i]); >> + } >> + kapic_set_reg(klapic, 0x28, s->esr); >> + kapic_set_reg(klapic, 0x30, s->icr[0]); >> + kapic_set_reg(klapic, 0x31, s->icr[1]); >> + for (i = 0; i < APIC_LVT_NB; i++) >> + kapic_set_reg(klapic, 0x32 + i, s->lvt[i]); >> + kapic_set_reg(klapic, 0x38, s->initial_count); >> + kapic_set_reg(klapic, 0x3e, s->divide_conf); >> + >> + r = kvm_set_lapic(s->cpu_env, klapic); >> +#endif >> + return r; >> +} > > You should probably just setup VMState such that it directly saves > kvm_lapic_state and then have the pre/post functions call the kernel > ioctls to sync it. There's not a whole lot of point switching the > state between two different structures. It ensures the two models are compatible. Since they're the same device from the point of view of the guest, there's no reason for them to have different representations or to be incompatible.
On Thu, Oct 08, 2009 at 04:09:27PM +0200, Avi Kivity wrote: > On 10/08/2009 03:55 PM, Anthony Liguori wrote: >> Glauber Costa wrote: >>> This patch provides kvm with an in-kernel apic. We are currently not >>> enabling it. >>> The code is heavily based on what's in qemu-kvm.git. >>> >>> Signed-off-by: Glauber Costa <glommer@redhat.com> >>> --- >>> hw/apic.c | 135 >>> +++++++++++++++++++++++++++++++++++++++++++++++++++-- >>> kvm.h | 3 + >>> target-i386/kvm.c | 18 +++++++ >>> 3 files changed, 152 insertions(+), 4 deletions(-) >>> >>> diff --git a/hw/apic.c b/hw/apic.c >>> index c89008e..5635607 100644 >>> --- a/hw/apic.c >>> +++ b/hw/apic.c >>> @@ -299,7 +299,11 @@ void cpu_set_apic_base(CPUState *env, uint64_t val) >>> #endif >>> if (!s) >>> return; >>> - s->apicbase = (val & 0xfffff000) | >>> + >>> + if (kvm_enabled() && kvm_irqchip_in_kernel()) >>> + s->apicbase = val; >>> + else >>> + s->apicbase = (val & 0xfffff000) | >>> (s->apicbase & (MSR_IA32_APICBASE_BSP | >>> MSR_IA32_APICBASE_ENABLE)); >>> /* if disabled, cannot be enabled again */ >>> if (!(val & MSR_IA32_APICBASE_ENABLE)) { >>> @@ -497,6 +501,13 @@ void apic_init_reset(CPUState *env) >>> s->wait_for_sipi = 1; >>> >>> env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP); >>> + >>> +#ifdef KVM_CAP_MP_STATE >>> + if (kvm_enabled() && kvm_irqchip_in_kernel()) >>> + env->mp_state >>> + = env->halted ? KVM_MP_STATE_UNINITIALIZED : >>> KVM_MP_STATE_RUNNABLE; >>> +#endif >> >> I don't think CAP_MP_STATE should be treated as an optional feature. >> >>> +static int kvm_kernel_lapic_load_from_user(APICState *s) >>> +{ >>> + int r = 0; >>> +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386) >>> + struct kvm_lapic_state apic; >>> + struct kvm_lapic_state *klapic = &apic; >>> + int i; >>> + >>> + if (!(kvm_enabled() && kvm_irqchip_in_kernel())) >>> + return 0; >>> + >>> + memset(klapic, 0, sizeof apic); >>> + kapic_set_reg(klapic, 0x2, s->id << 24); >>> + kapic_set_reg(klapic, 0x8, s->tpr); >>> + kapic_set_reg(klapic, 0xd, s->log_dest << 24); >>> + kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff); >>> + kapic_set_reg(klapic, 0xf, s->spurious_vec); >>> + for (i = 0; i < 8; i++) { >>> + kapic_set_reg(klapic, 0x10 + i, s->isr[i]); >>> + kapic_set_reg(klapic, 0x18 + i, s->tmr[i]); >>> + kapic_set_reg(klapic, 0x20 + i, s->irr[i]); >>> + } >>> + kapic_set_reg(klapic, 0x28, s->esr); >>> + kapic_set_reg(klapic, 0x30, s->icr[0]); >>> + kapic_set_reg(klapic, 0x31, s->icr[1]); >>> + for (i = 0; i < APIC_LVT_NB; i++) >>> + kapic_set_reg(klapic, 0x32 + i, s->lvt[i]); >>> + kapic_set_reg(klapic, 0x38, s->initial_count); >>> + kapic_set_reg(klapic, 0x3e, s->divide_conf); >>> + >>> + r = kvm_set_lapic(s->cpu_env, klapic); >>> +#endif >>> + return r; >>> +} >> >> You should probably just setup VMState such that it directly saves >> kvm_lapic_state and then have the pre/post functions call the kernel >> ioctls to sync it. There's not a whole lot of point switching the >> state between two different structures. > > It ensures the two models are compatible. Since they're the same device > from the point of view of the guest, there's no reason for them to have > different representations or to be incompatible. live migration between something that has in-kernel irqchip and something that has not is likely to be a completely untested thing. And this is the only reason we might think of it as the same device. I don't see any value in supporting this combination
Avi Kivity wrote: > On 10/08/2009 03:55 PM, Anthony Liguori wrote: >> >> You should probably just setup VMState such that it directly saves >> kvm_lapic_state and then have the pre/post functions call the kernel >> ioctls to sync it. There's not a whole lot of point switching the >> state between two different structures. > > It ensures the two models are compatible. Since they're the same > device from the point of view of the guest, there's no reason for them > to have different representations or to be incompatible. The problem is, the in-kernel apic is not part of the qemu source tree. If we add a field to the qemu apic, then we would break the in-kernel apic and vice-versa. It's far easier to just have the in-kernel apic as a separate model. Most importantly though, the code should reflect how things behave. It's extremely difficult to look at the apic code and figure out what bits are used and what bits aren't used when the in-kernel apic is enabled. Regards, Anthony Liguori
On 10/08/2009 04:26 PM, Anthony Liguori wrote: > Avi Kivity wrote: >> On 10/08/2009 03:55 PM, Anthony Liguori wrote: >>> >>> You should probably just setup VMState such that it directly saves >>> kvm_lapic_state and then have the pre/post functions call the kernel >>> ioctls to sync it. There's not a whole lot of point switching the >>> state between two different structures. >> >> It ensures the two models are compatible. Since they're the same >> device from the point of view of the guest, there's no reason for >> them to have different representations or to be incompatible. > > The problem is, the in-kernel apic is not part of the qemu source > tree. If we add a field to the qemu apic, then we would break the > in-kernel apic and vice-versa. It's far easier to just have the > in-kernel apic as a separate model. > You need to handle it anyway due to save/restore; that is the new field and whatever functionality it has must be optional. > Most importantly though, the code should reflect how things behave. > It's extremely difficult to look at the apic code and figure out what > bits are used and what bits aren't used when the in-kernel apic is > enabled. That doesn't mean they need to be separate devices.
Avi Kivity wrote: >> The problem is, the in-kernel apic is not part of the qemu source >> tree. If we add a field to the qemu apic, then we would break the >> in-kernel apic and vice-versa. It's far easier to just have the >> in-kernel apic as a separate model. >> > > You need to handle it anyway due to save/restore; that is the new > field and whatever functionality it has must be optional. Not necessarily. If it's a bug fix, we may disable the older versions of savevm (which we have to do from time to time). > That doesn't mean they need to be separate devices. But they are. The in-kernel device models are not mere accelerations. There are features that are only enabled with in-kernel device models (like PCI passthrough). In fact, in-kernel PIT is not at all an acceleration, it's there because it's functionally different. The sync stuff is really ugly too. It would be much cleaner to have a separate state for the in-kernel device models that saved the structures from the kernel directly instead of having to translate between formats. More straight forward code means it's all easier to understand, easier to debug, etc. Regards, Anthony Liguori
On Thu, Oct 08, 2009 at 04:31:57PM +0200, Avi Kivity wrote: > On 10/08/2009 04:26 PM, Anthony Liguori wrote: >> Avi Kivity wrote: >>> On 10/08/2009 03:55 PM, Anthony Liguori wrote: >>>> >>>> You should probably just setup VMState such that it directly saves >>>> kvm_lapic_state and then have the pre/post functions call the >>>> kernel ioctls to sync it. There's not a whole lot of point >>>> switching the state between two different structures. >>> >>> It ensures the two models are compatible. Since they're the same >>> device from the point of view of the guest, there's no reason for >>> them to have different representations or to be incompatible. >> >> The problem is, the in-kernel apic is not part of the qemu source >> tree. If we add a field to the qemu apic, then we would break the >> in-kernel apic and vice-versa. It's far easier to just have the >> in-kernel apic as a separate model. >> > > You need to handle it anyway due to save/restore; that is the new field > and whatever functionality it has must be optional. Not necessarily. You can grab the structures directly from the kernel definition , copy that over, issue the ioctl, and just make sure the source and destination have compatible kernels.
On Thu, Oct 08, 2009 at 09:39:13AM -0500, Anthony Liguori wrote: > The sync stuff is really ugly too. It would be much cleaner to have a > separate state for the in-kernel device models that saved the structures > from the kernel directly instead of having to translate between formats. > More straight forward code means it's all easier to understand, easier to > debug, etc. > One may arguee that I am stupid, but I have caught myself reading code that was totally unused in the quest for irqchip related bugs...
Glauber Costa wrote: > > It ensures the two models are compatible. Since they're the same device > > from the point of view of the guest, there's no reason for them to have > > different representations or to be incompatible. > > live migration between something that has in-kernel irqchip and > something that has not is likely to be a completely untested > thing. And this is the only reason we might think of it as the same > device. I don't see any value in supporting this combination Not just live migration. ACPI sleep + savevm + loadvm + ACPI resume, for example. -- Jamie
On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote: > Glauber Costa wrote: > > > It ensures the two models are compatible. Since they're the same device > > > from the point of view of the guest, there's no reason for them to have > > > different representations or to be incompatible. > > > > live migration between something that has in-kernel irqchip and > > something that has not is likely to be a completely untested > > thing. And this is the only reason we might think of it as the same > > device. I don't see any value in supporting this combination > > Not just live migration. ACPI sleep + savevm + loadvm + ACPI resume, > for example. Yes, but in this case too, I'd expect the irqchipness of qemu not to change.
Glauber Costa wrote: > On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote: > > Glauber Costa wrote: > > > > It ensures the two models are compatible. Since they're the same device > > > > from the point of view of the guest, there's no reason for them to have > > > > different representations or to be incompatible. > > > > > > live migration between something that has in-kernel irqchip and > > > something that has not is likely to be a completely untested > > > thing. And this is the only reason we might think of it as the same > > > device. I don't see any value in supporting this combination > > > > Not just live migration. ACPI sleep + savevm + loadvm + ACPI resume, > > for example. > Yes, but in this case too, I'd expect the irqchipness of qemu not to change. If I've just been sent an image produced by someone running KVM, and my machine is not KVM-capable, or I cannot upgrade the KVM kernel module because it's in use by other VMs (had this problem a few times), there's no choice but to change the irqchipness. -- Jamie
On Fri, Oct 09, 2009 at 05:48:31PM +0100, Jamie Lokier wrote: > Glauber Costa wrote: > > On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote: > > > Glauber Costa wrote: > > > > > It ensures the two models are compatible. Since they're the same device > > > > > from the point of view of the guest, there's no reason for them to have > > > > > different representations or to be incompatible. > > > > > > > > live migration between something that has in-kernel irqchip and > > > > something that has not is likely to be a completely untested > > > > thing. And this is the only reason we might think of it as the same > > > > device. I don't see any value in supporting this combination > > > > > > Not just live migration. ACPI sleep + savevm + loadvm + ACPI resume, > > > for example. > > Yes, but in this case too, I'd expect the irqchipness of qemu not to change. > > If I've just been sent an image produced by someone running KVM, and > my machine is not KVM-capable, or I cannot upgrade the KVM kernel > module because it's in use by other VMs (had this problem a few > times), there's no choice but to change the irqchipness. As gleb mentioned, requiring such a change to happen offline (across a reboot) is not that much of a pain. There are thousands of scenarios in which it will have to happen anyway, including major bumps in qemu version.
Jamie Lokier wrote: > Glauber Costa wrote: > >> On Fri, Oct 09, 2009 at 11:06:41AM +0100, Jamie Lokier wrote: >> >>> Glauber Costa wrote: >>> >>>>> It ensures the two models are compatible. Since they're the same device >>>>> from the point of view of the guest, there's no reason for them to have >>>>> different representations or to be incompatible. >>>>> >>>> live migration between something that has in-kernel irqchip and >>>> something that has not is likely to be a completely untested >>>> thing. And this is the only reason we might think of it as the same >>>> device. I don't see any value in supporting this combination >>>> >>> Not just live migration. ACPI sleep + savevm + loadvm + ACPI resume, >>> for example. >>> >> Yes, but in this case too, I'd expect the irqchipness of qemu not to change. >> > > If I've just been sent an image produced by someone running KVM, and > my machine is not KVM-capable, or I cannot upgrade the KVM kernel > module because it's in use by other VMs (had this problem a few > times), there's no choice but to change the irqchipness. > You cannot migrate from KVM to TCG so this use-case is irrelevant.
On 10/09/2009 09:49 PM, Anthony Liguori wrote: >> If I've just been sent an image produced by someone running KVM, and >> my machine is not KVM-capable, or I cannot upgrade the KVM kernel >> module because it's in use by other VMs (had this problem a few >> times), there's no choice but to change the irqchipness. > > > You cannot migrate from KVM to TCG so this use-case is irrelevant. I agree it's mostly irrelevant, however nothing in principle prevents such a migration, as long as the cpuid feature bits are implemented on both sides.
Avi Kivity wrote: > On 10/09/2009 09:49 PM, Anthony Liguori wrote: >>> If I've just been sent an image produced by someone running KVM, and >>> my machine is not KVM-capable, or I cannot upgrade the KVM kernel >>> module because it's in use by other VMs (had this problem a few >>> times), there's no choice but to change the irqchipness. >> >> >> You cannot migrate from KVM to TCG so this use-case is irrelevant. > > I agree it's mostly irrelevant, however nothing in principle prevents > such a migration, as long as the cpuid feature bits are implemented on > both sides. Sure, in principle it's certainly possible but in practice, it isn't today and I don't see anything that's likely to happen in the near term future that would make it.
diff --git a/hw/apic.c b/hw/apic.c index c89008e..5635607 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -299,7 +299,11 @@ void cpu_set_apic_base(CPUState *env, uint64_t val) #endif if (!s) return; - s->apicbase = (val & 0xfffff000) | + + if (kvm_enabled() && kvm_irqchip_in_kernel()) + s->apicbase = val; + else + s->apicbase = (val & 0xfffff000) | (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE)); /* if disabled, cannot be enabled again */ if (!(val & MSR_IA32_APICBASE_ENABLE)) { @@ -497,6 +501,13 @@ void apic_init_reset(CPUState *env) s->wait_for_sipi = 1; env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP); + +#ifdef KVM_CAP_MP_STATE + if (kvm_enabled() && kvm_irqchip_in_kernel()) + env->mp_state + = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE; +#endif + } static void apic_startup(APICState *s, int vector_num) @@ -903,12 +914,120 @@ static int apic_load_old(QEMUFile *f, void *opaque, int version_id) return 0; } +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386) +static inline uint32_t kapic_reg(struct kvm_lapic_state *kapic, int reg_id) +{ + return *((uint32_t *) (kapic->regs + (reg_id << 4))); +} + +static inline void kapic_set_reg(struct kvm_lapic_state *kapic, + int reg_id, uint32_t val) +{ + *((uint32_t *) (kapic->regs + (reg_id << 4))) = val; +} +#endif + +static int kvm_kernel_lapic_load_from_user(APICState *s) +{ + int r = 0; +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386) + struct kvm_lapic_state apic; + struct kvm_lapic_state *klapic = &apic; + int i; + + if (!(kvm_enabled() && kvm_irqchip_in_kernel())) + return 0; + + memset(klapic, 0, sizeof apic); + kapic_set_reg(klapic, 0x2, s->id << 24); + kapic_set_reg(klapic, 0x8, s->tpr); + kapic_set_reg(klapic, 0xd, s->log_dest << 24); + kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff); + kapic_set_reg(klapic, 0xf, s->spurious_vec); + for (i = 0; i < 8; i++) { + kapic_set_reg(klapic, 0x10 + i, s->isr[i]); + kapic_set_reg(klapic, 0x18 + i, s->tmr[i]); + kapic_set_reg(klapic, 0x20 + i, s->irr[i]); + } + kapic_set_reg(klapic, 0x28, s->esr); + kapic_set_reg(klapic, 0x30, s->icr[0]); + kapic_set_reg(klapic, 0x31, s->icr[1]); + for (i = 0; i < APIC_LVT_NB; i++) + kapic_set_reg(klapic, 0x32 + i, s->lvt[i]); + kapic_set_reg(klapic, 0x38, s->initial_count); + kapic_set_reg(klapic, 0x3e, s->divide_conf); + + r = kvm_set_lapic(s->cpu_env, klapic); +#endif + return r; +} + +static void kvm_kernel_lapic_save_to_user(APICState *s) +{ +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386) + struct kvm_lapic_state apic; + struct kvm_lapic_state *kapic = &apic; + int i, v; + + if (!(kvm_enabled() && kvm_irqchip_in_kernel())) + return; + + kvm_get_lapic(s->cpu_env, kapic); + + s->id = kapic_reg(kapic, 0x2) >> 24; + s->tpr = kapic_reg(kapic, 0x8); + s->arb_id = kapic_reg(kapic, 0x9); + s->log_dest = kapic_reg(kapic, 0xd) >> 24; + s->dest_mode = kapic_reg(kapic, 0xe) >> 28; + s->spurious_vec = kapic_reg(kapic, 0xf); + for (i = 0; i < 8; i++) { + s->isr[i] = kapic_reg(kapic, 0x10 + i); + s->tmr[i] = kapic_reg(kapic, 0x18 + i); + s->irr[i] = kapic_reg(kapic, 0x20 + i); + } + s->esr = kapic_reg(kapic, 0x28); + s->icr[0] = kapic_reg(kapic, 0x30); + s->icr[1] = kapic_reg(kapic, 0x31); + for (i = 0; i < APIC_LVT_NB; i++) + s->lvt[i] = kapic_reg(kapic, 0x32 + i); + s->initial_count = kapic_reg(kapic, 0x38); + s->divide_conf = kapic_reg(kapic, 0x3e); + + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); + s->count_shift = (v + 1) & 7; + + s->initial_count_load_time = qemu_get_clock(vm_clock); + apic_timer_update(s, s->initial_count_load_time); +#endif +} + +static void qemu_kvm_load_lapic(CPUState *env) +{ + kvm_kernel_lapic_load_from_user(env->apic_state); +} + +static void apic_pre_save(void *opaque) +{ + APICState *s = (void *)opaque; + + kvm_kernel_lapic_save_to_user(s); +} + +static int apic_post_load(void *opaque, int version_id) +{ + APICState *s = opaque; + + return kvm_kernel_lapic_load_from_user(s); +} + static const VMStateDescription vmstate_apic = { .name = "apic", .version_id = 3, .minimum_version_id = 3, .minimum_version_id_old = 1, .load_state_old = apic_load_old, + .pre_save = apic_pre_save, + .post_load = apic_post_load, .fields = (VMStateField []) { VMSTATE_UINT32(apicbase, APICState), VMSTATE_UINT8(id, APICState), @@ -933,9 +1052,8 @@ static const VMStateDescription vmstate_apic = { } }; -static void apic_reset(void *opaque) +static void apic_do_reset(APICState *s) { - APICState *s = opaque; int bsp; cpu_synchronize_state(s->cpu_env); @@ -957,6 +1075,15 @@ static void apic_reset(void *opaque) } } +static void apic_reset(void *opaque) +{ + APICState *s = opaque; + + apic_do_reset(s); + + qemu_kvm_load_lapic(s->cpu_env); +} + static CPUReadMemoryFunc * const apic_mem_read[3] = { apic_mem_readb, apic_mem_readw, @@ -981,7 +1108,7 @@ int apic_init(CPUState *env) s->id = env->cpuid_apic_id; s->cpu_env = env; - apic_reset(s); + apic_do_reset(s); msix_supported = 1; /* XXX: mapping more APICs at the same memory location */ diff --git a/kvm.h b/kvm.h index 8d4afa0..099b55e 100644 --- a/kvm.h +++ b/kvm.h @@ -97,6 +97,9 @@ int kvm_arch_init(KVMState *s, int smp_cpus); int kvm_arch_init_vcpu(CPUState *env); +int kvm_set_lapic(CPUState *env, struct kvm_lapic_state *s); +int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s); + struct kvm_guest_debug; struct kvm_debug_exit_arch; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7010999..d485d4c 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -957,3 +957,21 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) } } #endif /* KVM_CAP_SET_GUEST_DEBUG */ + +#ifdef KVM_CAP_IRQCHIP +int kvm_set_lapic(CPUState *env, struct kvm_lapic_state *s) +{ + if (!kvm_irqchip_in_kernel()) + return 0; + + return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, s); +} + +int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s) +{ + if (!kvm_irqchip_in_kernel()) + return 0; + + return kvm_vcpu_ioctl(env, KVM_GET_LAPIC, s); +} +#endif
This patch provides kvm with an in-kernel apic. We are currently not enabling it. The code is heavily based on what's in qemu-kvm.git. Signed-off-by: Glauber Costa <glommer@redhat.com> --- hw/apic.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++++++-- kvm.h | 3 + target-i386/kvm.c | 18 +++++++ 3 files changed, 152 insertions(+), 4 deletions(-)