Message ID | CABYiri8GdsNdkZ=qyJweph-FqsSzp0sEnsQaz3wcLUD8nywd6g@mail.gmail.com |
---|---|
State | New |
Headers | show |
Andrey, Can you get a trace of the guest (echo kvm > /sys/kernel/debug/tracing/set_event; sleep 1s; cp /sys/kernel/debug/tracing/trace /tmp/trace.txt) while it is in the 100% cpu consumption state? Is it otherwise operational? On Thu, Sep 25, 2014 at 10:40:27PM +0400, Andrey Korolyov wrote: > Hello, > > there is a somewhat weird issue with 3.10, backported IOAPIC behavior > (which allowed windows guest to not hang in my case), 2.1.1 and webkit > suite which is well-known for all timer weirdness for Windows guests. > Here is a way to reproduce: > > - start the guest, > - open webkit-based browser, cpu consumption will raise a bit but with > hv_ bits will remain under single core in my case, > - live-migrate VM, > - close browser, > - observe 100% cpu consumption by guest. > > The loop can be 'destroyed' by doing another live migration, e.g. > without something what drives guest timer insane. > > Launch arguments can be found here: > https://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg04012.html, > also I am re-sending the patch since I found that I missed comma in > last public version. The issue is specific to at least W2k8R2. If > webkit is stood idle for some time, it looks like Windows timer is > falling back to defaults and migration issue will not appear. Bug has > floating nature by itself, so it is just matter of interest if it can > reappear on others` setup. > diff -ru linux-3.10.11/arch/ia64/kvm/kvm-ia64.c linux-3.10.11.patched-ioapic/arch/ia64/kvm/kvm-ia64.c > --- linux-3.10.11/arch/ia64/kvm/kvm-ia64.c 2013-09-08 09:10:14.000000000 +0400 > +++ linux-3.10.11.patched-ioapic/arch/ia64/kvm/kvm-ia64.c 2014-08-24 19:49:25.723072383 +0400 > @@ -199,6 +199,7 @@ > case KVM_CAP_IRQCHIP: > case KVM_CAP_MP_STATE: > case KVM_CAP_IRQ_INJECT_STATUS: > + case KVM_CAP_IOAPIC_POLARITY_IGNORED: > r = 1; > break; > case KVM_CAP_COALESCED_MMIO: > diff -ru linux-3.10.11/arch/x86/kvm/x86.c linux-3.10.11.patched-ioapic/arch/x86/kvm/x86.c > --- linux-3.10.11/arch/x86/kvm/x86.c 2013-09-08 09:10:14.000000000 +0400 > +++ linux-3.10.11.patched-ioapic/arch/x86/kvm/x86.c 2014-08-24 19:50:06.553716276 +0400 > @@ -2537,6 +2537,7 @@ > case KVM_CAP_GET_TSC_KHZ: > case KVM_CAP_KVMCLOCK_CTRL: > case KVM_CAP_READONLY_MEM: > + case KVM_CAP_IOAPIC_POLARITY_IGNORED: > #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT > case KVM_CAP_ASSIGN_DEV_IRQ: > case KVM_CAP_PCI_2_3: > diff -ru linux-3.10.11/include/uapi/linux/kvm.h linux-3.10.11.patched-ioapic/include/uapi/linux/kvm.h > --- linux-3.10.11/include/uapi/linux/kvm.h 2013-09-08 09:10:14.000000000 +0400 > +++ linux-3.10.11.patched-ioapic/include/uapi/linux/kvm.h 2014-08-24 19:51:10.975577204 +0400 > @@ -666,6 +666,7 @@ > #define KVM_CAP_IRQ_MPIC 90 > #define KVM_CAP_PPC_RTAS 91 > #define KVM_CAP_IRQ_XICS 92 > +#define KVM_CAP_IOAPIC_POLARITY_IGNORED 93 > > #ifdef KVM_CAP_IRQ_ROUTING > > diff -ru linux-3.10.11/virt/kvm/ioapic.c linux-3.10.11.patched-ioapic/virt/kvm/ioapic.c > --- linux-3.10.11/virt/kvm/ioapic.c 2013-09-08 09:10:14.000000000 +0400 > +++ linux-3.10.11.patched-ioapic/virt/kvm/ioapic.c 2014-08-24 19:59:26.755137527 +0400 > @@ -50,7 +50,7 @@ > #else > #define ioapic_debug(fmt, arg...) > #endif > -static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq, > +static int ioapic_service(struct kvm_ioapic *vioapic, int irq, > bool line_status); > > static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, > @@ -163,23 +163,67 @@ > return false; > } > > -static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx, > - bool line_status) > +static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, > + int irq_level, bool line_status) > { > - union kvm_ioapic_redirect_entry *pent; > - int injected = -1; > + union kvm_ioapic_redirect_entry entry; > + u32 mask = 1 << irq; > + u32 old_irr; > + int edge, ret; > > - pent = &ioapic->redirtbl[idx]; > + entry = ioapic->redirtbl[irq]; > + edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); > > - if (!pent->fields.mask) { > - injected = ioapic_deliver(ioapic, idx, line_status); > - if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) > - pent->fields.remote_irr = 1; > + if (!irq_level) { > + ioapic->irr &= ~mask; > + ret = 1; > + goto out; > + } > + > + /* > + * Return 0 for coalesced interrupts; for edge-triggered interrupts, > + * this only happens if a previous edge has not been delivered due > + * do masking. For level interrupts, the remote_irr field tells > + * us if the interrupt is waiting for an EOI. > + * > + * RTC is special: it is edge-triggered, but userspace likes to know > + * if it has been already ack-ed via EOI because coalesced RTC > + * interrupts lead to time drift in Windows guests. So we track > + * EOI manually for the RTC interrupt. > + */ > + if (irq == RTC_GSI && line_status && > + rtc_irq_check_coalesced(ioapic)) { > + ret = 0; > + goto out; > } > > - return injected; > + old_irr = ioapic->irr; > + ioapic->irr |= mask; > + if ((edge && old_irr == ioapic->irr) || > + (!edge && entry.fields.remote_irr)) { > + ret = 0; > + goto out; > + } > + > + ret = ioapic_service(ioapic, irq, line_status); > + > +out: > + trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); > + return ret; > +} > + > +static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) > +{ > + u32 idx; > + > + rtc_irq_eoi_tracking_reset(ioapic); > + for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS) > + ioapic_set_irq(ioapic, idx, 1, true); > + > + kvm_rtc_eoi_tracking_restore_all(ioapic); > } > > + > static void update_handled_vectors(struct kvm_ioapic *ioapic) > { > DECLARE_BITMAP(handled_vectors, 256); > @@ -282,12 +326,15 @@ > } > } > > -static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) > +static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) > { > union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; > struct kvm_lapic_irq irqe; > int ret; > > + if (entry->fields.mask) > + return -1; > + > ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " > "vector=%x trig_mode=%x\n", > entry->fields.dest_id, entry->fields.dest_mode, > @@ -302,6 +349,10 @@ > irqe.level = 1; > irqe.shorthand = 0; > > + if (irqe.trig_mode == IOAPIC_EDGE_TRIG) > + ioapic->irr &= ~(1 << irq); > + > + > if (irq == RTC_GSI && line_status) { > BUG_ON(ioapic->rtc_status.pending_eoi != 0); > ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, > @@ -309,6 +360,8 @@ > ioapic->rtc_status.pending_eoi = ret; > } else > ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); > + if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG) > + entry->fields.remote_irr = 1; > > return ret; > } > @@ -316,39 +369,15 @@ > int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, > int level, bool line_status) > { > - u32 old_irr; > - u32 mask = 1 << irq; > - union kvm_ioapic_redirect_entry entry; > int ret, irq_level; > > BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); > > spin_lock(&ioapic->lock); > - old_irr = ioapic->irr; > irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], > irq_source_id, level); > - entry = ioapic->redirtbl[irq]; > - irq_level ^= entry.fields.polarity; > - if (!irq_level) { > - ioapic->irr &= ~mask; > - ret = 1; > - } else { > - int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); > + ret = ioapic_set_irq(ioapic, irq, irq_level, line_status); > > - if (irq == RTC_GSI && line_status && > - rtc_irq_check_coalesced(ioapic)) { > - ret = 0; /* coalesced */ > - goto out; > - } > - ioapic->irr |= mask; > - if ((edge && old_irr != ioapic->irr) || > - (!edge && !entry.fields.remote_irr)) > - ret = ioapic_service(ioapic, irq, line_status); > - else > - ret = 0; /* report coalesced interrupt */ > - } > -out: > - trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); > spin_unlock(&ioapic->lock); > > return ret; > @@ -394,7 +423,7 @@ > > ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); > ent->fields.remote_irr = 0; > - if (!ent->fields.mask && (ioapic->irr & (1 << i))) > + if (ioapic->irr & (1 << i)) > ioapic_service(ioapic, i, false); > } > } > @@ -595,9 +624,10 @@ > > spin_lock(&ioapic->lock); > memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); > + ioapic->irr = 0; > update_handled_vectors(ioapic); > kvm_vcpu_request_scan_ioapic(kvm); > - kvm_rtc_eoi_tracking_restore_all(ioapic); > + kvm_ioapic_inject_all(ioapic, state->irr); > spin_unlock(&ioapic->lock); > return 0; > }
On Mon, Sep 29, 2014 at 10:25 PM, Marcelo Tosatti <mtosatti@redhat.com> wrote: > Andrey, > > Can you get a trace of the guest (echo kvm > > /sys/kernel/debug/tracing/set_event; sleep 1s; cp > /sys/kernel/debug/tracing/trace /tmp/trace.txt) while it is in the 100% > cpu consumption state? > > Is it otherwise operational? > Yes, but I cannot do this because of specific of a Windows guest :) Yes, it fully operational and I just wanted to know if the problem is reproducible anywhere else (for example, on 3.10 RHEL kernel which includes apic backport). Linux guests are not a subject for this bug at all.
Il 29/09/2014 20:31, Andrey Korolyov ha scritto: >> > >> > Can you get a trace of the guest (echo kvm > >> > /sys/kernel/debug/tracing/set_event; sleep 1s; cp >> > /sys/kernel/debug/tracing/trace /tmp/trace.txt) while it is in the 100% >> > cpu consumption state? >> > >> > Is it otherwise operational? >> > > Yes, but I cannot do this because of specific of a Windows guest :) > Yes, it fully operational and I just wanted to know if the problem is > reproducible anywhere else (for example, on 3.10 RHEL kernel which > includes apic backport). Linux guests are not a subject for this bug > at all. Actually that is a trace of the guest, *captured on the host*. So you can do that with Windows guests. Paolo
diff -ru linux-3.10.11/arch/ia64/kvm/kvm-ia64.c linux-3.10.11.patched-ioapic/arch/ia64/kvm/kvm-ia64.c --- linux-3.10.11/arch/ia64/kvm/kvm-ia64.c 2013-09-08 09:10:14.000000000 +0400 +++ linux-3.10.11.patched-ioapic/arch/ia64/kvm/kvm-ia64.c 2014-08-24 19:49:25.723072383 +0400 @@ -199,6 +199,7 @@ case KVM_CAP_IRQCHIP: case KVM_CAP_MP_STATE: case KVM_CAP_IRQ_INJECT_STATUS: + case KVM_CAP_IOAPIC_POLARITY_IGNORED: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff -ru linux-3.10.11/arch/x86/kvm/x86.c linux-3.10.11.patched-ioapic/arch/x86/kvm/x86.c --- linux-3.10.11/arch/x86/kvm/x86.c 2013-09-08 09:10:14.000000000 +0400 +++ linux-3.10.11.patched-ioapic/arch/x86/kvm/x86.c 2014-08-24 19:50:06.553716276 +0400 @@ -2537,6 +2537,7 @@ case KVM_CAP_GET_TSC_KHZ: case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_READONLY_MEM: + case KVM_CAP_IOAPIC_POLARITY_IGNORED: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: diff -ru linux-3.10.11/include/uapi/linux/kvm.h linux-3.10.11.patched-ioapic/include/uapi/linux/kvm.h --- linux-3.10.11/include/uapi/linux/kvm.h 2013-09-08 09:10:14.000000000 +0400 +++ linux-3.10.11.patched-ioapic/include/uapi/linux/kvm.h 2014-08-24 19:51:10.975577204 +0400 @@ -666,6 +666,7 @@ #define KVM_CAP_IRQ_MPIC 90 #define KVM_CAP_PPC_RTAS 91 #define KVM_CAP_IRQ_XICS 92 +#define KVM_CAP_IOAPIC_POLARITY_IGNORED 93 #ifdef KVM_CAP_IRQ_ROUTING diff -ru linux-3.10.11/virt/kvm/ioapic.c linux-3.10.11.patched-ioapic/virt/kvm/ioapic.c --- linux-3.10.11/virt/kvm/ioapic.c 2013-09-08 09:10:14.000000000 +0400 +++ linux-3.10.11.patched-ioapic/virt/kvm/ioapic.c 2014-08-24 19:59:26.755137527 +0400 @@ -50,7 +50,7 @@ #else #define ioapic_debug(fmt, arg...) #endif -static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq, +static int ioapic_service(struct kvm_ioapic *vioapic, int irq, bool line_status); static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, @@ -163,23 +163,67 @@ return false; } -static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx, - bool line_status) +static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, + int irq_level, bool line_status) { - union kvm_ioapic_redirect_entry *pent; - int injected = -1; + union kvm_ioapic_redirect_entry entry; + u32 mask = 1 << irq; + u32 old_irr; + int edge, ret; - pent = &ioapic->redirtbl[idx]; + entry = ioapic->redirtbl[irq]; + edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); - if (!pent->fields.mask) { - injected = ioapic_deliver(ioapic, idx, line_status); - if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) - pent->fields.remote_irr = 1; + if (!irq_level) { + ioapic->irr &= ~mask; + ret = 1; + goto out; + } + + /* + * Return 0 for coalesced interrupts; for edge-triggered interrupts, + * this only happens if a previous edge has not been delivered due + * do masking. For level interrupts, the remote_irr field tells + * us if the interrupt is waiting for an EOI. + * + * RTC is special: it is edge-triggered, but userspace likes to know + * if it has been already ack-ed via EOI because coalesced RTC + * interrupts lead to time drift in Windows guests. So we track + * EOI manually for the RTC interrupt. + */ + if (irq == RTC_GSI && line_status && + rtc_irq_check_coalesced(ioapic)) { + ret = 0; + goto out; } - return injected; + old_irr = ioapic->irr; + ioapic->irr |= mask; + if ((edge && old_irr == ioapic->irr) || + (!edge && entry.fields.remote_irr)) { + ret = 0; + goto out; + } + + ret = ioapic_service(ioapic, irq, line_status); + +out: + trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); + return ret; +} + +static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) +{ + u32 idx; + + rtc_irq_eoi_tracking_reset(ioapic); + for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS) + ioapic_set_irq(ioapic, idx, 1, true); + + kvm_rtc_eoi_tracking_restore_all(ioapic); } + static void update_handled_vectors(struct kvm_ioapic *ioapic) { DECLARE_BITMAP(handled_vectors, 256); @@ -282,12 +326,15 @@ } } -static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) +static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) { union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; struct kvm_lapic_irq irqe; int ret; + if (entry->fields.mask) + return -1; + ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", entry->fields.dest_id, entry->fields.dest_mode, @@ -302,6 +349,10 @@ irqe.level = 1; irqe.shorthand = 0; + if (irqe.trig_mode == IOAPIC_EDGE_TRIG) + ioapic->irr &= ~(1 << irq); + + if (irq == RTC_GSI && line_status) { BUG_ON(ioapic->rtc_status.pending_eoi != 0); ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, @@ -309,6 +360,8 @@ ioapic->rtc_status.pending_eoi = ret; } else ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); + if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG) + entry->fields.remote_irr = 1; return ret; } @@ -316,39 +369,15 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, int level, bool line_status) { - u32 old_irr; - u32 mask = 1 << irq; - union kvm_ioapic_redirect_entry entry; int ret, irq_level; BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); spin_lock(&ioapic->lock); - old_irr = ioapic->irr; irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], irq_source_id, level); - entry = ioapic->redirtbl[irq]; - irq_level ^= entry.fields.polarity; - if (!irq_level) { - ioapic->irr &= ~mask; - ret = 1; - } else { - int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); + ret = ioapic_set_irq(ioapic, irq, irq_level, line_status); - if (irq == RTC_GSI && line_status && - rtc_irq_check_coalesced(ioapic)) { - ret = 0; /* coalesced */ - goto out; - } - ioapic->irr |= mask; - if ((edge && old_irr != ioapic->irr) || - (!edge && !entry.fields.remote_irr)) - ret = ioapic_service(ioapic, irq, line_status); - else - ret = 0; /* report coalesced interrupt */ - } -out: - trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); spin_unlock(&ioapic->lock); return ret; @@ -394,7 +423,7 @@ ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; - if (!ent->fields.mask && (ioapic->irr & (1 << i))) + if (ioapic->irr & (1 << i)) ioapic_service(ioapic, i, false); } } @@ -595,9 +624,10 @@ spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); + ioapic->irr = 0; update_handled_vectors(ioapic); kvm_vcpu_request_scan_ioapic(kvm); - kvm_rtc_eoi_tracking_restore_all(ioapic); + kvm_ioapic_inject_all(ioapic, state->irr); spin_unlock(&ioapic->lock); return 0; }