@@ -27,6 +27,8 @@
#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/i386/x86.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci.h"
#include "hw/irq.h"
#include "xen_evtchn.h"
@@ -100,6 +102,7 @@ struct XenEvtchnState {
uint64_t callback_param;
bool evtchn_in_kernel;
+ uint32_t callback_gsi;
QEMUBH *gsi_bh;
@@ -216,11 +219,41 @@ static void xen_evtchn_register_types(void)
type_init(xen_evtchn_register_types)
+/*
+ * Resolve an HVM_PARAM_CALLBACK_TYPE_PCI_INTX callback parameter to the
+ * IRQ that the named PCI device's INTx pin is currently routed to.
+ *
+ * Fields decoded from @param:
+ *   bits  1:0   INTx pin
+ *   bits 15:8   devfn
+ *   bits 31:16  bus
+ *   bits 47:32  PCI domain (only domain 0 is accepted)
+ *
+ * @s is currently unused.
+ *
+ * Returns the routed IRQ number, or 0 if the machine is not a PC machine,
+ * the domain is non-zero, the device cannot be found, or its INTx routing
+ * is not in PCI_INTX_ENABLED mode. The caller maps 0 to -EINVAL.
+ */
+static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
+{
+    /*
+     * Use the non-asserting dynamic cast here. PC_MACHINE() is a checked
+     * cast: for a machine of a different type it either asserts or hands
+     * back the object unchanged, but never NULL, which would make the
+     * !pcms test below dead code.
+     */
+    PCMachineState *pcms =
+        (PCMachineState *)object_dynamic_cast(qdev_get_machine(),
+                                              TYPE_PC_MACHINE);
+    uint8_t pin = param & 3;
+    uint8_t devfn = (param >> 8) & 0xff;
+    uint16_t bus = (param >> 16) & 0xffff;
+    uint16_t domain = (param >> 32) & 0xffff;
+    PCIDevice *pdev;
+    PCIINTxRoute r;
+
+    if (domain || !pcms) {
+        return 0;
+    }
+
+    pdev = pci_find_device(pcms->bus, bus, devfn);
+    if (!pdev) {
+        return 0;
+    }
+
+    r = pci_device_route_intx_to_irq(pdev, pin);
+    if (r.mode != PCI_INTX_ENABLED) {
+        return 0;
+    }
+
+    /*
+     * The route is sampled once, here. Being notified of later INTx
+     * routing changes would apparently require *owning* the device and
+     * overwriting its ->intx_routing_notifier, so we do not attempt to
+     * track them.
+     */
+    return r.irq;
+}
+
+
void xen_evtchn_set_callback_level(int level)
{
XenEvtchnState *s = xen_evtchn_singleton;
- uint32_t param;
-
if (!s) {
return;
}
@@ -259,18 +292,12 @@ void xen_evtchn_set_callback_level(int level)
return;
}
- param = (uint32_t)s->callback_param;
-
- switch (s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) {
- case HVM_PARAM_CALLBACK_TYPE_GSI:
- if (param < GSI_NUM_PINS) {
- qemu_set_irq(s->gsis[param], level);
- if (level) {
- /* Ensure the vCPU polls for deassertion */
- kvm_xen_set_callback_asserted();
- }
+ if (s->callback_gsi && s->callback_gsi < GSI_NUM_PINS) {
+ qemu_set_irq(s->gsis[s->callback_gsi], level);
+ if (level) {
+ /* Ensure the vCPU polls for deassertion */
+ kvm_xen_set_callback_asserted();
}
- break;
}
}
@@ -278,15 +305,22 @@ int xen_evtchn_set_callback_param(uint64_t param)
{
XenEvtchnState *s = xen_evtchn_singleton;
bool in_kernel = false;
+ uint32_t gsi = 0;
+ int type = param >> CALLBACK_VIA_TYPE_SHIFT;
int ret;
if (!s) {
return -ENOTSUP;
}
+ /*
+ * Take the BQL because set_callback_pci_intx() may call into PCI code,
+ * and because we may need to manipulate the old and new GSI levels.
+ */
+ qemu_mutex_lock_iothread();
qemu_mutex_lock(&s->port_lock);
- switch (param >> CALLBACK_VIA_TYPE_SHIFT) {
+ switch (type) {
case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
struct kvm_xen_hvm_attr xa = {
.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
@@ -297,10 +331,17 @@ int xen_evtchn_set_callback_param(uint64_t param)
if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
in_kernel = true;
}
+ gsi = 0;
break;
}
+ case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
+ gsi = set_callback_pci_intx(s, param);
+ ret = gsi ? 0 : -EINVAL;
+ break;
+
case HVM_PARAM_CALLBACK_TYPE_GSI:
+ gsi = (uint32_t)param;
ret = 0;
break;
@@ -312,9 +353,21 @@ int xen_evtchn_set_callback_param(uint64_t param)
if (!ret) {
s->callback_param = param;
s->evtchn_in_kernel = in_kernel;
+
+ if (gsi != s->callback_gsi) {
+ struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
+
+ xen_evtchn_set_callback_level(0);
+ s->callback_gsi = gsi;
+
+ if (gsi && vi && vi->evtchn_upcall_pending) {
+ kvm_xen_inject_vcpu_callback_vector(0, type);
+ }
+ }
}
qemu_mutex_unlock(&s->port_lock);
+ qemu_mutex_unlock_iothread();
return ret;
}
@@ -130,6 +130,38 @@ int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
return ret;
}
+ /* If called a second time, don't repeat the rest of the setup. */
+ if (s->xen_caps) {
+ return 0;
+ }
+
+ /*
+ * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
+ * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
+ *
+ * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
+ * such things to be polled at precisely the right time. We *could* do
+ * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
+ * the moment the IRQ is acked, and see if it should be reasserted.
+ *
+ * But the in-kernel irqchip is deprecated, so we're unlikely to add
+ * that support in the kernel. Insist on using the split irqchip mode
+ * instead.
+ *
+ * This leaves us polling for the level going low in QEMU, which lacks
+ * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
+ * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
+ * the device (for which it has to unmap the device and trap access, for
+ * some period after an IRQ!!). In the Xen case, we do it on exit from
+ * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
+ * Which is kind of icky, but less so than the VFIO one. I may fix them
+ * both later...
+ */
+ if (!kvm_kernel_irqchip_split()) {
+ error_report("kvm: Xen support requires kernel-irqchip=split");
+ return -EINVAL;
+ }
+
s->xen_caps = xen_caps;
return 0;
}