@@ -2,3 +2,4 @@ kvm_xen_map_pirq(int pirq, int gsi) "pirq %d gsi %d"
kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d"
kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d"
kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d"
+kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d"
@@ -12,6 +12,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc.h"
+#include "xen_evtchn.h"
EvtchnInfoList *qmp_xen_event_list(Error **errp)
{
@@ -23,3 +24,17 @@ void qmp_xen_event_inject(uint32_t port, Error **errp)
{
error_setg(errp, "Xen event channel emulation not enabled");
}
+
+void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
+ uint64_t addr, uint32_t data, bool is_masked)
+{
+}
+
+void xen_evtchn_remove_pci_device(PCIDevice *dev)
+{
+}
+
+bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
+{
+ return false;
+}
@@ -30,9 +30,10 @@
#include "hw/i386/x86.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
#include "hw/irq.h"
#include "hw/xen/xen_backend_ops.h"
-
#include "xen_evtchn.h"
#include "xen_overlay.h"
#include "xen_xenstore.h"
@@ -45,6 +46,9 @@
#include "standard-headers/xen/memory.h"
#include "standard-headers/xen/hvm/params.h"
+/* XX: For kvm_update_msi_routes_all() */
+#include "target/i386/kvm/kvm_i386.h"
+
#define TYPE_XEN_EVTCHN "xen-evtchn"
OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
@@ -121,6 +125,11 @@ struct xenevtchn_handle {
struct pirq_info {
int gsi;
uint16_t port;
+ PCIDevice *dev;
+ int vector;
+ bool is_msix;
+ bool is_masked;
+ bool is_translated;
};
struct XenEvtchnState {
@@ -147,7 +156,7 @@ struct XenEvtchnState {
uint64_t pirq_inuse[DIV_ROUND_UP(MAX_XEN_PIRQ, 64)];
uint32_t pirq_gsi_set;
- /* Per-PIRQ information (rebuilt on migration) */
+ /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
struct pirq_info pirq[MAX_XEN_PIRQ];
};
@@ -958,16 +967,22 @@ static bool virq_is_global(uint32_t virq)
}
}
-static int close_port(XenEvtchnState *s, evtchn_port_t port)
+static int close_port(XenEvtchnState *s, evtchn_port_t port,
+                      bool *flush_kvm_routes)
{
XenEvtchnPort *p = &s->port_table[port];
+ /* Because it *might* be a PIRQ port */
+ assert(qemu_mutex_iothread_locked());
+
switch (p->type) {
case EVTCHNSTAT_closed:
return -ENOENT;
case EVTCHNSTAT_pirq:
s->pirq[p->type_val].port = 0;
+ if (s->pirq[p->type_val].is_translated) {
+ *flush_kvm_routes = true;
+ }
break;
case EVTCHNSTAT_virq:
@@ -1016,20 +1031,27 @@ static int close_port(XenEvtchnState *s, evtchn_port_t port)
int xen_evtchn_soft_reset(void)
{
XenEvtchnState *s = xen_evtchn_singleton;
+    bool flush_kvm_routes = false;
int i;
if (!s) {
return -ENOTSUP;
}
+ qemu_mutex_lock_iothread();
qemu_mutex_lock(&s->port_lock);
for (i = 0; i < s->nr_ports; i++) {
- close_port(s, i);
+ close_port(s, i, &flush_kvm_routes);
}
qemu_mutex_unlock(&s->port_lock);
+ if (flush_kvm_routes) {
+ kvm_update_msi_routes_all(NULL, true, 0, 0);
+ }
+
+ qemu_mutex_unlock_iothread();
return 0;
}
@@ -1045,6 +1067,7 @@ int xen_evtchn_reset_op(struct evtchn_reset *reset)
int xen_evtchn_close_op(struct evtchn_close *close)
{
XenEvtchnState *s = xen_evtchn_singleton;
+ bool flush_kvm_routes = false;
int ret;
if (!s) {
@@ -1055,11 +1078,19 @@ int xen_evtchn_close_op(struct evtchn_close *close)
return -EINVAL;
}
+    QEMU_IOTHREAD_LOCK_GUARD();
     qemu_mutex_lock(&s->port_lock);
-    ret = close_port(s, close->port);
+    ret = close_port(s, close->port, &flush_kvm_routes);
     qemu_mutex_unlock(&s->port_lock);
+
+    if (flush_kvm_routes) {
+        kvm_update_msi_routes_all(NULL, true, 0, 0);
+    }
return ret;
}
@@ -1177,21 +1208,54 @@ int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
return -EINVAL;
}
- QEMU_LOCK_GUARD(&s->port_lock);
+ QEMU_IOTHREAD_LOCK_GUARD();
if (s->pirq[pirq->pirq].port) {
return -EBUSY;
}
+ qemu_mutex_lock(&s->port_lock);
+
ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
&pirq->port);
if (ret) {
+ qemu_mutex_unlock(&s->port_lock);
return ret;
}
s->pirq[pirq->pirq].port = pirq->port;
trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
+ qemu_mutex_unlock(&s->port_lock);
+
+ /*
+ * Need to do the unmask outside port_lock because it may call
+ * back into the MSI translate function.
+ */
+ if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
+ if (s->pirq[pirq->pirq].is_masked) {
+ PCIDevice *dev = s->pirq[pirq->pirq].dev;
+ int vector = s->pirq[pirq->pirq].vector;
+ char *dev_path = qdev_get_dev_path(DEVICE(dev));
+
+ trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
+ g_free(dev_path);
+
+ if (s->pirq[pirq->pirq].is_msix) {
+ msix_set_mask(dev, vector, false);
+ } else {
+ msi_set_mask(dev, vector, false, NULL);
+ }
+ } else if (s->pirq[pirq->pirq].is_translated) {
+ /*
+ * If KVM had attempted to translate this one before, make it try
+ * again. If we unmasked, then the notifier on the MSI(-X) vector
+ * will already have had the same effect.
+ */
+ kvm_update_msi_routes_all(NULL, true, 0, 0);
+ }
+ }
+
return ret;
}
@@ -1513,6 +1587,174 @@ bool xen_evtchn_set_gsi(int gsi, int level)
return true;
}
+static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
+{
+ /* The vector (in low 8 bits of data) must be zero */
+    if (data & 0xff) {
+        return 0;
+    }
+
+ uint32_t pirq = (addr & 0xff000) >> 12;
+ pirq |= (addr >> 32) & 0xffffff00;
+
+ return pirq;
+}
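+
+/*
+ * Layout assumed above, matching Xen's XENFEAT_hvm_pirqs MSI encoding:
+ *
+ *   addr[19:12]  PIRQ# bits 7..0 (the APIC destination ID field)
+ *   addr[63:40]  PIRQ# bits 31..8
+ *   data[7:0]    zero (no real vector)
+ *
+ * e.g. address 0x1000fee46000 with data 0 encodes PIRQ# 0x1046 (4166).
+ */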
+
+static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
+ int except_pirq)
+{
+ uint32_t pirq;
+
+ for (pirq = 0; pirq < MAX_XEN_PIRQ; pirq++) {
+ /*
+ * We could be cleverer here, but it isn't really a fast path, and
+ * this trivial optimisation is enough to let us skip the big gap
+ * in the middle a bit quicker (in terms of both loop iterations,
+ * and cache lines).
+ */
+ if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
+            /* The loop's pirq++ takes us to the start of the next word */
+            pirq += 63;
+ continue;
+ }
+ if (except_pirq && pirq == except_pirq) {
+ continue;
+ }
+ if (s->pirq[pirq].dev != dev) {
+ continue;
+ }
+ if (vector != -1 && s->pirq[pirq].vector != vector) {
+ continue;
+ }
+
+        /* It could theoretically be bound to a port already, but that is OK. */
+        s->pirq[pirq].dev = NULL;
+ s->pirq[pirq].gsi = IRQ_UNBOUND;
+ s->pirq[pirq].is_msix = false;
+ s->pirq[pirq].vector = 0;
+ s->pirq[pirq].is_masked = false;
+ s->pirq[pirq].is_translated = false;
+ }
+}
+
+void xen_evtchn_remove_pci_device(PCIDevice *dev)
+{
+ XenEvtchnState *s = xen_evtchn_singleton;
+
+ if (!s) {
+ return;
+ }
+
+ QEMU_LOCK_GUARD(&s->port_lock);
+ do_remove_pci_vector(s, dev, -1, 0);
+}
+
+void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
+ uint64_t addr, uint32_t data, bool is_masked)
+{
+ XenEvtchnState *s = xen_evtchn_singleton;
+ uint32_t pirq;
+
+ if (!s) {
+ return;
+ }
+
+ assert(qemu_mutex_iothread_locked());
+
+ pirq = msi_pirq_target(addr, data);
+
+ /*
+ * The PIRQ# must be sane, and there must be an allocated PIRQ in
+ * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
+ */
+ if (!pirq || pirq >= MAX_XEN_PIRQ || !pirq_inuse(s, pirq) ||
+ (s->pirq[pirq].gsi != IRQ_UNBOUND &&
+ s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
+ pirq = 0;
+ }
+
+ if (pirq) {
+ s->pirq[pirq].dev = dev;
+ s->pirq[pirq].gsi = IRQ_MSI_EMU;
+ s->pirq[pirq].is_msix = is_msix;
+ s->pirq[pirq].vector = vector;
+ s->pirq[pirq].is_masked = is_masked;
+ }
+
+ /* Remove any (other) entries for this {device, vector} */
+ do_remove_pci_vector(s, dev, vector, pirq);
+}
+
+bool xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
+ uint64_t address, uint32_t data)
+{
+ XenEvtchnState *s = xen_evtchn_singleton;
+ uint32_t pirq, port;
+ CPUState *cpu;
+
+ if (!s) {
+ return false;
+ }
+
+ assert(qemu_mutex_iothread_locked());
+
+ pirq = msi_pirq_target(address, data);
+ if (!pirq || pirq >= MAX_XEN_PIRQ) {
+ return false;
+ }
+
+ if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
+ return false;
+ }
+
+ /* Remember that KVM tried to translate this. It might need to try again. */
+ s->pirq[pirq].is_translated = true;
+
+ QEMU_LOCK_GUARD(&s->port_lock);
+
+ port = s->pirq[pirq].port;
+ if (!valid_port(port)) {
+ return false;
+ }
+
+ cpu = qemu_get_cpu(s->port_table[port].vcpu);
+ if (!cpu) {
+ return false;
+ }
+
+ route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ route->u.xen_evtchn.port = port;
+ route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
+ route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ return true;
+}
+
+bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
+{
+ XenEvtchnState *s = xen_evtchn_singleton;
+ uint32_t pirq, port;
+
+ if (!s) {
+ return false;
+ }
+
+ assert(qemu_mutex_iothread_locked());
+
+ pirq = msi_pirq_target(address, data);
+ if (!pirq || pirq >= MAX_XEN_PIRQ) {
+ return false;
+ }
+
+ QEMU_LOCK_GUARD(&s->port_lock);
+
+ port = s->pirq[pirq].port;
+ if (!valid_port(port)) {
+ return false;
+ }
+
+ set_port_pending(s, port);
+ return true;
+}
+
int xen_physdev_map_pirq(struct physdev_map_pirq *map)
{
XenEvtchnState *s = xen_evtchn_singleton;
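
Taken together with the PHYSDEVOP plumbing below, the functions above implement the whole guest-visible flow. A rough guest-side sketch of that sequence (illustrative only: the hypercall_*() and program_msi_descriptor() helpers are assumed, not part of this series):

/* Illustrative guest-side sequence, assuming hypercall wrapper helpers. */
static int bind_msi_to_evtchn_sketch(void)
{
    struct physdev_get_free_pirq get = { .type = MAP_PIRQ_TYPE_MSI };
    struct evtchn_bind_pirq bind = { 0 };

    /* 1. Ask for a free PIRQ (traced above as kvm_xen_get_free_pirq). */
    if (hypercall_physdev_op(PHYSDEVOP_get_free_pirq, &get)) {
        return -1;
    }

    /*
     * 2. Program the device's MSI descriptor with the PIRQ# encoded in
     *    the address and a zero vector byte.  QEMU snoops the config
     *    write (msi_write_config() -> xen_evtchn_snoop_msi()) and marks
     *    the PIRQ as IRQ_MSI_EMU.
     */
    program_msi_descriptor(get.pirq);

    /* 3. Bind the PIRQ to an event channel (xen_evtchn_bind_pirq_op()). */
    bind.pirq = get.pirq;
    if (hypercall_event_channel_op(EVTCHNOP_bind_pirq, &bind)) {
        return -1;
    }

    /*
     * Interrupts now arrive on bind.port: either directly via the KVM
     * route from xen_evtchn_translate_pirq_msi(), or through QEMU via
     * xen_evtchn_deliver_pirq_msi() when the device model raises the MSI.
     */
    return bind.port;
}
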
@@ -1523,6 +1765,7 @@ int xen_physdev_map_pirq(struct physdev_map_pirq *map)
return -ENOTSUP;
}
+ QEMU_IOTHREAD_LOCK_GUARD();
QEMU_LOCK_GUARD(&s->port_lock);
if (map->domid != DOMID_SELF && map->domid != xen_domid) {
@@ -1578,9 +1821,11 @@ int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
if (pirq < 0 || pirq >= MAX_XEN_PIRQ)
return -EINVAL;
- QEMU_LOCK_GUARD(&s->port_lock);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ qemu_mutex_lock(&s->port_lock);
if (!pirq_inuse(s, pirq)) {
+ qemu_mutex_unlock(&s->port_lock);
return -ENOENT;
}
@@ -1588,6 +1833,7 @@ int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
/* We can only unmap GSI PIRQs */
if (gsi < 0) {
+ qemu_mutex_unlock(&s->port_lock);
return -EINVAL;
}
@@ -1596,6 +1842,12 @@ int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
trace_kvm_xen_unmap_pirq(pirq, gsi);
+ qemu_mutex_unlock(&s->port_lock);
+
+ if (gsi == IRQ_MSI_EMU) {
+ kvm_update_msi_routes_all(NULL, true, 0, 0);
+ }
+
return 0;
}
@@ -1609,6 +1861,7 @@ int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
return -ENOTSUP;
}
+ QEMU_IOTHREAD_LOCK_GUARD();
QEMU_LOCK_GUARD(&s->port_lock);
if (!pirq_inuse(s, pirq)) {
@@ -1638,6 +1891,7 @@ int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
return -ENOTSUP;
}
+ QEMU_IOTHREAD_LOCK_GUARD();
QEMU_LOCK_GUARD(&s->port_lock);
if (!pirq_inuse(s, pirq)) {
@@ -25,6 +25,14 @@ void xen_evtchn_set_callback_level(int level);
int xen_evtchn_set_port(uint16_t port);
bool xen_evtchn_set_gsi(int gsi, int level);
+void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
+ uint64_t addr, uint32_t data, bool is_masked);
+void xen_evtchn_remove_pci_device(PCIDevice *dev);
+struct kvm_irq_routing_entry;
+bool xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
+ uint64_t address, uint32_t data);
+bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data);
+
/*
* These functions mirror the libxenevtchn library API, providing the QEMU
@@ -24,6 +24,8 @@
#include "qemu/range.h"
#include "qapi/error.h"
+#include "hw/i386/kvm/xen_evtchn.h"
+
/* PCI_MSI_ADDRESS_LO */
#define PCI_MSI_ADDRESS_LO_MASK (~0x3)
@@ -414,6 +416,15 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
fprintf(stderr, "\n");
#endif
+ if (xen_mode == XEN_EMULATE) {
+ for (vector = 0; vector < msi_nr_vectors(flags); vector++) {
+ MSIMessage msg = msi_prepare_message(dev, vector);
+
+ xen_evtchn_snoop_msi(dev, false, vector, msg.address, msg.data,
+ msi_is_masked(dev, vector));
+ }
+ }
+
if (!(flags & PCI_MSI_FLAGS_ENABLE)) {
return;
}
@@ -26,6 +26,8 @@
#include "qapi/error.h"
#include "trace.h"
+#include "hw/i386/kvm/xen_evtchn.h"
+
/* MSI enable bit and maskall bit are in byte 1 in FLAGS register */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
@@ -124,6 +126,11 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{
bool is_masked = msix_is_masked(dev, vector);
+    if (xen_mode == XEN_EMULATE) {
+        MSIMessage msg = msix_prepare_message(dev, vector);
+
+        xen_evtchn_snoop_msi(dev, true, vector, msg.address, msg.data,
+                             is_masked);
+    }
+
if (is_masked == was_masked) {
return;
}
@@ -49,6 +49,9 @@
#include "qemu/cutils.h"
#include "pci-internal.h"
+#include "hw/xen/xen.h"
+#include "hw/i386/kvm/xen_evtchn.h"
+
//#define DEBUG_PCI
#ifdef DEBUG_PCI
# define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__)
@@ -319,6 +322,17 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
{
MemTxAttrs attrs = {};
+ /*
+ * Xen uses the high bits of the address to contain some of the bits
+ * of the PIRQ#. Therefore we can't just send the write cycle and
+ * trust that it's caught by the APIC at 0xfee00000 because the
+     * target of the write might be e.g. 0x1000fee46000 for PIRQ#4166.
+ * So we intercept the delivery here instead of in kvm_send_msi().
+ */
+ if (xen_mode == XEN_EMULATE &&
+ xen_evtchn_deliver_pirq_msi(msg.address, msg.data)) {
+ return;
+ }
attrs.requester_id = pci_requester_id(dev);
address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
attrs, NULL);
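
The number in that comment can be sanity-checked against msi_pirq_target() in xen_evtchn.c. A minimal standalone sketch (pirq_to_msi_addr() is a hypothetical helper written for illustration, not code from this series):

#include <assert.h>
#include <stdint.h>

/* Hypothetical inverse of msi_pirq_target(), for illustration only. */
static uint64_t pirq_to_msi_addr(uint32_t pirq)
{
    return 0xfee00000ULL |                          /* APIC MMIO base */
           ((uint64_t)(pirq & 0xff) << 12) |        /* PIRQ[7:0] */
           ((uint64_t)(pirq & 0xffffff00) << 32);   /* PIRQ[31:8] */
}

int main(void)
{
    uint64_t addr = pirq_to_msi_addr(4166);
    /* Same decode steps as msi_pirq_target(), with zero MSI data. */
    uint32_t pirq = ((addr & 0xff000) >> 12) | ((addr >> 32) & 0xffffff00);

    assert(addr == 0x1000fee46000ULL);
    assert(pirq == 4166);
    return 0;
}
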
@@ -988,6 +1002,9 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
pci_get_bus(pci_dev)->devices[pci_dev->devfn] = NULL;
pci_config_free(pci_dev);
+ if (xen_mode == XEN_EMULATE) {
+ xen_evtchn_remove_pci_device(pci_dev);
+ }
if (memory_region_is_mapped(&pci_dev->bus_master_enable_region)) {
memory_region_del_subregion(&pci_dev->bus_master_container_region,
&pci_dev->bus_master_enable_region);
@@ -33,6 +33,7 @@ extern bool msi_nonbroken;
void msi_set_message(PCIDevice *dev, MSIMessage msg);
MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector);
bool msi_enabled(const PCIDevice *dev);
+void msi_set_enabled(PCIDevice *dev);
int msi_init(struct PCIDevice *dev, uint8_t offset,
unsigned int nr_vectors, bool msi64bit,
bool msi_per_vector_mask, Error **errp);
@@ -43,6 +43,7 @@
#include "qemu/error-report.h"
#include "qemu/memalign.h"
#include "hw/i386/x86.h"
+#include "hw/i386/kvm/xen_evtchn.h"
#include "hw/i386/pc.h"
#include "hw/i386/apic.h"
#include "hw/i386/apic_internal.h"
@@ -5640,6 +5641,13 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
}
}
+#ifdef CONFIG_XEN_EMU
+ if (xen_mode == XEN_EMULATE &&
+ xen_evtchn_translate_pirq_msi(route, address, data)) {
+ return 0;
+ }
+#endif
+
address = kvm_swizzle_msi_ext_dest_id(address);
route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT;
route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK;
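
For reference, the route that xen_evtchn_translate_pirq_msi() fills in is an ordinary kvm_irq_routing_entry using the Xen event-channel type instead of MSI. A made-up example of the result for a PIRQ bound to event channel port 5 on vCPU 0 (field layout from the Linux <linux/kvm.h> UAPI; the values are illustrative):

#include <linux/kvm.h>

struct kvm_irq_routing_entry route = {
    .gsi  = 24,                 /* virtual GSI chosen by the caller */
    .type = KVM_IRQ_ROUTING_XEN_EVTCHN,
    .u.xen_evtchn = {
        .port     = 5,          /* event channel port */
        .vcpu     = 0,          /* from kvm_arch_vcpu_id() */
        .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
    },
};
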
@@ -5659,8 +5667,8 @@ struct MSIRouteEntry {
static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \
QLIST_HEAD_INITIALIZER(msi_route_list);
-static void kvm_update_msi_routes_all(void *private, bool global,
- uint32_t index, uint32_t mask)
+void kvm_update_msi_routes_all(void *private, bool global,
+ uint32_t index, uint32_t mask)
{
int cnt = 0, vector;
MSIRouteEntry *entry;
@@ -51,6 +51,8 @@ bool kvm_hv_vpindex_settable(void);
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
+void kvm_update_msi_routes_all(void *private, bool global,
+ uint32_t index, uint32_t mask);
bool kvm_enable_sgx_provisioning(KVMState *s);
void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
@@ -266,7 +266,8 @@ static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
1 << XENFEAT_auto_translated_physmap |
1 << XENFEAT_supervisor_mode_kernel |
1 << XENFEAT_hvm_callback_vector |
- 1 << XENFEAT_hvm_safe_pvclock;
+ 1 << XENFEAT_hvm_safe_pvclock |
+ 1 << XENFEAT_hvm_pirqs;
}
err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));