@@ -26,6 +26,8 @@
#include "hw/sysbus.h"
#include "hw/xen/xen.h"
+#include "hw/i386/x86.h"
+#include "hw/irq.h"
#include "xen_evtchn.h"
#include "xen_overlay.h"
@@ -99,9 +101,12 @@ struct XenEvtchnState {
uint64_t callback_param;
bool evtchn_in_kernel;
+ QEMUBH *gsi_bh;
+
QemuMutex port_lock;
uint32_t nr_ports;
XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
+ qemu_irq gsis[GSI_NUM_PINS];
};
struct XenEvtchnState *xen_evtchn_singleton;
@@ -166,13 +171,42 @@ static const TypeInfo xen_evtchn_info = {
.class_init = xen_evtchn_class_init,
};
+static void gsi_assert_bh(void *opaque)
+{
+ struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
+ if (vi) {
+ xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
+ }
+}
+
void xen_evtchn_create(void)
{
XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
-1, NULL));
+ int i;
+
xen_evtchn_singleton = s;
qemu_mutex_init(&s->port_lock);
+ s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);
+
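+ /*
+ * Create one IRQ output pin per GSI; they are wired to the system GSIs
+ * later, via xen_evtchn_connect_gsis().
+ */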
+ for (i = 0; i < GSI_NUM_PINS; i++) {
+ sysbus_init_irq(SYS_BUS_DEVICE(s), &s->gsis[i]);
+ }
+}
+
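+/*
+ * Called from the PC machine setup once the system GSIs exist, to wire
+ * each of this device's IRQ output pins to the corresponding system GSI.
+ */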
+void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
+{
+ XenEvtchnState *s = xen_evtchn_singleton;
+ int i;
+
+ if (!s) {
+ return;
+ }
+
+ for (i = 0; i < GSI_NUM_PINS; i++) {
+ sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
+ }
}
static void xen_evtchn_register_types(void)
@@ -182,6 +216,64 @@ static void xen_evtchn_register_types(void)
type_init(xen_evtchn_register_types)
+void xen_evtchn_set_callback_level(int level)
+{
+ XenEvtchnState *s = xen_evtchn_singleton;
+ uint32_t param;
+
+ if (!s) {
+ return;
+ }
+
+ /*
+ * We get to this function in a number of ways:
+ *
+ * • From I/O context, via PV backend drivers sending a notification to
+ * the guest.
+ *
+ * • From guest vCPU context, via loopback interdomain event channels
+ * (or theoretically even IPIs, although guests don't use those with GSI
+ * delivery because it's pointless. We can't rule it out, though, since a
+ * malicious guest must not be able to trigger a deadlock that way.)
+ *
+ * • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
+ * configured.
+ *
+ * • From guest vCPU context in the KVM exit handler, if the upcall
+ * pending flag has been cleared and the GSI needs to be deasserted.
+ *
+ * • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
+ * been acked in the irqchip.
+ *
+ * Whichever context we come from, if we aren't already holding the BQL
+ * then we can't take it now, as we may already hold s->port_lock. So
+ * trigger the BH to set the IRQ for us instead of doing it immediately.
+ *
+ * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
+ * will deliberately take the BQL because they want the change to take
+ * effect immediately. That just leaves interdomain loopback as the case
+ * which uses the BH.
+ */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_bh_schedule(s->gsi_bh);
+ return;
+ }
+
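+ /* For the GSI callback type, the low bits of the param hold the GSI number. */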
+ param = (uint32_t)s->callback_param;
+
+ switch (s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) {
+ case HVM_PARAM_CALLBACK_TYPE_GSI:
+ if (param < GSI_NUM_PINS) {
+ qemu_set_irq(s->gsis[param], level);
+ if (level) {
+ /* Ensure the vCPU polls for deassertion */
+ kvm_xen_set_callback_asserted();
+ }
+ }
+ break;
+ }
+}
+
int xen_evtchn_set_callback_param(uint64_t param)
{
XenEvtchnState *s = xen_evtchn_singleton;
@@ -207,6 +299,11 @@ int xen_evtchn_set_callback_param(uint64_t param)
}
break;
}
+
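+ /*
+ * Nothing to configure in the kernel for GSI delivery; the GSI level is
+ * driven from userspace via xen_evtchn_set_callback_level().
+ */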
+ case HVM_PARAM_CALLBACK_TYPE_GSI:
+ ret = 0;
+ break;
+
default:
ret = -ENOSYS;
break;
@@ -12,9 +12,13 @@
#ifndef QEMU_XEN_EVTCHN_H
#define QEMU_XEN_EVTCHN_H
+#include "hw/sysbus.h"
+
void xen_evtchn_create(void);
int xen_evtchn_soft_reset(void);
int xen_evtchn_set_callback_param(uint64_t param);
+void xen_evtchn_connect_gsis(qemu_irq *system_gsis);
+void xen_evtchn_set_callback_level(int level);
void hmp_xen_event_inject(Monitor *mon, const QDict *qdict);
void hmp_xen_event_list(Monitor *mon, const QDict *qdict);
@@ -1308,6 +1308,12 @@ void pc_basic_device_init(struct PCMachineState *pcms,
}
*rtc_state = mc146818_rtc_init(isa_bus, 2000, rtc_irq);
+#ifdef CONFIG_XEN_EMU
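+ /* Wire the Xen event channel device's GSI outputs to the system GSIs. */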
+ if (xen_mode == XEN_EMULATE) {
+ xen_evtchn_connect_gsis(gsi);
+ }
+#endif
+
qemu_register_boot_set(pc_boot_set, *rtc_state);
if (!xen_enabled() &&
@@ -22,6 +22,7 @@
uint32_t kvm_xen_get_caps(void);
void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id);
void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type);
+void kvm_xen_set_callback_asserted(void);
int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port);
#define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() & \
@@ -1797,6 +1797,7 @@ typedef struct CPUArchState {
uint64_t xen_vcpu_time_info_gpa;
uint64_t xen_vcpu_runstate_gpa;
uint8_t xen_vcpu_callback_vector;
+ bool xen_callback_asserted;
uint16_t xen_virq[XEN_NR_VIRQS];
uint64_t xen_singleshot_timer_ns;
#endif
@@ -5415,6 +5415,19 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
char str[256];
KVMState *state;
+#ifdef CONFIG_XEN_EMU
+ /*
+ * If the callback is asserted as a GSI (or PCI INTx) then check if
+ * vcpu_info->evtchn_upcall_pending has been cleared, and deassert
+ * the callback IRQ if so. Ideally we could hook into the PIC/IOAPIC
+ * EOI and only resample then, exactly as the VFIO eventfd pairs are
+ * designed to work for level-triggered interrupts.
+ */
+ if (cpu->env.xen_callback_asserted) {
+ kvm_xen_maybe_deassert_callback(cs);
+ }
+#endif
+
switch (run->exit_reason) {
case KVM_EXIT_HLT:
DPRINTF("handle_hlt\n");
@@ -311,6 +311,31 @@ void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
return X86_CPU(cs)->env.xen_vcpu_info_hva;
}
+void kvm_xen_maybe_deassert_callback(CPUState *cs)
+{
+ CPUX86State *env = &X86_CPU(cs)->env;
+ struct vcpu_info *vi = env->xen_vcpu_info_hva;
+ if (!vi) {
+ return;
+ }
+
+ /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
+ if (!vi->evtchn_upcall_pending) {
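+ /* Take the BQL; setting the GSI level must be done with it held. */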
+ qemu_mutex_lock_iothread();
+ xen_evtchn_set_callback_level(0);
+ qemu_mutex_unlock_iothread();
+ }
+}
+
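+/*
+ * Record that the callback GSI (or PCI INTx) has been asserted, so that
+ * vCPU 0's exit handler will poll for evtchn_upcall_pending being cleared
+ * and deassert it again.
+ */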
+void kvm_xen_set_callback_asserted(void)
+{
+ CPUState *cs = qemu_get_cpu(0);
+
+ if (cs) {
+ X86_CPU(cs)->env.xen_callback_asserted = true;
+ }
+}
+
void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
{
CPUState *cs = qemu_get_cpu(vcpu_id);
@@ -343,6 +368,13 @@ void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
*/
qemu_cpu_kick(cs);
break;
+
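+ /* The GSI/INTx callback is a single system-wide line, delivered only for vCPU 0. */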
+ case HVM_PARAM_CALLBACK_TYPE_GSI:
+ case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
+ if (vcpu_id == 0) {
+ xen_evtchn_set_callback_level(1);
+ }
+ break;
}
}
@@ -28,5 +28,6 @@ int kvm_xen_init_vcpu(CPUState *cs);
int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit);
int kvm_put_xen_state(CPUState *cs);
int kvm_get_xen_state(CPUState *cs);
+void kvm_xen_maybe_deassert_callback(CPUState *cs);
#endif /* QEMU_I386_KVM_XEN_EMU_H */