@@ -2840,7 +2840,7 @@ static void spapr_machine_init(MachineState *machine)
spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
/* advertise XIVE on POWER9 machines */
- if (spapr->irq->xive) {
+ if (kvmppc_has_cap_xive() && spapr->irq->xive) {
spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
}
@@ -20,6 +20,7 @@
#include "hw/qdev-properties.h"
#include "cpu-models.h"
#include "sysemu/kvm.h"
+#include "kvm_ppc.h"
#include "trace.h"
@@ -294,6 +295,7 @@ uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr)
void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
{
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ bool cap_xive = kvmppc_has_cap_xive();
if (kvm_enabled() && kvm_kernel_irqchip_split()) {
error_setg(errp, "kernel_irqchip split mode not supported on pseries");
@@ -304,6 +306,16 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
return;
}
+ /*
+ * Check for valid ic-mode - XIVE native won't work if hypervisor doesn't
+ * have support
+ */
+ if (!cap_xive && !spapr->irq->xics) {
+ error_setg(errp,
+ "XIVE native mode not available, don't use ic-mode=xive");
+ return;
+ }
+
/* Initialize the MSI IRQ allocator. */
spapr_irq_msi_init(spapr);
@@ -323,7 +335,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
spapr->ics = ICS_SPAPR(obj);
}
- if (spapr->irq->xive) {
+ if (cap_xive && spapr->irq->xive) {
uint32_t nr_servers = spapr_max_server_number(spapr);
DeviceState *dev;
int i;
Currently, XIVE native exploitation mode is not supported in nested guests. When we boot up a nested guest on a PowerNV platform, we observe the following entries in the device tree of the nested guest:

```
device_type = "power-ivpe";
compatible = "ibm,power-ivpe";
```

But as per LoPAR section B.5.9[1], these entries should only be present when XIVE native exploitation mode is being used. Presently, there is no support for nested virtualization in the context of XIVE, and hence the device tree shouldn't advertise support for the XIVE interrupt controller to a nested guest.

Also, with the present behaviour, when we boot a nested KVM guest, the following QEMU warnings are reported:

```
Calling ibm,client-architecture-support...qemu-system-ppc64: warning: kernel_irqchip allowed but unavailable: IRQ_XIVE capability must be present for KVM
Falling back to kernel-irqchip=off
.
.
.
[ 0.000000][ T0] xive: Using IRQ range [0-0]
[ 0.000000][ T0] xive: Interrupt handling initialized with spapr backend
[ 0.000000][ T0] xive: Using priority 6 for all interrupts
[ 0.000000][ T0] xive: Using 64kB queues
```

With this patch, the above warnings are no longer observed in the nested guest's dmesg, and the device tree contains the following entries:

```
device_type = "PowerPC-External-Interrupt-Presentation";
compatible = "IBM,ppc-xicp";
```

Also add a check to handle the scenarios where ic-mode=<mode> is explicitly specified by the user: make the code error out when the XIVE native capability is not available and the user specifies ic-mode=xive.

Testing:
1. This patch has been tested on a P9 PowerNV machine by spinning up both a KVM guest and a nested KVM guest. The guest can use XIVE native mode just fine with correct DT entries; for the nested guest, interrupt emulation is used and the DT contains correct entries.
2. This patch has also been tested with KVM on the PowerVM platform. In this scenario, we can boot up a KVM guest on top of a Power Hypervisor guest.
Kernel patches - https://lore.kernel.org/linuxppc-dev/20230605064848.12319-1-jpn@linux.vnet.ibm.com
QEMU tree to test - https://github.com/mikey/qemu/tree/kvm-papr

[1]: https://files.openpower.foundation/s/ZmtZyCGiJ2oJHim

Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
---
 hw/ppc/spapr.c     |  2 +-
 hw/ppc/spapr_irq.c | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)