Message ID | 20241106133407.604587-6-dbarboza@ventanamicro.com |
---|---|
State | New |
Headers | show |
Series | hw/riscv: riscv-iommu-sys device | expand |
On Wed, Nov 6, 2024 at 11:36 PM Daniel Henrique Barboza <dbarboza@ventanamicro.com> wrote: > > MSIx support is added in the RISC-V IOMMU platform device by including > the required MSIx facilities to alow software to properly setup the MSIx > subsystem. > > We took inspiration of what is being done in the riscv-iommu-pci device, > mainly msix_init() and msix_notify(), while keeping in mind that > riscv-iommu-sys isn't a true PCI device and we don't need to copy/paste > all the contents of these MSIx functions. > > Two extra MSI MemoryRegions were added: 'msix-table' and 'msix-pba'. > They are used to manage r/w of the MSI table and Pending Bit Array (PBA) > respectively. Both are subregions of the main IOMMU memory region, > iommu->regs_mr, initialized during riscv_iommu_realize(), and each one > has their own handlers for MSIx reads and writes. > > This is the expected memory map when using this device in the 'virt' > machine: > > 0000000003010000-0000000003010fff (prio 0, i/o): riscv-iommu-regs > 0000000003010300-000000000301034f (prio 0, i/o): msix-table > 0000000003010400-0000000003010407 (prio 0, i/o): msix-pba > > We're now able to set IGS to RISCV_IOMMU_CAP_IGS_BOTH, and userspace is > free to decide which interrupt model to use. > > Enabling MSIx support for this device in the 'virt' machine requires > adding 'msi-parent' in the iommu-sys DT. > > Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Alistair > --- > hw/riscv/riscv-iommu-sys.c | 116 +++++++++++++++++++++++++++++++++++-- > hw/riscv/trace-events | 2 + > hw/riscv/virt.c | 6 +- > 3 files changed, 119 insertions(+), 5 deletions(-) > > diff --git a/hw/riscv/riscv-iommu-sys.c b/hw/riscv/riscv-iommu-sys.c > index 4b82046ce9..a0ef67a20b 100644 > --- a/hw/riscv/riscv-iommu-sys.c > +++ b/hw/riscv/riscv-iommu-sys.c > @@ -26,11 +26,15 @@ > #include "qemu/host-utils.h" > #include "qemu/module.h" > #include "qom/object.h" > +#include "exec/exec-all.h" > +#include "trace.h" > > #include "riscv-iommu.h" > > #define RISCV_IOMMU_SYSDEV_ICVEC_VECTORS 0x3333 > > +#define RISCV_IOMMU_PCI_MSIX_VECTORS 5 > + > /* RISC-V IOMMU System Platform Device Emulation */ > > struct RISCVIOMMUStateSys { > @@ -39,21 +43,123 @@ struct RISCVIOMMUStateSys { > uint32_t base_irq; > DeviceState *irqchip; > RISCVIOMMUState iommu; > + > + /* Wired int support */ > qemu_irq irqs[RISCV_IOMMU_INTR_COUNT]; > + > + /* Memory Regions for MSIX table and pending bit entries. */ > + MemoryRegion msix_table_mmio; > + MemoryRegion msix_pba_mmio; > + uint8_t *msix_table; > + uint8_t *msix_pba; > +}; > + > +static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr, > + unsigned size) > +{ > + RISCVIOMMUStateSys *s = opaque; > + > + g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE); > + return pci_get_long(s->msix_table + addr); > +} > + > +static void msix_table_mmio_write(void *opaque, hwaddr addr, > + uint64_t val, unsigned size) > +{ > + RISCVIOMMUStateSys *s = opaque; > + > + g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE); > + pci_set_long(s->msix_table + addr, val); > +} > + > +static const MemoryRegionOps msix_table_mmio_ops = { > + .read = msix_table_mmio_read, > + .write = msix_table_mmio_write, > + .endianness = DEVICE_LITTLE_ENDIAN, > + .valid = { > + .min_access_size = 4, > + .max_access_size = 8, > + }, > + .impl = { > + .max_access_size = 4, > + }, > +}; > + > +static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, > + unsigned size) > +{ > + RISCVIOMMUStateSys *s = opaque; > + > + return pci_get_long(s->msix_pba + addr); > +} > + > +static void msix_pba_mmio_write(void *opaque, hwaddr addr, > + uint64_t val, unsigned size) > +{ > +} > + > +static const MemoryRegionOps msix_pba_mmio_ops = { > + .read = msix_pba_mmio_read, > + .write = msix_pba_mmio_write, > + .endianness = DEVICE_LITTLE_ENDIAN, > + .valid = { > + .min_access_size = 4, > + .max_access_size = 8, > + }, > + .impl = { > + .max_access_size = 4, > + }, > }; > > +static void riscv_iommu_sysdev_init_msi(RISCVIOMMUStateSys *s, > + uint32_t n_vectors) > +{ > + RISCVIOMMUState *iommu = &s->iommu; > + uint32_t table_size = table_size = n_vectors * PCI_MSIX_ENTRY_SIZE; > + uint32_t table_offset = RISCV_IOMMU_REG_MSI_CONFIG; > + uint32_t pba_size = QEMU_ALIGN_UP(n_vectors, 64) / 8; > + uint32_t pba_offset = RISCV_IOMMU_REG_MSI_CONFIG + 256; > + > + s->msix_table = g_malloc0(table_size); > + s->msix_pba = g_malloc0(pba_size); > + > + memory_region_init_io(&s->msix_table_mmio, OBJECT(s), &msix_table_mmio_ops, > + s, "msix-table", table_size); > + memory_region_add_subregion(&iommu->regs_mr, table_offset, > + &s->msix_table_mmio); > + > + memory_region_init_io(&s->msix_pba_mmio, OBJECT(s), &msix_pba_mmio_ops, s, > + "msix-pba", pba_size); > + memory_region_add_subregion(&iommu->regs_mr, pba_offset, > + &s->msix_pba_mmio); > +} > + > +static void riscv_iommu_sysdev_send_MSI(RISCVIOMMUStateSys *s, > + uint32_t vector) > +{ > + uint8_t *table_entry = s->msix_table + vector * PCI_MSIX_ENTRY_SIZE; > + uint64_t msi_addr = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); > + uint32_t msi_data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); > + MemTxResult result; > + > + address_space_stl_le(&address_space_memory, msi_addr, > + msi_data, MEMTXATTRS_UNSPECIFIED, &result); > + trace_riscv_iommu_sys_msi_sent(vector, msi_addr, msi_data, result); > +} > + > static void riscv_iommu_sysdev_notify(RISCVIOMMUState *iommu, > unsigned vector) > { > RISCVIOMMUStateSys *s = container_of(iommu, RISCVIOMMUStateSys, iommu); > uint32_t fctl = riscv_iommu_reg_get32(iommu, RISCV_IOMMU_REG_FCTL); > > - /* We do not support MSIs yet */ > - if (!(fctl & RISCV_IOMMU_FCTL_WSI)) { > + if (fctl & RISCV_IOMMU_FCTL_WSI) { > + qemu_irq_pulse(s->irqs[vector]); > + trace_riscv_iommu_sys_irq_sent(vector); > return; > } > > - qemu_irq_pulse(s->irqs[vector]); > + riscv_iommu_sysdev_send_MSI(s, vector); > } > > static void riscv_iommu_sys_realize(DeviceState *dev, Error **errp) > @@ -82,6 +188,8 @@ static void riscv_iommu_sys_realize(DeviceState *dev, Error **errp) > irq = qdev_get_gpio_in(s->irqchip, s->base_irq + i); > sysbus_connect_irq(sysdev, i, irq); > } > + > + riscv_iommu_sysdev_init_msi(s, RISCV_IOMMU_PCI_MSIX_VECTORS); > } > > static void riscv_iommu_sys_init(Object *obj) > @@ -93,7 +201,7 @@ static void riscv_iommu_sys_init(Object *obj) > qdev_alias_all_properties(DEVICE(iommu), obj); > > iommu->icvec_avail_vectors = RISCV_IOMMU_SYSDEV_ICVEC_VECTORS; > - riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_WSI); > + riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_BOTH); > } > > static Property riscv_iommu_sys_properties[] = { > diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events > index 0527c56c91..94facbb8b1 100644 > --- a/hw/riscv/trace-events > +++ b/hw/riscv/trace-events > @@ -15,3 +15,5 @@ riscv_iommu_icvec_write(uint32_t orig, uint32_t actual) "ICVEC write: incoming 0 > riscv_iommu_ats(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: translate request %04x:%02x.%u iova: 0x%"PRIx64 > riscv_iommu_ats_inval(const char *id) "%s: dev-iotlb invalidate" > riscv_iommu_ats_prgr(const char *id) "%s: dev-iotlb page request group response" > +riscv_iommu_sys_irq_sent(uint32_t vector) "IRQ sent to vector %u" > +riscv_iommu_sys_msi_sent(uint32_t vector, uint64_t msi_addr, uint32_t msi_data, uint32_t result) "MSI sent to vector %u msi_addr 0x%lx msi_data 0x%x result %u" > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c > index 23d1380b86..281fc65cc6 100644 > --- a/hw/riscv/virt.c > +++ b/hw/riscv/virt.c > @@ -1043,6 +1043,7 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf) > } > > static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip, > + uint32_t msi_phandle, > uint32_t *iommu_sys_phandle) > { > const char comp[] = "riscv,iommu"; > @@ -1077,6 +1078,8 @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip, > iommu_irq_map[2], FDT_IRQ_TYPE_EDGE_LOW, > iommu_irq_map[3], FDT_IRQ_TYPE_EDGE_LOW); > > + qemu_fdt_setprop_cell(fdt, iommu_node, "msi-parent", msi_phandle); > + > *iommu_sys_phandle = iommu_phandle; > } > > @@ -1117,7 +1120,8 @@ static void finalize_fdt(RISCVVirtState *s) > create_fdt_virtio(s, virt_memmap, irq_virtio_phandle); > > if (virt_is_iommu_sys_enabled(s)) { > - create_fdt_iommu_sys(s, irq_mmio_phandle, &iommu_sys_phandle); > + create_fdt_iommu_sys(s, irq_mmio_phandle, msi_pcie_phandle, > + &iommu_sys_phandle); > } > create_fdt_pcie(s, virt_memmap, irq_pcie_phandle, msi_pcie_phandle, > iommu_sys_phandle); > -- > 2.45.2 > >
diff --git a/hw/riscv/riscv-iommu-sys.c b/hw/riscv/riscv-iommu-sys.c index 4b82046ce9..a0ef67a20b 100644 --- a/hw/riscv/riscv-iommu-sys.c +++ b/hw/riscv/riscv-iommu-sys.c @@ -26,11 +26,15 @@ #include "qemu/host-utils.h" #include "qemu/module.h" #include "qom/object.h" +#include "exec/exec-all.h" +#include "trace.h" #include "riscv-iommu.h" #define RISCV_IOMMU_SYSDEV_ICVEC_VECTORS 0x3333 +#define RISCV_IOMMU_PCI_MSIX_VECTORS 5 + /* RISC-V IOMMU System Platform Device Emulation */ struct RISCVIOMMUStateSys { @@ -39,21 +43,123 @@ struct RISCVIOMMUStateSys { uint32_t base_irq; DeviceState *irqchip; RISCVIOMMUState iommu; + + /* Wired int support */ qemu_irq irqs[RISCV_IOMMU_INTR_COUNT]; + + /* Memory Regions for MSIX table and pending bit entries. */ + MemoryRegion msix_table_mmio; + MemoryRegion msix_pba_mmio; + uint8_t *msix_table; + uint8_t *msix_pba; +}; + +static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + RISCVIOMMUStateSys *s = opaque; + + g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE); + return pci_get_long(s->msix_table + addr); +} + +static void msix_table_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + RISCVIOMMUStateSys *s = opaque; + + g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE); + pci_set_long(s->msix_table + addr, val); +} + +static const MemoryRegionOps msix_table_mmio_ops = { + .read = msix_table_mmio_read, + .write = msix_table_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .max_access_size = 4, + }, +}; + +static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + RISCVIOMMUStateSys *s = opaque; + + return pci_get_long(s->msix_pba + addr); +} + +static void msix_pba_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ +} + +static const MemoryRegionOps msix_pba_mmio_ops = { + .read = msix_pba_mmio_read, + .write = msix_pba_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .max_access_size = 4, + }, }; +static void riscv_iommu_sysdev_init_msi(RISCVIOMMUStateSys *s, + uint32_t n_vectors) +{ + RISCVIOMMUState *iommu = &s->iommu; + uint32_t table_size = table_size = n_vectors * PCI_MSIX_ENTRY_SIZE; + uint32_t table_offset = RISCV_IOMMU_REG_MSI_CONFIG; + uint32_t pba_size = QEMU_ALIGN_UP(n_vectors, 64) / 8; + uint32_t pba_offset = RISCV_IOMMU_REG_MSI_CONFIG + 256; + + s->msix_table = g_malloc0(table_size); + s->msix_pba = g_malloc0(pba_size); + + memory_region_init_io(&s->msix_table_mmio, OBJECT(s), &msix_table_mmio_ops, + s, "msix-table", table_size); + memory_region_add_subregion(&iommu->regs_mr, table_offset, + &s->msix_table_mmio); + + memory_region_init_io(&s->msix_pba_mmio, OBJECT(s), &msix_pba_mmio_ops, s, + "msix-pba", pba_size); + memory_region_add_subregion(&iommu->regs_mr, pba_offset, + &s->msix_pba_mmio); +} + +static void riscv_iommu_sysdev_send_MSI(RISCVIOMMUStateSys *s, + uint32_t vector) +{ + uint8_t *table_entry = s->msix_table + vector * PCI_MSIX_ENTRY_SIZE; + uint64_t msi_addr = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); + uint32_t msi_data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); + MemTxResult result; + + address_space_stl_le(&address_space_memory, msi_addr, + msi_data, MEMTXATTRS_UNSPECIFIED, &result); + trace_riscv_iommu_sys_msi_sent(vector, msi_addr, msi_data, result); +} + static void riscv_iommu_sysdev_notify(RISCVIOMMUState *iommu, unsigned vector) { RISCVIOMMUStateSys *s = container_of(iommu, RISCVIOMMUStateSys, iommu); uint32_t fctl = riscv_iommu_reg_get32(iommu, RISCV_IOMMU_REG_FCTL); - /* We do not support MSIs yet */ - if (!(fctl & RISCV_IOMMU_FCTL_WSI)) { + if (fctl & RISCV_IOMMU_FCTL_WSI) { + qemu_irq_pulse(s->irqs[vector]); + trace_riscv_iommu_sys_irq_sent(vector); return; } - qemu_irq_pulse(s->irqs[vector]); + riscv_iommu_sysdev_send_MSI(s, vector); } static void riscv_iommu_sys_realize(DeviceState *dev, Error **errp) @@ -82,6 +188,8 @@ static void riscv_iommu_sys_realize(DeviceState *dev, Error **errp) irq = qdev_get_gpio_in(s->irqchip, s->base_irq + i); sysbus_connect_irq(sysdev, i, irq); } + + riscv_iommu_sysdev_init_msi(s, RISCV_IOMMU_PCI_MSIX_VECTORS); } static void riscv_iommu_sys_init(Object *obj) @@ -93,7 +201,7 @@ static void riscv_iommu_sys_init(Object *obj) qdev_alias_all_properties(DEVICE(iommu), obj); iommu->icvec_avail_vectors = RISCV_IOMMU_SYSDEV_ICVEC_VECTORS; - riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_WSI); + riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_BOTH); } static Property riscv_iommu_sys_properties[] = { diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events index 0527c56c91..94facbb8b1 100644 --- a/hw/riscv/trace-events +++ b/hw/riscv/trace-events @@ -15,3 +15,5 @@ riscv_iommu_icvec_write(uint32_t orig, uint32_t actual) "ICVEC write: incoming 0 riscv_iommu_ats(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: translate request %04x:%02x.%u iova: 0x%"PRIx64 riscv_iommu_ats_inval(const char *id) "%s: dev-iotlb invalidate" riscv_iommu_ats_prgr(const char *id) "%s: dev-iotlb page request group response" +riscv_iommu_sys_irq_sent(uint32_t vector) "IRQ sent to vector %u" +riscv_iommu_sys_msi_sent(uint32_t vector, uint64_t msi_addr, uint32_t msi_data, uint32_t result) "MSI sent to vector %u msi_addr 0x%lx msi_data 0x%x result %u" diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 23d1380b86..281fc65cc6 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -1043,6 +1043,7 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf) } static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip, + uint32_t msi_phandle, uint32_t *iommu_sys_phandle) { const char comp[] = "riscv,iommu"; @@ -1077,6 +1078,8 @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip, iommu_irq_map[2], FDT_IRQ_TYPE_EDGE_LOW, iommu_irq_map[3], FDT_IRQ_TYPE_EDGE_LOW); + qemu_fdt_setprop_cell(fdt, iommu_node, "msi-parent", msi_phandle); + *iommu_sys_phandle = iommu_phandle; } @@ -1117,7 +1120,8 @@ static void finalize_fdt(RISCVVirtState *s) create_fdt_virtio(s, virt_memmap, irq_virtio_phandle); if (virt_is_iommu_sys_enabled(s)) { - create_fdt_iommu_sys(s, irq_mmio_phandle, &iommu_sys_phandle); + create_fdt_iommu_sys(s, irq_mmio_phandle, msi_pcie_phandle, + &iommu_sys_phandle); } create_fdt_pcie(s, virt_memmap, irq_pcie_phandle, msi_pcie_phandle, iommu_sys_phandle);
MSIx support is added in the RISC-V IOMMU platform device by including the required MSIx facilities to alow software to properly setup the MSIx subsystem. We took inspiration of what is being done in the riscv-iommu-pci device, mainly msix_init() and msix_notify(), while keeping in mind that riscv-iommu-sys isn't a true PCI device and we don't need to copy/paste all the contents of these MSIx functions. Two extra MSI MemoryRegions were added: 'msix-table' and 'msix-pba'. They are used to manage r/w of the MSI table and Pending Bit Array (PBA) respectively. Both are subregions of the main IOMMU memory region, iommu->regs_mr, initialized during riscv_iommu_realize(), and each one has their own handlers for MSIx reads and writes. This is the expected memory map when using this device in the 'virt' machine: 0000000003010000-0000000003010fff (prio 0, i/o): riscv-iommu-regs 0000000003010300-000000000301034f (prio 0, i/o): msix-table 0000000003010400-0000000003010407 (prio 0, i/o): msix-pba We're now able to set IGS to RISCV_IOMMU_CAP_IGS_BOTH, and userspace is free to decide which interrupt model to use. Enabling MSIx support for this device in the 'virt' machine requires adding 'msi-parent' in the iommu-sys DT. Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> --- hw/riscv/riscv-iommu-sys.c | 116 +++++++++++++++++++++++++++++++++++-- hw/riscv/trace-events | 2 + hw/riscv/virt.c | 6 +- 3 files changed, 119 insertions(+), 5 deletions(-)