Message ID | 1499274819-15607-10-git-send-email-clg@kaod.org |
---|---|
State | New |
Headers | show |
On Wed, Jul 05, 2017 at 07:13:22PM +0200, Cédric Le Goater wrote: > Each source adds its own ESB mempry region to the overall ESB memory > region of the controller. It will be mapped in the CPU address space > when XIVE is activated. > > The default mapping address for the ESB memory region is the same one > used on baremetal. > > Signed-off-by: Cédric Le Goater <clg@kaod.org> > --- > hw/intc/xive-internal.h | 5 +++++ > hw/intc/xive.c | 44 +++++++++++++++++++++++++++++++++++++++++++- > 2 files changed, 48 insertions(+), 1 deletion(-) > > diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > index 8e755aa88a14..c06be823aad0 100644 > --- a/hw/intc/xive-internal.h > +++ b/hw/intc/xive-internal.h > @@ -98,6 +98,7 @@ struct XIVE { > SysBusDevice parent; > > /* Properties */ > + uint32_t chip_id; So there is a XIVE object per chip. How does this work on PAPR? One logical chip/XIVE, or something more complex? > uint32_t nr_targets; > > /* IRQ number allocator */ > @@ -111,6 +112,10 @@ struct XIVE { > void *sbe; > XiveIVE *ivt; > XiveEQ *eqdt; > + > + /* ESB and TIMA memory location */ > + hwaddr vc_base; > + MemoryRegion esb_iomem; > }; > > void xive_reset(void *dev); > diff --git a/hw/intc/xive.c b/hw/intc/xive.c > index 8f8bb8b787bd..a1cb87a07b76 100644 > --- a/hw/intc/xive.c > +++ b/hw/intc/xive.c > @@ -312,6 +312,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) > XiveICSState *xs = ICS_XIVE(ics); > Object *obj; > Error *err = NULL; > + XIVE *x; I don't really like just 'x' for a context variable like this (as opposed to a temporary). > > obj = object_property_get_link(OBJECT(xs), "xive", &err); > if (!obj) { > @@ -319,7 +320,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) > __func__, error_get_pretty(err)); > return; > } > - xs->xive = XIVE(obj); > + x = xs->xive = XIVE(obj); > > if (!ics->nr_irqs) { > error_setg(errp, "Number of interrupts needs to be greater 0"); > @@ -338,6 +339,11 @@ static void xive_ics_realize(ICSState *ics, Error **errp) > "xive.esb", > (1ull << xs->esb_shift) * ICS_BASE(xs)->nr_irqs); > > + /* Install the ESB memory region in the overall one */ > + memory_region_add_subregion(&x->esb_iomem, > + ICS_BASE(xs)->offset * (1 << xs->esb_shift), > + &xs->esb_iomem); > + > qemu_register_reset(xive_ics_reset, xs); > } > > @@ -375,6 +381,32 @@ static const TypeInfo xive_ics_info = { > */ > #define MAX_HW_IRQS_ENTRIES (8 * 1024) > > +/* VC BAR contains set translations for the ESBs and the EQs. */ > +#define VC_BAR_DEFAULT 0x10000000000ull > +#define VC_BAR_SIZE 0x08000000000ull > + > +#define P9_MMIO_BASE 0x006000000000000ull > +#define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id))) chip-based MMIO addresses leaking into the PAPR model seems like it might not be what you want > +static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size) > +{ > + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", > + __func__, offset, size); > + return 0; > +} > + > +static void xive_esb_default_write(void *opaque, hwaddr offset, uint64_t value, > + unsigned size) > +{ > + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " <- 0x%" PRIx64 " [%u]\n", > + __func__, offset, value, size); > +} > + > +static const MemoryRegionOps xive_esb_default_ops = { > + .read = xive_esb_default_read, > + .write = xive_esb_default_write, > + .endianness = DEVICE_BIG_ENDIAN, > +}; > > void xive_reset(void *dev) > { > @@ -435,10 +467,20 @@ static void xive_realize(DeviceState *dev, Error **errp) > x->eqdt = g_malloc0(x->nr_targets * XIVE_EQ_PRIORITY_COUNT * > sizeof(XiveEQ)); > > + /* VC BAR. That's the full window but we will only map the > + * subregions in use. */ > + x->vc_base = (hwaddr)(P9_CHIP_BASE(x->chip_id) | VC_BAR_DEFAULT); > + > + /* install default memory region handlers to log bogus access */ > + memory_region_init_io(&x->esb_iomem, NULL, &xive_esb_default_ops, > + NULL, "xive.esb", VC_BAR_SIZE); > + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem); > + > qemu_register_reset(xive_reset, dev); > } > > static Property xive_properties[] = { > + DEFINE_PROP_UINT32("chip-id", XIVE, chip_id, 0), > DEFINE_PROP_UINT32("nr-targets", XIVE, nr_targets, 0), > DEFINE_PROP_END_OF_LIST(), > };
On Mon, 2017-07-24 at 14:49 +1000, David Gibson wrote: > On Wed, Jul 05, 2017 at 07:13:22PM +0200, Cédric Le Goater wrote: > > Each source adds its own ESB mempry region to the overall ESB memory > > region of the controller. It will be mapped in the CPU address space > > when XIVE is activated. > > > > The default mapping address for the ESB memory region is the same one > > used on baremetal. > > > > Signed-off-by: Cédric Le Goater <clg@kaod.org> > > --- > > hw/intc/xive-internal.h | 5 +++++ > > hw/intc/xive.c | 44 +++++++++++++++++++++++++++++++++++++++++++- > > 2 files changed, 48 insertions(+), 1 deletion(-) > > > > diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > > index 8e755aa88a14..c06be823aad0 100644 > > --- a/hw/intc/xive-internal.h > > +++ b/hw/intc/xive-internal.h > > @@ -98,6 +98,7 @@ struct XIVE { > > SysBusDevice parent; > > > > /* Properties */ > > + uint32_t chip_id; > > So there is a XIVE object per chip. How does this work on PAPR? One > logical chip/XIVE, or something more complex? One global XIVE for PAPR. For the MMIOs, the way it works is that: - For MMIOs pertaining to a specific interrupt or queue, there's an H- call that will return the proper "guest physical" address. For qemu with KVM we'll have to probably create a single chunk of qemu address space (a single mem region) that contains individual pages mapped with MAP_FIXED originating from the different HW bits, we still need to sort out how exactly we'll do that in practice. - For the TIMA (the presentation MMIOs), those are always at the same physical address for everybody (so for a guest it's a single memory region we'll map to that physical address), the HW "knows" which HW thread is talking to it (and the hypervisor tells the HW which vcpu is running on a given HW thread at a given point in time). That address is obtained from the device-tree > > uint32_t nr_targets; > > > > /* IRQ number allocator */ > > @@ -111,6 +112,10 @@ struct XIVE { > > void *sbe; > > XiveIVE *ivt; > > XiveEQ *eqdt; > > + > > + /* ESB and TIMA memory location */ > > + hwaddr vc_base; > > + MemoryRegion esb_iomem; > > }; > > > > void xive_reset(void *dev); > > diff --git a/hw/intc/xive.c b/hw/intc/xive.c > > index 8f8bb8b787bd..a1cb87a07b76 100644 > > --- a/hw/intc/xive.c > > +++ b/hw/intc/xive.c > > @@ -312,6 +312,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) > > XiveICSState *xs = ICS_XIVE(ics); > > Object *obj; > > Error *err = NULL; > > + XIVE *x; > > I don't really like just 'x' for a context variable like this (as > opposed to a temporary). > > > > > obj = object_property_get_link(OBJECT(xs), "xive", &err); > > if (!obj) { > > @@ -319,7 +320,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) > > __func__, error_get_pretty(err)); > > return; > > } > > - xs->xive = XIVE(obj); > > + x = xs->xive = XIVE(obj); > > > > if (!ics->nr_irqs) { > > error_setg(errp, "Number of interrupts needs to be greater 0"); > > @@ -338,6 +339,11 @@ static void xive_ics_realize(ICSState *ics, Error **errp) > > "xive.esb", > > (1ull << xs->esb_shift) * ICS_BASE(xs)->nr_irqs); > > > > + /* Install the ESB memory region in the overall one */ > > + memory_region_add_subregion(&x->esb_iomem, > > + ICS_BASE(xs)->offset * (1 << xs->esb_shift), > > + &xs->esb_iomem); > > + > > qemu_register_reset(xive_ics_reset, xs); > > } > > > > @@ -375,6 +381,32 @@ static const TypeInfo xive_ics_info = { > > */ > > #define MAX_HW_IRQS_ENTRIES (8 * 1024) > > > > +/* VC BAR contains set translations for the ESBs and the EQs. */ > > +#define VC_BAR_DEFAULT 0x10000000000ull > > +#define VC_BAR_SIZE 0x08000000000ull > > + > > +#define P9_MMIO_BASE 0x006000000000000ull > > +#define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id))) > > chip-based MMIO addresses leaking into the PAPR model seems like it > might not be what you want > > > +static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size) > > +{ > > + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", > > + __func__, offset, size); > > + return 0; > > +} > > + > > +static void xive_esb_default_write(void *opaque, hwaddr offset, uint64_t value, > > + unsigned size) > > +{ > > + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " <- 0x%" PRIx64 " [%u]\n", > > + __func__, offset, value, size); > > +} > > + > > +static const MemoryRegionOps xive_esb_default_ops = { > > + .read = xive_esb_default_read, > > + .write = xive_esb_default_write, > > + .endianness = DEVICE_BIG_ENDIAN, > > +}; > > > > void xive_reset(void *dev) > > { > > @@ -435,10 +467,20 @@ static void xive_realize(DeviceState *dev, Error **errp) > > x->eqdt = g_malloc0(x->nr_targets * XIVE_EQ_PRIORITY_COUNT * > > sizeof(XiveEQ)); > > > > + /* VC BAR. That's the full window but we will only map the > > + * subregions in use. */ > > + x->vc_base = (hwaddr)(P9_CHIP_BASE(x->chip_id) | VC_BAR_DEFAULT); > > + > > + /* install default memory region handlers to log bogus access */ > > + memory_region_init_io(&x->esb_iomem, NULL, &xive_esb_default_ops, > > + NULL, "xive.esb", VC_BAR_SIZE); > > + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem); > > + > > qemu_register_reset(xive_reset, dev); > > } > > > > static Property xive_properties[] = { > > + DEFINE_PROP_UINT32("chip-id", XIVE, chip_id, 0), > > DEFINE_PROP_UINT32("nr-targets", XIVE, nr_targets, 0), > > DEFINE_PROP_END_OF_LIST(), > > }; > >
On Mon, Jul 24, 2017 at 04:09:31PM +1000, Benjamin Herrenschmidt wrote: > On Mon, 2017-07-24 at 14:49 +1000, David Gibson wrote: > > On Wed, Jul 05, 2017 at 07:13:22PM +0200, Cédric Le Goater wrote: > > > Each source adds its own ESB mempry region to the overall ESB memory > > > region of the controller. It will be mapped in the CPU address space > > > when XIVE is activated. > > > > > > The default mapping address for the ESB memory region is the same one > > > used on baremetal. > > > > > > Signed-off-by: Cédric Le Goater <clg@kaod.org> > > > --- > > > hw/intc/xive-internal.h | 5 +++++ > > > hw/intc/xive.c | 44 +++++++++++++++++++++++++++++++++++++++++++- > > > 2 files changed, 48 insertions(+), 1 deletion(-) > > > > > > diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > > > index 8e755aa88a14..c06be823aad0 100644 > > > --- a/hw/intc/xive-internal.h > > > +++ b/hw/intc/xive-internal.h > > > @@ -98,6 +98,7 @@ struct XIVE { > > > SysBusDevice parent; > > > > > > /* Properties */ > > > + uint32_t chip_id; > > > > So there is a XIVE object per chip. How does this work on PAPR? One > > logical chip/XIVE, or something more complex? > > One global XIVE for PAPR. For the MMIOs, the way it works is that: > > - For MMIOs pertaining to a specific interrupt or queue, there's an H- > call that will return the proper "guest physical" address. For qemu > with KVM we'll have to probably create a single chunk of qemu address > space (a single mem region) that contains individual pages mapped with > MAP_FIXED originating from the different HW bits, we still need to sort > out how exactly we'll do that in practice. > > - For the TIMA (the presentation MMIOs), those are always at the same > physical address for everybody (so for a guest it's a single memory > region we'll map to that physical address), the HW "knows" which HW > thread is talking to it (and the hypervisor tells the HW which vcpu is > running on a given HW thread at a given point in time). That address is > obtained from the device-tree Ok. That leaves "chip_id" as a rather surprising thing to see in an object which will appear on PAPR.
On 07/24/2017 08:09 AM, Benjamin Herrenschmidt wrote: > On Mon, 2017-07-24 at 14:49 +1000, David Gibson wrote: >> On Wed, Jul 05, 2017 at 07:13:22PM +0200, Cédric Le Goater wrote: >>> Each source adds its own ESB mempry region to the overall ESB memory >>> region of the controller. It will be mapped in the CPU address space >>> when XIVE is activated. >>> >>> The default mapping address for the ESB memory region is the same one >>> used on baremetal. >>> >>> Signed-off-by: Cédric Le Goater <clg@kaod.org> >>> --- >>> hw/intc/xive-internal.h | 5 +++++ >>> hw/intc/xive.c | 44 +++++++++++++++++++++++++++++++++++++++++++- >>> 2 files changed, 48 insertions(+), 1 deletion(-) >>> >>> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h >>> index 8e755aa88a14..c06be823aad0 100644 >>> --- a/hw/intc/xive-internal.h >>> +++ b/hw/intc/xive-internal.h >>> @@ -98,6 +98,7 @@ struct XIVE { >>> SysBusDevice parent; >>> >>> /* Properties */ >>> + uint32_t chip_id; >> >> So there is a XIVE object per chip. How does this work on PAPR? One >> logical chip/XIVE, or something more complex? > > One global XIVE for PAPR. Yes. The chip-id is useless for sPAPR (0 is the default) but for a PowerNV system, the address used to map the ESB memory region depends on the chip-id and I thought we could reuse the same XIVE object. So, a sPAPR guest would use the address of a single chip baremetal system. This needs more explanation I agree. Thanks to Ben who is providing a lot. I will update the changelogs in the next version. The TIMA is mapped at a fixed address so the chip-id does not come in play. > For the MMIOs, the way it works is that: > > - For MMIOs pertaining to a specific interrupt or queue, there's an H- > call that will return the proper "guest physical" address. For qemu > with KVM we'll have to probably create a single chunk of qemu address > space (a single mem region) that contains individual pages mapped with > MAP_FIXED originating from the different HW bits, we still need to sort > out how exactly we'll do that in practice. I haven't looked at all the KVM details. But, regarding the ESBs, I had the above in mind and used a single memory region to contain them all. > - For the TIMA (the presentation MMIOs), those are always at the same > physical address for everybody (so for a guest it's a single memory > region we'll map to that physical address), the HW "knows" which HW > thread is talking to it (and the hypervisor tells the HW which vcpu is > running on a given HW thread at a given point in time). That address is > obtained from the device-tree > >>> uint32_t nr_targets; >>> >>> /* IRQ number allocator */ >>> @@ -111,6 +112,10 @@ struct XIVE { >>> void *sbe; >>> XiveIVE *ivt; >>> XiveEQ *eqdt; >>> + >>> + /* ESB and TIMA memory location */ >>> + hwaddr vc_base; >>> + MemoryRegion esb_iomem; >>> }; >>> >>> void xive_reset(void *dev); >>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c >>> index 8f8bb8b787bd..a1cb87a07b76 100644 >>> --- a/hw/intc/xive.c >>> +++ b/hw/intc/xive.c >>> @@ -312,6 +312,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) >>> XiveICSState *xs = ICS_XIVE(ics); >>> Object *obj; >>> Error *err = NULL; >>> + XIVE *x; >> >> I don't really like just 'x' for a context variable like this (as >> opposed to a temporary). OK. I will change 'x' in 'xive' then. >>> >>> obj = object_property_get_link(OBJECT(xs), "xive", &err); >>> if (!obj) { >>> @@ -319,7 +320,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) >>> __func__, error_get_pretty(err)); >>> return; >>> } >>> - xs->xive = XIVE(obj); >>> + x = xs->xive = XIVE(obj); >>> >>> if (!ics->nr_irqs) { >>> error_setg(errp, "Number of interrupts needs to be greater 0"); >>> @@ -338,6 +339,11 @@ static void xive_ics_realize(ICSState *ics, Error **errp) >>> "xive.esb", >>> (1ull << xs->esb_shift) * ICS_BASE(xs)->nr_irqs); >>> >>> + /* Install the ESB memory region in the overall one */ >>> + memory_region_add_subregion(&x->esb_iomem, >>> + ICS_BASE(xs)->offset * (1 << xs->esb_shift), >>> + &xs->esb_iomem); >>> + >>> qemu_register_reset(xive_ics_reset, xs); >>> } >>> >>> @@ -375,6 +381,32 @@ static const TypeInfo xive_ics_info = { >>> */ >>> #define MAX_HW_IRQS_ENTRIES (8 * 1024) >>> >>> +/* VC BAR contains set translations for the ESBs and the EQs. */ >>> +#define VC_BAR_DEFAULT 0x10000000000ull >>> +#define VC_BAR_SIZE 0x08000000000ull >>> + >>> +#define P9_MMIO_BASE 0x006000000000000ull >>> +#define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id))) >> >> chip-based MMIO addresses leaking into the PAPR model seems like it >> might not be what you want See above for the reason. Thanks, C. >> >>> +static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size) >>> +{ >>> + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", >>> + __func__, offset, size); >>> + return 0; >>> +} >>> + >>> +static void xive_esb_default_write(void *opaque, hwaddr offset, uint64_t value, >>> + unsigned size) >>> +{ >>> + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " <- 0x%" PRIx64 " [%u]\n", >>> + __func__, offset, value, size); >>> +} >>> + >>> +static const MemoryRegionOps xive_esb_default_ops = { >>> + .read = xive_esb_default_read, >>> + .write = xive_esb_default_write, >>> + .endianness = DEVICE_BIG_ENDIAN, >>> +}; >>> >>> void xive_reset(void *dev) >>> { >>> @@ -435,10 +467,20 @@ static void xive_realize(DeviceState *dev, Error **errp) >>> x->eqdt = g_malloc0(x->nr_targets * XIVE_EQ_PRIORITY_COUNT * >>> sizeof(XiveEQ)); >>> >>> + /* VC BAR. That's the full window but we will only map the >>> + * subregions in use. */ >>> + x->vc_base = (hwaddr)(P9_CHIP_BASE(x->chip_id) | VC_BAR_DEFAULT); >>> + >>> + /* install default memory region handlers to log bogus access */ >>> + memory_region_init_io(&x->esb_iomem, NULL, &xive_esb_default_ops, >>> + NULL, "xive.esb", VC_BAR_SIZE); >>> + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem); >>> + >>> qemu_register_reset(xive_reset, dev); >>> } >>> >>> static Property xive_properties[] = { >>> + DEFINE_PROP_UINT32("chip-id", XIVE, chip_id, 0), >>> DEFINE_PROP_UINT32("nr-targets", XIVE, nr_targets, 0), >>> DEFINE_PROP_END_OF_LIST(), >>> }; >> >>
On 07/24/2017 08:39 AM, David Gibson wrote: > On Mon, Jul 24, 2017 at 04:09:31PM +1000, Benjamin Herrenschmidt wrote: >> On Mon, 2017-07-24 at 14:49 +1000, David Gibson wrote: >>> On Wed, Jul 05, 2017 at 07:13:22PM +0200, Cédric Le Goater wrote: >>>> Each source adds its own ESB mempry region to the overall ESB memory >>>> region of the controller. It will be mapped in the CPU address space >>>> when XIVE is activated. >>>> >>>> The default mapping address for the ESB memory region is the same one >>>> used on baremetal. >>>> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> >>>> --- >>>> hw/intc/xive-internal.h | 5 +++++ >>>> hw/intc/xive.c | 44 +++++++++++++++++++++++++++++++++++++++++++- >>>> 2 files changed, 48 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h >>>> index 8e755aa88a14..c06be823aad0 100644 >>>> --- a/hw/intc/xive-internal.h >>>> +++ b/hw/intc/xive-internal.h >>>> @@ -98,6 +98,7 @@ struct XIVE { >>>> SysBusDevice parent; >>>> >>>> /* Properties */ >>>> + uint32_t chip_id; >>> >>> So there is a XIVE object per chip. How does this work on PAPR? One >>> logical chip/XIVE, or something more complex? >> >> One global XIVE for PAPR. For the MMIOs, the way it works is that: >> >> - For MMIOs pertaining to a specific interrupt or queue, there's an H- >> call that will return the proper "guest physical" address. For qemu >> with KVM we'll have to probably create a single chunk of qemu address >> space (a single mem region) that contains individual pages mapped with >> MAP_FIXED originating from the different HW bits, we still need to sort >> out how exactly we'll do that in practice. >> >> - For the TIMA (the presentation MMIOs), those are always at the same >> physical address for everybody (so for a guest it's a single memory >> region we'll map to that physical address), the HW "knows" which HW >> thread is talking to it (and the hypervisor tells the HW which vcpu is >> running on a given HW thread at a given point in time). That address is >> obtained from the device-tree > > Ok. That leaves "chip_id" as a rather surprising thing to see in an > object which will appear on PAPR. We could also pass the address as a property instead of the chip-id when creating the XIVE object. May be better for sPAPR. C.
On Mon, Jul 24, 2017 at 03:25:29PM +0200, Cédric Le Goater wrote: > On 07/24/2017 08:09 AM, Benjamin Herrenschmidt wrote: > > On Mon, 2017-07-24 at 14:49 +1000, David Gibson wrote: > >> On Wed, Jul 05, 2017 at 07:13:22PM +0200, Cédric Le Goater wrote: > >>> Each source adds its own ESB mempry region to the overall ESB memory > >>> region of the controller. It will be mapped in the CPU address space > >>> when XIVE is activated. > >>> > >>> The default mapping address for the ESB memory region is the same one > >>> used on baremetal. > >>> > >>> Signed-off-by: Cédric Le Goater <clg@kaod.org> > >>> --- > >>> hw/intc/xive-internal.h | 5 +++++ > >>> hw/intc/xive.c | 44 +++++++++++++++++++++++++++++++++++++++++++- > >>> 2 files changed, 48 insertions(+), 1 deletion(-) > >>> > >>> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > >>> index 8e755aa88a14..c06be823aad0 100644 > >>> --- a/hw/intc/xive-internal.h > >>> +++ b/hw/intc/xive-internal.h > >>> @@ -98,6 +98,7 @@ struct XIVE { > >>> SysBusDevice parent; > >>> > >>> /* Properties */ > >>> + uint32_t chip_id; > >> > >> So there is a XIVE object per chip. How does this work on PAPR? One > >> logical chip/XIVE, or something more complex? > > > > One global XIVE for PAPR. > > Yes. > > The chip-id is useless for sPAPR (0 is the default) but for a PowerNV > system, the address used to map the ESB memory region depends on the > chip-id and I thought we could reuse the same XIVE object. Hmm, maybe. > So, a sPAPR guest would use the address of a single chip baremetal > system. This needs more explanation I agree. Thanks to Ben who is > providing a lot. I will update the changelogs in the next version. > The TIMA is mapped at a fixed address so the chip-id does not come > in play. > > > For the MMIOs, the way it works is that: > > > > - For MMIOs pertaining to a specific interrupt or queue, there's an H- > > call that will return the proper "guest physical" address. For qemu > > with KVM we'll have to probably create a single chunk of qemu address > > space (a single mem region) that contains individual pages mapped with > > MAP_FIXED originating from the different HW bits, we still need to sort > > out how exactly we'll do that in practice. > > I haven't looked at all the KVM details. But, regarding the ESBs, I had > the above in mind and used a single memory region to contain them all. > > > - For the TIMA (the presentation MMIOs), those are always at the same > > physical address for everybody (so for a guest it's a single memory > > region we'll map to that physical address), the HW "knows" which HW > > thread is talking to it (and the hypervisor tells the HW which vcpu is > > running on a given HW thread at a given point in time). That address is > > obtained from the device-tree > > > >>> uint32_t nr_targets; > >>> > >>> /* IRQ number allocator */ > >>> @@ -111,6 +112,10 @@ struct XIVE { > >>> void *sbe; > >>> XiveIVE *ivt; > >>> XiveEQ *eqdt; > >>> + > >>> + /* ESB and TIMA memory location */ > >>> + hwaddr vc_base; > >>> + MemoryRegion esb_iomem; > >>> }; > >>> > >>> void xive_reset(void *dev); > >>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c > >>> index 8f8bb8b787bd..a1cb87a07b76 100644 > >>> --- a/hw/intc/xive.c > >>> +++ b/hw/intc/xive.c > >>> @@ -312,6 +312,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) > >>> XiveICSState *xs = ICS_XIVE(ics); > >>> Object *obj; > >>> Error *err = NULL; > >>> + XIVE *x; > >> > >> I don't really like just 'x' for a context variable like this (as > >> opposed to a temporary). > > OK. I will change 'x' in 'xive' then. > > >>> > >>> obj = object_property_get_link(OBJECT(xs), "xive", &err); > >>> if (!obj) { > >>> @@ -319,7 +320,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) > >>> __func__, error_get_pretty(err)); > >>> return; > >>> } > >>> - xs->xive = XIVE(obj); > >>> + x = xs->xive = XIVE(obj); > >>> > >>> if (!ics->nr_irqs) { > >>> error_setg(errp, "Number of interrupts needs to be greater 0"); > >>> @@ -338,6 +339,11 @@ static void xive_ics_realize(ICSState *ics, Error **errp) > >>> "xive.esb", > >>> (1ull << xs->esb_shift) * ICS_BASE(xs)->nr_irqs); > >>> > >>> + /* Install the ESB memory region in the overall one */ > >>> + memory_region_add_subregion(&x->esb_iomem, > >>> + ICS_BASE(xs)->offset * (1 << xs->esb_shift), > >>> + &xs->esb_iomem); > >>> + > >>> qemu_register_reset(xive_ics_reset, xs); > >>> } > >>> > >>> @@ -375,6 +381,32 @@ static const TypeInfo xive_ics_info = { > >>> */ > >>> #define MAX_HW_IRQS_ENTRIES (8 * 1024) > >>> > >>> +/* VC BAR contains set translations for the ESBs and the EQs. */ > >>> +#define VC_BAR_DEFAULT 0x10000000000ull > >>> +#define VC_BAR_SIZE 0x08000000000ull > >>> + > >>> +#define P9_MMIO_BASE 0x006000000000000ull > >>> +#define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id))) > >> > >> chip-based MMIO addresses leaking into the PAPR model seems like it > >> might not be what you want > > See above for the reason. > > > Thanks, > > C. > > >> > >>> +static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size) > >>> +{ > >>> + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", > >>> + __func__, offset, size); > >>> + return 0; > >>> +} > >>> + > >>> +static void xive_esb_default_write(void *opaque, hwaddr offset, uint64_t value, > >>> + unsigned size) > >>> +{ > >>> + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " <- 0x%" PRIx64 " [%u]\n", > >>> + __func__, offset, value, size); > >>> +} > >>> + > >>> +static const MemoryRegionOps xive_esb_default_ops = { > >>> + .read = xive_esb_default_read, > >>> + .write = xive_esb_default_write, > >>> + .endianness = DEVICE_BIG_ENDIAN, > >>> +}; > >>> > >>> void xive_reset(void *dev) > >>> { > >>> @@ -435,10 +467,20 @@ static void xive_realize(DeviceState *dev, Error **errp) > >>> x->eqdt = g_malloc0(x->nr_targets * XIVE_EQ_PRIORITY_COUNT * > >>> sizeof(XiveEQ)); > >>> > >>> + /* VC BAR. That's the full window but we will only map the > >>> + * subregions in use. */ > >>> + x->vc_base = (hwaddr)(P9_CHIP_BASE(x->chip_id) | VC_BAR_DEFAULT); > >>> + > >>> + /* install default memory region handlers to log bogus access */ > >>> + memory_region_init_io(&x->esb_iomem, NULL, &xive_esb_default_ops, > >>> + NULL, "xive.esb", VC_BAR_SIZE); > >>> + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem); > >>> + > >>> qemu_register_reset(xive_reset, dev); > >>> } > >>> > >>> static Property xive_properties[] = { > >>> + DEFINE_PROP_UINT32("chip-id", XIVE, chip_id, 0), > >>> DEFINE_PROP_UINT32("nr-targets", XIVE, nr_targets, 0), > >>> DEFINE_PROP_END_OF_LIST(), > >>> }; > >> > >> >
On Mon, Jul 24, 2017 at 03:27:18PM +0200, Cédric Le Goater wrote: > On 07/24/2017 08:39 AM, David Gibson wrote: > > On Mon, Jul 24, 2017 at 04:09:31PM +1000, Benjamin Herrenschmidt wrote: > >> On Mon, 2017-07-24 at 14:49 +1000, David Gibson wrote: > >>> On Wed, Jul 05, 2017 at 07:13:22PM +0200, Cédric Le Goater wrote: > >>>> Each source adds its own ESB mempry region to the overall ESB memory > >>>> region of the controller. It will be mapped in the CPU address space > >>>> when XIVE is activated. > >>>> > >>>> The default mapping address for the ESB memory region is the same one > >>>> used on baremetal. > >>>> > >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> > >>>> --- > >>>> hw/intc/xive-internal.h | 5 +++++ > >>>> hw/intc/xive.c | 44 +++++++++++++++++++++++++++++++++++++++++++- > >>>> 2 files changed, 48 insertions(+), 1 deletion(-) > >>>> > >>>> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > >>>> index 8e755aa88a14..c06be823aad0 100644 > >>>> --- a/hw/intc/xive-internal.h > >>>> +++ b/hw/intc/xive-internal.h > >>>> @@ -98,6 +98,7 @@ struct XIVE { > >>>> SysBusDevice parent; > >>>> > >>>> /* Properties */ > >>>> + uint32_t chip_id; > >>> > >>> So there is a XIVE object per chip. How does this work on PAPR? One > >>> logical chip/XIVE, or something more complex? > >> > >> One global XIVE for PAPR. For the MMIOs, the way it works is that: > >> > >> - For MMIOs pertaining to a specific interrupt or queue, there's an H- > >> call that will return the proper "guest physical" address. For qemu > >> with KVM we'll have to probably create a single chunk of qemu address > >> space (a single mem region) that contains individual pages mapped with > >> MAP_FIXED originating from the different HW bits, we still need to sort > >> out how exactly we'll do that in practice. > >> > >> - For the TIMA (the presentation MMIOs), those are always at the same > >> physical address for everybody (so for a guest it's a single memory > >> region we'll map to that physical address), the HW "knows" which HW > >> thread is talking to it (and the hypervisor tells the HW which vcpu is > >> running on a given HW thread at a given point in time). That address is > >> obtained from the device-tree > > > > Ok. That leaves "chip_id" as a rather surprising thing to see in an > > object which will appear on PAPR. > > We could also pass the address as a property instead of the chip-id when > creating the XIVE object. May be better for sPAPR. Yes, I think that sounds like a much better option.
On 07/25/2017 04:19 AM, David Gibson wrote: > On Mon, Jul 24, 2017 at 03:25:29PM +0200, Cédric Le Goater wrote: >> On 07/24/2017 08:09 AM, Benjamin Herrenschmidt wrote: >>> On Mon, 2017-07-24 at 14:49 +1000, David Gibson wrote: >>>> On Wed, Jul 05, 2017 at 07:13:22PM +0200, Cédric Le Goater wrote: >>>>> Each source adds its own ESB mempry region to the overall ESB memory >>>>> region of the controller. It will be mapped in the CPU address space >>>>> when XIVE is activated. >>>>> >>>>> The default mapping address for the ESB memory region is the same one >>>>> used on baremetal. >>>>> >>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> >>>>> --- >>>>> hw/intc/xive-internal.h | 5 +++++ >>>>> hw/intc/xive.c | 44 +++++++++++++++++++++++++++++++++++++++++++- >>>>> 2 files changed, 48 insertions(+), 1 deletion(-) >>>>> >>>>> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h >>>>> index 8e755aa88a14..c06be823aad0 100644 >>>>> --- a/hw/intc/xive-internal.h >>>>> +++ b/hw/intc/xive-internal.h >>>>> @@ -98,6 +98,7 @@ struct XIVE { >>>>> SysBusDevice parent; >>>>> >>>>> /* Properties */ >>>>> + uint32_t chip_id; >>>> >>>> So there is a XIVE object per chip. How does this work on PAPR? One >>>> logical chip/XIVE, or something more complex? >>> >>> One global XIVE for PAPR. >> >> Yes. >> >> The chip-id is useless for sPAPR (0 is the default) but for a PowerNV >> system, the address used to map the ESB memory region depends on the >> chip-id and I thought we could reuse the same XIVE object. > > Hmm, maybe. yes. I am thinking of greatly simplifying the allocator to fit only the sPAPR needs : a simple range of IRQ numbers with the IPIs at the beginning, and the HW interrupts starting at an offset (in sync with the XICS allocator). That's what I ended doing for CAS negotiation. So we could just call it sPAPRXive and forget about PowerNV support for the moment. C. > >> So, a sPAPR guest would use the address of a single chip baremetal >> system. This needs more explanation I agree. Thanks to Ben who is >> providing a lot. I will update the changelogs in the next version. > >> The TIMA is mapped at a fixed address so the chip-id does not come >> in play. >> >>> For the MMIOs, the way it works is that: >>> >>> - For MMIOs pertaining to a specific interrupt or queue, there's an H- >>> call that will return the proper "guest physical" address. For qemu >>> with KVM we'll have to probably create a single chunk of qemu address >>> space (a single mem region) that contains individual pages mapped with >>> MAP_FIXED originating from the different HW bits, we still need to sort >>> out how exactly we'll do that in practice. >> >> I haven't looked at all the KVM details. But, regarding the ESBs, I had >> the above in mind and used a single memory region to contain them all. >> >>> - For the TIMA (the presentation MMIOs), those are always at the same >>> physical address for everybody (so for a guest it's a single memory >>> region we'll map to that physical address), the HW "knows" which HW >>> thread is talking to it (and the hypervisor tells the HW which vcpu is >>> running on a given HW thread at a given point in time). That address is >>> obtained from the device-tree >>> >>>>> uint32_t nr_targets; >>>>> >>>>> /* IRQ number allocator */ >>>>> @@ -111,6 +112,10 @@ struct XIVE { >>>>> void *sbe; >>>>> XiveIVE *ivt; >>>>> XiveEQ *eqdt; >>>>> + >>>>> + /* ESB and TIMA memory location */ >>>>> + hwaddr vc_base; >>>>> + MemoryRegion esb_iomem; >>>>> }; >>>>> >>>>> void xive_reset(void *dev); >>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c >>>>> index 8f8bb8b787bd..a1cb87a07b76 100644 >>>>> --- a/hw/intc/xive.c >>>>> +++ b/hw/intc/xive.c >>>>> @@ -312,6 +312,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) >>>>> XiveICSState *xs = ICS_XIVE(ics); >>>>> Object *obj; >>>>> Error *err = NULL; >>>>> + XIVE *x; >>>> >>>> I don't really like just 'x' for a context variable like this (as >>>> opposed to a temporary). >> >> OK. I will change 'x' in 'xive' then. >> >>>>> >>>>> obj = object_property_get_link(OBJECT(xs), "xive", &err); >>>>> if (!obj) { >>>>> @@ -319,7 +320,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) >>>>> __func__, error_get_pretty(err)); >>>>> return; >>>>> } >>>>> - xs->xive = XIVE(obj); >>>>> + x = xs->xive = XIVE(obj); >>>>> >>>>> if (!ics->nr_irqs) { >>>>> error_setg(errp, "Number of interrupts needs to be greater 0"); >>>>> @@ -338,6 +339,11 @@ static void xive_ics_realize(ICSState *ics, Error **errp) >>>>> "xive.esb", >>>>> (1ull << xs->esb_shift) * ICS_BASE(xs)->nr_irqs); >>>>> >>>>> + /* Install the ESB memory region in the overall one */ >>>>> + memory_region_add_subregion(&x->esb_iomem, >>>>> + ICS_BASE(xs)->offset * (1 << xs->esb_shift), >>>>> + &xs->esb_iomem); >>>>> + >>>>> qemu_register_reset(xive_ics_reset, xs); >>>>> } >>>>> >>>>> @@ -375,6 +381,32 @@ static const TypeInfo xive_ics_info = { >>>>> */ >>>>> #define MAX_HW_IRQS_ENTRIES (8 * 1024) >>>>> >>>>> +/* VC BAR contains set translations for the ESBs and the EQs. */ >>>>> +#define VC_BAR_DEFAULT 0x10000000000ull >>>>> +#define VC_BAR_SIZE 0x08000000000ull >>>>> + >>>>> +#define P9_MMIO_BASE 0x006000000000000ull >>>>> +#define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id))) >>>> >>>> chip-based MMIO addresses leaking into the PAPR model seems like it >>>> might not be what you want >> >> See above for the reason. >> >> >> Thanks, >> >> C. >> >>>> >>>>> +static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size) >>>>> +{ >>>>> + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", >>>>> + __func__, offset, size); >>>>> + return 0; >>>>> +} >>>>> + >>>>> +static void xive_esb_default_write(void *opaque, hwaddr offset, uint64_t value, >>>>> + unsigned size) >>>>> +{ >>>>> + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " <- 0x%" PRIx64 " [%u]\n", >>>>> + __func__, offset, value, size); >>>>> +} >>>>> + >>>>> +static const MemoryRegionOps xive_esb_default_ops = { >>>>> + .read = xive_esb_default_read, >>>>> + .write = xive_esb_default_write, >>>>> + .endianness = DEVICE_BIG_ENDIAN, >>>>> +}; >>>>> >>>>> void xive_reset(void *dev) >>>>> { >>>>> @@ -435,10 +467,20 @@ static void xive_realize(DeviceState *dev, Error **errp) >>>>> x->eqdt = g_malloc0(x->nr_targets * XIVE_EQ_PRIORITY_COUNT * >>>>> sizeof(XiveEQ)); >>>>> >>>>> + /* VC BAR. That's the full window but we will only map the >>>>> + * subregions in use. */ >>>>> + x->vc_base = (hwaddr)(P9_CHIP_BASE(x->chip_id) | VC_BAR_DEFAULT); >>>>> + >>>>> + /* install default memory region handlers to log bogus access */ >>>>> + memory_region_init_io(&x->esb_iomem, NULL, &xive_esb_default_ops, >>>>> + NULL, "xive.esb", VC_BAR_SIZE); >>>>> + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem); >>>>> + >>>>> qemu_register_reset(xive_reset, dev); >>>>> } >>>>> >>>>> static Property xive_properties[] = { >>>>> + DEFINE_PROP_UINT32("chip-id", XIVE, chip_id, 0), >>>>> DEFINE_PROP_UINT32("nr-targets", XIVE, nr_targets, 0), >>>>> DEFINE_PROP_END_OF_LIST(), >>>>> }; >>>> >>>> >> >
diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h index 8e755aa88a14..c06be823aad0 100644 --- a/hw/intc/xive-internal.h +++ b/hw/intc/xive-internal.h @@ -98,6 +98,7 @@ struct XIVE { SysBusDevice parent; /* Properties */ + uint32_t chip_id; uint32_t nr_targets; /* IRQ number allocator */ @@ -111,6 +112,10 @@ struct XIVE { void *sbe; XiveIVE *ivt; XiveEQ *eqdt; + + /* ESB and TIMA memory location */ + hwaddr vc_base; + MemoryRegion esb_iomem; }; void xive_reset(void *dev); diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 8f8bb8b787bd..a1cb87a07b76 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -312,6 +312,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) XiveICSState *xs = ICS_XIVE(ics); Object *obj; Error *err = NULL; + XIVE *x; obj = object_property_get_link(OBJECT(xs), "xive", &err); if (!obj) { @@ -319,7 +320,7 @@ static void xive_ics_realize(ICSState *ics, Error **errp) __func__, error_get_pretty(err)); return; } - xs->xive = XIVE(obj); + x = xs->xive = XIVE(obj); if (!ics->nr_irqs) { error_setg(errp, "Number of interrupts needs to be greater 0"); @@ -338,6 +339,11 @@ static void xive_ics_realize(ICSState *ics, Error **errp) "xive.esb", (1ull << xs->esb_shift) * ICS_BASE(xs)->nr_irqs); + /* Install the ESB memory region in the overall one */ + memory_region_add_subregion(&x->esb_iomem, + ICS_BASE(xs)->offset * (1 << xs->esb_shift), + &xs->esb_iomem); + qemu_register_reset(xive_ics_reset, xs); } @@ -375,6 +381,32 @@ static const TypeInfo xive_ics_info = { */ #define MAX_HW_IRQS_ENTRIES (8 * 1024) +/* VC BAR contains set translations for the ESBs and the EQs. */ +#define VC_BAR_DEFAULT 0x10000000000ull +#define VC_BAR_SIZE 0x08000000000ull + +#define P9_MMIO_BASE 0x006000000000000ull +#define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id))) + +static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", + __func__, offset, size); + return 0; +} + +static void xive_esb_default_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " <- 0x%" PRIx64 " [%u]\n", + __func__, offset, value, size); +} + +static const MemoryRegionOps xive_esb_default_ops = { + .read = xive_esb_default_read, + .write = xive_esb_default_write, + .endianness = DEVICE_BIG_ENDIAN, +}; void xive_reset(void *dev) { @@ -435,10 +467,20 @@ static void xive_realize(DeviceState *dev, Error **errp) x->eqdt = g_malloc0(x->nr_targets * XIVE_EQ_PRIORITY_COUNT * sizeof(XiveEQ)); + /* VC BAR. That's the full window but we will only map the + * subregions in use. */ + x->vc_base = (hwaddr)(P9_CHIP_BASE(x->chip_id) | VC_BAR_DEFAULT); + + /* install default memory region handlers to log bogus access */ + memory_region_init_io(&x->esb_iomem, NULL, &xive_esb_default_ops, + NULL, "xive.esb", VC_BAR_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem); + qemu_register_reset(xive_reset, dev); } static Property xive_properties[] = { + DEFINE_PROP_UINT32("chip-id", XIVE, chip_id, 0), DEFINE_PROP_UINT32("nr-targets", XIVE, nr_targets, 0), DEFINE_PROP_END_OF_LIST(), };
Each source adds its own ESB mempry region to the overall ESB memory region of the controller. It will be mapped in the CPU address space when XIVE is activated. The default mapping address for the ESB memory region is the same one used on baremetal. Signed-off-by: Cédric Le Goater <clg@kaod.org> --- hw/intc/xive-internal.h | 5 +++++ hw/intc/xive.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-)