Message ID | 1499274819-15607-15-git-send-email-clg@kaod.org |
---|---|
State | New |
Headers | show |
On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote: > The Thread Interrupt Management Area for the OS is mostly used to > acknowledge interrupts and set the CPPR of the CPU. > > The TIMA is mapped at the same address for each CPU. 'current_cpu' is > used to retrieve the targeted interrupt presenter object. > > Signed-off-by: Cédric Le Goater <clg@kaod.org> Am I right in thinking that this shoehorns the XIVE TIMA state into the existing XICS ICP object. That.. doesn't seem like a good idea. > --- > hw/intc/xive-internal.h | 4 ++ > hw/intc/xive.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 191 insertions(+) > > diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > index ba5e648a5258..5e8b78a1ea6a 100644 > --- a/hw/intc/xive-internal.h > +++ b/hw/intc/xive-internal.h > @@ -200,6 +200,10 @@ struct XIVE { > /* ESB and TIMA memory location */ > hwaddr vc_base; > MemoryRegion esb_iomem; > + > + uint32_t tm_shift; > + hwaddr tm_base; > + MemoryRegion tm_iomem; > }; > > void xive_reset(void *dev); > diff --git a/hw/intc/xive.c b/hw/intc/xive.c > index c08a4f8efb58..82b2f0dcda0b 100644 > --- a/hw/intc/xive.c > +++ b/hw/intc/xive.c > @@ -26,6 +26,180 @@ > > #include "xive-internal.h" > > +static uint8_t priority_to_ipb(uint8_t priority) > +{ > + return priority < XIVE_EQ_PRIORITY_COUNT ? 1 << (7 - priority) : 0; > +} > + > +static uint64_t xive_icp_accept(XiveICPState *xicp) > +{ > + ICPState *icp = ICP(xicp); > + uint8_t nsr = xicp->tima_os[TM_NSR]; > + > + qemu_irq_lower(icp->output); > + > + if (xicp->tima_os[TM_NSR] & TM_QW1_NSR_EO) { > + uint8_t cppr = xicp->tima_os[TM_PIPR]; > + > + xicp->tima_os[TM_CPPR] = cppr; > + > + /* Reset the pending buffer bit */ > + xicp->tima_os[TM_IPB] &= ~priority_to_ipb(cppr); > + > + /* Drop Exception bit for OS */ > + xicp->tima_os[TM_NSR] &= ~TM_QW1_NSR_EO; > + } > + > + return (nsr << 8) | xicp->tima_os[TM_CPPR]; > +} > + > +static void xive_icp_set_cppr(XiveICPState *xicp, uint8_t cppr) > +{ > + if (cppr > XIVE_PRIORITY_MAX) { > + cppr = 0xff; > + } > + > + xicp->tima_os[TM_CPPR] = cppr; > +} > + > +/* > + * Thread Interrupt Management Area MMIO > + */ > +static uint64_t xive_tm_read_special(XiveICPState *icp, hwaddr offset, > + unsigned size) > +{ > + uint64_t ret = -1; > + > + if (offset == TM_SPC_ACK_OS_REG && size == 2) { > + ret = xive_icp_accept(icp); > + } else { > + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA read @%" > + HWADDR_PRIx" size %d\n", offset, size); > + } > + > + return ret; > +} > + > +static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size) > +{ > + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); > + XiveICPState *icp = XIVE_ICP(cpu->intc); > + uint64_t ret = -1; > + int i; > + > + if (offset >= TM_SPC_ACK_EBB) { > + return xive_tm_read_special(icp, offset, size); > + } > + > + if (offset & TM_QW1_OS) { > + switch (size) { > + case 1: > + case 2: > + case 4: > + case 8: > + if (QEMU_IS_ALIGNED(offset, size)) { > + ret = 0; > + for (i = 0; i < size; i++) { > + ret |= icp->tima[offset + i] << (8 * i); > + } > + } else { > + qemu_log_mask(LOG_GUEST_ERROR, > + "XIVE: invalid TIMA read alignment @%" > + HWADDR_PRIx" size %d\n", offset, size); > + } > + break; > + default: > + g_assert_not_reached(); > + } > + } else { > + qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%" > + HWADDR_PRIx"\n", offset); > + } > + > + return ret; > +} > + > +static bool xive_tm_is_readonly(uint8_t index) > +{ > + /* Let's be optimistic and prepare ground for HV mode support */ > + switch (index) { > + case TM_QW1_OS + TM_CPPR: > + return false; > + default: > + return true; > + } > +} > + > +static void xive_tm_write_special(XiveICPState *xicp, hwaddr offset, > + uint64_t value, unsigned size) > +{ > + if (offset == TM_SPC_SET_OS_PENDING && size == 1) { > + xicp->tima_os[TM_IPB] |= priority_to_ipb(value & 0xff); > + } else { > + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%" > + HWADDR_PRIx" size %d\n", offset, size); > + } > + > + /* TODO: support TM_SPC_ACK_OS_EL */ > +} > + > +static void xive_tm_write(void *opaque, hwaddr offset, > + uint64_t value, unsigned size) > +{ > + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); > + XiveICPState *icp = XIVE_ICP(cpu->intc); > + int i; > + > + if (offset >= TM_SPC_ACK_EBB) { > + xive_tm_write_special(icp, offset, value, size); > + return; > + } > + > + if (offset & TM_QW1_OS) { > + switch (size) { > + case 1: > + if (offset == TM_QW1_OS + TM_CPPR) { > + xive_icp_set_cppr(icp, value & 0xff); > + } > + break; > + case 4: > + case 8: > + if (QEMU_IS_ALIGNED(offset, size)) { > + for (i = 0; i < size; i++) { > + if (!xive_tm_is_readonly(offset + i)) { > + icp->tima[offset + i] = (value >> (8 * i)) & 0xff; > + } > + } > + } else { > + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%" > + HWADDR_PRIx" size %d\n", offset, size); > + } > + break; > + default: > + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%" > + HWADDR_PRIx" size %d\n", offset, size); > + } > + } else { > + qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%" > + HWADDR_PRIx"\n", offset); > + } > +} > + > + > +static const MemoryRegionOps xive_tm_ops = { > + .read = xive_tm_read, > + .write = xive_tm_write, > + .endianness = DEVICE_BIG_ENDIAN, > + .valid = { > + .min_access_size = 1, > + .max_access_size = 8, > + }, > + .impl = { > + .min_access_size = 1, > + .max_access_size = 8, > + }, > +}; > + > static void xive_icp_reset(ICPState *icp) > { > XiveICPState *xicp = XIVE_ICP(icp); > @@ -453,6 +627,11 @@ static const TypeInfo xive_ics_info = { > #define P9_MMIO_BASE 0x006000000000000ull > #define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id))) > > +/* Thread Interrupt Management Area MMIO */ > +#define TM_BAR_DEFAULT 0x30203180000ull > +#define TM_SHIFT 16 > +#define TM_BAR_SIZE (XIVE_TM_RING_COUNT * (1 << TM_SHIFT)) > + > static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size) > { > qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", > @@ -541,6 +720,14 @@ static void xive_realize(DeviceState *dev, Error **errp) > NULL, "xive.esb", VC_BAR_SIZE); > sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem); > > + /* TM BAR. Same address for each chip */ > + x->tm_base = (P9_MMIO_BASE | TM_BAR_DEFAULT); > + x->tm_shift = TM_SHIFT; > + > + memory_region_init_io(&x->tm_iomem, OBJECT(x), &xive_tm_ops, x, > + "xive.tm", TM_BAR_SIZE); > + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->tm_iomem); > + > qemu_register_reset(xive_reset, dev); > } >
On 07/24/2017 08:35 AM, David Gibson wrote: > On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote: >> The Thread Interrupt Management Area for the OS is mostly used to >> acknowledge interrupts and set the CPPR of the CPU. >> >> The TIMA is mapped at the same address for each CPU. 'current_cpu' is >> used to retrieve the targeted interrupt presenter object. >> >> Signed-off-by: Cédric Le Goater <clg@kaod.org> > > Am I right in thinking that this shoehorns the XIVE TIMA state into > the existing XICS ICP object. That.. doesn't seem like a good idea. The TIMA memory region is under the XIVE object because it is unique for the system. The lookup of the ICP is simply done using 'current_cpu'. The TIMA state is under the ICPState, yes, but this model does not seem incorrect to me as this state contains the interrupt information presented to a CPU. Thanks, C. >> --- >> hw/intc/xive-internal.h | 4 ++ >> hw/intc/xive.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 191 insertions(+) >> >> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h >> index ba5e648a5258..5e8b78a1ea6a 100644 >> --- a/hw/intc/xive-internal.h >> +++ b/hw/intc/xive-internal.h >> @@ -200,6 +200,10 @@ struct XIVE { >> /* ESB and TIMA memory location */ >> hwaddr vc_base; >> MemoryRegion esb_iomem; >> + >> + uint32_t tm_shift; >> + hwaddr tm_base; >> + MemoryRegion tm_iomem; >> }; >> >> void xive_reset(void *dev); >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c >> index c08a4f8efb58..82b2f0dcda0b 100644 >> --- a/hw/intc/xive.c >> +++ b/hw/intc/xive.c >> @@ -26,6 +26,180 @@ >> >> #include "xive-internal.h" >> >> +static uint8_t priority_to_ipb(uint8_t priority) >> +{ >> + return priority < XIVE_EQ_PRIORITY_COUNT ? 1 << (7 - priority) : 0; >> +} >> + >> +static uint64_t xive_icp_accept(XiveICPState *xicp) >> +{ >> + ICPState *icp = ICP(xicp); >> + uint8_t nsr = xicp->tima_os[TM_NSR]; >> + >> + qemu_irq_lower(icp->output); >> + >> + if (xicp->tima_os[TM_NSR] & TM_QW1_NSR_EO) { >> + uint8_t cppr = xicp->tima_os[TM_PIPR]; >> + >> + xicp->tima_os[TM_CPPR] = cppr; >> + >> + /* Reset the pending buffer bit */ >> + xicp->tima_os[TM_IPB] &= ~priority_to_ipb(cppr); >> + >> + /* Drop Exception bit for OS */ >> + xicp->tima_os[TM_NSR] &= ~TM_QW1_NSR_EO; >> + } >> + >> + return (nsr << 8) | xicp->tima_os[TM_CPPR]; >> +} >> + >> +static void xive_icp_set_cppr(XiveICPState *xicp, uint8_t cppr) >> +{ >> + if (cppr > XIVE_PRIORITY_MAX) { >> + cppr = 0xff; >> + } >> + >> + xicp->tima_os[TM_CPPR] = cppr; >> +} >> + >> +/* >> + * Thread Interrupt Management Area MMIO >> + */ >> +static uint64_t xive_tm_read_special(XiveICPState *icp, hwaddr offset, >> + unsigned size) >> +{ >> + uint64_t ret = -1; >> + >> + if (offset == TM_SPC_ACK_OS_REG && size == 2) { >> + ret = xive_icp_accept(icp); >> + } else { >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA read @%" >> + HWADDR_PRIx" size %d\n", offset, size); >> + } >> + >> + return ret; >> +} >> + >> +static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size) >> +{ >> + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); >> + XiveICPState *icp = XIVE_ICP(cpu->intc); >> + uint64_t ret = -1; >> + int i; >> + >> + if (offset >= TM_SPC_ACK_EBB) { >> + return xive_tm_read_special(icp, offset, size); >> + } >> + >> + if (offset & TM_QW1_OS) { >> + switch (size) { >> + case 1: >> + case 2: >> + case 4: >> + case 8: >> + if (QEMU_IS_ALIGNED(offset, size)) { >> + ret = 0; >> + for (i = 0; i < size; i++) { >> + ret |= icp->tima[offset + i] << (8 * i); >> + } >> + } else { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "XIVE: invalid TIMA read alignment @%" >> + HWADDR_PRIx" size %d\n", offset, size); >> + } >> + break; >> + default: >> + g_assert_not_reached(); >> + } >> + } else { >> + qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%" >> + HWADDR_PRIx"\n", offset); >> + } >> + >> + return ret; >> +} >> + >> +static bool xive_tm_is_readonly(uint8_t index) >> +{ >> + /* Let's be optimistic and prepare ground for HV mode support */ >> + switch (index) { >> + case TM_QW1_OS + TM_CPPR: >> + return false; >> + default: >> + return true; >> + } >> +} >> + >> +static void xive_tm_write_special(XiveICPState *xicp, hwaddr offset, >> + uint64_t value, unsigned size) >> +{ >> + if (offset == TM_SPC_SET_OS_PENDING && size == 1) { >> + xicp->tima_os[TM_IPB] |= priority_to_ipb(value & 0xff); >> + } else { >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%" >> + HWADDR_PRIx" size %d\n", offset, size); >> + } >> + >> + /* TODO: support TM_SPC_ACK_OS_EL */ >> +} >> + >> +static void xive_tm_write(void *opaque, hwaddr offset, >> + uint64_t value, unsigned size) >> +{ >> + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); >> + XiveICPState *icp = XIVE_ICP(cpu->intc); >> + int i; >> + >> + if (offset >= TM_SPC_ACK_EBB) { >> + xive_tm_write_special(icp, offset, value, size); >> + return; >> + } >> + >> + if (offset & TM_QW1_OS) { >> + switch (size) { >> + case 1: >> + if (offset == TM_QW1_OS + TM_CPPR) { >> + xive_icp_set_cppr(icp, value & 0xff); >> + } >> + break; >> + case 4: >> + case 8: >> + if (QEMU_IS_ALIGNED(offset, size)) { >> + for (i = 0; i < size; i++) { >> + if (!xive_tm_is_readonly(offset + i)) { >> + icp->tima[offset + i] = (value >> (8 * i)) & 0xff; >> + } >> + } >> + } else { >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%" >> + HWADDR_PRIx" size %d\n", offset, size); >> + } >> + break; >> + default: >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%" >> + HWADDR_PRIx" size %d\n", offset, size); >> + } >> + } else { >> + qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%" >> + HWADDR_PRIx"\n", offset); >> + } >> +} >> + >> + >> +static const MemoryRegionOps xive_tm_ops = { >> + .read = xive_tm_read, >> + .write = xive_tm_write, >> + .endianness = DEVICE_BIG_ENDIAN, >> + .valid = { >> + .min_access_size = 1, >> + .max_access_size = 8, >> + }, >> + .impl = { >> + .min_access_size = 1, >> + .max_access_size = 8, >> + }, >> +}; >> + >> static void xive_icp_reset(ICPState *icp) >> { >> XiveICPState *xicp = XIVE_ICP(icp); >> @@ -453,6 +627,11 @@ static const TypeInfo xive_ics_info = { >> #define P9_MMIO_BASE 0x006000000000000ull >> #define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id))) >> >> +/* Thread Interrupt Management Area MMIO */ >> +#define TM_BAR_DEFAULT 0x30203180000ull >> +#define TM_SHIFT 16 >> +#define TM_BAR_SIZE (XIVE_TM_RING_COUNT * (1 << TM_SHIFT)) >> + >> static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size) >> { >> qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", >> @@ -541,6 +720,14 @@ static void xive_realize(DeviceState *dev, Error **errp) >> NULL, "xive.esb", VC_BAR_SIZE); >> sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem); >> >> + /* TM BAR. Same address for each chip */ >> + x->tm_base = (P9_MMIO_BASE | TM_BAR_DEFAULT); >> + x->tm_shift = TM_SHIFT; >> + >> + memory_region_init_io(&x->tm_iomem, OBJECT(x), &xive_tm_ops, x, >> + "xive.tm", TM_BAR_SIZE); >> + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->tm_iomem); >> + >> qemu_register_reset(xive_reset, dev); >> } >> >
On Mon, Jul 24, 2017 at 04:44:00PM +0200, Cédric Le Goater wrote: > On 07/24/2017 08:35 AM, David Gibson wrote: > > On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote: > >> The Thread Interrupt Management Area for the OS is mostly used to > >> acknowledge interrupts and set the CPPR of the CPU. > >> > >> The TIMA is mapped at the same address for each CPU. 'current_cpu' is > >> used to retrieve the targeted interrupt presenter object. > >> > >> Signed-off-by: Cédric Le Goater <clg@kaod.org> > > > > Am I right in thinking that this shoehorns the XIVE TIMA state into > > the existing XICS ICP object. That.. doesn't seem like a good idea. > > The TIMA memory region is under the XIVE object because it is > unique for the system. The lookup of the ICP is simply done using > 'current_cpu'. The TIMA state is under the ICPState, yes, but this > model does not seem incorrect to me as this state contains the > interrupt information presented to a CPU. Yeah, that's not the point I'm making. My point is that the TIMA state isn't really the same as xics ICP state. You're squeezing one into the other in a pretty ugly way.
On 07/25/2017 06:20 AM, David Gibson wrote: > On Mon, Jul 24, 2017 at 04:44:00PM +0200, Cédric Le Goater wrote: >> On 07/24/2017 08:35 AM, David Gibson wrote: >>> On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote: >>>> The Thread Interrupt Management Area for the OS is mostly used to >>>> acknowledge interrupts and set the CPPR of the CPU. >>>> >>>> The TIMA is mapped at the same address for each CPU. 'current_cpu' is >>>> used to retrieve the targeted interrupt presenter object. >>>> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> >>> >>> Am I right in thinking that this shoehorns the XIVE TIMA state into >>> the existing XICS ICP object. That.. doesn't seem like a good idea. >> >> The TIMA memory region is under the XIVE object because it is >> unique for the system. The lookup of the ICP is simply done using >> 'current_cpu'. The TIMA state is under the ICPState, yes, but this >> model does not seem incorrect to me as this state contains the >> interrupt information presented to a CPU. > > Yeah, that's not the point I'm making. My point is that the TIMA > state isn't really the same as xics ICP state. You're squeezing one > into the other in a pretty ugly way. yes, well, we need to have compatible objects between the XICS and XIVE mode because of the CAS negotiation. for migration compatibility, it is much easier to extend existing objects. This approach I am taking today. C.
On Tue, Jul 25, 2017 at 11:08:46AM +0200, Cédric Le Goater wrote: > On 07/25/2017 06:20 AM, David Gibson wrote: > > On Mon, Jul 24, 2017 at 04:44:00PM +0200, Cédric Le Goater wrote: > >> On 07/24/2017 08:35 AM, David Gibson wrote: > >>> On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote: > >>>> The Thread Interrupt Management Area for the OS is mostly used to > >>>> acknowledge interrupts and set the CPPR of the CPU. > >>>> > >>>> The TIMA is mapped at the same address for each CPU. 'current_cpu' is > >>>> used to retrieve the targeted interrupt presenter object. > >>>> > >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> > >>> > >>> Am I right in thinking that this shoehorns the XIVE TIMA state into > >>> the existing XICS ICP object. That.. doesn't seem like a good idea. > >> > >> The TIMA memory region is under the XIVE object because it is > >> unique for the system. The lookup of the ICP is simply done using > >> 'current_cpu'. The TIMA state is under the ICPState, yes, but this > >> model does not seem incorrect to me as this state contains the > >> interrupt information presented to a CPU. > > > > Yeah, that's not the point I'm making. My point is that the TIMA > > state isn't really the same as xics ICP state. You're squeezing one > > into the other in a pretty ugly way. > > yes, well, we need to have compatible objects between the XICS and XIVE > mode because of the CAS negotiation. for migration compatibility, it is > much easier to extend existing objects. This approach I am taking today. Yeah, I really don't think this approach is workable. Roughly speaking, you're keeping the same structures between xics and xive, but with mostly different code. I can't see any way that's not going to be horribly fragile, with any update to xics OR xive requiring enormous caution not to break the other. I really think we have to go one of two ways: 1) Abstract the notion of interrupt source and interrupt presenter, so we can use a truly common model between xics and xive. Given the differences between the two, I don't know this is even possible. 2) Separate the objects entirely. ICPs are entirely separate from TIMAs, like wise ICSes and xive interrupt sources. I think this is probably the way to go. To make this work with CAS switching will require different methods, but I don't think it's impossible. For example, we could (on the new machine type) create both xics ICSes and ICPs and xive sources and TIMAs. We'd have a (migrated) flag in the machine saying which is currently active. All the objects would hang around, but only the active ones would do anything. Now obviously that means we'd be migrating a bunch of redundant state, but I still think it's preferable to a Frankenstinian fusion of xics-ish and xive-ish state. I think there's a good chance we can improve on the basic idea to remove most or all of that redundant state.
On 07/25/2017 03:21 PM, David Gibson wrote: > On Tue, Jul 25, 2017 at 11:08:46AM +0200, Cédric Le Goater wrote: >> On 07/25/2017 06:20 AM, David Gibson wrote: >>> On Mon, Jul 24, 2017 at 04:44:00PM +0200, Cédric Le Goater wrote: >>>> On 07/24/2017 08:35 AM, David Gibson wrote: >>>>> On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote: >>>>>> The Thread Interrupt Management Area for the OS is mostly used to >>>>>> acknowledge interrupts and set the CPPR of the CPU. >>>>>> >>>>>> The TIMA is mapped at the same address for each CPU. 'current_cpu' is >>>>>> used to retrieve the targeted interrupt presenter object. >>>>>> >>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> >>>>> >>>>> Am I right in thinking that this shoehorns the XIVE TIMA state into >>>>> the existing XICS ICP object. That.. doesn't seem like a good idea. >>>> >>>> The TIMA memory region is under the XIVE object because it is >>>> unique for the system. The lookup of the ICP is simply done using >>>> 'current_cpu'. The TIMA state is under the ICPState, yes, but this >>>> model does not seem incorrect to me as this state contains the >>>> interrupt information presented to a CPU. >>> >>> Yeah, that's not the point I'm making. My point is that the TIMA >>> state isn't really the same as xics ICP state. You're squeezing one >>> into the other in a pretty ugly way. >> >> yes, well, we need to have compatible objects between the XICS and XIVE >> mode because of the CAS negotiation. for migration compatibility, it is >> much easier to extend existing objects. This approach I am taking today. > > Yeah, I really don't think this approach is workable. > > Roughly speaking, you're keeping the same structures between xics and > xive, but with mostly different code. I can't see any way that's not > going to be horribly fragile, with any update to xics OR xive > requiring enormous caution not to break the other. > > I really think we have to go one of two ways: > > 1) Abstract the notion of interrupt source and interrupt presenter, so > we can use a truly common model between xics and xive. > > Given the differences between the two, I don't know this is even > possible. > > 2) Separate the objects entirely. ICPs are entirely separate from > TIMAs, like wise ICSes and xive interrupt sources. > > I think this is probably the way to go. To make this work with CAS > switching will require different methods, but I don't think it's > impossible. > > For example, we could (on the new machine type) create both xics ICSes > and ICPs and xive sources and TIMAs. We'd have a (migrated) flag in > the machine saying which is currently active. All the objects would > hang around, but only the active ones would do anything. ok. There is still the question of sharing the IRQ numbers unless we find away to allocate them after the CAS negotiation. I will take a look at KVM support now before reworking the patchset. Thanks, C. > Now obviously that means we'd be migrating a bunch of redundant state, > but I still think it's preferable to a Frankenstinian fusion of > xics-ish and xive-ish state. I think there's a good chance we can > improve on the basic idea to remove most or all of that redundant > state. >
On Tue, Jul 25, 2017 at 05:01:48PM +0200, Cédric Le Goater wrote: > On 07/25/2017 03:21 PM, David Gibson wrote: > > On Tue, Jul 25, 2017 at 11:08:46AM +0200, Cédric Le Goater wrote: > >> On 07/25/2017 06:20 AM, David Gibson wrote: > >>> On Mon, Jul 24, 2017 at 04:44:00PM +0200, Cédric Le Goater wrote: > >>>> On 07/24/2017 08:35 AM, David Gibson wrote: > >>>>> On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote: > >>>>>> The Thread Interrupt Management Area for the OS is mostly used to > >>>>>> acknowledge interrupts and set the CPPR of the CPU. > >>>>>> > >>>>>> The TIMA is mapped at the same address for each CPU. 'current_cpu' is > >>>>>> used to retrieve the targeted interrupt presenter object. > >>>>>> > >>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> > >>>>> > >>>>> Am I right in thinking that this shoehorns the XIVE TIMA state into > >>>>> the existing XICS ICP object. That.. doesn't seem like a good idea. > >>>> > >>>> The TIMA memory region is under the XIVE object because it is > >>>> unique for the system. The lookup of the ICP is simply done using > >>>> 'current_cpu'. The TIMA state is under the ICPState, yes, but this > >>>> model does not seem incorrect to me as this state contains the > >>>> interrupt information presented to a CPU. > >>> > >>> Yeah, that's not the point I'm making. My point is that the TIMA > >>> state isn't really the same as xics ICP state. You're squeezing one > >>> into the other in a pretty ugly way. > >> > >> yes, well, we need to have compatible objects between the XICS and XIVE > >> mode because of the CAS negotiation. for migration compatibility, it is > >> much easier to extend existing objects. This approach I am taking today. > > > > Yeah, I really don't think this approach is workable. > > > > Roughly speaking, you're keeping the same structures between xics and > > xive, but with mostly different code. I can't see any way that's not > > going to be horribly fragile, with any update to xics OR xive > > requiring enormous caution not to break the other. > > > > I really think we have to go one of two ways: > > > > 1) Abstract the notion of interrupt source and interrupt presenter, so > > we can use a truly common model between xics and xive. > > > > Given the differences between the two, I don't know this is even > > possible. > > > > 2) Separate the objects entirely. ICPs are entirely separate from > > TIMAs, like wise ICSes and xive interrupt sources. > > > > I think this is probably the way to go. To make this work with CAS > > switching will require different methods, but I don't think it's > > impossible. > > > > For example, we could (on the new machine type) create both xics ICSes > > and ICPs and xive sources and TIMAs. We'd have a (migrated) flag in > > the machine saying which is currently active. All the objects would > > hang around, but only the active ones would do anything. > > ok. There is still the question of sharing the IRQ numbers unless we > find away to allocate them after the CAS negotiation. Right. As I said, I think allocating after CAS is probably possible, but it may not be necessary. Merely lining up the irq numbers should be much simpler than actually fusing the source and presenter structures. Especially since both xics and xive are flexible enough that we can map the irqs almost however we want. This definitely does mean the irq mapping / allocation should sit inside the machine type, rather than the xics *or* the xive code, and preferably be entirely common. Then it's just a matter of setting up both the xics and xive components so they can back that irq allocation. > I will take a look at KVM support now before reworking the patchset. > > Thanks, > > C. > > > Now obviously that means we'd be migrating a bunch of redundant state, > > but I still think it's preferable to a Frankenstinian fusion of > > xics-ish and xive-ish state. I think there's a good chance we can > > improve on the basic idea to remove most or all of that redundant > > state. > > >
diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h index ba5e648a5258..5e8b78a1ea6a 100644 --- a/hw/intc/xive-internal.h +++ b/hw/intc/xive-internal.h @@ -200,6 +200,10 @@ struct XIVE { /* ESB and TIMA memory location */ hwaddr vc_base; MemoryRegion esb_iomem; + + uint32_t tm_shift; + hwaddr tm_base; + MemoryRegion tm_iomem; }; void xive_reset(void *dev); diff --git a/hw/intc/xive.c b/hw/intc/xive.c index c08a4f8efb58..82b2f0dcda0b 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -26,6 +26,180 @@ #include "xive-internal.h" +static uint8_t priority_to_ipb(uint8_t priority) +{ + return priority < XIVE_EQ_PRIORITY_COUNT ? 1 << (7 - priority) : 0; +} + +static uint64_t xive_icp_accept(XiveICPState *xicp) +{ + ICPState *icp = ICP(xicp); + uint8_t nsr = xicp->tima_os[TM_NSR]; + + qemu_irq_lower(icp->output); + + if (xicp->tima_os[TM_NSR] & TM_QW1_NSR_EO) { + uint8_t cppr = xicp->tima_os[TM_PIPR]; + + xicp->tima_os[TM_CPPR] = cppr; + + /* Reset the pending buffer bit */ + xicp->tima_os[TM_IPB] &= ~priority_to_ipb(cppr); + + /* Drop Exception bit for OS */ + xicp->tima_os[TM_NSR] &= ~TM_QW1_NSR_EO; + } + + return (nsr << 8) | xicp->tima_os[TM_CPPR]; +} + +static void xive_icp_set_cppr(XiveICPState *xicp, uint8_t cppr) +{ + if (cppr > XIVE_PRIORITY_MAX) { + cppr = 0xff; + } + + xicp->tima_os[TM_CPPR] = cppr; +} + +/* + * Thread Interrupt Management Area MMIO + */ +static uint64_t xive_tm_read_special(XiveICPState *icp, hwaddr offset, + unsigned size) +{ + uint64_t ret = -1; + + if (offset == TM_SPC_ACK_OS_REG && size == 2) { + ret = xive_icp_accept(icp); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA read @%" + HWADDR_PRIx" size %d\n", offset, size); + } + + return ret; +} + +static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size) +{ + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); + XiveICPState *icp = XIVE_ICP(cpu->intc); + uint64_t ret = -1; + int i; + + if (offset >= TM_SPC_ACK_EBB) { + return xive_tm_read_special(icp, offset, size); + } + + if (offset & TM_QW1_OS) { + switch (size) { + case 1: + case 2: + case 4: + case 8: + if (QEMU_IS_ALIGNED(offset, size)) { + ret = 0; + for (i = 0; i < size; i++) { + ret |= icp->tima[offset + i] << (8 * i); + } + } else { + qemu_log_mask(LOG_GUEST_ERROR, + "XIVE: invalid TIMA read alignment @%" + HWADDR_PRIx" size %d\n", offset, size); + } + break; + default: + g_assert_not_reached(); + } + } else { + qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%" + HWADDR_PRIx"\n", offset); + } + + return ret; +} + +static bool xive_tm_is_readonly(uint8_t index) +{ + /* Let's be optimistic and prepare ground for HV mode support */ + switch (index) { + case TM_QW1_OS + TM_CPPR: + return false; + default: + return true; + } +} + +static void xive_tm_write_special(XiveICPState *xicp, hwaddr offset, + uint64_t value, unsigned size) +{ + if (offset == TM_SPC_SET_OS_PENDING && size == 1) { + xicp->tima_os[TM_IPB] |= priority_to_ipb(value & 0xff); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%" + HWADDR_PRIx" size %d\n", offset, size); + } + + /* TODO: support TM_SPC_ACK_OS_EL */ +} + +static void xive_tm_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); + XiveICPState *icp = XIVE_ICP(cpu->intc); + int i; + + if (offset >= TM_SPC_ACK_EBB) { + xive_tm_write_special(icp, offset, value, size); + return; + } + + if (offset & TM_QW1_OS) { + switch (size) { + case 1: + if (offset == TM_QW1_OS + TM_CPPR) { + xive_icp_set_cppr(icp, value & 0xff); + } + break; + case 4: + case 8: + if (QEMU_IS_ALIGNED(offset, size)) { + for (i = 0; i < size; i++) { + if (!xive_tm_is_readonly(offset + i)) { + icp->tima[offset + i] = (value >> (8 * i)) & 0xff; + } + } + } else { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%" + HWADDR_PRIx" size %d\n", offset, size); + } + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%" + HWADDR_PRIx" size %d\n", offset, size); + } + } else { + qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%" + HWADDR_PRIx"\n", offset); + } +} + + +static const MemoryRegionOps xive_tm_ops = { + .read = xive_tm_read, + .write = xive_tm_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + static void xive_icp_reset(ICPState *icp) { XiveICPState *xicp = XIVE_ICP(icp); @@ -453,6 +627,11 @@ static const TypeInfo xive_ics_info = { #define P9_MMIO_BASE 0x006000000000000ull #define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id))) +/* Thread Interrupt Management Area MMIO */ +#define TM_BAR_DEFAULT 0x30203180000ull +#define TM_SHIFT 16 +#define TM_BAR_SIZE (XIVE_TM_RING_COUNT * (1 << TM_SHIFT)) + static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size) { qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", @@ -541,6 +720,14 @@ static void xive_realize(DeviceState *dev, Error **errp) NULL, "xive.esb", VC_BAR_SIZE); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem); + /* TM BAR. Same address for each chip */ + x->tm_base = (P9_MMIO_BASE | TM_BAR_DEFAULT); + x->tm_shift = TM_SHIFT; + + memory_region_init_io(&x->tm_iomem, OBJECT(x), &xive_tm_ops, x, + "xive.tm", TM_BAR_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->tm_iomem); + qemu_register_reset(xive_reset, dev); }
The Thread Interrupt Management Area for the OS is mostly used to acknowledge interrupts and set the CPPR of the CPU. The TIMA is mapped at the same address for each CPU. 'current_cpu' is used to retrieve the targeted interrupt presenter object. Signed-off-by: Cédric Le Goater <clg@kaod.org> --- hw/intc/xive-internal.h | 4 ++ hw/intc/xive.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+)