diff mbox

[RFC,14/26] ppc/xive: add MMIO handlers to the XIVE interrupt presenter model

Message ID 1499274819-15607-15-git-send-email-clg@kaod.org
State New
Headers show

Commit Message

Cédric Le Goater July 5, 2017, 5:13 p.m. UTC
The Thread Interrupt Management Area for the OS is mostly used to
acknowledge interrupts and set the CPPR of the CPU.

The TIMA is mapped at the same address for each CPU. 'current_cpu' is
used to retrieve the targeted interrupt presenter object.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 hw/intc/xive-internal.h |   4 ++
 hw/intc/xive.c          | 187 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 191 insertions(+)

Comments

David Gibson July 24, 2017, 6:35 a.m. UTC | #1
On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote:
> The Thread Interrupt Management Area for the OS is mostly used to
> acknowledge interrupts and set the CPPR of the CPU.
> 
> The TIMA is mapped at the same address for each CPU. 'current_cpu' is
> used to retrieve the targeted interrupt presenter object.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>

Am I right in thinking that this shoehorns the XIVE TIMA state into
the existing XICS ICP object.  That.. doesn't seem like a good idea.

> ---
>  hw/intc/xive-internal.h |   4 ++
>  hw/intc/xive.c          | 187 ++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 191 insertions(+)
> 
> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h
> index ba5e648a5258..5e8b78a1ea6a 100644
> --- a/hw/intc/xive-internal.h
> +++ b/hw/intc/xive-internal.h
> @@ -200,6 +200,10 @@ struct XIVE {
>      /* ESB and TIMA memory location */
>      hwaddr       vc_base;
>      MemoryRegion esb_iomem;
> +
> +    uint32_t     tm_shift;
> +    hwaddr       tm_base;
> +    MemoryRegion tm_iomem;
>  };
>  
>  void xive_reset(void *dev);
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index c08a4f8efb58..82b2f0dcda0b 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -26,6 +26,180 @@
>  
>  #include "xive-internal.h"
>  
> +static uint8_t priority_to_ipb(uint8_t priority)
> +{
> +    return priority < XIVE_EQ_PRIORITY_COUNT ? 1 << (7 - priority) : 0;
> +}
> +
> +static uint64_t xive_icp_accept(XiveICPState *xicp)
> +{
> +    ICPState *icp = ICP(xicp);
> +    uint8_t nsr = xicp->tima_os[TM_NSR];
> +
> +    qemu_irq_lower(icp->output);
> +
> +    if (xicp->tima_os[TM_NSR] & TM_QW1_NSR_EO) {
> +        uint8_t cppr = xicp->tima_os[TM_PIPR];
> +
> +        xicp->tima_os[TM_CPPR] = cppr;
> +
> +        /* Reset the pending buffer bit */
> +        xicp->tima_os[TM_IPB] &= ~priority_to_ipb(cppr);
> +
> +        /* Drop Exception bit for OS */
> +        xicp->tima_os[TM_NSR] &= ~TM_QW1_NSR_EO;
> +    }
> +
> +    return (nsr << 8) | xicp->tima_os[TM_CPPR];
> +}
> +
> +static void xive_icp_set_cppr(XiveICPState *xicp, uint8_t cppr)
> +{
> +    if (cppr > XIVE_PRIORITY_MAX) {
> +        cppr = 0xff;
> +    }
> +
> +    xicp->tima_os[TM_CPPR] = cppr;
> +}
> +
> +/*
> + * Thread Interrupt Management Area MMIO
> + */
> +static uint64_t xive_tm_read_special(XiveICPState *icp, hwaddr offset,
> +                                     unsigned size)
> +{
> +    uint64_t ret = -1;
> +
> +    if (offset == TM_SPC_ACK_OS_REG && size == 2) {
> +        ret = xive_icp_accept(icp);
> +    } else {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA read @%"
> +                      HWADDR_PRIx" size %d\n", offset, size);
> +    }
> +
> +    return ret;
> +}
> +
> +static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
> +    XiveICPState *icp = XIVE_ICP(cpu->intc);
> +    uint64_t ret = -1;
> +    int i;
> +
> +    if (offset >= TM_SPC_ACK_EBB) {
> +        return xive_tm_read_special(icp, offset, size);
> +    }
> +
> +    if (offset & TM_QW1_OS) {
> +        switch (size) {
> +        case 1:
> +        case 2:
> +        case 4:
> +        case 8:
> +            if (QEMU_IS_ALIGNED(offset, size)) {
> +                ret = 0;
> +                for (i = 0; i < size; i++) {
> +                    ret |= icp->tima[offset + i] << (8 * i);
> +                }
> +            } else {
> +                qemu_log_mask(LOG_GUEST_ERROR,
> +                              "XIVE: invalid TIMA read alignment @%"
> +                              HWADDR_PRIx" size %d\n", offset, size);
> +            }
> +            break;
> +        default:
> +            g_assert_not_reached();
> +        }
> +    } else {
> +        qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%"
> +                      HWADDR_PRIx"\n", offset);
> +    }
> +
> +    return ret;
> +}
> +
> +static bool xive_tm_is_readonly(uint8_t index)
> +{
> +    /* Let's be optimistic and prepare ground for HV mode support */
> +    switch (index) {
> +    case TM_QW1_OS + TM_CPPR:
> +        return false;
> +    default:
> +        return true;
> +    }
> +}
> +
> +static void xive_tm_write_special(XiveICPState *xicp, hwaddr offset,
> +                                  uint64_t value, unsigned size)
> +{
> +    if (offset == TM_SPC_SET_OS_PENDING && size == 1) {
> +        xicp->tima_os[TM_IPB] |= priority_to_ipb(value & 0xff);
> +    } else {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
> +                      HWADDR_PRIx" size %d\n", offset, size);
> +    }
> +
> +    /* TODO: support TM_SPC_ACK_OS_EL */
> +}
> +
> +static void xive_tm_write(void *opaque, hwaddr offset,
> +                           uint64_t value, unsigned size)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
> +    XiveICPState *icp = XIVE_ICP(cpu->intc);
> +    int i;
> +
> +    if (offset >= TM_SPC_ACK_EBB) {
> +        xive_tm_write_special(icp, offset, value, size);
> +        return;
> +    }
> +
> +    if (offset & TM_QW1_OS) {
> +        switch (size) {
> +        case 1:
> +            if (offset == TM_QW1_OS + TM_CPPR) {
> +                xive_icp_set_cppr(icp, value & 0xff);
> +            }
> +            break;
> +        case 4:
> +        case 8:
> +            if (QEMU_IS_ALIGNED(offset, size)) {
> +                for (i = 0; i < size; i++) {
> +                    if (!xive_tm_is_readonly(offset + i)) {
> +                        icp->tima[offset + i] = (value >> (8 * i)) & 0xff;
> +                    }
> +                }
> +            } else {
> +                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
> +                              HWADDR_PRIx" size %d\n", offset, size);
> +            }
> +            break;
> +        default:
> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
> +                          HWADDR_PRIx" size %d\n", offset, size);
> +        }
> +    } else {
> +        qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%"
> +                      HWADDR_PRIx"\n", offset);
> +    }
> +}
> +
> +
> +static const MemoryRegionOps xive_tm_ops = {
> +    .read = xive_tm_read,
> +    .write = xive_tm_write,
> +    .endianness = DEVICE_BIG_ENDIAN,
> +    .valid = {
> +        .min_access_size = 1,
> +        .max_access_size = 8,
> +    },
> +    .impl = {
> +        .min_access_size = 1,
> +        .max_access_size = 8,
> +    },
> +};
> +
>  static void xive_icp_reset(ICPState *icp)
>  {
>      XiveICPState *xicp = XIVE_ICP(icp);
> @@ -453,6 +627,11 @@ static const TypeInfo xive_ics_info = {
>  #define P9_MMIO_BASE     0x006000000000000ull
>  #define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id)))
>  
> +/* Thread Interrupt Management Area MMIO */
> +#define TM_BAR_DEFAULT   0x30203180000ull
> +#define TM_SHIFT         16
> +#define TM_BAR_SIZE      (XIVE_TM_RING_COUNT * (1 << TM_SHIFT))
> +
>  static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size)
>  {
>      qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n",
> @@ -541,6 +720,14 @@ static void xive_realize(DeviceState *dev, Error **errp)
>                            NULL, "xive.esb", VC_BAR_SIZE);
>      sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem);
>  
> +    /* TM BAR. Same address for each chip */
> +    x->tm_base = (P9_MMIO_BASE | TM_BAR_DEFAULT);
> +    x->tm_shift = TM_SHIFT;
> +
> +    memory_region_init_io(&x->tm_iomem, OBJECT(x), &xive_tm_ops, x,
> +                          "xive.tm", TM_BAR_SIZE);
> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->tm_iomem);
> +
>      qemu_register_reset(xive_reset, dev);
>  }
>
Cédric Le Goater July 24, 2017, 2:44 p.m. UTC | #2
On 07/24/2017 08:35 AM, David Gibson wrote:
> On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote:
>> The Thread Interrupt Management Area for the OS is mostly used to
>> acknowledge interrupts and set the CPPR of the CPU.
>>
>> The TIMA is mapped at the same address for each CPU. 'current_cpu' is
>> used to retrieve the targeted interrupt presenter object.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> 
> Am I right in thinking that this shoehorns the XIVE TIMA state into
> the existing XICS ICP object.  That.. doesn't seem like a good idea.

The TIMA memory region is under the XIVE object because it is 
unique for the system. The lookup of the ICP is simply done using 
'current_cpu'. The TIMA state is under the ICPState, yes, but this 
model does not seem incorrect to me as this state contains the 
interrupt information presented to a CPU.   

Thanks,

C.

>> ---
>>  hw/intc/xive-internal.h |   4 ++
>>  hw/intc/xive.c          | 187 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 191 insertions(+)
>>
>> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h
>> index ba5e648a5258..5e8b78a1ea6a 100644
>> --- a/hw/intc/xive-internal.h
>> +++ b/hw/intc/xive-internal.h
>> @@ -200,6 +200,10 @@ struct XIVE {
>>      /* ESB and TIMA memory location */
>>      hwaddr       vc_base;
>>      MemoryRegion esb_iomem;
>> +
>> +    uint32_t     tm_shift;
>> +    hwaddr       tm_base;
>> +    MemoryRegion tm_iomem;
>>  };
>>  
>>  void xive_reset(void *dev);
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index c08a4f8efb58..82b2f0dcda0b 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -26,6 +26,180 @@
>>  
>>  #include "xive-internal.h"
>>  
>> +static uint8_t priority_to_ipb(uint8_t priority)
>> +{
>> +    return priority < XIVE_EQ_PRIORITY_COUNT ? 1 << (7 - priority) : 0;
>> +}
>> +
>> +static uint64_t xive_icp_accept(XiveICPState *xicp)
>> +{
>> +    ICPState *icp = ICP(xicp);
>> +    uint8_t nsr = xicp->tima_os[TM_NSR];
>> +
>> +    qemu_irq_lower(icp->output);
>> +
>> +    if (xicp->tima_os[TM_NSR] & TM_QW1_NSR_EO) {
>> +        uint8_t cppr = xicp->tima_os[TM_PIPR];
>> +
>> +        xicp->tima_os[TM_CPPR] = cppr;
>> +
>> +        /* Reset the pending buffer bit */
>> +        xicp->tima_os[TM_IPB] &= ~priority_to_ipb(cppr);
>> +
>> +        /* Drop Exception bit for OS */
>> +        xicp->tima_os[TM_NSR] &= ~TM_QW1_NSR_EO;
>> +    }
>> +
>> +    return (nsr << 8) | xicp->tima_os[TM_CPPR];
>> +}
>> +
>> +static void xive_icp_set_cppr(XiveICPState *xicp, uint8_t cppr)
>> +{
>> +    if (cppr > XIVE_PRIORITY_MAX) {
>> +        cppr = 0xff;
>> +    }
>> +
>> +    xicp->tima_os[TM_CPPR] = cppr;
>> +}
>> +
>> +/*
>> + * Thread Interrupt Management Area MMIO
>> + */
>> +static uint64_t xive_tm_read_special(XiveICPState *icp, hwaddr offset,
>> +                                     unsigned size)
>> +{
>> +    uint64_t ret = -1;
>> +
>> +    if (offset == TM_SPC_ACK_OS_REG && size == 2) {
>> +        ret = xive_icp_accept(icp);
>> +    } else {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA read @%"
>> +                      HWADDR_PRIx" size %d\n", offset, size);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size)
>> +{
>> +    PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
>> +    XiveICPState *icp = XIVE_ICP(cpu->intc);
>> +    uint64_t ret = -1;
>> +    int i;
>> +
>> +    if (offset >= TM_SPC_ACK_EBB) {
>> +        return xive_tm_read_special(icp, offset, size);
>> +    }
>> +
>> +    if (offset & TM_QW1_OS) {
>> +        switch (size) {
>> +        case 1:
>> +        case 2:
>> +        case 4:
>> +        case 8:
>> +            if (QEMU_IS_ALIGNED(offset, size)) {
>> +                ret = 0;
>> +                for (i = 0; i < size; i++) {
>> +                    ret |= icp->tima[offset + i] << (8 * i);
>> +                }
>> +            } else {
>> +                qemu_log_mask(LOG_GUEST_ERROR,
>> +                              "XIVE: invalid TIMA read alignment @%"
>> +                              HWADDR_PRIx" size %d\n", offset, size);
>> +            }
>> +            break;
>> +        default:
>> +            g_assert_not_reached();
>> +        }
>> +    } else {
>> +        qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%"
>> +                      HWADDR_PRIx"\n", offset);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static bool xive_tm_is_readonly(uint8_t index)
>> +{
>> +    /* Let's be optimistic and prepare ground for HV mode support */
>> +    switch (index) {
>> +    case TM_QW1_OS + TM_CPPR:
>> +        return false;
>> +    default:
>> +        return true;
>> +    }
>> +}
>> +
>> +static void xive_tm_write_special(XiveICPState *xicp, hwaddr offset,
>> +                                  uint64_t value, unsigned size)
>> +{
>> +    if (offset == TM_SPC_SET_OS_PENDING && size == 1) {
>> +        xicp->tima_os[TM_IPB] |= priority_to_ipb(value & 0xff);
>> +    } else {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
>> +                      HWADDR_PRIx" size %d\n", offset, size);
>> +    }
>> +
>> +    /* TODO: support TM_SPC_ACK_OS_EL */
>> +}
>> +
>> +static void xive_tm_write(void *opaque, hwaddr offset,
>> +                           uint64_t value, unsigned size)
>> +{
>> +    PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
>> +    XiveICPState *icp = XIVE_ICP(cpu->intc);
>> +    int i;
>> +
>> +    if (offset >= TM_SPC_ACK_EBB) {
>> +        xive_tm_write_special(icp, offset, value, size);
>> +        return;
>> +    }
>> +
>> +    if (offset & TM_QW1_OS) {
>> +        switch (size) {
>> +        case 1:
>> +            if (offset == TM_QW1_OS + TM_CPPR) {
>> +                xive_icp_set_cppr(icp, value & 0xff);
>> +            }
>> +            break;
>> +        case 4:
>> +        case 8:
>> +            if (QEMU_IS_ALIGNED(offset, size)) {
>> +                for (i = 0; i < size; i++) {
>> +                    if (!xive_tm_is_readonly(offset + i)) {
>> +                        icp->tima[offset + i] = (value >> (8 * i)) & 0xff;
>> +                    }
>> +                }
>> +            } else {
>> +                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
>> +                              HWADDR_PRIx" size %d\n", offset, size);
>> +            }
>> +            break;
>> +        default:
>> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
>> +                          HWADDR_PRIx" size %d\n", offset, size);
>> +        }
>> +    } else {
>> +        qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%"
>> +                      HWADDR_PRIx"\n", offset);
>> +    }
>> +}
>> +
>> +
>> +static const MemoryRegionOps xive_tm_ops = {
>> +    .read = xive_tm_read,
>> +    .write = xive_tm_write,
>> +    .endianness = DEVICE_BIG_ENDIAN,
>> +    .valid = {
>> +        .min_access_size = 1,
>> +        .max_access_size = 8,
>> +    },
>> +    .impl = {
>> +        .min_access_size = 1,
>> +        .max_access_size = 8,
>> +    },
>> +};
>> +
>>  static void xive_icp_reset(ICPState *icp)
>>  {
>>      XiveICPState *xicp = XIVE_ICP(icp);
>> @@ -453,6 +627,11 @@ static const TypeInfo xive_ics_info = {
>>  #define P9_MMIO_BASE     0x006000000000000ull
>>  #define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id)))
>>  
>> +/* Thread Interrupt Management Area MMIO */
>> +#define TM_BAR_DEFAULT   0x30203180000ull
>> +#define TM_SHIFT         16
>> +#define TM_BAR_SIZE      (XIVE_TM_RING_COUNT * (1 << TM_SHIFT))
>> +
>>  static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size)
>>  {
>>      qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n",
>> @@ -541,6 +720,14 @@ static void xive_realize(DeviceState *dev, Error **errp)
>>                            NULL, "xive.esb", VC_BAR_SIZE);
>>      sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem);
>>  
>> +    /* TM BAR. Same address for each chip */
>> +    x->tm_base = (P9_MMIO_BASE | TM_BAR_DEFAULT);
>> +    x->tm_shift = TM_SHIFT;
>> +
>> +    memory_region_init_io(&x->tm_iomem, OBJECT(x), &xive_tm_ops, x,
>> +                          "xive.tm", TM_BAR_SIZE);
>> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->tm_iomem);
>> +
>>      qemu_register_reset(xive_reset, dev);
>>  }
>>  
>
David Gibson July 25, 2017, 4:20 a.m. UTC | #3
On Mon, Jul 24, 2017 at 04:44:00PM +0200, Cédric Le Goater wrote:
> On 07/24/2017 08:35 AM, David Gibson wrote:
> > On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote:
> >> The Thread Interrupt Management Area for the OS is mostly used to
> >> acknowledge interrupts and set the CPPR of the CPU.
> >>
> >> The TIMA is mapped at the same address for each CPU. 'current_cpu' is
> >> used to retrieve the targeted interrupt presenter object.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> > 
> > Am I right in thinking that this shoehorns the XIVE TIMA state into
> > the existing XICS ICP object.  That.. doesn't seem like a good idea.
> 
> The TIMA memory region is under the XIVE object because it is 
> unique for the system. The lookup of the ICP is simply done using 
> 'current_cpu'. The TIMA state is under the ICPState, yes, but this 
> model does not seem incorrect to me as this state contains the 
> interrupt information presented to a CPU.

Yeah, that's not the point I'm making.  My point is that the TIMA
state isn't really the same as xics ICP state.  You're squeezing one
into the other in a pretty ugly way.
Cédric Le Goater July 25, 2017, 9:08 a.m. UTC | #4
On 07/25/2017 06:20 AM, David Gibson wrote:
> On Mon, Jul 24, 2017 at 04:44:00PM +0200, Cédric Le Goater wrote:
>> On 07/24/2017 08:35 AM, David Gibson wrote:
>>> On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote:
>>>> The Thread Interrupt Management Area for the OS is mostly used to
>>>> acknowledge interrupts and set the CPPR of the CPU.
>>>>
>>>> The TIMA is mapped at the same address for each CPU. 'current_cpu' is
>>>> used to retrieve the targeted interrupt presenter object.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>
>>> Am I right in thinking that this shoehorns the XIVE TIMA state into
>>> the existing XICS ICP object.  That.. doesn't seem like a good idea.
>>
>> The TIMA memory region is under the XIVE object because it is 
>> unique for the system. The lookup of the ICP is simply done using 
>> 'current_cpu'. The TIMA state is under the ICPState, yes, but this 
>> model does not seem incorrect to me as this state contains the 
>> interrupt information presented to a CPU.
> 
> Yeah, that's not the point I'm making.  My point is that the TIMA
> state isn't really the same as xics ICP state.  You're squeezing one
> into the other in a pretty ugly way.

yes, well, we need to have compatible objects between the XICS and XIVE 
mode because of the CAS negotiation. for migration compatibility, it is 
much easier to extend existing objects. This approach I am taking today.


C.
David Gibson July 25, 2017, 1:21 p.m. UTC | #5
On Tue, Jul 25, 2017 at 11:08:46AM +0200, Cédric Le Goater wrote:
> On 07/25/2017 06:20 AM, David Gibson wrote:
> > On Mon, Jul 24, 2017 at 04:44:00PM +0200, Cédric Le Goater wrote:
> >> On 07/24/2017 08:35 AM, David Gibson wrote:
> >>> On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote:
> >>>> The Thread Interrupt Management Area for the OS is mostly used to
> >>>> acknowledge interrupts and set the CPPR of the CPU.
> >>>>
> >>>> The TIMA is mapped at the same address for each CPU. 'current_cpu' is
> >>>> used to retrieve the targeted interrupt presenter object.
> >>>>
> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>
> >>> Am I right in thinking that this shoehorns the XIVE TIMA state into
> >>> the existing XICS ICP object.  That.. doesn't seem like a good idea.
> >>
> >> The TIMA memory region is under the XIVE object because it is 
> >> unique for the system. The lookup of the ICP is simply done using 
> >> 'current_cpu'. The TIMA state is under the ICPState, yes, but this 
> >> model does not seem incorrect to me as this state contains the 
> >> interrupt information presented to a CPU.
> > 
> > Yeah, that's not the point I'm making.  My point is that the TIMA
> > state isn't really the same as xics ICP state.  You're squeezing one
> > into the other in a pretty ugly way.
> 
> yes, well, we need to have compatible objects between the XICS and XIVE 
> mode because of the CAS negotiation. for migration compatibility, it is 
> much easier to extend existing objects. This approach I am taking today.

Yeah, I really don't think this approach is workable.

Roughly speaking, you're keeping the same structures between xics and
xive, but with mostly different code.  I can't see any way that's not
going to be horribly fragile, with any update to xics OR xive
requiring enormous caution not to break the other.

I really think we have to go one of two ways:

1) Abstract the notion of interrupt source and interrupt presenter, so
we can use a truly common model between xics and xive.

Given the differences between the two, I don't know this is even
possible.

2) Separate the objects entirely.  ICPs are entirely separate from
TIMAs, like wise ICSes and xive interrupt sources.

I think this is probably the way to go.  To make this work with CAS
switching will require different methods, but I don't think it's
impossible.

For example, we could (on the new machine type) create both xics ICSes
and ICPs and xive sources and TIMAs.  We'd have a (migrated) flag in
the machine saying which is currently active.  All the objects would
hang around, but only the active ones would do anything.

Now obviously that means we'd be migrating a bunch of redundant state,
but I still think it's preferable to a Frankenstinian fusion of
xics-ish and xive-ish state.  I think there's a good chance we can
improve on the basic idea to remove most or all of that redundant
state.
Cédric Le Goater July 25, 2017, 3:01 p.m. UTC | #6
On 07/25/2017 03:21 PM, David Gibson wrote:
> On Tue, Jul 25, 2017 at 11:08:46AM +0200, Cédric Le Goater wrote:
>> On 07/25/2017 06:20 AM, David Gibson wrote:
>>> On Mon, Jul 24, 2017 at 04:44:00PM +0200, Cédric Le Goater wrote:
>>>> On 07/24/2017 08:35 AM, David Gibson wrote:
>>>>> On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote:
>>>>>> The Thread Interrupt Management Area for the OS is mostly used to
>>>>>> acknowledge interrupts and set the CPPR of the CPU.
>>>>>>
>>>>>> The TIMA is mapped at the same address for each CPU. 'current_cpu' is
>>>>>> used to retrieve the targeted interrupt presenter object.
>>>>>>
>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>
>>>>> Am I right in thinking that this shoehorns the XIVE TIMA state into
>>>>> the existing XICS ICP object.  That.. doesn't seem like a good idea.
>>>>
>>>> The TIMA memory region is under the XIVE object because it is 
>>>> unique for the system. The lookup of the ICP is simply done using 
>>>> 'current_cpu'. The TIMA state is under the ICPState, yes, but this 
>>>> model does not seem incorrect to me as this state contains the 
>>>> interrupt information presented to a CPU.
>>>
>>> Yeah, that's not the point I'm making.  My point is that the TIMA
>>> state isn't really the same as xics ICP state.  You're squeezing one
>>> into the other in a pretty ugly way.
>>
>> yes, well, we need to have compatible objects between the XICS and XIVE 
>> mode because of the CAS negotiation. for migration compatibility, it is 
>> much easier to extend existing objects. This approach I am taking today.
> 
> Yeah, I really don't think this approach is workable.
> 
> Roughly speaking, you're keeping the same structures between xics and
> xive, but with mostly different code.  I can't see any way that's not
> going to be horribly fragile, with any update to xics OR xive
> requiring enormous caution not to break the other.
> 
> I really think we have to go one of two ways:
> 
> 1) Abstract the notion of interrupt source and interrupt presenter, so
> we can use a truly common model between xics and xive.
> 
> Given the differences between the two, I don't know this is even
> possible.
> 
> 2) Separate the objects entirely.  ICPs are entirely separate from
> TIMAs, like wise ICSes and xive interrupt sources.
> 
> I think this is probably the way to go.  To make this work with CAS
> switching will require different methods, but I don't think it's
> impossible.
>
> For example, we could (on the new machine type) create both xics ICSes
> and ICPs and xive sources and TIMAs.  We'd have a (migrated) flag in
> the machine saying which is currently active.  All the objects would
> hang around, but only the active ones would do anything.

ok. There is still the question of sharing the IRQ numbers unless we 
find away to allocate them after the CAS negotiation. 

I will take a look at KVM support now before reworking the patchset.

Thanks,

C.

> Now obviously that means we'd be migrating a bunch of redundant state,
> but I still think it's preferable to a Frankenstinian fusion of
> xics-ish and xive-ish state.  I think there's a good chance we can
> improve on the basic idea to remove most or all of that redundant
> state.
>
David Gibson July 26, 2017, 2:02 a.m. UTC | #7
On Tue, Jul 25, 2017 at 05:01:48PM +0200, Cédric Le Goater wrote:
> On 07/25/2017 03:21 PM, David Gibson wrote:
> > On Tue, Jul 25, 2017 at 11:08:46AM +0200, Cédric Le Goater wrote:
> >> On 07/25/2017 06:20 AM, David Gibson wrote:
> >>> On Mon, Jul 24, 2017 at 04:44:00PM +0200, Cédric Le Goater wrote:
> >>>> On 07/24/2017 08:35 AM, David Gibson wrote:
> >>>>> On Wed, Jul 05, 2017 at 07:13:27PM +0200, Cédric Le Goater wrote:
> >>>>>> The Thread Interrupt Management Area for the OS is mostly used to
> >>>>>> acknowledge interrupts and set the CPPR of the CPU.
> >>>>>>
> >>>>>> The TIMA is mapped at the same address for each CPU. 'current_cpu' is
> >>>>>> used to retrieve the targeted interrupt presenter object.
> >>>>>>
> >>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>>>
> >>>>> Am I right in thinking that this shoehorns the XIVE TIMA state into
> >>>>> the existing XICS ICP object.  That.. doesn't seem like a good idea.
> >>>>
> >>>> The TIMA memory region is under the XIVE object because it is 
> >>>> unique for the system. The lookup of the ICP is simply done using 
> >>>> 'current_cpu'. The TIMA state is under the ICPState, yes, but this 
> >>>> model does not seem incorrect to me as this state contains the 
> >>>> interrupt information presented to a CPU.
> >>>
> >>> Yeah, that's not the point I'm making.  My point is that the TIMA
> >>> state isn't really the same as xics ICP state.  You're squeezing one
> >>> into the other in a pretty ugly way.
> >>
> >> yes, well, we need to have compatible objects between the XICS and XIVE 
> >> mode because of the CAS negotiation. for migration compatibility, it is 
> >> much easier to extend existing objects. This approach I am taking today.
> > 
> > Yeah, I really don't think this approach is workable.
> > 
> > Roughly speaking, you're keeping the same structures between xics and
> > xive, but with mostly different code.  I can't see any way that's not
> > going to be horribly fragile, with any update to xics OR xive
> > requiring enormous caution not to break the other.
> > 
> > I really think we have to go one of two ways:
> > 
> > 1) Abstract the notion of interrupt source and interrupt presenter, so
> > we can use a truly common model between xics and xive.
> > 
> > Given the differences between the two, I don't know this is even
> > possible.
> > 
> > 2) Separate the objects entirely.  ICPs are entirely separate from
> > TIMAs, like wise ICSes and xive interrupt sources.
> > 
> > I think this is probably the way to go.  To make this work with CAS
> > switching will require different methods, but I don't think it's
> > impossible.
> >
> > For example, we could (on the new machine type) create both xics ICSes
> > and ICPs and xive sources and TIMAs.  We'd have a (migrated) flag in
> > the machine saying which is currently active.  All the objects would
> > hang around, but only the active ones would do anything.
> 
> ok. There is still the question of sharing the IRQ numbers unless we 
> find away to allocate them after the CAS negotiation. 

Right.  As I said, I think allocating after CAS is probably possible,
but it may not be necessary.

Merely lining up the irq numbers should be much simpler than actually
fusing the source and presenter structures.  Especially since both
xics and xive are flexible enough that we can map the irqs almost
however we want.

This definitely does mean the irq mapping / allocation should sit
inside the machine type, rather than the xics *or* the xive code, and
preferably be entirely common.  Then it's just a matter of setting up
both the xics and xive components so they can back that irq allocation.

> I will take a look at KVM support now before reworking the patchset.
> 
> Thanks,
> 
> C.
> 
> > Now obviously that means we'd be migrating a bunch of redundant state,
> > but I still think it's preferable to a Frankenstinian fusion of
> > xics-ish and xive-ish state.  I think there's a good chance we can
> > improve on the basic idea to remove most or all of that redundant
> > state.
> > 
>
diff mbox

Patch

diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h
index ba5e648a5258..5e8b78a1ea6a 100644
--- a/hw/intc/xive-internal.h
+++ b/hw/intc/xive-internal.h
@@ -200,6 +200,10 @@  struct XIVE {
     /* ESB and TIMA memory location */
     hwaddr       vc_base;
     MemoryRegion esb_iomem;
+
+    uint32_t     tm_shift;
+    hwaddr       tm_base;
+    MemoryRegion tm_iomem;
 };
 
 void xive_reset(void *dev);
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index c08a4f8efb58..82b2f0dcda0b 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -26,6 +26,180 @@ 
 
 #include "xive-internal.h"
 
+static uint8_t priority_to_ipb(uint8_t priority)
+{
+    return priority < XIVE_EQ_PRIORITY_COUNT ? 1 << (7 - priority) : 0;
+}
+
+static uint64_t xive_icp_accept(XiveICPState *xicp)
+{
+    ICPState *icp = ICP(xicp);
+    uint8_t nsr = xicp->tima_os[TM_NSR];
+
+    qemu_irq_lower(icp->output);
+
+    if (xicp->tima_os[TM_NSR] & TM_QW1_NSR_EO) {
+        uint8_t cppr = xicp->tima_os[TM_PIPR];
+
+        xicp->tima_os[TM_CPPR] = cppr;
+
+        /* Reset the pending buffer bit */
+        xicp->tima_os[TM_IPB] &= ~priority_to_ipb(cppr);
+
+        /* Drop Exception bit for OS */
+        xicp->tima_os[TM_NSR] &= ~TM_QW1_NSR_EO;
+    }
+
+    return (nsr << 8) | xicp->tima_os[TM_CPPR];
+}
+
+static void xive_icp_set_cppr(XiveICPState *xicp, uint8_t cppr)
+{
+    if (cppr > XIVE_PRIORITY_MAX) {
+        cppr = 0xff;
+    }
+
+    xicp->tima_os[TM_CPPR] = cppr;
+}
+
+/*
+ * Thread Interrupt Management Area MMIO
+ */
+static uint64_t xive_tm_read_special(XiveICPState *icp, hwaddr offset,
+                                     unsigned size)
+{
+    uint64_t ret = -1;
+
+    if (offset == TM_SPC_ACK_OS_REG && size == 2) {
+        ret = xive_icp_accept(icp);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA read @%"
+                      HWADDR_PRIx" size %d\n", offset, size);
+    }
+
+    return ret;
+}
+
+static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
+    XiveICPState *icp = XIVE_ICP(cpu->intc);
+    uint64_t ret = -1;
+    int i;
+
+    if (offset >= TM_SPC_ACK_EBB) {
+        return xive_tm_read_special(icp, offset, size);
+    }
+
+    if (offset & TM_QW1_OS) {
+        switch (size) {
+        case 1:
+        case 2:
+        case 4:
+        case 8:
+            if (QEMU_IS_ALIGNED(offset, size)) {
+                ret = 0;
+                for (i = 0; i < size; i++) {
+                    ret |= icp->tima[offset + i] << (8 * i);
+                }
+            } else {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "XIVE: invalid TIMA read alignment @%"
+                              HWADDR_PRIx" size %d\n", offset, size);
+            }
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    } else {
+        qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%"
+                      HWADDR_PRIx"\n", offset);
+    }
+
+    return ret;
+}
+
+static bool xive_tm_is_readonly(uint8_t index)
+{
+    /* Let's be optimistic and prepare ground for HV mode support */
+    switch (index) {
+    case TM_QW1_OS + TM_CPPR:
+        return false;
+    default:
+        return true;
+    }
+}
+
+static void xive_tm_write_special(XiveICPState *xicp, hwaddr offset,
+                                  uint64_t value, unsigned size)
+{
+    if (offset == TM_SPC_SET_OS_PENDING && size == 1) {
+        xicp->tima_os[TM_IPB] |= priority_to_ipb(value & 0xff);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
+                      HWADDR_PRIx" size %d\n", offset, size);
+    }
+
+    /* TODO: support TM_SPC_ACK_OS_EL */
+}
+
+static void xive_tm_write(void *opaque, hwaddr offset,
+                           uint64_t value, unsigned size)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
+    XiveICPState *icp = XIVE_ICP(cpu->intc);
+    int i;
+
+    if (offset >= TM_SPC_ACK_EBB) {
+        xive_tm_write_special(icp, offset, value, size);
+        return;
+    }
+
+    if (offset & TM_QW1_OS) {
+        switch (size) {
+        case 1:
+            if (offset == TM_QW1_OS + TM_CPPR) {
+                xive_icp_set_cppr(icp, value & 0xff);
+            }
+            break;
+        case 4:
+        case 8:
+            if (QEMU_IS_ALIGNED(offset, size)) {
+                for (i = 0; i < size; i++) {
+                    if (!xive_tm_is_readonly(offset + i)) {
+                        icp->tima[offset + i] = (value >> (8 * i)) & 0xff;
+                    }
+                }
+            } else {
+                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
+                              HWADDR_PRIx" size %d\n", offset, size);
+            }
+            break;
+        default:
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
+                          HWADDR_PRIx" size %d\n", offset, size);
+        }
+    } else {
+        qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%"
+                      HWADDR_PRIx"\n", offset);
+    }
+}
+
+
+static const MemoryRegionOps xive_tm_ops = {
+    .read = xive_tm_read,
+    .write = xive_tm_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+};
+
 static void xive_icp_reset(ICPState *icp)
 {
     XiveICPState *xicp = XIVE_ICP(icp);
@@ -453,6 +627,11 @@  static const TypeInfo xive_ics_info = {
 #define P9_MMIO_BASE     0x006000000000000ull
 #define P9_CHIP_BASE(id) (P9_MMIO_BASE | (0x40000000000ull * (uint64_t) (id)))
 
+/* Thread Interrupt Management Area MMIO */
+#define TM_BAR_DEFAULT   0x30203180000ull
+#define TM_SHIFT         16
+#define TM_BAR_SIZE      (XIVE_TM_RING_COUNT * (1 << TM_SHIFT))
+
 static uint64_t xive_esb_default_read(void *p, hwaddr offset, unsigned size)
 {
     qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n",
@@ -541,6 +720,14 @@  static void xive_realize(DeviceState *dev, Error **errp)
                           NULL, "xive.esb", VC_BAR_SIZE);
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->esb_iomem);
 
+    /* TM BAR. Same address for each chip */
+    x->tm_base = (P9_MMIO_BASE | TM_BAR_DEFAULT);
+    x->tm_shift = TM_SHIFT;
+
+    memory_region_init_io(&x->tm_iomem, OBJECT(x), &xive_tm_ops, x,
+                          "xive.tm", TM_BAR_SIZE);
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &x->tm_iomem);
+
     qemu_register_reset(xive_reset, dev);
 }