diff mbox series

[v2,2/2] arm_gicv3_kvm: kvm_dist_get/put: skip the registers banked by GICR

Message ID 1521530809-11780-3-git-send-email-zhaoshenglong@huawei.com
State New
Headers show
Series two fixes for KVM GICv3 dist get/put functions | expand

Commit Message

Shannon Zhao March 20, 2018, 7:26 a.m. UTC
While we skip the GIC_INTERNAL irqs, we don't change the register offset
accordingly. This will overlap the GICR registers value and leave the
last GIC_INTERNAL irq's registers out of update.

Fix this by skipping the registers banked by GICR.

Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
---
 hw/intc/arm_gicv3_kvm.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

Comments

Eric Auger March 20, 2018, 8:42 a.m. UTC | #1
Hi Shannon,
On 20/03/18 08:26, Shannon Zhao wrote:
> While we skip the GIC_INTERNAL irqs, we don't change the register offset
> accordingly. This will overlap the GICR registers value and leave the
> last GIC_INTERNAL irq's registers out of update.
> 
> Fix this by skipping the registers banked by GICR.
> 
> Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
> ---
>  hw/intc/arm_gicv3_kvm.c | 38 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)
> 
> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
> index 3536795..d423cba 100644
> --- a/hw/intc/arm_gicv3_kvm.c
> +++ b/hw/intc/arm_gicv3_kvm.c
> @@ -136,6 +136,12 @@ static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>      int irq;
>  
>      field = (uint32_t *)bmp;
> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
> +     * sync them.
> +     */
> +    offset += (8 * sizeof(uint32_t));
>      for_each_dist_irq_reg(irq, s->num_irq, 8) {
>          kvm_gicd_access(s, offset, &reg, false);
>          *field = reg;
> @@ -150,6 +156,12 @@ static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>      int irq;
>  
>      field = (uint32_t *)bmp;
> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
> +     * sync them.
> +     */
> +    offset += (8 * sizeof(uint32_t));
>      for_each_dist_irq_reg(irq, s->num_irq, 8) {
>          reg = *field;
>          kvm_gicd_access(s, offset, &reg, true);
> @@ -164,6 +176,12 @@ static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset,
>      uint32_t reg;
>      int irq;
>  
> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 2
> +     * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
> +     * functionality is replaced by GICR_ICFGR<n>. So it doesn't need to sync
> +     * them.
> +     */
> +    offset += (2 * sizeof(uint32_t));
>      for_each_dist_irq_reg(irq, s->num_irq, 2) {
>          kvm_gicd_access(s, offset, &reg, false);
>          reg = half_unshuffle32(reg >> 1);
> @@ -181,6 +199,12 @@ static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset,
>      uint32_t reg;
>      int irq;
>  
> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 2
> +     * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
> +     * functionality is replaced by GICR_ICFGR<n>. So it doesn't need to sync
> +     * them.
> +     */
> +    offset += (2 * sizeof(uint32_t));
>      for_each_dist_irq_reg(irq, s->num_irq, 2) {
>          reg = *gic_bmp_ptr32(bmp, irq);
>          if (irq % 32 != 0) {
> @@ -222,6 +246,12 @@ static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp)
>      uint32_t reg;
>      int irq;
>  
> +    /* For the KVM GICv3, affinity routing is always enabled, and the
> +     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
> +     * are always RAZ/WI. The corresponding functionality is replaced by the
> +     * GICR registers. So it doesn't need to sync them.
> +     */
> +    offset += (1 * sizeof(uint32_t));
>      for_each_dist_irq_reg(irq, s->num_irq, 1) {
>          kvm_gicd_access(s, offset, &reg, false);
>          *gic_bmp_ptr32(bmp, irq) = reg;
> @@ -235,6 +265,14 @@ static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
>      uint32_t reg;
>      int irq;
>  
> +    /* For the KVM GICv3, affinity routing is always enabled, and the
> +     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
> +     * are always RAZ/WI. The corresponding functionality is replaced by the
> +     * GICR registers. So it doesn't need to sync them.
> +     */
> +    offset += (1 * sizeof(uint32_t));
I wonder we couldn't create a new for_each_dist_irq_reg() macro taking
the offset and clroffset and integrating that shift inside?

> +    if (clroffset != 0)
nit style issue: brace needed here

Besides Reviewed-by: Eric Auger <eric.auger@redhat.com>

Thanks

Eric

> +        clroffset += (1 * sizeof(uint32_t));
>      for_each_dist_irq_reg(irq, s->num_irq, 1) {
>          /* If this bitmap is a set/clear register pair, first write to the
>           * clear-reg to clear all bits before using the set-reg to write
>
Peter Maydell March 20, 2018, 11:22 a.m. UTC | #2
On 20 March 2018 at 07:26, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
> While we skip the GIC_INTERNAL irqs, we don't change the register offset
> accordingly. This will overlap the GICR registers value and leave the
> last GIC_INTERNAL irq's registers out of update.
>
> Fix this by skipping the registers banked by GICR.
>

I'm still not entirely sure what the underlying problem
you're trying to fix is...

Do we fail to correctly migrate a VM without this change?
Does the code work on some host CPU/GIC implementations but
not others? Is this just improving efficiency by avoiding
doing some unnecessary work?

thanks
-- PMM
Shannon Zhao March 20, 2018, 11:36 a.m. UTC | #3
On 2018/3/20 19:22, Peter Maydell wrote:
> On 20 March 2018 at 07:26, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>> While we skip the GIC_INTERNAL irqs, we don't change the register offset
>> accordingly. This will overlap the GICR registers value and leave the
>> last GIC_INTERNAL irq's registers out of update.
>>
>> Fix this by skipping the registers banked by GICR.
>>
> 
> I'm still not entirely sure what the underlying problem
> you're trying to fix is...
> 
> Do we fail to correctly migrate a VM without this change?
> Does the code work on some host CPU/GIC implementations but
> not others? Is this just improving efficiency by avoiding
> doing some unnecessary work?
> 
When we reboot a VM and before entering uefi or guest kernel, we expect
all these registers staying at the initial state. But currently these
registers of the last 32 irqs are not reset. For example, the PRIORITY
of irq from 32 to 255 is 0 but the PRIORITY of irq from 256 to 287 is
0xa0(Linux kernel set the PRIORITY to 0xa0 by default).

When migrating a VM, since we don't save and restore the registers of
the last 32 irq, so the PRIORITY is 0 while we expecting 0xa0.
And also it will overlap the PRIORITY of SGIs and PPIs.

We don't fail to migrate a vm since currently we don't use the last 32
irqs in virt machine. But the bug is still there.

Thanks,
Peter Maydell March 20, 2018, 11:54 a.m. UTC | #4
On 20 March 2018 at 11:36, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>
>
> On 2018/3/20 19:22, Peter Maydell wrote:
>> On 20 March 2018 at 07:26, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>>> While we skip the GIC_INTERNAL irqs, we don't change the register offset
>>> accordingly. This will overlap the GICR registers value and leave the
>>> last GIC_INTERNAL irq's registers out of update.
>>>
>>> Fix this by skipping the registers banked by GICR.
>>>
>>
>> I'm still not entirely sure what the underlying problem
>> you're trying to fix is...
>>
>> Do we fail to correctly migrate a VM without this change?
>> Does the code work on some host CPU/GIC implementations but
>> not others? Is this just improving efficiency by avoiding
>> doing some unnecessary work?
>>
> When we reboot a VM and before entering uefi or guest kernel, we expect
> all these registers staying at the initial state. But currently these
> registers of the last 32 irqs are not reset. For example, the PRIORITY
> of irq from 32 to 255 is 0 but the PRIORITY of irq from 256 to 287 is
> 0xa0(Linux kernel set the PRIORITY to 0xa0 by default).
>
> When migrating a VM, since we don't save and restore the registers of
> the last 32 irq, so the PRIORITY is 0 while we expecting 0xa0.
> And also it will overlap the PRIORITY of SGIs and PPIs.
>
> We don't fail to migrate a vm since currently we don't use the last 32
> irqs in virt machine. But the bug is still there.

Oh, I see, the number of registers we transfer is accounting
for the first N registers in the bank not being used, but the
first register offset to transfer wasn't.

Can you still successfully migrate a VM from a QEMU version
without this bugfix to one with the bugfix ?

thanks
-- PMM
Shannon Zhao March 21, 2018, 8 a.m. UTC | #5
On 2018/3/20 19:54, Peter Maydell wrote:
> On 20 March 2018 at 11:36, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>>
>>
>> On 2018/3/20 19:22, Peter Maydell wrote:
>>> On 20 March 2018 at 07:26, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>>>> While we skip the GIC_INTERNAL irqs, we don't change the register offset
>>>> accordingly. This will overlap the GICR registers value and leave the
>>>> last GIC_INTERNAL irq's registers out of update.
>>>>
>>>> Fix this by skipping the registers banked by GICR.
>>>>
>>>
>>> I'm still not entirely sure what the underlying problem
>>> you're trying to fix is...
>>>
>>> Do we fail to correctly migrate a VM without this change?
>>> Does the code work on some host CPU/GIC implementations but
>>> not others? Is this just improving efficiency by avoiding
>>> doing some unnecessary work?
>>>
>> When we reboot a VM and before entering uefi or guest kernel, we expect
>> all these registers staying at the initial state. But currently these
>> registers of the last 32 irqs are not reset. For example, the PRIORITY
>> of irq from 32 to 255 is 0 but the PRIORITY of irq from 256 to 287 is
>> 0xa0(Linux kernel set the PRIORITY to 0xa0 by default).
>>
>> When migrating a VM, since we don't save and restore the registers of
>> the last 32 irq, so the PRIORITY is 0 while we expecting 0xa0.
>> And also it will overlap the PRIORITY of SGIs and PPIs.
>>
>> We don't fail to migrate a vm since currently we don't use the last 32
>> irqs in virt machine. But the bug is still there.
> 
> Oh, I see, the number of registers we transfer is accounting
> for the first N registers in the bank not being used, but the
> first register offset to transfer wasn't.
> 
> Can you still successfully migrate a VM from a QEMU version
> without this bugfix to one with the bugfix ?
> 
I've tested this case. I can migrate a VM between these two versions.

Thanks,
Shannon Zhao March 21, 2018, 8:33 a.m. UTC | #6
On 2018/3/20 16:42, Auger Eric wrote:
> Hi Shannon,
> On 20/03/18 08:26, Shannon Zhao wrote:
>> While we skip the GIC_INTERNAL irqs, we don't change the register offset
>> accordingly. This will overlap the GICR registers value and leave the
>> last GIC_INTERNAL irq's registers out of update.
>>
>> Fix this by skipping the registers banked by GICR.
>>
>> Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
>> ---
>>  hw/intc/arm_gicv3_kvm.c | 38 ++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 38 insertions(+)
>>
>> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
>> index 3536795..d423cba 100644
>> --- a/hw/intc/arm_gicv3_kvm.c
>> +++ b/hw/intc/arm_gicv3_kvm.c
>> @@ -136,6 +136,12 @@ static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>>      int irq;
>>  
>>      field = (uint32_t *)bmp;
>> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
>> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
>> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
>> +     * sync them.
>> +     */
>> +    offset += (8 * sizeof(uint32_t));
>>      for_each_dist_irq_reg(irq, s->num_irq, 8) {
>>          kvm_gicd_access(s, offset, &reg, false);
>>          *field = reg;
>> @@ -150,6 +156,12 @@ static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>>      int irq;
>>  
>>      field = (uint32_t *)bmp;
>> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
>> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
>> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
>> +     * sync them.
>> +     */
>> +    offset += (8 * sizeof(uint32_t));
>>      for_each_dist_irq_reg(irq, s->num_irq, 8) {
>>          reg = *field;
>>          kvm_gicd_access(s, offset, &reg, true);
>> @@ -164,6 +176,12 @@ static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset,
>>      uint32_t reg;
>>      int irq;
>>  
>> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 2
>> +     * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
>> +     * functionality is replaced by GICR_ICFGR<n>. So it doesn't need to sync
>> +     * them.
>> +     */
>> +    offset += (2 * sizeof(uint32_t));
>>      for_each_dist_irq_reg(irq, s->num_irq, 2) {
>>          kvm_gicd_access(s, offset, &reg, false);
>>          reg = half_unshuffle32(reg >> 1);
>> @@ -181,6 +199,12 @@ static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset,
>>      uint32_t reg;
>>      int irq;
>>  
>> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 2
>> +     * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
>> +     * functionality is replaced by GICR_ICFGR<n>. So it doesn't need to sync
>> +     * them.
>> +     */
>> +    offset += (2 * sizeof(uint32_t));
>>      for_each_dist_irq_reg(irq, s->num_irq, 2) {
>>          reg = *gic_bmp_ptr32(bmp, irq);
>>          if (irq % 32 != 0) {
>> @@ -222,6 +246,12 @@ static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp)
>>      uint32_t reg;
>>      int irq;
>>  
>> +    /* For the KVM GICv3, affinity routing is always enabled, and the
>> +     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
>> +     * are always RAZ/WI. The corresponding functionality is replaced by the
>> +     * GICR registers. So it doesn't need to sync them.
>> +     */
>> +    offset += (1 * sizeof(uint32_t));
>>      for_each_dist_irq_reg(irq, s->num_irq, 1) {
>>          kvm_gicd_access(s, offset, &reg, false);
>>          *gic_bmp_ptr32(bmp, irq) = reg;
>> @@ -235,6 +265,14 @@ static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
>>      uint32_t reg;
>>      int irq;
>>  
>> +    /* For the KVM GICv3, affinity routing is always enabled, and the
>> +     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
>> +     * are always RAZ/WI. The corresponding functionality is replaced by the
>> +     * GICR registers. So it doesn't need to sync them.
>> +     */
>> +    offset += (1 * sizeof(uint32_t));
> I wonder we couldn't create a new for_each_dist_irq_reg() macro taking
> the offset and clroffset and integrating that shift inside?
> 
Peter, what's your opinion?

>> +    if (clroffset != 0)
> nit style issue: brace needed here
>
OK

> Besides Reviewed-by: Eric Auger <eric.auger@redhat.com>
> 
Thanks!

> Thanks
> 
> Eric
> 
>> +        clroffset += (1 * sizeof(uint32_t));
>>      for_each_dist_irq_reg(irq, s->num_irq, 1) {
>>          /* If this bitmap is a set/clear register pair, first write to the
>>           * clear-reg to clear all bits before using the set-reg to write
>>
> 
> .
>
Peter Maydell March 23, 2018, 12:08 p.m. UTC | #7
On 21 March 2018 at 08:00, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
> On 2018/3/20 19:54, Peter Maydell wrote:
>> Can you still successfully migrate a VM from a QEMU version
>> without this bugfix to one with the bugfix ?
>>
> I've tested this case. I can migrate a VM between these two versions.

Hmm. Looking at the code I can't see how that would work,
except by accident. Let me see if I understand what's happening
here:

In the code in master, we have QEMU data structures
(bitmaps, etc) which have one entry for each of GICV3_MAXIRQ
irqs. That includes the RAZ/WI unused space for the SPIs/PPIs, so
for a 1-bit-per-irq bitmap:
 [0x00000000, irq 32, irq 33, .... ]

When we fill in the values from KVM into these data structures,
we start after the unused space, because the for_each_dist_irq_reg()
macro starts with _irq = GIC_INTERNAL. But we forgot to adjust
the offset value we use for the KVM access, so we start by
reading the RAZ/WI values from KVM, and the data structure
contents end up with:
 [0x00000000, 0x00000000, irq 32, irq 33, ... ]
(and the last irqs wouldn't get transferred).

With this change to the code we will get the offset right and
the data structure will be filled as
 [0x00000000, irq 32, irq 33, .... ]

But for migration from the old version, the data structure
we receive from the migration source will contain the old
broken layout of
 [0x00000000, 0x00000000, irq 32, irq 33, ... ]
so if the new code doesn't do anything special to handle
migration from that old version then it will write zeroes to
irq 32..63, and then write incorrect values for all the irqs
after that, won't it?

That suggests to me that we need to have some code in the
migration post-load routine that identifies that the data
is coming from an old version with this bug, and shifts
all the data down in the arrays so that the code to write
it to the kernel can handle it.

thanks
-- PMM
Peter Maydell March 29, 2018, 10:54 a.m. UTC | #8
On 23 March 2018 at 12:08, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 21 March 2018 at 08:00, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>> On 2018/3/20 19:54, Peter Maydell wrote:
>>> Can you still successfully migrate a VM from a QEMU version
>>> without this bugfix to one with the bugfix ?
>>>
>> I've tested this case. I can migrate a VM between these two versions.
>
> Hmm. Looking at the code I can't see how that would work,
> except by accident. Let me see if I understand what's happening
> here:
>
> In the code in master, we have QEMU data structures
> (bitmaps, etc) which have one entry for each of GICV3_MAXIRQ
> irqs. That includes the RAZ/WI unused space for the SPIs/PPIs, so
> for a 1-bit-per-irq bitmap:
>  [0x00000000, irq 32, irq 33, .... ]
>
> When we fill in the values from KVM into these data structures,
> we start after the unused space, because the for_each_dist_irq_reg()
> macro starts with _irq = GIC_INTERNAL. But we forgot to adjust
> the offset value we use for the KVM access, so we start by
> reading the RAZ/WI values from KVM, and the data structure
> contents end up with:
>  [0x00000000, 0x00000000, irq 32, irq 33, ... ]
> (and the last irqs wouldn't get transferred).
>
> With this change to the code we will get the offset right and
> the data structure will be filled as
>  [0x00000000, irq 32, irq 33, .... ]
>
> But for migration from the old version, the data structure
> we receive from the migration source will contain the old
> broken layout of
>  [0x00000000, 0x00000000, irq 32, irq 33, ... ]
> so if the new code doesn't do anything special to handle
> migration from that old version then it will write zeroes to
> irq 32..63, and then write incorrect values for all the irqs
> after that, won't it?
>
> That suggests to me that we need to have some code in the
> migration post-load routine that identifies that the data
> is coming from an old version with this bug, and shifts
> all the data down in the arrays so that the code to write
> it to the kernel can handle it.

I was thinking a bit more about how to handle this, and
my best idea was:

(1) send something in the migration stream that says
"I don't have this bug" (version number change?
vmstate field that's just a "no bug" flag? subsection
with no contents?)

(2) on the destination, if the source doesn't tell us
it doesn't have this bug, and we are running KVM, then
shift all the data in the arrays down to fix it up
[Strictly what we want to know is if the source is
running KVM, not if the destination is, but I don't
know of a way to find that out, and in practice TCG->KVM
migrations don't work anyway, so it's not a big deal.]

Juan, David, do you have any suggestions for the best
mechanism for part 1; or is there some clever way to
handle this sort of bug that I've missed?

thanks
-- PMM
Dr. David Alan Gilbert March 29, 2018, 11:11 a.m. UTC | #9
* Peter Maydell (peter.maydell@linaro.org) wrote:
> On 23 March 2018 at 12:08, Peter Maydell <peter.maydell@linaro.org> wrote:
> > On 21 March 2018 at 08:00, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
> >> On 2018/3/20 19:54, Peter Maydell wrote:
> >>> Can you still successfully migrate a VM from a QEMU version
> >>> without this bugfix to one with the bugfix ?
> >>>
> >> I've tested this case. I can migrate a VM between these two versions.
> >
> > Hmm. Looking at the code I can't see how that would work,
> > except by accident. Let me see if I understand what's happening
> > here:
> >
> > In the code in master, we have QEMU data structures
> > (bitmaps, etc) which have one entry for each of GICV3_MAXIRQ
> > irqs. That includes the RAZ/WI unused space for the SPIs/PPIs, so
> > for a 1-bit-per-irq bitmap:
> >  [0x00000000, irq 32, irq 33, .... ]
> >
> > When we fill in the values from KVM into these data structures,
> > we start after the unused space, because the for_each_dist_irq_reg()
> > macro starts with _irq = GIC_INTERNAL. But we forgot to adjust
> > the offset value we use for the KVM access, so we start by
> > reading the RAZ/WI values from KVM, and the data structure
> > contents end up with:
> >  [0x00000000, 0x00000000, irq 32, irq 33, ... ]
> > (and the last irqs wouldn't get transferred).
> >
> > With this change to the code we will get the offset right and
> > the data structure will be filled as
> >  [0x00000000, irq 32, irq 33, .... ]
> >
> > But for migration from the old version, the data structure
> > we receive from the migration source will contain the old
> > broken layout of
> >  [0x00000000, 0x00000000, irq 32, irq 33, ... ]
> > so if the new code doesn't do anything special to handle
> > migration from that old version then it will write zeroes to
> > irq 32..63, and then write incorrect values for all the irqs
> > after that, won't it?
> >
> > That suggests to me that we need to have some code in the
> > migration post-load routine that identifies that the data
> > is coming from an old version with this bug, and shifts
> > all the data down in the arrays so that the code to write
> > it to the kernel can handle it.
> 
> I was thinking a bit more about how to handle this, and
> my best idea was:
> 
> (1) send something in the migration stream that says
> "I don't have this bug" (version number change?
> vmstate field that's just a "no bug" flag? subsection
> with no contents?)
> 
> (2) on the destination, if the source doesn't tell us
> it doesn't have this bug, and we are running KVM, then
> shift all the data in the arrays down to fix it up
> [Strictly what we want to know is if the source is
> running KVM, not if the destination is, but I don't
> know of a way to find that out, and in practice TCG->KVM
> migrations don't work anyway, so it's not a big deal.]
> 
> Juan, David, do you have any suggestions for the best
> mechanism for part 1; or is there some clever way to
> handle this sort of bug that I've missed?

The subsection is probably the best bet; unless that is you can find
a bit to misuse in an existing field.

Dave

> thanks
> -- PMM
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Peter Maydell April 5, 2018, 2:22 p.m. UTC | #10
On 29 March 2018 at 11:54, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 23 March 2018 at 12:08, Peter Maydell <peter.maydell@linaro.org> wrote:
>> On 21 March 2018 at 08:00, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>>> On 2018/3/20 19:54, Peter Maydell wrote:
>>>> Can you still successfully migrate a VM from a QEMU version
>>>> without this bugfix to one with the bugfix ?
>>>>
>>> I've tested this case. I can migrate a VM between these two versions.
>>
>> Hmm. Looking at the code I can't see how that would work,
>> except by accident. Let me see if I understand what's happening
>> here:

> I was thinking a bit more about how to handle this, and
> my best idea was:
>
> (1) send something in the migration stream that says
> "I don't have this bug" (version number change?
> vmstate field that's just a "no bug" flag? subsection
> with no contents?)
>
> (2) on the destination, if the source doesn't tell us
> it doesn't have this bug, and we are running KVM, then
> shift all the data in the arrays down to fix it up
> [Strictly what we want to know is if the source is
> running KVM, not if the destination is, but I don't
> know of a way to find that out, and in practice TCG->KVM
> migrations don't work anyway, so it's not a big deal.]

Shannon, are you planning to look at this for 2.12, or should
we postpone it to 2.13? (It's not a regression, right? So
we don't necessarily have to urgently fix it for 2.12.)

thanks
-- PMM
Peter Maydell April 6, 2018, 9:36 a.m. UTC | #11
On 5 April 2018 at 15:22, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 29 March 2018 at 11:54, Peter Maydell <peter.maydell@linaro.org> wrote:
>> On 23 March 2018 at 12:08, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> On 21 March 2018 at 08:00, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>>>> On 2018/3/20 19:54, Peter Maydell wrote:
>>>>> Can you still successfully migrate a VM from a QEMU version
>>>>> without this bugfix to one with the bugfix ?
>>>>>
>>>> I've tested this case. I can migrate a VM between these two versions.
>>>
>>> Hmm. Looking at the code I can't see how that would work,
>>> except by accident. Let me see if I understand what's happening
>>> here:
>
>> I was thinking a bit more about how to handle this, and
>> my best idea was:
>>
>> (1) send something in the migration stream that says
>> "I don't have this bug" (version number change?
>> vmstate field that's just a "no bug" flag? subsection
>> with no contents?)
>>
>> (2) on the destination, if the source doesn't tell us
>> it doesn't have this bug, and we are running KVM, then
>> shift all the data in the arrays down to fix it up
>> [Strictly what we want to know is if the source is
>> running KVM, not if the destination is, but I don't
>> know of a way to find that out, and in practice TCG->KVM
>> migrations don't work anyway, so it's not a big deal.]
>
> Shannon, are you planning to look at this for 2.12, or should
> we postpone it to 2.13? (It's not a regression, right? So
> we don't necessarily have to urgently fix it for 2.12.)

On reflection, I think I'd aim for 2.13 for this, since:
 * it's not a regression
 * it doesn't actually affect any of our boards, because
   none of them define enough interrupt lines that they
   would actually be using the top 32 that we fail to migrate
 * getting the migration compat right is a bit tricky and
   will benefit from having the time for careful review and testing

Let me know if I'm wrong with any of those assumptions.

thanks
-- PMM
Shannon Zhao April 8, 2018, 1:50 a.m. UTC | #12
On 2018/4/6 17:36, Peter Maydell wrote:
> On 5 April 2018 at 15:22, Peter Maydell <peter.maydell@linaro.org> wrote:
>> > On 29 March 2018 at 11:54, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> >> On 23 March 2018 at 12:08, Peter Maydell <peter.maydell@linaro.org> wrote:
>>>> >>> On 21 March 2018 at 08:00, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>>>>> >>>> On 2018/3/20 19:54, Peter Maydell wrote:
>>>>>> >>>>> Can you still successfully migrate a VM from a QEMU version
>>>>>> >>>>> without this bugfix to one with the bugfix ?
>>>>>> >>>>>
>>>>> >>>> I've tested this case. I can migrate a VM between these two versions.
>>>> >>>
>>>> >>> Hmm. Looking at the code I can't see how that would work,
>>>> >>> except by accident. Let me see if I understand what's happening
>>>> >>> here:
>> >
>>> >> I was thinking a bit more about how to handle this, and
>>> >> my best idea was:
>>> >>
>>> >> (1) send something in the migration stream that says
>>> >> "I don't have this bug" (version number change?
>>> >> vmstate field that's just a "no bug" flag? subsection
>>> >> with no contents?)
>>> >>
>>> >> (2) on the destination, if the source doesn't tell us
>>> >> it doesn't have this bug, and we are running KVM, then
>>> >> shift all the data in the arrays down to fix it up
>>> >> [Strictly what we want to know is if the source is
>>> >> running KVM, not if the destination is, but I don't
>>> >> know of a way to find that out, and in practice TCG->KVM
>>> >> migrations don't work anyway, so it's not a big deal.]
>> >
>> > Shannon, are you planning to look at this for 2.12, or should
>> > we postpone it to 2.13? (It's not a regression, right? So
>> > we don't necessarily have to urgently fix it for 2.12.)
> On reflection, I think I'd aim for 2.13 for this, since:
>  * it's not a regression
>  * it doesn't actually affect any of our boards, because
>    none of them define enough interrupt lines that they
>    would actually be using the top 32 that we fail to migrate
>  * getting the migration compat right is a bit tricky and
>    will benefit from having the time for careful review and testing
> 
> Let me know if I'm wrong with any of those assumptions.
Yes, it's no need to merge this for 2.12. I'll respin this patch later.

Thanks,
Peter Maydell May 22, 2018, 9:13 a.m. UTC | #13
On 8 April 2018 at 02:50, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
> On 2018/4/6 17:36, Peter Maydell wrote:
>> On reflection, I think I'd aim for 2.13 for this, since:
>>  * it's not a regression
>>  * it doesn't actually affect any of our boards, because
>>    none of them define enough interrupt lines that they
>>    would actually be using the top 32 that we fail to migrate
>>  * getting the migration compat right is a bit tricky and
>>    will benefit from having the time for careful review and testing
>>
>> Let me know if I'm wrong with any of those assumptions.
>
> Yes, it's no need to merge this for 2.12. I'll respin this patch later.

Looking at another GIC related patch this morning reminded me
about this one. Shannon, are you planning to respin this now we've
reopened for 2.13 development?

thanks
-- PMM
Shannon Zhao May 24, 2018, 6:29 a.m. UTC | #14
On 2018/5/22 17:13, Peter Maydell wrote:
> On 8 April 2018 at 02:50, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>> On 2018/4/6 17:36, Peter Maydell wrote:
>>> On reflection, I think I'd aim for 2.13 for this, since:
>>>  * it's not a regression
>>>  * it doesn't actually affect any of our boards, because
>>>    none of them define enough interrupt lines that they
>>>    would actually be using the top 32 that we fail to migrate
>>>  * getting the migration compat right is a bit tricky and
>>>    will benefit from having the time for careful review and testing
>>>
>>> Let me know if I'm wrong with any of those assumptions.
>>
>> Yes, it's no need to merge this for 2.12. I'll respin this patch later.
> 
> Looking at another GIC related patch this morning reminded me
> about this one. Shannon, are you planning to respin this now we've
> reopened for 2.13 development?
> 
Yeah, I've sent new version yesterday. Please have a look.
http://patchwork.ozlabs.org/patch/918734/

Thanks,
diff mbox series

Patch

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 3536795..d423cba 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -136,6 +136,12 @@  static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
     int irq;
 
     field = (uint32_t *)bmp;
+    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
+     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
+     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
+     * sync them.
+     */
+    offset += (8 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 8) {
         kvm_gicd_access(s, offset, &reg, false);
         *field = reg;
@@ -150,6 +156,12 @@  static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
     int irq;
 
     field = (uint32_t *)bmp;
+    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
+     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
+     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
+     * sync them.
+     */
+    offset += (8 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 8) {
         reg = *field;
         kvm_gicd_access(s, offset, &reg, true);
@@ -164,6 +176,12 @@  static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset,
     uint32_t reg;
     int irq;
 
+    /* For the KVM GICv3, affinity routing is always enabled, and the first 2
+     * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
+     * functionality is replaced by GICR_ICFGR<n>. So it doesn't need to sync
+     * them.
+     */
+    offset += (2 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 2) {
         kvm_gicd_access(s, offset, &reg, false);
         reg = half_unshuffle32(reg >> 1);
@@ -181,6 +199,12 @@  static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset,
     uint32_t reg;
     int irq;
 
+    /* For the KVM GICv3, affinity routing is always enabled, and the first 2
+     * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
+     * functionality is replaced by GICR_ICFGR<n>. So it doesn't need to sync
+     * them.
+     */
+    offset += (2 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 2) {
         reg = *gic_bmp_ptr32(bmp, irq);
         if (irq % 32 != 0) {
@@ -222,6 +246,12 @@  static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp)
     uint32_t reg;
     int irq;
 
+    /* For the KVM GICv3, affinity routing is always enabled, and the
+     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
+     * are always RAZ/WI. The corresponding functionality is replaced by the
+     * GICR registers. So it doesn't need to sync them.
+     */
+    offset += (1 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 1) {
         kvm_gicd_access(s, offset, &reg, false);
         *gic_bmp_ptr32(bmp, irq) = reg;
@@ -235,6 +265,14 @@  static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
     uint32_t reg;
     int irq;
 
+    /* For the KVM GICv3, affinity routing is always enabled, and the
+     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
+     * are always RAZ/WI. The corresponding functionality is replaced by the
+     * GICR registers. So it doesn't need to sync them.
+     */
+    offset += (1 * sizeof(uint32_t));
+    if (clroffset != 0)
+        clroffset += (1 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 1) {
         /* If this bitmap is a set/clear register pair, first write to the
          * clear-reg to clear all bits before using the set-reg to write