Message ID | AANLkTi=7bS=W+FZihBya-pRXR2asQ6BgSTBPcPewgHBF@mail.gmail.com |
---|---|
State | New |
Headers | show |
On Fri, Dec 24, 2010 at 08:22:00PM +0900, Yoshiaki Tamura wrote: > 2010/12/24 Michael S. Tsirkin <mst@redhat.com>: > > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote: > >> virtio save/load is currently sending last_avail_idx, but inuse isn't. > >> This causes inconsistent state when using Kemari which replays > >> outstanding requests on the secondary. By letting last_avail_idx to > >> be updated after inuse is decreased, it would be possible to replay > >> the outstanding requests. Noth that live migration shouldn't be > >> affected because it waits until flushing all requests. Also in > >> conjunction with event-tap, requests inversion should be avoided. > >> > >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> > > > > I think I understood the request inversion. My question now is, > > event-tap transfers inuse events as well, wont the same > > request be repeated twice? > > > >> --- > >> hw/virtio.c | 8 +++++++- > >> 1 files changed, 7 insertions(+), 1 deletions(-) > >> > >> diff --git a/hw/virtio.c b/hw/virtio.c > >> index 07dbf86..f915c46 100644 > >> --- a/hw/virtio.c > >> +++ b/hw/virtio.c > >> @@ -72,7 +72,7 @@ struct VirtQueue > >> VRing vring; > >> target_phys_addr_t pa; > >> uint16_t last_avail_idx; > >> - int inuse; > >> + uint16_t inuse; > >> uint16_t vector; > >> void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); > >> VirtIODevice *vdev; > >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) > >> qemu_put_be32(f, vdev->vq[i].vring.num); > >> qemu_put_be64(f, vdev->vq[i].pa); > >> qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); > >> + qemu_put_be16s(f, &vdev->vq[i].inuse); > >> if (vdev->binding->save_queue) > >> vdev->binding->save_queue(vdev->binding_opaque, i, f); > >> } > >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >> vdev->vq[i].vring.num = qemu_get_be32(f); > >> vdev->vq[i].pa = qemu_get_be64(f); > >> qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); > >> + 
qemu_get_be16s(f, &vdev->vq[i].inuse); > >> + > >> + /* revert last_avail_idx if there are outstanding emulation. */ > > > > if there are outstanding emulation -> if requests > > are outstanding in event-tap? > > > >> + vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse; > >> + vdev->vq[i].inuse = 0; > >> > > > > I don't understand it, if this is all we do we can equivalently > > decrement on the sender side and avoid breaking migration compatibility? > > It seems I sent the old patch... I'm really sorry. Currently > I'm taking the approach to update last_avai_idx later. > Decreasing looks scary to me if the guest already knows about it. It seems exactly the same functionally. > commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9 > Author: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> > Date: Mon May 17 10:36:14 2010 +0900 > > virtio: update last_avail_idx when inuse is decreased. > > virtio save/load is currently sending last_avail_idx, but inuse isn't. > This causes inconsistent state when using Kemari which replays > outstanding requests on the secondary. By letting last_avail_idx to > be updated after inuse is decreased, it would be possible to replay > the outstanding requests. Noth that live migration shouldn't be > affected because it waits until flushing all requests. Also in > conjunction with event-tap, requests inversion should be avoided. 
> > Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> > > diff --git a/hw/virtio.c b/hw/virtio.c > index 07dbf86..b1586da 100644 > --- a/hw/virtio.c > +++ b/hw/virtio.c > @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq) > > int virtio_queue_empty(VirtQueue *vq) > { > - return vring_avail_idx(vq) == vq->last_avail_idx; > + return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse; > } > > void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, > @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) > wmb(); > trace_virtqueue_flush(vq, count); > vring_used_idx_increment(vq, count); > + vq->last_avail_idx += count; > vq->inuse -= count; > } > > @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o > unsigned int idx; > int total_bufs, in_total, out_total; > > - idx = vq->last_avail_idx; > + idx = vq->last_avail_idx + vq->inuse; > > total_bufs = in_total = out_total = 0; > while (virtqueue_num_heads(vq, idx)) { > @@ -386,7 +387,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) > unsigned int i, head, max; > target_phys_addr_t desc_pa = vq->vring.desc; > > - if (!virtqueue_num_heads(vq, vq->last_avail_idx)) > + if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse)) > return 0; > > /* When we start there are none of either input nor output. 
*/ > @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) > > max = vq->vring.num; > > - i = head = virtqueue_get_head(vq, vq->last_avail_idx++); > + i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse); > > if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { > if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { > @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) > /* Always notify when queue is empty (when feature acknowledge) */ > if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) && > (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) || > - (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx))) > + (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse))) > return; > > trace_virtio_notify(vdev, vq); > > > > > >> if (vdev->vq[i].pa) { > >> uint16_t nheads; > >> -- > >> 1.7.1.2 > > -- > > To unsubscribe from this list: send the line "unsubscribe kvm" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > >
2010/12/24 Michael S. Tsirkin <mst@redhat.com>: > On Fri, Dec 24, 2010 at 08:22:00PM +0900, Yoshiaki Tamura wrote: >> 2010/12/24 Michael S. Tsirkin <mst@redhat.com>: >> > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote: >> >> virtio save/load is currently sending last_avail_idx, but inuse isn't. >> >> This causes inconsistent state when using Kemari which replays >> >> outstanding requests on the secondary. By letting last_avail_idx to >> >> be updated after inuse is decreased, it would be possible to replay >> >> the outstanding requests. Noth that live migration shouldn't be >> >> affected because it waits until flushing all requests. Also in >> >> conjunction with event-tap, requests inversion should be avoided. >> >> >> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> >> > >> > I think I understood the request inversion. My question now is, >> > event-tap transfers inuse events as well, wont the same >> > request be repeated twice? >> > >> >> --- >> >> hw/virtio.c | 8 +++++++- >> >> 1 files changed, 7 insertions(+), 1 deletions(-) >> >> >> >> diff --git a/hw/virtio.c b/hw/virtio.c >> >> index 07dbf86..f915c46 100644 >> >> --- a/hw/virtio.c >> >> +++ b/hw/virtio.c >> >> @@ -72,7 +72,7 @@ struct VirtQueue >> >> VRing vring; >> >> target_phys_addr_t pa; >> >> uint16_t last_avail_idx; >> >> - int inuse; >> >> + uint16_t inuse; >> >> uint16_t vector; >> >> void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); >> >> VirtIODevice *vdev; >> >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) >> >> qemu_put_be32(f, vdev->vq[i].vring.num); >> >> qemu_put_be64(f, vdev->vq[i].pa); >> >> qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); >> >> + qemu_put_be16s(f, &vdev->vq[i].inuse); >> >> if (vdev->binding->save_queue) >> >> vdev->binding->save_queue(vdev->binding_opaque, i, f); >> >> } >> >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) >> >> vdev->vq[i].vring.num = qemu_get_be32(f); >> >> 
vdev->vq[i].pa = qemu_get_be64(f); >> >> qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); >> >> + qemu_get_be16s(f, &vdev->vq[i].inuse); >> >> + >> >> + /* revert last_avail_idx if there are outstanding emulation. */ >> > >> > if there are outstanding emulation -> if requests >> > are outstanding in event-tap? >> > >> >> + vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse; >> >> + vdev->vq[i].inuse = 0; >> >> >> > >> > I don't understand it, if this is all we do we can equivalently >> > decrement on the sender side and avoid breaking migration compatibility? >> >> It seems I sent the old patch... I'm really sorry. Currently >> I'm taking the approach to update last_avai_idx later. >> Decreasing looks scary to me if the guest already knows about it. > > It seems exactly the same functionally. If it is the same I'm fine to go with the decreasing approach. Is it fine for the guest? Is last_avai_idx irrelevant to the guest's behavior? Yoshi >> commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9 >> Author: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> >> Date: Mon May 17 10:36:14 2010 +0900 >> >> virtio: update last_avail_idx when inuse is decreased. >> >> virtio save/load is currently sending last_avail_idx, but inuse isn't. >> This causes inconsistent state when using Kemari which replays >> outstanding requests on the secondary. By letting last_avail_idx to >> be updated after inuse is decreased, it would be possible to replay >> the outstanding requests. Noth that live migration shouldn't be >> affected because it waits until flushing all requests. Also in >> conjunction with event-tap, requests inversion should be avoided. 
>> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> >> >> diff --git a/hw/virtio.c b/hw/virtio.c >> index 07dbf86..b1586da 100644 >> --- a/hw/virtio.c >> +++ b/hw/virtio.c >> @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq) >> >> int virtio_queue_empty(VirtQueue *vq) >> { >> - return vring_avail_idx(vq) == vq->last_avail_idx; >> + return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse; >> } >> >> void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, >> @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) >> wmb(); >> trace_virtqueue_flush(vq, count); >> vring_used_idx_increment(vq, count); >> + vq->last_avail_idx += count; >> vq->inuse -= count; >> } >> >> @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o >> unsigned int idx; >> int total_bufs, in_total, out_total; >> >> - idx = vq->last_avail_idx; >> + idx = vq->last_avail_idx + vq->inuse; >> >> total_bufs = in_total = out_total = 0; >> while (virtqueue_num_heads(vq, idx)) { >> @@ -386,7 +387,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) >> unsigned int i, head, max; >> target_phys_addr_t desc_pa = vq->vring.desc; >> >> - if (!virtqueue_num_heads(vq, vq->last_avail_idx)) >> + if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse)) >> return 0; >> >> /* When we start there are none of either input nor output. 
*/ >> @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) >> >> max = vq->vring.num; >> >> - i = head = virtqueue_get_head(vq, vq->last_avail_idx++); >> + i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse); >> >> if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { >> if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { >> @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) >> /* Always notify when queue is empty (when feature acknowledge) */ >> if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) && >> (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) || >> - (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx))) >> + (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse))) >> return; >> >> trace_virtio_notify(vdev, vq); >> >> >> > >> >> if (vdev->vq[i].pa) { >> >> uint16_t nheads; >> >> -- >> >> 1.7.1.2 >> > -- >> > To unsubscribe from this list: send the line "unsubscribe kvm" in >> > the body of a message to majordomo@vger.kernel.org >> > More majordomo info at http://vger.kernel.org/majordomo-info.html >> > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html >
On Fri, Dec 24, 2010 at 10:14:50PM +0900, Yoshiaki Tamura wrote: > 2010/12/24 Michael S. Tsirkin <mst@redhat.com>: > > On Fri, Dec 24, 2010 at 08:22:00PM +0900, Yoshiaki Tamura wrote: > >> 2010/12/24 Michael S. Tsirkin <mst@redhat.com>: > >> > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote: > >> >> virtio save/load is currently sending last_avail_idx, but inuse isn't. > >> >> This causes inconsistent state when using Kemari which replays > >> >> outstanding requests on the secondary. By letting last_avail_idx to > >> >> be updated after inuse is decreased, it would be possible to replay > >> >> the outstanding requests. Noth that live migration shouldn't be > >> >> affected because it waits until flushing all requests. Also in > >> >> conjunction with event-tap, requests inversion should be avoided. > >> >> > >> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> > >> > > >> > I think I understood the request inversion. My question now is, > >> > event-tap transfers inuse events as well, wont the same > >> > request be repeated twice? 
> >> > > >> >> --- > >> >> hw/virtio.c | 8 +++++++- > >> >> 1 files changed, 7 insertions(+), 1 deletions(-) > >> >> > >> >> diff --git a/hw/virtio.c b/hw/virtio.c > >> >> index 07dbf86..f915c46 100644 > >> >> --- a/hw/virtio.c > >> >> +++ b/hw/virtio.c > >> >> @@ -72,7 +72,7 @@ struct VirtQueue > >> >> VRing vring; > >> >> target_phys_addr_t pa; > >> >> uint16_t last_avail_idx; > >> >> - int inuse; > >> >> + uint16_t inuse; > >> >> uint16_t vector; > >> >> void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); > >> >> VirtIODevice *vdev; > >> >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) > >> >> qemu_put_be32(f, vdev->vq[i].vring.num); > >> >> qemu_put_be64(f, vdev->vq[i].pa); > >> >> qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); > >> >> + qemu_put_be16s(f, &vdev->vq[i].inuse); > >> >> if (vdev->binding->save_queue) > >> >> vdev->binding->save_queue(vdev->binding_opaque, i, f); > >> >> } > >> >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >> >> vdev->vq[i].vring.num = qemu_get_be32(f); > >> >> vdev->vq[i].pa = qemu_get_be64(f); > >> >> qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); > >> >> + qemu_get_be16s(f, &vdev->vq[i].inuse); > >> >> + > >> >> + /* revert last_avail_idx if there are outstanding emulation. */ > >> > > >> > if there are outstanding emulation -> if requests > >> > are outstanding in event-tap? > >> > > >> >> + vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse; > >> >> + vdev->vq[i].inuse = 0; > >> >> > >> > > >> > I don't understand it, if this is all we do we can equivalently > >> > decrement on the sender side and avoid breaking migration compatibility? > >> > >> It seems I sent the old patch... I'm really sorry. Currently > >> I'm taking the approach to update last_avai_idx later. > >> Decreasing looks scary to me if the guest already knows about it. > > > > It seems exactly the same functionally. > > If it is the same I'm fine to go with the decreasing approach. 
> Is it fine for the guest? Is last_avail_idx irrelevant to the > guest's behavior? > > Yoshi At least at the moment, yes. > >> commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9 > >> Author: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> > >> Date: Mon May 17 10:36:14 2010 +0900 > >> > >> virtio: update last_avail_idx when inuse is decreased. > >> > >> virtio save/load is currently sending last_avail_idx, but inuse isn't. > >> This causes inconsistent state when using Kemari which replays > >> outstanding requests on the secondary. By letting last_avail_idx to > >> be updated after inuse is decreased, it would be possible to replay > >> the outstanding requests. Note that live migration shouldn't be > >> affected because it waits until flushing all requests. Also in > >> conjunction with event-tap, requests inversion should be avoided. > >> > >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> > >> > >> diff --git a/hw/virtio.c b/hw/virtio.c > >> index 07dbf86..b1586da 100644 > >> --- a/hw/virtio.c > >> +++ b/hw/virtio.c > >> @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq) > >> > >> int virtio_queue_empty(VirtQueue *vq) > >> { > >> - return vring_avail_idx(vq) == vq->last_avail_idx; > >> + return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse; > >> } > >> > >> void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, > >> @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) > >> wmb(); > >> trace_virtqueue_flush(vq, count); > >> vring_used_idx_increment(vq, count); > >> + vq->last_avail_idx += count; > >> vq->inuse -= count; > >> } > >> > >> @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o > >> unsigned int idx; > >> int total_bufs, in_total, out_total; > >> > >> - idx = vq->last_avail_idx; > >> + idx = vq->last_avail_idx + vq->inuse; > >> > >> total_bufs = in_total = out_total = 0; > >> while (virtqueue_num_heads(vq, idx)) { > >> @@ -386,7 +387,7 @@ int 
virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) > >> unsigned int i, head, max; > >> target_phys_addr_t desc_pa = vq->vring.desc; > >> > >> - if (!virtqueue_num_heads(vq, vq->last_avail_idx)) > >> + if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse)) > >> return 0; > >> > >> /* When we start there are none of either input nor output. */ > >> @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) > >> > >> max = vq->vring.num; > >> > >> - i = head = virtqueue_get_head(vq, vq->last_avail_idx++); > >> + i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse); > >> > >> if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { > >> if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { > >> @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) > >> /* Always notify when queue is empty (when feature acknowledge) */ > >> if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) && > >> (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) || > >> - (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx))) > >> + (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse))) > >> return; > >> > >> trace_virtio_notify(vdev, vq); > >> > >> > >> > > >> >> if (vdev->vq[i].pa) { > >> >> uint16_t nheads; > >> >> -- > >> >> 1.7.1.2 > >> > -- > >> > To unsubscribe from this list: send the line "unsubscribe kvm" in > >> > the body of a message to majordomo@vger.kernel.org > >> > More majordomo info at http://vger.kernel.org/majordomo-info.html > >> > > > -- > > To unsubscribe from this list: send the line "unsubscribe kvm" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > >
2010/12/24 Michael S. Tsirkin <mst@redhat.com>: > On Fri, Dec 24, 2010 at 10:14:50PM +0900, Yoshiaki Tamura wrote: >> 2010/12/24 Michael S. Tsirkin <mst@redhat.com>: >> > On Fri, Dec 24, 2010 at 08:22:00PM +0900, Yoshiaki Tamura wrote: >> >> 2010/12/24 Michael S. Tsirkin <mst@redhat.com>: >> >> > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote: >> >> >> virtio save/load is currently sending last_avail_idx, but inuse isn't. >> >> >> This causes inconsistent state when using Kemari which replays >> >> >> outstanding requests on the secondary. By letting last_avail_idx to >> >> >> be updated after inuse is decreased, it would be possible to replay >> >> >> the outstanding requests. Noth that live migration shouldn't be >> >> >> affected because it waits until flushing all requests. Also in >> >> >> conjunction with event-tap, requests inversion should be avoided. >> >> >> >> >> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> >> >> > >> >> > I think I understood the request inversion. My question now is, >> >> > event-tap transfers inuse events as well, wont the same >> >> > request be repeated twice? 
>> >> > >> >> >> --- >> >> >> hw/virtio.c | 8 +++++++- >> >> >> 1 files changed, 7 insertions(+), 1 deletions(-) >> >> >> >> >> >> diff --git a/hw/virtio.c b/hw/virtio.c >> >> >> index 07dbf86..f915c46 100644 >> >> >> --- a/hw/virtio.c >> >> >> +++ b/hw/virtio.c >> >> >> @@ -72,7 +72,7 @@ struct VirtQueue >> >> >> VRing vring; >> >> >> target_phys_addr_t pa; >> >> >> uint16_t last_avail_idx; >> >> >> - int inuse; >> >> >> + uint16_t inuse; >> >> >> uint16_t vector; >> >> >> void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); >> >> >> VirtIODevice *vdev; >> >> >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) >> >> >> qemu_put_be32(f, vdev->vq[i].vring.num); >> >> >> qemu_put_be64(f, vdev->vq[i].pa); >> >> >> qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); >> >> >> + qemu_put_be16s(f, &vdev->vq[i].inuse); >> >> >> if (vdev->binding->save_queue) >> >> >> vdev->binding->save_queue(vdev->binding_opaque, i, f); >> >> >> } >> >> >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) >> >> >> vdev->vq[i].vring.num = qemu_get_be32(f); >> >> >> vdev->vq[i].pa = qemu_get_be64(f); >> >> >> qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); >> >> >> + qemu_get_be16s(f, &vdev->vq[i].inuse); >> >> >> + >> >> >> + /* revert last_avail_idx if there are outstanding emulation. */ >> >> > >> >> > if there are outstanding emulation -> if requests >> >> > are outstanding in event-tap? >> >> > >> >> >> + vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse; >> >> >> + vdev->vq[i].inuse = 0; >> >> >> >> >> > >> >> > I don't understand it, if this is all we do we can equivalently >> >> > decrement on the sender side and avoid breaking migration compatibility? >> >> >> >> It seems I sent the old patch... I'm really sorry. Currently >> >> I'm taking the approach to update last_avai_idx later. >> >> Decreasing looks scary to me if the guest already knows about it. >> > >> > It seems exactly the same functionally. 
>> >> If it is the same I'm fine to go with the decreasing approach. >> Is it fine for the guest? Is last_avail_idx irrelevant to the >> guest's behavior? >> >> Yoshi > > At least at the moment, yes. OK. I'll put it in the next spin. Thanks for your advice! Yoshi > >> >> commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9 >> >> Author: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> >> >> Date: Mon May 17 10:36:14 2010 +0900 >> >> >> >> virtio: update last_avail_idx when inuse is decreased. >> >> >> >> virtio save/load is currently sending last_avail_idx, but inuse isn't. >> >> This causes inconsistent state when using Kemari which replays >> >> outstanding requests on the secondary. By letting last_avail_idx to >> >> be updated after inuse is decreased, it would be possible to replay >> >> the outstanding requests. Note that live migration shouldn't be >> >> affected because it waits until flushing all requests. Also in >> >> conjunction with event-tap, requests inversion should be avoided. 
>> >> >> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp> >> >> >> >> diff --git a/hw/virtio.c b/hw/virtio.c >> >> index 07dbf86..b1586da 100644 >> >> --- a/hw/virtio.c >> >> +++ b/hw/virtio.c >> >> @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq) >> >> >> >> int virtio_queue_empty(VirtQueue *vq) >> >> { >> >> - return vring_avail_idx(vq) == vq->last_avail_idx; >> >> + return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse; >> >> } >> >> >> >> void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, >> >> @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) >> >> wmb(); >> >> trace_virtqueue_flush(vq, count); >> >> vring_used_idx_increment(vq, count); >> >> + vq->last_avail_idx += count; >> >> vq->inuse -= count; >> >> } >> >> >> >> @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o >> >> unsigned int idx; >> >> int total_bufs, in_total, out_total; >> >> >> >> - idx = vq->last_avail_idx; >> >> + idx = vq->last_avail_idx + vq->inuse; >> >> >> >> total_bufs = in_total = out_total = 0; >> >> while (virtqueue_num_heads(vq, idx)) { >> >> @@ -386,7 +387,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) >> >> unsigned int i, head, max; >> >> target_phys_addr_t desc_pa = vq->vring.desc; >> >> >> >> - if (!virtqueue_num_heads(vq, vq->last_avail_idx)) >> >> + if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse)) >> >> return 0; >> >> >> >> /* When we start there are none of either input nor output. 
*/ >> >> @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) >> >> >> >> max = vq->vring.num; >> >> >> >> - i = head = virtqueue_get_head(vq, vq->last_avail_idx++); >> >> + i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse); >> >> >> >> if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { >> >> if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { >> >> @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) >> >> /* Always notify when queue is empty (when feature acknowledge) */ >> >> if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) && >> >> (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) || >> >> - (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx))) >> >> + (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse))) >> >> return; >> >> >> >> trace_virtio_notify(vdev, vq); >> >> >> >> >> >> > >> >> >> if (vdev->vq[i].pa) { >> >> >> uint16_t nheads; >> >> >> -- >> >> >> 1.7.1.2 >> >> > -- >> >> > To unsubscribe from this list: send the line "unsubscribe kvm" in >> >> > the body of a message to majordomo@vger.kernel.org >> >> > More majordomo info at http://vger.kernel.org/majordomo-info.html >> >> > >> > -- >> > To unsubscribe from this list: send the line "unsubscribe kvm" in >> > the body of a message to majordomo@vger.kernel.org >> > More majordomo info at http://vger.kernel.org/majordomo-info.html >> > > >
diff --git a/hw/virtio.c b/hw/virtio.c index 07dbf86..b1586da 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq) int virtio_queue_empty(VirtQueue *vq) { - return vring_avail_idx(vq) == vq->last_avail_idx; + return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse; } void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) wmb(); trace_virtqueue_flush(vq, count); vring_used_idx_increment(vq, count); + vq->last_avail_idx += count; vq->inuse -= count; } @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o unsigned int idx; int total_bufs, in_total, out_total; - idx = vq->last_avail_idx; + idx = vq->last_avail_idx + vq->inuse; total_bufs = in_total = out_total = 0; while (virtqueue_num_heads(vq, idx)) { @@ -386,7 +387,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) unsigned int i, head, max; target_phys_addr_t desc_pa = vq->vring.desc; - if (!virtqueue_num_heads(vq, vq->last_avail_idx)) + if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse)) return 0; /* When we start there are none of either input nor output. */ @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) max = vq->vring.num; - i = head = virtqueue_get_head(vq, vq->last_avail_idx++); + i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse); if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) /* Always notify when queue is empty (when feature acknowledge) */ if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) && (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) || - (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx))) + (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse))) return; trace_virtio_notify(vdev, vq);