
[v4,4/6] virtio: virtqueue_ordered_flush - VIRTIO_F_IN_ORDER support

Message ID 20240710125522.4168043-5-jonah.palmer@oracle.com
State New
Series virtio,vhost: Add VIRTIO_F_IN_ORDER support

Commit Message

Jonah Palmer July 10, 2024, 12:55 p.m. UTC
Add VIRTIO_F_IN_ORDER feature support for the virtqueue_flush operation.

The goal of the virtqueue_ordered_flush operation when the
VIRTIO_F_IN_ORDER feature has been negotiated is to write elements to
the used/descriptor ring in-order and then update used_idx.

The function iterates through the VirtQueueElement used_elems array
in-order starting at vq->used_idx. If the element is valid (filled), the
element is written to the used/descriptor ring. This process continues
until we find an invalid (not filled) element.

For packed VQs, the first entry (at vq->used_idx) is written to the
descriptor ring last so the guest doesn't see any invalid descriptors.

If any elements were written, the used_idx is updated.

Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
---
Several fixes here for the split VQ case:
- Ensure all previous write operations to buffers are completed before
  updating the used_idx (via smp_wmb()).

- The used_elems index 'i' should be incremented by the number of
  descriptors in the element just processed, not by the running total of
  descriptors already seen. Incrementing by the running total would have
  caused batched operations to skip over ordered elements when looping
  through the used_elems array.

- Do not keep the VQ's used_idx bound between 0 and vring.num-1 when
  setting it via vring_used_idx_set().

  While the packed VQ case naturally keeps used_idx bound between 0 and
  vring.num-1, the split VQ case must not. This is because used_idx is
  compared against the guest's event index and the old used index to
  decide whether a notification is necessary (see
  virtio_split_should_notify()). That comparison expects used_idx to be
  a free-running value between 0 and 65535, not between 0 and
  vring.num-1 (see the sketch below).
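
To make the last point concrete, here is a small, self-contained sketch
(not the QEMU code) of the event-index window check that
virtio_split_should_notify() relies on. The vring_need_event() formula
follows the virtio spec; the 256-entry ring, the example index values
and the main() driver are made up purely for illustration.

/*
 * Illustrative sketch only: the split ring's event-index check.  All
 * indices are free-running uint16_t counters that wrap at 65536,
 * never at vring.num.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool vring_need_event(uint16_t event_idx, uint16_t new_idx,
                             uint16_t old_idx)
{
    /* True iff event_idx lies in the window [old_idx, new_idx) mod 2^16. */
    return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old_idx);
}

int main(void)
{
    /*
     * Hypothetical 256-entry split ring that has already completed 300
     * buffers; the guest asked to be notified once index 299 is used.
     */
    uint16_t old_used  = 298;  /* used_idx before this flush         */
    uint16_t new_used  = 300;  /* used_idx after flushing 2 elements */
    uint16_t event_idx = 299;  /* guest's used_event                 */

    /* Free-running indices: the check fires as expected (prints 1). */
    printf("notify = %d\n", vring_need_event(event_idx, new_used, old_used));

    /*
     * If used_idx were clamped to vring.num-1 (300 % 256 == 44 here)
     * while the guest's event index kept counting, the same check
     * would miss the notification (prints 0).
     */
    printf("notify = %d\n",
           vring_need_event(event_idx, new_used % 256, old_used % 256));
    return 0;
}

This is why vring_used_idx_set() is now fed the unclamped 16-bit value
in the split case.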

 hw/virtio/virtio.c | 70 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

Comments

Eugenio Pérez July 10, 2024, 4:16 p.m. UTC | #1
On Wed, Jul 10, 2024 at 2:56 PM Jonah Palmer <jonah.palmer@oracle.com> wrote:
>
> Add VIRTIO_F_IN_ORDER feature support for the virtqueue_flush operation.
>
> The goal of the virtqueue_ordered_flush operation when the
> VIRTIO_F_IN_ORDER feature has been negotiated is to write elements to
> the used/descriptor ring in-order and then update used_idx.
>
> The function iterates through the VirtQueueElement used_elems array
> in-order starting at vq->used_idx. If the element is valid (filled), the
> element is written to the used/descriptor ring. This process continues
> until we find an invalid (not filled) element.
>
> For packed VQs, the first entry (at vq->used_idx) is written to the
> descriptor ring last so the guest doesn't see any invalid descriptors.
>
> If any elements were written, the used_idx is updated.
>
> Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>

Acked-by: Eugenio Pérez <eperezma@redhat.com>

Patch

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 0000a7b41c..b419d8d6e7 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1023,6 +1023,72 @@ static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
     }
 }
 
+static void virtqueue_ordered_flush(VirtQueue *vq)
+{
+    unsigned int i = vq->used_idx % vq->vring.num;
+    unsigned int ndescs = 0;
+    uint16_t old = vq->used_idx;
+    uint16_t new;
+    bool packed;
+    VRingUsedElem uelem;
+
+    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
+
+    if (packed) {
+        if (unlikely(!vq->vring.desc)) {
+            return;
+        }
+    } else if (unlikely(!vq->vring.used)) {
+        return;
+    }
+
+    /* First expected in-order element isn't ready, nothing to do */
+    if (!vq->used_elems[i].in_order_filled) {
+        return;
+    }
+
+    /* Search for filled elements in-order */
+    while (vq->used_elems[i].in_order_filled) {
+        /*
+         * First entry for packed VQs is written last so the guest
+         * doesn't see invalid descriptors.
+         */
+        if (packed && i != vq->used_idx) {
+            virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false);
+        } else if (!packed) {
+            uelem.id = vq->used_elems[i].index;
+            uelem.len = vq->used_elems[i].len;
+            vring_used_write(vq, &uelem, i);
+        }
+
+        vq->used_elems[i].in_order_filled = false;
+        ndescs += vq->used_elems[i].ndescs;
+        i += vq->used_elems[i].ndescs;
+        if (i >= vq->vring.num) {
+            i -= vq->vring.num;
+        }
+    }
+
+    if (packed) {
+        virtqueue_packed_fill_desc(vq, &vq->used_elems[vq->used_idx], 0, true);
+        vq->used_idx += ndescs;
+        if (vq->used_idx >= vq->vring.num) {
+            vq->used_idx -= vq->vring.num;
+            vq->used_wrap_counter ^= 1;
+            vq->signalled_used_valid = false;
+        }
+    } else {
+        /* Make sure buffer is written before we update index. */
+        smp_wmb();
+        new = old + ndescs;
+        vring_used_idx_set(vq, new);
+        if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
+            vq->signalled_used_valid = false;
+        }
+    }
+    vq->inuse -= ndescs;
+}
+
 void virtqueue_flush(VirtQueue *vq, unsigned int count)
 {
     if (virtio_device_disabled(vq->vdev)) {
@@ -1030,7 +1096,9 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
         return;
     }
 
-    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) {
+        virtqueue_ordered_flush(vq);
+    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
         virtqueue_packed_flush(vq, count);
     } else {
         virtqueue_split_flush(vq, count);