[v3] Add Mergeable receive buffer support to vhost_net

This patch adds support for the Mergeable Receive Buffers feature to
vhost_net.

						+-DLS

Changes from previous revision:
1) renamed:
	vhost_discard_vq_desc -> vhost_discard_desc
	vhost_get_heads -> vhost_get_desc_n
	vhost_get_vq_desc -> vhost_get_desc
2) added heads as argument to vhost_get_desc_n
3) changed "vq->heads" from iovec to vring_used_elem, removed casts
4) changed vhost_add_used to write multiple elements in a single
	copy_to_user, or two when we wrap the ring.
5) removed rxmaxheadcount and available buffer checks in favor of
	running until an allocation failure, but making sure we break the
	loop if we get two failures in a row, indicating we have at least
	1 buffer, but not enough for the current receive packet
6) restore non-vnet header handling

Signed-off-by: David L Stevens <dlstevens@us.ibm.com>

diff -ruNp net-next-p0/drivers/vhost/vhost.c net-next-v3/drivers/vhost/vhost.c

--- net-next-p0/drivers/vhost/vhost.c	2010-03-22 12:04:38.000000000 -0700
+++ net-next-v3/drivers/vhost/vhost.c	2010-04-06 12:57:51.000000000 -0700
@@ -856,6 +856,47 @@ static unsigned get_indirect(struct vhos
 	return 0;
 }
 
+/* This is a multi-buffer version of vhost_get_desc
+ * @vq		- the relevant virtqueue
+ * @datalen	- data length we'll be reading
+ * @iovcount	- returned count of io vectors we fill
+ * @log		- vhost log
+ * @log_num	- log offset
+ *	returns number of buffer heads allocated, 0 on error
+ */
+int vhost_get_desc_n(struct vhost_virtqueue *vq, struct vring_used_elem
*heads,
+		     int datalen, int *iovcount, struct vhost_log *log,
+		     unsigned int *log_num)
+{
+	int out, in;
+	int seg = 0;		/* iov index */
+	int hc = 0;		/* head count */
+
+	while (datalen > 0) {
+		if (hc >= VHOST_NET_MAX_SG)
+			goto err;
+		heads[hc].id = vhost_get_desc(vq->dev, vq, vq->iov+seg,
+					      ARRAY_SIZE(vq->iov)-seg, &out,
+					      &in, log, log_num);
+		if (heads[hc].id == vq->num)
+			goto err;
+		if (out || in <= 0) {
+			vq_err(vq, "unexpected descriptor format for RX: "
+				"out %d, in %d\n", out, in);
+			goto err;
+		}
+		heads[hc].len = iov_length(vq->iov+seg, in);
+		datalen -= heads[hc].len;
+		hc++;
+		seg += in;
+	}
+	*iovcount = seg;
+	return hc;
+err:
+	vhost_discard_desc(vq, hc);
+	return 0;
+}
+
 /* This looks in the virtqueue and for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
@@ -863,7 +904,7 @@ static unsigned get_indirect(struct vhos
  *
  * This function returns the descriptor number found, or vq->num (which
  * is never a valid descriptor number) if none was found. */
-unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct
vhost_virtqueue *vq,
+unsigned vhost_get_desc(struct vhost_dev *dev, struct vhost_virtqueue
*vq,
 			   struct iovec iov[], unsigned int iov_size,
 			   unsigned int *out_num, unsigned int *in_num,
 			   struct vhost_log *log, unsigned int *log_num)
@@ -981,31 +1022,42 @@ unsigned vhost_get_vq_desc(struct vhost_
 }
 
 /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq)
+void vhost_discard_desc(struct vhost_virtqueue *vq, int n)
 {
-	vq->last_avail_idx--;
+	vq->last_avail_idx -= n;
 }
 
 /* After we've used one of their buffers, we tell them about it.  We'll then
  * want to notify the guest, using eventfd. */
-int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int
len)
+int vhost_add_used(struct vhost_virtqueue *vq, struct vring_used_elem
*heads,
+		   int count)
 {
 	struct vring_used_elem *used;
+	int start, n;
+
+	if (count <= 0)
+		return -EINVAL;
 
-	/* The virtqueue contains a ring of used buffers.  Get a pointer to the
-	 * next entry in that used ring. */
-	used = &vq->used->ring[vq->last_used_idx % vq->num];
-	if (put_user(head, &used->id)) {
-		vq_err(vq, "Failed to write used id");
+	start = vq->last_used_idx % vq->num;
+	if (vq->num - start < count)
+		n = vq->num - start;
+	else
+		n = count;
+	used = vq->used->ring + start;
+	if (copy_to_user(used, heads, sizeof(heads[0])*n)) {
+		vq_err(vq, "Failed to write used");
 		return -EFAULT;
 	}
-	if (put_user(len, &used->len)) {
-		vq_err(vq, "Failed to write used len");
-		return -EFAULT;
+	if (n < count) {	/* wrapped the ring */
+		used = vq->used->ring;
+		if (copy_to_user(used, heads+n, sizeof(heads[0])*(count-n))) {
+			vq_err(vq, "Failed to write used");
+			return -EFAULT;
+		}
 	}
 	/* Make sure buffer is written before we update index. */
 	smp_wmb();
-	if (put_user(vq->last_used_idx + 1, &vq->used->idx)) {
+	if (put_user(vq->last_used_idx+count, &vq->used->idx)) {
 		vq_err(vq, "Failed to increment used idx");
 		return -EFAULT;
 	}
@@ -1023,7 +1075,7 @@ int vhost_add_used(struct vhost_virtqueu
 		if (vq->log_ctx)
 			eventfd_signal(vq->log_ctx, 1);
 	}
-	vq->last_used_idx++;
+	vq->last_used_idx += count;
 	return 0;
 }
 
@@ -1049,10 +1101,23 @@ void vhost_signal(struct vhost_dev *dev,
 
 /* And here's the combo meal deal.  Supersize me! */
 void vhost_add_used_and_signal(struct vhost_dev *dev,
-			       struct vhost_virtqueue *vq,
-			       unsigned int head, int len)
+			       struct vhost_virtqueue *vq, unsigned int id,
+			       int len)
+{
+	struct vring_used_elem head;
+
+	head.id = id;
+	head.len = len;
+	vhost_add_used(vq, &head, 1);
+	vhost_signal(dev, vq);
+}
+
+/* multi-buffer version of vhost_add_used_and_signal */
+void vhost_add_used_and_signal_n(struct vhost_dev *dev,
+				 struct vhost_virtqueue *vq,
+				 struct vring_used_elem *heads, int count)
 {
-	vhost_add_used(vq, head, len);
+	vhost_add_used(vq, heads, count);
 	vhost_signal(dev, vq);
 }
 
diff -ruNp net-next-p0/drivers/vhost/vhost.h net-next-v3/drivers/vhost/vhost.h
--- net-next-p0/drivers/vhost/vhost.h	2010-03-22 12:04:38.000000000 -0700
+++ net-next-v3/drivers/vhost/vhost.h	2010-04-05 20:33:57.000000000 -0700
@@ -85,6 +85,7 @@ struct vhost_virtqueue {
 	struct iovec iov[VHOST_NET_MAX_SG];
 	struct iovec hdr[VHOST_NET_MAX_SG];
 	size_t hdr_size;
+	struct vring_used_elem heads[VHOST_NET_MAX_SG];
 	/* We use a kind of RCU to access private pointer.
 	 * All readers access it from workqueue, which makes it possible to
 	 * flush the workqueue instead of synchronize_rcu. Therefore readers do
@@ -120,16 +121,22 @@ long vhost_dev_ioctl(struct vhost_dev *,
 int vhost_vq_access_ok(struct vhost_virtqueue *vq);
 int vhost_log_access_ok(struct vhost_dev *);
 
-unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue
*,
+int vhost_get_desc_n(struct vhost_virtqueue *, struct vring_used_elem
*heads,
+		     int datalen, int *iovcount, struct vhost_log *log,
+		     unsigned int *log_num);
+unsigned vhost_get_desc(struct vhost_dev *, struct vhost_virtqueue *,
 			   struct iovec iov[], unsigned int iov_count,
 			   unsigned int *out_num, unsigned int *in_num,
 			   struct vhost_log *log, unsigned int *log_num);
-void vhost_discard_vq_desc(struct vhost_virtqueue *);
+void vhost_discard_desc(struct vhost_virtqueue *, int);
 
-int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int
len);
-void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
+int vhost_add_used(struct vhost_virtqueue *, struct vring_used_elem
*heads,
+		    int count);
 void vhost_add_used_and_signal(struct vhost_dev *, struct
vhost_virtqueue *,
-			       unsigned int head, int len);
+			       unsigned int id, int len);
+void vhost_add_used_and_signal_n(struct vhost_dev *, struct
vhost_virtqueue *,
+			       struct vring_used_elem *heads, int count);
+void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
 void vhost_disable_notify(struct vhost_virtqueue *);
 bool vhost_enable_notify(struct vhost_virtqueue *);
 
@@ -149,7 +156,8 @@ enum {
 	VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
 			 (1 << VIRTIO_RING_F_INDIRECT_DESC) |
 			 (1 << VHOST_F_LOG_ALL) |
-			 (1 << VHOST_NET_F_VIRTIO_NET_HDR),
+			 (1 << VHOST_NET_F_VIRTIO_NET_HDR) |
+			 (1 << VIRTIO_NET_F_MRG_RXBUF),
 };
 
 static inline int vhost_has_feature(struct vhost_dev *dev, int bit)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Message ID	1270585973.28407.3.camel@lab1.dls
State	Not Applicable, archived
Delegated to:	David Miller
Headers	show Return-Path: <netdev-owner@vger.kernel.org> Subject: [PATCH v3] Add Mergeable receive buffer support to vhost_net From: David L Stevens <dlstevens@us.ibm.com> Reply-To: dlstevens@us.ibm.com To: "Michael S. Tsirkin" <mst@redhat.com> Cc: kvm@vger.kernel.org, netdev@vger.kernel.org, rusty@rustcorp.com.au, virtualization@lists.osdl.org Content-Type: text/plain; charset="UTF-8" Organization: IBM Date: Tue, 06 Apr 2010 13:32:53 -0700 Message-ID: <1270585973.28407.3.camel@lab1.dls> Mime-Version: 1.0 Content-Transfer-Encoding: 7bit Sender: netdev-owner@vger.kernel.org Precedence: bulk

[v3] Add Mergeable receive buffer support to vhost_net

Commit Message

Comments

Patch