diff mbox

[RFC,0/1] vhost: Reduce TX used buffer signal for performance

Message ID 1288213557.17571.28.camel@localhost.localdomain
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Shirley Ma Oct. 27, 2010, 9:05 p.m. UTC
This patch changes vhost TX used buffer signal to guest from one by
one to up to 3/4 of vring size. This change improves vhost TX message 
size from 256 to 8K performance for both bandwidth and CPU utilization 
without inducing any regression.

Signed-off-by: Shirley Ma <xma@us.ibm.com>
---

 drivers/vhost/net.c   |   19 ++++++++++++++++++-
 drivers/vhost/vhost.c |   31 +++++++++++++++++++++++++++++++
 drivers/vhost/vhost.h |    3 +++
 3 files changed, 52 insertions(+), 1 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Stefan Hajnoczi Oct. 28, 2010, 8:57 a.m. UTC | #1
On Wed, Oct 27, 2010 at 10:05 PM, Shirley Ma <mashirle@us.ibm.com> wrote:
> This patch changes vhost TX used buffer signal to guest from one by
> one to up to 3/4 of vring size. This change improves vhost TX message
> size from 256 to 8K performance for both bandwidth and CPU utilization
> without inducing any regression.

Any concerns about introducing latency or does the guest not care when
TX completions come in?

> Signed-off-by: Shirley Ma <xma@us.ibm.com>
> ---
>
>  drivers/vhost/net.c   |   19 ++++++++++++++++++-
>  drivers/vhost/vhost.c |   31 +++++++++++++++++++++++++++++++
>  drivers/vhost/vhost.h |    3 +++
>  3 files changed, 52 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 4b4da5b..bd1ba71 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -198,7 +198,24 @@ static void handle_tx(struct vhost_net *net)
>                if (err != len)
>                        pr_debug("Truncated TX packet: "
>                                 " len %d != %zd\n", err, len);
> -               vhost_add_used_and_signal(&net->dev, vq, head, 0);
> +               /*
> +                * if no pending buffer size allocate, signal used buffer
> +                * one by one, otherwise, signal used buffer when reaching
> +                * 3/4 ring size to reduce CPU utilization.
> +                */
> +               if (unlikely(vq->pend))
> +                       vhost_add_used_and_signal(&net->dev, vq, head, 0);
> +               else {
> +                       vq->pend[vq->num_pend].id = head;

I don't understand the logic here: if !vq->pend then we assign to
vq->pend[vq->num_pend].

> +                       vq->pend[vq->num_pend].len = 0;
> +                       ++vq->num_pend;
> +                       if (vq->num_pend == (vq->num - (vq->num >> 2))) {
> +                               vhost_add_used_and_signal_n(&net->dev, vq,
> +                                                           vq->pend,
> +                                                           vq->num_pend);
> +                               vq->num_pend = 0;
> +                       }
> +               }
>                total_len += len;
>                if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
>                        vhost_poll_queue(&vq->poll);
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 94701ff..47696d2 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -170,6 +170,16 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>        vq->call_ctx = NULL;
>        vq->call = NULL;
>        vq->log_ctx = NULL;
> +       /* signal pending used buffers */
> +       if (vq->pend) {
> +               if (vq->num_pend != 0) {
> +                       vhost_add_used_and_signal_n(dev, vq, vq->pend,
> +                                                   vq->num_pend);
> +                       vq->num_pend = 0;
> +               }
> +               kfree(vq->pend);
> +       }
> +       vq->pend = NULL;
>  }
>
>  static int vhost_worker(void *data)
> @@ -273,7 +283,13 @@ long vhost_dev_init(struct vhost_dev *dev,
>                dev->vqs[i].heads = NULL;
>                dev->vqs[i].dev = dev;
>                mutex_init(&dev->vqs[i].mutex);
> +               dev->vqs[i].num_pend = 0;
> +               dev->vqs[i].pend = NULL;
>                vhost_vq_reset(dev, dev->vqs + i);
> +               /* signal 3/4 of ring size used buffers */
> +               dev->vqs[i].pend = kmalloc((dev->vqs[i].num -
> +                                          (dev->vqs[i].num >> 2)) *
> +                                          sizeof *vq->peed, GFP_KERNEL);

Has this patch been compile tested?  vq->peed?

Stefan
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stefan Hajnoczi Oct. 28, 2010, 8:59 a.m. UTC | #2
On Thu, Oct 28, 2010 at 9:57 AM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
Just read the patch 1/1 discussion and it looks like you're already on
it.  Sorry for the noise.

Stefan
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 4b4da5b..bd1ba71 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -198,7 +198,24 @@  static void handle_tx(struct vhost_net *net)
 		if (err != len)
 			pr_debug("Truncated TX packet: "
 				 " len %d != %zd\n", err, len);
-		vhost_add_used_and_signal(&net->dev, vq, head, 0);
+		/*
+		 * if no pending buffer size allocate, signal used buffer
+		 * one by one, otherwise, signal used buffer when reaching
+		 * 3/4 ring size to reduce CPU utilization.
+		 */
+		if (unlikely(vq->pend))
+			vhost_add_used_and_signal(&net->dev, vq, head, 0);
+		else {
+			vq->pend[vq->num_pend].id = head;
+			vq->pend[vq->num_pend].len = 0;
+			++vq->num_pend;
+			if (vq->num_pend == (vq->num - (vq->num >> 2))) {
+				vhost_add_used_and_signal_n(&net->dev, vq,
+							    vq->pend,
+							    vq->num_pend);
+				vq->num_pend = 0;
+			}
+		}
 		total_len += len;
 		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
 			vhost_poll_queue(&vq->poll);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 94701ff..47696d2 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -170,6 +170,16 @@  static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->call_ctx = NULL;
 	vq->call = NULL;
 	vq->log_ctx = NULL;
+	/* signal pending used buffers */
+	if (vq->pend) {
+		if (vq->num_pend != 0) {
+			vhost_add_used_and_signal_n(dev, vq, vq->pend,
+						    vq->num_pend);
+			vq->num_pend = 0;
+		}
+		kfree(vq->pend);
+	}
+	vq->pend = NULL;
 }
 
 static int vhost_worker(void *data)
@@ -273,7 +283,13 @@  long vhost_dev_init(struct vhost_dev *dev,
 		dev->vqs[i].heads = NULL;
 		dev->vqs[i].dev = dev;
 		mutex_init(&dev->vqs[i].mutex);
+		dev->vqs[i].num_pend = 0;
+		dev->vqs[i].pend = NULL;
 		vhost_vq_reset(dev, dev->vqs + i);
+		/* signal 3/4 of ring size used buffers */
+		dev->vqs[i].pend = kmalloc((dev->vqs[i].num -
+					   (dev->vqs[i].num >> 2)) *
+					   sizeof *vq->peed, GFP_KERNEL);
 		if (dev->vqs[i].handle_kick)
 			vhost_poll_init(&dev->vqs[i].poll,
 					dev->vqs[i].handle_kick, POLLIN, dev);
@@ -599,6 +615,21 @@  static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
 			r = -EINVAL;
 			break;
 		}
+		if (vq->num != s.num) {
+			/* signal used buffers first */
+			if (vq->pend) {
+				if (vq->num_pend != 0) {
+					vhost_add_used_and_signal_n(vq->dev, vq,
+								    vq->pend,
+								    vq->num_pend);
+					vq->num_pend = 0;
+				}
+				kfree(vq->pend);
+			}
+			/* realloc pending used buffers size */
+			vq->pend = kmalloc((s.num - (s.num >> 2)) *
+					   sizeof *vq->pend, GFP_KERNEL);
+		}
 		vq->num = s.num;
 		break;
 	case VHOST_SET_VRING_BASE:
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 073d06a..78949c0 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -108,6 +108,9 @@  struct vhost_virtqueue {
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log *log;
+	/* delay multiple used buffers to signal once */
+	int num_pend;
+	struct vring_used_elem *pend;
 };
 
 struct vhost_dev {