Message ID | 201102091107.20270.rusty@rustcorp.com.au |
---|---|
State | RFC, archived |
Delegated to: | David Miller |
Headers | show |
On Wed, Feb 09, 2011 at 11:07:20AM +1030, Rusty Russell wrote: > On Wed, 2 Feb 2011 03:12:22 pm Michael S. Tsirkin wrote: > > On Wed, Feb 02, 2011 at 10:09:18AM +0530, Krishna Kumar2 wrote: > > > > "Michael S. Tsirkin" <mst@redhat.com> 02/02/2011 03:11 AM > > > > > > > > On Tue, Feb 01, 2011 at 01:28:45PM -0800, Shirley Ma wrote: > > > > > On Tue, 2011-02-01 at 23:21 +0200, Michael S. Tsirkin wrote: > > > > > > Confused. We compare capacity to skb frags, no? > > > > > > That's sg I think ... > > > > > > > > > > Current guest kernel use indirect buffers, num_free returns how many > > > > > available descriptors not skb frags. So it's wrong here. > > > > > > > > > > Shirley > > > > > > > > I see. Good point. In other words when we complete the buffer > > > > it was indirect, but when we add a new one we > > > > can not allocate indirect so we consume. > > > > And then we start the queue and add will fail. > > > > I guess we need some kind of API to figure out > > > > whether the buf we complete was indirect? > > I've finally read this thread... I think we need to get more serious > with our stats gathering to diagnose these kind of performance issues. > > This is a start; it should tell us what is actually happening to the > virtio ring(s) without significant performance impact... > > Subject: virtio: CONFIG_VIRTIO_STATS > > For performance problems we'd like to know exactly what the ring looks > like. This patch adds stats indexed by how-full-ring-is; we could extend > it to also record them by how-used-ring-is if we need. > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Not sure whether the intent is to merge this. If yes - would it make sense to use tracing for this instead? That's what kvm does. 
> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig > --- a/drivers/virtio/Kconfig > +++ b/drivers/virtio/Kconfig > @@ -7,6 +7,14 @@ config VIRTIO_RING > tristate > depends on VIRTIO > > +config VIRTIO_STATS > + bool "Virtio debugging stats (EXPERIMENTAL)" > + depends on VIRTIO_RING > + select DEBUG_FS > + ---help--- > + Virtio stats collected by how full the ring is at any time, > + presented under debugfs/virtio/<name>-<vq>/<num-used>/ > + > config VIRTIO_PCI > tristate "PCI driver for virtio devices (EXPERIMENTAL)" > depends on PCI && EXPERIMENTAL > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -21,6 +21,7 @@ > #include <linux/virtio_config.h> > #include <linux/device.h> > #include <linux/slab.h> > +#include <linux/debugfs.h> > > /* virtio guest is communicating with a virtual "device" that actually runs on > * a host processor. Memory barriers are used to control SMP effects. */ > @@ -95,6 +96,11 @@ struct vring_virtqueue > /* How to notify other side. FIXME: commonalize hcalls! */ > void (*notify)(struct virtqueue *vq); > > +#ifdef CONFIG_VIRTIO_STATS > + struct vring_stat *stats; > + struct dentry *statdir; > +#endif > + > #ifdef DEBUG > /* They're supposed to lock for us. */ > unsigned int in_use; > @@ -106,6 +112,87 @@ struct vring_virtqueue > > #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) > > +#ifdef CONFIG_VIRTIO_STATS > +/* We have an array of these, indexed by how full the ring is. */ > +struct vring_stat { > + /* How many interrupts? */ > + size_t interrupt_nowork, interrupt_work; > + /* How many non-notify kicks, how many notify kicks, how many add notify? */ > + size_t kick_no_notify, kick_notify, add_notify; > + /* How many adds? */ > + size_t add_direct, add_indirect, add_fail; > + /* How many gets? */ > + size_t get; > + /* How many disable callbacks? */ > + size_t disable_cb; > + /* How many enables? 
*/ > + size_t enable_cb_retry, enable_cb_success; > +}; > + > +static struct dentry *virtio_stats; > + > +static void create_stat_files(struct vring_virtqueue *vq) > +{ > + char name[80]; > + unsigned int i; > + > + /* Racy in theory, but we don't care. */ > + if (!virtio_stats) > + virtio_stats = debugfs_create_dir("virtio-stats", NULL); > + > + sprintf(name, "%s-%s", dev_name(&vq->vq.vdev->dev), vq->vq.name); > + vq->statdir = debugfs_create_dir(name, virtio_stats); > + > + for (i = 0; i < vq->vring.num; i++) { > + struct dentry *dir; > + > + sprintf(name, "%i", i); > + dir = debugfs_create_dir(name, vq->statdir); > + debugfs_create_size_t("interrupt_nowork", 0400, dir, > + &vq->stats[i].interrupt_nowork); > + debugfs_create_size_t("interrupt_work", 0400, dir, > + &vq->stats[i].interrupt_work); > + debugfs_create_size_t("kick_no_notify", 0400, dir, > + &vq->stats[i].kick_no_notify); > + debugfs_create_size_t("kick_notify", 0400, dir, > + &vq->stats[i].kick_notify); > + debugfs_create_size_t("add_notify", 0400, dir, > + &vq->stats[i].add_notify); > + debugfs_create_size_t("add_direct", 0400, dir, > + &vq->stats[i].add_direct); > + debugfs_create_size_t("add_indirect", 0400, dir, > + &vq->stats[i].add_indirect); > + debugfs_create_size_t("add_fail", 0400, dir, > + &vq->stats[i].add_fail); > + debugfs_create_size_t("get", 0400, dir, > + &vq->stats[i].get); > + debugfs_create_size_t("disable_cb", 0400, dir, > + &vq->stats[i].disable_cb); > + debugfs_create_size_t("enable_cb_retry", 0400, dir, > + &vq->stats[i].enable_cb_retry); > + debugfs_create_size_t("enable_cb_success", 0400, dir, > + &vq->stats[i].enable_cb_success); > + } > +} > + > +static void delete_stat_files(struct vring_virtqueue *vq) > +{ > + debugfs_remove_recursive(vq->statdir); > +} > + > +#define add_stat(vq, name) \ > + do { \ > + struct vring_virtqueue *_vq = (vq); \ > + _vq->stats[_vq->num_free - _vq->vring.num].name++; \ > + } while (0) > + > +#else > +#define add_stat(vq, name) > +static void 
delete_stat_files(struct vring_virtqueue *vq) > +{ > +} > +#endif > + > /* Set up an indirect table of descriptors and add it to the queue. */ > static int vring_add_indirect(struct vring_virtqueue *vq, > struct scatterlist sg[], > @@ -121,6 +208,8 @@ static int vring_add_indirect(struct vri > if (!desc) > return -ENOMEM; > > + add_stat(vq, add_indirect); > + > /* Transfer entries from the sg list into the indirect page */ > for (i = 0; i < out; i++) { > desc[i].flags = VRING_DESC_F_NEXT; > @@ -183,17 +272,22 @@ int virtqueue_add_buf_gfp(struct virtque > BUG_ON(out + in == 0); > > if (vq->num_free < out + in) { > + add_stat(vq, add_fail); > pr_debug("Can't add buf len %i - avail = %i\n", > out + in, vq->num_free); > /* FIXME: for historical reasons, we force a notify here if > * there are outgoing parts to the buffer. Presumably the > * host should service the ring ASAP. */ > - if (out) > + if (out) { > + add_stat(vq, add_notify); > vq->notify(&vq->vq); > + } > END_USE(vq); > return -ENOSPC; > } > > + add_stat(vq, add_direct); > + > /* We're about to use some buffers from the free list. */ > vq->num_free -= out + in; > > @@ -248,9 +342,12 @@ void virtqueue_kick(struct virtqueue *_v > /* Need to update avail index before checking if we should notify */ > virtio_mb(); > > - if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) > + if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) { > + add_stat(vq, kick_notify); > /* Prod other side to tell it about changes. 
*/ > vq->notify(&vq->vq); > + } else > + add_stat(vq, kick_no_notify); > > END_USE(vq); > } > @@ -294,6 +391,8 @@ void *virtqueue_get_buf(struct virtqueue > > START_USE(vq); > > + add_stat(vq, get); > + > if (unlikely(vq->broken)) { > END_USE(vq); > return NULL; > @@ -333,6 +432,7 @@ void virtqueue_disable_cb(struct virtque > { > struct vring_virtqueue *vq = to_vvq(_vq); > > + add_stat(vq, disable_cb); > vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; > } > EXPORT_SYMBOL_GPL(virtqueue_disable_cb); > @@ -348,10 +448,12 @@ bool virtqueue_enable_cb(struct virtqueu > vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; > virtio_mb(); > if (unlikely(more_used(vq))) { > + add_stat(vq, enable_cb_retry); > END_USE(vq); > return false; > } > > + add_stat(vq, enable_cb_success); > END_USE(vq); > return true; > } > @@ -387,10 +489,12 @@ irqreturn_t vring_interrupt(int irq, voi > struct vring_virtqueue *vq = to_vvq(_vq); > > if (!more_used(vq)) { > + add_stat(vq, interrupt_nowork); > pr_debug("virtqueue interrupt with no work for %p\n", vq); > return IRQ_NONE; > } > > + add_stat(vq, interrupt_work); > if (unlikely(vq->broken)) > return IRQ_HANDLED; > > @@ -451,6 +555,15 @@ struct virtqueue *vring_new_virtqueue(un > } > vq->data[i] = NULL; > > +#ifdef CONFIG_VIRTIO_STATS > + vq->stats = kzalloc(sizeof(*vq->stats) * num, GFP_KERNEL); > + if (!vq->stats) { > + kfree(vq); > + return NULL; > + } > + create_stat_files(vq); > +#endif > + > return &vq->vq; > } > EXPORT_SYMBOL_GPL(vring_new_virtqueue); > @@ -458,6 +571,7 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue); > void vring_del_virtqueue(struct virtqueue *vq) > { > list_del(&vq->list); > + delete_stat_files(to_vvq(vq)); > kfree(to_vvq(vq)); > } > EXPORT_SYMBOL_GPL(vring_del_virtqueue); -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 9 Feb 2011 11:23:45 am Michael S. Tsirkin wrote: > On Wed, Feb 09, 2011 at 11:07:20AM +1030, Rusty Russell wrote: > > On Wed, 2 Feb 2011 03:12:22 pm Michael S. Tsirkin wrote: > > > On Wed, Feb 02, 2011 at 10:09:18AM +0530, Krishna Kumar2 wrote: > > > > > "Michael S. Tsirkin" <mst@redhat.com> 02/02/2011 03:11 AM > > > > > > > > > > On Tue, Feb 01, 2011 at 01:28:45PM -0800, Shirley Ma wrote: > > > > > > On Tue, 2011-02-01 at 23:21 +0200, Michael S. Tsirkin wrote: > > > > > > > Confused. We compare capacity to skb frags, no? > > > > > > > That's sg I think ... > > > > > > > > > > > > Current guest kernel use indirect buffers, num_free returns how many > > > > > > available descriptors not skb frags. So it's wrong here. > > > > > > > > > > > > Shirley > > > > > > > > > > I see. Good point. In other words when we complete the buffer > > > > > it was indirect, but when we add a new one we > > > > > can not allocate indirect so we consume. > > > > > And then we start the queue and add will fail. > > > > > I guess we need some kind of API to figure out > > > > > whether the buf we complete was indirect? > > > > I've finally read this thread... I think we need to get more serious > > with our stats gathering to diagnose these kind of performance issues. > > > > This is a start; it should tell us what is actually happening to the > > virtio ring(s) without significant performance impact... > > > > Subject: virtio: CONFIG_VIRTIO_STATS > > > > For performance problems we'd like to know exactly what the ring looks > > like. This patch adds stats indexed by how-full-ring-is; we could extend > > it to also record them by how-used-ring-is if we need. > > > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > > Not sure whether the intent is to merge this. If yes - > would it make sense to use tracing for this instead? > That's what kvm does. Intent wasn't; I've not used tracepoints before, but maybe we should consider a longer-term monitoring solution? Patch welcome! 
Cheers, Rusty. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Feb 09, 2011 at 12:09:35PM +1030, Rusty Russell wrote: > On Wed, 9 Feb 2011 11:23:45 am Michael S. Tsirkin wrote: > > On Wed, Feb 09, 2011 at 11:07:20AM +1030, Rusty Russell wrote: > > > On Wed, 2 Feb 2011 03:12:22 pm Michael S. Tsirkin wrote: > > > > On Wed, Feb 02, 2011 at 10:09:18AM +0530, Krishna Kumar2 wrote: > > > > > > "Michael S. Tsirkin" <mst@redhat.com> 02/02/2011 03:11 AM > > > > > > > > > > > > On Tue, Feb 01, 2011 at 01:28:45PM -0800, Shirley Ma wrote: > > > > > > > On Tue, 2011-02-01 at 23:21 +0200, Michael S. Tsirkin wrote: > > > > > > > > Confused. We compare capacity to skb frags, no? > > > > > > > > That's sg I think ... > > > > > > > > > > > > > > Current guest kernel use indirect buffers, num_free returns how many > > > > > > > available descriptors not skb frags. So it's wrong here. > > > > > > > > > > > > > > Shirley > > > > > > > > > > > > I see. Good point. In other words when we complete the buffer > > > > > > it was indirect, but when we add a new one we > > > > > > can not allocate indirect so we consume. > > > > > > And then we start the queue and add will fail. > > > > > > I guess we need some kind of API to figure out > > > > > > whether the buf we complete was indirect? > > > > > > I've finally read this thread... I think we need to get more serious > > > with our stats gathering to diagnose these kind of performance issues. > > > > > > This is a start; it should tell us what is actually happening to the > > > virtio ring(s) without significant performance impact... > > > > > > Subject: virtio: CONFIG_VIRTIO_STATS > > > > > > For performance problems we'd like to know exactly what the ring looks > > > like. This patch adds stats indexed by how-full-ring-is; we could extend > > > it to also record them by how-used-ring-is if we need. > > > > > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > > > > Not sure whether the intent is to merge this. If yes - > > would it make sense to use tracing for this instead? 
> > That's what kvm does. > > Intent wasn't; I've not used tracepoints before, but maybe we should > consider a longer-term monitoring solution? > > Patch welcome! > > Cheers, > Rusty. Sure, I'll look into this.
On Wed, Feb 9, 2011 at 1:55 AM, Michael S. Tsirkin <mst@redhat.com> wrote: > On Wed, Feb 09, 2011 at 12:09:35PM +1030, Rusty Russell wrote: >> On Wed, 9 Feb 2011 11:23:45 am Michael S. Tsirkin wrote: >> > On Wed, Feb 09, 2011 at 11:07:20AM +1030, Rusty Russell wrote: >> > > On Wed, 2 Feb 2011 03:12:22 pm Michael S. Tsirkin wrote: >> > > > On Wed, Feb 02, 2011 at 10:09:18AM +0530, Krishna Kumar2 wrote: >> > > > > > "Michael S. Tsirkin" <mst@redhat.com> 02/02/2011 03:11 AM >> > > > > > >> > > > > > On Tue, Feb 01, 2011 at 01:28:45PM -0800, Shirley Ma wrote: >> > > > > > > On Tue, 2011-02-01 at 23:21 +0200, Michael S. Tsirkin wrote: >> > > > > > > > Confused. We compare capacity to skb frags, no? >> > > > > > > > That's sg I think ... >> > > > > > > >> > > > > > > Current guest kernel use indirect buffers, num_free returns how many >> > > > > > > available descriptors not skb frags. So it's wrong here. >> > > > > > > >> > > > > > > Shirley >> > > > > > >> > > > > > I see. Good point. In other words when we complete the buffer >> > > > > > it was indirect, but when we add a new one we >> > > > > > can not allocate indirect so we consume. >> > > > > > And then we start the queue and add will fail. >> > > > > > I guess we need some kind of API to figure out >> > > > > > whether the buf we complete was indirect? >> > > >> > > I've finally read this thread... I think we need to get more serious >> > > with our stats gathering to diagnose these kind of performance issues. >> > > >> > > This is a start; it should tell us what is actually happening to the >> > > virtio ring(s) without significant performance impact... >> > > >> > > Subject: virtio: CONFIG_VIRTIO_STATS >> > > >> > > For performance problems we'd like to know exactly what the ring looks >> > > like. This patch adds stats indexed by how-full-ring-is; we could extend >> > > it to also record them by how-used-ring-is if we need. 
>> > > >> > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> >> > >> > Not sure whether the intent is to merge this. If yes - >> > would it make sense to use tracing for this instead? >> > That's what kvm does. >> >> Intent wasn't; I've not used tracepoints before, but maybe we should >> consider a longer-term monitoring solution? >> >> Patch welcome! >> >> Cheers, >> Rusty. > > Sure, I'll look into this. There are several virtio trace events already in QEMU today (see the trace-events file): virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u" virtqueue_flush(void *vq, unsigned int count) "vq %p count %u" virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u" virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p" virtio_irq(void *vq) "vq %p" virtio_notify(void *vdev, void *vq) "vdev %p vq %p" These can be used by building QEMU with a suitable tracing backend like SystemTap (see docs/tracing.txt). Inside the guest I've used dynamic ftrace in the past, although static tracepoints would be nice. Stefan -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2011-02-09 at 11:07 +1030, Rusty Russell wrote: > I've finally read this thread... I think we need to get more serious > with our stats gathering to diagnose these kind of performance issues. > > This is a start; it should tell us what is actually happening to the > virtio ring(s) without significant performance impact... Should we also add a similar stat on the vhost vq as well for monitoring vhost_signal & vhost_notify? Shirley -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2011-03-08 at 13:57 -0800, Shirley Ma wrote: > On Wed, 2011-02-09 at 11:07 +1030, Rusty Russell wrote: > > I've finally read this thread... I think we need to get more serious > > with our stats gathering to diagnose these kind of performance issues. > > > > This is a start; it should tell us what is actually happening to the > > virtio ring(s) without significant performance impact... > > Should we also add similar stat on vhost vq as well for monitoring > vhost_signal & vhost_notify? Tom L has started using Rusty's patches and found some interesting results, sent yesterday: http://marc.info/?l=kvm&m=129953710930124&w=2 -Andrew > > Shirley > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2011-03-08 at 20:21 -0600, Andrew Theurer wrote: > Tom L has started using Rusty's patches and found some interesting > results, sent yesterday: > http://marc.info/?l=kvm&m=129953710930124&w=2 Thanks. Very good experiment. I have been struggling with guest/vhost optimization work for a while. I created different experimental patches; performance results really depend on workloads. Based on the discussions and findings, it seems that to improve virtio_net/vhost optimization work, we really need to collect more statistics data on both virtio_net and vhost for both TX and RX. A way to filter the number of guest exits, I/O exits, and irq injections in guest networking stacks only would be helpful. Thanks Shirley -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -7,6 +7,14 @@ config VIRTIO_RING tristate depends on VIRTIO +config VIRTIO_STATS + bool "Virtio debugging stats (EXPERIMENTAL)" + depends on VIRTIO_RING + select DEBUG_FS + ---help--- + Virtio stats collected by how full the ring is at any time, + presented under debugfs/virtio/<name>-<vq>/<num-used>/ + config VIRTIO_PCI tristate "PCI driver for virtio devices (EXPERIMENTAL)" depends on PCI && EXPERIMENTAL diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -21,6 +21,7 @@ #include <linux/virtio_config.h> #include <linux/device.h> #include <linux/slab.h> +#include <linux/debugfs.h> /* virtio guest is communicating with a virtual "device" that actually runs on * a host processor. Memory barriers are used to control SMP effects. */ @@ -95,6 +96,11 @@ struct vring_virtqueue /* How to notify other side. FIXME: commonalize hcalls! */ void (*notify)(struct virtqueue *vq); +#ifdef CONFIG_VIRTIO_STATS + struct vring_stat *stats; + struct dentry *statdir; +#endif + #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; @@ -106,6 +112,87 @@ struct vring_virtqueue #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +#ifdef CONFIG_VIRTIO_STATS +/* We have an array of these, indexed by how full the ring is. */ +struct vring_stat { + /* How many interrupts? */ + size_t interrupt_nowork, interrupt_work; + /* How many non-notify kicks, how many notify kicks, how many add notify? */ + size_t kick_no_notify, kick_notify, add_notify; + /* How many adds? */ + size_t add_direct, add_indirect, add_fail; + /* How many gets? */ + size_t get; + /* How many disable callbacks? */ + size_t disable_cb; + /* How many enables? 
*/ + size_t enable_cb_retry, enable_cb_success; +}; + +static struct dentry *virtio_stats; + +static void create_stat_files(struct vring_virtqueue *vq) +{ + char name[80]; + unsigned int i; + + /* Racy in theory, but we don't care. */ + if (!virtio_stats) + virtio_stats = debugfs_create_dir("virtio-stats", NULL); + + sprintf(name, "%s-%s", dev_name(&vq->vq.vdev->dev), vq->vq.name); + vq->statdir = debugfs_create_dir(name, virtio_stats); + + for (i = 0; i < vq->vring.num; i++) { + struct dentry *dir; + + sprintf(name, "%i", i); + dir = debugfs_create_dir(name, vq->statdir); + debugfs_create_size_t("interrupt_nowork", 0400, dir, + &vq->stats[i].interrupt_nowork); + debugfs_create_size_t("interrupt_work", 0400, dir, + &vq->stats[i].interrupt_work); + debugfs_create_size_t("kick_no_notify", 0400, dir, + &vq->stats[i].kick_no_notify); + debugfs_create_size_t("kick_notify", 0400, dir, + &vq->stats[i].kick_notify); + debugfs_create_size_t("add_notify", 0400, dir, + &vq->stats[i].add_notify); + debugfs_create_size_t("add_direct", 0400, dir, + &vq->stats[i].add_direct); + debugfs_create_size_t("add_indirect", 0400, dir, + &vq->stats[i].add_indirect); + debugfs_create_size_t("add_fail", 0400, dir, + &vq->stats[i].add_fail); + debugfs_create_size_t("get", 0400, dir, + &vq->stats[i].get); + debugfs_create_size_t("disable_cb", 0400, dir, + &vq->stats[i].disable_cb); + debugfs_create_size_t("enable_cb_retry", 0400, dir, + &vq->stats[i].enable_cb_retry); + debugfs_create_size_t("enable_cb_success", 0400, dir, + &vq->stats[i].enable_cb_success); + } +} + +static void delete_stat_files(struct vring_virtqueue *vq) +{ + debugfs_remove_recursive(vq->statdir); +} + +#define add_stat(vq, name) \ + do { \ + struct vring_virtqueue *_vq = (vq); \ + _vq->stats[_vq->num_free - _vq->vring.num].name++; \ + } while (0) + +#else +#define add_stat(vq, name) +static void delete_stat_files(struct vring_virtqueue *vq) +{ +} +#endif + /* Set up an indirect table of descriptors and add it to the 
queue. */ static int vring_add_indirect(struct vring_virtqueue *vq, struct scatterlist sg[], @@ -121,6 +208,8 @@ static int vring_add_indirect(struct vri if (!desc) return -ENOMEM; + add_stat(vq, add_indirect); + /* Transfer entries from the sg list into the indirect page */ for (i = 0; i < out; i++) { desc[i].flags = VRING_DESC_F_NEXT; @@ -183,17 +272,22 @@ int virtqueue_add_buf_gfp(struct virtque BUG_ON(out + in == 0); if (vq->num_free < out + in) { + add_stat(vq, add_fail); pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); /* FIXME: for historical reasons, we force a notify here if * there are outgoing parts to the buffer. Presumably the * host should service the ring ASAP. */ - if (out) + if (out) { + add_stat(vq, add_notify); vq->notify(&vq->vq); + } END_USE(vq); return -ENOSPC; } + add_stat(vq, add_direct); + /* We're about to use some buffers from the free list. */ vq->num_free -= out + in; @@ -248,9 +342,12 @@ void virtqueue_kick(struct virtqueue *_v /* Need to update avail index before checking if we should notify */ virtio_mb(); - if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) + if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) { + add_stat(vq, kick_notify); /* Prod other side to tell it about changes. 
*/ vq->notify(&vq->vq); + } else + add_stat(vq, kick_no_notify); END_USE(vq); } @@ -294,6 +391,8 @@ void *virtqueue_get_buf(struct virtqueue START_USE(vq); + add_stat(vq, get); + if (unlikely(vq->broken)) { END_USE(vq); return NULL; @@ -333,6 +432,7 @@ void virtqueue_disable_cb(struct virtque { struct vring_virtqueue *vq = to_vvq(_vq); + add_stat(vq, disable_cb); vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); @@ -348,10 +448,12 @@ bool virtqueue_enable_cb(struct virtqueu vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; virtio_mb(); if (unlikely(more_used(vq))) { + add_stat(vq, enable_cb_retry); END_USE(vq); return false; } + add_stat(vq, enable_cb_success); END_USE(vq); return true; } @@ -387,10 +489,12 @@ irqreturn_t vring_interrupt(int irq, voi struct vring_virtqueue *vq = to_vvq(_vq); if (!more_used(vq)) { + add_stat(vq, interrupt_nowork); pr_debug("virtqueue interrupt with no work for %p\n", vq); return IRQ_NONE; } + add_stat(vq, interrupt_work); if (unlikely(vq->broken)) return IRQ_HANDLED; @@ -451,6 +555,15 @@ struct virtqueue *vring_new_virtqueue(un } vq->data[i] = NULL; +#ifdef CONFIG_VIRTIO_STATS + vq->stats = kzalloc(sizeof(*vq->stats) * num, GFP_KERNEL); + if (!vq->stats) { + kfree(vq); + return NULL; + } + create_stat_files(vq); +#endif + return &vq->vq; } EXPORT_SYMBOL_GPL(vring_new_virtqueue); @@ -458,6 +571,7 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue); void vring_del_virtqueue(struct virtqueue *vq) { list_del(&vq->list); + delete_stat_files(to_vvq(vq)); kfree(to_vvq(vq)); } EXPORT_SYMBOL_GPL(vring_del_virtqueue);