Message ID | 200912162315.38802.rusty@rustcorp.com.au |
---|---|
State | RFC, archived |
Delegated to: | David Miller |
Headers | show |
On Wed, Dec 16, 2009 at 11:15:38PM +1030, Rusty Russell wrote: > On Wed, 16 Dec 2009 01:23:31 pm Herbert Xu wrote: > > On Wed, Dec 16, 2009 at 01:11:40PM +1030, Rusty Russell wrote: > > > > > > Thanks for the hint. They seem to use NAPI for xmit cleanup, so that's > > > what we should do? I'll try, but such a rewrite doesn't belong in 2.6.32. > > > > Well it depends. Real drivers can't touch the hardware so they're > > stuck with whatever the hardware does. For virtio we do have the > > flexibility of modifying the backend. > > > > Having said that, for existing backends that will signal when there > > is just a single free entry on the queue something like NAPI could > > reduce the overhead associated with the IRQs. > > OK, this is unfortunately untested, but wanted to send it out tonight: > > virtio_net: use NAPI for xmit (UNTESTED) > > This is closer to the way tg3 and ixgbe do it: use the NAPI framework to > free transmitted packets. It neatens things a little as well. Looks very neat. > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -47,6 +47,9 @@ struct virtnet_info > struct napi_struct napi; > unsigned int status; > > + /* We free packets and decide whether to restart xmit here. */ > + struct napi_struct xmit_napi; > + > /* Number of input buffers, and max we've ever had. */ > unsigned int num, max; > > @@ -60,6 +63,9 @@ struct virtnet_info > struct sk_buff_head recv; > struct sk_buff_head send; > > + /* Capacity left in xmit queue. */ > + unsigned int capacity; > + > /* Work struct for refilling if we run low on memory. */ > struct delayed_work refill; > > @@ -111,11 +117,8 @@ static void skb_xmit_done(struct virtque > { > struct virtnet_info *vi = svq->vdev->priv; > > - /* Suppress further interrupts. */ > - svq->vq_ops->disable_cb(svq); > - > /* We were probably waiting for more output buffers. */ > - netif_wake_queue(vi->dev); > + napi_schedule(&vi->xmit_napi); > } > > static void receive_skb(struct net_device *dev, struct sk_buff *skb, > @@ -446,7 +449,7 @@ static unsigned int free_old_xmit_skbs(s > > while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) { > pr_debug("Sent skb %p\n", skb); > - __skb_unlink(skb, &vi->send); > + skb_unlink(skb, &vi->send); > vi->dev->stats.tx_bytes += skb->len; > vi->dev->stats.tx_packets++; > tot_sgs += skb_vnet_hdr(skb)->num_sg; > @@ -455,6 +458,23 @@ static unsigned int free_old_xmit_skbs(s > return tot_sgs; > } > > +static int virtnet_xmit_poll(struct napi_struct *xmit_napi, int budget) > +{ > + struct virtnet_info *vi = > + container_of(xmit_napi, struct virtnet_info, xmit_napi); > + > + if (netif_queue_stopped(vi->dev)) { I am a bit concerned here: for example, on link down you do netif_stop_queue, and start on link up. So is it enough to check netif_queue_stopped to verify that tx is not running and that this is because it was out of capacity? It would be very bad if this run in parallel with TX ... If this is all safe, maybe add a comment explaining why ... > + vi->capacity += free_old_xmit_skbs(vi); > + if (vi->capacity >= 2 + MAX_SKB_FRAGS) { > + /* Suppress further xmit interrupts. */ > + vi->svq->vq_ops->disable_cb(vi->svq); > + napi_complete(xmit_napi); > + netif_wake_queue(vi->dev); > + } > + } > + return 1; > +} > + One concern here: we are ignoring budget, is this an issue? > static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) > { > struct scatterlist sg[2+MAX_SKB_FRAGS]; > @@ -509,34 +529,22 @@ static netdev_tx_t start_xmit(struct sk_ > struct virtnet_info *vi = netdev_priv(dev); > int capacity; > > -again: > - /* Free up any pending old buffers before queueing new ones. */ > - free_old_xmit_skbs(vi); > - > /* Try to transmit */ > + skb_queue_head(&vi->send, skb); > capacity = xmit_skb(vi, skb); > > /* This can happen with OOM and indirect buffers. */ > if (unlikely(capacity < 0)) { > + skb_unlink(skb, &vi->send); > netif_stop_queue(dev); > dev_warn(&dev->dev, "Unexpected full queue\n"); > - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { > - vi->svq->vq_ops->disable_cb(vi->svq); > - netif_start_queue(dev); > - goto again; > - } > + /* If we missed an interrupt, we let virtnet_xmit_poll deal. */ > + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) > + napi_schedule(&vi->xmit_napi); > return NETDEV_TX_BUSY; > } > vi->svq->vq_ops->kick(vi->svq); > - > - /* > - * Put new one in send queue. You'd expect we'd need this before > - * xmit_skb calls add_buf(), since the callback can be triggered > - * immediately after that. But since the callback just triggers > - * another call back here, normal network xmit locking prevents the > - * race. > - */ > - __skb_queue_head(&vi->send, skb); > + vi->capacity = capacity; > > /* Don't wait up for transmitted skbs to be freed. */ > skb_orphan(skb); > @@ -544,15 +552,16 @@ again: > > /* Apparently nice girls don't return TX_BUSY; stop the queue > * before it gets out of hand. Naturally, this wastes entries. */ > - if (capacity < 2+MAX_SKB_FRAGS) { > - netif_stop_queue(dev); > - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { > - /* More just got used, free them then recheck. */ > - capacity += free_old_xmit_skbs(vi); > - if (capacity >= 2+MAX_SKB_FRAGS) { > - netif_start_queue(dev); > - vi->svq->vq_ops->disable_cb(vi->svq); > - } > + if (unlikely(capacity < 2+MAX_SKB_FRAGS)) { > + /* Free old skbs; might make more capacity. */ > + vi->capacity = capacity + free_old_xmit_skbs(vi); > + if (unlikely(vi->capacity < 2+MAX_SKB_FRAGS)) { > + /* Make sure virtnet_xmit_poll sees updated capacity */ > + wmb(); > + netif_stop_queue(dev); Doesn't netif_stop_queue include an atomic op? If yes, does not atomic imply wmb already? > + /* Missed xmit irq? virtnet_xmit_poll will deal. */ > + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) > + napi_schedule(&vi->xmit_napi); > } > } > > @@ -590,6 +599,7 @@ static int virtnet_open(struct net_devic > struct virtnet_info *vi = netdev_priv(dev); > > napi_enable(&vi->napi); > + napi_enable(&vi->xmit_napi); > > /* If all buffers were filled by other side before we napi_enabled, we > * won't get another interrupt, so process any outstanding packets > @@ -652,6 +662,7 @@ static int virtnet_close(struct net_devi > struct virtnet_info *vi = netdev_priv(dev); > > napi_disable(&vi->napi); > + napi_disable(&vi->xmit_napi); > > return 0; > } > @@ -883,6 +894,7 @@ static int virtnet_probe(struct virtio_d > /* Set up our device-specific information */ > vi = netdev_priv(dev); > netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight); > + netif_napi_add(dev, &vi->xmit_napi, virtnet_xmit_poll, 64); > vi->dev = dev; > vi->vdev = vdev; > vdev->priv = vi; -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Dec 16, 2009 at 11:15:38PM +1030, Rusty Russell wrote: > > virtio_net: use NAPI for xmit (UNTESTED) > > This is closer to the way tg3 and ixgbe do it: use the NAPI framework to > free transmitted packets. It neatens things a little as well. Looks good to me. > @@ -544,15 +552,16 @@ again: > > /* Apparently nice girls don't return TX_BUSY; stop the queue > * before it gets out of hand. Naturally, this wastes entries. */ > - if (capacity < 2+MAX_SKB_FRAGS) { > - netif_stop_queue(dev); > - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { > - /* More just got used, free them then recheck. */ > - capacity += free_old_xmit_skbs(vi); > - if (capacity >= 2+MAX_SKB_FRAGS) { > - netif_start_queue(dev); > - vi->svq->vq_ops->disable_cb(vi->svq); > - } > + if (unlikely(capacity < 2+MAX_SKB_FRAGS)) { > + /* Free old skbs; might make more capacity. */ > + vi->capacity = capacity + free_old_xmit_skbs(vi); > + if (unlikely(vi->capacity < 2+MAX_SKB_FRAGS)) { > + /* Make sure virtnet_xmit_poll sees updated capacity */ > + wmb(); You only need smp_wmb() since this is protecting against the guest only as opposed to the host. Cheers,
On Wed, Dec 16, 2009 at 03:22:18PM +0200, Michael S. Tsirkin wrote: > > Doesn't netif_stop_queue include an atomic op? > If yes, does not atomic imply wmb already? Unfortunately it doesn't. Cheers,
On Wed, Dec 16, 2009 at 09:35:12PM +0800, Herbert Xu wrote: > On Wed, Dec 16, 2009 at 03:22:18PM +0200, Michael S. Tsirkin wrote: > > > > Doesn't netif_stop_queue include an atomic op? > > If yes, does not atomic imply wmb already? > > Unfortunately it doesn't. So do we need an rmb after testing it as well? Lock would be simpler :) > Cheers, > -- > Visit Openswan at http://www.openswan.org/ > Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> > Home Page: http://gondor.apana.org.au/~herbert/ > PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Dec 16, 2009 at 03:38:07PM +0200, Michael S. Tsirkin wrote: > On Wed, Dec 16, 2009 at 09:35:12PM +0800, Herbert Xu wrote: > > On Wed, Dec 16, 2009 at 03:22:18PM +0200, Michael S. Tsirkin wrote: > > > > > > Doesn't netif_stop_queue include an atomic op? > > > If yes, does not atomic imply wmb already? > > > > Unfortunately it doesn't. > > So do we need an rmb after testing it as well? You're right. We should have an smp_rmb on the other path. Cheers,
On Wed, 2009-12-16 at 23:15 +1030, Rusty Russell wrote: > On Wed, 16 Dec 2009 01:23:31 pm Herbert Xu wrote: > > On Wed, Dec 16, 2009 at 01:11:40PM +1030, Rusty Russell wrote: > > > > > > Thanks for the hint. They seem to use NAPI for xmit cleanup, so that's > > > what we should do? I'll try, but such a rewrite doesn't belong in 2.6.32. > > > > Well it depends. Real drivers can't touch the hardware so they're > > stuck with whatever the hardware does. For virtio we do have the > > flexibility of modifying the backend. > > > > Having said that, for existing backends that will signal when there > > is just a single free entry on the queue something like NAPI could > > reduce the overhead associated with the IRQs. > > OK, this is unfortunately untested, but wanted to send it out tonight: > > virtio_net: use NAPI for xmit (UNTESTED) > > This is closer to the way tg3 and ixgbe do it: use the NAPI framework to > free transmitted packets. It neatens things a little as well. I had to change virtnet_xmit_poll() to get it working. See below. With this change, i don't see any 'queue full' warnings, but requeues are still happening at the qdisc level (sch_direct_xmit() finds that tx queue is stopped and does requeues). Not sure why the host is not able to keep up with the guest. Thanks Sridhar > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -47,6 +47,9 @@ struct virtnet_info > struct napi_struct napi; > unsigned int status; > > + /* We free packets and decide whether to restart xmit here. */ > + struct napi_struct xmit_napi; > + > /* Number of input buffers, and max we've ever had. */ > unsigned int num, max; > > @@ -60,6 +63,9 @@ struct virtnet_info > struct sk_buff_head recv; > struct sk_buff_head send; > > + /* Capacity left in xmit queue. */ > + unsigned int capacity; > + > /* Work struct for refilling if we run low on memory. */ > struct delayed_work refill; > > @@ -111,11 +117,8 @@ static void skb_xmit_done(struct virtque > { > struct virtnet_info *vi = svq->vdev->priv; > > - /* Suppress further interrupts. */ > - svq->vq_ops->disable_cb(svq); > - > /* We were probably waiting for more output buffers. */ > - netif_wake_queue(vi->dev); > + napi_schedule(&vi->xmit_napi); > } > > static void receive_skb(struct net_device *dev, struct sk_buff *skb, > @@ -446,7 +449,7 @@ static unsigned int free_old_xmit_skbs(s > > while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) { > pr_debug("Sent skb %p\n", skb); > - __skb_unlink(skb, &vi->send); > + skb_unlink(skb, &vi->send); > vi->dev->stats.tx_bytes += skb->len; > vi->dev->stats.tx_packets++; > tot_sgs += skb_vnet_hdr(skb)->num_sg; > @@ -455,6 +458,23 @@ static unsigned int free_old_xmit_skbs(s > return tot_sgs; > } > > +static int virtnet_xmit_poll(struct napi_struct *xmit_napi, int budget) > +{ > + struct virtnet_info *vi = > + container_of(xmit_napi, struct virtnet_info, xmit_napi); > + > + if (netif_queue_stopped(vi->dev)) { > + vi->capacity += free_old_xmit_skbs(vi); > + if (vi->capacity >= 2 + MAX_SKB_FRAGS) { > + /* Suppress further xmit interrupts. */ > + vi->svq->vq_ops->disable_cb(vi->svq); > + napi_complete(xmit_napi); > + netif_wake_queue(vi->dev); > + } > + } > + return 1; > +} +static int virtnet_xmit_poll(struct napi_struct *xmit_napi, int budget) +{ + struct virtnet_info *vi = + container_of(xmit_napi, struct virtnet_info, xmit_napi); + + vi->capacity += free_old_xmit_skbs(vi); + + if (vi->capacity >= 2 + MAX_SKB_FRAGS) { + /* Suppress further xmit interrupts. */ + vi->svq->vq_ops->disable_cb(vi->svq); + napi_complete(xmit_napi); + if (netif_queue_stopped(vi->dev)) + netif_wake_queue(vi->dev); + } + return 1; +} > + > static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) > { > struct scatterlist sg[2+MAX_SKB_FRAGS]; > @@ -509,34 +529,22 @@ static netdev_tx_t start_xmit(struct sk_ > struct virtnet_info *vi = netdev_priv(dev); > int capacity; > > -again: > - /* Free up any pending old buffers before queueing new ones. */ > - free_old_xmit_skbs(vi); > - > /* Try to transmit */ > + skb_queue_head(&vi->send, skb); > capacity = xmit_skb(vi, skb); > > /* This can happen with OOM and indirect buffers. */ > if (unlikely(capacity < 0)) { > + skb_unlink(skb, &vi->send); > netif_stop_queue(dev); > dev_warn(&dev->dev, "Unexpected full queue\n"); > - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { > - vi->svq->vq_ops->disable_cb(vi->svq); > - netif_start_queue(dev); > - goto again; > - } > + /* If we missed an interrupt, we let virtnet_xmit_poll deal. */ > + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) > + napi_schedule(&vi->xmit_napi); > return NETDEV_TX_BUSY; > } > vi->svq->vq_ops->kick(vi->svq); > - > - /* > - * Put new one in send queue. You'd expect we'd need this before > - * xmit_skb calls add_buf(), since the callback can be triggered > - * immediately after that. But since the callback just triggers > - * another call back here, normal network xmit locking prevents the > - * race. > - */ > - __skb_queue_head(&vi->send, skb); > + vi->capacity = capacity; > > /* Don't wait up for transmitted skbs to be freed. */ > skb_orphan(skb); > @@ -544,15 +552,16 @@ again: > > /* Apparently nice girls don't return TX_BUSY; stop the queue > * before it gets out of hand. Naturally, this wastes entries. */ > - if (capacity < 2+MAX_SKB_FRAGS) { > - netif_stop_queue(dev); > - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { > - /* More just got used, free them then recheck. */ > - capacity += free_old_xmit_skbs(vi); > - if (capacity >= 2+MAX_SKB_FRAGS) { > - netif_start_queue(dev); > - vi->svq->vq_ops->disable_cb(vi->svq); > - } > + if (unlikely(capacity < 2+MAX_SKB_FRAGS)) { > + /* Free old skbs; might make more capacity. */ > + vi->capacity = capacity + free_old_xmit_skbs(vi); > + if (unlikely(vi->capacity < 2+MAX_SKB_FRAGS)) { > + /* Make sure virtnet_xmit_poll sees updated capacity */ > + wmb(); > + netif_stop_queue(dev); > + /* Missed xmit irq? virtnet_xmit_poll will deal. */ > + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) > + napi_schedule(&vi->xmit_napi); > } > } > > @@ -590,6 +599,7 @@ static int virtnet_open(struct net_devic > struct virtnet_info *vi = netdev_priv(dev); > > napi_enable(&vi->napi); > + napi_enable(&vi->xmit_napi); > > /* If all buffers were filled by other side before we napi_enabled, we > * won't get another interrupt, so process any outstanding packets > @@ -652,6 +662,7 @@ static int virtnet_close(struct net_devi > struct virtnet_info *vi = netdev_priv(dev); > > napi_disable(&vi->napi); > + napi_disable(&vi->xmit_napi); > > return 0; > } > @@ -883,6 +894,7 @@ static int virtnet_probe(struct virtio_d > /* Set up our device-specific information */ > vi = netdev_priv(dev); > netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight); > + netif_napi_add(dev, &vi->xmit_napi, virtnet_xmit_poll, 64); > vi->dev = dev; > vi->vdev = vdev; > vdev->priv = vi; -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Dec 16, 2009 at 05:43:00PM -0800, Sridhar Samudrala wrote: > > I had to change virtnet_xmit_poll() to get it working. See below. > With this change, i don't see any 'queue full' warnings, but requeues > are still happening at the qdisc level (sch_direct_xmit() finds that > tx queue is stopped and does requeues). More importantly how does the performance look? Thanks,
On Wed, Dec 16, 2009 at 05:43:00PM -0800, Sridhar Samudrala wrote: > > I had to change virtnet_xmit_poll() to get it working. See below. > With this change, i don't see any 'queue full' warnings, but requeues > are still happening at the qdisc level (sch_direct_xmit() finds that > tx queue is stopped and does requeues). Actually this makes no sense. The queue should only be stopped at the end of a xmit run, in which case sch_direct_xmit should return 0 so we should never see a requeue. So if we're still seeing requeues then it hasn't been fixed properly. Cheers,
Herbert Xu wrote: > On Wed, Dec 16, 2009 at 05:43:00PM -0800, Sridhar Samudrala wrote: > >> I had to change virtnet_xmit_poll() to get it working. See below. >> With this change, i don't see any 'queue full' warnings, but requeues >> are still happening at the qdisc level (sch_direct_xmit() finds that >> tx queue is stopped and does requeues). >> > > More importantly how does the performance look? > TCP stream throughput is around the same 3200Mb/sec Thanks Sridhar -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Herbert Xu wrote: > On Wed, Dec 16, 2009 at 05:43:00PM -0800, Sridhar Samudrala wrote: > >> I had to change virtnet_xmit_poll() to get it working. See below. >> With this change, i don't see any 'queue full' warnings, but requeues >> are still happening at the qdisc level (sch_direct_xmit() finds that >> tx queue is stopped and does requeues). >> > > Actually this makes no sense. The queue should only be stopped > at the end of a xmit run, in which case sch_direct_xmit should > return 0 so we should never see a requeue. > I think sch_direct_xmit() is not even calling dev_hard_start_xmit() as the tx queue is stopped and does a dev_requeue_skb() and returns NETDEV_TX_BUSY. > So if we're still seeing requeues then it hasn't been fixed > properly. > > Cheers, > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Dec 16, 2009 at 09:05:32PM -0800, Sridhar Samudrala wrote: > > I think sch_direct_xmit() is not even calling dev_hard_start_xmit() as > the tx queue is stopped > and does a dev_requeue_skb() and returns NETDEV_TX_BUSY. Yes but if the queue was stopped then we shouldn't even get into sch_direct_xmit. While inside sch_direct_xmit, then only way for the queue to be stopped is through dev_hard_start_xmit, and when that happens sch_direct_xmit will exit so we should never see a requeue if the driver is working properly. Cheers,
Herbert Xu wrote: > On Wed, Dec 16, 2009 at 09:05:32PM -0800, Sridhar Samudrala wrote: > >> I think sch_direct_xmit() is not even calling dev_hard_start_xmit() as >> the tx queue is stopped >> and does a dev_requeue_skb() and returns NETDEV_TX_BUSY. >> > > Yes but if the queue was stopped then we shouldn't even get into > sch_direct_xmit. I don't see any checks for txq_stopped in the callers of sch_direct_xmit() : __dev_xmit_skb() and qdisc_restart(). Both these routines get the txq and call sch_direct_xmit() which checks if tx queue is stopped or frozen. Am i missing something? Thanks Sridhar > While inside sch_direct_xmit, then only way for > the queue to be stopped is through dev_hard_start_xmit, and when > that happens sch_direct_xmit will exit so we should never see a > requeue if the driver is working properly. > > Cheers, > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> Sridhar Samudrala <sri@us.ibm.com> > > Re: [RFC PATCH] Regression in linux 2.6.32 virtio_net seen with vhost-net > > Herbert Xu wrote: > > On Wed, Dec 16, 2009 at 09:05:32PM -0800, Sridhar Samudrala wrote: > > > >> I think sch_direct_xmit() is not even calling dev_hard_start_xmit() as > >> the tx queue is stopped > >> and does a dev_requeue_skb() and returns NETDEV_TX_BUSY. > >> > > > > Yes but if the queue was stopped then we shouldn't even get into > > sch_direct_xmit. > I don't see any checks for txq_stopped in the callers of sch_direct_xmit() : > __dev_xmit_skb() and qdisc_restart(). Both these routines get the txq > and call > sch_direct_xmit() which checks if tx queue is stopped or frozen. > > Am i missing something? Yes - dequeue_skb. The final skb, before the queue was stopped, is transmitted by the driver. The next time sch_direct_xmit is called, it gets a skb and finds the device is stopped and requeue's the skb. For all subsequent xmits, dequeue_skb returns NULL (and the other caller - __dev_xmit_skb can never be called since qdisc_qlen is true) and thus requeue's will not happen. This also means that the number of requeues you see (eg 283K in one run) is the number of times the queue was stopped and restarted. So it looks like driver either: 1. didn't stop the queue when xmiting a packet successfully (the condition being that it would not be possible to xmit the next skb). But this doesn't seem to be the case. 2. wrongly restarted the queue. Possible - since a few places use both the start & wake queue api's. Thanks, - KK -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 17-12-2009 11:03, Krishna Kumar2 wrote: >> Sridhar Samudrala <sri@us.ibm.com> >> >> Re: [RFC PATCH] Regression in linux 2.6.32 virtio_net seen with vhost-net >> >> Herbert Xu wrote: >>> On Wed, Dec 16, 2009 at 09:05:32PM -0800, Sridhar Samudrala wrote: >>> >>>> I think sch_direct_xmit() is not even calling dev_hard_start_xmit() as > >>>> the tx queue is stopped >>>> and does a dev_requeue_skb() and returns NETDEV_TX_BUSY. >>>> >>> Yes but if the queue was stopped then we shouldn't even get into >>> sch_direct_xmit. >> I don't see any checks for txq_stopped in the callers of > sch_direct_xmit() : >> __dev_xmit_skb() and qdisc_restart(). Both these routines get the txq >> and call >> sch_direct_xmit() which checks if tx queue is stopped or frozen. >> >> Am i missing something? > > Yes - dequeue_skb. > > The final skb, before the queue was stopped, is transmitted by > the driver. The next time sch_direct_xmit is called, it gets a > skb and finds the device is stopped and requeue's the skb. So we _should_ get into sch_direct_xmit when the queue was stopped... I guess Herbert might forget the multiqueue change, and Sridhar isn't missing much. ;-) Thanks, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 17, 2009 at 11:27:55AM +0000, Jarek Poplawski wrote: > > So we _should_ get into sch_direct_xmit when the queue was stopped... > I guess Herbert might forget the multiqueue change, and Sridhar isn't > missing much. ;-) Hmm, if that's the case then this new multiqueue code is seriously broken. It means that once the queue is stopped, every new tx packet will cause an unnecessary dequeue/requeue, up to 1000. Cheers,
On Thu, Dec 17, 2009 at 07:45:35PM +0800, Herbert Xu wrote: > On Thu, Dec 17, 2009 at 11:27:55AM +0000, Jarek Poplawski wrote: > > > > So we _should_ get into sch_direct_xmit when the queue was stopped... > > I guess Herbert might forget the multiqueue change, and Sridhar isn't > > missing much. ;-) > > Hmm, if that's the case then this new multiqueue code is seriously > broken. It means that once the queue is stopped, every new tx packet > will cause an unnecessary dequeue/requeue, up to 1000. And indeed, this new requeue behaviour was introduced in 2.6.32. Sridhar, please build the 2.6.32 virtio-net driver under a 2.6.31 kernel and see if the regression persists. Alternatively, build the virtio-net driver under 2.6.32 and see whether it still performs as well. Thanks,
> Jarek Poplawski <jarkao2@gmail.com> > > >>> On Wed, Dec 16, 2009 at 09:05:32PM -0800, Sridhar Samudrala wrote: > >>> > >>>> I think sch_direct_xmit() is not even calling dev_hard_start_xmit() as > > > >>>> the tx queue is stopped > >>>> and does a dev_requeue_skb() and returns NETDEV_TX_BUSY. > >>>> > >>> Yes but if the queue was stopped then we shouldn't even get into > >>> sch_direct_xmit. > >> I don't see any checks for txq_stopped in the callers of > > sch_direct_xmit() : > >> __dev_xmit_skb() and qdisc_restart(). Both these routines get the txq > >> and call > >> sch_direct_xmit() which checks if tx queue is stopped or frozen. > >> > >> Am i missing something? > > > > Yes - dequeue_skb. > > > > The final skb, before the queue was stopped, is transmitted by > > the driver. The next time sch_direct_xmit is called, it gets a > > skb and finds the device is stopped and requeue's the skb. > > So we _should_ get into sch_direct_xmit when the queue was stopped... > I guess Herbert might forget the multiqueue change, and Sridhar isn't > missing much. ;-) I meant his question on who is checking tx queue stopped before calling driver xmit. In stopped queue case, qdisc_restart makes sure sch_direct_xmit is not called for all subsequent skbs. Sridhar is seeing 280K requeue's, and that probably implies device was stopped and wrongly restarted immediately. So the next xmit in the kernel found the txq is not stopped and called the xmit handler, get a BUSY, requeue, and so on. That would also explain why his BW drops so much - all false starts (besides 19% of all skbs being requeued). I assume that each time when we check: if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) ret = dev_hard_start_xmit(skb, dev, txq); it passes the check and dev_hard_start_xmit is called wrongly. #Requeues: 283575 #total skbs: 1469482 Percentage requeued: 19.29% Thanks, - KK -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> Herbert Xu <herbert@gondor.apana.org.au> > Sent by: netdev-owner@vger.kernel.org > > 12/17/2009 05:15 PM > > To > > Jarek Poplawski <jarkao2@gmail.com> > > cc > > Krishna Kumar2/India/IBM@IBMIN, Sridhar Samudrala <sri@us.ibm.com>, > mst@redhat.com, netdev@vger.kernel.org, Rusty Russell > <rusty@rustcorp.com.au>, "David S. Miller" <davem@davemloft.net> > > Subject > > Re: [RFC PATCH] Regression in linux 2.6.32 virtio_net seen with vhost-net > > On Thu, Dec 17, 2009 at 11:27:55AM +0000, Jarek Poplawski wrote: > > > > So we _should_ get into sch_direct_xmit when the queue was stopped... > > I guess Herbert might forget the multiqueue change, and Sridhar isn't > > missing much. ;-) > > Hmm, if that's the case then this new multiqueue code is seriously > broken. It means that once the queue is stopped, every new tx packet > will cause an unnecessary dequeue/requeue, up to 1000. I think it is fine. The first skb after the stop is requeue'd. All subsequent new skbs get screened out at qdisc_restart() -> dequeue_skb -> NULL -> no work to do. Hope I didn't miss something now :-) Thanks, - KK -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 17, 2009 at 07:49:37PM +0800, Herbert Xu wrote: > > And indeed, this new requeue behaviour was introduced in 2.6.32. Actually no, it was merely rearranged. It would appear that this behaviour has been around for over a year. It is even worse than I thought. Not only would new tx packets trigger this unnecessary queue run, but once it is triggered, it would consume 100% CPU as dev_requeue_skb unconditionally reschedules the queue! Tell me this is not true please... Thanks,
On Thu, Dec 17, 2009 at 07:45:35PM +0800, Herbert Xu wrote: > On Thu, Dec 17, 2009 at 11:27:55AM +0000, Jarek Poplawski wrote: > > > > So we _should_ get into sch_direct_xmit when the queue was stopped... > > I guess Herbert might forget the multiqueue change, and Sridhar isn't > > missing much. ;-) > > Hmm, if that's the case then this new multiqueue code is seriously > broken. It means that once the queue is stopped, every new tx packet > will cause an unnecessary dequeue/requeue, up to 1000. Hmm, I can even remember who inspired this change ;-) But I'm not sure this really broken: it seems it's "optimized" for non-stopped case. Btw, IIRC, Krishna worked on changing this for uniqueues - I wonder if there could be much difference in testing? Cheers, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Herbert Xu <herbert@gondor.apana.org.au> wrote on 12/17/2009 05:38:56 PM: > > And indeed, this new requeue behaviour was introduced in 2.6.32. > > Actually no, it was merely rearranged. It would appear that this > behaviour has been around for over a year. > > It is even worse than I thought. Not only would new tx packets > trigger this unnecessary queue run, but once it is triggered, it > would consume 100% CPU as dev_requeue_skb unconditionally reschedules > the queue! > > Tell me this is not true please... Hi Herbert, I am confused. Isn't dequeue_skb returning NULL for 2nd - nth skbs till the queue is restarted, so how is it broken? __dev_xmit_skb calls sch_direct_xmit only once after the device is stopped, and this call results in skb being re-queued. Next call to __dev_xmit_skb calls qdisc_restart which will always bail out since dev_dequeue returns NULL. We also avoid rescheduling the queue after the first resched. I also think the resched in requeue is probably not required, since the device will call netif_tx_wake_queue anyway. dev_dequeue() { if (unlikely(skb)) { struct net_device *dev = qdisc_dev(q); struct netdev_queue *txq; /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) { q->gso_skb = NULL; q->q.qlen--; } else skb = NULL; } Thanks, - KK -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 17, 2009 at 05:57:15PM +0530, Krishna Kumar2 wrote: > the first resched. I also think the resched in requeue is probably > not required, since the device will call netif_tx_wake_queue anyway. IIRC there were mentioned some possibly broken drivers around these changes. So it would require a little bit of reviewing... Thanks, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 17, 2009 at 12:42:54PM +0000, Jarek Poplawski wrote: > On Thu, Dec 17, 2009 at 05:57:15PM +0530, Krishna Kumar2 wrote: > > the first resched. I also think the resched in requeue is probably > > not required, since the device will call netif_tx_wake_queue anyway. Actually, the requeue is there for the case where we hit lock contention on the device TX lock so it is needed for that at least. However, for the case where the device queue is stopped you're right we don't need to reschedule. Cheers,
Jarek Poplawski <jarkao2@gmail.com> wrote on 12/17/2009 06:12:54 PM: > > On Thu, Dec 17, 2009 at 05:57:15PM +0530, Krishna Kumar2 wrote: > > the first resched. I also think the resched in requeue is probably > > not required, since the device will call netif_tx_wake_queue anyway. > > IIRC there were mentioned some possibly broken drivers around these > changes. So it would require a little bit of reviewing... Yes, that was the reason I dropped that patch before submitting. I was not sure of that optimization since I didn't check that all, esp antique drivers, would resched correctly, but that was only a minor optimization (avoiding one call of resched per stop). However, the qdisc_restart/dev_dequeue path is clean since the stopped txq case is handled very early, and no actual dequeue/requeue/resched happens till the device restarts the txq. Thanks, - KK -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 17, 2009 at 05:26:37PM +0530, Krishna Kumar2 wrote: > Sridhar is seeing 280K requeue's, and that probably implies device > was stopped and wrongly restarted immediately. So the next xmit in > the kernel found the txq is not stopped and called the xmit handler, > get a BUSY, requeue, and so on. That would also explain why his BW > drops so much - all false starts (besides 19% of all skbs being > requeued). I assume that each time when we check: > > if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) > ret = dev_hard_start_xmit(skb, dev, txq); > it passes the check and dev_hard_start_xmit is called wrongly. > > #Requeues: 283575 > #total skbs: 1469482 > Percentage requeued: 19.29% I haven't followed this thread, so I'm not sure what are you looking for, but can't these requeues/drops mean some hardware limits were reached? I wonder why there are compared linux-2.6.32 vs. 2.6.31.6 with different test conditions (avg. packet sizes: 16800 vs. 64400)? Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Herbert Xu <herbert@gondor.apana.org.au> wrote on 12/17/2009 06:26:54 PM: > > > On Thu, Dec 17, 2009 at 05:57:15PM +0530, Krishna Kumar2 wrote: > > > the first resched. I also think the resched in requeue is probably > > > not required, since the device will call netif_tx_wake_queue anyway. > > Actually, the requeue is there for the case where we hit lock > contention on the device TX lock so it is needed for that at > least. > > However, for the case where the device queue is stopped you're > right we don't need to reschedule. Yes, my patch had a handle for both cases by adding another arg to dev_requeue_skb, something like: static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q, int reason) { q->gso_skb = skb; q->qstats.requeues++; q->q.qlen++; /* it's still part of the queue */ if (reason != NETDEV_TX_BUSY) __netif_schedule(q); return 0; } and pass the reason in the two places that call requeue. I didn't submit it since it felt a small optimization - avoiding one resched per stopped txq (not for every skb), plus I was not sure if every driver would properly call netif_tx_wake_queue. thanks, - KK -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 17, 2009 at 05:57:15PM +0530, Krishna Kumar2 wrote: > > I am confused. Isn't dequeue_skb returning NULL for 2nd - nth skbs > till the queue is restarted, so how is it broken? Sorry I didn't read dev_dequeue carefully enough. Indeed it correctly checks the queue status so the loop that I thought was there doesn't exist. The requeues are probably caused by the driver still. Was Sridhar testing Rusty's latest patch? Thanks,
Jarek Poplawski <jarkao2@gmail.com> wrote on 12/17/2009 06:47:09 PM: > On Thu, Dec 17, 2009 at 05:26:37PM +0530, Krishna Kumar2 wrote: > > Sridhar is seeing 280K requeue's, and that probably implies device > > was stopped and wrongly restarted immediately. So the next xmit in > > the kernel found the txq is not stopped and called the xmit handler, > > get a BUSY, requeue, and so on. That would also explain why his BW > > drops so much - all false starts (besides 19% of all skbs being > > requeued). I assume that each time when we check: > > > > if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) > > ret = dev_hard_start_xmit(skb, dev, txq); > > it passes the check and dev_hard_start_xmit is called wrongly. > > > > #Requeues: 283575 > > #total skbs: 1469482 > > Percentage requeued: 19.29% > > I haven't followed this thread, so I'm not sure what are you looking > for, but can't these requeues/drops mean some hardware limits were > reached? I wonder why there are compared linux-2.6.32 vs. 2.6.31.6 > with different test conditions (avg. packet sizes: 16800 vs. 64400)? Hi Jarek, That is a good point. I am not sure why the avg packet sizes are so different in the bstats. Did GSO change in these two versions? I took the numbers from Sridhar's mail before the NAPI patch. I think having 280K requeue's in 1 min means that the driver is waking up the queue when it should not. The NAPI patch fixes that, but he still reported seeing requeue's. thanks, - KK -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 17, 2009 at 07:40:32PM +0530, Krishna Kumar2 wrote: > > I took the numbers from Sridhar's mail before the NAPI patch. I think > having 280K requeue's in 1 min means that the driver is waking up the > queue when it should not. The NAPI patch fixes that, but he still > reported seeing requeue's. Since we're still in the dark as to why this is happening, it would be really helpful if we could test 2.6.31's virtio-net under 2.6.32 or vice versa to determine whether it is a driver change or a core change that's creating this regression. Cheers,
Herbert Xu <herbert@gondor.apana.org.au> wrote on 12/17/2009 07:14:08 PM: > > I am confused. Isn't dequeue_skb returning NULL for 2nd - nth skbs > > till the queue is restarted, so how is it broken? > > Sorry I didn't read dev_dequeue carefully enough. Indeed it > correctly checks the queue status so the loop that I thought > was there doesn't exist. > > The requeues are probably caused by the driver still. Was Sridhar > testing Rusty's latest patch? I was using numbers from his first test run. He has not posted the requeue numbers for the NAPI run. But he ran with NAPI, and said: "I had to change virtnet_xmit_poll() to get it working. See below. With this change, i don't see any 'queue full' warnings, but requeues are still happening at the qdisc level (sch_direct_xmit() finds that tx queue is stopped and does requeues)". I think the bug is in this check: + if (vi->capacity >= 2 + MAX_SKB_FRAGS) { + /* Suppress further xmit interrupts. */ + vi->svq->vq_ops->disable_cb(vi->svq); + napi_complete(xmit_napi); + + /* Don't wake it if link is down. */ + if (likely(netif_carrier_ok(vi->vdev))) + netif_wake_queue(vi->dev); + } We wake up too fast, just enough space for one more skb to be sent before the queue is stopped again. And hence no more messages about queue full, but lot of requeues. The qdisc code is doing the correct thing, but we need to increase the limit here. Can we try with some big number, 64, 128? Thanks, - KK -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 17, 2009 at 08:05:02PM +0530, Krishna Kumar2 wrote: > > I think the bug is in this check: > > + if (vi->capacity >= 2 + MAX_SKB_FRAGS) { > + /* Suppress further xmit interrupts. */ > + vi->svq->vq_ops->disable_cb(vi->svq); > + napi_complete(xmit_napi); > + > + /* Don't wake it if link is down. */ > + if (likely(netif_carrier_ok(vi->vdev))) > + netif_wake_queue(vi->dev); > + } > > We wake up too fast, just enough space for one more skb to be sent > before the queue is stopped again. And hence no more messages about > queue full, but lot of requeues. The qdisc code is doing the correct > thing, but we need to increase the limit here. > > Can we try with some big number, 64, 128? Good point. Sridhar, could you please test doubling or quadrupling the threshold? 128 is probably a bit too much though as the ring only has 256 entries. Cheers,
On Thu, 2009-12-17 at 22:36 +0800, Herbert Xu wrote: > On Thu, Dec 17, 2009 at 08:05:02PM +0530, Krishna Kumar2 wrote: > > > > I think the bug is in this check: > > > > + if (vi->capacity >= 2 + MAX_SKB_FRAGS) { > > + /* Suppress further xmit interrupts. */ > > + vi->svq->vq_ops->disable_cb(vi->svq); > > + napi_complete(xmit_napi); > > + > > + /* Don't wake it if link is down. */ > > + if (likely(netif_carrier_ok(vi->vdev))) > > + netif_wake_queue(vi->dev); > > + } > > > > We wake up too fast, just enough space for one more skb to be sent > > before the queue is stopped again. And hence no more messages about > > queue full, but lot of requeues. The qdisc code is doing the correct > > thing, but we need to increase the limit here. > > > > Can we try with some big number, 64, 128? > > Good point. Sridhar, could you please test doubling or quadrupling > the threshold? 128 is probably a bit too much though as the ring > only has 256 entries. Increasing the wakeup threshold value reduced the number of requeues, but didn't eliminate them. The throughput improved a little, but the CPU utilization went up. I don't see any 'queue full' warning messages from the driver and hence the driver is not returning NETDEV_TX_BUSY. The requeues are happening in sch_direct_xmit() as it is finding that the tx queue is stopped. I could not get 2.6.31 virtio-net driver to work with 2.6.32 kernel by simply replacing virtio-net.c. The compile and build went through fine, but the guest is not seeing the virtio-net device when it comes up. I think it is a driver issue, not a core issue as I am able to get good results by not stopping the queue early in start_xmit() and dropping the skb when xmit_skb() fails even with 2.6.32 kernel. I think this behavior is somewhat similar to 2.6.31 virtio-net driver as it caches 1 skb and drops any further skbs when ring is full in its start_xmit routine. 2.6.32 + Rusty's xmit_napi v2 patch ----------------------------------- $ ./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.122.1 (192.168.122.1) port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 16384 60.03 3255.22 87.16 82.57 2.193 4.156 $ tc -s qdisc show dev eth0 qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 Sent 24524210050 bytes 1482737 pkt (dropped 0, overlimits 0 requeues 339101) rate 0bit 0pps backlog 0b 0p requeues 339101 2.6.32 + Rusty's xmit_napi v2 patch + wakeup threshold=64 --------------------------------------------------------- $ ./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.122.1 (192.168.122.1) port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 16384 60.03 3356.71 95.41 89.56 2.329 4.372 [sridhar@localhost ~]$ tc -s qdisc show dev eth0 qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 Sent 25290227186 bytes 1555119 pkt (dropped 0, overlimits 0 requeues 78179) rate 0bit 0pps backlog 0b 0p requeues 78179 2.6.32 + Rusty's xmit_napi v2 patch + wakeup threshold=128 ---------------------------------------------------------- $./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.122.1 (192.168.122.1) port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 16384 60.03 3413.79 96.30 89.79 2.311 4.309 [sridhar@localhost ~]$ tc -s qdisc show dev eth0 qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 Sent 25719585472 bytes 1579448 pkt (dropped 0, overlimits 0 requeues 40299) rate 0bit 0pps backlog 0b 0p requeues 40299 2.6.32 + Rusty's xmit_napi v2 patch + don't stop early & drop skb on fail patch ------------------------------------------------------------------------------- $./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.122.1 (192.168.122.1) port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 16384 60.03 7741.65 70.09 72.84 0.742 1.542 [sridhar@localhost ~]$ tc -s qdisc show dev eth0 qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 Sent 58149531018 bytes 897991 pkt (dropped 0, overlimits 0 requeues 1) rate 0bit 0pps backlog 0b 0p requeues 1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2009-12-17 at 13:50 -0800, Sridhar Samudrala wrote: > On Thu, 2009-12-17 at 22:36 +0800, Herbert Xu wrote: > > On Thu, Dec 17, 2009 at 08:05:02PM +0530, Krishna Kumar2 wrote: > > > > > > I think the bug is in this check: > > > > > > + if (vi->capacity >= 2 + MAX_SKB_FRAGS) { > > > + /* Suppress further xmit interrupts. */ > > > + vi->svq->vq_ops->disable_cb(vi->svq); > > > + napi_complete(xmit_napi); > > > + > > > + /* Don't wake it if link is down. */ > > > + if (likely(netif_carrier_ok(vi->vdev))) > > > + netif_wake_queue(vi->dev); > > > + } > > > > > > We wake up too fast, just enough space for one more skb to be sent > > > before the queue is stopped again. And hence no more messages about > > > queue full, but lot of requeues. The qdisc code is doing the correct > > > thing, but we need to increase the limit here. > > > > > > Can we try with some big number, 64, 128? > > > > Good point. Sridhar, could you please test doubling or quadrupling > > the threshold? 128 is probably a bit too much though as the ring > > only has 256 entries. > > Increasing the wakeup threshold value reduced the number of requeues, but > didn't eliminate them. The throughput improved a little, but the CPU utilization > went up. > I don't see any 'queue full' warning messages from the driver and hence the driver > is not returning NETDEV_TX_BUSY. The requeues are happening in sch_direct_xmit() > as it is finding that the tx queue is stopped. > > I could not get 2.6.31 virtio-net driver to work with 2.6.32 kernel by simply > replacing virtio-net.c. The compile and build went through fine, but the guest > is not seeing the virtio-net device when it comes up. > I think it is a driver issue, not a core issue as I am able to get good results > by not stopping the queue early in start_xmit() and dropping the skb when xmit_skb() > fails even with 2.6.32 kernel. I think this behavior is somewhat similar to 2.6.31 > virtio-net driver as it caches 1 skb and drops any further skbs when ring is full > in its start_xmit routine. Another interesting observation that Jarek pointed out is that if we drop skb's, i guess it is causing TCP to backoff and enabling TCP GSO code to send much larger packets(64K compared to 16K). So although the throughput is higher, packets/sec is less in that case. > > 2.6.32 + Rusty's xmit_napi v2 patch > ----------------------------------- > $ ./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 > TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.122.1 (192.168.122.1) port 0 AF_INET > Recv Send Send Utilization Service Demand > Socket Socket Message Elapsed Send Recv Send Recv > Size Size Size Time Throughput local remote local remote > bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB > > 87380 16384 16384 60.03 3255.22 87.16 82.57 2.193 4.156 > $ tc -s qdisc show dev eth0 > qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 > Sent 24524210050 bytes 1482737 pkt (dropped 0, overlimits 0 requeues 339101) > rate 0bit 0pps backlog 0b 0p requeues 339101 > > 2.6.32 + Rusty's xmit_napi v2 patch + wakeup threshold=64 > --------------------------------------------------------- > $ ./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 > TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.122.1 (192.168.122.1) port 0 AF_INET > Recv Send Send Utilization Service Demand > Socket Socket Message Elapsed Send Recv Send Recv > Size Size Size Time Throughput local remote local remote > bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB > > 87380 16384 16384 60.03 3356.71 95.41 89.56 2.329 4.372 > [sridhar@localhost ~]$ tc -s qdisc show dev eth0 > qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 > Sent 25290227186 bytes 1555119 pkt (dropped 0, overlimits 0 requeues 78179) > rate 0bit 0pps backlog 0b 0p requeues 78179 > > 2.6.32 + Rusty's xmit_napi v2 patch + wakeup threshold=128 > ---------------------------------------------------------- > $./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 > TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.122.1 (192.168.122.1) port 0 AF_INET > Recv Send Send Utilization Service Demand > Socket Socket Message Elapsed Send Recv Send Recv > Size Size Size Time Throughput local remote local remote > bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB > > 87380 16384 16384 60.03 3413.79 96.30 89.79 2.311 4.309 > [sridhar@localhost ~]$ tc -s qdisc show dev eth0 > qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 > Sent 25719585472 bytes 1579448 pkt (dropped 0, overlimits 0 requeues 40299) > rate 0bit 0pps backlog 0b 0p requeues 40299 > > 2.6.32 + Rusty's xmit_napi v2 patch + don't stop early & drop skb on fail patch > ------------------------------------------------------------------------------- > $./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 > TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.122.1 (192.168.122.1) port 0 AF_INET > Recv Send Send Utilization Service Demand > Socket Socket Message Elapsed Send Recv Send Recv > Size Size Size Time Throughput local remote local remote > bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB > > 87380 16384 16384 60.03 7741.65 70.09 72.84 0.742 1.542 > [sridhar@localhost ~]$ tc -s qdisc show dev eth0 > qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 > Sent 58149531018 bytes 897991 pkt (dropped 0, overlimits 0 requeues 1) > rate 0bit 0pps backlog 0b 0p requeues 1 > > > > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 17, 2009 at 02:28:10PM -0800, Sridhar Samudrala wrote: > Another interesting observation that Jarek pointed out is that if we drop skb's, i guess > it is causing TCP to backoff and enabling TCP GSO code to send much larger packets(64K > compared to 16K). So although the throughput is higher, packets/sec is less in that > case. Maybe you're right, but there would be useful to have TCP-independent tests to compare, or at least without such impact, so with GSO disabled. Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sridhar Samudrala <sri@us.ibm.com> wrote on 12/18/2009 03:20:08 AM: > Increasing the wakeup threshold value reduced the number of requeues, but > didn't eliminate them. The throughput improved a little, but the CPU > utilization > went up. > I don't see any 'queue full' warning messages from the driver and > hence the driver > is not returning NETDEV_TX_BUSY. The requeues are happening in > sch_direct_xmit() > as it is finding that the tx queue is stopped. > > I could not get 2.6.31 virtio-net driver to work with 2.6.32 kernel by simply > replacing virtio-net.c. The compile and build went through fine, butthe guest > is not seeing the virtio-net device when it comes up. > I think it is a driver issue, not a core issue as I am able to get > good results > by not stopping the queue early in start_xmit() and dropping the skb > when xmit_skb() > fails even with 2.6.32 kernel. I think this behavior is somewhat > similar to 2.6.31 > virtio-net driver as it caches 1 skb and drops any further skbs when > ring is full > in its start_xmit routine. > > 2.6.32 + Rusty's xmit_napi v2 patch > ----------------------------------- > $ ./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 > TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168. > 122.1 (192.168.122.1) port 0 AF_INET > Recv Send Send Utilization Service Demand > Socket Socket Message Elapsed Send Recv Send Recv > Size Size Size Time Throughput local remote local remote > bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB > > 87380 16384 16384 60.03 3255.22 87.16 82.57 2.193 4.156 > $ tc -s qdisc show dev eth0 > qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 > 1 1 1 1 1 1 1 > Sent 24524210050 bytes 1482737 pkt (dropped 0, overlimits 0 requeues 339101) > rate 0bit 0pps backlog 0b 0p requeues 339101 > > 2.6.32 + Rusty's xmit_napi v2 patch + wakeup threshold=64 > --------------------------------------------------------- > $ ./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 > TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168. > 122.1 (192.168.122.1) port 0 AF_INET > Recv Send Send Utilization Service Demand > Socket Socket Message Elapsed Send Recv Send Recv > Size Size Size Time Throughput local remote local remote > bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB > > 87380 16384 16384 60.03 3356.71 95.41 89.56 2.329 4.372 > [sridhar@localhost ~]$ tc -s qdisc show dev eth0 > qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 > 1 1 1 1 1 1 1 > Sent 25290227186 bytes 1555119 pkt (dropped 0, overlimits 0 requeues 78179) > rate 0bit 0pps backlog 0b 0p requeues 78179 > > 2.6.32 + Rusty's xmit_napi v2 patch + wakeup threshold=128 > ---------------------------------------------------------- > $./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 > TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168. > 122.1 (192.168.122.1) port 0 AF_INET > Recv Send Send Utilization Service Demand > Socket Socket Message Elapsed Send Recv Send Recv > Size Size Size Time Throughput local remote local remote > bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB > > 87380 16384 16384 60.03 3413.79 96.30 89.79 2.311 4.309 > [sridhar@localhost ~]$ tc -s qdisc show dev eth0 > qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 > 1 1 1 1 1 1 1 > Sent 25719585472 bytes 1579448 pkt (dropped 0, overlimits 0 requeues 40299) > rate 0bit 0pps backlog 0b 0p requeues 40299 > > 2.6.32 + Rusty's xmit_napi v2 patch + don't stop early & drop skb onfail patch > ------------------------------------------------------------------------------- > $./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 > TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168. > 122.1 (192.168.122.1) port 0 AF_INET > Recv Send Send Utilization Service Demand > Socket Socket Message Elapsed Send Recv Send Recv > Size Size Size Time Throughput local remote local remote > bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB > > 87380 16384 16384 60.03 7741.65 70.09 72.84 0.742 1.542 > [sridhar@localhost ~]$ tc -s qdisc show dev eth0 > qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1__dev_xmit_skb > 1 1 1 1 1 1 1 > Sent 58149531018 bytes 897991 pkt (dropped 0, overlimits 0 requeues 1) > rate 0bit 0pps backlog 0b 0p requeues 1 Is the "drop skb" patch doing: - return NETDEV_TX_BUSY; + + /* drop the skb under stress. */ + vi->dev->stats.tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; Why is dropped count zero in the last test case? sch_direct_xmit is called from two places, and if it finds the txq stopped, it was called from __dev_xmit_skb (where the previous sucessful xmit had stopped the queue). This means the device is still stopping and restarting 1000's of times a min, and each restart fills up the device h/w queue with the backlogged skbs resulting in another stop. Isn't the txqlen set to 1000 in ether_setup? Can you increase the restart limit to a really high value, like 1/2 or 3/4th of the queue should be empty? Another thing to test is to simultaneously set txqueuelen to a big value. Requeue does not seem to be the reason for BW drop since it barely improved when requeue's reduced from 340K to 40K. So, as Jarek suggested, GSO could be reason. You could try testing with 64K I/O size (with GSO enabled) to get comparable results. thanks, - KK -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, 2009-12-18 at 19:16 +0530, Krishna Kumar2 wrote: > > > > 2.6.32 + Rusty's xmit_napi v2 patch + don't stop early & drop skb onfail > patch > > > ------------------------------------------------------------------------------- > > > $./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 > > TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168. > > 122.1 (192.168.122.1) port 0 AF_INET > > Recv Send Send Utilization Service > Demand > > Socket Socket Message Elapsed Send Recv Send > Recv > > Size Size Size Time Throughput local remote local > remote > > bytes bytes bytes secs. 10^6bits/s % S % S us/KB > us/KB > > > > 87380 16384 16384 60.03 7741.65 70.09 72.84 0.742 > 1.542 > > [sridhar@localhost ~]$ tc -s qdisc show dev eth0 > > qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 > 1__dev_xmit_skb > > 1 1 1 1 1 1 1 > > Sent 58149531018 bytes 897991 pkt (dropped 0, overlimits 0 requeues 1) > > rate 0bit 0pps backlog 0b 0p requeues 1 > > Is the "drop skb" patch doing: > - return NETDEV_TX_BUSY; > + > + /* drop the skb under stress. */ > + vi->dev->stats.tx_dropped++; > + kfree_skb(skb); > + return NETDEV_TX_OK; Yes. This is the patch i used with plain 2.6.32. But with Rusty's patch, i also commented out the if condition that stops the queue early in start_xmit(). > > Why is dropped count zero in the last test case? The dropped count reported by 'tc' are drops at the qdisc level and are counted via qdisc_drop(). The drops at the driver level are counted as net_device stats and are reported by ip -s link command. I see a few drops(5-6) in a 60sec run with 2.6.31 kernel. > > sch_direct_xmit is called from two places, and if it finds > the txq stopped, it was called from __dev_xmit_skb (where > the previous sucessful xmit had stopped the queue). This > means the device is still stopping and restarting 1000's > of times a min, and each restart fills up the device h/w > queue with the backlogged skbs resulting in another stop. > Isn't the txqlen set to 1000 in ether_setup? Can you > increase the restart limit to a really high value, like > 1/2 or 3/4th of the queue should be empty? Another thing > to test is to simultaneously set txqueuelen to a big value. txqueuelen limits the qdisc queue, not the device transmit queue. The device tx queue length is set by qemu and defaults to 256 for virtio-net. So a reasonable wakeup threshhold could be 64/128 and it does reduce the number of requeues. > > Requeue does not seem to be the reason for BW drop since > it barely improved when requeue's reduced from 340K to 40K. > So, as Jarek suggested, GSO could be reason. You could try > testing with 64K I/O size (with GSO enabled) to get > comparable results. Yes. with 64K messages, i am getting comparable thruput, in fact slightly better although cpu utilization is higher. So it looks like the better thruput with 2.6.31 kernel with 16K message size is a side-effect of the drops. I think Rusty's patch with 1/4 of tx ring as wakeup threshold is the first step to address the queue full warnings in 2.6.32. With further tuning it may be possible to eliminate the requeues. 2.6.32 + Rusty's xmit_napi_v2 patch $ ./netperf -c -C -H 192.168.122.1 -t TCP_STREAM -l60 -- -m 65536 TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.122.1 (192.168.122.1) port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 65536 60.03 8200.80 92.52 91.63 0.924 1.831 $ tc -s qdisc show dev eth0 qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 Sent 61613336514 bytes 1208233 pkt (dropped 0, overlimits 0 requeues 237750) rate 0bit 0pps backlog 0b 0p requeues 237750 $ ip -s link show dev eth0 2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UNKNOWN qlen 1000 link/ether 54:52:00:35:e3:74 brd ff:ff:ff:ff:ff:ff RX: bytes packets errors dropped overrun mcast 59348763 899170 0 0 0 0 TX: bytes packets errors dropped carrier collsns 1483793932 1208230 0 0 0 0 Thanks Sridhar -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -47,6 +47,9 @@ struct virtnet_info struct napi_struct napi; unsigned int status; + /* We free packets and decide whether to restart xmit here. */ + struct napi_struct xmit_napi; + /* Number of input buffers, and max we've ever had. */ unsigned int num, max; @@ -60,6 +63,9 @@ struct virtnet_info struct sk_buff_head recv; struct sk_buff_head send; + /* Capacity left in xmit queue. */ + unsigned int capacity; + /* Work struct for refilling if we run low on memory. */ struct delayed_work refill; @@ -111,11 +117,8 @@ static void skb_xmit_done(struct virtque { struct virtnet_info *vi = svq->vdev->priv; - /* Suppress further interrupts. */ - svq->vq_ops->disable_cb(svq); - /* We were probably waiting for more output buffers. */ - netif_wake_queue(vi->dev); + napi_schedule(&vi->xmit_napi); } static void receive_skb(struct net_device *dev, struct sk_buff *skb, @@ -446,7 +449,7 @@ static unsigned int free_old_xmit_skbs(s while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) { pr_debug("Sent skb %p\n", skb); - __skb_unlink(skb, &vi->send); + skb_unlink(skb, &vi->send); vi->dev->stats.tx_bytes += skb->len; vi->dev->stats.tx_packets++; tot_sgs += skb_vnet_hdr(skb)->num_sg; @@ -455,6 +458,23 @@ static unsigned int free_old_xmit_skbs(s return tot_sgs; } +static int virtnet_xmit_poll(struct napi_struct *xmit_napi, int budget) +{ + struct virtnet_info *vi = + container_of(xmit_napi, struct virtnet_info, xmit_napi); + + if (netif_queue_stopped(vi->dev)) { + vi->capacity += free_old_xmit_skbs(vi); + if (vi->capacity >= 2 + MAX_SKB_FRAGS) { + /* Suppress further xmit interrupts. */ + vi->svq->vq_ops->disable_cb(vi->svq); + napi_complete(xmit_napi); + netif_wake_queue(vi->dev); + } + } + return 1; +} + static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { struct scatterlist sg[2+MAX_SKB_FRAGS]; @@ -509,34 +529,22 @@ static netdev_tx_t start_xmit(struct sk_ struct virtnet_info *vi = netdev_priv(dev); int capacity; -again: - /* Free up any pending old buffers before queueing new ones. */ - free_old_xmit_skbs(vi); - /* Try to transmit */ + skb_queue_head(&vi->send, skb); capacity = xmit_skb(vi, skb); /* This can happen with OOM and indirect buffers. */ if (unlikely(capacity < 0)) { + skb_unlink(skb, &vi->send); netif_stop_queue(dev); dev_warn(&dev->dev, "Unexpected full queue\n"); - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { - vi->svq->vq_ops->disable_cb(vi->svq); - netif_start_queue(dev); - goto again; - } + /* If we missed an interrupt, we let virtnet_xmit_poll deal. */ + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) + napi_schedule(&vi->xmit_napi); return NETDEV_TX_BUSY; } vi->svq->vq_ops->kick(vi->svq); - - /* - * Put new one in send queue. You'd expect we'd need this before - * xmit_skb calls add_buf(), since the callback can be triggered - * immediately after that. But since the callback just triggers - * another call back here, normal network xmit locking prevents the - * race. - */ - __skb_queue_head(&vi->send, skb); + vi->capacity = capacity; /* Don't wait up for transmitted skbs to be freed. */ skb_orphan(skb); @@ -544,15 +552,16 @@ again: /* Apparently nice girls don't return TX_BUSY; stop the queue * before it gets out of hand. Naturally, this wastes entries. */ - if (capacity < 2+MAX_SKB_FRAGS) { - netif_stop_queue(dev); - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { - /* More just got used, free them then recheck. */ - capacity += free_old_xmit_skbs(vi); - if (capacity >= 2+MAX_SKB_FRAGS) { - netif_start_queue(dev); - vi->svq->vq_ops->disable_cb(vi->svq); - } + if (unlikely(capacity < 2+MAX_SKB_FRAGS)) { + /* Free old skbs; might make more capacity. */ + vi->capacity = capacity + free_old_xmit_skbs(vi); + if (unlikely(vi->capacity < 2+MAX_SKB_FRAGS)) { + /* Make sure virtnet_xmit_poll sees updated capacity */ + wmb(); + netif_stop_queue(dev); + /* Missed xmit irq? virtnet_xmit_poll will deal. */ + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) + napi_schedule(&vi->xmit_napi); } } @@ -590,6 +599,7 @@ static int virtnet_open(struct net_devic struct virtnet_info *vi = netdev_priv(dev); napi_enable(&vi->napi); + napi_enable(&vi->xmit_napi); /* If all buffers were filled by other side before we napi_enabled, we * won't get another interrupt, so process any outstanding packets @@ -652,6 +662,7 @@ static int virtnet_close(struct net_devi struct virtnet_info *vi = netdev_priv(dev); napi_disable(&vi->napi); + napi_disable(&vi->xmit_napi); return 0; } @@ -883,6 +894,7 @@ static int virtnet_probe(struct virtio_d /* Set up our device-specific information */ vi = netdev_priv(dev); netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight); + netif_napi_add(dev, &vi->xmit_napi, virtnet_xmit_poll, 64); vi->dev = dev; vi->vdev = vdev; vdev->priv = vi;