diff mbox

[net,v2,2/3] xen-netback: don't stop dealloc kthread too early

Message ID 1407515833-2550-3-git-send-email-wei.liu2@citrix.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Wei Liu Aug. 8, 2014, 4:37 p.m. UTC
Reference count the number of packets in host stack, so that we don't
stop the deallocation thread too early. If not, we can end up with
xenvif_free permanently waiting for deallocation thread to unmap grefs.

Reported-by: Thomas Leonard <talex5@gmail.com>
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Zoltan Kiss <zoltan.kiss@citrix.com>
---
 drivers/net/xen-netback/common.h    |    5 +++++
 drivers/net/xen-netback/interface.c |   16 ++++++++++++++++
 drivers/net/xen-netback/netback.c   |   24 ++++++++++++++++++++----
 3 files changed, 41 insertions(+), 4 deletions(-)

Comments

David Vrabel Aug. 11, 2014, 12:10 p.m. UTC | #1
On 08/08/14 17:37, Wei Liu wrote:
> Reference count the number of packets in host stack, so that we don't
> stop the deallocation thread too early. If not, we can end up with
> xenvif_free permanently waiting for deallocation thread to unmap grefs.
[...]
> --- a/drivers/net/xen-netback/interface.c
> +++ b/drivers/net/xen-netback/interface.c
> @@ -43,6 +43,17 @@
>  #define XENVIF_QUEUE_LENGTH 32
>  #define XENVIF_NAPI_WEIGHT  64
>  
> +void xenvif_inc_inflight_packets(struct xenvif_queue *queue)
> +{
> +	atomic_inc(&queue->inflight_packets);
> +}
> +
> +void xenvif_dec_inflight_packets(struct xenvif_queue *queue)
> +{
> +	if (atomic_dec_and_test(&queue->inflight_packets))
> +		wake_up(&queue->inflight_wq);
> +}

You don't need these wrappers if you remove the inflight_wq (see below).

>  static inline void xenvif_stop_queue(struct xenvif_queue *queue)
>  {
>  	struct net_device *dev = queue->vif->dev;
> @@ -561,6 +572,8 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
>  
>  	init_waitqueue_head(&queue->wq);
>  	init_waitqueue_head(&queue->dealloc_wq);
> +	init_waitqueue_head(&queue->inflight_wq);
> +	atomic_set(&queue->inflight_packets, 0);
>  
>  	if (tx_evtchn == rx_evtchn) {
>  		/* feature-split-event-channels == 0 */
> @@ -687,6 +700,9 @@ void xenvif_disconnect(struct xenvif *vif)
>  			queue->task = NULL;
>  		}
>  
> +		wait_event(queue->inflight_wq,
> +			   atomic_read(&queue->inflight_packets) == 0);

Just make the dealloc task not stop unless it has deallocated all
outstanding requests.  There's no need for another wait queue here.

> +
>  		if (queue->dealloc_task) {
>  			kthread_stop(queue->dealloc_task);
>  			queue->dealloc_task = NULL;
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 4734472..d2f0c7d7 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -107,6 +107,18 @@ static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
>  #define callback_param(vif, pending_idx) \
>  	(vif->pending_tx_info[pending_idx].callback_struct)
>  
> +/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
> + * increasing the inflight counter. We need to increase the inflight
> + * counter because core driver calls into xenvif_zerocopy_callback
> + * which calls xenvif_dec_inflight_packets.
> + */
> +static void set_skb_zerocopy(struct xenvif_queue *queue,
> +			     struct sk_buff *skb)
> +{
> +	skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
> +	xenvif_inc_inflight_packets(queue);
> +}

This name makes this look like a core function instead of a netback
specific one.

I would suggest a pair of functions:

xenvif_skb_zerocopy_prepare()
xenvif_skb_zerocopy_complete()

Or similar.

David
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wei Liu Aug. 11, 2014, 1:48 p.m. UTC | #2
On Mon, Aug 11, 2014 at 01:10:12PM +0100, David Vrabel wrote:
> On 08/08/14 17:37, Wei Liu wrote:
[...]
> >  	if (tx_evtchn == rx_evtchn) {
> >  		/* feature-split-event-channels == 0 */
> > @@ -687,6 +700,9 @@ void xenvif_disconnect(struct xenvif *vif)
> >  			queue->task = NULL;
> >  		}
> >  
> > +		wait_event(queue->inflight_wq,
> > +			   atomic_read(&queue->inflight_packets) == 0);
> 
> Just make the dealloc task not stop unless it has deallocated all
> outstanding requests.  There's no need for another wait queue here.
> 

Are you suggesting something like

  while (atomic_read(&queue->inflight_packets) !=0)
  	schedule_timeout(SOME_TIMEOUT);

?

> > +
> >  		if (queue->dealloc_task) {
> >  			kthread_stop(queue->dealloc_task);
> >  			queue->dealloc_task = NULL;
> > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> > index 4734472..d2f0c7d7 100644
> > --- a/drivers/net/xen-netback/netback.c
> > +++ b/drivers/net/xen-netback/netback.c
> > @@ -107,6 +107,18 @@ static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
> >  #define callback_param(vif, pending_idx) \
> >  	(vif->pending_tx_info[pending_idx].callback_struct)
> >  
> > +/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
> > + * increasing the inflight counter. We need to increase the inflight
> > + * counter because core driver calls into xenvif_zerocopy_callback
> > + * which calls xenvif_dec_inflight_packets.
> > + */
> > +static void set_skb_zerocopy(struct xenvif_queue *queue,
> > +			     struct sk_buff *skb)
> > +{
> > +	skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
> > +	xenvif_inc_inflight_packets(queue);
> > +}
> 
> This name makes this look like a core function instead of a netback
> specific one.
> 
> I would suggest a pair of functions:
> 
> xenvif_skb_zerocopy_prepare()
> xenvif_skb_zerocopy_complete()
> 

This will do.

Wei.

> Or similar.
> 
> David
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Vrabel Aug. 11, 2014, 1:58 p.m. UTC | #3
On 11/08/14 14:48, Wei Liu wrote:
> On Mon, Aug 11, 2014 at 01:10:12PM +0100, David Vrabel wrote:
>> On 08/08/14 17:37, Wei Liu wrote:
> [...]
>>>  	if (tx_evtchn == rx_evtchn) {
>>>  		/* feature-split-event-channels == 0 */
>>> @@ -687,6 +700,9 @@ void xenvif_disconnect(struct xenvif *vif)
>>>  			queue->task = NULL;
>>>  		}
>>>  
>>> +		wait_event(queue->inflight_wq,
>>> +			   atomic_read(&queue->inflight_packets) == 0);
>>
>> Just make the dealloc task not stop unless it has deallocated all
>> outstanding requests.  There's no need for another wait queue here.
>>
> 
> Are you suggesting something like
> 
>   while (atomic_read(&queue->inflight_packets) !=0)
>   	schedule_timeout(SOME_TIMEOUT);

No. That would be awful!

Add the test to the kthread itself:

int xenvif_dealloc_kthread(void *data)
{
	struct xenvif_queue *queue = data;

	while (atomic_read(&queue->inflight_packets) > 0
               || !kthread_should_stop()) {
            [...]

etc.

Although, the main loop is a bit confused, so I suggest adding:

static bool xenvif_dealloc_thread_should_stop(struct xenvif_queue *q)
{
    /* Dealloc thread must remain running if there are any inflight
     * packets so it can properly dealloc them when they complete.
     */
    return atomic_read(&queue->inflight_packets) == 0
        && kthread_should_stop();
}

And cleaning it up a bit (the while() could be a for(;;)).

David
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zoltan Kiss Aug. 11, 2014, 2:13 p.m. UTC | #4
On 11/08/14 14:58, David Vrabel wrote:
> On 11/08/14 14:48, Wei Liu wrote:
>> On Mon, Aug 11, 2014 at 01:10:12PM +0100, David Vrabel wrote:
>>> On 08/08/14 17:37, Wei Liu wrote:
>> [...]
>>>>   	if (tx_evtchn == rx_evtchn) {
>>>>   		/* feature-split-event-channels == 0 */
>>>> @@ -687,6 +700,9 @@ void xenvif_disconnect(struct xenvif *vif)
>>>>   			queue->task = NULL;
>>>>   		}
>>>>   
>>>> +		wait_event(queue->inflight_wq,
>>>> +			   atomic_read(&queue->inflight_packets) == 0);
>>>
>>> Just make the dealloc task not stop unless it has deallocated all
>>> outstanding requests.  There's no need for another wait queue here.
>>>
>>
>> Are you suggesting something like
>>
>>    while (atomic_read(&queue->inflight_packets) !=0)
>>    	schedule_timeout(SOME_TIMEOUT);
> 
> No. That would be awful!
> 
> Add the test to the kthread itself:
> 
> int xenvif_dealloc_kthread(void *data)
> {
> 	struct xenvif_queue *queue = data;
> 
> 	while (atomic_read(&queue->inflight_packets) > 0
>                 || !kthread_should_stop()) {
>              [...]
> 
> etc.
> 
> Although, the main loop is a bit confused, so I suggest adding:
> 
> static bool xenvif_dealloc_thread_should_stop(struct xenvif_queue *q)
> {
>      /* Dealloc thread must remain running if there are any inflight
>       * packets so it can properly dealloc them when they complete.
>       */
>      return atomic_read(&queue->inflight_packets) == 0
>          && kthread_should_stop();
> }
> 
> And cleaning it up a bit (the while() could be a for(;;)).

I would recommend this:
---
@@ -2066,7 +2066,7 @@ int xenvif_dealloc_kthread(void *data)
 		wait_event_interruptible(queue->dealloc_wq,
 					 tx_dealloc_work_todo(queue) ||
 					 kthread_should_stop());
-		if (kthread_should_stop())
+		if (kthread_should_stop() && !atomic_read(&queue->inflight_packets))
 			break;
 
 		xenvif_tx_dealloc_action(queue);
---
If kthread_stop called, this will keep the main loop running until all callbacks are called.
Then it proceeds to the exit branch, otherwise doesn't disrupt normal operation.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wei Liu Aug. 11, 2014, 2:31 p.m. UTC | #5
On Mon, Aug 11, 2014 at 02:58:13PM +0100, David Vrabel wrote:
> On 11/08/14 14:48, Wei Liu wrote:
> > On Mon, Aug 11, 2014 at 01:10:12PM +0100, David Vrabel wrote:
> >> On 08/08/14 17:37, Wei Liu wrote:
> > [...]
> >>>  	if (tx_evtchn == rx_evtchn) {
> >>>  		/* feature-split-event-channels == 0 */
> >>> @@ -687,6 +700,9 @@ void xenvif_disconnect(struct xenvif *vif)
> >>>  			queue->task = NULL;
> >>>  		}
> >>>  
> >>> +		wait_event(queue->inflight_wq,
> >>> +			   atomic_read(&queue->inflight_packets) == 0);
> >>
> >> Just make the dealloc task not stop unless it has deallocated all
> >> outstanding requests.  There's no need for another wait queue here.
> >>
> > 
> > Are you suggesting something like
> > 
> >   while (atomic_read(&queue->inflight_packets) !=0)
> >   	schedule_timeout(SOME_TIMEOUT);
> 
> No. That would be awful!
> 
> Add the test to the kthread itself:
> 
> int xenvif_dealloc_kthread(void *data)
> {
> 	struct xenvif_queue *queue = data;
> 
> 	while (atomic_read(&queue->inflight_packets) > 0
>                || !kthread_should_stop()) {
>             [...]
> 
> etc.
> 
> Although, the main loop is a bit confused, so I suggest adding:
> 
> static bool xenvif_dealloc_thread_should_stop(struct xenvif_queue *q)
> {
>     /* Dealloc thread must remain running if there are any inflight
>      * packets so it can properly dealloc them when they complete.
>      */
>     return atomic_read(&queue->inflight_packets) == 0
>         && kthread_should_stop();
> }
> 
> And cleaning it up a bit (the while() could be a for(;;)).
> 

Unfortunately this approach is bogus. If xenbus thread is not blocked it
can free up various resources while dealloc thread is running -- queue
can be gone under dealloc thread's feet.

Wei.

> David
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Vrabel Aug. 11, 2014, 2:34 p.m. UTC | #6
On 11/08/14 15:31, Wei Liu wrote:
> On Mon, Aug 11, 2014 at 02:58:13PM +0100, David Vrabel wrote:
>> On 11/08/14 14:48, Wei Liu wrote:
>>> On Mon, Aug 11, 2014 at 01:10:12PM +0100, David Vrabel wrote:
>>>> On 08/08/14 17:37, Wei Liu wrote:
>>> [...]
>>>>>  	if (tx_evtchn == rx_evtchn) {
>>>>>  		/* feature-split-event-channels == 0 */
>>>>> @@ -687,6 +700,9 @@ void xenvif_disconnect(struct xenvif *vif)
>>>>>  			queue->task = NULL;
>>>>>  		}
>>>>>  
>>>>> +		wait_event(queue->inflight_wq,
>>>>> +			   atomic_read(&queue->inflight_packets) == 0);
>>>>
>>>> Just make the dealloc task not stop unless it has deallocated all
>>>> outstanding requests.  There's no need for another wait queue here.
>>>>
>>>
>>> Are you suggesting something like
>>>
>>>   while (atomic_read(&queue->inflight_packets) !=0)
>>>   	schedule_timeout(SOME_TIMEOUT);
>>
>> No. That would be awful!
>>
>> Add the test to the kthread itself:
>>
>> int xenvif_dealloc_kthread(void *data)
>> {
>> 	struct xenvif_queue *queue = data;
>>
>> 	while (atomic_read(&queue->inflight_packets) > 0
>>                || !kthread_should_stop()) {
>>             [...]
>>
>> etc.
>>
>> Although, the main loop is a bit confused, so I suggest adding:
>>
>> static bool xenvif_dealloc_thread_should_stop(struct xenvif_queue *q)
>> {
>>     /* Dealloc thread must remain running if there are any inflight
>>      * packets so it can properly dealloc them when they complete.
>>      */
>>     return atomic_read(&queue->inflight_packets) == 0
>>         && kthread_should_stop();
>> }
>>
>> And cleaning it up a bit (the while() could be a for(;;)).
>>
> 
> Unfortunately this approach is bogus. If xenbus thread is not blocked it
> can free up various resources while dealloc thread is running -- queue
> can be gone under dealloc thread's feet.

kthread_stop() waits until the thread exits (like pthread_join()).

/**
 * kthread_stop - stop a thread created by kthread_create().
 * @k: thread created by kthread_create().
 *
 * Sets kthread_should_stop() for @k to return true, wakes it, and
 * waits for it to exit.

David
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wei Liu Aug. 11, 2014, 2:39 p.m. UTC | #7
On Mon, Aug 11, 2014 at 03:34:48PM +0100, David Vrabel wrote:
[...]
> >>
> >> And cleaning it up a bit (the while() could be a for(;;)).
> >>
> > 
> > Unfortunately this approach is bogus. If xenbus thread is not blocked it
> > can free up various resources while dealloc thread is running -- queue
> > can be gone under dealloc thread's feet.
> 
> kthread_stop() waits until the thread exits (like pthread_join()).
> 
> /**
>  * kthread_stop - stop a thread created by kthread_create().
>  * @k: thread created by kthread_create().
>  *
>  * Sets kthread_should_stop() for @k to return true, wakes it, and
>  * waits for it to exit.
> 

Ah, misremeber the behaviour of kthread_stop. Sorry for the noise.

Wei.

> David
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wei Liu Aug. 11, 2014, 2:44 p.m. UTC | #8
On Mon, Aug 11, 2014 at 03:13:41PM +0100, Zoltan Kiss wrote:
[...]
> > And cleaning it up a bit (the while() could be a for(;;)).
> 
> I would recommend this:
> ---
> @@ -2066,7 +2066,7 @@ int xenvif_dealloc_kthread(void *data)
>  		wait_event_interruptible(queue->dealloc_wq,
>  					 tx_dealloc_work_todo(queue) ||
>  					 kthread_should_stop());
> -		if (kthread_should_stop())
> +		if (kthread_should_stop() && !atomic_read(&queue->inflight_packets))
>  			break;
>  
>  		xenvif_tx_dealloc_action(queue);
> ---
> If kthread_stop called, this will keep the main loop running until all callbacks are called.
> Then it proceeds to the exit branch, otherwise doesn't disrupt normal operation.

This snippet lacks change to while().

I would generally go for a shorter solution if the code is
self-explanatory.

@@ -2078,21 +2066,19 @@ int xenvif_dealloc_kthread(void *data)
 {
        struct xenvif_queue *queue = data;
 
-       while (!kthread_should_stop()) {
+       for (;;) {
                wait_event_interruptible(queue->dealloc_wq,
                                         tx_dealloc_work_todo(queue) ||
                                         kthread_should_stop());
-               if (kthread_should_stop())
+               if (kthread_should_stop() &&
+                   !atomic_read(&queue->inflight_packets) &&
+                   !tx_dealloc_work_todo(queue))
                        break;
 
                xenvif_tx_dealloc_action(queue);
                cond_resched();
        }
 
-       /* Unmap anything remaining*/
-       if (tx_dealloc_work_todo(queue))
-               xenvif_tx_dealloc_action(queue);
-
        return 0;
 }
 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Vrabel Aug. 11, 2014, 3:23 p.m. UTC | #9
On 11/08/14 15:44, Wei Liu wrote:
> On Mon, Aug 11, 2014 at 03:13:41PM +0100, Zoltan Kiss wrote:
> [...]
>>> And cleaning it up a bit (the while() could be a for(;;)).
>>
>> I would recommend this:
>> ---
>> @@ -2066,7 +2066,7 @@ int xenvif_dealloc_kthread(void *data)
>>  		wait_event_interruptible(queue->dealloc_wq,
>>  					 tx_dealloc_work_todo(queue) ||
>>  					 kthread_should_stop());
>> -		if (kthread_should_stop())
>> +		if (kthread_should_stop() && !atomic_read(&queue->inflight_packets))
>>  			break;
>>  
>>  		xenvif_tx_dealloc_action(queue);
>> ---
>> If kthread_stop called, this will keep the main loop running until all callbacks are called.
>> Then it proceeds to the exit branch, otherwise doesn't disrupt normal operation.
> 
> This snippet lacks change to while().
> 
> I would generally go for a shorter solution if the code is
> self-explanatory.
> 
> @@ -2078,21 +2066,19 @@ int xenvif_dealloc_kthread(void *data)
>  {
>         struct xenvif_queue *queue = data;
>  
> -       while (!kthread_should_stop()) {
> +       for (;;) {
>                 wait_event_interruptible(queue->dealloc_wq,
>                                          tx_dealloc_work_todo(queue) ||
>                                          kthread_should_stop());

This will never sleep if the thread is being stopped when there are
packets in flight.

> -               if (kthread_should_stop())
> +               if (kthread_should_stop() &&
> +                   !atomic_read(&queue->inflight_packets) &&
> +                   !tx_dealloc_work_todo(queue))
>                         break;

Moving the final dealloc into the loop adds a cond_resched() call.  This
is harmless but not really necessary when the thread is about to stop.

>  
>                 xenvif_tx_dealloc_action(queue);
>                 cond_resched();
>         }
>  
> -       /* Unmap anything remaining*/
> -       if (tx_dealloc_work_todo(queue))
> -               xenvif_tx_dealloc_action(queue);
> -
>         return 0;
>  }

David

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wei Liu Aug. 11, 2014, 8:39 p.m. UTC | #10
On Mon, Aug 11, 2014 at 04:23:28PM +0100, David Vrabel wrote:
> On 11/08/14 15:44, Wei Liu wrote:
> > On Mon, Aug 11, 2014 at 03:13:41PM +0100, Zoltan Kiss wrote:
> > [...]
> >>> And cleaning it up a bit (the while() could be a for(;;)).
> >>
> >> I would recommend this:
> >> ---
> >> @@ -2066,7 +2066,7 @@ int xenvif_dealloc_kthread(void *data)
> >>  		wait_event_interruptible(queue->dealloc_wq,
> >>  					 tx_dealloc_work_todo(queue) ||
> >>  					 kthread_should_stop());
> >> -		if (kthread_should_stop())
> >> +		if (kthread_should_stop() && !atomic_read(&queue->inflight_packets))
> >>  			break;
> >>  
> >>  		xenvif_tx_dealloc_action(queue);
> >> ---
> >> If kthread_stop called, this will keep the main loop running until all callbacks are called.
> >> Then it proceeds to the exit branch, otherwise doesn't disrupt normal operation.
> > 
> > This snippet lacks change to while().
> > 
> > I would generally go for a shorter solution if the code is
> > self-explanatory.
> > 
> > @@ -2078,21 +2066,19 @@ int xenvif_dealloc_kthread(void *data)
> >  {
> >         struct xenvif_queue *queue = data;
> >  
> > -       while (!kthread_should_stop()) {
> > +       for (;;) {
> >                 wait_event_interruptible(queue->dealloc_wq,
> >                                          tx_dealloc_work_todo(queue) ||
> >                                          kthread_should_stop());
> 
> This will never sleep if the thread is being stopped when there are
> packets in flight.
> 

Good catch.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ef3026f..d9b7f85 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -165,6 +165,8 @@  struct xenvif_queue { /* Per-queue data for xenvif */
 	u16 dealloc_ring[MAX_PENDING_REQS];
 	struct task_struct *dealloc_task;
 	wait_queue_head_t dealloc_wq;
+	wait_queue_head_t inflight_wq;
+	atomic_t inflight_packets;
 
 	/* Use kthread for guest RX */
 	struct task_struct *task;
@@ -329,4 +331,7 @@  extern unsigned int xenvif_max_queues;
 extern struct dentry *xen_netback_dbg_root;
 #endif
 
+void xenvif_inc_inflight_packets(struct xenvif_queue *queue);
+void xenvif_dec_inflight_packets(struct xenvif_queue *queue);
+
 #endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index fdb4fca..6488801 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,6 +43,17 @@ 
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
 
+void xenvif_inc_inflight_packets(struct xenvif_queue *queue)
+{
+	atomic_inc(&queue->inflight_packets);
+}
+
+void xenvif_dec_inflight_packets(struct xenvif_queue *queue)
+{
+	if (atomic_dec_and_test(&queue->inflight_packets))
+		wake_up(&queue->inflight_wq);
+}
+
 static inline void xenvif_stop_queue(struct xenvif_queue *queue)
 {
 	struct net_device *dev = queue->vif->dev;
@@ -561,6 +572,8 @@  int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
 
 	init_waitqueue_head(&queue->wq);
 	init_waitqueue_head(&queue->dealloc_wq);
+	init_waitqueue_head(&queue->inflight_wq);
+	atomic_set(&queue->inflight_packets, 0);
 
 	if (tx_evtchn == rx_evtchn) {
 		/* feature-split-event-channels == 0 */
@@ -687,6 +700,9 @@  void xenvif_disconnect(struct xenvif *vif)
 			queue->task = NULL;
 		}
 
+		wait_event(queue->inflight_wq,
+			   atomic_read(&queue->inflight_packets) == 0);
+
 		if (queue->dealloc_task) {
 			kthread_stop(queue->dealloc_task);
 			queue->dealloc_task = NULL;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 4734472..d2f0c7d7 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -107,6 +107,18 @@  static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
 #define callback_param(vif, pending_idx) \
 	(vif->pending_tx_info[pending_idx].callback_struct)
 
+/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
+ * increasing the inflight counter. We need to increase the inflight
+ * counter because core driver calls into xenvif_zerocopy_callback
+ * which calls xenvif_dec_inflight_packets.
+ */
+static void set_skb_zerocopy(struct xenvif_queue *queue,
+			     struct sk_buff *skb)
+{
+	skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+	xenvif_inc_inflight_packets(queue);
+}
+
 /* Find the containing VIF's structure from a pointer in pending_tx_info array
  */
 static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
@@ -1525,10 +1537,13 @@  static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
 	/* remove traces of mapped pages and frag_list */
 	skb_frag_list_init(skb);
 	uarg = skb_shinfo(skb)->destructor_arg;
+	/* See comment on set_skb_zerocopy */
+	if (uarg->callback == xenvif_zerocopy_callback)
+		xenvif_inc_inflight_packets(queue);
 	uarg->callback(uarg, true);
 	skb_shinfo(skb)->destructor_arg = NULL;
 
-	skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+	set_skb_zerocopy(queue, nskb);
 	kfree_skb(nskb);
 
 	return 0;
@@ -1589,7 +1604,7 @@  static int xenvif_tx_submit(struct xenvif_queue *queue)
 				if (net_ratelimit())
 					netdev_err(queue->vif->dev,
 						   "Not enough memory to consolidate frag_list!\n");
-				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+				set_skb_zerocopy(queue, skb);
 				kfree_skb(skb);
 				continue;
 			}
@@ -1609,7 +1624,7 @@  static int xenvif_tx_submit(struct xenvif_queue *queue)
 				   "Can't setup checksum in net_tx_action\n");
 			/* We have to set this flag to trigger the callback */
 			if (skb_shinfo(skb)->destructor_arg)
-				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+				set_skb_zerocopy(queue, skb);
 			kfree_skb(skb);
 			continue;
 		}
@@ -1641,7 +1656,7 @@  static int xenvif_tx_submit(struct xenvif_queue *queue)
 		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
 		 */
 		if (skb_shinfo(skb)->destructor_arg) {
-			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+			set_skb_zerocopy(queue, skb);
 			queue->stats.tx_zerocopy_sent++;
 		}
 
@@ -1681,6 +1696,7 @@  void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
 		queue->stats.tx_zerocopy_success++;
 	else
 		queue->stats.tx_zerocopy_fail++;
+	xenvif_dec_inflight_packets(queue);
 }
 
 static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)