diff mbox

[net-next,2/6] enic: Bug fix: try harder to fill Rx ring on skb allocation failures

Message ID 20091219020946.2745.7226.stgit@savbu-pc100.cisco.com
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Scott Feldman Dec. 19, 2009, 2:09 a.m. UTC
enic: Bug fix: try harder to fill Rx ring on skb allocation failures

During probe(), make sure we get at least one skb on the Rx ring. 
Otherwise abort the interface load.  Also, if we get skb allocation
failures in NAPI poll while trying to replenish the ring, try again
later so we don't end up starving out the Rx ring completely.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Vasanthy Kolluri <vkolluri@cisco.com>
---
 drivers/net/enic/enic_main.c |   54 ++++++++++++++++++++++++++----------------
 1 files changed, 33 insertions(+), 21 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Simon Horman Dec. 19, 2009, 10:41 a.m. UTC | #1
On Fri, Dec 18, 2009 at 06:09:46PM -0800, Scott Feldman wrote:
> enic: Bug fix: try harder to fill Rx ring on skb allocation failures
> 
> During probe(), make sure we get at least one skb on the Rx ring. 
> Otherwise abort the interface load.  Also, if we get skb allocation
> failures in NAPI poll while trying to replenish the ring, try again
> later so we don't end up starving out the Rx ring completely.
> 
> Signed-off-by: Scott Feldman <scofeldm@cisco.com>
> Signed-off-by: Vasanthy Kolluri <vkolluri@cisco.com>
> ---
>  drivers/net/enic/enic_main.c |   54 ++++++++++++++++++++++++++----------------
>  1 files changed, 33 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
> index 496e8b6..0265b25 100644
> --- a/drivers/net/enic/enic_main.c
> +++ b/drivers/net/enic/enic_main.c
> @@ -1092,6 +1092,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
>  	unsigned int rq_work_to_do = budget;
>  	unsigned int wq_work_to_do = -1; /* no limit */
>  	unsigned int  work_done, rq_work_done, wq_work_done;
> +	int err;
>  
>  	/* Service RQ (first) and WQ
>  	 */
> @@ -1115,16 +1116,19 @@ static int enic_poll(struct napi_struct *napi, int budget)
>  			0 /* don't unmask intr */,
>  			0 /* don't reset intr timer */);
>  
> -	if (rq_work_done > 0) {
> +	err = vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
>  
> -		/* Replenish RQ
> -		 */
> +	/* Buffer allocation failed. Stay in polling
> +	 * mode so we can try to fill the ring again.
> +	 */
>  
> -		vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
> +	if (err)
> +		rq_work_done = rq_work_to_do;

Is it intentional for rq_work_done = rq_work_to_do to become the
return value?

>  
> -	} else {
> +	if (rq_work_done < rq_work_to_do) {
>  
> -		/* If no work done, flush all LROs and exit polling
> +		/* Some work done, but not enough to stay in polling,
> +		 * flush all LROs and exit polling
>  		 */
>  
>  		if (netdev->features & NETIF_F_LRO)
> @@ -1143,6 +1147,7 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
>  	struct net_device *netdev = enic->netdev;
>  	unsigned int work_to_do = budget;
>  	unsigned int work_done;
> +	int err;
>  
>  	/* Service RQ
>  	 */
> @@ -1150,25 +1155,30 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
>  	work_done = vnic_cq_service(&enic->cq[ENIC_CQ_RQ],
>  		work_to_do, enic_rq_service, NULL);
>  
> -	if (work_done > 0) {
> -
> -		/* Replenish RQ
> -		 */
> -
> -		vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
> -
> -		/* Return intr event credits for this polling
> -		 * cycle.  An intr event is the completion of a
> -		 * RQ packet.
> -		 */
> +	/* Return intr event credits for this polling
> +	 * cycle.  An intr event is the completion of a
> +	 * RQ packet.
> +	 */
>  
> +	if (work_done > 0)
>  		vnic_intr_return_credits(&enic->intr[ENIC_MSIX_RQ],
>  			work_done,
>  			0 /* don't unmask intr */,
>  			0 /* don't reset intr timer */);
> -	} else {
>  
> -		/* If no work done, flush all LROs and exit polling
> +	err = vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
> +
> +	/* Buffer allocation failed. Stay in polling mode
> +	 * so we can try to fill the ring again.
> +	 */
> +
> +	if (err)
> +		work_done = work_to_do;

Again, is it intended for this to be the return value of the function?

> +
> +	if (work_done < work_to_do) {
> +
> +		/* Some work done, but not enough to stay in polling,
> +		 * flush all LROs and exit polling
>  		 */
>  
>  		if (netdev->features & NETIF_F_LRO)
> @@ -1333,11 +1343,13 @@ static int enic_open(struct net_device *netdev)
>  	}
>  
>  	for (i = 0; i < enic->rq_count; i++) {
> -		err = vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
> -		if (err) {
> +		vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
> +		/* Need at least one buffer on ring to get going */
> +		if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
>  			printk(KERN_ERR PFX
>  				"%s: Unable to alloc receive buffers.\n",
>  				netdev->name);
> +			err = -ENOMEM;
>  			goto err_out_notify_unset;
>  		}
>  	}

My brain may well have switched off for the day,
but its unclear to me how &enic->rq[i] could ever be NULL.

Also, in the case where a failure occurs for i > 0,
it it necessary to unwind the previous rq allocations?

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Feldman Dec. 19, 2009, 8:39 p.m. UTC | #2
On 12/19/09 2:41 AM, "Simon Horman" <horms@verge.net.au> wrote:

> On Fri, Dec 18, 2009 at 06:09:46PM -0800, Scott Feldman wrote:
>> enic: Bug fix: try harder to fill Rx ring on skb allocation failures
>> 
>> During probe(), make sure we get at least one skb on the Rx ring.
>> Otherwise abort the interface load.  Also, if we get skb allocation
>> failures in NAPI poll while trying to replenish the ring, try again
>> later so we don't end up starving out the Rx ring completely.
>> 
>> @@ -1115,16 +1116,19 @@ static int enic_poll(struct napi_struct *napi, int
>> budget)
>> 0 /* don't unmask intr */,
>> 0 /* don't reset intr timer */);
>>  
>> - if (rq_work_done > 0) {
>> + err = vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
>>  
>> -  /* Replenish RQ
>> -   */
>> + /* Buffer allocation failed. Stay in polling
>> +  * mode so we can try to fill the ring again.
>> +  */
>>  
>> -  vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
>> + if (err)
>> +  rq_work_done = rq_work_to_do;
> 
> Is it intentional for rq_work_done = rq_work_to_do to become the
> return value?

That was intentional.  If the replacement skb allocation fails, we're
returning like we did a full budget's worth of work so we stay scheduled and
hopefully the next polling pass we'll get the allocations.  Before this fix,
there is a corner case which isn't covered: if hw has used all descs and
gens an intr and we get into polling and the replacement alloc fails, then
Rx is hung.  Hw is desc starved and we're not going to get any more intrs:
game over.

I was looking at tg3.c and it looks like it does napi_schedule() if the
allocation fails.  Would this be a better option than setting work_done =
budget?
 
>> @@ -1333,11 +1343,13 @@ static int enic_open(struct net_device *netdev)
>> }
>>  
>> for (i = 0; i < enic->rq_count; i++) {
>> -  err = vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
>> -  if (err) {
>> +  vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
>> +  /* Need at least one buffer on ring to get going */
>> +  if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
>> printk(KERN_ERR PFX
>> "%s: Unable to alloc receive buffers.\n",
>> netdev->name);
>> +   err = -ENOMEM;
>> goto err_out_notify_unset;
>> }
>> }
> 
> My brain may well have switched off for the day,
> but its unclear to me how &enic->rq[i] could ever be NULL.

You missed a ")"
 
> Also, in the case where a failure occurs for i > 0,
> it it necessary to unwind the previous rq allocations?

Yes, good catch.  We'll fix that.

-scott

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman Dec. 20, 2009, 1:15 a.m. UTC | #3
On Sat, Dec 19, 2009 at 12:39:03PM -0800, Scott Feldman wrote:
> On 12/19/09 2:41 AM, "Simon Horman" <horms@verge.net.au> wrote:
> 
> > On Fri, Dec 18, 2009 at 06:09:46PM -0800, Scott Feldman wrote:
> >> enic: Bug fix: try harder to fill Rx ring on skb allocation failures
> >> 
> >> During probe(), make sure we get at least one skb on the Rx ring.
> >> Otherwise abort the interface load.  Also, if we get skb allocation
> >> failures in NAPI poll while trying to replenish the ring, try again
> >> later so we don't end up starving out the Rx ring completely.
> >> 
> >> @@ -1115,16 +1116,19 @@ static int enic_poll(struct napi_struct *napi, int
> >> budget)
> >> 0 /* don't unmask intr */,
> >> 0 /* don't reset intr timer */);
> >>  
> >> - if (rq_work_done > 0) {
> >> + err = vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
> >>  
> >> -  /* Replenish RQ
> >> -   */
> >> + /* Buffer allocation failed. Stay in polling
> >> +  * mode so we can try to fill the ring again.
> >> +  */
> >>  
> >> -  vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
> >> + if (err)
> >> +  rq_work_done = rq_work_to_do;
> > 
> > Is it intentional for rq_work_done = rq_work_to_do to become the
> > return value?
> 
> That was intentional.  If the replacement skb allocation fails, we're
> returning like we did a full budget's worth of work so we stay scheduled and
> hopefully the next polling pass we'll get the allocations.  Before this fix,
> there is a corner case which isn't covered: if hw has used all descs and
> gens an intr and we get into polling and the replacement alloc fails, then
> Rx is hung.  Hw is desc starved and we're not going to get any more intrs:
> game over.

Ok, understood. Though doesn't this fix just narrow the scope for that
failure? It seems that it could still occur in a pathological case.

> I was looking at tg3.c and it looks like it does napi_schedule() if the
> allocation fails.  Would this be a better option than setting work_done =
> budget?

I don't think that calling napi_schedule() would help for this case as
it looks like it only schedules the poll routine if its not already running.

>  
> >> @@ -1333,11 +1343,13 @@ static int enic_open(struct net_device *netdev)
> >> }
> >>  
> >> for (i = 0; i < enic->rq_count; i++) {
> >> -  err = vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
> >> -  if (err) {
> >> +  vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
> >> +  /* Need at least one buffer on ring to get going */
> >> +  if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
> >> printk(KERN_ERR PFX
> >> "%s: Unable to alloc receive buffers.\n",
> >> netdev->name);
> >> +   err = -ENOMEM;
> >> goto err_out_notify_unset;
> >> }
> >> }
> > 
> > My brain may well have switched off for the day,
> > but its unclear to me how &enic->rq[i] could ever be NULL.
> 
> You missed a ")"

Yes, I did. And the more I stared at it, the more I missed it :-(

>  
> > Also, in the case where a failure occurs for i > 0,
> > it it necessary to unwind the previous rq allocations?
> 
> Yes, good catch.  We'll fix that.
> 
> -scott
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Feldman Dec. 20, 2009, 1:29 a.m. UTC | #4
On 12/19/09 5:15 PM, "Simon Horman" <horms@verge.net.au> wrote:

> On Sat, Dec 19, 2009 at 12:39:03PM -0800, Scott Feldman wrote:
>>>> +  rq_work_done = rq_work_to_do;
>>> 
>>> Is it intentional for rq_work_done = rq_work_to_do to become the
>>> return value?
>> 
>> That was intentional.  If the replacement skb allocation fails, we're
>> returning like we did a full budget's worth of work so we stay scheduled and
>> hopefully the next polling pass we'll get the allocations.  Before this fix,
>> there is a corner case which isn't covered: if hw has used all descs and
>> gens an intr and we get into polling and the replacement alloc fails, then
>> Rx is hung.  Hw is desc starved and we're not going to get any more intrs:
>> game over.
> 
> Ok, understood. Though doesn't this fix just narrow the scope for that
> failure? It seems that it could still occur in a pathological case.

Oh!  The intention was to fix it absolutely.  Please expand on the
pathological case your thinking about.  We need this 100% solved, but now
you've got me worried...  ;)

-scott

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Feldman Dec. 20, 2009, 1:51 a.m. UTC | #5
On 12/19/09 2:41 AM, "Simon Horman" <horms@verge.net.au> wrote:

> On Fri, Dec 18, 2009 at 06:09:46PM -0800, Scott Feldman wrote:
>> for (i = 0; i < enic->rq_count; i++) {
>> -  err = vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
>> -  if (err) {
>> +  vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
>> +  /* Need at least one buffer on ring to get going */
>> +  if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
>> printk(KERN_ERR PFX
>> "%s: Unable to alloc receive buffers.\n",
>> netdev->name);
>> +   err = -ENOMEM;
>> goto err_out_notify_unset;
>> }
>> }
> 
> Also, in the case where a failure occurs for i > 0,
> it it necessary to unwind the previous rq allocations?

Sorry Simon, I replied wrongly on this one.  The code only cares if _all_
allocations failed (i.e. The ring is empty), and takes the err path.  Since
nothing was allocated, the err path has nothing to cleanup.

If at lease one skb was allocated, then we don't go down the err path, even
if not all of the allocations succeeded.  Bottom line is we only need one
skb on the ring to get the ball rolling.  If we start out with a full
rings-worth of skbs (the expected case), then that's even better.

-scott

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman Dec. 20, 2009, 6:14 a.m. UTC | #6
On Sat, Dec 19, 2009 at 05:29:43PM -0800, Scott Feldman wrote:
> On 12/19/09 5:15 PM, "Simon Horman" <horms@verge.net.au> wrote:
> 
> > On Sat, Dec 19, 2009 at 12:39:03PM -0800, Scott Feldman wrote:
> >>>> +  rq_work_done = rq_work_to_do;
> >>> 
> >>> Is it intentional for rq_work_done = rq_work_to_do to become the
> >>> return value?
> >> 
> >> That was intentional.  If the replacement skb allocation fails, we're
> >> returning like we did a full budget's worth of work so we stay scheduled and
> >> hopefully the next polling pass we'll get the allocations.  Before this fix,
> >> there is a corner case which isn't covered: if hw has used all descs and
> >> gens an intr and we get into polling and the replacement alloc fails, then
> >> Rx is hung.  Hw is desc starved and we're not going to get any more intrs:
> >> game over.
> > 
> > Ok, understood. Though doesn't this fix just narrow the scope for that
> > failure? It seems that it could still occur in a pathological case.
> 
> Oh!  The intention was to fix it absolutely.  Please expand on the
> pathological case your thinking about.  We need this 100% solved, but now
> you've got me worried...  ;)

Perhaps I am misunderstanding the problem, but what if the budget in
net_rx_action() is exhausted before a call to enic_poll*() successfully
allocs?

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman Dec. 20, 2009, 6:54 a.m. UTC | #7
On Sat, Dec 19, 2009 at 05:51:31PM -0800, Scott Feldman wrote:
> On 12/19/09 2:41 AM, "Simon Horman" <horms@verge.net.au> wrote:
> 
> > On Fri, Dec 18, 2009 at 06:09:46PM -0800, Scott Feldman wrote:
> >> for (i = 0; i < enic->rq_count; i++) {
> >> -  err = vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
> >> -  if (err) {
> >> +  vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
> >> +  /* Need at least one buffer on ring to get going */
> >> +  if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
> >> printk(KERN_ERR PFX
> >> "%s: Unable to alloc receive buffers.\n",
> >> netdev->name);
> >> +   err = -ENOMEM;
> >> goto err_out_notify_unset;
> >> }
> >> }
> > 
> > Also, in the case where a failure occurs for i > 0,
> > it it necessary to unwind the previous rq allocations?
> 
> Sorry Simon, I replied wrongly on this one.  The code only cares if _all_
> allocations failed (i.e. The ring is empty), and takes the err path.  Since
> nothing was allocated, the err path has nothing to cleanup.
> 
> If at lease one skb was allocated, then we don't go down the err path, even
> if not all of the allocations succeeded.  Bottom line is we only need one
> skb on the ring to get the ball rolling.  If we start out with a full
> rings-worth of skbs (the expected case), then that's even better.

Understood. Thanks for the clarification.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 496e8b6..0265b25 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -1092,6 +1092,7 @@  static int enic_poll(struct napi_struct *napi, int budget)
 	unsigned int rq_work_to_do = budget;
 	unsigned int wq_work_to_do = -1; /* no limit */
 	unsigned int  work_done, rq_work_done, wq_work_done;
+	int err;
 
 	/* Service RQ (first) and WQ
 	 */
@@ -1115,16 +1116,19 @@  static int enic_poll(struct napi_struct *napi, int budget)
 			0 /* don't unmask intr */,
 			0 /* don't reset intr timer */);
 
-	if (rq_work_done > 0) {
+	err = vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
 
-		/* Replenish RQ
-		 */
+	/* Buffer allocation failed. Stay in polling
+	 * mode so we can try to fill the ring again.
+	 */
 
-		vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
+	if (err)
+		rq_work_done = rq_work_to_do;
 
-	} else {
+	if (rq_work_done < rq_work_to_do) {
 
-		/* If no work done, flush all LROs and exit polling
+		/* Some work done, but not enough to stay in polling,
+		 * flush all LROs and exit polling
 		 */
 
 		if (netdev->features & NETIF_F_LRO)
@@ -1143,6 +1147,7 @@  static int enic_poll_msix(struct napi_struct *napi, int budget)
 	struct net_device *netdev = enic->netdev;
 	unsigned int work_to_do = budget;
 	unsigned int work_done;
+	int err;
 
 	/* Service RQ
 	 */
@@ -1150,25 +1155,30 @@  static int enic_poll_msix(struct napi_struct *napi, int budget)
 	work_done = vnic_cq_service(&enic->cq[ENIC_CQ_RQ],
 		work_to_do, enic_rq_service, NULL);
 
-	if (work_done > 0) {
-
-		/* Replenish RQ
-		 */
-
-		vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
-
-		/* Return intr event credits for this polling
-		 * cycle.  An intr event is the completion of a
-		 * RQ packet.
-		 */
+	/* Return intr event credits for this polling
+	 * cycle.  An intr event is the completion of a
+	 * RQ packet.
+	 */
 
+	if (work_done > 0)
 		vnic_intr_return_credits(&enic->intr[ENIC_MSIX_RQ],
 			work_done,
 			0 /* don't unmask intr */,
 			0 /* don't reset intr timer */);
-	} else {
 
-		/* If no work done, flush all LROs and exit polling
+	err = vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
+
+	/* Buffer allocation failed. Stay in polling mode
+	 * so we can try to fill the ring again.
+	 */
+
+	if (err)
+		work_done = work_to_do;
+
+	if (work_done < work_to_do) {
+
+		/* Some work done, but not enough to stay in polling,
+		 * flush all LROs and exit polling
 		 */
 
 		if (netdev->features & NETIF_F_LRO)
@@ -1333,11 +1343,13 @@  static int enic_open(struct net_device *netdev)
 	}
 
 	for (i = 0; i < enic->rq_count; i++) {
-		err = vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
-		if (err) {
+		vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
+		/* Need at least one buffer on ring to get going */
+		if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
 			printk(KERN_ERR PFX
 				"%s: Unable to alloc receive buffers.\n",
 				netdev->name);
+			err = -ENOMEM;
 			goto err_out_notify_unset;
 		}
 	}