diff mbox

[net-next] ixgbe: flush when in xmit_more mode and under descriptor pressure

Message ID 1409068105-16634-1-git-send-email-dborkman@redhat.com
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Daniel Borkmann Aug. 26, 2014, 3:48 p.m. UTC
When xmit_more mode is being used and the ring is about to become
full, enforce a tail pointer write to the hw. Otherwise, we could
risk a Tx hang as pointed out by Alex.

Suggested-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
---
 Hi Alex, something along that lines regarding your first comment?

 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 67 +++++++++++++++------------
 1 file changed, 38 insertions(+), 29 deletions(-)

Comments

Duyck, Alexander H Aug. 26, 2014, 4:01 p.m. UTC | #1
On 08/26/2014 08:48 AM, Daniel Borkmann wrote:
> When xmit_more mode is being used and the ring is about to become
> full, enforce a tail pointer write to the hw. Otherwise, we could
> risk a Tx hang as pointed out by Alex.
> 
> Suggested-by: Alexander Duyck <alexander.h.duyck@intel.com>
> Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
> ---
>  Hi Alex, something along that lines regarding your first comment?
> 
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 67 +++++++++++++++------------
>  1 file changed, 38 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index ba9ceaa..f851e84 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -6853,6 +6883,7 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring,
>  	u32 tx_flags = first->tx_flags;
>  	u32 cmd_type = ixgbe_tx_cmd_type(skb, tx_flags);
>  	u16 i = tx_ring->next_to_use;
> +	bool desc_pressure;
>  
>  	tx_desc = IXGBE_TX_DESC(tx_ring, i);
>  
> @@ -6958,10 +6989,15 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring,
>  
>  	tx_ring->next_to_use = i;
>  
> -	if (!skb->xmit_more) {
> +	desc_pressure = (ixgbe_desc_unused(tx_ring) < DESC_NEEDED);
> +	if (!skb->xmit_more || unlikely(desc_pressure)) {
>  		/* notify HW of packet */
>  		ixgbe_write_tail(tx_ring, i);
> +
> +		if (unlikely(desc_pressure))
> +			__ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
>  	}
> +
>  	return;
>  dma_error:
>  	dev_err(tx_ring->dev, "TX DMA map failed\n");
> @@ -6978,6 +7014,7 @@ dma_error:
>  	}
>  
>  	tx_ring->next_to_use = i;
> +	ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
>  }
>  

Actually this bit here is much more complicated than it probably needs
to be.

My thought is to just fold ixgbe_maybe_stop_tx into the if statement.

So it shoudl be:
 if (!skb->smit_more || ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED))

If the BQL bit is folded into the xmit_more check somewhere then that
should be enough to resolve any possible ring stalls.

Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Daniel Borkmann Aug. 26, 2014, 4:21 p.m. UTC | #2
On 08/26/2014 06:01 PM, Alexander Duyck wrote:
...
> My thought is to just fold ixgbe_maybe_stop_tx into the if statement.
>
> So it shoudl be:
>   if (!skb->smit_more || ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED))

Right, that's better; I was just thinking about the DMA error case,
but in that case we release resources back anyway.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Duyck, Alexander H Aug. 26, 2014, 4:24 p.m. UTC | #3
On 08/26/2014 09:21 AM, Daniel Borkmann wrote:
> On 08/26/2014 06:01 PM, Alexander Duyck wrote:
> ...
>> My thought is to just fold ixgbe_maybe_stop_tx into the if statement.
>>
>> So it shoudl be:
>>   if (!skb->smit_more || ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED))
> 
> Right, that's better; I was just thinking about the DMA error case,
> but in that case we release resources back anyway.

Actually the order does need to be reversed though.  We should test for
stop_tx first, then xmit_more.  Doing it the other way around would
cause issues as maybe_stop_tx has some other side effects.

Thaks,

Alex
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index ba9ceaa..f851e84 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6837,6 +6837,36 @@  static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc,
 	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
 }
 
+static int __ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
+{
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+	/* Herbert's original patch had:
+	 *  smp_mb__after_netif_stop_queue();
+	 * but since that doesn't exist yet, just open code it.
+	 */
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available.
+	 */
+	if (likely(ixgbe_desc_unused(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_queue because it doesn't call schedule */
+	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+	++tx_ring->tx_stats.restart_queue;
+	return 0;
+}
+
+static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
+{
+	if (likely(ixgbe_desc_unused(tx_ring) >= size))
+		return 0;
+
+	return __ixgbe_maybe_stop_tx(tx_ring, size);
+}
+
 #define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \
 		       IXGBE_TXD_CMD_RS)
 
@@ -6853,6 +6883,7 @@  static void ixgbe_tx_map(struct ixgbe_ring *tx_ring,
 	u32 tx_flags = first->tx_flags;
 	u32 cmd_type = ixgbe_tx_cmd_type(skb, tx_flags);
 	u16 i = tx_ring->next_to_use;
+	bool desc_pressure;
 
 	tx_desc = IXGBE_TX_DESC(tx_ring, i);
 
@@ -6958,10 +6989,15 @@  static void ixgbe_tx_map(struct ixgbe_ring *tx_ring,
 
 	tx_ring->next_to_use = i;
 
-	if (!skb->xmit_more) {
+	desc_pressure = (ixgbe_desc_unused(tx_ring) < DESC_NEEDED);
+	if (!skb->xmit_more || unlikely(desc_pressure)) {
 		/* notify HW of packet */
 		ixgbe_write_tail(tx_ring, i);
+
+		if (unlikely(desc_pressure))
+			__ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
 	}
+
 	return;
 dma_error:
 	dev_err(tx_ring->dev, "TX DMA map failed\n");
@@ -6978,6 +7014,7 @@  dma_error:
 	}
 
 	tx_ring->next_to_use = i;
+	ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
 }
 
 static void ixgbe_atr(struct ixgbe_ring *ring,
@@ -7068,32 +7105,6 @@  static void ixgbe_atr(struct ixgbe_ring *ring,
 					      input, common, ring->queue_index);
 }
 
-static int __ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
-{
-	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
-	/* Herbert's original patch had:
-	 *  smp_mb__after_netif_stop_queue();
-	 * but since that doesn't exist yet, just open code it. */
-	smp_mb();
-
-	/* We need to check again in a case another CPU has just
-	 * made room available. */
-	if (likely(ixgbe_desc_unused(tx_ring) < size))
-		return -EBUSY;
-
-	/* A reprieve! - use start_queue because it doesn't call schedule */
-	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
-	++tx_ring->tx_stats.restart_queue;
-	return 0;
-}
-
-static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
-{
-	if (likely(ixgbe_desc_unused(tx_ring) >= size))
-		return 0;
-	return __ixgbe_maybe_stop_tx(tx_ring, size);
-}
-
 static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 			      void *accel_priv, select_queue_fallback_t fallback)
 {
@@ -7262,8 +7273,6 @@  xmit_fcoe:
 #endif /* IXGBE_FCOE */
 	ixgbe_tx_map(tx_ring, first, hdr_len);
 
-	ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
-
 	return NETDEV_TX_OK;
 
 out_drop: