Message ID | 1409880647-14887-5-git-send-email-bpoirier@suse.de |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
> static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, > struct netdev_queue *txq, struct sk_buff *skb) > { > - struct sk_buff *segs, *nskb; > - u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3; > + unsigned int segs_remaining = skb_shinfo(skb)->gso_segs; > > - /* Estimate the number of fragments in the worst case */ > - tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est); > - if (netif_tx_queue_stopped(txq)) > - return NETDEV_TX_BUSY; > + if (unlikely(tg3_tx_avail(tnapi) <= segs_remaining)) { > + if (!skb_is_nonlinear(skb) || skb_linearize(skb)) > + goto tg3_tso_bug_drop; > + tg3_start_xmit(skb, tp->dev); fyi.. Initially the driver was doing a skb_copy() (tigon3_dma_hwbug_workaround()) for LSO skb that met HW bug conditions but users started reporting page allocation failures due to copying of large LSO skbs. To avoid this Commit 4caab52eb102f1 (tg3: Prevent page allocation failure during TSO workaround) changed the driver logic to do skb_gso_segment() for LSO skbs that met the HW bug conditions. With skb_linearize() we might end up again with memory allocation failures for large LSO skbs though at a much less frequent level (ie when TX queue is almost full). Also some of the tg3 supported chips like 5719, 57766 have dma_limits of 4k, 2k respectively so if the LSO skb that gets linearized has size more than dma_limit then tg3_tx_frag_set() will consume more descriptors and if budget becomes 0 in tg3_tx_frag_set() we end up calling tg3_tso_bug() again and eventually dropping the skb, if descriptors do not get freed still. Instead the skb can be dropped when we know we do not have enough descriptors to handle skb for these chip versions. > + } else { > + struct sk_buff *segs, *nskb; > > - segs = skb_gso_segment(skb, tp->dev->features & > - ~(NETIF_F_TSO | NETIF_F_TSO6)); > - if (IS_ERR(segs) || !segs) > - goto tg3_tso_bug_end; > + segs = skb_gso_segment(skb, tp->dev->features & > + ~(NETIF_F_TSO | NETIF_F_TSO6 | > + NETIF_F_SG)); > + if (IS_ERR(segs) || !segs) > + goto tg3_tso_bug_drop; > > - do { > - nskb = segs; > - segs = segs->next; > - nskb->next = NULL; > - tg3_start_xmit(nskb, tp->dev); > - } while (segs); > + do { > + nskb = segs; > + segs = segs->next; > + nskb->next = NULL; > + if (--segs_remaining) > + __tg3_start_xmit(nskb, tp->dev, segs_remaining); > + else > + tg3_start_xmit(nskb, tp->dev); > + } while (segs); > > -tg3_tso_bug_end: > + dev_kfree_skb_any(skb); > + } > + > + return NETDEV_TX_OK; > + > +tg3_tso_bug_drop: > + tp->tx_dropped++; > dev_kfree_skb_any(skb); > > return NETDEV_TX_OK; > @@ -7895,6 +7908,12 @@ tg3_tso_bug_end: > /* hard_start_xmit for all devices */ > static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) > { > + return __tg3_start_xmit(skb, dev, MAX_SKB_FRAGS + 1); > +} > + > +static netdev_tx_t __tg3_start_xmit(struct sk_buff *skb, > + struct net_device *dev, u32 stop_thresh) > +{ > struct tg3 *tp = netdev_priv(dev); > u32 len, entry, base_flags, mss, vlan = 0; > u32 budget; > @@ -8102,7 +8121,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) > tw32_tx_mbox(tnapi->prodmbox, entry); > > tnapi->tx_prod = entry; > - tg3_maybe_stop_txq(tnapi, txq, MAX_SKB_FRAGS + 1, > + tg3_maybe_stop_txq(tnapi, txq, stop_thresh, > TG3_TX_WAKEUP_THRESH(tnapi)); > > mmiowb(); > @@ -12336,9 +12355,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e > if ((ering->rx_pending > tp->rx_std_ring_mask) || > (ering->rx_jumbo_pending > tp->rx_jmb_ring_mask) || > (ering->tx_pending > TG3_TX_RING_SIZE - 1) || > - (ering->tx_pending <= MAX_SKB_FRAGS + 1) || > - (tg3_flag(tp, TSO_BUG) && > - (ering->tx_pending <= (MAX_SKB_FRAGS * 3)))) > + (ering->tx_pending <= MAX_SKB_FRAGS + 1)) > return -EINVAL; > > if (netif_running(dev)) { -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, 2014-09-05 at 16:35 -0700, Prashant Sreedharan wrote: > fyi.. Initially the driver was doing a skb_copy() > (tigon3_dma_hwbug_workaround()) for LSO skb that met HW bug conditions > but users started reporting page allocation failures due to copying of > large LSO skbs. To avoid this Commit 4caab52eb102f1 (tg3: Prevent page > allocation failure during TSO workaround) changed the driver logic to do > skb_gso_segment() for LSO skbs that met the HW bug conditions. With > skb_linearize() we might end up again with memory allocation failures > for large LSO skbs though at a much less frequent level (ie when TX > queue is almost full). Note that TCP stack has one skb collapse feature, currently limited in allocation of linear skbs fitting a whole page. Instead of this private helper (and pretty limited one btw), we could add a core function, that would build skbs with order-0 fragments. Instead of skb_linearize(), I guess many call sites could instead use this new helper. Because as you said, skb_linearize() of one 64KB GSO packet can ask order-5 allocations, and this generally does not work reliably. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Eric Dumazet <eric.dumazet@gmail.com> Date: Fri, 05 Sep 2014 17:03:30 -0700 > Instead of this private helper (and pretty limited one btw), we could > add a core function, that would build skbs with order-0 fragments. > > Instead of skb_linearize(), I guess many call sites could instead use > this new helper. > > Because as you said, skb_linearize() of one 64KB GSO packet can ask > order-5 allocations, and this generally does not work reliably. xen-netback could make use of this helper too. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 6e6b07c..a9787a1 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7830,6 +7830,8 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, } static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); +static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *, + u32); /* Returns true if the queue has been stopped. Note that it may have been * restarted since. @@ -7866,27 +7868,38 @@ static inline bool tg3_maybe_stop_txq(struct tg3_napi *tnapi, static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, struct netdev_queue *txq, struct sk_buff *skb) { - struct sk_buff *segs, *nskb; - u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3; + unsigned int segs_remaining = skb_shinfo(skb)->gso_segs; - /* Estimate the number of fragments in the worst case */ - tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est); - if (netif_tx_queue_stopped(txq)) - return NETDEV_TX_BUSY; + if (unlikely(tg3_tx_avail(tnapi) <= segs_remaining)) { + if (!skb_is_nonlinear(skb) || skb_linearize(skb)) + goto tg3_tso_bug_drop; + tg3_start_xmit(skb, tp->dev); + } else { + struct sk_buff *segs, *nskb; - segs = skb_gso_segment(skb, tp->dev->features & - ~(NETIF_F_TSO | NETIF_F_TSO6)); - if (IS_ERR(segs) || !segs) - goto tg3_tso_bug_end; + segs = skb_gso_segment(skb, tp->dev->features & + ~(NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_SG)); + if (IS_ERR(segs) || !segs) + goto tg3_tso_bug_drop; - do { - nskb = segs; - segs = segs->next; - nskb->next = NULL; - tg3_start_xmit(nskb, tp->dev); - } while (segs); + do { + nskb = segs; + segs = segs->next; + nskb->next = NULL; + if (--segs_remaining) + __tg3_start_xmit(nskb, tp->dev, segs_remaining); + else + tg3_start_xmit(nskb, tp->dev); + } while (segs); -tg3_tso_bug_end: + dev_kfree_skb_any(skb); + } + + return NETDEV_TX_OK; + +tg3_tso_bug_drop: + tp->tx_dropped++; dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -7895,6 +7908,12 @@ tg3_tso_bug_end: /* hard_start_xmit for all devices */ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) { + return __tg3_start_xmit(skb, dev, MAX_SKB_FRAGS + 1); +} + +static netdev_tx_t __tg3_start_xmit(struct sk_buff *skb, + struct net_device *dev, u32 stop_thresh) +{ struct tg3 *tp = netdev_priv(dev); u32 len, entry, base_flags, mss, vlan = 0; u32 budget; @@ -8102,7 +8121,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) tw32_tx_mbox(tnapi->prodmbox, entry); tnapi->tx_prod = entry; - tg3_maybe_stop_txq(tnapi, txq, MAX_SKB_FRAGS + 1, + tg3_maybe_stop_txq(tnapi, txq, stop_thresh, TG3_TX_WAKEUP_THRESH(tnapi)); mmiowb(); @@ -12336,9 +12355,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e if ((ering->rx_pending > tp->rx_std_ring_mask) || (ering->rx_jumbo_pending > tp->rx_jmb_ring_mask) || (ering->tx_pending > TG3_TX_RING_SIZE - 1) || - (ering->tx_pending <= MAX_SKB_FRAGS + 1) || - (tg3_flag(tp, TSO_BUG) && - (ering->tx_pending <= (MAX_SKB_FRAGS * 3)))) + (ering->tx_pending <= MAX_SKB_FRAGS + 1)) return -EINVAL; if (netif_running(dev)) {