From patchwork Mon Mar 23 11:35:37 2015
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Felix Fietkau
X-Patchwork-Id: 453393
X-Patchwork-Delegate: davem@davemloft.net
Return-Path: 
X-Original-To: patchwork-incoming@ozlabs.org
Delivered-To: patchwork-incoming@ozlabs.org
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id AA9B0140134
	for ; Mon, 23 Mar 2015 22:51:32 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752496AbbCWLv1 (ORCPT );
	Mon, 23 Mar 2015 07:51:27 -0400
Received: from static.88-198-24-112.clients.your-server.de
	([88.198.24.112]:51356 "EHLO nbd.name" rhost-flags-OK-OK-OK-FAIL)
	by vger.kernel.org with ESMTP id S1752113AbbCWLv0 (ORCPT );
	Mon, 23 Mar 2015 07:51:26 -0400
X-Greylist: delayed 945 seconds by postgrey-1.27 at vger.kernel.org;
	Mon, 23 Mar 2015 07:51:26 EDT
Received: by nf.lan (Postfix, from userid 501)
	id 55A5FD0964F5; Mon, 23 Mar 2015 12:35:37 +0100 (CET)
From: Felix Fietkau
To: netdev@vger.kernel.org
Cc: zajec5@gmail.com, hauke@hauke-m.de
Subject: [PATCH net-next 3/3] bgmac: implement scatter/gather support
Date: Mon, 23 Mar 2015 12:35:37 +0100
Message-Id: <1427110537-18091-3-git-send-email-nbd@openwrt.org>
X-Mailer: git-send-email 2.2.2
In-Reply-To: <1427110537-18091-1-git-send-email-nbd@openwrt.org>
References: <1427110537-18091-1-git-send-email-nbd@openwrt.org>
Sender: netdev-owner@vger.kernel.org
Precedence: bulk
List-ID: 
X-Mailing-List: netdev@vger.kernel.org

Always use software checksumming, since the hardware does not have any
checksum offload support. This significantly improves local TCP tx
performance.

Signed-off-by: Felix Fietkau
---
 drivers/net/ethernet/broadcom/bgmac.c | 164 +++++++++++++++++++++++++---------
 1 file changed, 121 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index c7da37a..fa8f9e1 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct bgmac *bgmac,
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
 }
 
+static void
+bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+		     int i, int len, u32 ctl0)
+{
+	struct bgmac_slot_info *slot;
+	struct bgmac_dma_desc *dma_desc;
+	u32 ctl1;
+
+	if (i == ring->num_slots - 1)
+		ctl0 |= BGMAC_DESC_CTL0_EOT;
+
+	ctl1 = len & BGMAC_DESC_CTL1_LEN;
+
+	slot = &ring->slots[i];
+	dma_desc = &ring->cpu_base[i];
+	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+	dma_desc->ctl0 = cpu_to_le32(ctl0);
+	dma_desc->ctl1 = cpu_to_le32(ctl1);
+}
+
 static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 				    struct bgmac_dma_ring *ring,
 				    struct sk_buff *skb)
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
 	struct net_device *net_dev = bgmac->net_dev;
-	struct bgmac_dma_desc *dma_desc;
-	struct bgmac_slot_info *slot;
-	u32 ctl0, ctl1;
+	struct bgmac_slot_info *slot = &ring->slots[ring->end];
 	int free_slots;
+	int nr_frags;
+	u32 flags;
+	int index = ring->end;
+	int i;
 
 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
 		bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
-		goto err_stop_drop;
+		goto err_drop;
 	}
 
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		skb_checksum_help(skb);
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
 	if (ring->start <= ring->end)
 		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
 	else
 		free_slots = ring->start - ring->end;
-	if (free_slots == 1) {
+
+	if (free_slots <= nr_frags + 1) {
 		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
 		netif_stop_queue(net_dev);
 		return NETDEV_TX_BUSY;
 	}
 
-	slot = &ring->slots[ring->end];
-	slot->skb = skb;
-	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
+	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
 					DMA_TO_DEVICE);
-	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
-		bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
-			  ring->mmio_base);
-		goto err_stop_drop;
-	}
+	if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+		goto err_dma_head;
 
-	ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
-	if (ring->end == ring->num_slots - 1)
-		ctl0 |= BGMAC_DESC_CTL0_EOT;
-	ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
+	flags = BGMAC_DESC_CTL0_SOF;
+	if (!nr_frags)
+		flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
 
-	dma_desc = ring->cpu_base;
-	dma_desc += ring->end;
-	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
-	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
-	dma_desc->ctl0 = cpu_to_le32(ctl0);
-	dma_desc->ctl1 = cpu_to_le32(ctl1);
+	bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
+	flags = 0;
+
+	for (i = 0; i < nr_frags; i++) {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		int len = skb_frag_size(frag);
+
+		index = (index + 1) % BGMAC_TX_RING_SLOTS;
+		slot = &ring->slots[index];
+		slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
+						  len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+			goto err_dma;
+
+		if (i == nr_frags - 1)
+			flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
+
+		bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
+	}
+
+	slot->skb = skb;
 
 	netdev_sent_queue(net_dev, skb->len);
 
@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 	/* Increase ring->end to point empty slot. We tell hardware the first
 	 * slot it should *not* read.
 	 */
-	if (++ring->end >= BGMAC_TX_RING_SLOTS)
-		ring->end = 0;
+	ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
 		    ring->index_base +
 		    ring->end * sizeof(struct bgmac_dma_desc));
 
-	/* Always keep one slot free to allow detecting bugged calls. */
-	if (--free_slots == 1)
+	free_slots -= nr_frags + 1;
+	if (free_slots < 8)
 		netif_stop_queue(net_dev);
 
 	return NETDEV_TX_OK;
 
-err_stop_drop:
-	netif_stop_queue(net_dev);
+err_dma:
+	dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+			 DMA_TO_DEVICE);
+
+	while (i > 0) {
+		int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+		struct bgmac_slot_info *slot = &ring->slots[index];
+		u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
+		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+
+		dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
+	}
+
+err_dma_head:
+	bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+		  ring->mmio_base);
+
+err_drop:
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 
 	while (ring->start != empty_slot) {
 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
+		u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
 
-		if (slot->skb) {
+		if (!slot->dma_addr) {
+			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+				  ring->start, ring->end);
+			goto next;
+		}
+
+		if (ctl1 & BGMAC_DESC_CTL0_SOF)
 			/* Unmap no longer used buffer */
-			dma_unmap_single(dma_dev, slot->dma_addr,
-					 slot->skb->len, DMA_TO_DEVICE);
-			slot->dma_addr = 0;
+			dma_unmap_single(dma_dev, slot->dma_addr, len,
+					 DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dma_dev, slot->dma_addr, len,
+				       DMA_TO_DEVICE);
 
+		if (slot->skb) {
 			bytes_compl += slot->skb->len;
 			pkts_compl++;
 
 			/* Free memory! :) */
 			dev_kfree_skb(slot->skb);
 			slot->skb = NULL;
-		} else {
-			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
-				  ring->start, ring->end);
 		}
 
+next:
+		slot->dma_addr = 0;
 		if (++ring->start >= BGMAC_TX_RING_SLOTS)
 			ring->start = 0;
 		freed = true;
 	}
 
+	if (!pkts_compl)
+		return;
+
 	netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
 
-	if (freed && netif_queue_stopped(bgmac->net_dev))
+	if (netif_queue_stopped(bgmac->net_dev))
 		netif_wake_queue(bgmac->net_dev);
 }
@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
 				   struct bgmac_dma_ring *ring)
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
+	struct bgmac_dma_desc *dma_desc = ring->cpu_base;
 	struct bgmac_slot_info *slot;
 	int i;
 
 	for (i = 0; i < ring->num_slots; i++) {
+		int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
+
 		slot = &ring->slots[i];
-		if (slot->skb) {
-			if (slot->dma_addr)
-				dma_unmap_single(dma_dev, slot->dma_addr,
-						 slot->skb->len, DMA_TO_DEVICE);
-			dev_kfree_skb(slot->skb);
-		}
+		dev_kfree_skb(slot->skb);
+
+		if (!slot->dma_addr)
+			continue;
+
+		if (slot->skb)
+			dma_unmap_single(dma_dev, slot->dma_addr,
+					 len, DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dma_dev, slot->dma_addr,
+				       len, DMA_TO_DEVICE);
 	}
 }
@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_device *core)
 		goto err_dma_free;
 	}
 
+	net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	net_dev->hw_features = net_dev->features;
+	net_dev->vlan_features = net_dev->features;
+
 	err = register_netdev(bgmac->net_dev);
 	if (err) {
 		bgmac_err(bgmac, "Cannot register net device\n");
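
For readers less familiar with the ring bookkeeping the patch relies on, the
standalone sketch below (not part of the patch) models the control flow of
bgmac_dma_tx_add(): the free-slot check, the per-buffer descriptor flags (SOF
on the head buffer, EOF/IOC on the last fragment, EOT on the ring's physically
last slot) and the index wraparound. The 128-slot ring size, the flag values
and the helper names are assumptions made up for the sketch; only the control
flow mirrors the driver.

/* Illustrative only -- userspace model of the TX ring accounting. */
#include <stdio.h>

#define RING_SLOTS	128	/* stands in for BGMAC_TX_RING_SLOTS */
#define DESC_SOF	0x1	/* first buffer of a frame */
#define DESC_EOF	0x2	/* last buffer of a frame */
#define DESC_IOC	0x4	/* interrupt on completion */
#define DESC_EOT	0x8	/* physically last slot of the ring */

static int free_slots(int start, int end)
{
	/* Same arithmetic as the patch: 'start' is the oldest slot not yet
	 * reclaimed by the completion path, 'end' is the first empty slot.
	 */
	if (start <= end)
		return start - end + RING_SLOTS;
	return start - end;
}

static void add_buf(int index, int len, unsigned int flags)
{
	if (index == RING_SLOTS - 1)	/* mirrors bgmac_dma_tx_add_buf() */
		flags |= DESC_EOT;
	printf("slot %3d: len %5d flags 0x%x\n", index, len, flags);
}

int main(void)
{
	int start = 10, end = 125;		/* example ring state */
	int frag_len[] = { 1448, 1448, 600 };	/* example fragment sizes */
	int nr_frags = 3, head_len = 66, i;
	unsigned int flags;

	if (free_slots(start, end) <= nr_frags + 1) {
		puts("ring full: stop queue, return NETDEV_TX_BUSY");
		return 0;
	}

	flags = DESC_SOF;
	if (!nr_frags)
		flags |= DESC_EOF | DESC_IOC;
	add_buf(end, head_len, flags);		/* head (linear) buffer */

	flags = 0;
	for (i = 0; i < nr_frags; i++) {
		int index = (end + 1 + i) % RING_SLOTS;

		if (i == nr_frags - 1)		/* last fragment closes the frame */
			flags |= DESC_EOF | DESC_IOC;
		add_buf(index, frag_len[i], flags);
	}

	end = (end + nr_frags + 1) % RING_SLOTS;
	printf("new end = %d, free slots = %d\n", end, free_slots(start, end));
	return 0;
}

Note how one descriptor is consumed per buffer, head plus one per fragment,
which is why the full check compares free_slots against nr_frags + 1.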
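
Since the commit message leans on software checksumming, a quick refresher on
what the stack falls back to when a driver advertises no checksum offload: the
RFC 1071 ones'-complement sum, which skb_checksum_help() computes in software
for CHECKSUM_PARTIAL packets. The snippet below is a plain userspace
illustration of that sum, not driver code; the header bytes are the well-known
textbook IPv4 example.

/* Illustrative only -- RFC 1071 Internet checksum in plain C. */
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

static uint16_t inet_csum(const uint8_t *data, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	/* Sum the buffer as 16-bit big-endian words. */
	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)(data[i] << 8 | data[i + 1]);
	if (len & 1)			/* pad an odd trailing byte */
		sum += (uint32_t)(data[len - 1] << 8);

	/* Fold the carries back in and take the ones' complement. */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* Example IPv4 header with the checksum field zeroed;
	 * the expected result is 0xb1e6.
	 */
	const uint8_t hdr[] = { 0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46, 0x40, 0x00,
				0x40, 0x06, 0x00, 0x00, 0xac, 0x10, 0x0a, 0x63,
				0xac, 0x10, 0x0a, 0x0c };

	printf("checksum: 0x%04x\n", inet_csum(hdr, sizeof(hdr)));
	return 0;
}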