From patchwork Fri Apr 1 04:36:34 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Anirban Chakraborty X-Patchwork-Id: 89178 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id E54EFB6EEB for ; Fri, 1 Apr 2011 15:37:43 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752020Ab1DAEhi (ORCPT ); Fri, 1 Apr 2011 00:37:38 -0400 Received: from ch1outboundpool.messaging.microsoft.com ([216.32.181.181]:6339 "EHLO CH1EHSOBE007.bigfish.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1751399Ab1DAEhh convert rfc822-to-8bit (ORCPT ); Fri, 1 Apr 2011 00:37:37 -0400 Received: from mail42-ch1-R.bigfish.com (216.32.181.169) by CH1EHSOBE007.bigfish.com (10.43.70.57) with Microsoft SMTP Server id 14.1.225.8; Fri, 1 Apr 2011 04:37:34 +0000 Received: from mail42-ch1 (localhost.localdomain [127.0.0.1]) by mail42-ch1-R.bigfish.com (Postfix) with ESMTP id E1A75F30324; Fri, 1 Apr 2011 04:37:34 +0000 (UTC) X-SpamScore: 11 X-BigFish: VPS11(zzzz1202hzz8275bhz2ei2a8h637h668h839h1a1r61h) X-Spam-TCS-SCL: 0:0 X-Forefront-Antispam-Report: KIP:(null); UIP:(null); IPVD:NLI; H:avexcashub1.qlogic.com; RD:avexcashub2.qlogic.com; EFVD:NLI Received: from mail42-ch1 (localhost.localdomain [127.0.0.1]) by mail42-ch1 (MessageSwitch) id 1301632653980166_16652; Fri, 1 Apr 2011 04:37:33 +0000 (UTC) Received: from CH1EHSMHS006.bigfish.com (snatpool1.int.messaging.microsoft.com [10.43.68.251]) by mail42-ch1.bigfish.com (Postfix) with ESMTP id EAD0916D804F; Fri, 1 Apr 2011 04:37:33 +0000 (UTC) Received: from avexcashub1.qlogic.com (198.70.193.64) by CH1EHSMHS006.bigfish.com (10.43.70.6) with Microsoft SMTP Server (TLS) id 14.1.225.8; Fri, 1 Apr 2011 04:37:31 +0000 Received: from n5102mn2hhgf91.qlogic.org (10.1.7.24) by avexcashub2.qlogic.org (10.1.4.162) with Microsoft SMTP Server (TLS) id 8.1.436.0; Thu, 31 Mar 2011 21:37:30 -0700 Date: Thu, 31 Mar 2011 21:36:34 -0700 From: Anirban Chakraborty X-X-Sender: anirban@n5102mn2hhgf91.qlogic.org To: David Miller , "netdev@vger.kernel.org" CC: Dept_NX_Linux_NIC_Driver Subject: [PATCH 4/9 net-next-2.6] qlcnic: Code changes to improve performance Message-ID: User-Agent: Alpine 2.00 (OSX 1167 2008-08-23) MIME-Version: 1.0 X-OriginatorOrg: qlogic.com Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Optimized code that resulted in achieving lower CPU utilization on transmit path and higher throughput for small packet sizes (64 bytes). Signed-off-by: Anirban Chakraborty --- drivers/net/qlcnic/qlcnic.h | 30 +++--- drivers/net/qlcnic/qlcnic_main.c | 210 ++++++++++++++++++-------------------- 2 files changed, 115 insertions(+), 125 deletions(-) -- 1.7.4.1 This message and any attached documents contain information from QLogic Corporation or its wholly-owned subsidiaries that may be confidential. If you are not the intended recipient, you may not read, copy, distribute, or use this information. If you have received this transmission in error, please notify the sender immediately by reply e-mail and then delete this message. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h index 15d950a..a5b28d1 100644 --- a/drivers/net/qlcnic/qlcnic.h +++ b/drivers/net/qlcnic/qlcnic.h @@ -434,50 +434,49 @@ struct qlcnic_adapter_stats { * be one Rcv Descriptor for normal packets, one for jumbo and may be others. */ struct qlcnic_host_rds_ring { - u32 producer; + void __iomem *crb_rcv_producer; + struct rcv_desc *desc_head; + struct qlcnic_rx_buffer *rx_buf_arr; u32 num_desc; + u32 producer; u32 dma_size; u32 skb_size; u32 flags; - void __iomem *crb_rcv_producer; - struct rcv_desc *desc_head; - struct qlcnic_rx_buffer *rx_buf_arr; struct list_head free_list; spinlock_t lock; dma_addr_t phys_addr; -}; +} ____cacheline_internodealigned_in_smp; struct qlcnic_host_sds_ring { u32 consumer; u32 num_desc; void __iomem *crb_sts_consumer; - void __iomem *crb_intr_mask; struct status_desc *desc_head; struct qlcnic_adapter *adapter; struct napi_struct napi; struct list_head free_list[NUM_RCV_DESC_RINGS]; + void __iomem *crb_intr_mask; int irq; dma_addr_t phys_addr; char name[IFNAMSIZ+4]; -}; +} ____cacheline_internodealigned_in_smp; struct qlcnic_host_tx_ring { u32 producer; - __le32 *hw_consumer; u32 sw_consumer; - void __iomem *crb_cmd_producer; u32 num_desc; - - struct netdev_queue *txq; - - struct qlcnic_cmd_buffer *cmd_buf_arr; + void __iomem *crb_cmd_producer; struct cmd_desc_type0 *desc_head; + struct qlcnic_cmd_buffer *cmd_buf_arr; + __le32 *hw_consumer; + dma_addr_t phys_addr; dma_addr_t hw_cons_phys_addr; -}; + struct netdev_queue *txq; +} ____cacheline_internodealigned_in_smp; /* * Receive context. There is one such structure per instance of the @@ -1328,8 +1327,7 @@ static const struct qlcnic_brdinfo qlcnic_boards[] = { static inline u32 qlcnic_tx_avail(struct qlcnic_host_tx_ring *tx_ring) { - smp_mb(); - if (tx_ring->producer < tx_ring->sw_consumer) + if (likely(tx_ring->producer < tx_ring->sw_consumer)) return tx_ring->sw_consumer - tx_ring->producer; else return tx_ring->sw_consumer + tx_ring->num_desc - diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c index dde7e44..3b740f5 100644 --- a/drivers/net/qlcnic/qlcnic_main.c +++ b/drivers/net/qlcnic/qlcnic_main.c @@ -1861,6 +1861,7 @@ static void qlcnic_change_filter(struct qlcnic_adapter *adapter, vlan_req->vlan_id = vlan_id; tx_ring->producer = get_next_index(producer, tx_ring->num_desc); + smp_mb(); } #define QLCNIC_MAC_HASH(MAC)\ @@ -1921,58 +1922,122 @@ qlcnic_send_filter(struct qlcnic_adapter *adapter, spin_unlock(&adapter->mac_learn_lock); } -static void -qlcnic_tso_check(struct net_device *netdev, - struct qlcnic_host_tx_ring *tx_ring, +static int +qlcnic_tx_pkt(struct qlcnic_adapter *adapter, struct cmd_desc_type0 *first_desc, struct sk_buff *skb) { - u8 opcode = TX_ETHER_PKT; - __be16 protocol = skb->protocol; - u16 flags = 0; - int copied, offset, copy_len, hdr_len = 0, tso = 0; + u8 opcode = 0, hdr_len = 0; + u16 flags = 0, vlan_tci = 0; + int copied, offset, copy_len; struct cmd_desc_type0 *hwdesc; struct vlan_ethhdr *vh; - struct qlcnic_adapter *adapter = netdev_priv(netdev); + struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring; + u16 protocol = ntohs(skb->protocol); u32 producer = tx_ring->producer; - __le16 vlan_oob = first_desc->flags_opcode & - cpu_to_le16(FLAGS_VLAN_OOB); + + if (protocol == ETH_P_8021Q) { + vh = (struct vlan_ethhdr *)skb->data; + flags = FLAGS_VLAN_TAGGED; + vlan_tci = vh->h_vlan_TCI; + } else if (vlan_tx_tag_present(skb)) { + flags = FLAGS_VLAN_OOB; + vlan_tci = vlan_tx_tag_get(skb); + } + if (unlikely(adapter->pvid)) { + if (vlan_tci && !(adapter->flags & QLCNIC_TAGGING_ENABLED)) + return -EIO; + if (vlan_tci && (adapter->flags & QLCNIC_TAGGING_ENABLED)) + goto set_flags; + + flags = FLAGS_VLAN_OOB; + vlan_tci = adapter->pvid; + } +set_flags: + qlcnic_set_tx_vlan_tci(first_desc, vlan_tci); + qlcnic_set_tx_flags_opcode(first_desc, flags, opcode); if (*(skb->data) & BIT_0) { flags |= BIT_0; memcpy(&first_desc->eth_addr, skb->data, ETH_ALEN); } - - if ((netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) && + opcode = TX_ETHER_PKT; + if ((adapter->netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) && skb_shinfo(skb)->gso_size > 0) { hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); first_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); first_desc->total_hdr_length = hdr_len; - if (vlan_oob) { + + opcode = (protocol == ETH_P_IPV6) ? TX_TCP_LSO6 : TX_TCP_LSO; + + /* For LSO, we need to copy the MAC/IP/TCP headers into + * the descriptor ring */ + copied = 0; + offset = 2; + + if (flags & FLAGS_VLAN_OOB) { first_desc->total_hdr_length += VLAN_HLEN; first_desc->tcp_hdr_offset = VLAN_HLEN; first_desc->ip_hdr_offset = VLAN_HLEN; /* Only in case of TSO on vlan device */ flags |= FLAGS_VLAN_TAGGED; + + /* Create a TSO vlan header template for firmware */ + + hwdesc = &tx_ring->desc_head[producer]; + tx_ring->cmd_buf_arr[producer].skb = NULL; + + copy_len = min((int)sizeof(struct cmd_desc_type0) - + offset, hdr_len + VLAN_HLEN); + + vh = (struct vlan_ethhdr *)((char *) hwdesc + 2); + skb_copy_from_linear_data(skb, vh, 12); + vh->h_vlan_proto = htons(ETH_P_8021Q); + vh->h_vlan_TCI = htons(vlan_tci); + + skb_copy_from_linear_data_offset(skb, 12, + (char *)vh + 16, copy_len - 16); + + copied = copy_len - VLAN_HLEN; + offset = 0; + + producer = get_next_index(producer, tx_ring->num_desc); } - opcode = (protocol == cpu_to_be16(ETH_P_IPV6)) ? - TX_TCP_LSO6 : TX_TCP_LSO; - tso = 1; + while (copied < hdr_len) { + + copy_len = min((int)sizeof(struct cmd_desc_type0) - + offset, (hdr_len - copied)); + + hwdesc = &tx_ring->desc_head[producer]; + tx_ring->cmd_buf_arr[producer].skb = NULL; + + skb_copy_from_linear_data_offset(skb, copied, + (char *) hwdesc + offset, copy_len); + + copied += copy_len; + offset = 0; + + producer = get_next_index(producer, tx_ring->num_desc); + } + + tx_ring->producer = producer; + smp_mb(); + adapter->stats.lso_frames++; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { u8 l4proto; - if (protocol == cpu_to_be16(ETH_P_IP)) { + if (protocol == ETH_P_IP) { l4proto = ip_hdr(skb)->protocol; if (l4proto == IPPROTO_TCP) opcode = TX_TCP_PKT; else if (l4proto == IPPROTO_UDP) opcode = TX_UDP_PKT; - } else if (protocol == cpu_to_be16(ETH_P_IPV6)) { + } else if (protocol == ETH_P_IPV6) { l4proto = ipv6_hdr(skb)->nexthdr; if (l4proto == IPPROTO_TCP) @@ -1981,63 +2046,11 @@ qlcnic_tso_check(struct net_device *netdev, opcode = TX_UDPV6_PKT; } } - first_desc->tcp_hdr_offset += skb_transport_offset(skb); first_desc->ip_hdr_offset += skb_network_offset(skb); qlcnic_set_tx_flags_opcode(first_desc, flags, opcode); - if (!tso) - return; - - /* For LSO, we need to copy the MAC/IP/TCP headers into - * the descriptor ring - */ - copied = 0; - offset = 2; - - if (vlan_oob) { - /* Create a TSO vlan header template for firmware */ - - hwdesc = &tx_ring->desc_head[producer]; - tx_ring->cmd_buf_arr[producer].skb = NULL; - - copy_len = min((int)sizeof(struct cmd_desc_type0) - offset, - hdr_len + VLAN_HLEN); - - vh = (struct vlan_ethhdr *)((char *)hwdesc + 2); - skb_copy_from_linear_data(skb, vh, 12); - vh->h_vlan_proto = htons(ETH_P_8021Q); - vh->h_vlan_TCI = (__be16)swab16((u16)first_desc->vlan_TCI); - - skb_copy_from_linear_data_offset(skb, 12, - (char *)vh + 16, copy_len - 16); - - copied = copy_len - VLAN_HLEN; - offset = 0; - - producer = get_next_index(producer, tx_ring->num_desc); - } - - while (copied < hdr_len) { - - copy_len = min((int)sizeof(struct cmd_desc_type0) - offset, - (hdr_len - copied)); - - hwdesc = &tx_ring->desc_head[producer]; - tx_ring->cmd_buf_arr[producer].skb = NULL; - - skb_copy_from_linear_data_offset(skb, copied, - (char *)hwdesc + offset, copy_len); - - copied += copy_len; - offset = 0; - - producer = get_next_index(producer, tx_ring->num_desc); - } - - tx_ring->producer = producer; - barrier(); - adapter->stats.lso_frames++; + return 0; } static int @@ -2088,39 +2101,21 @@ out_err: return -ENOMEM; } -static int -qlcnic_check_tx_tagging(struct qlcnic_adapter *adapter, - struct sk_buff *skb, - struct cmd_desc_type0 *first_desc) +static void +qlcnic_unmap_buffers(struct pci_dev *pdev, struct sk_buff *skb, + struct qlcnic_cmd_buffer *pbuf) { - u8 opcode = 0; - u16 flags = 0; - __be16 protocol = skb->protocol; - struct vlan_ethhdr *vh; + struct qlcnic_skb_frag *nf = &pbuf->frag_array[0]; + int nr_frags = skb_shinfo(skb)->nr_frags; + int i; - if (protocol == cpu_to_be16(ETH_P_8021Q)) { - vh = (struct vlan_ethhdr *)skb->data; - protocol = vh->h_vlan_encapsulated_proto; - flags = FLAGS_VLAN_TAGGED; - qlcnic_set_tx_vlan_tci(first_desc, ntohs(vh->h_vlan_TCI)); - } else if (vlan_tx_tag_present(skb)) { - flags = FLAGS_VLAN_OOB; - qlcnic_set_tx_vlan_tci(first_desc, vlan_tx_tag_get(skb)); + for (i = 0; i < nr_frags; i++) { + nf = &pbuf->frag_array[i+1]; + pci_unmap_page(pdev, nf->dma, nf->length, PCI_DMA_TODEVICE); } - if (unlikely(adapter->pvid)) { - if (first_desc->vlan_TCI && - !(adapter->flags & QLCNIC_TAGGING_ENABLED)) - return -EIO; - if (first_desc->vlan_TCI && - (adapter->flags & QLCNIC_TAGGING_ENABLED)) - goto set_flags; - flags = FLAGS_VLAN_OOB; - qlcnic_set_tx_vlan_tci(first_desc, adapter->pvid); - } -set_flags: - qlcnic_set_tx_flags_opcode(first_desc, flags, opcode); - return 0; + nf = &pbuf->frag_array[0]; + pci_unmap_single(pdev, nf->dma, skb_headlen(skb), PCI_DMA_TODEVICE); } static inline void @@ -2144,7 +2139,7 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) int i, k; u32 producer; - int frag_count, no_of_desc; + int frag_count; u32 num_txd = tx_ring->num_desc; if (!test_bit(__QLCNIC_DEV_UP, &adapter->state)) { @@ -2161,12 +2156,8 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) frag_count = skb_shinfo(skb)->nr_frags + 1; - /* 4 fragments per cmd des */ - no_of_desc = (frag_count + 3) >> 2; - if (unlikely(qlcnic_tx_avail(tx_ring) <= TX_STOP_THRESH)) { netif_stop_queue(netdev); - smp_mb(); if (qlcnic_tx_avail(tx_ring) > TX_STOP_THRESH) netif_start_queue(netdev); else { @@ -2183,9 +2174,6 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) first_desc = hwdesc = &tx_ring->desc_head[producer]; qlcnic_clear_cmddesc((u64 *)hwdesc); - if (qlcnic_check_tx_tagging(adapter, skb, first_desc)) - goto drop_packet; - if (qlcnic_map_tx_skb(pdev, skb, pbuf)) { adapter->stats.tx_dma_map_error++; goto drop_packet; @@ -2229,8 +2217,10 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } tx_ring->producer = get_next_index(producer, num_txd); + smp_mb(); - qlcnic_tso_check(netdev, tx_ring, first_desc, skb); + if (unlikely(qlcnic_tx_pkt(adapter, first_desc, skb))) + goto unwind_buff; if (qlcnic_mac_learn) qlcnic_send_filter(adapter, tx_ring, first_desc, skb); @@ -2242,6 +2232,8 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; +unwind_buff: + qlcnic_unmap_buffers(pdev, skb, pbuf); drop_packet: adapter->stats.txdropped++; dev_kfree_skb_any(skb);