Message ID | 20190617074858.32467-3-bpoirier@suse.com |
---|---|
State | Changes Requested |
Delegated to: | David Miller |
Headers | show |
Series | [net-next,01/16] qlge: Remove irq_cnt | expand |
> -----Original Message----- > From: Benjamin Poirier <bpoirier@suse.com> > Sent: Monday, June 17, 2019 1:19 PM > To: Manish Chopra <manishc@marvell.com>; GR-Linux-NIC-Dev <GR-Linux- > NIC-Dev@marvell.com>; netdev@vger.kernel.org > Subject: [EXT] [PATCH net-next 03/16] qlge: Deduplicate lbq_buf_size > > External Email > > ---------------------------------------------------------------------- > lbq_buf_size is duplicated to every rx_ring structure whereas lbq_buf_order is > present once in the ql_adapter structure. All rings use the same buf size, keep > only one copy of it. Also factor out the calculation of lbq_buf_size instead of > having two copies. > > Signed-off-by: Benjamin Poirier <bpoirier@suse.com> > --- > drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +- > drivers/net/ethernet/qlogic/qlge/qlge_dbg.c | 2 +- > drivers/net/ethernet/qlogic/qlge/qlge_main.c | 61 +++++++++----------- > 3 files changed, 28 insertions(+), 37 deletions(-) > > diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h > b/drivers/net/ethernet/qlogic/qlge/qlge.h > index 0a156a95e981..ba61b4559dd6 100644 > --- a/drivers/net/ethernet/qlogic/qlge/qlge.h > +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h > @@ -1433,7 +1433,6 @@ struct rx_ring { > /* Large buffer queue elements. */ > u32 lbq_len; /* entry count */ > u32 lbq_size; /* size in bytes of queue */ > - u32 lbq_buf_size; > void *lbq_base; > dma_addr_t lbq_base_dma; > void *lbq_base_indirect; > @@ -2108,6 +2107,7 @@ struct ql_adapter { > struct rx_ring rx_ring[MAX_RX_RINGS]; > struct tx_ring tx_ring[MAX_TX_RINGS]; > unsigned int lbq_buf_order; > + u32 lbq_buf_size; > > int rx_csum; > u32 default_rx_queue; > diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c > b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c > index 31389ab8bdf7..46599d74c6fb 100644 > --- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c > +++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c > @@ -1630,6 +1630,7 @@ void ql_dump_qdev(struct ql_adapter *qdev) > DUMP_QDEV_FIELD(qdev, "0x%08x", xg_sem_mask); > DUMP_QDEV_FIELD(qdev, "0x%08x", port_link_up); > DUMP_QDEV_FIELD(qdev, "0x%08x", port_init); > + DUMP_QDEV_FIELD(qdev, "%u", lbq_buf_size); > } > #endif > > @@ -1774,7 +1775,6 @@ void ql_dump_rx_ring(struct rx_ring *rx_ring) > pr_err("rx_ring->lbq_curr_idx = %d\n", rx_ring->lbq_curr_idx); > pr_err("rx_ring->lbq_clean_idx = %d\n", rx_ring->lbq_clean_idx); > pr_err("rx_ring->lbq_free_cnt = %d\n", rx_ring->lbq_free_cnt); > - pr_err("rx_ring->lbq_buf_size = %d\n", rx_ring->lbq_buf_size); > > pr_err("rx_ring->sbq_base = %p\n", rx_ring->sbq_base); > pr_err("rx_ring->sbq_base_dma = %llx\n", diff --git > a/drivers/net/ethernet/qlogic/qlge/qlge_main.c > b/drivers/net/ethernet/qlogic/qlge/qlge_main.c > index 038a6bfc79c7..9df06ad3fb93 100644 > --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c > +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c > @@ -995,15 +995,14 @@ static struct bq_desc *ql_get_curr_lchunk(struct > ql_adapter *qdev, > struct bq_desc *lbq_desc = ql_get_curr_lbuf(rx_ring); > > pci_dma_sync_single_for_cpu(qdev->pdev, > - dma_unmap_addr(lbq_desc, > mapaddr), > - rx_ring->lbq_buf_size, > - PCI_DMA_FROMDEVICE); > + dma_unmap_addr(lbq_desc, mapaddr), > + qdev->lbq_buf_size, > PCI_DMA_FROMDEVICE); > > /* If it's the last chunk of our master page then > * we unmap it. > */ > - if ((lbq_desc->p.pg_chunk.offset + rx_ring->lbq_buf_size) > - == ql_lbq_block_size(qdev)) > + if (lbq_desc->p.pg_chunk.offset + qdev->lbq_buf_size == > + ql_lbq_block_size(qdev)) > pci_unmap_page(qdev->pdev, > lbq_desc->p.pg_chunk.map, > ql_lbq_block_size(qdev), > @@ -1074,11 +1073,11 @@ static int ql_get_next_chunk(struct ql_adapter > *qdev, struct rx_ring *rx_ring, > /* Adjust the master page chunk for next > * buffer get. > */ > - rx_ring->pg_chunk.offset += rx_ring->lbq_buf_size; > + rx_ring->pg_chunk.offset += qdev->lbq_buf_size; > if (rx_ring->pg_chunk.offset == ql_lbq_block_size(qdev)) { > rx_ring->pg_chunk.page = NULL; > } else { > - rx_ring->pg_chunk.va += rx_ring->lbq_buf_size; > + rx_ring->pg_chunk.va += qdev->lbq_buf_size; > get_page(rx_ring->pg_chunk.page); > } > return 0; > @@ -1110,12 +1109,12 @@ static void ql_update_lbq(struct ql_adapter > *qdev, struct rx_ring *rx_ring) > lbq_desc->p.pg_chunk.offset; > dma_unmap_addr_set(lbq_desc, mapaddr, map); > dma_unmap_len_set(lbq_desc, maplen, > - rx_ring->lbq_buf_size); > + qdev->lbq_buf_size); > *lbq_desc->addr = cpu_to_le64(map); > > pci_dma_sync_single_for_device(qdev->pdev, map, > - rx_ring->lbq_buf_size, > - PCI_DMA_FROMDEVICE); > + qdev->lbq_buf_size, > + PCI_DMA_FROMDEVICE); > clean_idx++; > if (clean_idx == rx_ring->lbq_len) > clean_idx = 0; > @@ -1880,8 +1879,7 @@ static struct sk_buff *ql_build_rx_skb(struct > ql_adapter *qdev, > } > do { > lbq_desc = ql_get_curr_lchunk(qdev, rx_ring); > - size = (length < rx_ring->lbq_buf_size) ? length : > - rx_ring->lbq_buf_size; > + size = min(length, qdev->lbq_buf_size); > > netif_printk(qdev, rx_status, KERN_DEBUG, qdev- > >ndev, > "Adding page %d to skb for %d bytes.\n", > @@ -2776,12 +2774,12 @@ static int ql_alloc_tx_resources(struct ql_adapter > *qdev, > > static void ql_free_lbq_buffers(struct ql_adapter *qdev, struct rx_ring > *rx_ring) { > - unsigned int last_offset = ql_lbq_block_size(qdev) - > - rx_ring->lbq_buf_size; > + unsigned int last_offset; > struct bq_desc *lbq_desc; > > uint32_t curr_idx, clean_idx; > > + last_offset = ql_lbq_block_size(qdev) - qdev->lbq_buf_size; > curr_idx = rx_ring->lbq_curr_idx; > clean_idx = rx_ring->lbq_clean_idx; > while (curr_idx != clean_idx) { > @@ -3149,8 +3147,8 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, > struct rx_ring *rx_ring) > } while (page_entries < MAX_DB_PAGES_PER_BQ(rx_ring- > >lbq_len)); > cqicb->lbq_addr = > cpu_to_le64(rx_ring->lbq_base_indirect_dma); > - bq_len = (rx_ring->lbq_buf_size == 65536) ? 0 : > - (u16) rx_ring->lbq_buf_size; > + bq_len = (qdev->lbq_buf_size == 65536) ? 0 : > + (u16)qdev->lbq_buf_size; > cqicb->lbq_buf_size = cpu_to_le16(bq_len); > bq_len = (rx_ring->lbq_len == 65536) ? 0 : > (u16) rx_ring->lbq_len; > @@ -4048,16 +4046,21 @@ static int qlge_close(struct net_device *ndev) > return 0; > } > > +static void qlge_set_lb_size(struct ql_adapter *qdev) { > + if (qdev->ndev->mtu <= 1500) > + qdev->lbq_buf_size = LARGE_BUFFER_MIN_SIZE; > + else > + qdev->lbq_buf_size = LARGE_BUFFER_MAX_SIZE; > + qdev->lbq_buf_order = get_order(qdev->lbq_buf_size); } > + > static int ql_configure_rings(struct ql_adapter *qdev) { > int i; > struct rx_ring *rx_ring; > struct tx_ring *tx_ring; > int cpu_cnt = min(MAX_CPUS, (int)num_online_cpus()); > - unsigned int lbq_buf_len = (qdev->ndev->mtu > 1500) ? > - LARGE_BUFFER_MAX_SIZE : LARGE_BUFFER_MIN_SIZE; > - > - qdev->lbq_buf_order = get_order(lbq_buf_len); > > /* In a perfect world we have one RSS ring for each CPU > * and each has it's own vector. To do that we ask for @@ -4105,7 > +4108,6 @@ static int ql_configure_rings(struct ql_adapter *qdev) > rx_ring->lbq_len = NUM_LARGE_BUFFERS; > rx_ring->lbq_size = > rx_ring->lbq_len * sizeof(__le64); > - rx_ring->lbq_buf_size = (u16)lbq_buf_len; > rx_ring->sbq_len = NUM_SMALL_BUFFERS; > rx_ring->sbq_size = > rx_ring->sbq_len * sizeof(__le64); @@ -4121,7 > +4123,6 @@ static int ql_configure_rings(struct ql_adapter *qdev) > rx_ring->cq_len * sizeof(struct ql_net_rsp_iocb); > rx_ring->lbq_len = 0; > rx_ring->lbq_size = 0; > - rx_ring->lbq_buf_size = 0; > rx_ring->sbq_len = 0; > rx_ring->sbq_size = 0; > rx_ring->sbq_buf_size = 0; > @@ -4140,6 +4141,7 @@ static int qlge_open(struct net_device *ndev) > if (err) > return err; > > + qlge_set_lb_size(qdev); > err = ql_configure_rings(qdev); > if (err) > return err; > @@ -4161,9 +4163,7 @@ static int qlge_open(struct net_device *ndev) > > static int ql_change_rx_buffers(struct ql_adapter *qdev) { > - struct rx_ring *rx_ring; > - int i, status; > - u32 lbq_buf_len; > + int status; > > /* Wait for an outstanding reset to complete. */ > if (!test_bit(QL_ADAPTER_UP, &qdev->flags)) { @@ -4186,16 +4186,7 > @@ static int ql_change_rx_buffers(struct ql_adapter *qdev) > if (status) > goto error; > > - /* Get the new rx buffer size. */ > - lbq_buf_len = (qdev->ndev->mtu > 1500) ? > - LARGE_BUFFER_MAX_SIZE : LARGE_BUFFER_MIN_SIZE; > - qdev->lbq_buf_order = get_order(lbq_buf_len); > - > - for (i = 0; i < qdev->rss_ring_count; i++) { > - rx_ring = &qdev->rx_ring[i]; > - /* Set the new size. */ > - rx_ring->lbq_buf_size = lbq_buf_len; > - } > + qlge_set_lb_size(qdev); > > status = ql_adapter_up(qdev); > if (status) > -- > 2.21.0 Not sure if this change is really required, I think fields relevant to rx_ring should be present in the rx_ring structure. There are various other fields like "lbq_len" and "lbq_size" which would be same for all rx rings but still under the relevant rx_ring structure.
On 2019/06/26 09:24, Manish Chopra wrote: > > -----Original Message----- > > From: Benjamin Poirier <bpoirier@suse.com> > > Sent: Monday, June 17, 2019 1:19 PM > > To: Manish Chopra <manishc@marvell.com>; GR-Linux-NIC-Dev <GR-Linux- > > NIC-Dev@marvell.com>; netdev@vger.kernel.org > > Subject: [EXT] [PATCH net-next 03/16] qlge: Deduplicate lbq_buf_size > > > > External Email > > > > ---------------------------------------------------------------------- > > lbq_buf_size is duplicated to every rx_ring structure whereas lbq_buf_order is > > present once in the ql_adapter structure. All rings use the same buf size, keep > > only one copy of it. Also factor out the calculation of lbq_buf_size instead of > > having two copies. > > > > Signed-off-by: Benjamin Poirier <bpoirier@suse.com> > > --- [...] > > Not sure if this change is really required, I think fields relevant to rx_ring should be present in the rx_ring structure. > There are various other fields like "lbq_len" and "lbq_size" which would be same for all rx rings but still under the relevant rx_ring structure. Indeed, those members are also removed by this patch series, in patch 11.
On Wed, Jun 26, 2019 at 7:37 AM Benjamin Poirier <bpoirier@suse.com> wrote: > > On 2019/06/26 09:24, Manish Chopra wrote: > > > -----Original Message----- > > > From: Benjamin Poirier <bpoirier@suse.com> > > > Sent: Monday, June 17, 2019 1:19 PM > > > To: Manish Chopra <manishc@marvell.com>; GR-Linux-NIC-Dev <GR-Linux- > > > NIC-Dev@marvell.com>; netdev@vger.kernel.org > > > Subject: [EXT] [PATCH net-next 03/16] qlge: Deduplicate lbq_buf_size > > > > > > External Email > > > > > > ---------------------------------------------------------------------- > > > lbq_buf_size is duplicated to every rx_ring structure whereas lbq_buf_order is > > > present once in the ql_adapter structure. All rings use the same buf size, keep > > > only one copy of it. Also factor out the calculation of lbq_buf_size instead of > > > having two copies. > > > > > > Signed-off-by: Benjamin Poirier <bpoirier@suse.com> > > > --- > [...] > > > > Not sure if this change is really required, I think fields relevant to rx_ring should be present in the rx_ring structure. > > There are various other fields like "lbq_len" and "lbq_size" which would be same for all rx rings but still under the relevant rx_ring structure. The one argument against deduplicating might be if the original fields are in a hot cacheline and the new location adds a cacheline access to a hot path. Not sure if that is relevant here. But maybe something to double check.
On 2019/06/26 11:42, Willem de Bruijn wrote: > On Wed, Jun 26, 2019 at 7:37 AM Benjamin Poirier <bpoirier@suse.com> wrote: > > > > On 2019/06/26 09:24, Manish Chopra wrote: > > > > -----Original Message----- > > > > From: Benjamin Poirier <bpoirier@suse.com> > > > > Sent: Monday, June 17, 2019 1:19 PM > > > > To: Manish Chopra <manishc@marvell.com>; GR-Linux-NIC-Dev <GR-Linux- > > > > NIC-Dev@marvell.com>; netdev@vger.kernel.org > > > > Subject: [EXT] [PATCH net-next 03/16] qlge: Deduplicate lbq_buf_size > > > > > > > > External Email > > > > > > > > ---------------------------------------------------------------------- > > > > lbq_buf_size is duplicated to every rx_ring structure whereas lbq_buf_order is > > > > present once in the ql_adapter structure. All rings use the same buf size, keep > > > > only one copy of it. Also factor out the calculation of lbq_buf_size instead of > > > > having two copies. > > > > > > > > Signed-off-by: Benjamin Poirier <bpoirier@suse.com> > > > > --- > > [...] > > > > > > Not sure if this change is really required, I think fields relevant to rx_ring should be present in the rx_ring structure. > > > There are various other fields like "lbq_len" and "lbq_size" which would be same for all rx rings but still under the relevant rx_ring structure. > > The one argument against deduplicating might be if the original fields > are in a hot cacheline and the new location adds a cacheline access to > a hot path. Not sure if that is relevant here. But maybe something to > double check. > Thanks for the hint. I didn't check before because my hunch was that this driver is not near that level of optimization but I checked now and got the following results. The other side of the link is doing pktgen of 257B frames (smallest to exercise the lbq path), over 10 runs of 100s, number of frames received by the stack before/after applying this patch: before: 1487000±2000pps after: 1538000±2000pps (3.4% improvement) Looking with perf stat, boiling down the numbers to be per packet, most interesting differences are: cycles -4.32% instructions -0.97% L1-dcache-loads +6.30% L1-dcache-load-misses -1.26% LLC-loads +2.93% LLC-load-misses -1.16% So, fewer cycles/packet, fewer cache misses. Note however that L1-dcache-loads had high variability between runs (~12%). Looking with pahole, struct rx_ring is reduced by 8 bytes but its a 784 bytes structure full of holes to begin with. Numbers are from an old Xeon E3-1275 Full output follows: root@dtest:~# cat /tmp/mini.sh #!/bin/bash da=$(ethtool -S ql1 | awk '/rx_nic_fifo_drop/ {print $2}') ra=$(ip -s link show dev ql1 | awk 'NR == 4 {print $2}') sleep 100 db=$(ethtool -S ql1 | awk '/rx_nic_fifo_drop/ {print $2}') rb=$(ip -s link show dev ql1 | awk 'NR == 4 {print $2}') echo "rx $((rb - ra))" echo "dropped $((db - da))" root@dtest:~# perf stat -a -r10 -d /tmp/mini.sh rx 148763519 dropped 296128797 rx 148784865 dropped 296107878 rx 148675349 dropped 296217234 rx 148634271 dropped 296260358 rx 148813581 dropped 296089785 rx 148755713 dropped 296136071 rx 148434116 dropped 296459743 rx 148390638 dropped 296501405 rx 148500812 dropped 296391286 rx 148973912 dropped 295920014 Performance counter stats for 'system wide' (10 runs): 800,245.24 msec cpu-clock # 8.000 CPUs utilized ( +- 0.00% ) 11,559 context-switches # 14.445 M/sec ( +- 0.13% ) 36 cpu-migrations # 0.044 M/sec ( +- 7.11% ) 62,100 page-faults # 77.601 M/sec ( +- 0.51% ) 362,221,639,977 cycles # 452638.486 GHz ( +- 0.77% ) (49.92%) 515,318,132,327 instructions # 1.42 insn per cycle ( +- 0.64% ) (62.42%) 91,744,969,260 branches # 114646115.533 M/sec ( +- 0.04% ) (62.50%) 549,681,601 branch-misses # 0.60% of all branches ( +- 0.25% ) (62.50%) 115,207,692,963 L1-dcache-loads # 143965544.751 M/sec ( +- 12.09% ) (40.59%) 2,629,809,584 L1-dcache-load-misses # 2.28% of all L1-dcache hits ( +- 3.24% ) (29.90%) 837,806,984 LLC-loads # 1046938.235 M/sec ( +- 0.73% ) (25.31%) 569,343,079 LLC-load-misses # 67.96% of all LL-cache hits ( +- 0.07% ) (37.50%) 100.03177 +- 0.00104 seconds time elapsed ( +- 0.00% ) # # changed the driver now to include this patch # root@dtest:~# perf stat -a -r10 -d /tmp/mini.sh rx 154094001 dropped 290799944 rx 153798301 dropped 291094799 rx 153942166 dropped 290950792 rx 154150506 dropped 290743194 rx 154179791 dropped 290712400 rx 153907213 dropped 290985227 rx 153813515 dropped 291078475 rx 153603554 dropped 291289250 rx 153521414 dropped 291372145 rx 153592955 dropped 291299734 Performance counter stats for 'system wide' (10 runs): 800,234.85 msec cpu-clock # 8.000 CPUs utilized ( +- 0.00% ) 12,061 context-switches # 15.072 M/sec ( +- 0.61% ) 32 cpu-migrations # 0.040 M/sec ( +- 8.40% ) 62,210 page-faults # 77.740 M/sec ( +- 0.60% ) 358,454,546,598 cycles # 447936.882 GHz ( +- 1.34% ) (49.84%) 527,804,259,267 instructions # 1.47 insn per cycle ( +- 1.11% ) (62.35%) 94,904,030,904 branches # 118595275.389 M/sec ( +- 0.05% ) (62.50%) 568,214,180 branch-misses # 0.60% of all branches ( +- 0.12% ) (62.50%) 126,667,377,528 L1-dcache-loads # 158287823.791 M/sec ( +- 12.48% ) (41.21%) 2,685,865,791 L1-dcache-load-misses # 2.12% of all L1-dcache hits ( +- 4.01% ) (29.42%) 891,939,954 LLC-loads # 1114598.225 M/sec ( +- 0.73% ) (25.16%) 582,037,612 LLC-load-misses # 65.26% of all LL-cache hits ( +- 0.09% ) (37.50%) 100.0307117 +- 0.0000810 seconds time elapsed ( +- 0.00% )
On Fri, Jun 28, 2019 at 4:57 AM Benjamin Poirier <bpoirier@suse.com> wrote: > > On 2019/06/26 11:42, Willem de Bruijn wrote: > > On Wed, Jun 26, 2019 at 7:37 AM Benjamin Poirier <bpoirier@suse.com> wrote: > > > > > > On 2019/06/26 09:24, Manish Chopra wrote: > > > > > -----Original Message----- > > > > > From: Benjamin Poirier <bpoirier@suse.com> > > > > > Sent: Monday, June 17, 2019 1:19 PM > > > > > To: Manish Chopra <manishc@marvell.com>; GR-Linux-NIC-Dev <GR-Linux- > > > > > NIC-Dev@marvell.com>; netdev@vger.kernel.org > > > > > Subject: [EXT] [PATCH net-next 03/16] qlge: Deduplicate lbq_buf_size > > > > > > > > > > External Email > > > > > > > > > > ---------------------------------------------------------------------- > > > > > lbq_buf_size is duplicated to every rx_ring structure whereas lbq_buf_order is > > > > > present once in the ql_adapter structure. All rings use the same buf size, keep > > > > > only one copy of it. Also factor out the calculation of lbq_buf_size instead of > > > > > having two copies. > > > > > > > > > > Signed-off-by: Benjamin Poirier <bpoirier@suse.com> > > > > > --- > > > [...] > > > > > > > > Not sure if this change is really required, I think fields relevant to rx_ring should be present in the rx_ring structure. > > > > There are various other fields like "lbq_len" and "lbq_size" which would be same for all rx rings but still under the relevant rx_ring structure. > > > > The one argument against deduplicating might be if the original fields > > are in a hot cacheline and the new location adds a cacheline access to > > a hot path. Not sure if that is relevant here. But maybe something to > > double check. > > > > Thanks for the hint. I didn't check before because my hunch was that > this driver is not near that level of optimization but I checked now and > got the following results. Thanks for the data. I didn't mean to ask you to do a lot of extra work. Sorry if it resulted in that. Fully agreed on your point about optimization (see also.. that 784B struct with holes). I support the patch and meant to argue against the previous response: this cleanup makes sense to me, just take a second look at struct layout. To be more crystal clear: Acked-by: Willem de Bruijn <willemb@google.com>
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index 0a156a95e981..ba61b4559dd6 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -1433,7 +1433,6 @@ struct rx_ring { /* Large buffer queue elements. */ u32 lbq_len; /* entry count */ u32 lbq_size; /* size in bytes of queue */ - u32 lbq_buf_size; void *lbq_base; dma_addr_t lbq_base_dma; void *lbq_base_indirect; @@ -2108,6 +2107,7 @@ struct ql_adapter { struct rx_ring rx_ring[MAX_RX_RINGS]; struct tx_ring tx_ring[MAX_TX_RINGS]; unsigned int lbq_buf_order; + u32 lbq_buf_size; int rx_csum; u32 default_rx_queue; diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c index 31389ab8bdf7..46599d74c6fb 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c @@ -1630,6 +1630,7 @@ void ql_dump_qdev(struct ql_adapter *qdev) DUMP_QDEV_FIELD(qdev, "0x%08x", xg_sem_mask); DUMP_QDEV_FIELD(qdev, "0x%08x", port_link_up); DUMP_QDEV_FIELD(qdev, "0x%08x", port_init); + DUMP_QDEV_FIELD(qdev, "%u", lbq_buf_size); } #endif @@ -1774,7 +1775,6 @@ void ql_dump_rx_ring(struct rx_ring *rx_ring) pr_err("rx_ring->lbq_curr_idx = %d\n", rx_ring->lbq_curr_idx); pr_err("rx_ring->lbq_clean_idx = %d\n", rx_ring->lbq_clean_idx); pr_err("rx_ring->lbq_free_cnt = %d\n", rx_ring->lbq_free_cnt); - pr_err("rx_ring->lbq_buf_size = %d\n", rx_ring->lbq_buf_size); pr_err("rx_ring->sbq_base = %p\n", rx_ring->sbq_base); pr_err("rx_ring->sbq_base_dma = %llx\n", diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 038a6bfc79c7..9df06ad3fb93 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -995,15 +995,14 @@ static struct bq_desc *ql_get_curr_lchunk(struct ql_adapter *qdev, struct bq_desc *lbq_desc = ql_get_curr_lbuf(rx_ring); pci_dma_sync_single_for_cpu(qdev->pdev, - dma_unmap_addr(lbq_desc, mapaddr), - rx_ring->lbq_buf_size, - PCI_DMA_FROMDEVICE); + dma_unmap_addr(lbq_desc, mapaddr), + qdev->lbq_buf_size, PCI_DMA_FROMDEVICE); /* If it's the last chunk of our master page then * we unmap it. */ - if ((lbq_desc->p.pg_chunk.offset + rx_ring->lbq_buf_size) - == ql_lbq_block_size(qdev)) + if (lbq_desc->p.pg_chunk.offset + qdev->lbq_buf_size == + ql_lbq_block_size(qdev)) pci_unmap_page(qdev->pdev, lbq_desc->p.pg_chunk.map, ql_lbq_block_size(qdev), @@ -1074,11 +1073,11 @@ static int ql_get_next_chunk(struct ql_adapter *qdev, struct rx_ring *rx_ring, /* Adjust the master page chunk for next * buffer get. */ - rx_ring->pg_chunk.offset += rx_ring->lbq_buf_size; + rx_ring->pg_chunk.offset += qdev->lbq_buf_size; if (rx_ring->pg_chunk.offset == ql_lbq_block_size(qdev)) { rx_ring->pg_chunk.page = NULL; } else { - rx_ring->pg_chunk.va += rx_ring->lbq_buf_size; + rx_ring->pg_chunk.va += qdev->lbq_buf_size; get_page(rx_ring->pg_chunk.page); } return 0; @@ -1110,12 +1109,12 @@ static void ql_update_lbq(struct ql_adapter *qdev, struct rx_ring *rx_ring) lbq_desc->p.pg_chunk.offset; dma_unmap_addr_set(lbq_desc, mapaddr, map); dma_unmap_len_set(lbq_desc, maplen, - rx_ring->lbq_buf_size); + qdev->lbq_buf_size); *lbq_desc->addr = cpu_to_le64(map); pci_dma_sync_single_for_device(qdev->pdev, map, - rx_ring->lbq_buf_size, - PCI_DMA_FROMDEVICE); + qdev->lbq_buf_size, + PCI_DMA_FROMDEVICE); clean_idx++; if (clean_idx == rx_ring->lbq_len) clean_idx = 0; @@ -1880,8 +1879,7 @@ static struct sk_buff *ql_build_rx_skb(struct ql_adapter *qdev, } do { lbq_desc = ql_get_curr_lchunk(qdev, rx_ring); - size = (length < rx_ring->lbq_buf_size) ? length : - rx_ring->lbq_buf_size; + size = min(length, qdev->lbq_buf_size); netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev, "Adding page %d to skb for %d bytes.\n", @@ -2776,12 +2774,12 @@ static int ql_alloc_tx_resources(struct ql_adapter *qdev, static void ql_free_lbq_buffers(struct ql_adapter *qdev, struct rx_ring *rx_ring) { - unsigned int last_offset = ql_lbq_block_size(qdev) - - rx_ring->lbq_buf_size; + unsigned int last_offset; struct bq_desc *lbq_desc; uint32_t curr_idx, clean_idx; + last_offset = ql_lbq_block_size(qdev) - qdev->lbq_buf_size; curr_idx = rx_ring->lbq_curr_idx; clean_idx = rx_ring->lbq_clean_idx; while (curr_idx != clean_idx) { @@ -3149,8 +3147,8 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring) } while (page_entries < MAX_DB_PAGES_PER_BQ(rx_ring->lbq_len)); cqicb->lbq_addr = cpu_to_le64(rx_ring->lbq_base_indirect_dma); - bq_len = (rx_ring->lbq_buf_size == 65536) ? 0 : - (u16) rx_ring->lbq_buf_size; + bq_len = (qdev->lbq_buf_size == 65536) ? 0 : + (u16)qdev->lbq_buf_size; cqicb->lbq_buf_size = cpu_to_le16(bq_len); bq_len = (rx_ring->lbq_len == 65536) ? 0 : (u16) rx_ring->lbq_len; @@ -4048,16 +4046,21 @@ static int qlge_close(struct net_device *ndev) return 0; } +static void qlge_set_lb_size(struct ql_adapter *qdev) +{ + if (qdev->ndev->mtu <= 1500) + qdev->lbq_buf_size = LARGE_BUFFER_MIN_SIZE; + else + qdev->lbq_buf_size = LARGE_BUFFER_MAX_SIZE; + qdev->lbq_buf_order = get_order(qdev->lbq_buf_size); +} + static int ql_configure_rings(struct ql_adapter *qdev) { int i; struct rx_ring *rx_ring; struct tx_ring *tx_ring; int cpu_cnt = min(MAX_CPUS, (int)num_online_cpus()); - unsigned int lbq_buf_len = (qdev->ndev->mtu > 1500) ? - LARGE_BUFFER_MAX_SIZE : LARGE_BUFFER_MIN_SIZE; - - qdev->lbq_buf_order = get_order(lbq_buf_len); /* In a perfect world we have one RSS ring for each CPU * and each has it's own vector. To do that we ask for @@ -4105,7 +4108,6 @@ static int ql_configure_rings(struct ql_adapter *qdev) rx_ring->lbq_len = NUM_LARGE_BUFFERS; rx_ring->lbq_size = rx_ring->lbq_len * sizeof(__le64); - rx_ring->lbq_buf_size = (u16)lbq_buf_len; rx_ring->sbq_len = NUM_SMALL_BUFFERS; rx_ring->sbq_size = rx_ring->sbq_len * sizeof(__le64); @@ -4121,7 +4123,6 @@ static int ql_configure_rings(struct ql_adapter *qdev) rx_ring->cq_len * sizeof(struct ql_net_rsp_iocb); rx_ring->lbq_len = 0; rx_ring->lbq_size = 0; - rx_ring->lbq_buf_size = 0; rx_ring->sbq_len = 0; rx_ring->sbq_size = 0; rx_ring->sbq_buf_size = 0; @@ -4140,6 +4141,7 @@ static int qlge_open(struct net_device *ndev) if (err) return err; + qlge_set_lb_size(qdev); err = ql_configure_rings(qdev); if (err) return err; @@ -4161,9 +4163,7 @@ static int qlge_open(struct net_device *ndev) static int ql_change_rx_buffers(struct ql_adapter *qdev) { - struct rx_ring *rx_ring; - int i, status; - u32 lbq_buf_len; + int status; /* Wait for an outstanding reset to complete. */ if (!test_bit(QL_ADAPTER_UP, &qdev->flags)) { @@ -4186,16 +4186,7 @@ static int ql_change_rx_buffers(struct ql_adapter *qdev) if (status) goto error; - /* Get the new rx buffer size. */ - lbq_buf_len = (qdev->ndev->mtu > 1500) ? - LARGE_BUFFER_MAX_SIZE : LARGE_BUFFER_MIN_SIZE; - qdev->lbq_buf_order = get_order(lbq_buf_len); - - for (i = 0; i < qdev->rss_ring_count; i++) { - rx_ring = &qdev->rx_ring[i]; - /* Set the new size. */ - rx_ring->lbq_buf_size = lbq_buf_len; - } + qlge_set_lb_size(qdev); status = ql_adapter_up(qdev); if (status)
lbq_buf_size is duplicated to every rx_ring structure whereas lbq_buf_order is present once in the ql_adapter structure. All rings use the same buf size, keep only one copy of it. Also factor out the calculation of lbq_buf_size instead of having two copies. Signed-off-by: Benjamin Poirier <bpoirier@suse.com> --- drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +- drivers/net/ethernet/qlogic/qlge/qlge_dbg.c | 2 +- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 61 +++++++++----------- 3 files changed, 28 insertions(+), 37 deletions(-)