[net-next,04/12] ibmvnic: Introduce xmit_more support using batched subCRQ hcalls

Message ID 1605208207-1896-5-git-send-email-tlfalcon@linux.ibm.com
State Superseded
Series ibmvnic: Performance improvements and other updates

Commit Message

Thomas Falcon Nov. 12, 2020, 7:09 p.m. UTC
Include support for the xmit_more feature utilizing the
H_SEND_SUB_CRQ_INDIRECT hypervisor call which allows the sending
of multiple subordinate Command Response Queue descriptors in one
hypervisor call via a DMA-mapped buffer. This update reduces hypervisor
calls and thus hypervisor call overhead per TX descriptor.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 151 +++++++++++++++++------------
 1 file changed, 91 insertions(+), 60 deletions(-)
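
For readers unfamiliar with the indirect sub-CRQ path, the batching pattern the
patch introduces looks roughly like the sketch below. Only netdev_xmit_more(),
netif_xmit_stopped() and the idea of a pre-DMA-mapped descriptor array come from
the patch itself; struct ind_desc, struct ind_buf and h_send_indirect() are
hypothetical stand-ins for the driver's own types and its
H_SEND_SUB_CRQ_INDIRECT wrapper.

/*
 * Illustrative sketch only: descriptors accumulate in a pre-DMA-mapped
 * array and are pushed to firmware with a single indirect hcall, either
 * when the array fills up or when the stack stops promising more packets
 * (xmit_more).  Assumes num_entries <= MAX_IND_DESCS per packet.
 */
#include <linux/netdevice.h>
#include <linux/dma-mapping.h>
#include <linux/string.h>

#define MAX_IND_DESCS	16

struct ind_desc { __be64 word[4]; };	/* stand-in for one sub-CRQ element */

struct ind_buf {
	struct ind_desc	*arr;		/* CPU view of the mapped array */
	dma_addr_t	dma;		/* bus address given to the hcall */
	int		index;		/* descriptors queued so far */
};

/* hypothetical wrapper around H_SEND_SUB_CRQ_INDIRECT */
extern long h_send_indirect(u64 handle, u64 ioba, u64 num_entries);

static long flush_ind_buf(struct ind_buf *b, u64 handle)
{
	long rc = 0;

	if (b->index) {
		/* one hcall covers everything queued since the last flush */
		rc = h_send_indirect(handle, (u64)b->dma, (u64)b->index);
		b->index = 0;
	}
	return rc;
}

static netdev_tx_t sketch_xmit(struct sk_buff *skb, struct netdev_queue *txq,
			       struct ind_buf *b, u64 handle,
			       struct ind_desc *desc, int num_entries)
{
	/* flush first if this packet's descriptors would overflow the array */
	if (b->index + num_entries > MAX_IND_DESCS && flush_ind_buf(b, handle))
		goto drop;

	memcpy(&b->arr[b->index], desc, num_entries * sizeof(*desc));
	b->index += num_entries;

	/* defer the hcall while the stack promises more packets */
	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
		if (flush_ind_buf(b, handle))
			goto drop;

	return NETDEV_TX_OK;

drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

Deferring the hcall while netdev_xmit_more() is true is what amortizes the
hypervisor call cost over several TX descriptors.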

Comments

Jakub Kicinski Nov. 14, 2020, 11:46 p.m. UTC | #1
On Thu, 12 Nov 2020 13:09:59 -0600 Thomas Falcon wrote:
> Include support for the xmit_more feature utilizing the
> H_SEND_SUB_CRQ_INDIRECT hypervisor call which allows the sending
> of multiple subordinate Command Response Queue descriptors in one
> hypervisor call via a DMA-mapped buffer. This update reduces hypervisor
> calls and thus hypervisor call overhead per TX descriptor.
> 
> Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>

The common bug with xmit_more is not flushing the already queued
notifications when there is a drop. Any time you drop a skb you need 
to check it's not an skb that was the end of an xmit_more train and 
if so flush notifications (or just always flush on error).

Looking at the driver, e.g. this goto to start with:

        if (ibmvnic_xmit_workarounds(skb, netdev)) {
                tx_dropped++;
                tx_send_failed++;
                ret = NETDEV_TX_OK;
                goto out;
        }

Does not seem to hit any flush on its way out AFAICS.
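
A minimal sketch of the shape being asked for, assuming a hypothetical
flush_pending_descs() helper (the driver's own names differ): every drop path
funnels through a point that pushes out descriptors batched by earlier
xmit_more packets before returning.

/*
 * Illustrative only: a drop cannot simply "goto out", because the dropped
 * skb may have been the end of an xmit_more train, leaving earlier packets'
 * descriptors sitting in the indirect buffer unseen by firmware.
 */
#include <linux/netdevice.h>

extern bool xmit_must_drop(struct sk_buff *skb);	/* hypothetical check */
extern void flush_pending_descs(void);			/* hypothetical flush */

static netdev_tx_t sketch_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	if (xmit_must_drop(skb)) {
		dev_kfree_skb_any(skb);
		goto drop;		/* not a bare return or "goto out" */
	}

	/* ... queue this skb's descriptors, flush if !netdev_xmit_more() ... */
	return NETDEV_TX_OK;

drop:
	/* always flush on error: cheap, and it covers the xmit_more case */
	flush_pending_descs();
	return NETDEV_TX_OK;
}
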
Thomas Falcon Nov. 16, 2020, 6:40 p.m. UTC | #2
On 11/14/20 5:46 PM, Jakub Kicinski wrote:
> On Thu, 12 Nov 2020 13:09:59 -0600 Thomas Falcon wrote:
>> Include support for the xmit_more feature utilizing the
>> H_SEND_SUB_CRQ_INDIRECT hypervisor call which allows the sending
>> of multiple subordinate Command Response Queue descriptors in one
>> hypervisor call via a DMA-mapped buffer. This update reduces hypervisor
>> calls and thus hypervisor call overhead per TX descriptor.
>>
>> Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
> The common bug with xmit_more is not flushing the already queued
> notifications when there is a drop. Any time you drop a skb you need
> to check it's not an skb that was the end of an xmit_more train and
> if so flush notifications (or just always flush on error).
>
> Looking at the driver, e.g. this goto to start with:
>
>          if (ibmvnic_xmit_workarounds(skb, netdev)) {
>                  tx_dropped++;
>                  tx_send_failed++;
>                  ret = NETDEV_TX_OK;
>                  goto out;
>          }
>
> Does not seem to hit any flush on its way out AFAICS.

Hi, I included those updates in a later patch to ease review, but I see
now that was a mistake. I will merge those bits back into this patch and
resubmit.

Thanks!

Patch

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 524020691ef8..0f6aba760d65 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1165,6 +1165,7 @@  static int __ibmvnic_open(struct net_device *netdev)
 		if (prev_state == VNIC_CLOSED)
 			enable_irq(adapter->tx_scrq[i]->irq);
 		enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
 	}
 
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
@@ -1529,10 +1530,12 @@  static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	int queue_num = skb_get_queue_mapping(skb);
 	u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
 	struct device *dev = &adapter->vdev->dev;
+	struct ibmvnic_ind_xmit_queue *ind_bufp;
 	struct ibmvnic_tx_buff *tx_buff = NULL;
 	struct ibmvnic_sub_crq_queue *tx_scrq;
 	struct ibmvnic_tx_pool *tx_pool;
 	unsigned int tx_send_failed = 0;
+	netdev_tx_t ret = NETDEV_TX_OK;
 	unsigned int tx_map_failed = 0;
 	unsigned int tx_dropped = 0;
 	unsigned int tx_packets = 0;
@@ -1547,7 +1550,7 @@  static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	int index = 0;
 	u8 proto = 0;
 	u64 handle;
-	netdev_tx_t ret = NETDEV_TX_OK;
+	int i;
 
 	if (test_bit(0, &adapter->resetting)) {
 		if (!netif_subqueue_stopped(netdev, skb))
@@ -1666,55 +1669,37 @@  static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
 		hdrs += 2;
 	}
-	/* determine if l2/3/4 headers are sent to firmware */
-	if ((*hdrs >> 7) & 1) {
+
+	if ((*hdrs >> 7) & 1)
 		build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
-		tx_crq.v1.n_crq_elem = num_entries;
-		tx_buff->num_entries = num_entries;
-		tx_buff->indir_arr[0] = tx_crq;
-		tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
-						    sizeof(tx_buff->indir_arr),
-						    DMA_TO_DEVICE);
-		if (dma_mapping_error(dev, tx_buff->indir_dma)) {
-			dev_kfree_skb_any(skb);
-			tx_buff->skb = NULL;
-			if (!firmware_has_feature(FW_FEATURE_CMO))
-				dev_err(dev, "tx: unable to map descriptor array\n");
-			tx_map_failed++;
-			tx_dropped++;
-			ret = NETDEV_TX_OK;
-			goto tx_err_out;
-		}
-		lpar_rc = send_subcrq_indirect(adapter, handle,
-					       (u64)tx_buff->indir_dma,
-					       (u64)num_entries);
-		dma_unmap_single(dev, tx_buff->indir_dma,
-				 sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
-	} else {
-		tx_buff->num_entries = num_entries;
-		lpar_rc = send_subcrq(adapter, handle,
-				      &tx_crq);
-	}
-	if (lpar_rc != H_SUCCESS) {
-		if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER)
-			dev_err_ratelimited(dev, "tx: send failed\n");
-		dev_kfree_skb_any(skb);
-		tx_buff->skb = NULL;
 
-		if (lpar_rc == H_CLOSED || adapter->failover_pending) {
-			/* Disable TX and report carrier off if queue is closed
-			 * or pending failover.
-			 * Firmware guarantees that a signal will be sent to the
-			 * driver, triggering a reset or some other action.
-			 */
-			netif_tx_stop_all_queues(netdev);
-			netif_carrier_off(netdev);
-		}
+	netdev_tx_sent_queue(txq, skb->len);
 
-		tx_send_failed++;
-		tx_dropped++;
-		ret = NETDEV_TX_OK;
-		goto tx_err_out;
+	tx_crq.v1.n_crq_elem = num_entries;
+	tx_buff->num_entries = num_entries;
+	ind_bufp = &tx_scrq->ind_buf;
+	/* flush buffer if current entry can not fit */
+	if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) {
+		lpar_rc = send_subcrq_indirect(adapter, handle,
+					       (u64)ind_bufp->indir_dma,
+					       (u64)ind_bufp->index);
+		if (lpar_rc != H_SUCCESS)
+			goto tx_flush_err;
+		ind_bufp->index = 0;
+	}
+
+	tx_buff->indir_arr[0] = tx_crq;
+	memcpy(&ind_bufp->indir_arr[ind_bufp->index], tx_buff->indir_arr,
+	       num_entries * sizeof(struct ibmvnic_generic_scrq));
+	ind_bufp->index += num_entries;
+	if (!netdev_xmit_more() || netif_xmit_stopped(txq) ||
+	    ind_bufp->index == IBMVNIC_MAX_IND_DESCS) {
+		lpar_rc = send_subcrq_indirect(adapter, handle,
+					       (u64)ind_bufp->indir_dma,
+					       (u64)ind_bufp->index);
+		ind_bufp->index = 0;
+		if (lpar_rc != H_SUCCESS)
+			goto tx_err;
 	}
 
 	if (atomic_add_return(num_entries, &tx_scrq->used)
@@ -1729,14 +1714,51 @@  static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	ret = NETDEV_TX_OK;
 	goto out;
 
-tx_err_out:
-	/* roll back consumer index and map array*/
-	if (tx_pool->consumer_index == 0)
-		tx_pool->consumer_index =
-			tx_pool->num_buffers - 1;
-	else
-		tx_pool->consumer_index--;
-	tx_pool->free_map[tx_pool->consumer_index] = index;
+tx_flush_err:
+	dev_kfree_skb_any(skb);
+	tx_buff->skb = NULL;
+	tx_pool->consumer_index = tx_pool->consumer_index == 0 ?
+				  tx_pool->num_buffers - 1 :
+				  tx_pool->consumer_index - 1;
+	tx_dropped++;
+tx_err:
+	if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER)
+		dev_err_ratelimited(dev, "tx: send failed\n");
+	for (i = ind_bufp->index - 1; i >= 0; --i) {
+		tx_crq = ind_bufp->indir_arr[i];
+		if (tx_crq.v1.type != IBMVNIC_TX_DESC)
+			continue;
+		index = be32_to_cpu(tx_crq.v1.correlator);
+		if (index & IBMVNIC_TSO_POOL_MASK) {
+			tx_pool = &adapter->tso_pool[queue_num];
+			index &= ~IBMVNIC_TSO_POOL_MASK;
+		} else {
+			tx_pool = &adapter->tx_pool[queue_num];
+		}
+		tx_pool->free_map[tx_pool->consumer_index] = index;
+		tx_pool->consumer_index = tx_pool->consumer_index == 0 ?
+					  tx_pool->num_buffers - 1 :
+					  tx_pool->consumer_index - 1;
+		tx_buff = &tx_pool->tx_buff[index];
+		netdev->stats.tx_packets--;
+		netdev->stats.tx_bytes -= tx_buff->skb->len;
+		adapter->tx_stats_buffers[queue_num].packets--;
+		adapter->tx_stats_buffers[queue_num].bytes -= tx_buff->skb->len;
+		dev_kfree_skb_any(tx_buff->skb);
+		tx_buff->skb = NULL;
+		tx_dropped++;
+	}
+	ind_bufp->index = 0;
+
+	if (lpar_rc == H_CLOSED || adapter->failover_pending) {
+		/* Disable TX and report carrier off if queue is closed
+		 * or pending failover.
+		 * Firmware guarantees that a signal will be sent to the
+		 * driver, triggering a reset or some other action.
+		 */
+		netif_tx_stop_all_queues(netdev);
+		netif_carrier_off(netdev);
+	}
 out:
 	netdev->stats.tx_dropped += tx_dropped;
 	netdev->stats.tx_bytes += tx_bytes;
@@ -3115,6 +3137,7 @@  static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 	struct device *dev = &adapter->vdev->dev;
 	struct ibmvnic_tx_pool *tx_pool;
 	struct ibmvnic_tx_buff *txbuff;
+	struct netdev_queue *txq;
 	union sub_crq *next;
 	int index;
 	int i, j;
@@ -3123,6 +3146,8 @@  static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 	while (pending_scrq(adapter, scrq)) {
 		unsigned int pool = scrq->pool_index;
 		int num_entries = 0;
+		int total_bytes = 0;
+		int num_packets = 0;
 
 		next = ibmvnic_next_scrq(adapter, scrq);
 		/* ensure that we are reading the correct queue entry */
@@ -3150,13 +3175,16 @@  static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 				txbuff->data_dma[j] = 0;
 			}
 
-			if (txbuff->last_frag) {
-				dev_kfree_skb_any(txbuff->skb);
+			num_packets++;
+			num_entries += txbuff->num_entries;
+			if (txbuff->skb) {
+				total_bytes += txbuff->skb->len;
+				dev_consume_skb_irq(txbuff->skb);
 				txbuff->skb = NULL;
+			} else {
+				netdev_warn(adapter->netdev,
+					    "TX completion received with NULL socket buffer\n");
 			}
-
-			num_entries += txbuff->num_entries;
-
 			tx_pool->free_map[tx_pool->producer_index] = index;
 			tx_pool->producer_index =
 				(tx_pool->producer_index + 1) %
@@ -3165,6 +3193,9 @@  static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 		/* remove tx_comp scrq*/
 		next->tx_comp.first = 0;
 
+		txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index);
+		netdev_tx_completed_queue(txq, num_packets, total_bytes);
+
 		if (atomic_sub_return(num_entries, &scrq->used) <=
 		    (adapter->req_tx_entries_per_subcrq / 2) &&
 		    __netif_subqueue_stopped(adapter->netdev,
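
One detail worth calling out from the diff: the byte queue limits calls it
wires up (netdev_tx_reset_queue() in __ibmvnic_open(), netdev_tx_sent_queue()
in ibmvnic_xmit(), netdev_tx_completed_queue() in ibmvnic_complete_tx())
always travel as a matched set. A minimal sketch of that pairing, with the
queue index and counters as placeholders:

/* Sketch of the BQL pairing only; arguments are placeholders. */
#include <linux/netdevice.h>

static void on_queue_enable(struct net_device *ndev, int i)
{
	/* forget stale accounting when a TX queue is (re)enabled */
	netdev_tx_reset_queue(netdev_get_tx_queue(ndev, i));
}

static void on_xmit(struct net_device *ndev, int i, struct sk_buff *skb)
{
	/* record bytes handed to the device, even if the hcall is deferred */
	netdev_tx_sent_queue(netdev_get_tx_queue(ndev, i), skb->len);
}

static void on_tx_completion(struct net_device *ndev, int i,
			     unsigned int pkts, unsigned int bytes)
{
	/* give the bytes back once the device reports them sent */
	netdev_tx_completed_queue(netdev_get_tx_queue(ndev, i), pkts, bytes);
}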