diff mbox series

[next-queue,v2,2/4] i40e: Record number TXes cleaned during NAPI

Message ID 1665004913-25656-3-git-send-email-jdamato@fastly.com
State Superseded
Headers show
Series i40e: Add an i40e_napi_poll tracepoint | expand

Commit Message

Joe Damato Oct. 5, 2022, 9:21 p.m. UTC
Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores
the number of TXes cleaned.

Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same.

Care has been taken to avoid changing the control flow of any functions
involved.

Signed-off-by: Joe Damato <jdamato@fastly.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++-----
 drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 15 +++++++++++----
 drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  3 ++-
 3 files changed, 24 insertions(+), 10 deletions(-)

Comments

Samudrala, Sridhar Oct. 6, 2022, 12:16 a.m. UTC | #1
On 10/5/2022 4:21 PM, Joe Damato wrote:
> Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores
> the number TXs cleaned.
>
> Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same.
>
> Care has been taken to avoid changing the control flow of any functions
> involved.
>
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> ---
>   drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++-----
>   drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 15 +++++++++++----
>   drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  3 ++-
>   3 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index b97c95f..a2cc98e 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
>    * @vsi: the VSI we care about
>    * @tx_ring: Tx ring to clean
>    * @napi_budget: Used to determine if we are in netpoll
> + * @tx_cleaned: Out parameter set to the number of TXes cleaned
>    *
>    * Returns true if there's any budget left (e.g. the clean is finished)
>    **/
>   static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> -			      struct i40e_ring *tx_ring, int napi_budget)
> +			      struct i40e_ring *tx_ring, int napi_budget,
> +			      unsigned int *tx_cleaned)
>   {
>   	int i = tx_ring->next_to_clean;
>   	struct i40e_tx_buffer *tx_buf;
> @@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>   	i40e_arm_wb(tx_ring, vsi, budget);
>   
>   	if (ring_is_xdp(tx_ring))
> -		return !!budget;
> +		goto out;
>   
>   	/* notify netdev of completed buffers */
>   	netdev_tx_completed_queue(txring_txq(tx_ring),
> @@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>   		}
>   	}
>   
> +out:
> +	*tx_cleaned = total_packets;
>   	return !!budget;
>   }
>   
> @@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>   			       container_of(napi, struct i40e_q_vector, napi);
>   	struct i40e_vsi *vsi = q_vector->vsi;
>   	struct i40e_ring *ring;
> +	bool tx_clean_complete = true;
>   	bool clean_complete = true;
>   	bool arm_wb = false;
>   	int budget_per_ring;
>   	int work_done = 0;
> +	unsigned int tx_cleaned = 0;
>   
>   	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
>   		napi_complete(napi);
> @@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>   	 */
>   	i40e_for_each_ring(ring, q_vector->tx) {
>   		bool wd = ring->xsk_pool ?
> -			  i40e_clean_xdp_tx_irq(vsi, ring) :
> -			  i40e_clean_tx_irq(vsi, ring, budget);
> +			  i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) :
> +			  i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned);
>   
>   		if (!wd) {
> -			clean_complete = false;
> +			clean_complete = tx_clean_complete = false;
>   			continue;
>   		}
>   		arm_wb |= ring->arm_wb;
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> index 790aaeff..f98ce7e4 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> @@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
>    * i40e_xmit_zc - Performs zero-copy Tx AF_XDP
>    * @xdp_ring: XDP Tx ring
>    * @budget: NAPI budget
> + * @tx_cleaned: Out parameter of the TX packets processed
>    *
>    * Returns true if the work is finished.
>    **/
> -static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> +static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget,
> +			 unsigned int *tx_cleaned)
>   {
>   	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
>   	u32 nb_pkts, nb_processed = 0;
>   	unsigned int total_bytes = 0;
>   
>   	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
> -	if (!nb_pkts)
> +	if (!nb_pkts) {
> +		*tx_cleaned = 0;
>   		return true;
> +	}
>   
>   	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
>   		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
> @@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>   
>   	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
>   
> +	*tx_cleaned = nb_pkts;

With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted
packets. The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq.
Maybe we need 2 counters for XDP.


>   	return nb_pkts < budget;
>   }
>   
> @@ -581,10 +586,12 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
>    * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries
>    * @vsi: Current VSI
>    * @tx_ring: XDP Tx ring
> + * @tx_cleaned: out parameter of number of TXes cleaned
>    *
>    * Returns true if cleanup/tranmission is done.
>    **/
> -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
> +bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring,
> +			   unsigned int *tx_cleaned)
>   {
>   	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
>   	u32 i, completed_frames, xsk_frames = 0;
> @@ -634,7 +641,7 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
>   	if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
>   		xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
>   
> -	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring));
> +	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring), tx_cleaned);
>   }
>   
>   /**
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> index 821df24..396ed11 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> @@ -30,7 +30,8 @@ int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
>   bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
>   int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
>   
> -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
> +bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring,
> +			   unsigned int *tx_cleaned);
>   int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
>   int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
>   void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
Joe Damato Oct. 6, 2022, 12:31 a.m. UTC | #2
On Wed, Oct 05, 2022 at 07:16:56PM -0500, Samudrala, Sridhar wrote:
> On 10/5/2022 4:21 PM, Joe Damato wrote:
> >Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores
> >the number TXs cleaned.
> >
> >Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same.
> >
> >Care has been taken to avoid changing the control flow of any functions
> >involved.
> >
> >Signed-off-by: Joe Damato <jdamato@fastly.com>
> >---
> >  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++-----
> >  drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 15 +++++++++++----
> >  drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  3 ++-
> >  3 files changed, 24 insertions(+), 10 deletions(-)
> >
> >diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >index b97c95f..a2cc98e 100644
> >--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >@@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
> >   * @vsi: the VSI we care about
> >   * @tx_ring: Tx ring to clean
> >   * @napi_budget: Used to determine if we are in netpoll
> >+ * @tx_cleaned: Out parameter set to the number of TXes cleaned
> >   *
> >   * Returns true if there's any budget left (e.g. the clean is finished)
> >   **/
> >  static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >-			      struct i40e_ring *tx_ring, int napi_budget)
> >+			      struct i40e_ring *tx_ring, int napi_budget,
> >+			      unsigned int *tx_cleaned)
> >  {
> >  	int i = tx_ring->next_to_clean;
> >  	struct i40e_tx_buffer *tx_buf;
> >@@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >  	i40e_arm_wb(tx_ring, vsi, budget);
> >  	if (ring_is_xdp(tx_ring))
> >-		return !!budget;
> >+		goto out;
> >  	/* notify netdev of completed buffers */
> >  	netdev_tx_completed_queue(txring_txq(tx_ring),
> >@@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >  		}
> >  	}
> >+out:
> >+	*tx_cleaned = total_packets;
> >  	return !!budget;
> >  }
> >@@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> >  			       container_of(napi, struct i40e_q_vector, napi);
> >  	struct i40e_vsi *vsi = q_vector->vsi;
> >  	struct i40e_ring *ring;
> >+	bool tx_clean_complete = true;
> >  	bool clean_complete = true;
> >  	bool arm_wb = false;
> >  	int budget_per_ring;
> >  	int work_done = 0;
> >+	unsigned int tx_cleaned = 0;
> >  	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
> >  		napi_complete(napi);
> >@@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> >  	 */
> >  	i40e_for_each_ring(ring, q_vector->tx) {
> >  		bool wd = ring->xsk_pool ?
> >-			  i40e_clean_xdp_tx_irq(vsi, ring) :
> >-			  i40e_clean_tx_irq(vsi, ring, budget);
> >+			  i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) :
> >+			  i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned);
> >  		if (!wd) {
> >-			clean_complete = false;
> >+			clean_complete = tx_clean_complete = false;
> >  			continue;
> >  		}
> >  		arm_wb |= ring->arm_wb;
> >diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> >index 790aaeff..f98ce7e4 100644
> >--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> >+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> >@@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
> >   * i40e_xmit_zc - Performs zero-copy Tx AF_XDP
> >   * @xdp_ring: XDP Tx ring
> >   * @budget: NAPI budget
> >+ * @tx_cleaned: Out parameter of the TX packets processed
> >   *
> >   * Returns true if the work is finished.
> >   **/
> >-static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> >+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget,
> >+			 unsigned int *tx_cleaned)
> >  {
> >  	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
> >  	u32 nb_pkts, nb_processed = 0;
> >  	unsigned int total_bytes = 0;
> >  	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
> >-	if (!nb_pkts)
> >+	if (!nb_pkts) {
> >+		*tx_cleaned = 0;
> >  		return true;
> >+	}
> >  	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
> >  		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
> >@@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> >  	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
> >+	*tx_cleaned = nb_pkts;
> 
> With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted
> packets. The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq
> May be we need 2 counters for xdp.

I think there are two issues you are describing, which are separate in my
mind.

  1.) The name "tx_cleaned", and
  2.) Whether nb_pkts is the right thing to write as the out param.

For #1: I'm OK to change the name if that's the blocker here; please
suggest a suitable alternative that you'll accept.

For #2: nb_pkts is, IMO, the right value to bubble up to the tracepoint because
nb_pkts affects clean_complete in i40e_napi_poll which in turn determines
whether or not polling mode is entered.

The purpose of the tracepoint is to determine when/why/how you are entering
polling mode, so if nb_pkts plays a role in that calculation, it's the
right number to output.


> >  	return nb_pkts < budget;
> >  }
> >@@ -581,10 +586,12 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
> >   * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries
> >   * @vsi: Current VSI
> >   * @tx_ring: XDP Tx ring
> >+ * @tx_cleaned: out parameter of number of TXes cleaned
> >   *
> >   * Returns true if cleanup/tranmission is done.
> >   **/
> >-bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
> >+bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring,
> >+			   unsigned int *tx_cleaned)
> >  {
> >  	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
> >  	u32 i, completed_frames, xsk_frames = 0;
> >@@ -634,7 +641,7 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
> >  	if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
> >  		xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
> >-	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring));
> >+	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring), tx_cleaned);
> >  }
> >  /**
> >diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> >index 821df24..396ed11 100644
> >--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> >+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> >@@ -30,7 +30,8 @@ int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
> >  bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
> >  int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
> >-bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
> >+bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring,
> >+			   unsigned int *tx_cleaned);
> >  int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
> >  int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
> >  void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
>
Joe Damato Oct. 6, 2022, 1 a.m. UTC | #3
On Wed, Oct 05, 2022 at 05:31:04PM -0700, Joe Damato wrote:
> On Wed, Oct 05, 2022 at 07:16:56PM -0500, Samudrala, Sridhar wrote:
> > On 10/5/2022 4:21 PM, Joe Damato wrote:
> > >Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores
> > >the number TXs cleaned.
> > >
> > >Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same.
> > >
> > >Care has been taken to avoid changing the control flow of any functions
> > >involved.
> > >
> > >Signed-off-by: Joe Damato <jdamato@fastly.com>
> > >---
> > >  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++-----
> > >  drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 15 +++++++++++----
> > >  drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  3 ++-
> > >  3 files changed, 24 insertions(+), 10 deletions(-)
> > >
> > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > >index b97c95f..a2cc98e 100644
> > >--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > >+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > >@@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
> > >   * @vsi: the VSI we care about
> > >   * @tx_ring: Tx ring to clean
> > >   * @napi_budget: Used to determine if we are in netpoll
> > >+ * @tx_cleaned: Out parameter set to the number of TXes cleaned
> > >   *
> > >   * Returns true if there's any budget left (e.g. the clean is finished)
> > >   **/
> > >  static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > >-			      struct i40e_ring *tx_ring, int napi_budget)
> > >+			      struct i40e_ring *tx_ring, int napi_budget,
> > >+			      unsigned int *tx_cleaned)
> > >  {
> > >  	int i = tx_ring->next_to_clean;
> > >  	struct i40e_tx_buffer *tx_buf;
> > >@@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > >  	i40e_arm_wb(tx_ring, vsi, budget);
> > >  	if (ring_is_xdp(tx_ring))
> > >-		return !!budget;
> > >+		goto out;
> > >  	/* notify netdev of completed buffers */
> > >  	netdev_tx_completed_queue(txring_txq(tx_ring),
> > >@@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > >  		}
> > >  	}
> > >+out:
> > >+	*tx_cleaned = total_packets;
> > >  	return !!budget;
> > >  }
> > >@@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> > >  			       container_of(napi, struct i40e_q_vector, napi);
> > >  	struct i40e_vsi *vsi = q_vector->vsi;
> > >  	struct i40e_ring *ring;
> > >+	bool tx_clean_complete = true;
> > >  	bool clean_complete = true;
> > >  	bool arm_wb = false;
> > >  	int budget_per_ring;
> > >  	int work_done = 0;
> > >+	unsigned int tx_cleaned = 0;
> > >  	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
> > >  		napi_complete(napi);
> > >@@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> > >  	 */
> > >  	i40e_for_each_ring(ring, q_vector->tx) {
> > >  		bool wd = ring->xsk_pool ?
> > >-			  i40e_clean_xdp_tx_irq(vsi, ring) :
> > >-			  i40e_clean_tx_irq(vsi, ring, budget);
> > >+			  i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) :
> > >+			  i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned);
> > >  		if (!wd) {
> > >-			clean_complete = false;
> > >+			clean_complete = tx_clean_complete = false;
> > >  			continue;
> > >  		}
> > >  		arm_wb |= ring->arm_wb;
> > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > >index 790aaeff..f98ce7e4 100644
> > >--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > >+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > >@@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
> > >   * i40e_xmit_zc - Performs zero-copy Tx AF_XDP
> > >   * @xdp_ring: XDP Tx ring
> > >   * @budget: NAPI budget
> > >+ * @tx_cleaned: Out parameter of the TX packets processed
> > >   *
> > >   * Returns true if the work is finished.
> > >   **/
> > >-static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> > >+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget,
> > >+			 unsigned int *tx_cleaned)
> > >  {
> > >  	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
> > >  	u32 nb_pkts, nb_processed = 0;
> > >  	unsigned int total_bytes = 0;
> > >  	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
> > >-	if (!nb_pkts)
> > >+	if (!nb_pkts) {
> > >+		*tx_cleaned = 0;
> > >  		return true;
> > >+	}
> > >  	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
> > >  		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
> > >@@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> > >  	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
> > >+	*tx_cleaned = nb_pkts;
> > 
> > With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted
> > packets. The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq
> > May be we need 2 counters for xdp.
> 
> I think there's two issues you are describing, which are separate in my
> mind.
> 
>   1.) The name "tx_cleaned", and
>   2.) Whether nb_pkts is the right thing to write as the out param.
> 
> For #1: I'm OK to change the name if that's the blocker here; please
> suggest a suitable alternative that you'll accept.
> 
> For #2: nb_pkts is, IMO, the right value to bubble up to the tracepoint because
> nb_pkts affects clean_complete in i40e_napi_poll which in turn determines
> whether or not polling mode is entered.
> 
> The purpose of the tracepoint is to determine when/why/how you are entering
> polling mode, so if nb_pkts plays a role in that calculation, it's the
> right number to output.

I suppose the alternative is to only fire the tracepoint when *not* in XDP.
Then the changes to the XDP stuff can be dropped and a separate set of
tracepoints for XDP can be created in the future.

That might reduce the complexity a bit, and will probably still be pretty
useful for people tuning their non-XDP workloads.
Maciej Fijalkowski Oct. 6, 2022, 1:03 p.m. UTC | #4
On Wed, Oct 05, 2022 at 06:00:24PM -0700, Joe Damato wrote:
> On Wed, Oct 05, 2022 at 05:31:04PM -0700, Joe Damato wrote:
> > On Wed, Oct 05, 2022 at 07:16:56PM -0500, Samudrala, Sridhar wrote:
> > > On 10/5/2022 4:21 PM, Joe Damato wrote:
> > > >Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores
> > > >the number TXs cleaned.
> > > >
> > > >Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same.
> > > >
> > > >Care has been taken to avoid changing the control flow of any functions
> > > >involved.
> > > >
> > > >Signed-off-by: Joe Damato <jdamato@fastly.com>
> > > >---
> > > >  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++-----
> > > >  drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 15 +++++++++++----
> > > >  drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  3 ++-
> > > >  3 files changed, 24 insertions(+), 10 deletions(-)
> > > >
> > > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > > >index b97c95f..a2cc98e 100644
> > > >--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > > >+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > > >@@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
> > > >   * @vsi: the VSI we care about
> > > >   * @tx_ring: Tx ring to clean
> > > >   * @napi_budget: Used to determine if we are in netpoll
> > > >+ * @tx_cleaned: Out parameter set to the number of TXes cleaned
> > > >   *
> > > >   * Returns true if there's any budget left (e.g. the clean is finished)
> > > >   **/
> > > >  static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > > >-			      struct i40e_ring *tx_ring, int napi_budget)
> > > >+			      struct i40e_ring *tx_ring, int napi_budget,
> > > >+			      unsigned int *tx_cleaned)
> > > >  {
> > > >  	int i = tx_ring->next_to_clean;
> > > >  	struct i40e_tx_buffer *tx_buf;
> > > >@@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > > >  	i40e_arm_wb(tx_ring, vsi, budget);
> > > >  	if (ring_is_xdp(tx_ring))
> > > >-		return !!budget;
> > > >+		goto out;
> > > >  	/* notify netdev of completed buffers */
> > > >  	netdev_tx_completed_queue(txring_txq(tx_ring),
> > > >@@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > > >  		}
> > > >  	}
> > > >+out:
> > > >+	*tx_cleaned = total_packets;
> > > >  	return !!budget;
> > > >  }
> > > >@@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> > > >  			       container_of(napi, struct i40e_q_vector, napi);
> > > >  	struct i40e_vsi *vsi = q_vector->vsi;
> > > >  	struct i40e_ring *ring;
> > > >+	bool tx_clean_complete = true;
> > > >  	bool clean_complete = true;
> > > >  	bool arm_wb = false;
> > > >  	int budget_per_ring;
> > > >  	int work_done = 0;
> > > >+	unsigned int tx_cleaned = 0;
> > > >  	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
> > > >  		napi_complete(napi);
> > > >@@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> > > >  	 */
> > > >  	i40e_for_each_ring(ring, q_vector->tx) {
> > > >  		bool wd = ring->xsk_pool ?
> > > >-			  i40e_clean_xdp_tx_irq(vsi, ring) :
> > > >-			  i40e_clean_tx_irq(vsi, ring, budget);
> > > >+			  i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) :
> > > >+			  i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned);
> > > >  		if (!wd) {
> > > >-			clean_complete = false;
> > > >+			clean_complete = tx_clean_complete = false;
> > > >  			continue;
> > > >  		}
> > > >  		arm_wb |= ring->arm_wb;
> > > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > > >index 790aaeff..f98ce7e4 100644
> > > >--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > > >+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > > >@@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
> > > >   * i40e_xmit_zc - Performs zero-copy Tx AF_XDP
> > > >   * @xdp_ring: XDP Tx ring
> > > >   * @budget: NAPI budget
> > > >+ * @tx_cleaned: Out parameter of the TX packets processed
> > > >   *
> > > >   * Returns true if the work is finished.
> > > >   **/
> > > >-static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> > > >+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget,
> > > >+			 unsigned int *tx_cleaned)
> > > >  {
> > > >  	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
> > > >  	u32 nb_pkts, nb_processed = 0;
> > > >  	unsigned int total_bytes = 0;
> > > >  	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
> > > >-	if (!nb_pkts)
> > > >+	if (!nb_pkts) {
> > > >+		*tx_cleaned = 0;
> > > >  		return true;
> > > >+	}
> > > >  	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
> > > >  		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
> > > >@@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> > > >  	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
> > > >+	*tx_cleaned = nb_pkts;
> > > 
> > > With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted
> > > packets. The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq
> > > May be we need 2 counters for xdp.
> > 
> > I think there's two issues you are describing, which are separate in my
> > mind.
> > 
> >   1.) The name "tx_cleaned", and
> >   2.) Whether nb_pkts is the right thing to write as the out param.
> > 
> > For #1: I'm OK to change the name if that's the blocker here; please
> > suggest a suitable alternative that you'll accept.
> > 
> > For #2: nb_pkts is, IMO, the right value to bubble up to the tracepoint because
> > nb_pkts affects clean_complete in i40e_napi_poll which in turn determines
> > whether or not polling mode is entered.
> > 
> > The purpose of the tracepoint is to determine when/why/how you are entering
> > polling mode, so if nb_pkts plays a role in that calculation, it's the
> > right number to output.
> 
> I suppose the alternative is to only fire the tracepoint when *not* in XDP.
> Then the changes to the XDP stuff can be dropped and a separate set of
> tracepoints for XDP can be created in the future.

Let's be clear that it's an AF_XDP quirk we have in here that the actual
xmit happens within the NAPI polling routine.

Sridhar is right with having xsk_frames as tx_cleaned, but you're also
right that nb_pkts affects napi polling. But then if you look at the Rx side
there is an analogous case with buffer allocation affecting napi polling.

> 
> That might reduce the complexity a bit, and will probably still be pretty
> useful for people tuning their non-XDP workloads.
Samudrala, Sridhar Oct. 6, 2022, 2:57 p.m. UTC | #5
On 10/6/2022 8:03 AM, Maciej Fijalkowski wrote:
> On Wed, Oct 05, 2022 at 06:00:24PM -0700, Joe Damato wrote:
>> On Wed, Oct 05, 2022 at 05:31:04PM -0700, Joe Damato wrote:
>>> On Wed, Oct 05, 2022 at 07:16:56PM -0500, Samudrala, Sridhar wrote:
>>>> On 10/5/2022 4:21 PM, Joe Damato wrote:
>>>>> Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores
>>>>> the number TXs cleaned.
>>>>>
>>>>> Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same.
>>>>>
>>>>> Care has been taken to avoid changing the control flow of any functions
>>>>> involved.
>>>>>
>>>>> Signed-off-by: Joe Damato <jdamato@fastly.com>
>>>>> ---
>>>>>   drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++-----
>>>>>   drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 15 +++++++++++----
>>>>>   drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  3 ++-
>>>>>   3 files changed, 24 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>>>>> index b97c95f..a2cc98e 100644
>>>>> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>>>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>>>>> @@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
>>>>>    * @vsi: the VSI we care about
>>>>>    * @tx_ring: Tx ring to clean
>>>>>    * @napi_budget: Used to determine if we are in netpoll
>>>>> + * @tx_cleaned: Out parameter set to the number of TXes cleaned
>>>>>    *
>>>>>    * Returns true if there's any budget left (e.g. the clean is finished)
>>>>>    **/
>>>>>   static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>>>>> -			      struct i40e_ring *tx_ring, int napi_budget)
>>>>> +			      struct i40e_ring *tx_ring, int napi_budget,
>>>>> +			      unsigned int *tx_cleaned)
>>>>>   {
>>>>>   	int i = tx_ring->next_to_clean;
>>>>>   	struct i40e_tx_buffer *tx_buf;
>>>>> @@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>>>>>   	i40e_arm_wb(tx_ring, vsi, budget);
>>>>>   	if (ring_is_xdp(tx_ring))
>>>>> -		return !!budget;
>>>>> +		goto out;
>>>>>   	/* notify netdev of completed buffers */
>>>>>   	netdev_tx_completed_queue(txring_txq(tx_ring),
>>>>> @@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>>>>>   		}
>>>>>   	}
>>>>> +out:
>>>>> +	*tx_cleaned = total_packets;
>>>>>   	return !!budget;
>>>>>   }
>>>>> @@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>>>>>   			       container_of(napi, struct i40e_q_vector, napi);
>>>>>   	struct i40e_vsi *vsi = q_vector->vsi;
>>>>>   	struct i40e_ring *ring;
>>>>> +	bool tx_clean_complete = true;
>>>>>   	bool clean_complete = true;
>>>>>   	bool arm_wb = false;
>>>>>   	int budget_per_ring;
>>>>>   	int work_done = 0;
>>>>> +	unsigned int tx_cleaned = 0;
>>>>>   	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
>>>>>   		napi_complete(napi);
>>>>> @@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>>>>>   	 */
>>>>>   	i40e_for_each_ring(ring, q_vector->tx) {
>>>>>   		bool wd = ring->xsk_pool ?
>>>>> -			  i40e_clean_xdp_tx_irq(vsi, ring) :
>>>>> -			  i40e_clean_tx_irq(vsi, ring, budget);
>>>>> +			  i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) :
>>>>> +			  i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned);
>>>>>   		if (!wd) {
>>>>> -			clean_complete = false;
>>>>> +			clean_complete = tx_clean_complete = false;
>>>>>   			continue;
>>>>>   		}
>>>>>   		arm_wb |= ring->arm_wb;
>>>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
>>>>> index 790aaeff..f98ce7e4 100644
>>>>> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
>>>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
>>>>> @@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
>>>>>    * i40e_xmit_zc - Performs zero-copy Tx AF_XDP
>>>>>    * @xdp_ring: XDP Tx ring
>>>>>    * @budget: NAPI budget
>>>>> + * @tx_cleaned: Out parameter of the TX packets processed
>>>>>    *
>>>>>    * Returns true if the work is finished.
>>>>>    **/
>>>>> -static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>>>>> +static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget,
>>>>> +			 unsigned int *tx_cleaned)
>>>>>   {
>>>>>   	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
>>>>>   	u32 nb_pkts, nb_processed = 0;
>>>>>   	unsigned int total_bytes = 0;
>>>>>   	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
>>>>> -	if (!nb_pkts)
>>>>> +	if (!nb_pkts) {
>>>>> +		*tx_cleaned = 0;
>>>>>   		return true;
>>>>> +	}
>>>>>   	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
>>>>>   		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
>>>>> @@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>>>>>   	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
>>>>> +	*tx_cleaned = nb_pkts;
>>>> With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted
>>>> packets. The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq
>>>> May be we need 2 counters for xdp.
>>> I think there's two issues you are describing, which are separate in my
>>> mind.
>>>
>>>    1.) The name "tx_cleaned", and
>>>    2.) Whether nb_pkts is the right thing to write as the out param.
>>>
>>> For #1: I'm OK to change the name if that's the blocker here; please
>>> suggest a suitable alternative that you'll accept.
>>>
>>> For #2: nb_pkts is, IMO, the right value to bubble up to the tracepoint because
>>> nb_pkts affects clean_complete in i40e_napi_poll which in turn determines
>>> whether or not polling mode is entered.
>>>
>>> The purpose of the tracepoint is to determine when/why/how you are entering
>>> polling mode, so if nb_pkts plays a role in that calculation, it's the
>>> right number to output.
>> I suppose the alternative is to only fire the tracepoint when *not* in XDP.
>> Then the changes to the XDP stuff can be dropped and a separate set of
>> tracepoints for XDP can be created in the future.
> Let's be clear that it's the AF_XDP quirk that we have in here that actual
> xmit happens within NAPI polling routine.
>
> Sridhar is right with having xsk_frames as tx_cleaned but you're also
> right that nb_pkts affects napi polling. But then if you look at Rx side
> there is an analogous case with buffer allocation affecting napi polling.

To be correct, I would suggest 2 out parameters to i40e_clean_xdp_tx_irq():
tx_cleaned and xdp_transmitted. tx_cleaned should be filled in
with xsk_frames. Add xdp_transmitted as an out parameter to i40e_xmit_zc()
and fill it with nb_pkts.

I am not completely clear on the reasoning behind setting clean_complete
based on number of packets transmitted in case of XDP.


>
>> That might reduce the complexity a bit, and will probably still be pretty
>> useful for people tuning their non-XDP workloads.

This option is fine too.
Joe Damato Oct. 6, 2022, 5:32 p.m. UTC | #6
On Thu, Oct 06, 2022 at 09:57:19AM -0500, Samudrala, Sridhar wrote:
> On 10/6/2022 8:03 AM, Maciej Fijalkowski wrote:
> >On Wed, Oct 05, 2022 at 06:00:24PM -0700, Joe Damato wrote:
> >>On Wed, Oct 05, 2022 at 05:31:04PM -0700, Joe Damato wrote:
> >>>On Wed, Oct 05, 2022 at 07:16:56PM -0500, Samudrala, Sridhar wrote:
> >>>>On 10/5/2022 4:21 PM, Joe Damato wrote:
> >>>>>Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores
> >>>>>the number TXs cleaned.
> >>>>>
> >>>>>Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same.
> >>>>>
> >>>>>Care has been taken to avoid changing the control flow of any functions
> >>>>>involved.
> >>>>>
> >>>>>Signed-off-by: Joe Damato <jdamato@fastly.com>
> >>>>>---
> >>>>>  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++-----
> >>>>>  drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 15 +++++++++++----
> >>>>>  drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  3 ++-
> >>>>>  3 files changed, 24 insertions(+), 10 deletions(-)
> >>>>>
> >>>>>diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >>>>>index b97c95f..a2cc98e 100644
> >>>>>--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >>>>>+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >>>>>@@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
> >>>>>   * @vsi: the VSI we care about
> >>>>>   * @tx_ring: Tx ring to clean
> >>>>>   * @napi_budget: Used to determine if we are in netpoll
> >>>>>+ * @tx_cleaned: Out parameter set to the number of TXes cleaned
> >>>>>   *
> >>>>>   * Returns true if there's any budget left (e.g. the clean is finished)
> >>>>>   **/
> >>>>>  static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >>>>>-			      struct i40e_ring *tx_ring, int napi_budget)
> >>>>>+			      struct i40e_ring *tx_ring, int napi_budget,
> >>>>>+			      unsigned int *tx_cleaned)
> >>>>>  {
> >>>>>  	int i = tx_ring->next_to_clean;
> >>>>>  	struct i40e_tx_buffer *tx_buf;
> >>>>>@@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >>>>>  	i40e_arm_wb(tx_ring, vsi, budget);
> >>>>>  	if (ring_is_xdp(tx_ring))
> >>>>>-		return !!budget;
> >>>>>+		goto out;
> >>>>>  	/* notify netdev of completed buffers */
> >>>>>  	netdev_tx_completed_queue(txring_txq(tx_ring),
> >>>>>@@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >>>>>  		}
> >>>>>  	}
> >>>>>+out:
> >>>>>+	*tx_cleaned = total_packets;
> >>>>>  	return !!budget;
> >>>>>  }
> >>>>>@@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> >>>>>  			       container_of(napi, struct i40e_q_vector, napi);
> >>>>>  	struct i40e_vsi *vsi = q_vector->vsi;
> >>>>>  	struct i40e_ring *ring;
> >>>>>+	bool tx_clean_complete = true;
> >>>>>  	bool clean_complete = true;
> >>>>>  	bool arm_wb = false;
> >>>>>  	int budget_per_ring;
> >>>>>  	int work_done = 0;
> >>>>>+	unsigned int tx_cleaned = 0;
> >>>>>  	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
> >>>>>  		napi_complete(napi);
> >>>>>@@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> >>>>>  	 */
> >>>>>  	i40e_for_each_ring(ring, q_vector->tx) {
> >>>>>  		bool wd = ring->xsk_pool ?
> >>>>>-			  i40e_clean_xdp_tx_irq(vsi, ring) :
> >>>>>-			  i40e_clean_tx_irq(vsi, ring, budget);
> >>>>>+			  i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) :
> >>>>>+			  i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned);
> >>>>>  		if (!wd) {
> >>>>>-			clean_complete = false;
> >>>>>+			clean_complete = tx_clean_complete = false;
> >>>>>  			continue;
> >>>>>  		}
> >>>>>  		arm_wb |= ring->arm_wb;
> >>>>>diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> >>>>>index 790aaeff..f98ce7e4 100644
> >>>>>--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> >>>>>+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> >>>>>@@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
> >>>>>   * i40e_xmit_zc - Performs zero-copy Tx AF_XDP
> >>>>>   * @xdp_ring: XDP Tx ring
> >>>>>   * @budget: NAPI budget
> >>>>>+ * @tx_cleaned: Out parameter of the TX packets processed
> >>>>>   *
> >>>>>   * Returns true if the work is finished.
> >>>>>   **/
> >>>>>-static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> >>>>>+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget,
> >>>>>+			 unsigned int *tx_cleaned)
> >>>>>  {
> >>>>>  	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
> >>>>>  	u32 nb_pkts, nb_processed = 0;
> >>>>>  	unsigned int total_bytes = 0;
> >>>>>  	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
> >>>>>-	if (!nb_pkts)
> >>>>>+	if (!nb_pkts) {
> >>>>>+		*tx_cleaned = 0;
> >>>>>  		return true;
> >>>>>+	}
> >>>>>  	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
> >>>>>  		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
> >>>>>@@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> >>>>>  	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
> >>>>>+	*tx_cleaned = nb_pkts;
> >>>>With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted
> >>>>packets. The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq
> >>>>May be we need 2 counters for xdp.
> >>>I think there's two issues you are describing, which are separate in my
> >>>mind.
> >>>
> >>>   1.) The name "tx_cleaned", and
> >>>   2.) Whether nb_pkts is the right thing to write as the out param.
> >>>
> >>>For #1: I'm OK to change the name if that's the blocker here; please
> >>>suggest a suitable alternative that you'll accept.
> >>>
> >>>For #2: nb_pkts is, IMO, the right value to bubble up to the tracepoint because
> >>>nb_pkts affects clean_complete in i40e_napi_poll which in turn determines
> >>>whether or not polling mode is entered.
> >>>
> >>>The purpose of the tracepoint is to determine when/why/how you are entering
> >>>polling mode, so if nb_pkts plays a role in that calculation, it's the
> >>>right number to output.
> >>I suppose the alternative is to only fire the tracepoint when *not* in XDP.
> >>Then the changes to the XDP stuff can be dropped and a separate set of
> >>tracepoints for XDP can be created in the future.
> >Let's be clear that it's the AF_XDP quirk that we have in here that actual
> >xmit happens within NAPI polling routine.
> >
> >Sridhar is right with having xsk_frames as tx_cleaned but you're also
> >right that nb_pkts affects napi polling. But then if you look at Rx side
> >there is an analogous case with buffer allocation affecting napi polling.
> 
> To be correct,  I would suggest 2 out parameters to i40e_clean_xdp_tx_irq()
> tx_cleaned and xdp_transmitted.  tx_cleaned should be filled in
> with xsk_frames. Add xdp_transmitted as an out parameter to i40e_xmit_zc()
> and fill it with nb_pkts.

Sorry, but I don't see the value in the second param. NAPI decides what to
do based on nb_pkts. That's the only parameter that matters for the purpose
of NAPI going into poll mode or not, right?

If so: I don't see any reason why a second parameter is necessary.

As I mentioned earlier: if it's just that the name of the parameter isn't
right (e.g., you want it to be 'tx_processed' instead of 'tx_cleaned') then
that's an easy fix; I'll just change the name.

It doesn't seem helpful to have xsk_frames as an out parameter for
i40e_napi_poll tracepoint; that value is not used to determine anything
about i40e's NAPI.

> I am not completely clear on the reasoning behind setting clean_complete
> based on number of packets transmitted in case of XDP.
> >
> >>That might reduce the complexity a bit, and will probably still be pretty
> >>useful for people tuning their non-XDP workloads.
> 
> This option is fine too.

I'll give Jesse a chance to weigh in before I proceed with spinning a v3.
Jesse Brandeburg Oct. 6, 2022, 10:35 p.m. UTC | #7
On 10/6/2022 10:32 AM, Joe Damato wrote:
> Sorry, but I don't see the value in the second param. NAPI decides what to
> do based on nb_pkts. That's the only parameter that matters for the purpose
> of NAPI going into poll mode or not, right?
> 
> If so: I don't see any reason why a second parameter is necessary.

Sridhar and I talked about this offline. We agree now that you can just 
proceed with the single parameter.

> 
> As I mentioned earlier: if it's just that the name of the parameter isn't
> right (e.g., you want it to be 'tx_processed' instead of 'tx_cleaned') then
> that's an easy fix; I'll just change the name.

I think the name change isn't necessary, since we're not going to extend 
this patch with full XDP events printed (see below)

> 
> It doesn't seem helpful to have xsk_frames as an out parameter for
> i40e_napi_poll tracepoint; that value is not used to determine anything
> about i40e's NAPI.
> 
>> I am not completely clear on the reasoning behind setting clean_complete
>> based on number of packets transmitted in case of XDP.
>>>
>>>> That might reduce the complexity a bit, and will probably still be pretty
>>>> useful for people tuning their non-XDP workloads.
>>
>> This option is fine too.
> 
> I'll give Jesse a chance to weigh in before I proceed with spinning a v3.

I'm ok with the patch you have now, that shows nb_pkts because it's the 
input to the polling decision. We can add the detail about XDP transmits 
cleaned in a later series or patch that is by someone who wants the XDP 
details in the napi poll context.
Joe Damato Oct. 6, 2022, 10:56 p.m. UTC | #8
On Thu, Oct 06, 2022 at 03:35:36PM -0700, Jesse Brandeburg wrote:
> On 10/6/2022 10:32 AM, Joe Damato wrote:
> >Sorry, but I don't see the value in the second param. NAPI decides what to
> >do based on nb_pkts. That's the only parameter that matters for the purpose
> >of NAPI going into poll mode or not, right?
> >
> >If so: I don't see any reason why a second parameter is necessary.
> 
> Sridhar and I talked about this offline. We agree now that you can just
> proceed with the single parameter.

OK, thanks.

> >
> >As I mentioned earlier: if it's just that the name of the parameter isn't
> >right (e.g., you want it to be 'tx_processed' instead of 'tx_cleaned') then
> >that's an easy fix; I'll just change the name.
> 
> I think the name change isn't necessary, since we're not going to extend
> this patch with full XDP events printed (see below)
> 
> >
> >It doesn't seem helpful to have xsk_frames as an out parameter for
> >i40e_napi_poll tracepoint; that value is not used to determine anything
> >about i40e's NAPI.
> >
> >>I am not completely clear on the reasoning behind setting clean_complete
> >>based on number of packets transmitted in case of XDP.
> >>>
> >>>>That might reduce the complexity a bit, and will probably still be pretty
> >>>>useful for people tuning their non-XDP workloads.
> >>
> >>This option is fine too.
> >
> >I'll give Jesse a chance to weigh in before I proceed with spinning a v3.
> 
> I'm ok with the patch you have now, that shows nb_pkts because it's the
> input to the polling decision. We can add the detail about XDP transmits
> cleaned in a later series or patch that is by someone who wants the XDP
> details in the napi poll context.

Thanks for the detailed and thoughtful feedback, it is much appreciated.

I'll leave this patch the way it is then and tweak the RX patch to include
an rx_clean_complete boolean as I mentioned in my response to that patch
and send out a v3.

FWIW, I had assumed that you would suggest dropping the XDP stuff so I
pre-emptively spun a branch locally that dropped it... it is a much smaller
change of course, but I suspect that this tracepoint might be useful for XDP
users, so I think the decision to leave it with nb_pkts makes sense.

Thanks again for the review. I'll send a v3 shortly.
Maciej Fijalkowski Oct. 7, 2022, 8:08 a.m. UTC | #9
On Thu, Oct 06, 2022 at 03:56:57PM -0700, Joe Damato wrote:
> On Thu, Oct 06, 2022 at 03:35:36PM -0700, Jesse Brandeburg wrote:
> > On 10/6/2022 10:32 AM, Joe Damato wrote:
> > >Sorry, but I don't see the value in the second param. NAPI decides what to
> > >do based on nb_pkts. That's the only parameter that matters for the purpose
> > >of NAPI going into poll mode or not, right?
> > >
> > >If so: I don't see any reason why a second parameter is necessary.
> > 
> > Sridhar and I talked about this offline. We agree now that you can just
> > proceed with the single parameter.
> 
> OK, thanks.
> 
> > >
> > >As I mentioned earlier: if it's just that the name of the parameter isn't
> > >right (e.g., you want it to be 'tx_processed' instead of 'tx_cleaned') then
> > >that's an easy fix; I'll just change the name.
> > 
> > I think the name change isn't necessary, since we're not going to extend
> > this patch with full XDP events printed (see below)

So better to keep the twisted naming?

> > 
> > >
> > >It doesn't seem helpful to have xsk_frames as an out parameter for
> > >i40e_napi_poll tracepoint; that value is not used to determine anything
> > >about i40e's NAPI.
> > >
> > >>I am not completely clear on the reasoning behind setting clean_complete
> > >>based on number of packets transmitted in case of XDP.
> > >>>
> > >>>>That might reduce the complexity a bit, and will probably still be pretty
> > >>>>useful for people tuning their non-XDP workloads.
> > >>
> > >>This option is fine too.
> > >
> > >I'll give Jesse a chance to weigh in before I proceed with spinning a v3.
> > 
> > I'm ok with the patch you have now, that shows nb_pkts because it's the
> > input to the polling decision. We can add the detail about XDP transmits
> > cleaned in a later series or patch that is by someone who wants the XDP
> > details in the napi poll context.

Please spell out AF_XDP in full instead of referring to XDP. Future readers
might get confused. XDP is totally fine with what Joe is doing; I'm trying
to bring up the AF_XDP term specifically and I feel like I'm being ignored.

The number of packets produced to the HW Tx ring != the number of packets
produced to the AF_XDP CQ ring.

> 
> Thanks for the detailed and thoughtful feedback, it is much appreciated.
> 
> I'll leave this patch the way it is then and tweak the RX patch to include
> an rx_clean_complete boolean as I mentioned in my response to that patch
> and send out a v3.
> 
> FWIW, I had assumed that you would suggest dropping the XDP stuff so I
> pre-emptively spun a branch locally that dropped it... it is a much smaller
> change of course, but I suspect that this tracepoint might useful for XDP
> users, so I think the decision to leave it with nb_pkts makes sense.
> 
> Thanks again for the review. I'll send a v3 shortly.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b97c95f..a2cc98e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -923,11 +923,13 @@  void i40e_detect_recover_hung(struct i40e_vsi *vsi)
  * @vsi: the VSI we care about
  * @tx_ring: Tx ring to clean
  * @napi_budget: Used to determine if we are in netpoll
+ * @tx_cleaned: Out parameter set to the number of TXes cleaned
  *
  * Returns true if there's any budget left (e.g. the clean is finished)
  **/
 static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
-			      struct i40e_ring *tx_ring, int napi_budget)
+			      struct i40e_ring *tx_ring, int napi_budget,
+			      unsigned int *tx_cleaned)
 {
 	int i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
@@ -1026,7 +1028,7 @@  static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 	i40e_arm_wb(tx_ring, vsi, budget);
 
 	if (ring_is_xdp(tx_ring))
-		return !!budget;
+		goto out;
 
 	/* notify netdev of completed buffers */
 	netdev_tx_completed_queue(txring_txq(tx_ring),
@@ -1048,6 +1050,8 @@  static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 		}
 	}
 
+out:
+	*tx_cleaned = total_packets;
 	return !!budget;
 }
 
@@ -2689,10 +2693,12 @@  int i40e_napi_poll(struct napi_struct *napi, int budget)
 			       container_of(napi, struct i40e_q_vector, napi);
 	struct i40e_vsi *vsi = q_vector->vsi;
 	struct i40e_ring *ring;
+	bool tx_clean_complete = true;
 	bool clean_complete = true;
 	bool arm_wb = false;
 	int budget_per_ring;
 	int work_done = 0;
+	unsigned int tx_cleaned = 0;
 
 	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
 		napi_complete(napi);
@@ -2704,11 +2710,11 @@  int i40e_napi_poll(struct napi_struct *napi, int budget)
 	 */
 	i40e_for_each_ring(ring, q_vector->tx) {
 		bool wd = ring->xsk_pool ?
-			  i40e_clean_xdp_tx_irq(vsi, ring) :
-			  i40e_clean_tx_irq(vsi, ring, budget);
+			  i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) :
+			  i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned);
 
 		if (!wd) {
-			clean_complete = false;
+			clean_complete = tx_clean_complete = false;
 			continue;
 		}
 		arm_wb |= ring->arm_wb;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 790aaeff..f98ce7e4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -530,18 +530,22 @@  static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
  * i40e_xmit_zc - Performs zero-copy Tx AF_XDP
  * @xdp_ring: XDP Tx ring
  * @budget: NAPI budget
+ * @tx_cleaned: Out parameter of the TX packets processed
  *
  * Returns true if the work is finished.
  **/
-static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget,
+			 unsigned int *tx_cleaned)
 {
 	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
 	u32 nb_pkts, nb_processed = 0;
 	unsigned int total_bytes = 0;
 
 	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
-	if (!nb_pkts)
+	if (!nb_pkts) {
+		*tx_cleaned = 0;
 		return true;
+	}
 
 	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
 		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
@@ -558,6 +562,7 @@  static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
 
 	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
 
+	*tx_cleaned = nb_pkts;
 	return nb_pkts < budget;
 }
 
@@ -581,10 +586,12 @@  static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
  * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries
  * @vsi: Current VSI
  * @tx_ring: XDP Tx ring
+ * @tx_cleaned: out parameter of number of TXes cleaned
  *
  * Returns true if cleanup/tranmission is done.
  **/
-bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
+bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring,
+			   unsigned int *tx_cleaned)
 {
 	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
 	u32 i, completed_frames, xsk_frames = 0;
@@ -634,7 +641,7 @@  bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
 	if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
 		xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
 
-	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring));
+	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring), tx_cleaned);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index 821df24..396ed11 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -30,7 +30,8 @@  int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
 bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
 int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
 
-bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
+bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring,
+			   unsigned int *tx_cleaned);
 int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
 int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
 void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);