diff mbox series

[net-next,4/4] dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit

Message ID 20200421152154.10965-5-ioana.ciornei@nxp.com
State Changes Requested
Delegated to: David Miller
Headers show
Series dpaa2-eth: add support for xdp bulk enqueue | expand

Commit Message

Ioana Ciornei April 21, 2020, 3:21 p.m. UTC
Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
We cannot use the XDP_XMIT_FLUSH since the architecture is not capable
to store all the frames dequeued in a NAPI cycle so we instead are
enqueueing all the frames received in a ndo_xdp_xmit call right away.

After setting up all FDs for the xdp_frames received, enqueue multiple
frames at a time until all are sent or the maximum number of retries is
hit.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 60 ++++++++++---------
 1 file changed, 32 insertions(+), 28 deletions(-)

Comments

Jesper Dangaard Brouer April 22, 2020, 7:12 a.m. UTC | #1
On Tue, 21 Apr 2020 18:21:54 +0300
Ioana Ciornei <ioana.ciornei@nxp.com> wrote:

> Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
> We cannot use the XDP_XMIT_FLUSH since the architecture is not capable
> to store all the frames dequeued in a NAPI cycle so we instead are
> enqueueing all the frames received in a ndo_xdp_xmit call right away.
> 
> After setting up all FDs for the xdp_frames received, enqueue multiple
> frames at a time until all are sent or the maximum number of retries is
> hit.
> 
> Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
> ---
>  .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 60 ++++++++++---------
>  1 file changed, 32 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> index 9a0432cd893c..08b4efad46fd 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> @@ -1933,12 +1933,12 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
>  			      struct xdp_frame **frames, u32 flags)
>  {
>  	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
> +	int total_enqueued = 0, retries = 0, enqueued;
>  	struct dpaa2_eth_drv_stats *percpu_extras;
>  	struct rtnl_link_stats64 *percpu_stats;
> +	int num_fds, i, err, max_retries;
>  	struct dpaa2_eth_fq *fq;
> -	struct dpaa2_fd fd;
> -	int drops = 0;
> -	int i, err;
> +	struct dpaa2_fd *fds;
>  
>  	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>  		return -EINVAL;
> @@ -1946,41 +1946,45 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
>  	if (!netif_running(net_dev))
>  		return -ENETDOWN;
>  
> +	/* create the array of frame descriptors */
> +	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);

I don't like that you have an allocation on the transmit fast-path.

There are a number of ways you can avoid this.

Option (1) Given we know that (currently) devmap will max bulk 16
xdp_frames, we can have a call-stack local array with struct dpaa2_fd,
that contains 16 elements, sizeof(struct dpaa2_fd)==32 bytes times 16 is
512 bytes, so it might be acceptable.  (And add code to alloc if n >
16, to be compatible with someone increasing max bulk in devmap).

Option (2) extend struct dpaa2_eth_priv with an array of 16 struct
dpaa2_fd's, that can be used as fds storage.

> +	if (!fds)
> +		return -ENOMEM;
> +

[...]
> +	kfree(fds);
Ioana Ciornei April 22, 2020, 7:51 a.m. UTC | #2
> Subject: Re: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in
> .ndo_xdp_xmit
> 
> On Tue, 21 Apr 2020 18:21:54 +0300
> Ioana Ciornei <ioana.ciornei@nxp.com> wrote:
> 
> > Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
> > We cannot use the XDP_XMIT_FLUSH since the architecture is not capable
> > to store all the frames dequeued in a NAPI cycle so we instead are
> > enqueueing all the frames received in a ndo_xdp_xmit call right away.
> >
> > After setting up all FDs for the xdp_frames received, enqueue multiple
> > frames at a time until all are sent or the maximum number of retries
> > is hit.
> >
> > Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
> > ---
> >  .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 60
> > ++++++++++---------
> >  1 file changed, 32 insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > index 9a0432cd893c..08b4efad46fd 100644
> > --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > @@ -1933,12 +1933,12 @@ static int dpaa2_eth_xdp_xmit(struct net_device
> *net_dev, int n,
> >  			      struct xdp_frame **frames, u32 flags)  {
> >  	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
> > +	int total_enqueued = 0, retries = 0, enqueued;
> >  	struct dpaa2_eth_drv_stats *percpu_extras;
> >  	struct rtnl_link_stats64 *percpu_stats;
> > +	int num_fds, i, err, max_retries;
> >  	struct dpaa2_eth_fq *fq;
> > -	struct dpaa2_fd fd;
> > -	int drops = 0;
> > -	int i, err;
> > +	struct dpaa2_fd *fds;
> >
> >  	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> >  		return -EINVAL;
> > @@ -1946,41 +1946,45 @@ static int dpaa2_eth_xdp_xmit(struct net_device
> *net_dev, int n,
> >  	if (!netif_running(net_dev))
> >  		return -ENETDOWN;
> >
> > +	/* create the array of frame descriptors */
> > +	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);
> 
> I don't like that you have an allocation on the transmit fast-path.
> 
> There are a number of ways you can avoid this.
> 
> Option (1) Given we know that (currently) devmap will max bulk 16 xdp_frames,
> we can have a call-stack local array with struct dpaa2_fd, that contains 16
> elements, sizeof(struct dpaa2_fd)==32 bytes times 16 is
> 512 bytes, so it might be acceptable.  (And add code to alloc if n > 16, to be
> compatible with someone increasing max bulk in devmap).
> 

I didn't know about the 16 max xdp_frames . Thanks.

> Option (2) extend struct dpaa2_eth_priv with an array of 16 struct dpaa2_fd's,
> that can be used as fds storage.

I have more of a noob question here before proceeding with one of the two options.

The ndo_xdp_xmit() callback can be called concurrently from multiple softirq contexts, right?

If the above is true, then I think the dpaa2_eth_ch_xdp is the right struct to place the array of 16 FDs.

Also, is there any caveat to just use DEV_MAP_BULK_SIZE when declaring the array?

Ioana

> 
> > +	if (!fds)
> > +		return -ENOMEM;
> > +
> 
> [...]
> > +	kfree(fds);
> 
>
Jesper Dangaard Brouer April 22, 2020, 8:37 a.m. UTC | #3
On Wed, 22 Apr 2020 07:51:46 +0000
Ioana Ciornei <ioana.ciornei@nxp.com> wrote:

> > Subject: Re: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in
> > .ndo_xdp_xmit
> > 
> > On Tue, 21 Apr 2020 18:21:54 +0300
> > Ioana Ciornei <ioana.ciornei@nxp.com> wrote:
> >   
> > > Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
> > > We cannot use the XDP_XMIT_FLUSH since the architecture is not capable
> > > to store all the frames dequeued in a NAPI cycle so we instead are
> > > enqueueing all the frames received in a ndo_xdp_xmit call right away.
> > >
> > > After setting up all FDs for the xdp_frames received, enqueue multiple
> > > frames at a time until all are sent or the maximum number of retries
> > > is hit.
> > >
> > > Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
> > > ---
> > >  .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 60
> > > ++++++++++---------
> > >  1 file changed, 32 insertions(+), 28 deletions(-)
> > >
> > > diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > index 9a0432cd893c..08b4efad46fd 100644
> > > --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > @@ -1933,12 +1933,12 @@ static int dpaa2_eth_xdp_xmit(struct net_device  
> > *net_dev, int n,  
> > >  			      struct xdp_frame **frames, u32 flags)  {
> > >  	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
> > > +	int total_enqueued = 0, retries = 0, enqueued;
> > >  	struct dpaa2_eth_drv_stats *percpu_extras;
> > >  	struct rtnl_link_stats64 *percpu_stats;
> > > +	int num_fds, i, err, max_retries;
> > >  	struct dpaa2_eth_fq *fq;
> > > -	struct dpaa2_fd fd;
> > > -	int drops = 0;
> > > -	int i, err;
> > > +	struct dpaa2_fd *fds;
> > >
> > >  	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> > >  		return -EINVAL;
> > > @@ -1946,41 +1946,45 @@ static int dpaa2_eth_xdp_xmit(struct net_device  
> > *net_dev, int n,  
> > >  	if (!netif_running(net_dev))
> > >  		return -ENETDOWN;
> > >
> > > +	/* create the array of frame descriptors */
> > > +	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);  
> > 
> > I don't like that you have an allocation on the transmit fast-path.
> > 
> > There are a number of ways you can avoid this.
> > 
> > Option (1) Given we know that (currently) devmap will max bulk 16 xdp_frames,
> > we can have a call-stack local array with struct dpaa2_fd, that contains 16
> > elements, sizeof(struct dpaa2_fd)==32 bytes times 16 is
> > 512 bytes, so it might be acceptable.  (And add code to alloc if n > 16, to be
> > compatible with someone increasing max bulk in devmap).
> >   
> 
> I didn't know about the 16 max xdp_frames . Thanks.
> 
> > Option (2) extend struct dpaa2_eth_priv with an array of 16 struct dpaa2_fd's,
> > that can be used as fds storage.  
> 
> I have more of a noob question here before proceeding with one of the
> two options.
> 
> The ndo_xdp_xmit() callback can be called concurrently from multiple
> softirq contexts, right?

Yes.
 
> If the above is true, then I think the dpaa2_eth_ch_xdp is the right
> struct to place the array of 16 FDs.

Good point, and it sounds correct to use dpaa2_eth_ch_xdp.

> Also, is there any caveat to just use DEV_MAP_BULK_SIZE when
> declaring the array?

Using DEV_MAP_BULK_SIZE sounds like a good idea.  Even if someone
decide to increase that to 64, then this is only 2048 bytes(32*64).


> >   
> > > +	if (!fds)
> > > +		return -ENOMEM;
> > > +  
> > 
> > [...]  
> > > +	kfree(fds);  
> > 
> >   
>
Ioana Ciornei April 22, 2020, 9:06 a.m. UTC | #4
> Subject: Re: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in
> .ndo_xdp_xmit
> 
> On Wed, 22 Apr 2020 07:51:46 +0000
> Ioana Ciornei <ioana.ciornei@nxp.com> wrote:
> 
> > > Subject: Re: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in
> > > .ndo_xdp_xmit
> > >
> > > On Tue, 21 Apr 2020 18:21:54 +0300
> > > Ioana Ciornei <ioana.ciornei@nxp.com> wrote:
> > >
> > > > Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
> > > > We cannot use the XDP_XMIT_FLUSH since the architecture is not
> > > > capable to store all the frames dequeued in a NAPI cycle so we
> > > > instead are enqueueing all the frames received in a ndo_xdp_xmit call right
> away.
> > > >
> > > > After setting up all FDs for the xdp_frames received, enqueue
> > > > multiple frames at a time until all are sent or the maximum number
> > > > of retries is hit.
> > > >
> > > > Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
> > > > ---
> > > >  .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 60
> > > > ++++++++++---------
> > > >  1 file changed, 32 insertions(+), 28 deletions(-)
> > > >
> > > > diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > > b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > > index 9a0432cd893c..08b4efad46fd 100644
> > > > --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > > +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > > @@ -1933,12 +1933,12 @@ static int dpaa2_eth_xdp_xmit(struct
> > > > net_device
> > > *net_dev, int n,
> > > >  			      struct xdp_frame **frames, u32 flags)  {
> > > >  	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
> > > > +	int total_enqueued = 0, retries = 0, enqueued;
> > > >  	struct dpaa2_eth_drv_stats *percpu_extras;
> > > >  	struct rtnl_link_stats64 *percpu_stats;
> > > > +	int num_fds, i, err, max_retries;
> > > >  	struct dpaa2_eth_fq *fq;
> > > > -	struct dpaa2_fd fd;
> > > > -	int drops = 0;
> > > > -	int i, err;
> > > > +	struct dpaa2_fd *fds;
> > > >
> > > >  	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> > > >  		return -EINVAL;
> > > > @@ -1946,41 +1946,45 @@ static int dpaa2_eth_xdp_xmit(struct
> > > > net_device
> > > *net_dev, int n,
> > > >  	if (!netif_running(net_dev))
> > > >  		return -ENETDOWN;
> > > >
> > > > +	/* create the array of frame descriptors */
> > > > +	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);
> > >
> > > I don't like that you have an allocation on the transmit fast-path.
> > >
> > > There are a number of ways you can avoid this.
> > >
> > > Option (1) Given we know that (currently) devmap will max bulk 16
> > > xdp_frames, we can have a call-stack local array with struct
> > > dpaa2_fd, that contains 16 elements, sizeof(struct dpaa2_fd)==32
> > > bytes times 16 is
> > > 512 bytes, so it might be acceptable.  (And add code to alloc if n >
> > > 16, to be compatible with someone increasing max bulk in devmap).
> > >
> >
> > I didn't know about the 16 max xdp_frames . Thanks.
> >
> > > Option (2) extend struct dpaa2_eth_priv with an array of 16 struct
> > > dpaa2_fd's, that can be used as fds storage.
> >
> > I have more of a noob question here before proceeding with one of the
> > two options.
> >
> > The ndo_xdp_xmit() callback can be called concurrently from multiple
> > softirq contexts, right?
> 
> Yes.
> 
> > If the above is true, then I think the dpaa2_eth_ch_xdp is the right
> > struct to place the array of 16 FDs.
> 
> Good point, and it sounds correct to use dpaa2_eth_ch_xdp.
> 
> > Also, is there any caveat to just use DEV_MAP_BULK_SIZE when declaring
> > the array?
> 
> Using DEV_MAP_BULK_SIZE sounds like a good idea.  Even if someone decide to
> increase that to 64, then this is only 2048 bytes(32*64).
> 

Currently the macro is private to devmap.c. Maybe move it to the xdp.h header
file for access from drivers? I'll include this in v2.

Ioana

> 
> > >
> > > > +	if (!fds)
> > > > +		return -ENOMEM;
> > > > +
> > >
> > > [...]
> > > > +	kfree(fds);
> > >
> > >
> >
> 
> 
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 9a0432cd893c..08b4efad46fd 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1933,12 +1933,12 @@  static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 			      struct xdp_frame **frames, u32 flags)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	int total_enqueued = 0, retries = 0, enqueued;
 	struct dpaa2_eth_drv_stats *percpu_extras;
 	struct rtnl_link_stats64 *percpu_stats;
+	int num_fds, i, err, max_retries;
 	struct dpaa2_eth_fq *fq;
-	struct dpaa2_fd fd;
-	int drops = 0;
-	int i, err;
+	struct dpaa2_fd *fds;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
@@ -1946,41 +1946,45 @@  static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 	if (!netif_running(net_dev))
 		return -ENETDOWN;
 
+	/* create the array of frame descriptors */
+	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);
+	if (!fds)
+		return -ENOMEM;
+
 	percpu_stats = this_cpu_ptr(priv->percpu_stats);
 	percpu_extras = this_cpu_ptr(priv->percpu_extras);
 
+	/* create a FD for each xdp_frame in the list received */
 	for (i = 0; i < n; i++) {
-		struct xdp_frame *xdpf = frames[i];
+		err = dpaa2_eth_xdp_create_fd(net_dev, frames[i], &fds[i]);
+		if (err)
+			break;
+	}
+	num_fds = i;
 
-		/* create the FD from the xdp_frame */
-		err = dpaa2_eth_xdp_create_fd(net_dev, xdpf, &fd);
-		if (err) {
-			percpu_stats->tx_dropped++;
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
+	/* try to enqueue all the FDs until the max number of retries is hit */
+	fq = &priv->fq[smp_processor_id()];
+	max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES;
+	while (total_enqueued < num_fds && retries < max_retries) {
+		err = priv->enqueue(priv, fq, &fds[total_enqueued],
+				    0, num_fds - total_enqueued, &enqueued);
+		if (err == -EBUSY) {
+			percpu_extras->tx_portal_busy += ++retries;
 			continue;
 		}
+		total_enqueued += enqueued;
+	}
 
-		/* enqueue the newly created FD */
-		fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
-		for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-			err = priv->enqueue(priv, fq, &fd, 0, 1);
-			if (err != -EBUSY)
-				break;
-		}
+	/* update statistics */
+	percpu_stats->tx_packets += total_enqueued;
+	for (i = 0; i < total_enqueued; i++)
+		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fds[i]);
+	for (i = total_enqueued; i < n; i++)
+		xdp_return_frame_rx_napi(frames[i]);
 
-		percpu_extras->tx_portal_busy += i;
-		if (unlikely(err < 0)) {
-			percpu_stats->tx_errors++;
-			xdp_return_frame_rx_napi(xdpf);
-			continue;
-		}
-
-		percpu_stats->tx_packets++;
-		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fd);
-	}
+	kfree(fds);
 
-	return n - drops;
+	return total_enqueued;
 }
 
 static int update_xps(struct dpaa2_eth_priv *priv)