um: Migrate vector drivers to NAPI

Message ID 20220121085557.1250299-1-anton.ivanov@cambridgegreys.com
State Superseded
Series um: Migrate vector drivers to NAPI

Commit Message

Anton Ivanov Jan. 21, 2022, 8:55 a.m. UTC
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Migrate UML vector drivers from a bespoke scheduling mechanism
to NAPI.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/vector_kern.c | 99 ++++++++++++++++-------------------
 arch/um/drivers/vector_kern.h |  3 +-
 2 files changed, 48 insertions(+), 54 deletions(-)

Comments

Johannes Berg Jan. 21, 2022, 9:29 a.m. UTC | #1
On Fri, 2022-01-21 at 08:55 +0000, anton.ivanov@cambridgegreys.com
wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> Migrate UML vector drivers from a bespoke scheduling mechanism
> to NAPI.

Nice :)

> -static void vector_rx(struct vector_private *vp)
> -{
> -	int err;
> -	int iter = 0;
> -
> -	if ((vp->options & VECTOR_RX) > 0)
> -		while (((err = vector_mmsg_rx(vp)) > 0) && (iter < MAX_ITERATIONS))

You can remove the MAX_ITERATIONS define now.


> +static int vector_poll(struct napi_struct *napi, int budget)
>  {
> -	struct vector_private *vp = from_tasklet(vp, t, tx_poll);
> +	struct vector_private *vp = container_of(napi, struct vector_private, napi);
> +	int work_done = 0;
> +	int err;
> +	bool tx_enqueued = false;
>  
> -	vp->estats.tx_kicks++;
> -	vector_send(vp->tx_queue);
> +	if ((vp->options & VECTOR_TX) != 0)
> +		tx_enqueued = (vector_send(vp->tx_queue) > 0);
> +	if ((vp->options & VECTOR_RX) > 0)
> +		err = vector_mmsg_rx(vp);
> +	else {
> +		err = vector_legacy_rx(vp);

It feels like you should honour the NAPI budget and pass it down, using
it to limit the number of packets processed for TX/RX, something like

tx_enqueued = vector_send(..., budget);
budget -= tx_enqueued;

if (budget > 0)
	err = vector_mmsg_rx(vp, budget);

etc.
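
Fleshed out, a budget-aware poll loop might look roughly like this (just a
sketch - it assumes hypothetical two-argument versions of vector_send() and
vector_mmsg_rx() that take a budget and return a packet count, which the
posted v1 does not yet have):

static int vector_poll(struct napi_struct *napi, int budget)
{
	struct vector_private *vp = container_of(napi, struct vector_private, napi);
	int work_done = 0;
	int err = 0;

	/* hypothetical vector_send(queue, budget): sends at most
	 * 'budget' frames and returns how many it actually sent
	 */
	if ((vp->options & VECTOR_TX) != 0)
		work_done += vector_send(vp->tx_queue, budget);

	/* spend whatever budget remains on the receive path */
	if (work_done < budget) {
		if ((vp->options & VECTOR_RX) > 0)
			err = vector_mmsg_rx(vp, budget - work_done);
		else
			err = vector_legacy_rx(vp);
		if (err > 0)
			work_done += err;
	}

	/* only stop polling if we stayed under budget */
	if (work_done < budget)
		napi_complete_done(napi, work_done);
	return work_done;
}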

> @@ -1264,6 +1251,10 @@ static int vector_net_open(struct net_device *dev)
>  		if (vp->header_txbuffer == NULL)
>  			goto out_close;
>  	}
> +	/* NAPI */
> +

That comment feels a bit pointless, but whatever :)

> +	netif_napi_add(vp->dev, &vp->napi, vector_poll, 64);
> +	napi_enable(&vp->napi);

I think this should only be done once when you create the netdev?


> @@ -1312,9 +1303,11 @@ static int vector_net_open(struct net_device *dev)
>  	 * SIGIOs never arrive, and the net never works.
>  	 */
>  
> -	vector_rx(vp);
>  
>  	vector_reset_stats(vp);
> +
> +	napi_schedule(&vp->napi);
> 

The comment should probably move with this?

johannes
Anton Ivanov Jan. 21, 2022, 9:56 a.m. UTC | #2
On 21/01/2022 09:29, Johannes Berg wrote:
> On Fri, 2022-01-21 at 08:55 +0000, anton.ivanov@cambridgegreys.com
> wrote:
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> Migrate UML vector drivers from a bespoke scheduling mechanism
>> to NAPI.
> 
> Nice :)
> 
>> -static void vector_rx(struct vector_private *vp)
>> -{
>> -	int err;
>> -	int iter = 0;
>> -
>> -	if ((vp->options & VECTOR_RX) > 0)
>> -		while (((err = vector_mmsg_rx(vp)) > 0) && (iter < MAX_ITERATIONS))
> 
> You can remove the MAX_ITERATIONS define now.

Thanks, missed that.

> 
> 
>> +static int vector_poll(struct napi_struct *napi, int budget)
>>   {
>> -	struct vector_private *vp = from_tasklet(vp, t, tx_poll);
>> +	struct vector_private *vp = container_of(napi, struct vector_private, napi);
>> +	int work_done = 0;
>> +	int err;
>> +	bool tx_enqueued = false;
>>   
>> -	vp->estats.tx_kicks++;
>> -	vector_send(vp->tx_queue);
>> +	if ((vp->options & VECTOR_TX) != 0)
>> +		tx_enqueued = (vector_send(vp->tx_queue) > 0);
>> +	if ((vp->options & VECTOR_RX) > 0)
>> +		err = vector_mmsg_rx(vp);
>> +	else {
>> +		err = vector_legacy_rx(vp);
> 
> It feels like you should honour the NAPI budget and pass it down, using
> it to limit the number of packets processed for TX/RX, something like
> 
> tx_enqueued = vector_send(..., budget);
> budget -= tx_enqueued;

Good idea :)

> 
> if (budget > 0)
> 	err = vector_mmsg_rx(vp, budget);
> 
> etc.
> 
>> @@ -1264,6 +1251,10 @@ static int vector_net_open(struct net_device *dev)
>>   		if (vp->header_txbuffer == NULL)
>>   			goto out_close;
>>   	}
>> +	/* NAPI */
>> +
> 
> That comment feels a bit pointless, but whatever :)

Thanks - I used that to make my life easier while editing. Forgot to remove it.

> 
>> +	netif_napi_add(vp->dev, &vp->napi, vector_poll, 64);
>> +	napi_enable(&vp->napi);
> 
> I think this should only be done once when you create the netdev?

I need to have a look around the kernel and think on this one. Historically, UML does a few things in open() which are closer to device instantiation for real hardware.

> 
> 
>> @@ -1312,9 +1303,11 @@ static int vector_net_open(struct net_device *dev)
>>   	 * SIGIOs never arrive, and the net never works.
>>   	 */
>>   
>> -	vector_rx(vp);
>>   
>>   	vector_reset_stats(vp);
>> +
>> +	napi_schedule(&vp->napi);
>>
> 
> The comment should probably move with this?

Yes. Well spotted.

> 
> johannes
>
Anton Ivanov Jan. 21, 2022, 10:08 a.m. UTC | #3
On 21/01/2022 09:56, Anton Ivanov wrote:
> 
> 
> On 21/01/2022 09:29, Johannes Berg wrote:
>> On Fri, 2022-01-21 at 08:55 +0000, anton.ivanov@cambridgegreys.com
>> wrote:
>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>
>>> Migrate UML vector drivers from a bespoke scheduling mechanism
>>> to NAPI.
>>
>> Nice :)
>>
>>> -static void vector_rx(struct vector_private *vp)
>>> -{
>>> -    int err;
>>> -    int iter = 0;
>>> -
>>> -    if ((vp->options & VECTOR_RX) > 0)
>>> -        while (((err = vector_mmsg_rx(vp)) > 0) && (iter < MAX_ITERATIONS))
>>
>> You can remove the MAX_ITERATIONS define now.
> 
> Thanks, missed that.
> 
>>
>>
>>> +static int vector_poll(struct napi_struct *napi, int budget)
>>>   {
>>> -    struct vector_private *vp = from_tasklet(vp, t, tx_poll);
>>> +    struct vector_private *vp = container_of(napi, struct vector_private, napi);
>>> +    int work_done = 0;
>>> +    int err;
>>> +    bool tx_enqueued = false;
>>> -    vp->estats.tx_kicks++;
>>> -    vector_send(vp->tx_queue);
>>> +    if ((vp->options & VECTOR_TX) != 0)
>>> +        tx_enqueued = (vector_send(vp->tx_queue) > 0);
>>> +    if ((vp->options & VECTOR_RX) > 0)
>>> +        err = vector_mmsg_rx(vp);
>>> +    else {
>>> +        err = vector_legacy_rx(vp);
>>
>> It feels like you should honour the NAPI budget and pass it down, using
>> it to limit the number of packets processed for TX/RX, something like
>>
>> tx_enqueued = vector_send(..., budget);
>> budget -= tx_enqueued;
> 
> Good idea :)
> 
>>
>> if (budget > 0)
>>     err = vector_mmsg_rx(vp, budget);
>>
>> etc.
>>
>>> @@ -1264,6 +1251,10 @@ static int vector_net_open(struct net_device *dev)
>>>           if (vp->header_txbuffer == NULL)
>>>               goto out_close;
>>>       }
>>> +    /* NAPI */
>>> +
>>
>> That comment feels a bit pointless, but whatever :)
> 
> Thanks, I used that to make my life easier editing it. Forgot to remove it.
> 
>>
>>> +    netif_napi_add(vp->dev, &vp->napi, vector_poll, 64);
>>> +    napi_enable(&vp->napi);
>>
>> I think this should only be done once when you create the netdev?
> 
> I need to have a look around the kernel and think on this one. Historically, UML does a few things in open() which are closer to device instantiation for real hardware.

There are quite a few drivers which do it in open() and disable it in close().

Example: Atheros AR71xx

So this looks OK in terms of where it is and how vector_kern is using it.
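
For reference, the open()/close() pairing those drivers use is roughly the
following (a sketch using this driver's names; note the canonical ordering
also calls napi_disable() in close before deleting the NAPI context):

static int vector_net_open(struct net_device *dev)
{
	struct vector_private *vp = netdev_priv(dev);

	netif_napi_add(dev, &vp->napi, vector_poll, 64);
	napi_enable(&vp->napi);
	/* ... request IRQs, schedule the first poll ... */
	return 0;
}

static int vector_net_close(struct net_device *dev)
{
	struct vector_private *vp = netdev_priv(dev);

	/* stop polling before tearing the NAPI context down */
	napi_disable(&vp->napi);
	netif_napi_del(&vp->napi);
	/* ... free IRQs and buffers ... */
	return 0;
}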

> 
>>
>>
>>> @@ -1312,9 +1303,11 @@ static int vector_net_open(struct net_device *dev)
>>>        * SIGIOs never arrive, and the net never works.
>>>        */
>>> -    vector_rx(vp);
>>>       vector_reset_stats(vp);
>>> +
>>> +    napi_schedule(&vp->napi);
>>>
>>
>> The comment should probably move with this?
> 
> Yes. Well spotted.
> 
>>
>> johannes
>>
>
Anton Ivanov Jan. 21, 2022, 11:11 a.m. UTC | #4
On 21/01/2022 09:29, Johannes Berg wrote:
> On Fri, 2022-01-21 at 08:55 +0000, anton.ivanov@cambridgegreys.com
> wrote:
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> Migrate UML vector drivers from a bespoke scheduling mechanism
>> to NAPI.
> 
> Nice :)
> 
>> -static void vector_rx(struct vector_private *vp)
>> -{
>> -	int err;
>> -	int iter = 0;
>> -
>> -	if ((vp->options & VECTOR_RX) > 0)
>> -		while (((err = vector_mmsg_rx(vp)) > 0) && (iter < MAX_ITERATIONS))
> 
> You can remove the MAX_ITERATIONS define now.
> 
> 
>> +static int vector_poll(struct napi_struct *napi, int budget)
>>   {
>> -	struct vector_private *vp = from_tasklet(vp, t, tx_poll);
>> +	struct vector_private *vp = container_of(napi, struct vector_private, napi);
>> +	int work_done = 0;
>> +	int err;
>> +	bool tx_enqueued = false;
>>   
>> -	vp->estats.tx_kicks++;
>> -	vector_send(vp->tx_queue);
>> +	if ((vp->options & VECTOR_TX) != 0)
>> +		tx_enqueued = (vector_send(vp->tx_queue) > 0);
>> +	if ((vp->options & VECTOR_RX) > 0)
>> +		err = vector_mmsg_rx(vp);
>> +	else {
>> +		err = vector_legacy_rx(vp);
> 
> It feels like you should honour the NAPI budget and pass it down, using
> it to limit the number of packets processed for TX/RX, something like
> 
> tx_enqueued = vector_send(..., budget);
> budget -= tx_enqueued;

Looking through the source of other drivers - they do not account TX against the budget.

I tried adding it as an experiment - it incurs quite a performance penalty.

> 
> if (budget > 0)
> 	err = vector_mmsg_rx(vp, budget);

This one works as advertised; it will be in v2, which will hit the mailing list shortly.
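
For completeness, the RX side can be as small as clamping how many datagrams
recvmmsg() may return in one go - a sketch of the two-argument
vector_mmsg_rx(), assuming prep_queue_for_rx() and uml_vector_recvmmsg()
keep their current signatures and the rest of the function is unchanged:

static int vector_mmsg_rx(struct vector_private *vp, int budget)
{
	int packet_count;
	struct vector_queue *qi = vp->rx_queue;

	/* refresh the vector so the iovs point at fresh skbs */
	prep_queue_for_rx(qi);

	/* never read more datagrams in one go than the NAPI budget allows */
	if (budget > qi->max_depth)
		budget = qi->max_depth;

	packet_count = uml_vector_recvmmsg(
		vp->fds->rx_fd, qi->mmsg_vector, budget, 0);
	if (packet_count <= 0)
		return packet_count;
	/* ... per-packet header verification and napi_gro_receive(),
	 * exactly as in the posted patch ...
	 */
	return packet_count;
}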


> 
> etc.
> 
>> @@ -1264,6 +1251,10 @@ static int vector_net_open(struct net_device *dev)
>>   		if (vp->header_txbuffer == NULL)
>>   			goto out_close;
>>   	}
>> +	/* NAPI */
>> +
> 
> That comment feels a bit pointless, but whatever :)
> 
>> +	netif_napi_add(vp->dev, &vp->napi, vector_poll, 64);
>> +	napi_enable(&vp->napi);
> 
> I think this should only be done once when you create the netdev?
> 
> 
>> @@ -1312,9 +1303,11 @@ static int vector_net_open(struct net_device *dev)
>>   	 * SIGIOs never arrive, and the net never works.
>>   	 */
>>   
>> -	vector_rx(vp);
>>   
>>   	vector_reset_stats(vp);
>> +
>> +	napi_schedule(&vp->napi);
>>
> 
> The comment should probably move with this?
> 
> johannes
> 
Patch

diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 4fc1a5d70dcf..1bad55cb116b 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -67,6 +67,7 @@  static LIST_HEAD(vector_devices);
 static int driver_registered;
 
 static void vector_eth_configure(int n, struct arglist *def);
+static int vector_mmsg_rx(struct vector_private *vp);
 
 /* Argument accessors to set variables (and/or set default values)
  * mtu, buffer sizing, default headroom, etc
@@ -458,7 +459,6 @@  static int vector_send(struct vector_queue *qi)
 					vp->estats.tx_queue_running_average =
 						(vp->estats.tx_queue_running_average + result) >> 1;
 				}
-				netif_trans_update(qi->dev);
 				netif_wake_queue(qi->dev);
 				/* if TX is busy, break out of the send loop,
 				 *  poll write IRQ will reschedule xmit for us
@@ -470,8 +470,6 @@  static int vector_send(struct vector_queue *qi)
 			}
 		}
 		spin_unlock(&qi->head_lock);
-	} else {
-		tasklet_schedule(&vp->tx_poll);
 	}
 	return queue_depth;
 }
@@ -608,7 +606,7 @@  static struct vector_queue *create_queue(
 
 /*
  * We do not use the RX queue as a proper wraparound queue for now
- * This is not necessary because the consumption via netif_rx()
+ * This is not necessary because the consumption via napi_gro_receive()
  * happens in-line. While we can try using the return code of
  * netif_rx() for flow control there are no drivers doing this today.
  * For this RX specific use we ignore the tail/head locks and
@@ -896,7 +894,7 @@  static int vector_legacy_rx(struct vector_private *vp)
 			skb->protocol = eth_type_trans(skb, skb->dev);
 			vp->dev->stats.rx_bytes += skb->len;
 			vp->dev->stats.rx_packets++;
-			netif_rx(skb);
+			napi_gro_receive(&vp->napi, skb);
 		} else {
 			dev_kfree_skb_irq(skb);
 		}
@@ -1021,7 +1019,7 @@  static int vector_mmsg_rx(struct vector_private *vp)
 			 */
 			vp->dev->stats.rx_bytes += skb->len;
 			vp->dev->stats.rx_packets++;
-			netif_rx(skb);
+			napi_gro_receive(&vp->napi, skb);
 		} else {
 			/* Overlay header too short to do anything - discard.
 			 * We can actually keep this skb and reuse it,
@@ -1044,23 +1042,6 @@  static int vector_mmsg_rx(struct vector_private *vp)
 	return packet_count;
 }
 
-static void vector_rx(struct vector_private *vp)
-{
-	int err;
-	int iter = 0;
-
-	if ((vp->options & VECTOR_RX) > 0)
-		while (((err = vector_mmsg_rx(vp)) > 0) && (iter < MAX_ITERATIONS))
-			iter++;
-	else
-		while (((err = vector_legacy_rx(vp)) > 0) && (iter < MAX_ITERATIONS))
-			iter++;
-	if ((err != 0) && net_ratelimit())
-		netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
-	if (iter == MAX_ITERATIONS)
-		netdev_err(vp->dev, "vector_rx: device stuck, remote end may have closed the connection\n");
-}
-
 static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct vector_private *vp = netdev_priv(dev);
@@ -1085,25 +1066,15 @@  static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	netdev_sent_queue(vp->dev, skb->len);
 	queue_depth = vector_enqueue(vp->tx_queue, skb);
 
-	/* if the device queue is full, stop the upper layers and
-	 * flush it.
-	 */
-
-	if (queue_depth >= vp->tx_queue->max_depth - 1) {
-		vp->estats.tx_kicks++;
-		netif_stop_queue(dev);
-		vector_send(vp->tx_queue);
-		return NETDEV_TX_OK;
-	}
-	if (netdev_xmit_more()) {
+	if (queue_depth < vp->tx_queue->max_depth && netdev_xmit_more()) {
 		mod_timer(&vp->tl, vp->coalesce);
 		return NETDEV_TX_OK;
+	} else {
+		queue_depth = vector_send(vp->tx_queue);
+		if (queue_depth > 0)
+			napi_schedule(&vp->napi);
 	}
-	if (skb->len < TX_SMALL_PACKET) {
-		vp->estats.tx_kicks++;
-		vector_send(vp->tx_queue);
-	} else
-		tasklet_schedule(&vp->tx_poll);
+
 	return NETDEV_TX_OK;
 }
 
@@ -1114,7 +1085,7 @@  static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
 
 	if (!netif_running(dev))
 		return IRQ_NONE;
-	vector_rx(vp);
+	napi_schedule(&vp->napi);
 	return IRQ_HANDLED;
 
 }
@@ -1133,8 +1104,7 @@  static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
 	 * tweaking the IRQ mask less costly
 	 */
 
-	if (vp->in_write_poll)
-		tasklet_schedule(&vp->tx_poll);
+	napi_schedule(&vp->napi);
 	return IRQ_HANDLED;
 
 }
@@ -1161,7 +1131,7 @@  static int vector_net_close(struct net_device *dev)
 		um_free_irq(vp->tx_irq, dev);
 		vp->tx_irq = 0;
 	}
-	tasklet_kill(&vp->tx_poll);
+	netif_napi_del(&vp->napi);
 	if (vp->fds->rx_fd > 0) {
 		if (vp->bpf)
 			uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
@@ -1193,15 +1163,32 @@  static int vector_net_close(struct net_device *dev)
 	return 0;
 }
 
-/* TX tasklet */
-
-static void vector_tx_poll(struct tasklet_struct *t)
+static int vector_poll(struct napi_struct *napi, int budget)
 {
-	struct vector_private *vp = from_tasklet(vp, t, tx_poll);
+	struct vector_private *vp = container_of(napi, struct vector_private, napi);
+	int work_done = 0;
+	int err;
+	bool tx_enqueued = false;
 
-	vp->estats.tx_kicks++;
-	vector_send(vp->tx_queue);
+	if ((vp->options & VECTOR_TX) != 0)
+		tx_enqueued = (vector_send(vp->tx_queue) > 0);
+	if ((vp->options & VECTOR_RX) > 0)
+		err = vector_mmsg_rx(vp);
+	else {
+		err = vector_legacy_rx(vp);
+		if (err > 0)
+			err = 1;
+	}
+	if (err > 0)
+		work_done += err;
+
+	if (tx_enqueued || err > 0)
+		napi_schedule(napi);
+	if (work_done < budget)
+		napi_complete_done(napi, work_done);
+	return work_done;
 }
+
 static void vector_reset_tx(struct work_struct *work)
 {
 	struct vector_private *vp =
@@ -1264,6 +1251,10 @@  static int vector_net_open(struct net_device *dev)
 		if (vp->header_txbuffer == NULL)
 			goto out_close;
 	}
+	/* NAPI */
+
+	netif_napi_add(vp->dev, &vp->napi, vector_poll, 64);
+	napi_enable(&vp->napi);
 
 	/* READ IRQ */
 	err = um_request_irq(
@@ -1312,9 +1303,11 @@  static int vector_net_open(struct net_device *dev)
 	 * SIGIOs never arrive, and the net never works.
 	 */
 
-	vector_rx(vp);
 
 	vector_reset_stats(vp);
+
+	napi_schedule(&vp->napi);
+
 	vdevice = find_device(vp->unit);
 	vdevice->opened = 1;
 
@@ -1543,15 +1536,16 @@  static const struct net_device_ops vector_netdev_ops = {
 #endif
 };
 
-
 static void vector_timer_expire(struct timer_list *t)
 {
 	struct vector_private *vp = from_timer(vp, t, tl);
 
 	vp->estats.tx_kicks++;
-	vector_send(vp->tx_queue);
+	napi_schedule(&vp->napi);
 }
 
+
+
 static void vector_eth_configure(
 		int n,
 		struct arglist *def
@@ -1634,7 +1628,6 @@  static void vector_eth_configure(
 	});
 
 	dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
-	tasklet_setup(&vp->tx_poll, vector_tx_poll);
 	INIT_WORK(&vp->reset_tx, vector_reset_tx);
 
 	timer_setup(&vp->tl, vector_timer_expire, 0);
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
index 8fff93a75a92..2a1fa8e0f3e1 100644
--- a/arch/um/drivers/vector_kern.h
+++ b/arch/um/drivers/vector_kern.h
@@ -14,6 +14,7 @@ 
 #include <linux/ctype.h>
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
+
 #include "vector_user.h"
 
 /* Queue structure specially adapted for multiple enqueue/dequeue
@@ -72,6 +73,7 @@  struct vector_private {
 	struct list_head list;
 	spinlock_t lock;
 	struct net_device *dev;
+	struct napi_struct		napi	____cacheline_aligned;
 
 	int unit;
 
@@ -115,7 +117,6 @@  struct vector_private {
 
 	spinlock_t stats_lock;
 
-	struct tasklet_struct tx_poll;
 	bool rexmit_scheduled;
 	bool opened;
 	bool in_write_poll;