
[RFC] virtio_net: add local_bh_disable() around u64_stats_update_begin

Message ID 20181016165545.guksrl23ulcudxrk@linutronix.de
State RFC, archived
Delegated to: David Miller
Series [RFC] virtio_net: add local_bh_disable() around u64_stats_update_begin

Commit Message

Sebastian Andrzej Siewior Oct. 16, 2018, 4:55 p.m. UTC
on 32bit, lockdep notices:
| ================================
| WARNING: inconsistent lock state
| 4.19.0-rc8+ #9 Tainted: G        W
| --------------------------------
| inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
| ip/1106 [HC0[0]:SC1[1]:HE1:SE0] takes:
| (ptrval) (&syncp->seq#2){+.?.}, at: net_rx_action+0xc8/0x380
| {SOFTIRQ-ON-W} state was registered at:
|   lock_acquire+0x7e/0x170
|   try_fill_recv+0x5fa/0x700
|   virtnet_open+0xe0/0x180
|   __dev_open+0xae/0x130
|   __dev_change_flags+0x17f/0x200
|   dev_change_flags+0x23/0x60
|   do_setlink+0x2bb/0xa20
|   rtnl_newlink+0x523/0x830
|   rtnetlink_rcv_msg+0x14b/0x470
|   netlink_rcv_skb+0x6e/0xf0
|   rtnetlink_rcv+0xd/0x10
|   netlink_unicast+0x16e/0x1f0
|   netlink_sendmsg+0x1af/0x3a0
|   ___sys_sendmsg+0x20f/0x240
|   __sys_sendmsg+0x39/0x80
|   sys_socketcall+0x13a/0x2a0
|   do_int80_syscall_32+0x50/0x180
|   restore_all+0x0/0xb2
| irq event stamp: 3326
| hardirqs last  enabled at (3326): [<c159e6d0>] net_rx_action+0x80/0x380
| hardirqs last disabled at (3325): [<c159e6aa>] net_rx_action+0x5a/0x380
| softirqs last  enabled at (3322): [<c14b440d>] virtnet_napi_enable+0xd/0x60
| softirqs last disabled at (3323): [<c101d63d>] call_on_stack+0xd/0x50
|
| other info that might help us debug this:
|  Possible unsafe locking scenario:
|
|        CPU0
|        ----
|   lock(&syncp->seq#2);
|   <Interrupt>
|     lock(&syncp->seq#2);
|
|  *** DEADLOCK ***

This is the "up" path which is not a hotpath. There is also
refill_work().
It might be unwise to add the local_bh_disable() to try_fill_recv()
because if it is used mostly in BH so that local_bh_en+dis might be a
waste of cycles.

Adding local_bh_disable() around try_fill_recv() for the non-BH call
sites would render GFP_KERNEL pointless.
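For illustration, wrapping a non-BH call site would look roughly like
this (sketch only, not part of the patch):

	local_bh_disable();
	/* try_fill_recv() allocates with GFP_KERNEL, but sleeping is
	 * not allowed while BH is disabled, so the flag buys nothing.
	 */
	try_fill_recv(vi, &vi->rq[i], GFP_KERNEL);
	local_bh_enable();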

Also, ptr->var++ is not an atomic operation, even on 64bit CPUs, so
increments can be lost: try_fill_recv() may run on CPU0 (via
virtnet_receive()) while the worker runs it on CPU1.

Do we care or is this just stupid stats?  Any suggestions?

This warning appears since commit 461f03dc99cf6 ("virtio_net: Add kick stats").

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/net/virtio_net.c | 2 ++
 1 file changed, 2 insertions(+)

Comments

Stephen Hemminger Oct. 16, 2018, 5:59 p.m. UTC | #1
On Tue, 16 Oct 2018 18:55:45 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> on 32bit, lockdep notices:
> | ================================
> | WARNING: inconsistent lock state
> | 4.19.0-rc8+ #9 Tainted: G        W
> | --------------------------------
> | inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
> | ip/1106 [HC0[0]:SC1[1]:HE1:SE0] takes:
> | (ptrval) (&syncp->seq#2){+.?.}, at: net_rx_action+0xc8/0x380
> | {SOFTIRQ-ON-W} state was registered at:
> |   lock_acquire+0x7e/0x170
> |   try_fill_recv+0x5fa/0x700
> |   virtnet_open+0xe0/0x180
> |   __dev_open+0xae/0x130
> |   __dev_change_flags+0x17f/0x200
> |   dev_change_flags+0x23/0x60
> |   do_setlink+0x2bb/0xa20
> |   rtnl_newlink+0x523/0x830
> |   rtnetlink_rcv_msg+0x14b/0x470
> |   netlink_rcv_skb+0x6e/0xf0
> |   rtnetlink_rcv+0xd/0x10
> |   netlink_unicast+0x16e/0x1f0
> |   netlink_sendmsg+0x1af/0x3a0
> |   ___sys_sendmsg+0x20f/0x240
> |   __sys_sendmsg+0x39/0x80
> |   sys_socketcall+0x13a/0x2a0
> |   do_int80_syscall_32+0x50/0x180
> |   restore_all+0x0/0xb2
> | irq event stamp: 3326
> | hardirqs last  enabled at (3326): [<c159e6d0>] net_rx_action+0x80/0x380
> | hardirqs last disabled at (3325): [<c159e6aa>] net_rx_action+0x5a/0x380
> | softirqs last  enabled at (3322): [<c14b440d>] virtnet_napi_enable+0xd/0x60
> | softirqs last disabled at (3323): [<c101d63d>] call_on_stack+0xd/0x50
> |
> | other info that might help us debug this:
> |  Possible unsafe locking scenario:
> |
> |        CPU0
> |        ----
> |   lock(&syncp->seq#2);
> |   <Interrupt>
> |     lock(&syncp->seq#2);
> |
> |  *** DEADLOCK ***
> 
> This is the "up" path which is not a hotpath. There is also
> refill_work().
> It might be unwise to add the local_bh_disable() to try_fill_recv()
> because if it is used mostly in BH so that local_bh_en+dis might be a
> waste of cycles.
> 
> Adding local_bh_disable() around try_fill_recv() for the non-BH call
> sites would render GFP_KERNEL pointless.
> 
> Also, ptr->var++ is not an atomic operation, even on 64bit CPUs, so
> increments can be lost: try_fill_recv() may run on CPU0 (via
> virtnet_receive()) while the worker runs it on CPU1.
> 
> Do we care or is this just stupid stats?  Any suggestions?
> 
> This warning appears since commit 461f03dc99cf6 ("virtio_net: Add kick stats").
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  drivers/net/virtio_net.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index dab504ec5e502..d782160cfa882 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1206,9 +1206,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
>  			break;
>  	} while (rq->vq->num_free);
>  	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
> +		local_bh_disable();
>  		u64_stats_update_begin(&rq->stats.syncp);
>  		rq->stats.kicks++;
>  		u64_stats_update_end(&rq->stats.syncp);
> +		local_bh_enable();
>  	}
>  
>  	return !oom;

Since there already is a u64_stats_update_begin_irqsave() inline, why not
introduce u64_stats_update_begin_bh() which encapsulates the local_bh_disable()?
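Such a helper might look like this (hypothetical sketch modeled on the
existing u64_stats_update_begin_irqsave(); no helper by this name exists
in the tree at this point):

	static inline void u64_stats_update_begin_bh(struct u64_stats_sync *syncp)
	{
		local_bh_disable();
		u64_stats_update_begin(syncp);
	}

	static inline void u64_stats_update_end_bh(struct u64_stats_sync *syncp)
	{
		u64_stats_update_end(syncp);
		local_bh_enable();
	}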
Stephen Hemminger Oct. 16, 2018, 6:01 p.m. UTC | #2
On Tue, 16 Oct 2018 18:55:45 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> Also, ptr->var++ is not an atomic operation, even on 64bit CPUs, so
> increments can be lost: try_fill_recv() may run on CPU0 (via
> virtnet_receive()) while the worker runs it on CPU1.

On modern CPUs an increment of a native type is atomic but not locked.
u64_stats_update_begin() is a no-op on UP and also if BITS_PER_LONG != 32.
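For reference, the writer side reads roughly like this in
include/linux/u64_stats_sync.h of this era (paraphrased), which is why
it compiles away on 64bit and on UP:

	static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
	{
	#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
		write_seqcount_begin(&syncp->seq);
	#endif
	}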
Sebastian Andrzej Siewior Oct. 16, 2018, 6:21 p.m. UTC | #3
On 2018-10-16 10:59:30 [-0700], Stephen Hemminger wrote:
> Since there already is a u64_stats_update_begin_irqsave() inline, why not
> introduce u64_stats_update_begin_bh() which encapsulates the local_bh_disable()?

CPU0				CPU1
refill_work()			virtnet_receive()
 try_fill_recv()		 try_fill_recv()
  u64_stats_update_begin_bh()	  u64_stats_update_begin_bh()

both CPUs may operate on the same `rq'.

Sebastian
Sebastian Andrzej Siewior Oct. 16, 2018, 6:42 p.m. UTC | #4
On 2018-10-16 11:01:14 [-0700], Stephen Hemminger wrote:
> On Tue, 16 Oct 2018 18:55:45 +0200
> Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> 
> > Also, ptr->var++ is not an atomic operation, even on 64bit CPUs, so
> > increments can be lost: try_fill_recv() may run on CPU0 (via
> > virtnet_receive()) while the worker runs it on CPU1.
> 
> On modern CPUs an increment of a native type is atomic but not locked.
> u64_stats_update_begin() is a no-op on UP and also if BITS_PER_LONG != 32.

On ARM64 you have load, inc, store. So if two CPUs increment the counter
simultaneously we might lose one increment. That is why I asked if we
care or not.
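The lost update is easy to demonstrate in user space (minimal sketch,
not kernel code; the file name and build line, e.g. cc -O2 race.c
-lpthread, are assumptions). The final count usually ends up well below
2000000 on an SMP machine:

	#include <pthread.h>
	#include <stdio.h>

	/* volatile only to force a real load/inc/store per iteration */
	static volatile unsigned long long counter;

	static void *inc_loop(void *arg)
	{
		(void)arg;
		for (int i = 0; i < 1000000; i++)
			counter++;	/* load, inc, store - not atomic */
		return NULL;
	}

	int main(void)
	{
		pthread_t t1, t2;

		pthread_create(&t1, NULL, inc_loop, NULL);
		pthread_create(&t2, NULL, inc_loop, NULL);
		pthread_join(t1, NULL);
		pthread_join(t2, NULL);
		printf("%llu (expected 2000000)\n", counter);
		return 0;
	}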

Sebastian
Stephen Hemminger Oct. 16, 2018, 6:44 p.m. UTC | #5
On Tue, 16 Oct 2018 20:42:07 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> On 2018-10-16 11:01:14 [-0700], Stephen Hemminger wrote:
> > On Tue, 16 Oct 2018 18:55:45 +0200
> > Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> >   
> > > Also, ptr->var++ is not an atomic operation, even on 64bit CPUs, so
> > > increments can be lost: try_fill_recv() may run on CPU0 (via
> > > virtnet_receive()) while the worker runs it on CPU1.
> > 
> > On modern CPUs an increment of a native type is atomic but not locked.
> > u64_stats_update_begin() is a no-op on UP and also if BITS_PER_LONG != 32.
> 
> On ARM64 you have load, inc, store. So if two CPUs increment the counter
> simultaneously we might lose one increment. That is why I asked if we
> care or not.
> 
> Sebastian

The point is that kicks is just a counter, not important to the
operation of the device. The u64_stats machinery exists to avoid torn
values when the high/low 32-bit halves wrap on increment. So this is OK
on ARM64.
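For context, the reader side that the seqcount protects looks roughly
like this on 32bit (sketch of the standard pattern using the _irq
helpers from include/linux/u64_stats_sync.h; exact usage in virtio_net
may differ):

	u64 kicks;
	unsigned int start;

	do {
		start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
		kicks = rq->stats.kicks;
	} while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));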
Toshiaki Makita Oct. 17, 2018, 1:13 a.m. UTC | #6
On 2018/10/17 1:55, Sebastian Andrzej Siewior wrote:
> on 32bit, lockdep notices:
> | ================================
> | WARNING: inconsistent lock state
> | 4.19.0-rc8+ #9 Tainted: G        W
> | --------------------------------
> | inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
> | ip/1106 [HC0[0]:SC1[1]:HE1:SE0] takes:
> | (ptrval) (&syncp->seq#2){+.?.}, at: net_rx_action+0xc8/0x380
> | {SOFTIRQ-ON-W} state was registered at:
> |   lock_acquire+0x7e/0x170
> |   try_fill_recv+0x5fa/0x700
> |   virtnet_open+0xe0/0x180
> |   __dev_open+0xae/0x130
> |   __dev_change_flags+0x17f/0x200
> |   dev_change_flags+0x23/0x60
> |   do_setlink+0x2bb/0xa20
> |   rtnl_newlink+0x523/0x830
> |   rtnetlink_rcv_msg+0x14b/0x470
> |   netlink_rcv_skb+0x6e/0xf0
> |   rtnetlink_rcv+0xd/0x10
> |   netlink_unicast+0x16e/0x1f0
> |   netlink_sendmsg+0x1af/0x3a0
> |   ___sys_sendmsg+0x20f/0x240
> |   __sys_sendmsg+0x39/0x80
> |   sys_socketcall+0x13a/0x2a0
> |   do_int80_syscall_32+0x50/0x180
> |   restore_all+0x0/0xb2
> | irq event stamp: 3326
> | hardirqs last  enabled at (3326): [<c159e6d0>] net_rx_action+0x80/0x380
> | hardirqs last disabled at (3325): [<c159e6aa>] net_rx_action+0x5a/0x380
> | softirqs last  enabled at (3322): [<c14b440d>] virtnet_napi_enable+0xd/0x60
> | softirqs last disabled at (3323): [<c101d63d>] call_on_stack+0xd/0x50
> |
> | other info that might help us debug this:
> |  Possible unsafe locking scenario:
> |
> |        CPU0
> |        ----
> |   lock(&syncp->seq#2);
> |   <Interrupt>
> |     lock(&syncp->seq#2);
> |
> |  *** DEADLOCK ***

IIUC try_fill_recv() is called from process context only while NAPI is
disabled, so there should be no window in which it can race with
virtnet_receive(), which is called from the NAPI handler.

I'm not sure what condition triggered this warning.


Toshiaki Makita


> 
> This is the "up" path which is not a hotpath. There is also
> refill_work().
> It might be unwise to add the local_bh_disable() to try_fill_recv()
> because if it is used mostly in BH so that local_bh_en+dis might be a
> waste of cycles.
> 
> Adding local_bh_disable() around try_fill_recv() for the non-BH call
> sites would render GFP_KERNEL pointless.
> 
> Also, ptr->var++ is not an atomic operation, even on 64bit CPUs, so
> increments can be lost: try_fill_recv() may run on CPU0 (via
> virtnet_receive()) while the worker runs it on CPU1.
> 
> Do we care or is this just stupid stats?  Any suggestions?
> 
> This warning appears since commit 461f03dc99cf6 ("virtio_net: Add kick stats").
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  drivers/net/virtio_net.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index dab504ec5e502..d782160cfa882 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1206,9 +1206,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
>  			break;
>  	} while (rq->vq->num_free);
>  	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
> +		local_bh_disable();
>  		u64_stats_update_begin(&rq->stats.syncp);
>  		rq->stats.kicks++;
>  		u64_stats_update_end(&rq->stats.syncp);
> +		local_bh_enable();
>  	}
>  
>  	return !oom;
>
Jason Wang Oct. 17, 2018, 6:48 a.m. UTC | #7
On 2018/10/17 9:13 AM, Toshiaki Makita wrote:
> On 2018/10/17 1:55, Sebastian Andrzej Siewior wrote:
>> on 32bit, lockdep notices:
>> | ================================
>> | WARNING: inconsistent lock state
>> | 4.19.0-rc8+ #9 Tainted: G        W
>> | --------------------------------
>> | inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
>> | ip/1106 [HC0[0]:SC1[1]:HE1:SE0] takes:
>> | (ptrval) (&syncp->seq#2){+.?.}, at: net_rx_action+0xc8/0x380
>> | {SOFTIRQ-ON-W} state was registered at:
>> |   lock_acquire+0x7e/0x170
>> |   try_fill_recv+0x5fa/0x700
>> |   virtnet_open+0xe0/0x180
>> |   __dev_open+0xae/0x130
>> |   __dev_change_flags+0x17f/0x200
>> |   dev_change_flags+0x23/0x60
>> |   do_setlink+0x2bb/0xa20
>> |   rtnl_newlink+0x523/0x830
>> |   rtnetlink_rcv_msg+0x14b/0x470
>> |   netlink_rcv_skb+0x6e/0xf0
>> |   rtnetlink_rcv+0xd/0x10
>> |   netlink_unicast+0x16e/0x1f0
>> |   netlink_sendmsg+0x1af/0x3a0
>> |   ___sys_sendmsg+0x20f/0x240
>> |   __sys_sendmsg+0x39/0x80
>> |   sys_socketcall+0x13a/0x2a0
>> |   do_int80_syscall_32+0x50/0x180
>> |   restore_all+0x0/0xb2
>> | irq event stamp: 3326
>> | hardirqs last  enabled at (3326): [<c159e6d0>] net_rx_action+0x80/0x380
>> | hardirqs last disabled at (3325): [<c159e6aa>] net_rx_action+0x5a/0x380
>> | softirqs last  enabled at (3322): [<c14b440d>] virtnet_napi_enable+0xd/0x60
>> | softirqs last disabled at (3323): [<c101d63d>] call_on_stack+0xd/0x50
>> |
>> | other info that might help us debug this:
>> |  Possible unsafe locking scenario:
>> |
>> |        CPU0
>> |        ----
>> |   lock(&syncp->seq#2);
>> |   <Interrupt>
>> |     lock(&syncp->seq#2);
>> |
>> |  *** DEADLOCK ***
> IIUC try_fill_recv() is called from process context only while NAPI is
> disabled, so there should be no window in which it can race with
> virtnet_receive(), which is called from the NAPI handler.
>
> I'm not sure what condition triggered this warning.
>
>
> Toshiaki Makita


Or maybe NAPI is enabled unexpectedly somewhere?

Btw, the schedule_delayed_work() in virtnet_open() is also suspicious:
if the work is executed before virtnet_napi_enable(), napi_disable()
will spin in an endless loop.
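A sketch of why that loops forever: netif_napi_add() leaves
NAPI_STATE_SCHED set until napi_enable() clears it, and napi_disable()
spin-waits on exactly that bit (paraphrasing net/core/dev.c of this
era; details may differ slightly):

	void napi_disable(struct napi_struct *n)
	{
		might_sleep();
		set_bit(NAPI_STATE_DISABLE, &n->state);

		/* never succeeds if napi_enable() has not run yet */
		while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
			msleep(1);
		while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
			msleep(1);

		hrtimer_cancel(&n->timer);

		clear_bit(NAPI_STATE_DISABLE, &n->state);
	}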

Thanks


>
>
>> This is the "up" path which is not a hotpath. There is also
>> refill_work().
>> It might be unwise to add the local_bh_disable() to try_fill_recv()
>> because if it is used mostly in BH so that local_bh_en+dis might be a
>> waste of cycles.
>>
>> Adding local_bh_disable() around try_fill_recv() for the non-BH call
>> sites would render GFP_KERNEL pointless.
>>
>> Also, ptr->var++ is not an atomic operation, even on 64bit CPUs, so
>> increments can be lost: try_fill_recv() may run on CPU0 (via
>> virtnet_receive()) while the worker runs it on CPU1.
>>
>> Do we care or is this just stupid stats?  Any suggestions?
>>
>> This warning appears since commit 461f03dc99cf6 ("virtio_net: Add kick stats").
>>
>> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
>> ---
>>   drivers/net/virtio_net.c | 2 ++
>>   1 file changed, 2 insertions(+)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index dab504ec5e502..d782160cfa882 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -1206,9 +1206,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
>>   			break;
>>   	} while (rq->vq->num_free);
>>   	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
>> +		local_bh_disable();
>>   		u64_stats_update_begin(&rq->stats.syncp);
>>   		rq->stats.kicks++;
>>   		u64_stats_update_end(&rq->stats.syncp);
>> +		local_bh_enable();
>>   	}
>>   
>>   	return !oom;
>>
Sebastian Andrzej Siewior Oct. 18, 2018, 8:47 a.m. UTC | #8
On 2018-10-17 14:48:02 [+0800], Jason Wang wrote:
> 
> On 2018/10/17 9:13 AM, Toshiaki Makita wrote:
> > I'm not sure what condition triggered this warning.

If the seqlock is acquired once in softirq and then in process context
again it is enough evidence for lockdep to trigger this warning.

> > Toshiaki Makita
> 
> 
> Or maybe NAPI is enabled unexpectedly somewhere?
> 
> Btw, the schedule_delayed_work() in virtnet_open() is also suspicious:
> if the work is executed before virtnet_napi_enable(), napi_disable()
> will spin in an endless loop.

Something like this? It is also likely that if it runs OOM on queue 2,
it will run OOM again on queue 3.

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fbcfb4d272336..87d6ec4765270 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1263,22 +1263,22 @@ static void refill_work(struct work_struct *work)
 {
 	struct virtnet_info *vi =
 		container_of(work, struct virtnet_info, refill.work);
-	bool still_empty;
+	int still_empty = 0;
 	int i;
 
 	for (i = 0; i < vi->curr_queue_pairs; i++) {
 		struct receive_queue *rq = &vi->rq[i];
 
 		napi_disable(&rq->napi);
-		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
+		if (!try_fill_recv(vi, rq, GFP_KERNEL))
+		    still_empty++;
 		virtnet_napi_enable(rq->vq, &rq->napi);
-
-		/* In theory, this can happen: if we don't get any buffers in
-		 * we will *never* try to fill again.
-		 */
-		if (still_empty)
-			schedule_delayed_work(&vi->refill, HZ/2);
 	}
+	/* In theory, this can happen: if we don't get any buffers in
+	 * we will *never* try to fill again.
+	 */
+	if (still_empty)
+		schedule_delayed_work(&vi->refill, HZ/2);
 }
 
 static int virtnet_receive(struct receive_queue *rq, int budget,
@@ -1407,12 +1407,13 @@ static int virtnet_open(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	int i, err;
+	int need_refill = 0;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		if (i < vi->curr_queue_pairs)
 			/* Make sure we have some buffers: if oom use wq. */
 			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
-				schedule_delayed_work(&vi->refill, 0);
+				need_refill++;
 
 		err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i);
 		if (err < 0)
@@ -1428,6 +1429,8 @@ static int virtnet_open(struct net_device *dev)
 		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 		virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
 	}
+	if (need_refill)
+		schedule_delayed_work(&vi->refill, 0);
 
 	return 0;
 }
@@ -2236,6 +2239,7 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
 	int err, i;
+	int need_refill = 0;
 
 	err = init_vqs(vi);
 	if (err)
@@ -2246,13 +2250,15 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 	if (netif_running(vi->dev)) {
 		for (i = 0; i < vi->curr_queue_pairs; i++)
 			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
-				schedule_delayed_work(&vi->refill, 0);
+				need_refill++;
 
 		for (i = 0; i < vi->max_queue_pairs; i++) {
 			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 			virtnet_napi_tx_enable(vi, vi->sq[i].vq,
 					       &vi->sq[i].napi);
 		}
+		if (need_refill)
+			schedule_delayed_work(&vi->refill, 0);
 	}
 
 	netif_device_attach(vi->dev);

> Thanks

Sebastian
Toshiaki Makita Oct. 18, 2018, 9 a.m. UTC | #9
On 2018/10/18 17:47, Sebastian Andrzej Siewior wrote:
> On 2018-10-17 14:48:02 [+0800], Jason Wang wrote:
>>
>> On 2018/10/17 9:13 AM, Toshiaki Makita wrote:
>>> I'm not sure what condition triggered this warning.
> 
> If the seqlock is acquired once in softirq and then in process context
> again it is enough evidence for lockdep to trigger this warning.

No. As I said that should not happen because of NAPI guard.
Sebastian Andrzej Siewior Oct. 18, 2018, 9:08 a.m. UTC | #10
On 2018-10-18 18:00:05 [+0900], Toshiaki Makita wrote:
> On 2018/10/18 17:47, Sebastian Andrzej Siewior wrote:
> > On 2018-10-17 14:48:02 [+0800], Jason Wang wrote:
> >>
> >> On 2018/10/17 9:13 AM, Toshiaki Makita wrote:
> >>> I'm not sure what condition triggered this warning.
> > 
> > If the seqlock is acquired once in softirq and then in process context
> > again it is enough evidence for lockdep to trigger this warning.
> 
> No. As I said that should not happen because of NAPI guard.
Again: lockdep saw the lock in softirq context once and in process
context once and this is what triggers the warning. It does not matter
if NAPI is enabled or not during the access in process context. If you
want to allow this you need further lockdep annotation…

… but: refill_work() disables NAPI for &vi->rq[1] and refills + updates
stats while NAPI is enabled for &vi->rq[0].

Sebastian
Toshiaki Makita Oct. 18, 2018, 9:19 a.m. UTC | #11
On 2018/10/18 18:08, Sebastian Andrzej Siewior wrote:
> On 2018-10-18 18:00:05 [+0900], Toshiaki Makita wrote:
>> On 2018/10/18 17:47, Sebastian Andrzej Siewior wrote:
>>> On 2018-10-17 14:48:02 [+0800], Jason Wang wrote:
>>>>
>>>> On 2018/10/17 9:13 AM, Toshiaki Makita wrote:
>>>>> I'm not sure what condition triggered this warning.
>>>
>>> If the seqlock is acquired once in softirq and then in process context
>>> again it is enough evidence for lockdep to trigger this warning.
>>
>> No. As I said that should not happen because of NAPI guard.
> Again: lockdep saw the lock in softirq context once and in process
> context once and this is what triggers the warning. It does not matter
> if NAPI is enabled or not during the access in process context. If you
> want to allow this you need further lockdep annotation…
> 
> … but: refill_work() disables NAPI for &vi->rq[1] and refills + updates
> stats while NAPI is enabled for &vi->rq[0].

Do you mean this is a false positive? rq[0] and rq[1] never race with
each other...
Sebastian Andrzej Siewior Oct. 18, 2018, 9:30 a.m. UTC | #12
On 2018-10-18 18:19:21 [+0900], Toshiaki Makita wrote:
> On 2018/10/18 18:08, Sebastian Andrzej Siewior wrote:
> > Again: lockdep saw the lock in softirq context once and in process
> > context once and this is what triggers the warning. It does not matter
> > if NAPI is enabled or not during the access in process context. If you
> > want to allow this you need further lockdep annotation…
> > 
> > … but: refill_work() disables NAPI for &vi->rq[1] and refills + updates
> > stats while NAPI is enabled for &vi->rq[0].
> 
> Do you mean this is a false positive? rq[0] and rq[1] never race with
> each other...

Why? So you can't refill rq[1] and then be interrupted and process NAPI
for rq[0]?

But as I said: if lockdep saw the lock acquired in softirq (which it
did) _and_ in process context (which it did as well) _once_, then this
is enough evidence for the warning.
If you claim that this cannot happen due to the NAPI guard [0], then
this is something lockdep does not know about.

[0] which I currently don't understand, and therefore sent the patch [1]:
    as Jason pointed out, in the ->ndo_open case the work is
    scheduled and then NAPI is enabled (which means the worker could
    disable NAPI and refill, but before it finishes, ->ndo_open would
    continue and enable NAPI).
[1] 20181018084753.wefvsypdevbzoadg@linutronix.de

Sebastian
Toshiaki Makita Oct. 18, 2018, 9:53 a.m. UTC | #13
On 2018/10/18 18:30, Sebastian Andrzej Siewior wrote:
> On 2018-10-18 18:19:21 [+0900], Toshiaki Makita wrote:
>> On 2018/10/18 18:08, Sebastian Andrzej Siewior wrote:
>>> Again: lockdep saw the lock in softirq context once and in process
>>> context once and this is what triggers the warning. It does not matter
>>> if NAPI is enabled or not during the access in process context. If you
>>> want to allow this you need further lockdep annotation…
>>>
>>> … but: refill_work() disables NAPI for &vi->rq[1] and refills + updates
>>> stats while NAPI is enabled for &vi->rq[0].
>>
>> Do you mean this is a false positive? rq[0] and rq[1] never race with
>> each other...
> 
> Why? So you can't refill rq[1] and then be interrupted and process NAPI
> for rq[0]?

Probably I don't understand what you are trying to say, but rq[0] and
rq[1] are different objects so they can be processed concurrently but
should not affect each other.

> 
> But as I said: if lockdep saw the lock acquired in softirq (which it
> did) _and_ in process context (which it did as well) _once_, then this
> is enough evidence for the warning.
> If you claim that this cannot happen due to the NAPI guard [0], then
> this is something lockdep does not know about.

The point is that this is not a problem with the stats: try_fill_recv()
should not be called for the same rq concurrently in the first place.
That requirement should be satisfied by the NAPI guard, so if the NAPI
guard logic is buggy then we need to fix it.

> [0] which I currently don't understand and therefore sent the patch [1]
>     as Jason pointed out that in the ->ndo_open case the work is
>     scheduled and then NAPI is enabled (which means the worker could
>     disable NAPI and refill but before it finishes, ->ndo_open would
>     continue and enable NAPI)).

It may be related but I have not investigated it deeply. I'll check if
it can cause this problem later.

> [1] 20181018084753.wefvsypdevbzoadg@linutronix.de
Rafael David Tinoco Oct. 18, 2018, 1:21 p.m. UTC | #14
On Thu, Oct 18, 2018 at 6:19 AM, Toshiaki Makita
<makita.toshiaki@lab.ntt.co.jp> wrote:
> On 2018/10/18 18:08, Sebastian Andrzej Siewior wrote:
>> On 2018-10-18 18:00:05 [+0900], Toshiaki Makita wrote:
>>> On 2018/10/18 17:47, Sebastian Andrzej Siewior wrote:
>>>> On 2018-10-17 14:48:02 [+0800], Jason Wang wrote:
>>>>>
>>>>> On 2018/10/17 9:13 AM, Toshiaki Makita wrote:
>>>>>> I'm not sure what condition triggered this warning.
>>>>
>>>> If the seqlock is acquired once in softirq and then in process context
>>>> again it is enough evidence for lockdep to trigger this warning.
>>>
>>> No. As I said that should not happen because of NAPI guard.
>> Again: lockdep saw the lock in softirq context once and in process
>> context once and this is what triggers the warning. It does not matter
>> if NAPI is enabled or not during the access in process context. If you
>> want to allow this you need further lockdep annotation…
>>
>> … but: refill_work() disables NAPI for &vi->rq[1] and refills + updates
>> stats while NAPI is enabled for &vi->rq[0].
>
> Do you mean this is a false positive? rq[0] and rq[1] never race with
> each other...
>

I just came to this thread after hitting the same "false positive"
warning in dmesg on an armhf KVM guest.

It appears to me that, at least in my case, the sequence:

u64_stats_update_begin() -> write_seqcount_begin() ->
write_seqcount_begin_nested() -> raw_write_seqcount_begin()

only increments s->sequence. With that, whenever we have

CONFIG_TRACE_IRQFLAGS and CONFIG_DEBUG_LOCK_ALLOC enabled,

we might face this false-positive warning, since there is no lock here,
just a sequence counter, right? And having a barrier after incrementing
the sequence, as I do now, won't stop the other context from triggering
the "acquiring the same lock" warning (there is no actual lock in this
case) issued via seqcount_acquire().

Hope this helps the discussion.

Link: https://bugs.linaro.org/show_bug.cgi?id=4027

Thanks

Rafael Tinoco
Jason Wang Oct. 19, 2018, 2:17 a.m. UTC | #15
On 2018/10/18 4:47 PM, Sebastian Andrzej Siewior wrote:
> On 2018-10-17 14:48:02 [+0800], Jason Wang wrote:
>> On 2018/10/17 9:13 AM, Toshiaki Makita wrote:
>>> I'm not sure what condition triggered this warning.
> If the seqlock is acquired once in softirq and then in process context
> again it is enough evidence for lockdep to trigger this warning.
>
>>> Toshiaki Makita
>>
>> Or maybe NAPI is enabled unexpectedly somewhere?
>>
>> Btw, the schedule_delayed_work() in virtnet_open() is also suspicious:
>> if the work is executed before virtnet_napi_enable(), napi_disable()
>> will spin in an endless loop.
> Something like this? It is also likely that if it runs OOM on queue 2,
> it will run OOM again on queue 3.
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index fbcfb4d272336..87d6ec4765270 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1263,22 +1263,22 @@ static void refill_work(struct work_struct *work)
>   {
>   	struct virtnet_info *vi =
>   		container_of(work, struct virtnet_info, refill.work);
> -	bool still_empty;
> +	int still_empty = 0;
>   	int i;
>   
>   	for (i = 0; i < vi->curr_queue_pairs; i++) {
>   		struct receive_queue *rq = &vi->rq[i];
>   
>   		napi_disable(&rq->napi);
> -		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> +		if (!try_fill_recv(vi, rq, GFP_KERNEL))
> +		    still_empty++;
>   		virtnet_napi_enable(rq->vq, &rq->napi);
> -
> -		/* In theory, this can happen: if we don't get any buffers in
> -		 * we will *never* try to fill again.
> -		 */
> -		if (still_empty)
> -			schedule_delayed_work(&vi->refill, HZ/2);
>   	}
> +	/* In theory, this can happen: if we don't get any buffers in
> +	 * we will *never* try to fill again.
> +	 */
> +	if (still_empty)
> +		schedule_delayed_work(&vi->refill, HZ/2);
>   }


I think this part is not a must, but rather an independent optimization?


Thanks


>   
>   static int virtnet_receive(struct receive_queue *rq, int budget,
> @@ -1407,12 +1407,13 @@ static int virtnet_open(struct net_device *dev)
>   {
>   	struct virtnet_info *vi = netdev_priv(dev);
>   	int i, err;
> +	int need_refill = 0;
>   
>   	for (i = 0; i < vi->max_queue_pairs; i++) {
>   		if (i < vi->curr_queue_pairs)
>   			/* Make sure we have some buffers: if oom use wq. */
>   			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
> -				schedule_delayed_work(&vi->refill, 0);
> +				need_refill++;
>   
>   		err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i);
>   		if (err < 0)
> @@ -1428,6 +1429,8 @@ static int virtnet_open(struct net_device *dev)
>   		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
>   		virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
>   	}
> +	if (need_refill)
> +		schedule_delayed_work(&vi->refill, 0);
>   
>   	return 0;
>   }
> @@ -2236,6 +2239,7 @@ static int virtnet_restore_up(struct virtio_device *vdev)
>   {
>   	struct virtnet_info *vi = vdev->priv;
>   	int err, i;
> +	int need_refill = 0;
>   
>   	err = init_vqs(vi);
>   	if (err)
> @@ -2246,13 +2250,15 @@ static int virtnet_restore_up(struct virtio_device *vdev)
>   	if (netif_running(vi->dev)) {
>   		for (i = 0; i < vi->curr_queue_pairs; i++)
>   			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
> -				schedule_delayed_work(&vi->refill, 0);
> +				need_refill++;
>   
>   		for (i = 0; i < vi->max_queue_pairs; i++) {
>   			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
>   			virtnet_napi_tx_enable(vi, vi->sq[i].vq,
>   					       &vi->sq[i].napi);
>   		}
> +		if (need_refill)
> +			schedule_delayed_work(&vi->refill, 0);
>   	}
>   
>   	netif_device_attach(vi->dev);
>
>> Thanks
> Sebastian

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index dab504ec5e502..d782160cfa882 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1206,9 +1206,11 @@  static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
 			break;
 	} while (rq->vq->num_free);
 	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
+		local_bh_disable();
 		u64_stats_update_begin(&rq->stats.syncp);
 		rq->stats.kicks++;
 		u64_stats_update_end(&rq->stats.syncp);
+		local_bh_enable();
 	}
 
 	return !oom;