Message ID | 20190708110344.23278-1-i.maximets@samsung.com |
---|---|
State | Accepted |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf] xdp: fix potential deadlock on socket mutex | expand |
On 8 Jul 2019, at 4:03, Ilya Maximets wrote: > There are 2 call chains: > > a) xsk_bind --> xdp_umem_assign_dev > b) unregister_netdevice_queue --> xsk_notifier > > with the following locking order: > > a) xs->mutex --> rtnl_lock > b) rtnl_lock --> xdp.lock --> xs->mutex > > Different order of taking 'xs->mutex' and 'rtnl_lock' could produce a > deadlock here. Fix that by moving the 'rtnl_lock' before 'xs->lock' in > the bind call chain (a). > > Reported-by: syzbot+bf64ec93de836d7f4c2c@syzkaller.appspotmail.com > Fixes: 455302d1c9ae ("xdp: fix hang while unregistering device bound > to xdp socket") > Signed-off-by: Ilya Maximets <i.maximets@samsung.com> Thanks, Ilya! I think in the long run the locking needs to be revisited, but this should fix the deadlock for now. Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com> > This patch is a fix for patch that is not yet in mainline, but > already in 'net' tree. I'm not sure what is the correct process > for applying such fixes. > > net/xdp/xdp_umem.c | 16 ++++++---------- > net/xdp/xsk.c | 2 ++ > 2 files changed, 8 insertions(+), 10 deletions(-) > > diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c > index 20c91f02d3d8..83de74ca729a 100644 > --- a/net/xdp/xdp_umem.c > +++ b/net/xdp/xdp_umem.c > @@ -87,21 +87,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, > struct net_device *dev, > struct netdev_bpf bpf; > int err = 0; > > + ASSERT_RTNL(); > + > force_zc = flags & XDP_ZEROCOPY; > force_copy = flags & XDP_COPY; > > if (force_zc && force_copy) > return -EINVAL; > > - rtnl_lock(); > - if (xdp_get_umem_from_qid(dev, queue_id)) { > - err = -EBUSY; > - goto out_rtnl_unlock; > - } > + if (xdp_get_umem_from_qid(dev, queue_id)) > + return -EBUSY; > > err = xdp_reg_umem_at_qid(dev, umem, queue_id); > if (err) > - goto out_rtnl_unlock; > + return err; > > umem->dev = dev; > umem->queue_id = queue_id; > @@ -110,7 +109,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, > struct net_device *dev, > > if (force_copy) > /* For copy-mode, we are done. */ > - goto out_rtnl_unlock; > + return 0; > > if (!dev->netdev_ops->ndo_bpf || > !dev->netdev_ops->ndo_xsk_async_xmit) { > @@ -125,7 +124,6 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, > struct net_device *dev, > err = dev->netdev_ops->ndo_bpf(dev, &bpf); > if (err) > goto err_unreg_umem; > - rtnl_unlock(); > > umem->zc = true; > return 0; > @@ -135,8 +133,6 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, > struct net_device *dev, > err = 0; /* fallback to copy mode */ > if (err) > xdp_clear_umem_at_qid(dev, queue_id); > -out_rtnl_unlock: > - rtnl_unlock(); > return err; > } > > diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c > index 703cf5ea448b..2aa6072a3e55 100644 > --- a/net/xdp/xsk.c > +++ b/net/xdp/xsk.c > @@ -416,6 +416,7 @@ static int xsk_bind(struct socket *sock, struct > sockaddr *addr, int addr_len) > if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY)) > return -EINVAL; > > + rtnl_lock(); > mutex_lock(&xs->mutex); > if (xs->state != XSK_READY) { > err = -EBUSY; > @@ -501,6 +502,7 @@ static int xsk_bind(struct socket *sock, struct > sockaddr *addr, int addr_len) > xs->state = XSK_BOUND; > out_release: > mutex_unlock(&xs->mutex); > + rtnl_unlock(); > return err; > } > > -- > 2.17.1
On 07/08/2019 01:03 PM, Ilya Maximets wrote: > There are 2 call chains: > > a) xsk_bind --> xdp_umem_assign_dev > b) unregister_netdevice_queue --> xsk_notifier > > with the following locking order: > > a) xs->mutex --> rtnl_lock > b) rtnl_lock --> xdp.lock --> xs->mutex > > Different order of taking 'xs->mutex' and 'rtnl_lock' could produce a > deadlock here. Fix that by moving the 'rtnl_lock' before 'xs->lock' in > the bind call chain (a). > > Reported-by: syzbot+bf64ec93de836d7f4c2c@syzkaller.appspotmail.com > Fixes: 455302d1c9ae ("xdp: fix hang while unregistering device bound to xdp socket") > Signed-off-by: Ilya Maximets <i.maximets@samsung.com> Applied, thanks!
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 20c91f02d3d8..83de74ca729a 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -87,21 +87,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, struct netdev_bpf bpf; int err = 0; + ASSERT_RTNL(); + force_zc = flags & XDP_ZEROCOPY; force_copy = flags & XDP_COPY; if (force_zc && force_copy) return -EINVAL; - rtnl_lock(); - if (xdp_get_umem_from_qid(dev, queue_id)) { - err = -EBUSY; - goto out_rtnl_unlock; - } + if (xdp_get_umem_from_qid(dev, queue_id)) + return -EBUSY; err = xdp_reg_umem_at_qid(dev, umem, queue_id); if (err) - goto out_rtnl_unlock; + return err; umem->dev = dev; umem->queue_id = queue_id; @@ -110,7 +109,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, if (force_copy) /* For copy-mode, we are done. */ - goto out_rtnl_unlock; + return 0; if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit) { @@ -125,7 +124,6 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, err = dev->netdev_ops->ndo_bpf(dev, &bpf); if (err) goto err_unreg_umem; - rtnl_unlock(); umem->zc = true; return 0; @@ -135,8 +133,6 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, err = 0; /* fallback to copy mode */ if (err) xdp_clear_umem_at_qid(dev, queue_id); -out_rtnl_unlock: - rtnl_unlock(); return err; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 703cf5ea448b..2aa6072a3e55 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -416,6 +416,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY)) return -EINVAL; + rtnl_lock(); mutex_lock(&xs->mutex); if (xs->state != XSK_READY) { err = -EBUSY; @@ -501,6 +502,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->state = XSK_BOUND; out_release: mutex_unlock(&xs->mutex); + rtnl_unlock(); return err; }
There are 2 call chains: a) xsk_bind --> xdp_umem_assign_dev b) unregister_netdevice_queue --> xsk_notifier with the following locking order: a) xs->mutex --> rtnl_lock b) rtnl_lock --> xdp.lock --> xs->mutex Different order of taking 'xs->mutex' and 'rtnl_lock' could produce a deadlock here. Fix that by moving the 'rtnl_lock' before 'xs->lock' in the bind call chain (a). Reported-by: syzbot+bf64ec93de836d7f4c2c@syzkaller.appspotmail.com Fixes: 455302d1c9ae ("xdp: fix hang while unregistering device bound to xdp socket") Signed-off-by: Ilya Maximets <i.maximets@samsung.com> --- This patch is a fix for patch that is not yet in mainline, but already in 'net' tree. I'm not sure what is the correct process for applying such fixes. net/xdp/xdp_umem.c | 16 ++++++---------- net/xdp/xsk.c | 2 ++ 2 files changed, 8 insertions(+), 10 deletions(-)