Message ID | 1356027360.21834.2973.camel@edumazet-glaptop |
---|---|
State | RFC, archived |
Delegated to: | David Miller |
Headers | show |
On Thu, 20 Dec 2012 10:16:00 -0800 Eric Dumazet <erdnetdev@gmail.com> wrote: > On Thu, 2012-12-20 at 08:34 -0800, Stephen Hemminger wrote: > > On Thu, 20 Dec 2012 16:00:32 +0200 > > Yan Burman <yanb@mellanox.com> wrote: > > > > > Hi. > > > > > > When working with vxlan from current net-next, I got a lockdep warning > > > (below). > > > It seems to happen when I have host B pinging host A and while the pings > > > continue, > > > I do "ip link del" on the vxlan interface on host A. The lockdep warning > > > is on host A. > > > Tell me if you need some more info. > > > > > > > Looks like the case of nested ARP requests, the initial request is coming > > from neigh_timer (ARP retransmit), but inside neigh_probe the lock > > is dropped? > > Bug is from arp_solicit(), releasing the lock after arp_send() > > Its used to protect neigh->ha > > We could instead copy neigh->ha, without taking n->lock but ha_lock > seqlock, using neigh_ha_snapshot() helper > > Yan, could you test the following patch ? 
> > Thanks > diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c > index ce6fbdf..1169ed4 100644 > --- a/net/ipv4/arp.c > +++ b/net/ipv4/arp.c > @@ -321,7 +321,7 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) > static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) > { > __be32 saddr = 0; > - u8 *dst_ha = NULL; > + u8 dst_ha[MAX_ADDR_LEN]; > struct net_device *dev = neigh->dev; > __be32 target = *(__be32 *)neigh->primary_key; > int probes = atomic_read(&neigh->probes); > @@ -363,9 +363,9 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) > if (probes < 0) { > if (!(neigh->nud_state & NUD_VALID)) > pr_debug("trying to ucast probe in NUD_INVALID\n"); > - dst_ha = neigh->ha; > - read_lock_bh(&neigh->lock); > + neigh_ha_snapshot(dst_ha, neigh, dev); > } else { > + memset(dst_ha, 0, dev->addr_len); > probes -= neigh->parms->app_probes; > if (probes < 0) { > #ifdef CONFIG_ARPD > @@ -377,8 +377,6 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) > > arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, > dst_ha, dev->dev_addr, NULL); > - if (dst_ha) > - read_unlock_bh(&neigh->lock); > } > > static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) I like this. Getting rid of yet another read lock -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 20-Dec-12 20:16, Eric Dumazet wrote: > On Thu, 2012-12-20 at 08:34 -0800, Stephen Hemminger wrote: >> On Thu, 20 Dec 2012 16:00:32 +0200 >> Yan Burman <yanb@mellanox.com> wrote: >> >>> Hi. >>> >>> When working with vxlan from current net-next, I got a lockdep warning >>> (below). >>> It seems to happen when I have host B pinging host A and while the pings >>> continue, >>> I do "ip link del" on the vxlan interface on host A. The lockdep warning >>> is on host A. >>> Tell me if you need some more info. >>> >> Looks like the case of nested ARP requests, the initial request is coming >> from neigh_timer (ARP retransmit), but inside neigh_probe the lock >> is dropped? > Bug is from arp_solicit(), releasing the lock after arp_send() > > Its used to protect neigh->ha > > We could instead copy neigh->ha, without taking n->lock but ha_lock > seqlock, using neigh_ha_snapshot() helper > > Yan, could you test the following patch ? > > Thanks > diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c > index ce6fbdf..1169ed4 100644 > --- a/net/ipv4/arp.c > +++ b/net/ipv4/arp.c > @@ -321,7 +321,7 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) > static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) > { > __be32 saddr = 0; > - u8 *dst_ha = NULL; > + u8 dst_ha[MAX_ADDR_LEN]; > struct net_device *dev = neigh->dev; > __be32 target = *(__be32 *)neigh->primary_key; > int probes = atomic_read(&neigh->probes); > @@ -363,9 +363,9 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) > if (probes < 0) { > if (!(neigh->nud_state & NUD_VALID)) > pr_debug("trying to ucast probe in NUD_INVALID\n"); > - dst_ha = neigh->ha; > - read_lock_bh(&neigh->lock); > + neigh_ha_snapshot(dst_ha, neigh, dev); > } else { > + memset(dst_ha, 0, dev->addr_len); > probes -= neigh->parms->app_probes; > if (probes < 0) { > #ifdef CONFIG_ARPD > @@ -377,8 +377,6 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) > > 
arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, > dst_ha, dev->dev_addr, NULL); > - if (dst_ha) > - read_unlock_bh(&neigh->lock); > } > > static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) > > I am not able to reproduce the problem now, either with or without the patch... I did get the warning twice when I first reported the issue. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ce6fbdf..1169ed4 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -321,7 +321,7 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) { __be32 saddr = 0; - u8 *dst_ha = NULL; + u8 dst_ha[MAX_ADDR_LEN]; struct net_device *dev = neigh->dev; __be32 target = *(__be32 *)neigh->primary_key; int probes = atomic_read(&neigh->probes); @@ -363,9 +363,9 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) if (probes < 0) { if (!(neigh->nud_state & NUD_VALID)) pr_debug("trying to ucast probe in NUD_INVALID\n"); - dst_ha = neigh->ha; - read_lock_bh(&neigh->lock); + neigh_ha_snapshot(dst_ha, neigh, dev); } else { + memset(dst_ha, 0, dev->addr_len); probes -= neigh->parms->app_probes; if (probes < 0) { #ifdef CONFIG_ARPD @@ -377,8 +377,6 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, dst_ha, dev->dev_addr, NULL); - if (dst_ha) - read_unlock_bh(&neigh->lock); } static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)