Message ID | 200911162308.59730.opurdila@ixiacom.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
Octavian Purdila a écrit : > Tests performed with per device sysctl/sysfs entries disabled: > > $ insmod /lib/modules/dummy.ko numdummies=8000 > $ time rmmod dummy > > Without the patch: With the patch: > real 0m 3.65s real 0m 0.27s > user 0m 0.00s user 0m 0.00s > sys 0m 3.42s sys 0m 0.24s > > Signed-off-by: Octavian Purdila <opurdila@ixiacom.com> > --- > net/core/dev.c | 28 ++++++++++++++++++++++++++-- > net/ipv4/fib_frontend.c | 13 ++++++++----- > 2 files changed, 34 insertions(+), 7 deletions(-) > > diff --git a/net/core/dev.c b/net/core/dev.c > index 4b24d79..b0a14f0 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -4720,7 +4720,8 @@ static void net_set_todo(struct net_device *dev) > > static void rollback_registered_many(struct list_head *head) > { > - struct net_device *dev; > + struct net_device *dev, *aux, *fdev; > + LIST_HEAD(rt_flush_list); > > BUG_ON(dev_boot_phase); > ASSERT_RTNL(); > @@ -4778,8 +4779,28 @@ static void rollback_registered_many(struct list_head *head) > > synchronize_net(); > > - list_for_each_entry(dev, head, unreg_list) > + /* flush route cache by resending one NETDEV_UNREGISTER per namespace */ > + list_for_each_entry_safe(dev, aux, head, unreg_list) { > + int needs_flush = 1; > + list_for_each_entry(fdev, &rt_flush_list, unreg_list) { > + if (dev_net(dev) == dev_net(fdev)) { > + needs_flush = 0; > + dev_put(dev); > + break; > + } > + } > + if (needs_flush) { > + list_del(&dev->unreg_list); > + list_add(&dev->unreg_list, &rt_flush_list); list_move ... 
> + } > + } > + > + list_for_each_entry_safe(dev, aux, &rt_flush_list, unreg_list) { > + list_del_init(&dev->unreg_list); > + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); > + list_add(&dev->unreg_list, head); > dev_put(dev); > + } > } > > static void rollback_registered(struct net_device *dev) > @@ -5374,6 +5395,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue); > * unregister_netdevice_many - unregister many devices > * @head: list of devices > * > + * WARNING: This function modifies the list. It may change the order of the > + * elements in the list. However, you can assume it does not add or delete > + * elements to/from the list. Sorry I dont understand this comment > */ > void unregister_netdevice_many(struct list_head *head) > { > diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c > index 816e218..1972760 100644 > --- a/net/ipv4/fib_frontend.c > +++ b/net/ipv4/fib_frontend.c > @@ -895,11 +895,11 @@ static void nl_fib_lookup_exit(struct net *net) > net->ipv4.fibnl = NULL; > } > > -static void fib_disable_ip(struct net_device *dev, int force) > +static void fib_disable_ip(struct net_device *dev, int force, int delay) > { > if (fib_sync_down_dev(dev, force)) > fib_flush(dev_net(dev)); > - rt_cache_flush(dev_net(dev), 0); > + rt_cache_flush(dev_net(dev), delay); > arp_ifdown(dev); > } > > @@ -922,7 +922,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, > /* Last address was deleted from this interface. > Disable IP. 
> */ > - fib_disable_ip(dev, 1); > + fib_disable_ip(dev, 1, 0); > } else { > rt_cache_flush(dev_net(dev), -1); > } > @@ -937,7 +937,10 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo > struct in_device *in_dev = __in_dev_get_rtnl(dev); > > if (event == NETDEV_UNREGISTER) { > - fib_disable_ip(dev, 2); > + /* if this event is part of a batch then don't flush the cache > + * now; we will receive another event at the end of the batch */ > + int rt_flush = list_empty(&dev->unreg_list) ? 0 : -1; hmm... a bit ugly... > + fib_disable_ip(dev, 2, rt_flush); > return NOTIFY_DONE; > } > > @@ -955,7 +958,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo > rt_cache_flush(dev_net(dev), -1); > break; > case NETDEV_DOWN: > - fib_disable_ip(dev, 0); > + fib_disable_ip(dev, 0, 0); > break; > case NETDEV_CHANGEMTU: > case NETDEV_CHANGE: Are you sure you want to overload NETDEV_UNREGISTER ? Maybe it would be cleaner to add a new value, NETDEV_UNREGISTER_PERNET or something for the final loop... -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Monday 16 November 2009 23:32:55 you wrote: > > @@ -5374,6 +5395,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue); > > * unregister_netdevice_many - unregister many devices > > * @head: list of devices > > * > > + * WARNING: This function modifies the list. It may change the order of > > the + * elements in the list. However, you can assume it does not add or > > delete + * elements to/from the list. > > Sorry I dont understand this comment > The list passed to unregister_netdevice_many(), as the "head" parameter, may be altered, e.g. order may change between the elements. That is because we temporarily move the items from the list to the rt_flush_list for the flush. When we add the items back they may not be added in the same place. Perhaps the confusion comes from the fact that I did not specify which list? (i.e. head) > > @@ -937,7 +937,10 @@ static int fib_netdev_event(struct notifier_block > > *this, unsigned long event, vo struct in_device *in_dev = > > __in_dev_get_rtnl(dev); > > > > if (event == NETDEV_UNREGISTER) { > > - fib_disable_ip(dev, 2); > > + /* if this event is part of a batch then don't flush the cache > > + * now; we will receive another event at the end of the batch */ > > + int rt_flush = list_empty(&dev->unreg_list) ? 0 : -1; > > hmm... a bit ugly... > Would it be better if I added a dev_is_batch_unregister() instead? Or add a new device flag to explicitly signal the batch unregister? > > + fib_disable_ip(dev, 2, rt_flush); > > return NOTIFY_DONE; > > } > > > > @@ -955,7 +958,7 @@ static int fib_netdev_event(struct notifier_block > > *this, unsigned long event, vo rt_cache_flush(dev_net(dev), -1); > > break; > > case NETDEV_DOWN: > > - fib_disable_ip(dev, 0); > > + fib_disable_ip(dev, 0, 0); > > break; > > case NETDEV_CHANGEMTU: > > case NETDEV_CHANGE: > > Are you sure you want to overload NETDEV_UNREGISTER ? > > Maybe it would be cleaner to add a new value, NETDEV_UNREGISTER_PERNET or > > something for the final loop... 
> Hmm, I think that will allow us to get rid of the ugly test: never flush the cache for NETDEV_UNREGISTER, only flush it for NETDEV_UNREGISTER_PERNET. We just need to make sure to add NETDEV_UNREGISTER_PERNET in other places where NETDEV_UNREGISTER is called. I'll try this in the next patch. Thanks for reviewing. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Octavian Purdila a écrit : > On Monday 16 November 2009 23:32:55 you wrote: > >>> @@ -5374,6 +5395,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue); >>> * unregister_netdevice_many - unregister many devices >>> * @head: list of devices >>> * >>> + * WARNING: This function modifies the list. It may change the order of >>> the + * elements in the list. However, you can assume it does not add or >>> delete + * elements to/from the list. >> Sorry I dont understand this comment >> > > The list passed to unregister_netdevice_many(), as the "head" parameter, may > be altered, e.g. order may change between the elements. > > That is because we temporarily move the items from the list to the > rt_flush_list for the flush. When we add the items back they may not be added in > the same place. > Ah, I got it now; the confusion is that the comment makes more sense for rollback_registered_many(), because when reading unregister_netdevice_many() it is clear it doesn't change the list... void unregister_netdevice_many(struct list_head *head) { struct net_device *dev; if (!list_empty(head)) { rollback_registered_many(head); list_for_each_entry(dev, head, unreg_list) net_set_todo(dev); } } -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/net/core/dev.c b/net/core/dev.c index 4b24d79..b0a14f0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4720,7 +4720,8 @@ static void net_set_todo(struct net_device *dev) static void rollback_registered_many(struct list_head *head) { - struct net_device *dev; + struct net_device *dev, *aux, *fdev; + LIST_HEAD(rt_flush_list); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -4778,8 +4779,28 @@ static void rollback_registered_many(struct list_head *head) synchronize_net(); - list_for_each_entry(dev, head, unreg_list) + /* flush route cache by resending one NETDEV_UNREGISTER per namespace */ + list_for_each_entry_safe(dev, aux, head, unreg_list) { + int needs_flush = 1; + list_for_each_entry(fdev, &rt_flush_list, unreg_list) { + if (dev_net(dev) == dev_net(fdev)) { + needs_flush = 0; + dev_put(dev); + break; + } + } + if (needs_flush) { + list_del(&dev->unreg_list); + list_add(&dev->unreg_list, &rt_flush_list); + } + } + + list_for_each_entry_safe(dev, aux, &rt_flush_list, unreg_list) { + list_del_init(&dev->unreg_list); + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + list_add(&dev->unreg_list, head); dev_put(dev); + } } static void rollback_registered(struct net_device *dev) @@ -5374,6 +5395,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue); * unregister_netdevice_many - unregister many devices * @head: list of devices * + * WARNING: This function modifies the list. It may change the order of the + * elements in the list. However, you can assume it does not add or delete + * elements to/from the list. 
*/ void unregister_netdevice_many(struct list_head *head) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 816e218..1972760 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -895,11 +895,11 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, int force) +static void fib_disable_ip(struct net_device *dev, int force, int delay) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev), delay); arp_ifdown(dev); } @@ -922,7 +922,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. Disable IP. */ - fib_disable_ip(dev, 1); + fib_disable_ip(dev, 1, 0); } else { rt_cache_flush(dev_net(dev), -1); } @@ -937,7 +937,10 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct in_device *in_dev = __in_dev_get_rtnl(dev); if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, 2); + /* if this event is part of a batch then don't flush the cache + * now; we will receive another event at the end of the batch */ + int rt_flush = list_empty(&dev->unreg_list) ? 0 : -1; + fib_disable_ip(dev, 2, rt_flush); return NOTIFY_DONE; } @@ -955,7 +958,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: - fib_disable_ip(dev, 0); + fib_disable_ip(dev, 0, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE:
Tests performed with per device sysctl/sysfs entries disabled: $ insmod /lib/modules/dummy.ko numdummies=8000 $ time rmmod dummy Without the patch: With the patch: real 0m 3.65s real 0m 0.27s user 0m 0.00s user 0m 0.00s sys 0m 3.42s sys 0m 0.24s Signed-off-by: Octavian Purdila <opurdila@ixiacom.com> --- net/core/dev.c | 28 ++++++++++++++++++++++++++-- net/ipv4/fib_frontend.c | 13 ++++++++----- 2 files changed, 34 insertions(+), 7 deletions(-)