Message ID | 20190516181620.126962-1-tracywwnj@gmail.com |
---|---|
State | Changes Requested |
Delegated to: | David Miller |
Headers | show |
Series | [v2,net] ipv6: fix src addr routing with the exception table | expand |
On Thu, May 16, 2019 at 11:16:20AM -0700, Wei Wang wrote: > From: Wei Wang <weiwan@google.com> > > When inserting route cache into the exception table, the key is > generated with both src_addr and dest_addr with src addr routing. > However, current logic always assumes the src_addr used to generate the > key is a /128 host address. This is not true in the following scenarios: > 1. When the route is a gateway route or does not have next hop. > (rt6_is_gw_or_nonexthop() == false) > 2. When calling ip6_rt_cache_alloc(), saddr is passed in as NULL. > This means, when looking for a route cache in the exception table, we > have to do the lookup twice: first time with the passed in /128 host > address, second time with the src_addr stored in fib6_info. > > This solves the pmtu discovery issue reported by Mikael Magnusson where > a route cache with a lower mtu info is created for a gateway route with > src addr. However, the lookup code is not able to find this route cache. > > Fixes: 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache") > Reported-by: Mikael Magnusson <mikael.kernel@lists.m7n.se> > Bisected-by: David Ahern <dsahern@gmail.com> > Signed-off-by: Wei Wang <weiwan@google.com> > Cc: Martin Lau <kafai@fb.com> > Cc: Eric Dumazet <edumazet@google.com> > --- > Changes from v1: > - restructure the code to only include the new logic in > rt6_find_cached_rt() > --- > net/ipv6/route.c | 49 +++++++++++++++++++++++++----------------------- > 1 file changed, 26 insertions(+), 23 deletions(-) > > diff --git a/net/ipv6/route.c b/net/ipv6/route.c > index 23a20d62daac..35873b57c7f1 100644 > --- a/net/ipv6/route.c > +++ b/net/ipv6/route.c > @@ -111,8 +111,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, > int iif, int type, u32 portid, u32 seq, > unsigned int flags); > static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, > - struct in6_addr *daddr, > - struct in6_addr *saddr); > + const struct in6_addr *daddr, > + const 
struct in6_addr *saddr); > > #ifdef CONFIG_IPV6_ROUTE_INFO > static struct fib6_info *rt6_add_route_info(struct net *net, > @@ -1566,31 +1566,44 @@ void rt6_flush_exceptions(struct fib6_info *rt) > * Caller has to hold rcu_read_lock() > */ > static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, > - struct in6_addr *daddr, > - struct in6_addr *saddr) > + const struct in6_addr *daddr, > + const struct in6_addr *saddr) > { > struct rt6_exception_bucket *bucket; > struct in6_addr *src_key = NULL; > struct rt6_exception *rt6_ex; > struct rt6_info *ret = NULL; > > - bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); > - > #ifdef CONFIG_IPV6_SUBTREES > /* fib6i_src.plen != 0 indicates f6i is in subtree > * and exception table is indexed by a hash of > * both fib6_dst and fib6_src. > - * Otherwise, the exception table is indexed by > - * a hash of only fib6_dst. > + * However, the src addr used to create the hash > + * might not be exactly the passed in saddr which > + * is a /128 addr from the flow. > + * So we need to use f6i->fib6_src to redo lookup > + * if the passed in saddr does not find anything. > + * (See the logic in ip6_rt_cache_alloc() on how > + * rt->rt6i_src is updated.) > */ > if (res->f6i->fib6_src.plen) > src_key = saddr; > +find_ex: > #endif > + bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); > rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); > > if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) > ret = rt6_ex->rt6i; > > +#ifdef CONFIG_IPV6_SUBTREES > + /* Use fib6_src as src_key and redo lookup */ > + if (!ret && src_key == saddr) { I am worried about the "src_key == saddr" check. e.g. what if "saddr == &res->f6i->fib6_src.addr" in the future. Maybe "!ret && src_key && src_key != &res->f6i->fib6_src.addr"? Other than that, Acked-by: Martin KaFai Lau <kafai@fb.com> Thanks for the v2. > + src_key = &res->f6i->fib6_src.addr; > + goto find_ex; > + } > +#endif > + > return ret; > }
On Thu, May 16, 2019 at 12:15 PM Martin Lau <kafai@fb.com> wrote: > > On Thu, May 16, 2019 at 11:16:20AM -0700, Wei Wang wrote: > > From: Wei Wang <weiwan@google.com> > > > > When inserting route cache into the exception table, the key is > > generated with both src_addr and dest_addr with src addr routing. > > However, current logic always assumes the src_addr used to generate the > > key is a /128 host address. This is not true in the following scenarios: > > 1. When the route is a gateway route or does not have next hop. > > (rt6_is_gw_or_nonexthop() == false) > > 2. When calling ip6_rt_cache_alloc(), saddr is passed in as NULL. > > This means, when looking for a route cache in the exception table, we > > have to do the lookup twice: first time with the passed in /128 host > > address, second time with the src_addr stored in fib6_info. > > > > This solves the pmtu discovery issue reported by Mikael Magnusson where > > a route cache with a lower mtu info is created for a gateway route with > > src addr. However, the lookup code is not able to find this route cache. 
> > > > Fixes: 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache") > > Reported-by: Mikael Magnusson <mikael.kernel@lists.m7n.se> > > Bisected-by: David Ahern <dsahern@gmail.com> > > Signed-off-by: Wei Wang <weiwan@google.com> > > Cc: Martin Lau <kafai@fb.com> > > Cc: Eric Dumazet <edumazet@google.com> > > --- > > Changes from v1: > > - restructure the code to only include the new logic in > > rt6_find_cached_rt() > > --- > > net/ipv6/route.c | 49 +++++++++++++++++++++++++----------------------- > > 1 file changed, 26 insertions(+), 23 deletions(-) > > > > diff --git a/net/ipv6/route.c b/net/ipv6/route.c > > index 23a20d62daac..35873b57c7f1 100644 > > --- a/net/ipv6/route.c > > +++ b/net/ipv6/route.c > > @@ -111,8 +111,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, > > int iif, int type, u32 portid, u32 seq, > > unsigned int flags); > > static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, > > - struct in6_addr *daddr, > > - struct in6_addr *saddr); > > + const struct in6_addr *daddr, > > + const struct in6_addr *saddr); > > > > #ifdef CONFIG_IPV6_ROUTE_INFO > > static struct fib6_info *rt6_add_route_info(struct net *net, > > @@ -1566,31 +1566,44 @@ void rt6_flush_exceptions(struct fib6_info *rt) > > * Caller has to hold rcu_read_lock() > > */ > > static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, > > - struct in6_addr *daddr, > > - struct in6_addr *saddr) > > + const struct in6_addr *daddr, > > + const struct in6_addr *saddr) > > { > > struct rt6_exception_bucket *bucket; > > struct in6_addr *src_key = NULL; > > struct rt6_exception *rt6_ex; > > struct rt6_info *ret = NULL; > > > > - bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); > > - > > #ifdef CONFIG_IPV6_SUBTREES > > /* fib6i_src.plen != 0 indicates f6i is in subtree > > * and exception table is indexed by a hash of > > * both fib6_dst and fib6_src. 
> > - * Otherwise, the exception table is indexed by > > - * a hash of only fib6_dst. > > + * However, the src addr used to create the hash > > + * might not be exactly the passed in saddr which > > + * is a /128 addr from the flow. > > + * So we need to use f6i->fib6_src to redo lookup > > + * if the passed in saddr does not find anything. > > + * (See the logic in ip6_rt_cache_alloc() on how > > + * rt->rt6i_src is updated.) > > */ > > if (res->f6i->fib6_src.plen) > > src_key = saddr; > > +find_ex: > > #endif > > + bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); > > rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); > > > > if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) > > ret = rt6_ex->rt6i; > > > > +#ifdef CONFIG_IPV6_SUBTREES > > + /* Use fib6_src as src_key and redo lookup */ > > + if (!ret && src_key == saddr) { > I am worried about the "src_key == saddr" check. > e.g. what if "saddr == &res->f6i->fib6_src.addr" in the future. > > Maybe "!ret && src_key && src_key != &res->f6i->fib6_src.addr"? > > Other than that, > Acked-by: Martin KaFai Lau <kafai@fb.com> > > Thanks for the v2. > Hmm... That does seem to be a valid concern, although it is very unlikely I think... But to be safe, the check you proposed looks good to me. I will change it in v3. > > + src_key = &res->f6i->fib6_src.addr; > > + goto find_ex; > > + } > > +#endif > > + > > return ret; > > }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 23a20d62daac..35873b57c7f1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -111,8 +111,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, int iif, int type, u32 portid, u32 seq, unsigned int flags); static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, - struct in6_addr *daddr, - struct in6_addr *saddr); + const struct in6_addr *daddr, + const struct in6_addr *saddr); #ifdef CONFIG_IPV6_ROUTE_INFO static struct fib6_info *rt6_add_route_info(struct net *net, @@ -1566,31 +1566,44 @@ void rt6_flush_exceptions(struct fib6_info *rt) * Caller has to hold rcu_read_lock() */ static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, - struct in6_addr *daddr, - struct in6_addr *saddr) + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; struct rt6_info *ret = NULL; - bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); - #ifdef CONFIG_IPV6_SUBTREES /* fib6i_src.plen != 0 indicates f6i is in subtree * and exception table is indexed by a hash of * both fib6_dst and fib6_src. - * Otherwise, the exception table is indexed by - * a hash of only fib6_dst. + * However, the src addr used to create the hash + * might not be exactly the passed in saddr which + * is a /128 addr from the flow. + * So we need to use f6i->fib6_src to redo lookup + * if the passed in saddr does not find anything. + * (See the logic in ip6_rt_cache_alloc() on how + * rt->rt6i_src is updated.) 
*/ if (res->f6i->fib6_src.plen) src_key = saddr; +find_ex: #endif + bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) ret = rt6_ex->rt6i; +#ifdef CONFIG_IPV6_SUBTREES + /* Use fib6_src as src_key and redo lookup */ + if (!ret && src_key == saddr) { + src_key = &res->f6i->fib6_src.addr; + goto find_ex; + } +#endif + return ret; } @@ -2665,12 +2678,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr) { - struct rt6_exception_bucket *bucket; const struct fib6_nh *nh = res->nh; struct fib6_info *f6i = res->f6i; - const struct in6_addr *src_key; - struct rt6_exception *rt6_ex; struct inet6_dev *idev; + struct rt6_info *rt; u32 mtu = 0; if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) { @@ -2679,18 +2690,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res, goto out; } - src_key = NULL; -#ifdef CONFIG_IPV6_SUBTREES - if (f6i->fib6_src.plen) - src_key = saddr; -#endif - - bucket = rcu_dereference(f6i->rt6i_exception_bucket); - rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); - if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) - mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU); - - if (likely(!mtu)) { + rt = rt6_find_cached_rt(res, daddr, saddr); + if (unlikely(rt)) { + mtu = dst_metric_raw(&rt->dst, RTAX_MTU); + } else { struct net_device *dev = nh->fib_nh_dev; mtu = IPV6_MIN_MTU;