diff mbox series

[net-next,07/13] ipv6: Move gateway checks to a fib6_nh setting

Message ID 20190327032942.20473-8-dsahern@kernel.org
State Changes Requested
Delegated to: David Miller
Headers show
Series net: Move fib_nh and fib6_nh to a common struct | expand

Commit Message

David Ahern March 27, 2019, 3:29 a.m. UTC
From: David Ahern <dsahern@gmail.com>

The gateway setting is not per fib6_info entry but per fib6_nh. Add a new
fib_nh_has_gw flag to fib6_nh and convert references to RTF_GATEWAY to
the new flag. For IPv6 addresses, checking the flag is cheaper than
checking that nh_gw is non-zero, as IPv4 does.

While this increases fib6_nh by 8 bytes, the effective allocation size of
a fib6_info is unchanged. The 8 bytes are recovered later with the
fib_nh_common change.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h   |  1 +
 include/net/ip6_route.h |  4 ++--
 net/core/filter.c       |  2 +-
 net/ipv6/addrconf.c     | 25 +++++++++++++------------
 net/ipv6/ip6_fib.c      |  9 ++++++---
 net/ipv6/route.c        | 26 ++++++++++++++++----------
 6 files changed, 39 insertions(+), 28 deletions(-)

Comments

Ido Schimmel March 27, 2019, 9:08 a.m. UTC | #1
On Tue, Mar 26, 2019 at 08:29:36PM -0700, David Ahern wrote:
> @@ -989,8 +989,11 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
>  
>  	rt->rt6i_dst = ort->fib6_dst;
>  	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
> -	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
>  	rt->rt6i_flags = ort->fib6_flags;
> +	if (ort->fib6_nh.fib_nh_has_gw) {
> +		rt->rt6i_gateway = ort->fib6_nh.nh_gw;
> +		rt->rt6i_flags |= RTF_GATEWAY;

Not sure I understand why this line is needed. Can you explain?

> +	}
>  	rt6_set_from(rt, ort);
>  #ifdef CONFIG_IPV6_SUBTREES
>  	rt->rt6i_src = ort->fib6_src;
> @@ -1870,7 +1873,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,

...

> @@ -3134,7 +3138,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>  	} else
>  		rt->fib6_prefsrc.plen = 0;
>  
> -	rt->fib6_flags = cfg->fc_flags;
> +	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;

This is going to break mlxsw. See:

mlxsw_sp_fib6_rt_can_mp()
mlxsw_sp_rt6_is_gateway()

>  
>  	return rt;
>  out:
> @@ -3470,7 +3474,8 @@ static struct fib6_info *rt6_get_route_info(struct net *net,

David Ahern March 27, 2019, 2:21 p.m. UTC | #2
On 3/27/19 3:08 AM, Ido Schimmel wrote:
> On Tue, Mar 26, 2019 at 08:29:36PM -0700, David Ahern wrote:
>> @@ -989,8 +989,11 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
>>  
>>  	rt->rt6i_dst = ort->fib6_dst;
>>  	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
>> -	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
>>  	rt->rt6i_flags = ort->fib6_flags;
>> +	if (ort->fib6_nh.fib_nh_has_gw) {
>> +		rt->rt6i_gateway = ort->fib6_nh.nh_gw;
>> +		rt->rt6i_flags |= RTF_GATEWAY;
> 
> Not sure I understand why this line is needed. Can you explain?

The RTF_GATEWAY flag in an rt6_info still has many users.

> 
>> +	}
>>  	rt6_set_from(rt, ort);
>>  #ifdef CONFIG_IPV6_SUBTREES
>>  	rt->rt6i_src = ort->fib6_src;
>> @@ -1870,7 +1873,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
> 
> ...
> 
>> @@ -3134,7 +3138,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>  	} else
>>  		rt->fib6_prefsrc.plen = 0;
>>  
>> -	rt->fib6_flags = cfg->fc_flags;
>> +	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
> 
> This is going to break mlxsw. See:
> 
> mlxsw_sp_fib6_rt_can_mp()
> mlxsw_sp_rt6_is_gateway()

hmmm... not sure how I missed those. Will rescan the code for any other
missed checks on a fib6_info.

diff mbox series

Patch

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 2d2a468b3d6d..3b04b318cf13 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -126,6 +126,7 @@  struct rt6_exception {
 
 struct fib6_nh {
 	struct in6_addr		nh_gw;
+	bool			fib_nh_has_gw;
 	struct net_device	*nh_dev;
 	struct lwtunnel_state	*nh_lwtstate;
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 7ab119936e69..95cd8a2f6284 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -68,8 +68,8 @@  static inline bool rt6_need_strict(const struct in6_addr *daddr)
 
 static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
 {
-	return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
-	       RTF_GATEWAY;
+	return !(f6i->fib6_flags & (RTF_ADDRCONF|RTF_DYNAMIC)) &&
+		f6i->fib6_nh.fib_nh_has_gw;
 }
 
 void ip6_route_input(struct sk_buff *skb);
diff --git a/net/core/filter.c b/net/core/filter.c
index 647c63a7b25b..248207171475 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4677,7 +4677,7 @@  static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	if (f6i->fib6_nh.nh_lwtstate)
 		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
 
-	if (f6i->fib6_flags & RTF_GATEWAY)
+	if (f6i->fib6_nh.fib_nh_has_gw)
 		*dst = f6i->fib6_nh.nh_gw;
 
 	dev = f6i->fib6_nh.nh_dev;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ae17a966ae3..c5ac08fc6cc9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -173,7 +173,8 @@  static int addrconf_ifdown(struct net_device *dev, int how);
 static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  int plen,
 						  const struct net_device *dev,
-						  u32 flags, u32 noflags);
+						  u32 flags, u32 noflags,
+						  bool no_gw);
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp);
 static void addrconf_dad_work(struct work_struct *w);
@@ -1230,10 +1231,8 @@  cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r
 {
 	struct fib6_info *f6i;
 
-	f6i = addrconf_get_prefix_route(&ifp->addr,
-				       ifp->prefix_len,
-				       ifp->idev->dev,
-				       0, RTF_GATEWAY | RTF_DEFAULT);
+	f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+					ifp->idev->dev, 0, RTF_DEFAULT, true);
 	if (f6i) {
 		if (del_rt)
 			ip6_del_rt(dev_net(ifp->idev->dev), f6i);
@@ -2402,7 +2401,8 @@  addrconf_prefix_route(struct in6_addr *pfx, int plen, u32 metric,
 static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  int plen,
 						  const struct net_device *dev,
-						  u32 flags, u32 noflags)
+						  u32 flags, u32 noflags,
+						  bool no_gw)
 {
 	struct fib6_node *fn;
 	struct fib6_info *rt = NULL;
@@ -2421,6 +2421,8 @@  static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 	for_each_fib6_node_rt_rcu(fn) {
 		if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex)
 			continue;
+		if (no_gw && rt->fib6_nh.fib_nh_has_gw)
+			continue;
 		if ((rt->fib6_flags & flags) != flags)
 			continue;
 		if ((rt->fib6_flags & noflags) != 0)
@@ -2717,7 +2719,7 @@  void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 					       pinfo->prefix_len,
 					       dev,
 					       RTF_ADDRCONF | RTF_PREFIX_RT,
-					       RTF_GATEWAY | RTF_DEFAULT);
+					       RTF_DEFAULT, true);
 
 		if (rt) {
 			/* Autoconf prefix route */
@@ -4588,10 +4590,8 @@  static int modify_prefix_route(struct inet6_ifaddr *ifp,
 	struct fib6_info *f6i;
 	u32 prio;
 
-	f6i = addrconf_get_prefix_route(&ifp->addr,
-					ifp->prefix_len,
-					ifp->idev->dev,
-					0, RTF_GATEWAY | RTF_DEFAULT);
+	f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+					ifp->idev->dev, 0, RTF_DEFAULT, true);
 	if (!f6i)
 		return -ENOENT;
 
@@ -5972,7 +5972,8 @@  static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 			struct fib6_info *rt;
 
 			rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
-						       ifp->idev->dev, 0, 0);
+						       ifp->idev->dev, 0, 0,
+						       false);
 			if (rt)
 				ip6_del_rt(net, rt);
 		}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index db886085369b..91ce84ecdb57 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2294,6 +2294,7 @@  static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 {
 	struct fib6_info *rt = v;
 	struct ipv6_route_iter *iter = seq->private;
+	unsigned int flags = rt->fib6_flags;
 	const struct net_device *dev;
 
 	seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
@@ -2303,15 +2304,17 @@  static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 #else
 	seq_puts(seq, "00000000000000000000000000000000 00 ");
 #endif
-	if (rt->fib6_flags & RTF_GATEWAY)
+	if (rt->fib6_nh.fib_nh_has_gw) {
+		flags |= RTF_GATEWAY;
 		seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw);
-	else
+	} else {
 		seq_puts(seq, "00000000000000000000000000000000");
+	}
 
 	dev = rt->fib6_nh.nh_dev;
 	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
 		   rt->fib6_metric, atomic_read(&rt->fib6_ref), 0,
-		   rt->fib6_flags, dev ? dev->name : "");
+		   flags, dev ? dev->name : "");
 	iter->w.leaf = NULL;
 	return 0;
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4923a5f88e9b..b99c05b580e6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -533,7 +533,7 @@  static void rt6_probe(struct fib6_info *rt)
 	 * Router Reachability Probe MUST be rate-limited
 	 * to no more than one per minute.
 	 */
-	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
+	if (!rt || !rt->fib6_nh.fib_nh_has_gw)
 		return;
 
 	nh_gw = &rt->fib6_nh.nh_gw;
@@ -595,7 +595,7 @@  static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
 	struct neighbour *neigh;
 
 	if (rt->fib6_flags & RTF_NONEXTHOP ||
-	    !(rt->fib6_flags & RTF_GATEWAY))
+	    !rt->fib6_nh.fib_nh_has_gw)
 		return RT6_NUD_SUCCEED;
 
 	rcu_read_lock_bh();
@@ -784,7 +784,7 @@  static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
 
 static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
 {
-	return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
+	return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_has_gw;
 }
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
@@ -989,8 +989,11 @@  static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
 
 	rt->rt6i_dst = ort->fib6_dst;
 	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
-	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
 	rt->rt6i_flags = ort->fib6_flags;
+	if (ort->fib6_nh.fib_nh_has_gw) {
+		rt->rt6i_gateway = ort->fib6_nh.nh_gw;
+		rt->rt6i_flags |= RTF_GATEWAY;
+	}
 	rt6_set_from(rt, ort);
 #ifdef CONFIG_IPV6_SUBTREES
 	rt->rt6i_src = ort->fib6_src;
@@ -1870,7 +1873,7 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		rcu_read_unlock();
 		return rt;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
-			    !(f6i->fib6_flags & RTF_GATEWAY))) {
+			    !f6i->fib6_nh.fib_nh_has_gw)) {
 		/* Create a RTF_CACHE clone which will not be
 		 * owned by the fib6 tree.  It is for the special case where
 		 * the daddr in the skb during the neighbor look-up is different
@@ -2440,7 +2443,7 @@  static struct rt6_info *__ip6_route_redirect(struct net *net,
 			continue;
 		if (rt->fib6_flags & RTF_REJECT)
 			break;
-		if (!(rt->fib6_flags & RTF_GATEWAY))
+		if (!rt->fib6_nh.fib_nh_has_gw)
 			continue;
 		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
 			continue;
@@ -2977,6 +2980,7 @@  int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 			goto out;
 
 		fib6_nh->nh_gw = cfg->fc_gateway;
+		fib6_nh->fib_nh_has_gw = 1;
 	}
 
 	err = -ENODEV;
@@ -3134,7 +3138,7 @@  static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 	} else
 		rt->fib6_prefsrc.plen = 0;
 
-	rt->fib6_flags = cfg->fc_flags;
+	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
 
 	return rt;
 out:
@@ -3470,7 +3474,8 @@  static struct fib6_info *rt6_get_route_info(struct net *net,
 	for_each_fib6_node_rt_rcu(fn) {
 		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
 			continue;
-		if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
+		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
+		    !rt->fib6_nh.fib_nh_has_gw)
 			continue;
 		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
 			continue;
@@ -3791,7 +3796,7 @@  void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
 }
 
-#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
+#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
 
 /* Remove routers and update dst entries when gateway turn into host. */
 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
@@ -3799,6 +3804,7 @@  static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
 	struct in6_addr *gateway = (struct in6_addr *)arg;
 
 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
+	    rt->fib6_nh.fib_nh_has_gw &&
 	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
 		return -1;
 	}
@@ -4587,7 +4593,7 @@  static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
 		rcu_read_unlock();
 	}
 
-	if (rt->fib6_flags & RTF_GATEWAY) {
+	if (rt->fib6_nh.fib_nh_has_gw) {
 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
 			goto nla_put_failure;
 	}