diff mbox

[RFC,net-next,1/2] net: allow user to set IPv6 nexthop for IPv4 route

Message ID 1427403928-1342-2-git-send-email-gospo@cumulusnetworks.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Andy Gospodarek March 26, 2015, 9:05 p.m. UTC
This adds kernel infrastructure to allow userspace (read: routing protocols) to
support adding IPv6 next-hops to IPv4 routes.  This is essentially added to
support a feature of MP-BGP outlined in RFC-5549.  

This does not encompass all that is needed to support RFC-5549, but this
demonstrates the netlink infrastructure needed to correctly enable the kernel
infrastructure that would support this RFC.  For more information the full RFC
is here: http://www.ietf.org/rfc/rfc5549.txt.

Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
---

I still consider this an RFC as I need to work out details when IPv6 is not
enabled, but I wanted to get eyes on this as quickly as possible to allow
others to comment on the general implementation.

 include/net/ip_fib.h           |  3 +++
 include/net/route.h            |  2 ++
 include/uapi/linux/rtnetlink.h |  1 +
 net/ipv4/fib_frontend.c        |  4 ++++
 net/ipv4/fib_semantics.c       | 24 +++++++++++++++++++-----
 net/ipv4/ip_output.c           | 30 +++++++++++++++++++++++++-----
 net/ipv4/route.c               | 12 ++++++++++++
 7 files changed, 66 insertions(+), 10 deletions(-)

Comments

Sowmini Varadhan March 26, 2015, 9:42 p.m. UTC | #1
On Thu, Mar 26, 2015 at 5:05 PM, Andy Gospodarek
<gospo@cumulusnetworks.com> wrote:

> +                       if (nla_len(nla) == 4) {
> +                               nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
> +                       } else if (nla_len(nla) == 16) {
> +                               nla_memcpy(&nexthop_nh->nh_gw6, nla, nla_len(nla));
> +                       } else return -EINVAL;

Would it be better to use sizeof(in_addr_t) and sizeof(struct in6_addr)
for clarity? The same applies to:


> +                       if (!ipv6_addr_any(&nh->nh_gw6) &&
> +                           nla_put(skb, RTA_GATEWAY, 16, &nh->nh_gw6))
> +                               goto nla_put_failure;


I haven't read the MP-BGP spec, but I'm also intrigued by how ND and NUD
will work in this model, since the packet that triggers each is not an
IPv6 packet.

--Sowmini
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov March 26, 2015, 11:12 p.m. UTC | #2
Hello,

On Thu, 26 Mar 2015, Andy Gospodarek wrote:

> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
> index 66c1e4f..7de2924 100644
> --- a/net/ipv4/fib_semantics.c
> +++ b/net/ipv4/fib_semantics.c
> @@ -1033,6 +1041,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
>  		if (fi->fib_nh->nh_oif &&
>  		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
>  			goto nla_put_failure;
> +		if (!ipv6_addr_any(&fi->fib_nh->nh_gw6) &&
> +		    nla_put(skb, RTA_GATEWAY, 16, &fi->fib_nh->nh_gw6))

	RTA_GATEWAY6

> +			goto nla_put_failure;
>  #ifdef CONFIG_IP_ROUTE_CLASSID
>  		if (fi->fib_nh[0].nh_tclassid &&
>  		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
> @@ -1060,6 +1071,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
>  			if (nh->nh_gw &&
>  			    nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw))
>  				goto nla_put_failure;
> +			if (!ipv6_addr_any(&nh->nh_gw6) &&
> +			    nla_put(skb, RTA_GATEWAY, 16, &nh->nh_gw6))

	RTA_GATEWAY6

> @@ -193,10 +196,27 @@ static inline int ip_finish_output2(struct sk_buff *skb)
>  	}
>  
>  	rcu_read_lock_bh();
> -	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
> -	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
> -	if (unlikely(!neigh))
> -		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +	/* If there is an ipv6 gateway specified, use it */
> +	if (!rt->rt_gateway && !ipv6_addr_any(&rt->rt_gateway6)) {

	DST_NOCACHE routes can fill rt_gateway even when
nh_gw=0 (rt_set_nexthop), so above check can be:

	if (rt->rt_uses_gateway && !ipv6_addr_any(&rt->rt_gateway6)) {

	Not sure, some places may prefer to see rt_uses_gateway=1
for v4 and rt_uses_gateway=2 for v6 nexthop. Then above check
can be just 'if (rt->rt_uses_gateway == 2) {'.

> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index be8703d..c654b41 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -1400,6 +1400,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
>  			rt->rt_gateway = nh->nh_gw;
>  			rt->rt_uses_gateway = 1;
>  		}
> +		if (!ipv6_addr_any(&nh->nh_gw6)) {
> +			memcpy(&rt->rt_gateway6, &nh->nh_gw6, sizeof(struct in6_addr));
> +			rt->rt_uses_gateway = 1;
> +		}
>  		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
>  #ifdef CONFIG_IP_ROUTE_CLASSID
>  		rt->dst.tclassid = nh->nh_tclassid;
> @@ -1417,6 +1421,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
>  			rt->dst.flags |= DST_NOCACHE;
>  			if (!rt->rt_gateway)
>  				rt->rt_gateway = daddr;
> +			if (ipv6_addr_any(&rt->rt_gateway6)) {
> +				memcpy(&rt->rt_gateway6, &nh->nh_gw6, sizeof(struct in6_addr));
> +				rt->rt_uses_gateway = 1;
> +			}

	Above hunk is not needed, rt_gateway6 is set above.

>  			rt_add_uncached_list(rt);
>  		}
>  	} else
> @@ -1488,6 +1496,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
>  	rth->rt_pmtu	= 0;
>  	rth->rt_gateway	= 0;
>  	rth->rt_uses_gateway = 0;
> +	memset(&rth->rt_gateway6, 0, sizeof(struct in6_addr));

	We can remove such initializations if
rt_uses_gateway=2 is used as v6 indication, for example:

rt_uses_gateway rt_gateway nh_gw   nh_gw6 DST_NOCACHE What
===============================================================================
0               0          0/LOCAL N      N           Non-gatewayed, cached
0               DADDR      0/LOCAL N      Y           Non-gatewayed, not cached
2               0          0       GW6    N           Gatewayed, cached
2               DADDR      0       GW6    Y           Gatewayed, not cached
1               GW         GW      N      Y/N         Gatewayed


	Maybe more places need to be changed:

- fib_check_nh: validate v6 address type, set nh_dev,
	allow only nh->nh_scope == RT_SCOPE_LINK (I assume
	we do not plan to use local v6 address in nh_gw6).
- nh_comp: needs nh_gw6 comparison
- fib_create_info: more checks are needed around
	'if (cfg->fc_scope == RT_SCOPE_HOST) {' check

	There are other places that use rt_uses_gateway
and rt_gateway.

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Gospodarek March 27, 2015, 12:27 a.m. UTC | #3
On Fri, Mar 27, 2015 at 01:12:36AM +0200, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Thu, 26 Mar 2015, Andy Gospodarek wrote:
> 
> > diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
> > index 66c1e4f..7de2924 100644
> > --- a/net/ipv4/fib_semantics.c
> > +++ b/net/ipv4/fib_semantics.c
> > @@ -1033,6 +1041,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
> >  		if (fi->fib_nh->nh_oif &&
> >  		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
> >  			goto nla_put_failure;
> > +		if (!ipv6_addr_any(&fi->fib_nh->nh_gw6) &&
> > +		    nla_put(skb, RTA_GATEWAY, 16, &fi->fib_nh->nh_gw6))
> 
> 	RTA_GATEWAY6
> 
> > +			goto nla_put_failure;
> >  #ifdef CONFIG_IP_ROUTE_CLASSID
> >  		if (fi->fib_nh[0].nh_tclassid &&
> >  		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
> > @@ -1060,6 +1071,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
> >  			if (nh->nh_gw &&
> >  			    nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw))
> >  				goto nla_put_failure;
> > +			if (!ipv6_addr_any(&nh->nh_gw6) &&
> > +			    nla_put(skb, RTA_GATEWAY, 16, &nh->nh_gw6))
> 
> 	RTA_GATEWAY6
> 

Thanks for both of these, my original patch overloaded RTA_GATEWAY and I
missed these in the conversion.

> > @@ -193,10 +196,27 @@ static inline int ip_finish_output2(struct sk_buff *skb)
> >  	}
> >  
> >  	rcu_read_lock_bh();
> > -	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
> > -	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
> > -	if (unlikely(!neigh))
> > -		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
> > +
> > +#if IS_ENABLED(CONFIG_IPV6)
> > +	/* If there is an ipv6 gateway specified, use it */
> > +	if (!rt->rt_gateway && !ipv6_addr_any(&rt->rt_gateway6)) {
> 
> 	DST_NOCACHE routes can fill rt_gateway even when
> nh_gw=0 (rt_set_nexthop), so above check can be:
> 
> 	if (rt->rt_uses_gateway && !ipv6_addr_any(&rt->rt_gateway6)) {
> 
> 	Not sure, some places may prefer to see rt_uses_gateway=1
> for v4 and rt_uses_gateway=2 for v6 nexthop. Then above check
> can be just 'if (rt->rt_uses_gateway == 2) {'.
> 

OK, thanks.

> > diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> > index be8703d..c654b41 100644
> > --- a/net/ipv4/route.c
> > +++ b/net/ipv4/route.c
> > @@ -1400,6 +1400,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
> >  			rt->rt_gateway = nh->nh_gw;
> >  			rt->rt_uses_gateway = 1;
> >  		}
> > +		if (!ipv6_addr_any(&nh->nh_gw6)) {
> > +			memcpy(&rt->rt_gateway6, &nh->nh_gw6, sizeof(struct in6_addr));
> > +			rt->rt_uses_gateway = 1;
> > +		}
> >  		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
> >  #ifdef CONFIG_IP_ROUTE_CLASSID
> >  		rt->dst.tclassid = nh->nh_tclassid;
> > @@ -1417,6 +1421,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
> >  			rt->dst.flags |= DST_NOCACHE;
> >  			if (!rt->rt_gateway)
> >  				rt->rt_gateway = daddr;
> > +			if (ipv6_addr_any(&rt->rt_gateway6)) {
> > +				memcpy(&rt->rt_gateway6, &nh->nh_gw6, sizeof(struct in6_addr));
> > +				rt->rt_uses_gateway = 1;
> > +			}
> 
> 	Above hunk is not needed, rt_gateway6 is set above.
> 

Excellent, I will drop that too.

> >  			rt_add_uncached_list(rt);
> >  		}
> >  	} else
> > @@ -1488,6 +1496,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
> >  	rth->rt_pmtu	= 0;
> >  	rth->rt_gateway	= 0;
> >  	rth->rt_uses_gateway = 0;
> > +	memset(&rth->rt_gateway6, 0, sizeof(struct in6_addr));
> 
> 	We can remove such initializations if
> rt_uses_gateway=2 is used as v6 indication, for example:
> 
> rt_uses_gateway rt_gateway nh_gw   nh_gw6 DST_NOCACHE What
> ===============================================================================
> 0               0          0/LOCAL N      N           Non-gatewayed, cached
> 0               DADDR      0/LOCAL N      Y           Non-gatewayed, not cached
> 2               0          0       GW6    N           Gatewayed, cached
> 2               DADDR      0       GW6    Y           Gatewayed, not cached
> 1               GW         GW      N      Y/N         Gatewayed
> 
> 
> 	May be more places need to be changed:
> 
> - fib_check_nh: validate v6 address type, set nh_dev,
> 	allow only nh->nh_scope == RT_SCOPE_LINK (I assume
> 	we do not plan to use local v6 address in nh_gw6).
> - nh_comp: needs nh_gw6 comparison
> - fib_create_info: more checks are needed around
> 	'if (cfg->fc_scope == RT_SCOPE_HOST) {' check
> 
> 	There are other places that use rt_uses_gateway
> and rt_gateway.
> 
> Regards
> 

Awesome.  Thanks for the thorough review.  I'll check these bits out and
see what else needs to be done for v1 of this series.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Robert Shearman March 27, 2015, 11:53 a.m. UTC | #4
On 26/03/15 21:05, Andy Gospodarek wrote:
> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> index bea910f..26cdd01 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -306,6 +306,7 @@ enum rtattr_type_t {
>          RTA_VIA,
>          RTA_NEWDST,
>          RTA_PREF,
> +       RTA_GATEWAY6,

There shouldn't be any need to add a new attribute. The RTA_VIA 
attribute that Eric Biederman recently introduced should have the 
semantics that you require:

"RTA_VIA specifies the next machine to send a packet to
like RTA_GATEWAY.  RTA_VIA differs from RTA_GATEWAY in that it
includes the address family of the address of the next machine to send
a packet to."

Thanks,
Rob
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sowmini Varadhan March 27, 2015, 1:23 p.m. UTC | #5
On Thu, Mar 26, 2015 at 5:05 PM, Andy Gospodarek
<gospo@cumulusnetworks.com> wrote:
> This adds kernel infrastructure to allow userspace (read: routing protocols) to
> support adding IPv6 next-hops to IPv4 routes.  This is essentially added to
> support a feature of MP-BGP outlined in RFC-5549.
>
> Thise does not encompass all that is needed to support RFC-5549, but this
> demonstrates the netlink infrastructure needed to correctly enable the kernel
> infrastructure that would support this RFC.  For more informtion the full RFC
> is here: http://www.ietf.org/rfc/rfc5549.txt.

Is there some impact to redirect behavior? RFC 5549 does not seem to
discuss it at all, but the first hop would be processing (and potentially
sending) an ICMP redirect to the host. I don't think there is a good way to
construct this, either with ICMPv4 or ICMPv6, and maybe some checks are
needed to not even try when the IP version mismatch is encountered.

--Sowmini
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Gospodarek March 27, 2015, 2:10 p.m. UTC | #6
On Fri, Mar 27, 2015 at 11:53:01AM +0000, Robert Shearman wrote:
> On 26/03/15 21:05, Andy Gospodarek wrote:
> >diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> >index bea910f..26cdd01 100644
> >--- a/include/uapi/linux/rtnetlink.h
> >+++ b/include/uapi/linux/rtnetlink.h
> >@@ -306,6 +306,7 @@ enum rtattr_type_t {
> >         RTA_VIA,
> >         RTA_NEWDST,
> >         RTA_PREF,
> >+       RTA_GATEWAY6,
> 
> There shouldn't be any need to add a new attribute. The RTA_VIA attribute
> that Eric Biederman recently introduced should have the semantics that you
> require:
> 
> "RTA_VIA specifies the specifies the next machine to send a packet to
> like RTA_GATEWAY.  RTA_VIA differs from RTA_GATEWAY in that it
> includes the address family of the address of the next machine to send
> a packet to."

Absolutely.  I did this first set before Eric added the RTA_VIA
attribute, so I need to not only use it, but also add support in the
ipv4 code to use that attribute if set.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Gospodarek March 27, 2015, 2:35 p.m. UTC | #7
On Fri, Mar 27, 2015 at 09:23:57AM -0400, Sowmini Varadhan wrote:
> On Thu, Mar 26, 2015 at 5:05 PM, Andy Gospodarek
> <gospo@cumulusnetworks.com> wrote:
> > This adds kernel infrastructure to allow userspace (read: routing protocols) to
> > support adding IPv6 next-hops to IPv4 routes.  This is essentially added to
> > support a feature of MP-BGP outlined in RFC-5549.
> >
> > Thise does not encompass all that is needed to support RFC-5549, but this
> > demonstrates the netlink infrastructure needed to correctly enable the kernel
> > infrastructure that would support this RFC.  For more informtion the full RFC
> > is here: http://www.ietf.org/rfc/rfc5549.txt.
> 
> Is there some impact to redirect behavior? RFC 5549 does not seem to
> discuss it at all, but the firsthop would be processing (and
> potentially sending)
> an icmp redirect to the host. I dont think there is a good way to construct
> this, either with icmpv4 or icmpv6, and maybe some checks are needed to
> not even try, when the IP mix-match is encountered.
> 

You are correct that there is not a great way to construct this since
the frames are strictly IPv4 frames.  I'll have to dig into this a bit
more to see if there is something worth doing for this case.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sowmini Varadhan March 27, 2015, 2:37 p.m. UTC | #8
On Fri, Mar 27, 2015 at 10:35 AM, Andy Gospodarek
<gospo@cumulusnetworks.com> wrote:

>
> You are correct that there is not a great way to construct this since
> the frames at strictly IPv4 frames.  I'll have to dig into this a bit
> more to see if there is something worth doing for this case.

Most of the complexity is probably the router-side behavior (for when/how
to construct redirects). I suspect the host side should be straight-forward,
and not too much change.

--Sowmini
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Gospodarek April 8, 2015, 2:52 a.m. UTC | #9
On Tue, Apr 07, 2015 at 11:14:22PM +0200, Bjørnar Ness wrote:
> Hello, Andy.
> 
> Are you working on the next version of this patchset with the suggested
> modifications?

Yes, I am.

I was planning to finish them tomorrow and test them before submitting
the next version.  Are you asking as you wanted to provide some testing
feedback?

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 54271ed..a45fbe0 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -22,6 +22,7 @@ 
 #include <net/fib_rules.h>
 #include <net/inetpeer.h>
 #include <linux/percpu.h>
+#include <linux/in6.h>
 
 struct fib_config {
 	u8			fc_dst_len;
@@ -44,6 +45,7 @@  struct fib_config {
 	u32			fc_flow;
 	u32			fc_nlflags;
 	struct nl_info		fc_nlinfo;
+	struct in6_addr		fc_gw6;
  };
 
 struct fib_info;
@@ -89,6 +91,7 @@  struct fib_nh {
 	struct rtable __rcu * __percpu *nh_pcpu_rth_output;
 	struct rtable __rcu	*nh_rth_input;
 	struct fnhe_hash_bucket	__rcu *nh_exceptions;
+	struct in6_addr		nh_gw6;
 };
 
 /*
diff --git a/include/net/route.h b/include/net/route.h
index fe22d03..b66a6c4 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -35,6 +35,7 @@ 
 #include <linux/ip.h>
 #include <linux/cache.h>
 #include <linux/security.h>
+#include <linux/in6.h>
 
 /* IPv4 datagram length is stored into 16bit field (tot_len) */
 #define IP_MAX_MTU	0xFFFFU
@@ -66,6 +67,7 @@  struct rtable {
 
 	struct list_head	rt_uncached;
 	struct uncached_list	*rt_uncached_list;
+	struct in6_addr		rt_gateway6;
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index bea910f..26cdd01 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -306,6 +306,7 @@  enum rtattr_type_t {
 	RTA_VIA,
 	RTA_NEWDST,
 	RTA_PREF,
+	RTA_GATEWAY6,
 	__RTA_MAX
 };
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e5b6b05..3775b05 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -591,6 +591,7 @@  const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
 	[RTA_METRICS]		= { .type = NLA_NESTED },
 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
 	[RTA_FLOW]		= { .type = NLA_U32 },
+	[RTA_GATEWAY6]		= { .len = sizeof(struct in6_addr) },
 };
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -636,6 +637,9 @@  static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 		case RTA_GATEWAY:
 			cfg->fc_gw = nla_get_be32(attr);
 			break;
+		case RTA_GATEWAY6:
+			nla_memcpy(&cfg->fc_gw6, attr, sizeof(struct in6_addr));
+			break;
 		case RTA_PRIORITY:
 			cfg->fc_priority = nla_get_u32(attr);
 			break;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 66c1e4f..7de2924 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -468,7 +468,11 @@  static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
-			nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
+			if (nla_len(nla) == 4) {
+				nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
+			} else if (nla_len(nla) == 16) {
+				nla_memcpy(&nexthop_nh->nh_gw6, nla, nla_len(nla));
+			} else return -EINVAL;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 			nla = nla_find(attrs, attrlen, RTA_FLOW);
 			nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
@@ -495,9 +499,10 @@  int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
 		return 1;
 
-	if (cfg->fc_oif || cfg->fc_gw) {
+	if (cfg->fc_oif || cfg->fc_gw || !ipv6_addr_any(&cfg->fc_gw6)) {
 		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
-		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
+		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw) &&
+		    (ipv6_addr_any(&cfg->fc_gw6) || !ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->nh_gw6)))
 			return 0;
 		return 1;
 	}
@@ -759,7 +764,7 @@  __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
 
 struct fib_info *fib_create_info(struct fib_config *cfg)
 {
-	int err;
+	int err = 0;
 	struct fib_info *fi = NULL;
 	struct fib_info *ofi;
 	int nhs = 1;
@@ -869,6 +874,8 @@  struct fib_info *fib_create_info(struct fib_config *cfg)
 			goto err_inval;
 		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
 			goto err_inval;
+		if (!ipv6_addr_any(&cfg->fc_gw6) && ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->nh_gw6))
+			goto err_inval;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
 			goto err_inval;
@@ -882,6 +889,7 @@  struct fib_info *fib_create_info(struct fib_config *cfg)
 		nh->nh_oif = cfg->fc_oif;
 		nh->nh_gw = cfg->fc_gw;
 		nh->nh_flags = cfg->fc_flags;
+		memcpy(&nh->nh_gw6,&cfg->fc_gw6,sizeof(struct in6_addr));
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		nh->nh_tclassid = cfg->fc_flow;
 		if (nh->nh_tclassid)
@@ -893,7 +901,7 @@  struct fib_info *fib_create_info(struct fib_config *cfg)
 	}
 
 	if (fib_props[cfg->fc_type].error) {
-		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
+		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp || !ipv6_addr_any(&cfg->fc_gw6))
 			goto err_inval;
 		goto link_it;
 	} else {
@@ -1033,6 +1041,9 @@  int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		if (fi->fib_nh->nh_oif &&
 		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
 			goto nla_put_failure;
+		if (!ipv6_addr_any(&fi->fib_nh->nh_gw6) &&
+		    nla_put(skb, RTA_GATEWAY, 16, &fi->fib_nh->nh_gw6))
+			goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		if (fi->fib_nh[0].nh_tclassid &&
 		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
@@ -1060,6 +1071,9 @@  int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			if (nh->nh_gw &&
 			    nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw))
 				goto nla_put_failure;
+			if (!ipv6_addr_any(&nh->nh_gw6) &&
+			    nla_put(skb, RTA_GATEWAY, 16, &nh->nh_gw6))
+				goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 			if (nh->nh_tclassid &&
 			    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8259e77..1b63e6a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -79,6 +79,9 @@ 
 #include <linux/mroute.h>
 #include <linux/netlink.h>
 #include <linux/tcp.h>
+#include <net/ndisc.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
 
 int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
 EXPORT_SYMBOL(sysctl_ip_default_ttl);
@@ -169,7 +172,7 @@  static inline int ip_finish_output2(struct sk_buff *skb)
 	struct rtable *rt = (struct rtable *)dst;
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
-	struct neighbour *neigh;
+	struct neighbour *neigh = NULL;
 	u32 nexthop;
 
 	if (rt->rt_type == RTN_MULTICAST) {
@@ -193,10 +196,27 @@  static inline int ip_finish_output2(struct sk_buff *skb)
 	}
 
 	rcu_read_lock_bh();
-	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
-	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
-	if (unlikely(!neigh))
-		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	/* If there is an ipv6 gateway specified, use it */
+	if (!rt->rt_gateway && !ipv6_addr_any(&rt->rt_gateway6)) {
+		neigh = __ipv6_neigh_lookup_noref(dst->dev, &rt->rt_gateway6);
+
+		if (unlikely(!neigh)) {
+			neigh = __neigh_create(&nd_tbl, &rt->rt_gateway6, dst->dev, false);
+		}
+	}
+#endif
+	/* No ipv6 gateway created, so use ipv4 */
+	if (likely(!neigh)) {
+		nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
+		neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
+
+		if (unlikely(!neigh)) {
+			neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+		}
+	}
+
 	if (!IS_ERR(neigh)) {
 		int res = dst_neigh_output(dst, neigh, skb);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index be8703d..c654b41 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1400,6 +1400,10 @@  static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			rt->rt_gateway = nh->nh_gw;
 			rt->rt_uses_gateway = 1;
 		}
+		if (!ipv6_addr_any(&nh->nh_gw6)) {
+			memcpy(&rt->rt_gateway6, &nh->nh_gw6, sizeof(struct in6_addr));
+			rt->rt_uses_gateway = 1;
+		}
 		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
@@ -1417,6 +1421,10 @@  static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			rt->dst.flags |= DST_NOCACHE;
 			if (!rt->rt_gateway)
 				rt->rt_gateway = daddr;
+			if (ipv6_addr_any(&rt->rt_gateway6)) {
+				memcpy(&rt->rt_gateway6, &nh->nh_gw6, sizeof(struct in6_addr));
+				rt->rt_uses_gateway = 1;
+			}
 			rt_add_uncached_list(rt);
 		}
 	} else
@@ -1488,6 +1496,7 @@  static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
 	rth->rt_uses_gateway = 0;
+	memset(&rth->rt_gateway6, 0, sizeof(struct in6_addr));
 	INIT_LIST_HEAD(&rth->rt_uncached);
 	if (our) {
 		rth->dst.input= ip_local_deliver;
@@ -1618,6 +1627,7 @@  static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
 	rth->rt_uses_gateway = 0;
+	memset(&rth->rt_gateway6, 0, sizeof(struct in6_addr));
 	INIT_LIST_HEAD(&rth->rt_uncached);
 	RT_CACHE_STAT_INC(in_slow_tot);
 
@@ -1792,6 +1802,7 @@  local_input:
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
 	rth->rt_uses_gateway = 0;
+	memset(&rth->rt_gateway6, 0, sizeof(struct in6_addr));
 	INIT_LIST_HEAD(&rth->rt_uncached);
 	RT_CACHE_STAT_INC(in_slow_tot);
 	if (res.type == RTN_UNREACHABLE) {
@@ -1981,6 +1992,7 @@  add:
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway = 0;
 	rth->rt_uses_gateway = 0;
+	memset(&rth->rt_gateway6, 0, sizeof(struct in6_addr));
 	INIT_LIST_HEAD(&rth->rt_uncached);
 
 	RT_CACHE_STAT_INC(out_slow_tot);