diff mbox

[ovs-dev,RFC] datapath: allow tunnels to be created with rtnetlink

Message ID 1474380065-2957-1-git-send-email-cascardo@redhat.com
State RFC
Delegated to: pravin shelar
Headers show

Commit Message

Thadeu Lima de Souza Cascardo Sept. 20, 2016, 2:01 p.m. UTC
In order to use rtnetlink to create new tunnel vports, the backported
tunnels require some code that was removed from their upstream version,
mainly the necessary code for newlink and for start_xmit.

This patch adds back the necessary code for VXLAN, GRE and Geneve
tunnels.

Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
---
 datapath/linux/Modules.mk                       |   1 +
 datapath/linux/compat/geneve.c                  |  15 +--
 datapath/linux/compat/include/linux/if_tunnel.h |  71 ++++++++++++
 datapath/linux/compat/ip_gre.c                  |  65 ++++++++---
 datapath/linux/compat/vxlan.c                   | 147 +++++++++++++++++++++---
 5 files changed, 261 insertions(+), 38 deletions(-)
 create mode 100644 datapath/linux/compat/include/linux/if_tunnel.h

Comments

Pravin Shelar Oct. 20, 2016, 5:30 p.m. UTC | #1
On Tue, Sep 20, 2016 at 7:01 AM, Thadeu Lima de Souza Cascardo
<cascardo@redhat.com> wrote:
> In order to use rtnetlink to create new tunnel vports, the backported
> tunnels require some code that was removed from their upstream version,
> mainly the necessary code for newlink and for start_xmit.
>
> This patch adds back the necessary code for VXLAN, GRE and Geneve
> tunnels.
>
> Signed-off-by: Eric Garver <e@erig.me>
> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
> ---
>  datapath/linux/Modules.mk                       |   1 +
>  datapath/linux/compat/geneve.c                  |  15 +--
>  datapath/linux/compat/include/linux/if_tunnel.h |  71 ++++++++++++
>  datapath/linux/compat/ip_gre.c                  |  65 ++++++++---
>  datapath/linux/compat/vxlan.c                   | 147 +++++++++++++++++++++---
>  5 files changed, 261 insertions(+), 38 deletions(-)
>  create mode 100644 datapath/linux/compat/include/linux/if_tunnel.h
>
> diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
> index 26f6d22..ad7d14a 100644
> --- a/datapath/linux/Modules.mk
> +++ b/datapath/linux/Modules.mk
> @@ -38,6 +38,7 @@ openvswitch_headers += \
>         linux/compat/include/linux/if.h \
>         linux/compat/include/linux/if_ether.h \
>         linux/compat/include/linux/if_link.h \
> +       linux/compat/include/linux/if_tunnel.h \
>         linux/compat/include/linux/if_vlan.h \
>         linux/compat/include/linux/in.h \
>         linux/compat/include/linux/jiffies.h \
> diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
> index 0c5b58a..79bb0ba 100644
> --- a/datapath/linux/compat/geneve.c
> +++ b/datapath/linux/compat/geneve.c
> @@ -1112,9 +1112,8 @@ tx_error:
>  }
>  #endif
>
> -netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
> +static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
> -       struct net_device *dev = skb->dev;
>         struct geneve_dev *geneve = netdev_priv(dev);
>         struct ip_tunnel_info *info = NULL;
>
> @@ -1128,18 +1127,12 @@ netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
>  #endif
>         return geneve_xmit_skb(skb, dev, info);
>  }
> -EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
>
> -static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> +netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
>  {
> -       /* Drop All packets coming from networking stack. OVS-CB is
> -        * not initialized for these packets.
> -        */
> -
> -       dev_kfree_skb(skb);
> -       dev->stats.tx_dropped++;
> -       return NETDEV_TX_OK;
> +       return geneve_dev_xmit(skb, skb->dev);
>  }
This would crash the kernel.
The OVS compat code expects a dst entry in skb-cb; packets coming from the
kernel would not have it.

> +EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
>
>  static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
>  {
> diff --git a/datapath/linux/compat/include/linux/if_tunnel.h b/datapath/linux/compat/include/linux/if_tunnel.h
> new file mode 100644
> index 0000000..476fe3c
> --- /dev/null
> +++ b/datapath/linux/compat/include/linux/if_tunnel.h
> @@ -0,0 +1,71 @@
> +#ifndef _LINUX_IF_TUNNEL_WRAPPER_H
> +#define _LINUX_IF_TUNNEL_WRAPPER_H
> +
> +#include_next<linux/if_tunnel.h>
> +
> +/* GRE section */
> +enum {
> +#define IFLA_GRE_UNSPEC rpl_IFLA_GRE_UNSPEC
> +       IFLA_GRE_UNSPEC,
> +
> +#define IFLA_GRE_LINK rpl_IFLA_GRE_LINK
> +       IFLA_GRE_LINK,
> +
> +#define IFLA_GRE_IFLAGS rpl_IFLA_GRE_IFLAGS
> +       IFLA_GRE_IFLAGS,
> +
> +#define IFLA_GRE_OFLAGS rpl_IFLA_GRE_OFLAGS
> +       IFLA_GRE_OFLAGS,
> +
> +#define IFLA_GRE_IKEY rpl_IFLA_GRE_IKEY
> +       IFLA_GRE_IKEY,
> +
> +#define IFLA_GRE_OKEY rpl_IFLA_GRE_OKEY
> +       IFLA_GRE_OKEY,
> +
> +#define IFLA_GRE_LOCAL rpl_IFLA_GRE_LOCAL
> +       IFLA_GRE_LOCAL,
> +
> +#define IFLA_GRE_REMOTE rpl_IFLA_GRE_REMOTE
> +       IFLA_GRE_REMOTE,
> +
> +#define IFLA_GRE_TTL rpl_IFLA_GRE_TTL
> +       IFLA_GRE_TTL,
> +
> +#define IFLA_GRE_TOS rpl_IFLA_GRE_TOS
> +       IFLA_GRE_TOS,
> +
> +#define IFLA_GRE_PMTUDISC rpl_IFLA_GRE_PMTUDISC
> +       IFLA_GRE_PMTUDISC,
> +
> +#define IFLA_GRE_ENCAP_LIMIT rpl_IFLA_GRE_ENCAP_LIMIT
> +       IFLA_GRE_ENCAP_LIMIT,
> +
> +#define IFLA_GRE_FLOWINFO rpl_IFLA_GRE_FLOWINFO
> +       IFLA_GRE_FLOWINFO,
> +
> +#define IFLA_GRE_FLAGS rpl_IFLA_GRE_FLAGS
> +       IFLA_GRE_FLAGS,
> +
> +#define IFLA_GRE_ENCAP_TYPE rpl_IFLA_GRE_ENCAP_TYPE
> +       IFLA_GRE_ENCAP_TYPE,
> +
> +#define IFLA_GRE_ENCAP_FLAGS rpl_IFLA_GRE_ENCAP_FLAGS
> +       IFLA_GRE_ENCAP_FLAGS,
> +
> +#define IFLA_GRE_ENCAP_SPORT rpl_IFLA_GRE_ENCAP_SPORT
> +       IFLA_GRE_ENCAP_SPORT,
> +
> +#define IFLA_GRE_ENCAP_DPORT rpl_IFLA_GRE_ENCAP_DPORT
> +       IFLA_GRE_ENCAP_DPORT,
> +
> +#define IFLA_GRE_COLLECT_METADATA rpl_IFLA_GRE_COLLECT_METADATA
> +       IFLA_GRE_COLLECT_METADATA,
> +
> +#define __IFLA_GRE_MAX rpl__IFLA_GRE_MAX
> +       __IFLA_GRE_MAX
> +};
> +#undef IFLA_GRE_MAX
> +#define IFLA_GRE_MAX   (__IFLA_GRE_MAX - 1)
> +

After thinking about the actual issue of using netdevices for tunnel
port this is what I think.
There are three different implementations of OVS tunnels.

Case 1. OVS kernel module is upstream. It is straightforward to use
tunnel devices with the upstream kernel module. STT and lisp are not
available.
Case 2. OVS kernel module is out of tree. In this case OVS has compat
code and USE_UPSTREAM_TUNNEL is defined. We are using upstream kernel
modules for geneve, gre and vxlan; for the rest of the vports (stt and
lisp) we are using netdevices from compat code.
Case 3. OVS kernel module is out of tree and not using upstream tunnel
devices. We have to fall back to OVS compat code for all tunnel
modules.

Now, to detect these cases, userspace could probe for the tunnel device
"ovs_geneve" or "ovs_vxlan". If it exists, it is case 3, and userspace
vswitchd has to use the existing vport APIs. Otherwise we could use
netdev-based tunnel devices and create tunnel devices for each type of
tunnel port.
Thadeu Lima de Souza Cascardo Oct. 25, 2016, 6 p.m. UTC | #2
On Thu, Oct 20, 2016 at 10:30:41AM -0700, Pravin Shelar wrote:
> On Tue, Sep 20, 2016 at 7:01 AM, Thadeu Lima de Souza Cascardo
> <cascardo@redhat.com> wrote:
> > In order to use rtnetlink to create new tunnel vports, the backported
> > tunnels require some code that was removed from their upstream version,
> > mainly the necessary code for newlink and for start_xmit.
> >
> > This patch adds back the necessary code for VXLAN, GRE and Geneve
> > tunnels.
> >
> > Signed-off-by: Eric Garver <e@erig.me>
> > Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
> > ---
> >  datapath/linux/Modules.mk                       |   1 +
> >  datapath/linux/compat/geneve.c                  |  15 +--
> >  datapath/linux/compat/include/linux/if_tunnel.h |  71 ++++++++++++
> >  datapath/linux/compat/ip_gre.c                  |  65 ++++++++---
> >  datapath/linux/compat/vxlan.c                   | 147 +++++++++++++++++++++---
> >  5 files changed, 261 insertions(+), 38 deletions(-)
> >  create mode 100644 datapath/linux/compat/include/linux/if_tunnel.h
> >
> > diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
> > index 26f6d22..ad7d14a 100644
> > --- a/datapath/linux/Modules.mk
> > +++ b/datapath/linux/Modules.mk
> > @@ -38,6 +38,7 @@ openvswitch_headers += \
> >         linux/compat/include/linux/if.h \
> >         linux/compat/include/linux/if_ether.h \
> >         linux/compat/include/linux/if_link.h \
> > +       linux/compat/include/linux/if_tunnel.h \
> >         linux/compat/include/linux/if_vlan.h \
> >         linux/compat/include/linux/in.h \
> >         linux/compat/include/linux/jiffies.h \
> > diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
> > index 0c5b58a..79bb0ba 100644
> > --- a/datapath/linux/compat/geneve.c
> > +++ b/datapath/linux/compat/geneve.c
> > @@ -1112,9 +1112,8 @@ tx_error:
> >  }
> >  #endif
> >
> > -netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
> > +static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> >  {
> > -       struct net_device *dev = skb->dev;
> >         struct geneve_dev *geneve = netdev_priv(dev);
> >         struct ip_tunnel_info *info = NULL;
> >
> > @@ -1128,18 +1127,12 @@ netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
> >  #endif
> >         return geneve_xmit_skb(skb, dev, info);
> >  }
> > -EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
> >
> > -static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> > +netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
> >  {
> > -       /* Drop All packets coming from networking stack. OVS-CB is
> > -        * not initialized for these packets.
> > -        */
> > -
> > -       dev_kfree_skb(skb);
> > -       dev->stats.tx_dropped++;
> > -       return NETDEV_TX_OK;
> > +       return geneve_dev_xmit(skb, skb->dev);
> >  }
> This would crash kernel.
> OVS compat code expect dst entry in skb-cb, packet coming from kernel
> would not have it.
> 
> > +EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
> >
> >  static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
> >  {
> > diff --git a/datapath/linux/compat/include/linux/if_tunnel.h b/datapath/linux/compat/include/linux/if_tunnel.h
> > new file mode 100644
> > index 0000000..476fe3c
> > --- /dev/null
> > +++ b/datapath/linux/compat/include/linux/if_tunnel.h
> > @@ -0,0 +1,71 @@
> > +#ifndef _LINUX_IF_TUNNEL_WRAPPER_H
> > +#define _LINUX_IF_TUNNEL_WRAPPER_H
> > +
> > +#include_next<linux/if_tunnel.h>
> > +
> > +/* GRE section */
> > +enum {
> > +#define IFLA_GRE_UNSPEC rpl_IFLA_GRE_UNSPEC
> > +       IFLA_GRE_UNSPEC,
> > +
> > +#define IFLA_GRE_LINK rpl_IFLA_GRE_LINK
> > +       IFLA_GRE_LINK,
> > +
> > +#define IFLA_GRE_IFLAGS rpl_IFLA_GRE_IFLAGS
> > +       IFLA_GRE_IFLAGS,
> > +
> > +#define IFLA_GRE_OFLAGS rpl_IFLA_GRE_OFLAGS
> > +       IFLA_GRE_OFLAGS,
> > +
> > +#define IFLA_GRE_IKEY rpl_IFLA_GRE_IKEY
> > +       IFLA_GRE_IKEY,
> > +
> > +#define IFLA_GRE_OKEY rpl_IFLA_GRE_OKEY
> > +       IFLA_GRE_OKEY,
> > +
> > +#define IFLA_GRE_LOCAL rpl_IFLA_GRE_LOCAL
> > +       IFLA_GRE_LOCAL,
> > +
> > +#define IFLA_GRE_REMOTE rpl_IFLA_GRE_REMOTE
> > +       IFLA_GRE_REMOTE,
> > +
> > +#define IFLA_GRE_TTL rpl_IFLA_GRE_TTL
> > +       IFLA_GRE_TTL,
> > +
> > +#define IFLA_GRE_TOS rpl_IFLA_GRE_TOS
> > +       IFLA_GRE_TOS,
> > +
> > +#define IFLA_GRE_PMTUDISC rpl_IFLA_GRE_PMTUDISC
> > +       IFLA_GRE_PMTUDISC,
> > +
> > +#define IFLA_GRE_ENCAP_LIMIT rpl_IFLA_GRE_ENCAP_LIMIT
> > +       IFLA_GRE_ENCAP_LIMIT,
> > +
> > +#define IFLA_GRE_FLOWINFO rpl_IFLA_GRE_FLOWINFO
> > +       IFLA_GRE_FLOWINFO,
> > +
> > +#define IFLA_GRE_FLAGS rpl_IFLA_GRE_FLAGS
> > +       IFLA_GRE_FLAGS,
> > +
> > +#define IFLA_GRE_ENCAP_TYPE rpl_IFLA_GRE_ENCAP_TYPE
> > +       IFLA_GRE_ENCAP_TYPE,
> > +
> > +#define IFLA_GRE_ENCAP_FLAGS rpl_IFLA_GRE_ENCAP_FLAGS
> > +       IFLA_GRE_ENCAP_FLAGS,
> > +
> > +#define IFLA_GRE_ENCAP_SPORT rpl_IFLA_GRE_ENCAP_SPORT
> > +       IFLA_GRE_ENCAP_SPORT,
> > +
> > +#define IFLA_GRE_ENCAP_DPORT rpl_IFLA_GRE_ENCAP_DPORT
> > +       IFLA_GRE_ENCAP_DPORT,
> > +
> > +#define IFLA_GRE_COLLECT_METADATA rpl_IFLA_GRE_COLLECT_METADATA
> > +       IFLA_GRE_COLLECT_METADATA,
> > +
> > +#define __IFLA_GRE_MAX rpl__IFLA_GRE_MAX
> > +       __IFLA_GRE_MAX
> > +};
> > +#undef IFLA_GRE_MAX
> > +#define IFLA_GRE_MAX   (__IFLA_GRE_MAX - 1)
> > +
> 
> After thinking about the actual issue of using netdevices for tunnel
> port this is what I think.
> These are tree different implementations of OVS tunnel.
> 
> Case 1. OVS kernel module is upstream. It is straight forward to
> tunnel devices on upstream kernel module. STT and lisp are not
> available.
> Case 2. OVS kernel module is out of tree. In this case OVS has compat
> code and USE_UPSTREAM_TUNNEL is defined. We are using upstream kernel
> modules for geneve, gre and vxlan, for rest of vport. (stt and lisp)
> we are using netdevices from compat code.
> Case 3. OVS kernel module is out of tree and not using upstream tunnel
> devices. we have to fallback to  OVS compat code for all tunnel
> modules.
> 
> Now to detect these cases userspace could probe for tunnel device
> "ovs_geneve" or "ovs_vxlan" if it exist it is case 3, and userspace
> vswitchd has to use existing vport APIs. Otherwise we could use netdev
> based tunnel devices. And create tunnel devices for each type of
> tunnel port.

The fallback option should already work, then. We can make sure during testing
that is the case, so there would be no need to verify ovs_vxlan is present in
case 3. Would that be OK for you?

So, in summary, we drop this patch, submit what we had before, and make sure it
works in the following scenarios:

1) upstream ovs and tunnels are used;
  1a) metadata tunnels can be created, those are used;
  1b) we use compat vports if the configuration allows that;

2) out-of-tree ovs and out-of-tree tunnels are used;
   we make sure using rtnetlink will fail and compat vport is used;
   NOTE: this should work even with the old out-of-tree code that named
         drivers as vxlan instead of ovs_vxlan.

3) out-of-tree ovs and upstream/in-tree tunnels are used;
   it should work just like with upstream ovs, unless the out-of-tree code does
   not support metadata tunnels, in which case, it should fallback to compat
   code.

In all cases, whenever an unsupported tunnel configuration is used, setting up
the tunnel will fail. For example, if GPE is requested and it is not supported
when creating the netdev, it won't work either. As the compat code does
not receive new features, when out-of-tree tunnel drivers are used, those new
features won't be supported.

One question that is left (though I tried to cover it in the scenarios above)
is: do we need to support "old" out-of-tree versions with the new userspace?
That is, if the user updates the userspace, should we require that the
out-of-tree kernel datapath be updated to the matching release? In that case, we
don't need to test the new userspace with the old kernel datapath.

Thanks.
Cascardo.
Pravin Shelar Oct. 26, 2016, 3:21 a.m. UTC | #3
On Tue, Oct 25, 2016 at 11:00 AM, Thadeu Lima de Souza Cascardo
<cascardo@redhat.com> wrote:
> On Thu, Oct 20, 2016 at 10:30:41AM -0700, Pravin Shelar wrote:
>> On Tue, Sep 20, 2016 at 7:01 AM, Thadeu Lima de Souza Cascardo
>> <cascardo@redhat.com> wrote:
>> > In order to use rtnetlink to create new tunnel vports, the backported
>> > tunnels require some code that was removed from their upstream version,
>> > mainly the necessary code for newlink and for start_xmit.
>> >
>> > This patch adds back the necessary code for VXLAN, GRE and Geneve
>> > tunnels.
>> >
>> > Signed-off-by: Eric Garver <e@erig.me>
>> > Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
>> > ---
>> >  datapath/linux/Modules.mk                       |   1 +
>> >  datapath/linux/compat/geneve.c                  |  15 +--
>> >  datapath/linux/compat/include/linux/if_tunnel.h |  71 ++++++++++++
>> >  datapath/linux/compat/ip_gre.c                  |  65 ++++++++---
>> >  datapath/linux/compat/vxlan.c                   | 147 +++++++++++++++++++++---
>> >  5 files changed, 261 insertions(+), 38 deletions(-)
>> >  create mode 100644 datapath/linux/compat/include/linux/if_tunnel.h
>> >
>> > diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
>> > index 26f6d22..ad7d14a 100644
>> > --- a/datapath/linux/Modules.mk
>> > +++ b/datapath/linux/Modules.mk
>> > @@ -38,6 +38,7 @@ openvswitch_headers += \
>> >         linux/compat/include/linux/if.h \
>> >         linux/compat/include/linux/if_ether.h \
>> >         linux/compat/include/linux/if_link.h \
>> > +       linux/compat/include/linux/if_tunnel.h \
>> >         linux/compat/include/linux/if_vlan.h \
>> >         linux/compat/include/linux/in.h \
>> >         linux/compat/include/linux/jiffies.h \
>> > diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
>> > index 0c5b58a..79bb0ba 100644
>> > --- a/datapath/linux/compat/geneve.c
>> > +++ b/datapath/linux/compat/geneve.c
>> > @@ -1112,9 +1112,8 @@ tx_error:
>> >  }
>> >  #endif
>> >
>> > -netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
>> > +static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>> >  {
>> > -       struct net_device *dev = skb->dev;
>> >         struct geneve_dev *geneve = netdev_priv(dev);
>> >         struct ip_tunnel_info *info = NULL;
>> >
>> > @@ -1128,18 +1127,12 @@ netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
>> >  #endif
>> >         return geneve_xmit_skb(skb, dev, info);
>> >  }
>> > -EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
>> >
>> > -static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>> > +netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
>> >  {
>> > -       /* Drop All packets coming from networking stack. OVS-CB is
>> > -        * not initialized for these packets.
>> > -        */
>> > -
>> > -       dev_kfree_skb(skb);
>> > -       dev->stats.tx_dropped++;
>> > -       return NETDEV_TX_OK;
>> > +       return geneve_dev_xmit(skb, skb->dev);
>> >  }
>> This would crash kernel.
>> OVS compat code expect dst entry in skb-cb, packet coming from kernel
>> would not have it.
>>
>> > +EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
>> >
>> >  static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
>> >  {
>> > diff --git a/datapath/linux/compat/include/linux/if_tunnel.h b/datapath/linux/compat/include/linux/if_tunnel.h
>> > new file mode 100644
>> > index 0000000..476fe3c
>> > --- /dev/null
>> > +++ b/datapath/linux/compat/include/linux/if_tunnel.h
>> > @@ -0,0 +1,71 @@
>> > +#ifndef _LINUX_IF_TUNNEL_WRAPPER_H
>> > +#define _LINUX_IF_TUNNEL_WRAPPER_H
>> > +
>> > +#include_next<linux/if_tunnel.h>
>> > +
>> > +/* GRE section */
>> > +enum {
>> > +#define IFLA_GRE_UNSPEC rpl_IFLA_GRE_UNSPEC
>> > +       IFLA_GRE_UNSPEC,
>> > +
>> > +#define IFLA_GRE_LINK rpl_IFLA_GRE_LINK
>> > +       IFLA_GRE_LINK,
>> > +
>> > +#define IFLA_GRE_IFLAGS rpl_IFLA_GRE_IFLAGS
>> > +       IFLA_GRE_IFLAGS,
>> > +
>> > +#define IFLA_GRE_OFLAGS rpl_IFLA_GRE_OFLAGS
>> > +       IFLA_GRE_OFLAGS,
>> > +
>> > +#define IFLA_GRE_IKEY rpl_IFLA_GRE_IKEY
>> > +       IFLA_GRE_IKEY,
>> > +
>> > +#define IFLA_GRE_OKEY rpl_IFLA_GRE_OKEY
>> > +       IFLA_GRE_OKEY,
>> > +
>> > +#define IFLA_GRE_LOCAL rpl_IFLA_GRE_LOCAL
>> > +       IFLA_GRE_LOCAL,
>> > +
>> > +#define IFLA_GRE_REMOTE rpl_IFLA_GRE_REMOTE
>> > +       IFLA_GRE_REMOTE,
>> > +
>> > +#define IFLA_GRE_TTL rpl_IFLA_GRE_TTL
>> > +       IFLA_GRE_TTL,
>> > +
>> > +#define IFLA_GRE_TOS rpl_IFLA_GRE_TOS
>> > +       IFLA_GRE_TOS,
>> > +
>> > +#define IFLA_GRE_PMTUDISC rpl_IFLA_GRE_PMTUDISC
>> > +       IFLA_GRE_PMTUDISC,
>> > +
>> > +#define IFLA_GRE_ENCAP_LIMIT rpl_IFLA_GRE_ENCAP_LIMIT
>> > +       IFLA_GRE_ENCAP_LIMIT,
>> > +
>> > +#define IFLA_GRE_FLOWINFO rpl_IFLA_GRE_FLOWINFO
>> > +       IFLA_GRE_FLOWINFO,
>> > +
>> > +#define IFLA_GRE_FLAGS rpl_IFLA_GRE_FLAGS
>> > +       IFLA_GRE_FLAGS,
>> > +
>> > +#define IFLA_GRE_ENCAP_TYPE rpl_IFLA_GRE_ENCAP_TYPE
>> > +       IFLA_GRE_ENCAP_TYPE,
>> > +
>> > +#define IFLA_GRE_ENCAP_FLAGS rpl_IFLA_GRE_ENCAP_FLAGS
>> > +       IFLA_GRE_ENCAP_FLAGS,
>> > +
>> > +#define IFLA_GRE_ENCAP_SPORT rpl_IFLA_GRE_ENCAP_SPORT
>> > +       IFLA_GRE_ENCAP_SPORT,
>> > +
>> > +#define IFLA_GRE_ENCAP_DPORT rpl_IFLA_GRE_ENCAP_DPORT
>> > +       IFLA_GRE_ENCAP_DPORT,
>> > +
>> > +#define IFLA_GRE_COLLECT_METADATA rpl_IFLA_GRE_COLLECT_METADATA
>> > +       IFLA_GRE_COLLECT_METADATA,
>> > +
>> > +#define __IFLA_GRE_MAX rpl__IFLA_GRE_MAX
>> > +       __IFLA_GRE_MAX
>> > +};
>> > +#undef IFLA_GRE_MAX
>> > +#define IFLA_GRE_MAX   (__IFLA_GRE_MAX - 1)
>> > +
>>
>> After thinking about the actual issue of using netdevices for tunnel
>> port this is what I think.
>> These are tree different implementations of OVS tunnel.
>>
>> Case 1. OVS kernel module is upstream. It is straight forward to
>> tunnel devices on upstream kernel module. STT and lisp are not
>> available.
>> Case 2. OVS kernel module is out of tree. In this case OVS has compat
>> code and USE_UPSTREAM_TUNNEL is defined. We are using upstream kernel
>> modules for geneve, gre and vxlan, for rest of vport. (stt and lisp)
>> we are using netdevices from compat code.
>> Case 3. OVS kernel module is out of tree and not using upstream tunnel
>> devices. we have to fallback to  OVS compat code for all tunnel
>> modules.
>>
>> Now to detect these cases userspace could probe for tunnel device
>> "ovs_geneve" or "ovs_vxlan" if it exist it is case 3, and userspace
>> vswitchd has to use existing vport APIs. Otherwise we could use netdev
>> based tunnel devices. And create tunnel devices for each type of
>> tunnel port.
>
> The fallback option should already work, then. We can make sure during testing
> that is the case, so there would be no need to verify ovs_vxlan is present in
> case 3. Would that be OK for you?
>
I am not sure how exactly fallback would work. But I think we need to
check ovs_geneve (or ovs_vxlan, ovs_gre) to see if we need to use
netdev-vport in userspace.

> So, in summary, we drop this patch, submit what we had before, make sure it
> works in the following scenarions:
>
> 1) upstream ovs and tunnels are used;
>   1a) metadata tunnels can be created, those are used;
>   1b) we use compat vports if the configuration allows that;
>
> 2) out-of-tree ovs and out-of-tree tunnels are used;
>    we make sure using rtnetlink will fail and compat vport is used;
>    NOTE: this should work even with the old out-of-tree code that named
>          drivers as vxlan instead of ovs_vxlan.
>
> 3) out-of-tree ovs and upstream/in-tree tunnels are used;
>    it should work just like with upstream ovs, unless the out-of-tree code does
>    not support metadata tunnels, in which case, it should fallback to compat
>    code.
>
> In all cases, whenever a tunnel configuration that is not supported is used, it
> will fail to setup the tunnel. For example, if GPE would be used and it was not
> supported by creating the netdev, it won't work as well. As the compat code does
> not receive new features, when out-of-tree tunnel drivers are used, those new
> features won't be supported.
>
> One question that is left (though I tried to cover it in the scenarios above)
> is: do we need to support "old" out-of-tree versions with the new userspace?
> That is, if the user updates the userspace, should we require that the
> out-of-tree kernel datapath be updated to the matching release? In that case, we
> don't need to test the new userspace with the old kernel datapath.
>

Yes, userspace should work with older datapaths. There is no need to
explicitly check for the datapath.
We could probe for the device type in the following order to detect kernel
datapath support:

1. Probe for ovs_geneve. If successful, use the compat layer; otherwise go to step 2.
2. Probe for the LWT netdevice (e.g. vxlan or geneve). If successful,
use it; otherwise use the netdev-vport type to manage tunnels.

The idea is to give priority to the compat implementation if it is
defined, so we need to check for ovs_geneve devices first.
Thadeu Lima de Souza Cascardo Oct. 26, 2016, 9:55 a.m. UTC | #4
On Tue, Oct 25, 2016 at 08:21:55PM -0700, Pravin Shelar wrote:
> > The fallback option should already work, then. We can make sure during testing
> > that is the case, so there would be no need to verify ovs_vxlan is present in
> > case 3. Would that be OK for you?
> >
> I am not sure how exactly fallback would work. But I think we need to
> check ovs_geneve (or ovs_vxlan, ovs_gre) to see if we need to use
> netdev-vport in userspace.
> 

The fallback to the compat layer happens whenever creating the netdev interface
with the expected parameters fails. In the case of VXLAN and
Geneve, if we use the metadata option with no destination, creation will fail,
and then we fall back to using vport compat mode. In the case of GRE, we need
to read the parameters back and check for the metadata option, all of which my
last version of the patch does.

> > So, in summary, we drop this patch, submit what we had before, make sure it
> > works in the following scenarions:
> >
> > 1) upstream ovs and tunnels are used;
> >   1a) metadata tunnels can be created, those are used;
> >   1b) we use compat vports if the configuration allows that;
> >
> > 2) out-of-tree ovs and out-of-tree tunnels are used;
> >    we make sure using rtnetlink will fail and compat vport is used;
> >    NOTE: this should work even with the old out-of-tree code that named
> >          drivers as vxlan instead of ovs_vxlan.
> >
> > 3) out-of-tree ovs and upstream/in-tree tunnels are used;
> >    it should work just like with upstream ovs, unless the out-of-tree code does
> >    not support metadata tunnels, in which case, it should fallback to compat
> >    code.
> >
> > In all cases, whenever a tunnel configuration that is not supported is used, it
> > will fail to setup the tunnel. For example, if GPE would be used and it was not
> > supported by creating the netdev, it won't work as well. As the compat code does
> > not receive new features, when out-of-tree tunnel drivers are used, those new
> > features won't be supported.
> >
> > One question that is left (though I tried to cover it in the scenarios above)
> > is: do we need to support "old" out-of-tree versions with the new userspace?
> > That is, if the user updates the userspace, should we require that the
> > out-of-tree kernel datapath be updated to the matching release? In that case, we
> > don't need to test the new userspace with the old kernel datapath.
> >
> 
> Yes, userspace should work with older datapath. There is no need to
> explicitly check for datapath.
> we could probe for device type in following order to detect kernel
> datapath support:
> 
> 1. probe for ovs_geneve: If successful user comapt layer otherwise step 2.
> 2. probe for the LWT netdevice (e.g. vxlan or geneve). if sucessful
> use it. otherwise use netdev-vport type to manage tunnels.
> 
> The idea is to give priority to compat implementation if it is
> defined. so we need to check for ovs_geneve devices first.

I agree that there should be no need to check for the specific datapath. I am
just considering the case where we had out-of-tree tunnels named "geneve" instead
of "ovs_geneve". So, I don't think we should check for "ovs_geneve" at all: it
will fail for older versions of the out-of-tree tunnels, and it's not necessary.

I don't think this check is necessary because as I said: if creating the
netdevice fails, we will use the compat layer. And we should give priority to
the netdevice option, because it supports more options. With the out-of-tree
tunnel code as of now, whenever we try to create the device, it will fail, so
we will fall back anyway. This patch was exactly trying to allow using those
drivers with the new method, in order to allow those new options to be supported
with the out-of-tree drivers.

If you think we don't need to support those out-of-tree drivers with the new
options, no problem. We will just drop this patch. Otherwise, we might need to
fix it. But so far, I don't see any necessary changes to the userspace part that
has already been submitted. Which way should we go?

Thanks.
Cascardo.
Pravin Shelar Oct. 26, 2016, 9:05 p.m. UTC | #5
On Wed, Oct 26, 2016 at 2:55 AM, Thadeu Lima de Souza Cascardo
<cascardo@redhat.com> wrote:
> On Tue, Oct 25, 2016 at 08:21:55PM -0700, Pravin Shelar wrote:
>> > The fallback option should already work, then. We can make sure during testing
>> > that is the case, so there would be no need to verify ovs_vxlan is present in
>> > case 3. Would that be OK for you?
>> >
>> I am not sure how exactly fallback would work. But I think we need to
>> check ovs_geneve (or ovs_vxlan, ovs_gre) to see if we need to use
>> netdev-vport in userspace.
>>
>
> The fallback to compat layer happens whenever creating the netdev interface
> fails to create it with the expected parameters. In the case of VXLAN and
> Geneve, if we use the metadata option with no destination, creation will fail,
> and, then, we fallback to using vport compat mode. In the case of GRE, we need
> to read the parameters back and check for the metadata option, all of which my
> last version of the patch does.
>
>> > So, in summary, we drop this patch, submit what we had before, make sure it
>> > works in the following scenarions:
>> >
>> > 1) upstream ovs and tunnels are used;
>> >   1a) metadata tunnels can be created, those are used;
>> >   1b) we use compat vports if the configuration allows that;
>> >
>> > 2) out-of-tree ovs and out-of-tree tunnels are used;
>> >    we make sure using rtnetlink will fail and compat vport is used;
>> >    NOTE: this should work even with the old out-of-tree code that named
>> >          drivers as vxlan instead of ovs_vxlan.
>> >
>> > 3) out-of-tree ovs and upstream/in-tree tunnels are used;
>> >    it should work just like with upstream ovs, unless the out-of-tree code does
>> >    not support metadata tunnels, in which case, it should fallback to compat
>> >    code.
>> >
>> > In all cases, whenever a tunnel configuration that is not supported is used, it
>> > will fail to setup the tunnel. For example, if GPE would be used and it was not
>> > supported by creating the netdev, it won't work as well. As the compat code does
>> > not receive new features, when out-of-tree tunnel drivers are used, those new
>> > features won't be supported.
>> >
>> > One question that is left (though I tried to cover it in the scenarios above)
>> > is: do we need to support "old" out-of-tree versions with the new userspace?
>> > That is, if the user updates the userspace, should we require that the
>> > out-of-tree kernel datapath be updated to the matching release? In that case, we
>> > don't need to test the new userspace with the old kernel datapath.
>> >
>>
>> Yes, userspace should work with older datapath. There is no need to
>> explicitly check for datapath.
>> we could probe for device type in following order to detect kernel
>> datapath support:
>>
>> 1. probe for ovs_geneve: If successful user comapt layer otherwise step 2.
>> 2. probe for the LWT netdevice (e.g. vxlan or geneve). if sucessful
>> use it. otherwise use netdev-vport type to manage tunnels.
>>
>> The idea is to give priority to compat implementation if it is
>> defined. so we need to check for ovs_geneve devices first.
>
> I agree that there should be no need to check for the specific datapath. I am
> just considering the case where we had out-of-tree tunnels named "geneve" instead
> of "ovs_geneve". So, I don't think we should check for "ovs_geneve" at all: it
> will fail for older versions of the out-of-tree tunnels, and it's not necessary.
>
If the OVS compat layer renames the geneve implementation to "geneve", then
the user cannot even use the kernel geneve driver if the OVS module is
loaded. That is the reason for having a separate name.
Do you see any issue with probing for device type "ovs_geneve"?

> I don't think this check is necessary because as I said: if creating the
> netdevice fails, we will use the compat layer. And we should give priority for
> the netdevice option, because it supports more options. With the out-of-tree
> tunnels code as of now, whenever we try to create the device, it will fail, so
> we will fallback anyway. This patch was exactly trying to allow using those
> drivers with the new method, in order to allow those new options to be supported
> with the out-of-tree drivers.
>
> If you think we don't need to support those out-of-tree drivers with the new
> options, no problem. We will just drop this patch. Otherwise, we might need to
> fix it. But so far, I don't see any necessary changes to the userspace part that
> has already been submitted. Which way should we go?
>
> Thanks.
> Cascardo.
Thadeu Lima de Souza Cascardo Oct. 27, 2016, 12:08 p.m. UTC | #6
On Wed, Oct 26, 2016 at 02:05:22PM -0700, Pravin Shelar wrote:
> On Wed, Oct 26, 2016 at 2:55 AM, Thadeu Lima de Souza Cascardo
> <cascardo@redhat.com> wrote:
> > On Tue, Oct 25, 2016 at 08:21:55PM -0700, Pravin Shelar wrote:
> >> > The fallback option should already work, then. We can make sure during testing
> >> > that is the case, so there would be no need to verify ovs_vxlan is present in
> >> > case 3. Would that be OK for you?
> >> >
> >> I am not sure how exactly fallback would work. But I think we need to
> >> check ovs_geneve (or ovs_vxlan, ovs_gre) to see if we need to use
> >> netdev-vport in userspace.
> >>
> >
> > The fallback to compat layer happens whenever creating the netdev interface
> > fails to create it with the expected parameters. In the case of VXLAN and
> > Geneve, if we use the metadata option with no destination, creation will fail,
> > and, then, we fallback to using vport compat mode. In the case of GRE, we need
> > to read the parameters back and check for the metadata option, all of which my
> > last version of the patch does.
> >
> >> > So, in summary, we drop this patch, submit what we had before, make sure it
> >> > works in the following scenarions:
> >> >
> >> > 1) upstream ovs and tunnels are used;
> >> >   1a) metadata tunnels can be created, those are used;
> >> >   1b) we use compat vports if the configuration allows that;
> >> >
> >> > 2) out-of-tree ovs and out-of-tree tunnels are used;
> >> >    we make sure using rtnetlink will fail and compat vport is used;
> >> >    NOTE: this should work even with the old out-of-tree code that named
> >> >          drivers as vxlan instead of ovs_vxlan.
> >> >
> >> > 3) out-of-tree ovs and upstream/in-tree tunnels are used;
> >> >    it should work just like with upstream ovs, unless the out-of-tree code does
> >> >    not support metadata tunnels, in which case, it should fallback to compat
> >> >    code.
> >> >
> >> > In all cases, whenever a tunnel configuration that is not supported is used, it
> >> > will fail to setup the tunnel. For example, if GPE would be used and it was not
> >> > supported by creating the netdev, it won't work as well. As the compat code does
> >> > not receive new features, when out-of-tree tunnel drivers are used, those new
> >> > features won't be supported.
> >> >
> >> > One question that is left (though I tried to cover it in the scenarios above)
> >> > is: do we need to support "old" out-of-tree versions with the new userspace?
> >> > That is, if the user updates the userspace, should we require that the
> >> > out-of-tree kernel datapath be updated to the matching release? In that case, we
> >> > don't need to test the new userspace with the old kernel datapath.
> >> >
> >>
> >> Yes, userspace should work with older datapath. There is no need to
> >> explicitly check for datapath.
> >> we could probe for device type in following order to detect kernel
> >> datapath support:
> >>
> >> 1. probe for ovs_geneve: If successful user comapt layer otherwise step 2.
> >> 2. probe for the LWT netdevice (e.g. vxlan or geneve). if sucessful
> >> use it. otherwise use netdev-vport type to manage tunnels.
> >>
> >> The idea is to give priority to compat implementation if it is
> >> defined. so we need to check for ovs_geneve devices first.
> >
> > I agree that there should be no need to check for the specific datapath. I am
> > just considering the case where we had out-of-tree tunnels named "geneve" instead
> > of "ovs_geneve". So, I don't think we should check for "ovs_geneve" at all: it
> > will fail for older versions of the out-of-tree tunnels, and it's not necessary.
> >
> If OVS compat layer renames the geneve implementation to "geneve" then
> user can not even use the kernel geneve driver if OVS module is
> loaded. That is the reason for having separate name.
> Do you see any issue with probing for device type "ovs_geneve"?
> 

If ovs_geneve can be created using rtnetlink and support the new options, as we
intended with this patch, sure. That was our intention with this RFC. Then, we
are back to the path that we were in: adding support to the out-of-tree tunnel
code so we could create the devices using RTM_NEWLINK. Do you mean that?

Otherwise, as I said, I find it unnecessary to probe for ovs_geneve, unless
loading the in-tree driver will prevent the out-of-tree driver from being loaded. In
which case, we are preferring the out-of-tree driver over the in-tree one no
matter what.

Does that make sense?

Cascardo.

> > I don't think this check is necessary because as I said: if creating the
> > netdevice fails, we will use the compat layer. And we should give priority for
> > the netdevice option, because it supports more options. With the out-of-tree
> > tunnels code as of now, whenever we try to create the device, it will fail, so
> > we will fallback anyway. This patch was exactly trying to allow using those
> > drivers with the new method, in order to allow those new options to be supported
> > with the out-of-tree drivers.
> >
> > If you think we don't need to support those out-of-tree drivers with the new
> > options, no problem. We will just drop this patch. Otherwise, we might need to
> > fix it. But so far, I don't see any necessary changes to the userspace part that
> > has already been submitted. Which way should we go?
> >
> > Thanks.
> > Cascardo.
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
Pravin Shelar Nov. 14, 2016, 10:49 p.m. UTC | #7
On Thu, Oct 27, 2016 at 5:08 AM, Thadeu Lima de Souza Cascardo
<cascardo@redhat.com> wrote:
> On Wed, Oct 26, 2016 at 02:05:22PM -0700, Pravin Shelar wrote:
>> On Wed, Oct 26, 2016 at 2:55 AM, Thadeu Lima de Souza Cascardo
>> <cascardo@redhat.com> wrote:
>> > On Tue, Oct 25, 2016 at 08:21:55PM -0700, Pravin Shelar wrote:
>> >> > The fallback option should already work, then. We can make sure during testing
>> >> > that is the case, so there would be no need to verify ovs_vxlan is present in
>> >> > case 3. Would that be OK for you?
>> >> >
>> >> I am not sure how exactly fallback would work. But I think we need to
>> >> check ovs_geneve (or ovs_vxlan, ovs_gre) to see if we need to use
>> >> netdev-vport in userspace.
>> >>
>> >
>> > The fallback to compat layer happens whenever creating the netdev interface
>> > fails to create it with the expected parameters. In the case of VXLAN and
>> > Geneve, if we use the metadata option with no destination, creation will fail,
>> > and, then, we fallback to using vport compat mode. In the case of GRE, we need
>> > to read the parameters back and check for the metadata option, all of which my
>> > last version of the patch does.
>> >
>> >> > So, in summary, we drop this patch, submit what we had before, make sure it
>> >> > works in the following scenarions:
>> >> >
>> >> > 1) upstream ovs and tunnels are used;
>> >> >   1a) metadata tunnels can be created, those are used;
>> >> >   1b) we use compat vports if the configuration allows that;
>> >> >
>> >> > 2) out-of-tree ovs and out-of-tree tunnels are used;
>> >> >    we make sure using rtnetlink will fail and compat vport is used;
>> >> >    NOTE: this should work even with the old out-of-tree code that named
>> >> >          drivers as vxlan instead of ovs_vxlan.
>> >> >
>> >> > 3) out-of-tree ovs and upstream/in-tree tunnels are used;
>> >> >    it should work just like with upstream ovs, unless the out-of-tree code does
>> >> >    not support metadata tunnels, in which case, it should fallback to compat
>> >> >    code.
>> >> >
>> >> > In all cases, whenever a tunnel configuration that is not supported is used, it
>> >> > will fail to setup the tunnel. For example, if GPE would be used and it was not
>> >> > supported by creating the netdev, it won't work as well. As the compat code does
>> >> > not receive new features, when out-of-tree tunnel drivers are used, those new
>> >> > features won't be supported.
>> >> >
>> >> > One question that is left (though I tried to cover it in the scenarios above)
>> >> > is: do we need to support "old" out-of-tree versions with the new userspace?
>> >> > That is, if the user updates the userspace, should we require that the
>> >> > out-of-tree kernel datapath be updated to the matching release? In that case, we
>> >> > don't need to test the new userspace with the old kernel datapath.
>> >> >
>> >>
>> >> Yes, userspace should work with older datapath. There is no need to
>> >> explicitly check for datapath.
>> >> we could probe for device type in following order to detect kernel
>> >> datapath support:
>> >>
>> >> 1. probe for ovs_geneve: If successful user comapt layer otherwise step 2.
>> >> 2. probe for the LWT netdevice (e.g. vxlan or geneve). if sucessful
>> >> use it. otherwise use netdev-vport type to manage tunnels.
>> >>
>> >> The idea is to give priority to compat implementation if it is
>> >> defined. so we need to check for ovs_geneve devices first.
>> >
>> > I agree that there should be no need to check for the specific datapath. I am
>> > just considering the case where we had out-of-tree tunnels named "geneve" instead
>> > of "ovs_geneve". So, I don't think we should check for "ovs_geneve" at all: it
>> > will fail for older versions of the out-of-tree tunnels, and it's not necessary.
>> >
>> If OVS compat layer renames the geneve implementation to "geneve" then
>> user can not even use the kernel geneve driver if OVS module is
>> loaded. That is the reason for having separate name.
>> Do you see any issue with probing for device type "ovs_geneve"?
>>
>
> If ovs_geneve can be created using rtnetlink and support the new options, as we
> intended with this patch, sure. That was our intention with this RFC. Then, we
> are back to the path that we were in: adding support to the out-of-tree tunnel
> code so we could create the devices using RTM_NEWLINK. Do you mean that?
>
We cannot create a compat tunnel device and attach it as a netdev, for the
reasons mentioned before.

> Otherwise, as I said, I find it unnecessary to probe for ovs_geneve, unless
> loading the in-tree driver will prevent the out-of-tree driver to be loaded. In
> which case, we are prefering the out-of-tree driver over the in-tree one no
> matter what.
>
> Does that make sense?
>
Yes, we need to prefer the out-of-tree tunnel driver in some kernel
configurations. We already have logic in the compat code to determine
those cases. In those cases the compat code exposes ovs_geneve or
ovs_vxlan tunnel devices.
So the reason to probe for ovs_geneve is that it tells us whether we should
prefer the out-of-tree tunnel driver or the in-tree tunnel driver.

I am not sure how you would figure out which tunnel implementation to
use without probing for ovs_geneve.
diff mbox

Patch

diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 26f6d22..ad7d14a 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -38,6 +38,7 @@  openvswitch_headers += \
 	linux/compat/include/linux/if.h \
 	linux/compat/include/linux/if_ether.h \
 	linux/compat/include/linux/if_link.h \
+	linux/compat/include/linux/if_tunnel.h \
 	linux/compat/include/linux/if_vlan.h \
 	linux/compat/include/linux/in.h \
 	linux/compat/include/linux/jiffies.h \
diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
index 0c5b58a..79bb0ba 100644
--- a/datapath/linux/compat/geneve.c
+++ b/datapath/linux/compat/geneve.c
@@ -1112,9 +1112,8 @@  tx_error:
 }
 #endif
 
-netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
+static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device *dev = skb->dev;
 	struct geneve_dev *geneve = netdev_priv(dev);
 	struct ip_tunnel_info *info = NULL;
 
@@ -1128,18 +1127,12 @@  netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
 #endif
 	return geneve_xmit_skb(skb, dev, info);
 }
-EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
 
-static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
 {
-	/* Drop All packets coming from networking stack. OVS-CB is
-	 * not initialized for these packets.
-	 */
-
-	dev_kfree_skb(skb);
-	dev->stats.tx_dropped++;
-	return NETDEV_TX_OK;
+	return geneve_dev_xmit(skb, skb->dev);
 }
+EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
 
 static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 {
diff --git a/datapath/linux/compat/include/linux/if_tunnel.h b/datapath/linux/compat/include/linux/if_tunnel.h
new file mode 100644
index 0000000..476fe3c
--- /dev/null
+++ b/datapath/linux/compat/include/linux/if_tunnel.h
@@ -0,0 +1,71 @@ 
+#ifndef _LINUX_IF_TUNNEL_WRAPPER_H
+#define _LINUX_IF_TUNNEL_WRAPPER_H
+
+#include_next<linux/if_tunnel.h>
+
+/* GRE section */
+enum {
+#define IFLA_GRE_UNSPEC rpl_IFLA_GRE_UNSPEC
+	IFLA_GRE_UNSPEC,
+
+#define IFLA_GRE_LINK rpl_IFLA_GRE_LINK
+	IFLA_GRE_LINK,
+
+#define IFLA_GRE_IFLAGS rpl_IFLA_GRE_IFLAGS
+	IFLA_GRE_IFLAGS,
+
+#define IFLA_GRE_OFLAGS rpl_IFLA_GRE_OFLAGS
+	IFLA_GRE_OFLAGS,
+
+#define IFLA_GRE_IKEY rpl_IFLA_GRE_IKEY
+	IFLA_GRE_IKEY,
+
+#define IFLA_GRE_OKEY rpl_IFLA_GRE_OKEY
+	IFLA_GRE_OKEY,
+
+#define IFLA_GRE_LOCAL rpl_IFLA_GRE_LOCAL
+	IFLA_GRE_LOCAL,
+
+#define IFLA_GRE_REMOTE rpl_IFLA_GRE_REMOTE
+	IFLA_GRE_REMOTE,
+
+#define IFLA_GRE_TTL rpl_IFLA_GRE_TTL
+	IFLA_GRE_TTL,
+
+#define IFLA_GRE_TOS rpl_IFLA_GRE_TOS
+	IFLA_GRE_TOS,
+
+#define IFLA_GRE_PMTUDISC rpl_IFLA_GRE_PMTUDISC
+	IFLA_GRE_PMTUDISC,
+
+#define IFLA_GRE_ENCAP_LIMIT rpl_IFLA_GRE_ENCAP_LIMIT
+	IFLA_GRE_ENCAP_LIMIT,
+
+#define IFLA_GRE_FLOWINFO rpl_IFLA_GRE_FLOWINFO
+	IFLA_GRE_FLOWINFO,
+
+#define IFLA_GRE_FLAGS rpl_IFLA_GRE_FLAGS
+	IFLA_GRE_FLAGS,
+
+#define IFLA_GRE_ENCAP_TYPE rpl_IFLA_GRE_ENCAP_TYPE
+	IFLA_GRE_ENCAP_TYPE,
+
+#define IFLA_GRE_ENCAP_FLAGS rpl_IFLA_GRE_ENCAP_FLAGS
+	IFLA_GRE_ENCAP_FLAGS,
+
+#define IFLA_GRE_ENCAP_SPORT rpl_IFLA_GRE_ENCAP_SPORT
+	IFLA_GRE_ENCAP_SPORT,
+
+#define IFLA_GRE_ENCAP_DPORT rpl_IFLA_GRE_ENCAP_DPORT
+	IFLA_GRE_ENCAP_DPORT,
+
+#define IFLA_GRE_COLLECT_METADATA rpl_IFLA_GRE_COLLECT_METADATA
+	IFLA_GRE_COLLECT_METADATA,
+
+#define __IFLA_GRE_MAX rpl__IFLA_GRE_MAX
+	__IFLA_GRE_MAX
+};
+#undef IFLA_GRE_MAX
+#define IFLA_GRE_MAX	(__IFLA_GRE_MAX - 1)
+
+#endif
diff --git a/datapath/linux/compat/ip_gre.c b/datapath/linux/compat/ip_gre.c
index 03c5435..ab04dab 100644
--- a/datapath/linux/compat/ip_gre.c
+++ b/datapath/linux/compat/ip_gre.c
@@ -273,9 +273,8 @@  static struct rtable *gre_get_rt(struct sk_buff *skb,
 	return ip_route_output_key(net, fl);
 }
 
-netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb)
+static netdev_tx_t gre_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device *dev = skb->dev;
 	struct ip_tunnel_info *tun_info;
 	const struct ip_tunnel_key *key;
 	struct flowi4 fl;
@@ -338,7 +337,6 @@  err_free_skb:
 	dev->stats.tx_dropped++;
 	return NETDEV_TX_OK;
 }
-EXPORT_SYMBOL(rpl_gre_fb_xmit);
 
 #define GRE_FEATURES	(NETIF_F_SG |		\
 			 NETIF_F_FRAGLIST |	\
@@ -443,6 +441,47 @@  static void ipgre_netlink_parms(struct net_device *dev,
 	memset(parms, 0, sizeof(*parms));
 
 	parms->iph.protocol = IPPROTO_GRE;
+
+	if (!data)
+		return;
+
+	if (data[IFLA_GRE_LINK])
+		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
+
+	if (data[IFLA_GRE_IFLAGS])
+		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
+
+	if (data[IFLA_GRE_OFLAGS])
+		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
+
+	if (data[IFLA_GRE_IKEY])
+		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
+
+	if (data[IFLA_GRE_OKEY])
+		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
+
+	if (data[IFLA_GRE_LOCAL])
+		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
+
+	if (data[IFLA_GRE_REMOTE])
+		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
+
+	if (data[IFLA_GRE_TTL])
+		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
+
+	if (data[IFLA_GRE_TOS])
+		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
+
+	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
+		parms->iph.frag_off = htons(IP_DF);
+
+	if (data[IFLA_GRE_COLLECT_METADATA]) {
+		struct ip_tunnel *t = netdev_priv(dev);
+
+		t->collect_md = true;
+		if (dev->type == ARPHRD_IPGRE)
+			dev->type = ARPHRD_NONE;
+	}
 }
 
 static int gre_tap_init(struct net_device *dev)
@@ -453,16 +492,11 @@  static int gre_tap_init(struct net_device *dev)
 	return ip_tunnel_init(dev);
 }
 
-static netdev_tx_t gre_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb)
 {
-	/* Drop All packets coming from networking stack. OVS-CB is
-	 * not initialized for these packets.
-	 */
-
-	dev_kfree_skb(skb);
-	dev->stats.tx_dropped++;
-	return NETDEV_TX_OK;
+	return gre_dev_xmit(skb, skb->dev);
 }
+EXPORT_SYMBOL(rpl_gre_fb_xmit);
 
 int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
@@ -518,11 +552,9 @@  static int ipgre_newlink(struct net_device *dev,
 #endif
 {
 	struct ip_tunnel_parm p;
-	int err;
 
 	ipgre_netlink_parms(dev, data, tb, &p);
-	err = ip_tunnel_newlink(dev, tb, &p);
-	return err;
+	return ip_tunnel_newlink(dev, tb, &p);
 
 }
 
@@ -580,6 +612,11 @@  static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		       !!(p->iph.frag_off & htons(IP_DF))))
 		goto nla_put_failure;
 
+	if (t->collect_md) {
+		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
+			goto nla_put_failure;
+	}
+
 	return 0;
 
 nla_put_failure:
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 47a5a68..73b260e 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -1225,9 +1225,8 @@  tx_free:
  * Outer UDP destination is the VXLAN assigned port.
  *           source port is based on hash of flow
  */
-netdev_tx_t rpl_vxlan_xmit(struct sk_buff *skb)
+static netdev_tx_t vxlan_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device *dev = skb->dev;
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	const struct ip_tunnel_info *info;
 
@@ -1244,7 +1243,6 @@  netdev_tx_t rpl_vxlan_xmit(struct sk_buff *skb)
 	kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
-EXPORT_SYMBOL_GPL(rpl_vxlan_xmit);
 
 /* Walk the forwarding table and purge stale entries */
 static void vxlan_cleanup(unsigned long arg)
@@ -1466,16 +1464,11 @@  int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(ovs_vxlan_fill_metadata_dst);
 
-static netdev_tx_t vxlan_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+netdev_tx_t rpl_vxlan_xmit(struct sk_buff *skb)
 {
-	/* Drop All packets coming from networking stack. OVS-CB is
-	 * not initialized for these packets.
-	 */
-
-	dev_kfree_skb(skb);
-	dev->stats.tx_dropped++;
-	return NETDEV_TX_OK;
+	return vxlan_dev_xmit(skb, skb->dev);
 }
+EXPORT_SYMBOL_GPL(rpl_vxlan_xmit);
 
 static const struct net_device_ops vxlan_netdev_ether_ops = {
 	.ndo_init		= vxlan_init,
@@ -1950,8 +1943,136 @@  static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
 static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[])
 {
-	pr_info("unsupported operation\n");
-	return -EINVAL;
+	struct vxlan_config conf;
+	int err;
+
+	memset(&conf, 0, sizeof(conf));
+
+	if (data[IFLA_VXLAN_ID])
+		conf.vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
+
+	if (data[IFLA_VXLAN_GROUP]) {
+		conf.remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
+	} else if (data[IFLA_VXLAN_GROUP6]) {
+		if (!IS_ENABLED(CONFIG_IPV6))
+			return -EPFNOSUPPORT;
+
+		conf.remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
+		conf.remote_ip.sa.sa_family = AF_INET6;
+	}
+
+	if (data[IFLA_VXLAN_LOCAL]) {
+		conf.saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
+		conf.saddr.sa.sa_family = AF_INET;
+	} else if (data[IFLA_VXLAN_LOCAL6]) {
+		if (!IS_ENABLED(CONFIG_IPV6))
+			return -EPFNOSUPPORT;
+
+		/* TODO: respect scope id */
+		conf.saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
+		conf.saddr.sa.sa_family = AF_INET6;
+	}
+
+	if (data[IFLA_VXLAN_LINK])
+		conf.remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]);
+
+	if (data[IFLA_VXLAN_TOS])
+		conf.tos  = nla_get_u8(data[IFLA_VXLAN_TOS]);
+
+	if (data[IFLA_VXLAN_TTL])
+		conf.ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
+
+	if (data[IFLA_VXLAN_LABEL])
+		conf.label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
+			     IPV6_FLOWLABEL_MASK;
+
+	if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING]))
+		conf.flags |= VXLAN_F_LEARN;
+
+	if (data[IFLA_VXLAN_AGEING])
+		conf.age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
+
+	if (data[IFLA_VXLAN_PROXY] && nla_get_u8(data[IFLA_VXLAN_PROXY]))
+		conf.flags |= VXLAN_F_PROXY;
+
+	if (data[IFLA_VXLAN_RSC] && nla_get_u8(data[IFLA_VXLAN_RSC]))
+		conf.flags |= VXLAN_F_RSC;
+
+	if (data[IFLA_VXLAN_L2MISS] && nla_get_u8(data[IFLA_VXLAN_L2MISS]))
+		conf.flags |= VXLAN_F_L2MISS;
+
+	if (data[IFLA_VXLAN_L3MISS] && nla_get_u8(data[IFLA_VXLAN_L3MISS]))
+		conf.flags |= VXLAN_F_L3MISS;
+
+	if (data[IFLA_VXLAN_LIMIT])
+		conf.addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
+
+	if (data[IFLA_VXLAN_COLLECT_METADATA] &&
+	    nla_get_u8(data[IFLA_VXLAN_COLLECT_METADATA]))
+		conf.flags |= VXLAN_F_COLLECT_METADATA;
+
+	if (data[IFLA_VXLAN_PORT_RANGE]) {
+		const struct ifla_vxlan_port_range *p
+			= nla_data(data[IFLA_VXLAN_PORT_RANGE]);
+		conf.port_min = ntohs(p->low);
+		conf.port_max = ntohs(p->high);
+	}
+
+	if (data[IFLA_VXLAN_PORT])
+		conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
+
+	if (data[IFLA_VXLAN_UDP_CSUM] &&
+	    !nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
+		conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
+
+	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
+	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
+		conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+
+	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
+	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
+		conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
+
+	if (data[IFLA_VXLAN_REMCSUM_TX] &&
+	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
+		conf.flags |= VXLAN_F_REMCSUM_TX;
+
+	if (data[IFLA_VXLAN_REMCSUM_RX] &&
+	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
+		conf.flags |= VXLAN_F_REMCSUM_RX;
+
+	if (data[IFLA_VXLAN_GBP])
+		conf.flags |= VXLAN_F_GBP;
+
+	if (data[IFLA_VXLAN_GPE])
+		conf.flags |= VXLAN_F_GPE;
+
+	if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
+		conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
+
+	if (tb[IFLA_MTU])
+		conf.mtu = nla_get_u32(tb[IFLA_MTU]);
+
+	err = vxlan_dev_configure(src_net, dev, &conf);
+	switch (err) {
+	case -ENODEV:
+		pr_info("ifindex %d does not exist\n", conf.remote_ifindex);
+		break;
+
+	case -EPERM:
+		pr_info("IPv6 is disabled via sysctl\n");
+		break;
+
+	case -EEXIST:
+		pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
+		break;
+
+	case -EINVAL:
+		pr_info("unsupported combination of extensions\n");
+		break;
+	}
+
+	return err;
 }
 
 static void vxlan_dellink(struct net_device *dev, struct list_head *head)