diff mbox

[net-next,v21,3/4] openvswitch: 802.1AD Flow handling, actions, vlan parsing, netlink attributes

Message ID 1472762727-25844-4-git-send-email-e@erig.me
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Garver Sept. 1, 2016, 8:45 p.m. UTC
Add support for 802.1ad including the ability to push and pop double
tagged vlans. Add support for 802.1ad to netlink parsing and flow
conversion. Uses double nested encap attributes to represent double
tagged vlan. Inner TPID encoded along with ctci in nested attributes.

This is based on Thomas F Herbert's original v20 patch. I made some
small clean ups and bug fixes.

Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
---
 net/openvswitch/actions.c      |  16 +--
 net/openvswitch/flow.c         |  64 ++++++++----
 net/openvswitch/flow.h         |   8 +-
 net/openvswitch/flow_netlink.c | 227 ++++++++++++++++++++++++++++++-----------
 net/openvswitch/vport.c        |   7 +-
 5 files changed, 235 insertions(+), 87 deletions(-)

Comments

Pravin Shelar Sept. 2, 2016, 9:42 p.m. UTC | #1
On Thu, Sep 1, 2016 at 1:45 PM, Eric Garver <e@erig.me> wrote:
> Add support for 802.1ad including the ability to push and pop double
> tagged vlans. Add support for 802.1ad to netlink parsing and flow
> conversion. Uses double nested encap attributes to represent double
> tagged vlan. Inner TPID encoded along with ctci in nested attributes.
>
> This is based on Thomas F Herbert's original v20 patch. I made some
> small clean ups and bug fixes.
>
> Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
> Signed-off-by: Eric Garver <e@erig.me>

Thanks for working on this. This version looks pretty close to complete.

> ---
>  net/openvswitch/actions.c      |  16 +--
>  net/openvswitch/flow.c         |  64 ++++++++----
>  net/openvswitch/flow.h         |   8 +-
>  net/openvswitch/flow_netlink.c | 227 ++++++++++++++++++++++++++++++-----------
>  net/openvswitch/vport.c        |   7 +-
>  5 files changed, 235 insertions(+), 87 deletions(-)
>
...
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index 0ea128eeeab2..13f6ebdf379b 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -302,24 +302,56 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
>                                   sizeof(struct icmp6hdr));
>  }
>
> -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
> +/**
> + * Parse vlan tag from vlan header.
> + * Returns ERROR on memory error.
> + * Returns 0 if it encounters a non-vlan or incomplete packet.
> + * Returns 1 after successfully parsing vlan tag.
> + */
> +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *vlan)
>  {
> -       struct qtag_prefix {
> -               __be16 eth_type; /* ETH_P_8021Q */
> -               __be16 tci;
> -       };
> -       struct qtag_prefix *qp;
> +       struct vlan_head *qp = (struct vlan_head *)skb->data;
>
> -       if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
> +       if (likely(!eth_type_vlan(qp->tpid)))
>                 return 0;
>
> -       if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
> -                                        sizeof(__be16))))
> +       if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))
> +               return 0;
> +
> +       if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) +
> +                                sizeof(__be16))))
>                 return -ENOMEM;
>
pskb_may_pull() can change skb->data, so you need to refresh the qp pointer.

> -       qp = (struct qtag_prefix *) skb->data;
> -       key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
> -       __skb_pull(skb, sizeof(struct qtag_prefix));
> +       vlan->tci = qp->tci | htons(VLAN_TAG_PRESENT);
> +       vlan->tpid = qp->tpid;
> +
> +       __skb_pull(skb, sizeof(struct vlan_head));
> +       return 1;
> +}
> +
...
...

> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index c78a6a1476fb..fbe9e0e4792d 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
...

> +static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
> +                                    u64 *key_attrs, bool inner,
> +                                    const struct nlattr **a, bool is_mask,
> +                                    bool log)
> +{
> +       int err;
> +       const struct nlattr *encap;
> +
> +       err = encode_vlan_from_nlattrs(match, a, is_mask, inner, log);
> +       if (err)
> +               return err;
> +
> +       *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
> +
> +       /* Ensure that tci key attribute isn't
> +        * overwritten by encapsulated customer tci.
> +        * Ethertype is cleared because it is c_tpid.
> +        */
> +       *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
> +       *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
> +
> +       encap = a[OVS_KEY_ATTR_ENCAP];
> +
> +       if (is_mask)
> +               err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
> +       else
> +               err = parse_flow_nlattrs(encap, a, key_attrs, log);
> +
> +       return err;
> +}
> +
> +static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
> +                                  u64 *key_attrs, bool *ie_valid,
> +                                  const struct nlattr **a, bool is_mask,
> +                                  bool log)
> +{
> +       int err;
> +
> +       err = __parse_vlan_from_nlattrs(match, key_attrs,
> +                                       false, a, is_mask, log);
> +       if (err)
> +               return err;
> +
> +       if (!is_mask) {
> +               if ((*key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
> +                   eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]))) {
> +
> +                       if (!((*key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
> +                             (*key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
> +                               OVS_NLERR(log, "Invalid Inner VLAN frame");
> +                               return -EINVAL;
> +                       }
> +                       *ie_valid = true;
> +               }
> +       } else {
> +               if (*key_attrs & (1 << OVS_KEY_ATTR_ENCAP)) {
> +                       if (!*ie_valid) {
> +                               OVS_NLERR(log, "Encap mask attribute is set for non-CVLAN frame.");
> +                               return -EINVAL;
> +                       }
> +               }
> +       }
> +
This check can be moved to __parse_vlan_from_nlattrs(). That way there
is no need to check ATTR_VLAN and ATTR_ENCAP both here and in the
ovs_nla_get_match() function.
...

> @@ -1182,11 +1301,11 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
>                       bool log)
>  {
>         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
> -       const struct nlattr *encap;
>         struct nlattr *newmask = NULL;
>         u64 key_attrs = 0;
>         u64 mask_attrs = 0;
>         bool encap_valid = false;
> +       bool i_encap_valid = false;
>         int err;
>
>         err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
> @@ -1195,35 +1314,20 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
>
>         if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
>             (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
> -           (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
> -               __be16 tci;
> +           eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]))) {
>
>                 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
>                       (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
>                         OVS_NLERR(log, "Invalid Vlan frame.");
>                         return -EINVAL;
>                 }
...
...
> @@ -1464,17 +1547,37 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
>         ether_addr_copy(eth_key->eth_src, output->eth.src);
>         ether_addr_copy(eth_key->eth_dst, output->eth.dst);
>
> -       if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
> +       if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
>                 __be16 eth_type;
> -               eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
> +               eth_type = !is_mask ? output->eth.vlan.tpid : htons(0xffff);
>                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
> -                   nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
> +                   nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.vlan.tci))
>                         goto nla_put_failure;
>                 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
> -               if (!swkey->eth.tci)
> +               if (!swkey->eth.vlan.tci)
>                         goto unencap;
> -       } else
> -               encap = NULL;
> +
> +               if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
> +                       /* Customer tci is nested but uses same key attribute.
> +                        */
> +                       eth_type = !is_mask ? output->eth.cvlan.tpid : htons(0xffff);
> +                       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
> +                           nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.cvlan.tci))
> +                               goto nla_put_failure;
> +                       in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
> +                       if (!swkey->eth.cvlan.tci)
> +                               goto unencap;
> +               }
> +       }
There could be a helper function to serialize "struct vlan_head" to
avoid the duplicate code above.

> +
> +       if (eth_type_vlan(swkey->eth.type) && encap && in_encap) {

I think the check for eth.type is sufficient here. encap and in_encap
should be true in this case anyway.

> +               /* There are 3 VLAN tags, we don't know anything about the rest of the
> +                * packet, so truncate here.
> +                */
> +               if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))

After this, output->eth.type would be serialized three times. Can you
move the check for the third vlan tag to after output->eth.type is
serialized, so as to avoid that?

> +                       goto nla_put_failure;
> +               goto unencap;
> +       }
>
>         if (swkey->eth.type == htons(ETH_P_802_2)) {
>                 /*
Pravin Shelar Sept. 2, 2016, 10:21 p.m. UTC | #2
On Fri, Sep 2, 2016 at 2:42 PM, pravin shelar <pshelar@ovn.org> wrote:
> On Thu, Sep 1, 2016 at 1:45 PM, Eric Garver <e@erig.me> wrote:
>> Add support for 802.1ad including the ability to push and pop double
>> tagged vlans. Add support for 802.1ad to netlink parsing and flow
>> conversion. Uses double nested encap attributes to represent double
>> tagged vlan. Inner TPID encoded along with ctci in nested attributes.
>>
>> This is based on Thomas F Herbert's original v20 patch. I made some
>> small clean ups and bug fixes.
>>
>> Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
>> Signed-off-by: Eric Garver <e@erig.me>
>

checkpatch.pl is also not happy with the patch; it found a bunch of issues.

./scripts/checkpatch.pl
0003-openvswitch-802.1AD-Flow-handling-actions-vlan-parsi.patch
...
...
total: 6 errors, 9 warnings, 1 checks, 460 lines checked
Eric Garver Sept. 3, 2016, 5:30 p.m. UTC | #3
Pravin,

Thanks for the feedback. Some replies below.

Thanks.
Eric.

On Fri, Sep 02, 2016 at 02:42:30PM -0700, pravin shelar wrote:
> On Thu, Sep 1, 2016 at 1:45 PM, Eric Garver <e@erig.me> wrote:
> > Add support for 802.1ad including the ability to push and pop double
> > tagged vlans. Add support for 802.1ad to netlink parsing and flow
> > conversion. Uses double nested encap attributes to represent double
> > tagged vlan. Inner TPID encoded along with ctci in nested attributes.
> >
> > This is based on Thomas F Herbert's original v20 patch. I made some
> > small clean ups and bug fixes.
> >
> > Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
> > Signed-off-by: Eric Garver <e@erig.me>
> 
> Thanks for working on this. This version looks pretty close to complete.
> 
> > ---
> >  net/openvswitch/actions.c      |  16 +--
> >  net/openvswitch/flow.c         |  64 ++++++++----
> >  net/openvswitch/flow.h         |   8 +-
> >  net/openvswitch/flow_netlink.c | 227 ++++++++++++++++++++++++++++++-----------
> >  net/openvswitch/vport.c        |   7 +-
> >  5 files changed, 235 insertions(+), 87 deletions(-)
> >
> ...
> > diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> > index 0ea128eeeab2..13f6ebdf379b 100644
> > --- a/net/openvswitch/flow.c
> > +++ b/net/openvswitch/flow.c
> > @@ -302,24 +302,56 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
> >                                   sizeof(struct icmp6hdr));
> >  }
> >
> > -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
> > +/**
> > + * Parse vlan tag from vlan header.
> > + * Returns ERROR on memory error.
> > + * Returns 0 if it encounters a non-vlan or incomplete packet.
> > + * Returns 1 after successfully parsing vlan tag.
> > + */
> > +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *vlan)
> >  {
> > -       struct qtag_prefix {
> > -               __be16 eth_type; /* ETH_P_8021Q */
> > -               __be16 tci;
> > -       };
> > -       struct qtag_prefix *qp;
> > +       struct vlan_head *qp = (struct vlan_head *)skb->data;
> >
> > -       if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
> > +       if (likely(!eth_type_vlan(qp->tpid)))
> >                 return 0;
> >
> > -       if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
> > -                                        sizeof(__be16))))
> > +       if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))
> > +               return 0;
> > +
> > +       if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) +
> > +                                sizeof(__be16))))
> >                 return -ENOMEM;
> >
> pskb_may_pull() can change skb->data, so you need to refresh qp pointer.
> 

Good catch!

> > -       qp = (struct qtag_prefix *) skb->data;
> > -       key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
> > -       __skb_pull(skb, sizeof(struct qtag_prefix));
> > +       vlan->tci = qp->tci | htons(VLAN_TAG_PRESENT);
> > +       vlan->tpid = qp->tpid;
> > +
> > +       __skb_pull(skb, sizeof(struct vlan_head));
> > +       return 1;
> > +}
> > +
> ...
> ...
> 
> > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> > index c78a6a1476fb..fbe9e0e4792d 100644
> > --- a/net/openvswitch/flow_netlink.c
> > +++ b/net/openvswitch/flow_netlink.c
> ...
> 
> > +static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
> > +                                    u64 *key_attrs, bool inner,
> > +                                    const struct nlattr **a, bool is_mask,
> > +                                    bool log)
> > +{
> > +       int err;
> > +       const struct nlattr *encap;
> > +
> > +       err = encode_vlan_from_nlattrs(match, a, is_mask, inner, log);
> > +       if (err)
> > +               return err;
> > +
> > +       *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
> > +
> > +       /* Ensure that tci key attribute isn't
> > +        * overwritten by encapsulated customer tci.
> > +        * Ethertype is cleared because it is c_tpid.
> > +        */
> > +       *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
> > +       *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
> > +
> > +       encap = a[OVS_KEY_ATTR_ENCAP];
> > +
> > +       if (is_mask)
> > +               err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
> > +       else
> > +               err = parse_flow_nlattrs(encap, a, key_attrs, log);
> > +
> > +       return err;
> > +}
> > +
> > +static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
> > +                                  u64 *key_attrs, bool *ie_valid,
> > +                                  const struct nlattr **a, bool is_mask,
> > +                                  bool log)
> > +{
> > +       int err;
> > +
> > +       err = __parse_vlan_from_nlattrs(match, key_attrs,
> > +                                       false, a, is_mask, log);
> > +       if (err)
> > +               return err;
> > +
> > +       if (!is_mask) {
> > +               if ((*key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
> > +                   eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]))) {
> > +
> > +                       if (!((*key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
> > +                             (*key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
> > +                               OVS_NLERR(log, "Invalid Inner VLAN frame");
> > +                               return -EINVAL;
> > +                       }
> > +                       *ie_valid = true;
> > +               }
> > +       } else {
> > +               if (*key_attrs & (1 << OVS_KEY_ATTR_ENCAP)) {
> > +                       if (!*ie_valid) {
> > +                               OVS_NLERR(log, "Encap mask attribute is set for non-CVLAN frame.");
> > +                               return -EINVAL;
> > +                       }
> > +               }
> > +       }
> > +
> This check can be moved to __parse_vlan_from_nlattrs(). This way there
> is no need to check ATTR_VLAN and ATTR_ENCAP here and in
> ovs_nla_get_match() function.
> ...
> 

I agree there could be some refactoring done here.

> > @@ -1182,11 +1301,11 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
> >                       bool log)
> >  {
> >         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
> > -       const struct nlattr *encap;
> >         struct nlattr *newmask = NULL;
> >         u64 key_attrs = 0;
> >         u64 mask_attrs = 0;
> >         bool encap_valid = false;
> > +       bool i_encap_valid = false;
> >         int err;
> >
> >         err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
> > @@ -1195,35 +1314,20 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
> >
> >         if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
> >             (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
> > -           (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
> > -               __be16 tci;
> > +           eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]))) {
> >
> >                 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
> >                       (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
> >                         OVS_NLERR(log, "Invalid Vlan frame.");
> >                         return -EINVAL;
> >                 }
> ...
> ...
> > @@ -1464,17 +1547,37 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
> >         ether_addr_copy(eth_key->eth_src, output->eth.src);
> >         ether_addr_copy(eth_key->eth_dst, output->eth.dst);
> >
> > -       if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
> > +       if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
> >                 __be16 eth_type;
> > -               eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
> > +               eth_type = !is_mask ? output->eth.vlan.tpid : htons(0xffff);
> >                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
> > -                   nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
> > +                   nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.vlan.tci))
> >                         goto nla_put_failure;
> >                 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
> > -               if (!swkey->eth.tci)
> > +               if (!swkey->eth.vlan.tci)
> >                         goto unencap;
> > -       } else
> > -               encap = NULL;
> > +
> > +               if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
> > +                       /* Customer tci is nested but uses same key attribute.
> > +                        */
> > +                       eth_type = !is_mask ? output->eth.cvlan.tpid : htons(0xffff);
> > +                       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
> > +                           nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.cvlan.tci))
> > +                               goto nla_put_failure;
> > +                       in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
> > +                       if (!swkey->eth.cvlan.tci)
> > +                               goto unencap;
> > +               }
> > +       }
> there could be helper function to serialize "struct vlan_head" to
> avoid duplicate code above.
> 

Good idea. I'll add a helper.

> > +
> > +       if (eth_type_vlan(swkey->eth.type) && encap && in_encap) {
> 
> I think check for eth.type is sufficient here. encap and in_encap
> should be true in this case anyways.
> 

Yes. You're right. Call it paranoia.
Would a BUG_ON(!(encap && in_encap)) be better?

> > +               /* There are 3 VLAN tags, we don't know anything about the rest of the
> > +                * packet, so truncate here.
> > +                */
> > +               if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
> 
> After this output->eth.type would be serialized three times, can you
> move the check for third vlan tag after it is serialized so as to
> avoid it.
> 

Will do.

> > +                       goto nla_put_failure;
> > +               goto unencap;
> > +       }
> >
> >         if (swkey->eth.type == htons(ETH_P_802_2)) {
> >                 /*
Jiri Benc Sept. 4, 2016, 11:05 a.m. UTC | #4
On Sat, 3 Sep 2016 13:30:12 -0400, Eric Garver wrote:
> Would a BUG_ON(!(encap && in_encap)) be better?

Please don't crash the kernel for something that could very well
continue without problems. Use WARN_ON at most.

And if you go that way, WARN_ON_ONCE or rate limiting seems to be even
more appropriate, because if this triggers, it's quite possible it will
trigger repeatedly and the resulting log flood would practically make
the machine useless anyway.

Thanks,

 Jiri
Eric Garver Sept. 6, 2016, 12:34 p.m. UTC | #5
On Sun, Sep 04, 2016 at 01:05:46PM +0200, Jiri Benc wrote:
> On Sat, 3 Sep 2016 13:30:12 -0400, Eric Garver wrote:
> > Would a BUG_ON(!(encap && in_encap)) be better?
> 
> Please don't crash the kernel for something that could very well
> continue without problems. Use WARN_ON at most.

Thanks Jiri. WARN_ON_ONCE() is more appropriate for what I was looking
for.

> And if you go that way, WARN_ON_ONCE or rate limiting seems to be even
> more appropriate, because if this triggers, it's quite possible it will
> trigger repeatedly and the resulting log flood would practically make
> the machine useless anyway.
> 
> Thanks,
> 
>  Jiri
diff mbox

Patch

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ca91fc33f8a9..4fe9032b1160 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -246,20 +246,24 @@  static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 	int err;
 
 	err = skb_vlan_pop(skb);
-	if (skb_vlan_tag_present(skb))
+	if (skb_vlan_tag_present(skb)) {
 		invalidate_flow_key(key);
-	else
-		key->eth.tci = 0;
+	} else {
+		key->eth.vlan.tci = 0;
+		key->eth.vlan.tpid = 0;
+	}
 	return err;
 }
 
 static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
 		     const struct ovs_action_push_vlan *vlan)
 {
-	if (skb_vlan_tag_present(skb))
+	if (skb_vlan_tag_present(skb)) {
 		invalidate_flow_key(key);
-	else
-		key->eth.tci = vlan->vlan_tci;
+	} else {
+		key->eth.vlan.tci = vlan->vlan_tci;
+		key->eth.vlan.tpid = vlan->vlan_tpid;
+	}
 	return skb_vlan_push(skb, vlan->vlan_tpid,
 			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
 }
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 0ea128eeeab2..13f6ebdf379b 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -302,24 +302,56 @@  static bool icmp6hdr_ok(struct sk_buff *skb)
 				  sizeof(struct icmp6hdr));
 }
 
-static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+/**
+ * Parse vlan tag from vlan header.
+ * Returns ERROR on memory error.
+ * Returns 0 if it encounters a non-vlan or incomplete packet.
+ * Returns 1 after successfully parsing vlan tag.
+ */
+static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *vlan)
 {
-	struct qtag_prefix {
-		__be16 eth_type; /* ETH_P_8021Q */
-		__be16 tci;
-	};
-	struct qtag_prefix *qp;
+	struct vlan_head *qp = (struct vlan_head *)skb->data;
 
-	if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
+	if (likely(!eth_type_vlan(qp->tpid)))
 		return 0;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
-					 sizeof(__be16))))
+	if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))
+		return 0;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) +
+				 sizeof(__be16))))
 		return -ENOMEM;
 
-	qp = (struct qtag_prefix *) skb->data;
-	key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
-	__skb_pull(skb, sizeof(struct qtag_prefix));
+	vlan->tci = qp->tci | htons(VLAN_TAG_PRESENT);
+	vlan->tpid = qp->tpid;
+
+	__skb_pull(skb, sizeof(struct vlan_head));
+	return 1;
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	int res;
+
+	key->eth.vlan.tci = 0;
+	key->eth.vlan.tpid = 0;
+	key->eth.cvlan.tci = 0;
+	key->eth.cvlan.tpid = 0;
+
+	if (likely(skb_vlan_tag_present(skb))) {
+		key->eth.vlan.tci = htons(skb->vlan_tci);
+		key->eth.vlan.tpid = skb->vlan_proto;
+	} else {
+		/* Parse outer vlan tag in the non-accelerated case. */
+		res = parse_vlan_tag(skb, &key->eth.vlan);
+		if (res <= 0)
+			return res;
+	}
+
+	/* Parse inner vlan tag. */
+	res = parse_vlan_tag(skb, &key->eth.cvlan);
+	if (res <= 0)
+		return res;
 
 	return 0;
 }
@@ -480,12 +512,8 @@  static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 	 * update skb->csum here.
 	 */
 
-	key->eth.tci = 0;
-	if (skb_vlan_tag_present(skb))
-		key->eth.tci = htons(skb->vlan_tci);
-	else if (eth->h_proto == htons(ETH_P_8021Q))
-		if (unlikely(parse_vlan(skb, key)))
-			return -ENOMEM;
+	if (unlikely(parse_vlan(skb, key)))
+		return -ENOMEM;
 
 	key->eth.type = parse_ethertype(skb);
 	if (unlikely(key->eth.type == htons(0)))
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 03378e75a67c..156a3029c17b 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -50,6 +50,11 @@  struct ovs_tunnel_info {
 	struct metadata_dst	*tun_dst;
 };
 
+struct vlan_head {
+	__be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/
+	__be16 tci;  /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+};
+
 #define OVS_SW_FLOW_KEY_METADATA_SIZE			\
 	(offsetof(struct sw_flow_key, recirc_id) +	\
 	FIELD_SIZEOF(struct sw_flow_key, recirc_id))
@@ -69,7 +74,8 @@  struct sw_flow_key {
 	struct {
 		u8     src[ETH_ALEN];	/* Ethernet source address. */
 		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
-		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+		struct vlan_head vlan;
+		struct vlan_head cvlan;
 		__be16 type;		/* Ethernet frame type. */
 	} eth;
 	union {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c78a6a1476fb..fbe9e0e4792d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -935,7 +935,7 @@  static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
 			return -EINVAL;
 		}
 
-		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
+		SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
 		attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
 	}
 
@@ -1160,6 +1160,125 @@  static void mask_set_nlattr(struct nlattr *attr, u8 val)
 	nlattr_set(attr, val, ovs_key_lens);
 }
 
+static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
+				    const struct nlattr *a[],
+				    bool is_mask, bool inner, bool log)
+{
+	__be16 tci = 0;
+	__be16 tpid = 0;
+
+	if (a[OVS_KEY_ATTR_VLAN])
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+	if (a[OVS_KEY_ATTR_ETHERTYPE])
+		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+	if (is_mask && tpid != htons(0xffff)) {
+		OVS_NLERR(log, "Must have an exact match on the %s TPID (mask=0x%x).",
+		          (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
+		return -EINVAL;
+	}
+	if (!(tci & htons(VLAN_TAG_PRESENT))) {
+		if (!is_mask && !tci) {
+			/* Corner case for truncated VLAN header. */
+			if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
+				OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
+				          (inner) ? "C-VLAN" : "VLAN");
+				return -EINVAL;
+			}
+		} else {
+			if (is_mask)
+				OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
+				          (inner) ? "C-VLAN" : "VLAN");
+			else
+				OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
+				          (inner) ? "C-VLAN" : "VLAN");
+			return -EINVAL;
+		}
+	}
+	if (likely(!inner)) {
+		SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
+		SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
+	} else {
+		SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
+		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
+	}
+	return 0;
+}
+
+static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
+				     u64 *key_attrs, bool inner,
+				     const struct nlattr **a, bool is_mask,
+				     bool log)
+{
+	int err;
+	const struct nlattr *encap;
+
+	err = encode_vlan_from_nlattrs(match, a, is_mask, inner, log);
+	if (err)
+		return err;
+
+	*key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+
+	/* Ensure that tci key attribute isn't
+	 * overwritten by encapsulated customer tci.
+	 * Ethertype is cleared because it is c_tpid.
+	 */
+	*key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+	*key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+
+	encap = a[OVS_KEY_ATTR_ENCAP];
+
+	if (is_mask)
+		err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
+	else
+		err = parse_flow_nlattrs(encap, a, key_attrs, log);
+
+	return err;
+}
+
+static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
+				   u64 *key_attrs, bool *ie_valid,
+				   const struct nlattr **a, bool is_mask,
+				   bool log)
+{
+	int err;
+
+	err = __parse_vlan_from_nlattrs(match, key_attrs,
+	                                false, a, is_mask, log);
+	if (err)
+		return err;
+
+	if (!is_mask) {
+		if ((*key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
+		    eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]))) {
+
+			if (!((*key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
+			      (*key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
+				OVS_NLERR(log, "Invalid Inner VLAN frame");
+				return -EINVAL;
+			}
+			*ie_valid = true;
+		}
+	} else {
+		if (*key_attrs & (1 << OVS_KEY_ATTR_ENCAP)) {
+			if (!*ie_valid) {
+				OVS_NLERR(log, "Encap mask attribute is set for non-CVLAN frame.");
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (*ie_valid) {
+		err = __parse_vlan_from_nlattrs(match, key_attrs,
+		                                true, a, is_mask, log);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 /**
  * ovs_nla_get_match - parses Netlink attributes into a flow key and
  * mask. In case the 'mask' is NULL, the flow is treated as exact match
@@ -1182,11 +1301,11 @@  int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
 		      bool log)
 {
 	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
-	const struct nlattr *encap;
 	struct nlattr *newmask = NULL;
 	u64 key_attrs = 0;
 	u64 mask_attrs = 0;
 	bool encap_valid = false;
+	bool i_encap_valid = false;
 	int err;
 
 	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
@@ -1195,35 +1314,20 @@  int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
 
 	if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
 	    (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
-	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
-		__be16 tci;
+	    eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]))) {
 
 		if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
 		      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
 			OVS_NLERR(log, "Invalid Vlan frame.");
 			return -EINVAL;
 		}
-
-		key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
-		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-		encap = a[OVS_KEY_ATTR_ENCAP];
-		key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
 		encap_valid = true;
 
-		if (tci & htons(VLAN_TAG_PRESENT)) {
-			err = parse_flow_nlattrs(encap, a, &key_attrs, log);
-			if (err)
-				return err;
-		} else if (!tci) {
-			/* Corner case for truncated 802.1Q header. */
-			if (nla_len(encap)) {
-				OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute.");
-				return -EINVAL;
-			}
-		} else {
-			OVS_NLERR(log, "Encap attr is set for non-VLAN frame");
-			return  -EINVAL;
-		}
+		err = parse_vlan_from_nlattrs(match, &key_attrs,
+					      &i_encap_valid, a, false,
+					      log);
+		if (err)
+			return err;
 	}
 
 	err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
@@ -1265,11 +1369,10 @@  int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
 			goto free_newmask;
 
 		/* Always match on tci. */
-		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+		SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
+		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
 
 		if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
-			__be16 eth_type = 0;
-			__be16 tci = 0;
 
 			if (!encap_valid) {
 				OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame.");
@@ -1277,33 +1380,11 @@  int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
 				goto free_newmask;
 			}
 
-			mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
-			if (a[OVS_KEY_ATTR_ETHERTYPE])
-				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-
-			if (eth_type == htons(0xffff)) {
-				mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
-				encap = a[OVS_KEY_ATTR_ENCAP];
-				err = parse_flow_mask_nlattrs(encap, a,
-							      &mask_attrs, log);
-				if (err)
-					goto free_newmask;
-			} else {
-				OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).",
-					  ntohs(eth_type));
-				err = -EINVAL;
-				goto free_newmask;
-			}
-
-			if (a[OVS_KEY_ATTR_VLAN])
-				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-
-			if (!(tci & htons(VLAN_TAG_PRESENT))) {
-				OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).",
-					  ntohs(tci));
-				err = -EINVAL;
+			err = parse_vlan_from_nlattrs(match, &mask_attrs,
+						      &i_encap_valid,
+						      a, true, log);
+			if (err)
 				goto free_newmask;
-			}
 		}
 
 		err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
@@ -1415,7 +1496,9 @@  static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 			     struct sk_buff *skb)
 {
 	struct ovs_key_ethernet *eth_key;
-	struct nlattr *nla, *encap;
+	struct nlattr *nla;
+	struct nlattr *encap = NULL;
+	struct nlattr *in_encap = NULL;
 
 	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
 		goto nla_put_failure;
@@ -1464,17 +1547,37 @@  static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 	ether_addr_copy(eth_key->eth_src, output->eth.src);
 	ether_addr_copy(eth_key->eth_dst, output->eth.dst);
 
-	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
+	if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
 		__be16 eth_type;
-		eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
+		eth_type = !is_mask ? output->eth.vlan.tpid : htons(0xffff);
 		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
-		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
+		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.vlan.tci))
 			goto nla_put_failure;
 		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
-		if (!swkey->eth.tci)
+		if (!swkey->eth.vlan.tci)
 			goto unencap;
-	} else
-		encap = NULL;
+
+		if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
+			/* Customer tci is nested but uses same key attribute.
+			 */
+			eth_type = !is_mask ? output->eth.cvlan.tpid : htons(0xffff);
+			if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+			    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.cvlan.tci))
+				goto nla_put_failure;
+			in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+			if (!swkey->eth.cvlan.tci)
+				goto unencap;
+		}
+	}
+
+	if (eth_type_vlan(swkey->eth.type) && encap && in_encap) {
+		/* There are 3 VLAN tags; we don't know anything about the
+		 * rest of the packet, so truncate here.
+		 */
+		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
+			goto nla_put_failure;
+		goto unencap;
+	}
 
 	if (swkey->eth.type == htons(ETH_P_802_2)) {
 		/*
@@ -1619,6 +1722,8 @@  static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 	}
 
 unencap:
+	if (in_encap)
+		nla_nest_end(skb, in_encap);
 	if (encap)
 		nla_nest_end(skb, encap);
 
@@ -2283,7 +2388,7 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 
 		case OVS_ACTION_ATTR_PUSH_VLAN:
 			vlan = nla_data(a);
-			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
+			if (!eth_type_vlan(vlan->vlan_tpid))
 				return -EINVAL;
 			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
 				return -EINVAL;
@@ -2388,7 +2493,7 @@  int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 
 	(*sfa)->orig_len = nla_len(attr);
 	err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
-				     key->eth.tci, log);
+				     key->eth.vlan.tci, log);
 	if (err)
 		ovs_nla_free_flow_actions(*sfa);
 
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 6b21fd068d87..8f198437c724 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -485,9 +485,14 @@  static unsigned int packet_length(const struct sk_buff *skb)
 {
 	unsigned int length = skb->len - ETH_HLEN;
 
-	if (skb->protocol == htons(ETH_P_8021Q))
+	if (skb_vlan_tagged(skb))
 		length -= VLAN_HLEN;
 
+	/* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
+	 * (ETH_HLEN + VLAN_HLEN) in addition to the mtu value, but almost none
+	 * account for 802.1ad, e.g. is_skb_forwardable().
+	 */
+
 	return length;
 }