Message ID | 1571580702-18476-1-git-send-email-martinvarghesenokia@gmail.com |
---|---|
State | Changes Requested |
Delegated to: | David Miller |
Headers | show |
Series | [v2] Change in Openvswitch to support MPLS label depth of 3 in ingress direction | expand |
On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese <martinvarghesenokia@gmail.com> wrote: > > From: Martin Varghese <martin.varghese@nokia.com> > > The openvswitch was supporting a MPLS label depth of 1 in the ingress > direction though the userspace OVS supports a max depth of 3 labels. > This change enables openvswitch module to support a max depth of > 3 labels in the ingress. > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > --- > Changes in v2 > - Moved MPLS count validation from datapath to configuration. > - Fixed set mpls function. > This patch looks pretty close now. > net/openvswitch/actions.c | 2 +- > net/openvswitch/flow.c | 20 ++++++++++----- > net/openvswitch/flow.h | 9 ++++--- > net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++--------- > 4 files changed, 66 insertions(+), 22 deletions(-) > ... > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c > index d7559c6..21de061 100644 > --- a/net/openvswitch/flow_netlink.c > +++ b/net/openvswitch/flow_netlink.c > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void) > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > .next = ovs_tunnel_key_lens, }, > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { > const struct ovs_key_mpls *mpls_key; > + u32 hdr_len; > + u32 label_count, label_count_mask, i; > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > - mpls_key->mpls_lse, is_mask); > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > + > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > + hdr_len % sizeof(struct ovs_key_mpls)) > + return -EINVAL; > + > + label_count_mask = GENMASK(label_count - 1, 0); > + > + for (i = 0 ; i < label_count; i++) > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > + mpls_key[i].mpls_lse, is_mask); > + > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > + label_count_mask, is_mask); > > attrs &= ~(1 << OVS_KEY_ATTR_MPLS); > } > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > } else if (eth_p_mpls(swkey->eth.type)) { > + u8 i, num_labels; > struct ovs_key_mpls *mpls_key; > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > + num_labels = hweight_long(output->mpls.num_labels_mask); > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > + num_labels * sizeof(*mpls_key)); > if (!nla) > goto nla_put_failure; > + > mpls_key = nla_data(nla); > - mpls_key->mpls_lse = output->mpls.top_lse; > + for (i = 0; i < num_labels; i++) > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > } > > if ((swkey->eth.type == htons(ETH_P_IP) || > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > u8 mac_proto = ovs_key_mac_proto(key); > const struct nlattr *a; > int rem, err; > + u32 mpls_label_count = 0; > + > + if (eth_p_mpls(eth_type)) > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > The MPLS push and pop action could be part of nested actions in sample, so the count needs to be global count across such nested actions. have a look at validate_and_copy_sample(). > nla_for_each_nested(a, attr, rem) { > /* Expected argument lengths, (u32)-1 for variable length. */ > @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > !eth_p_mpls(eth_type))) > return -EINVAL; > eth_type = mpls->mpls_ethertype; > + mpls_label_count++; > break; > } > > - case OVS_ACTION_ATTR_POP_MPLS: > + case OVS_ACTION_ATTR_POP_MPLS: { > + __be16 proto; > if (vlan_tci & htons(VLAN_CFI_MASK) || > !eth_p_mpls(eth_type)) > return -EINVAL; > > - /* Disallow subsequent L2.5+ set and mpls_pop actions > - * as there is no check here to ensure that the new > - * eth_type is valid and thus set actions could > - * write off the end of the packet or otherwise > - * corrupt it. > + /* Disallow subsequent L2.5+ set actions as there is > + * no check here to ensure that the new eth type is > + * valid and thus set actions could write off the > + * end of the packet or otherwise corrupt it. > * > * Support for these actions is planned using packet > * recirculation. > */ This comment needs updated. > - eth_type = htons(0); > + proto = nla_get_be16(a); > + mpls_label_count--; > + > + if (!eth_p_mpls(proto) || !mpls_label_count) > + eth_type = htons(0); > + else > + eth_type = proto; > + > break; > + } > > case OVS_ACTION_ATTR_SET: > err = validate_set(a, key, sfa, > -- > 1.8.3.1 >
On Tue, Oct 22, 2019 at 12:03:49AM -0700, Pravin Shelar wrote: > On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese > <martinvarghesenokia@gmail.com> wrote: > > > > From: Martin Varghese <martin.varghese@nokia.com> > > > > The openvswitch was supporting a MPLS label depth of 1 in the ingress > > direction though the userspace OVS supports a max depth of 3 labels. > > This change enables openvswitch module to support a max depth of > > 3 labels in the ingress. > > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > > --- > > Changes in v2 > > - Moved MPLS count validation from datapath to configuration. > > - Fixed set mpls function. > > > This patch looks pretty close now. > > > net/openvswitch/actions.c | 2 +- > > net/openvswitch/flow.c | 20 ++++++++++----- > > net/openvswitch/flow.h | 9 ++++--- > > net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++--------- > > 4 files changed, 66 insertions(+), 22 deletions(-) > > > ... > > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c > > index d7559c6..21de061 100644 > > --- a/net/openvswitch/flow_netlink.c > > +++ b/net/openvswitch/flow_netlink.c > > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void) > > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > > .next = ovs_tunnel_key_lens, }, > > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > > > if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { > > const struct ovs_key_mpls *mpls_key; > > + u32 hdr_len; > > + u32 label_count, label_count_mask, i; > > > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > > - mpls_key->mpls_lse, is_mask); > > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > > + > > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > > + hdr_len % sizeof(struct ovs_key_mpls)) > > + return -EINVAL; > > + > > + label_count_mask = GENMASK(label_count - 1, 0); > > + > > + for (i = 0 ; i < label_count; i++) > > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > > + mpls_key[i].mpls_lse, is_mask); > > + > > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > > + label_count_mask, is_mask); > > > > attrs &= ~(1 << OVS_KEY_ATTR_MPLS); > > } > > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > > ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > > } else if (eth_p_mpls(swkey->eth.type)) { > > + u8 i, num_labels; > > struct ovs_key_mpls *mpls_key; > > > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > > + num_labels = hweight_long(output->mpls.num_labels_mask); > > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > > + num_labels * sizeof(*mpls_key)); > > if (!nla) > > goto nla_put_failure; > > + > > mpls_key = nla_data(nla); > > - mpls_key->mpls_lse = output->mpls.top_lse; > > + for (i = 0; i < num_labels; i++) > > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > > } > > > > if ((swkey->eth.type == htons(ETH_P_IP) || > > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > u8 mac_proto = ovs_key_mac_proto(key); > > const struct nlattr *a; > > int rem, err; > > + u32 mpls_label_count = 0; > > + > > + if (eth_p_mpls(eth_type)) > > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > > > The MPLS push and pop action could be part of nested actions in > sample, so the count needs to be global count across such nested > actions. have a look at validate_and_copy_sample(). > can we embed the mpls_label_count in struct sw_flow_actions? > > nla_for_each_nested(a, attr, rem) { > > /* Expected argument lengths, (u32)-1 for variable length. */ > > @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > !eth_p_mpls(eth_type))) > > return -EINVAL; > > eth_type = mpls->mpls_ethertype; > > + mpls_label_count++; > > break; > > } > > > > - case OVS_ACTION_ATTR_POP_MPLS: > > + case OVS_ACTION_ATTR_POP_MPLS: { > > + __be16 proto; > > if (vlan_tci & htons(VLAN_CFI_MASK) || > > !eth_p_mpls(eth_type)) > > return -EINVAL; > > > > - /* Disallow subsequent L2.5+ set and mpls_pop actions > > - * as there is no check here to ensure that the new > > - * eth_type is valid and thus set actions could > > - * write off the end of the packet or otherwise > > - * corrupt it. > > + /* Disallow subsequent L2.5+ set actions as there is > > + * no check here to ensure that the new eth type is > > + * valid and thus set actions could write off the > > + * end of the packet or otherwise corrupt it. > > * > > * Support for these actions is planned using packet > > * recirculation. > > */ > This comment needs updated. > > Disallow subsequent L2.5+ set and mpls_pop actions once the last MPLS label in the packet is popped as there is no check here to ensure that the new eth_type is valid and thus set actions could write off the end of the packet or otherwise corrupt it. Support for these actions is planned using packet recirculation. > > - eth_type = htons(0); > > + proto = nla_get_be16(a); > > + mpls_label_count--; > > + > > + if (!eth_p_mpls(proto) || !mpls_label_count) > > + eth_type = htons(0); > > + else > > + eth_type = proto; > > + > > break; > > + } > > > > case OVS_ACTION_ATTR_SET: > > err = validate_set(a, key, sfa, > > -- > > 1.8.3.1 > >
On Tue, Oct 22, 2019 at 12:03:49AM -0700, Pravin Shelar wrote: > On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese > <martinvarghesenokia@gmail.com> wrote: > > > > From: Martin Varghese <martin.varghese@nokia.com> > > > > The openvswitch was supporting a MPLS label depth of 1 in the ingress > > direction though the userspace OVS supports a max depth of 3 labels. > > This change enables openvswitch module to support a max depth of > > 3 labels in the ingress. > > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > > --- > > Changes in v2 > > - Moved MPLS count validation from datapath to configuration. > > - Fixed set mpls function. > > > This patch looks pretty close now. > > > net/openvswitch/actions.c | 2 +- > > net/openvswitch/flow.c | 20 ++++++++++----- > > net/openvswitch/flow.h | 9 ++++--- > > net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++--------- > > 4 files changed, 66 insertions(+), 22 deletions(-) > > > ... > > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c > > index d7559c6..21de061 100644 > > --- a/net/openvswitch/flow_netlink.c > > +++ b/net/openvswitch/flow_netlink.c > > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void) > > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > > .next = ovs_tunnel_key_lens, }, > > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > > > if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { > > const struct ovs_key_mpls *mpls_key; > > + u32 hdr_len; > > + u32 label_count, label_count_mask, i; > > > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > > - mpls_key->mpls_lse, is_mask); > > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > > + > > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > > + hdr_len % sizeof(struct ovs_key_mpls)) > > + return -EINVAL; > > + > > + label_count_mask = GENMASK(label_count - 1, 0); > > + > > + for (i = 0 ; i < label_count; i++) > > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > > + mpls_key[i].mpls_lse, is_mask); > > + > > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > > + label_count_mask, is_mask); > > > > attrs &= ~(1 << OVS_KEY_ATTR_MPLS); > > } > > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > > ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > > } else if (eth_p_mpls(swkey->eth.type)) { > > + u8 i, num_labels; > > struct ovs_key_mpls *mpls_key; > > > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > > + num_labels = hweight_long(output->mpls.num_labels_mask); > > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > > + num_labels * sizeof(*mpls_key)); > > if (!nla) > > goto nla_put_failure; > > + > > mpls_key = nla_data(nla); > > - mpls_key->mpls_lse = output->mpls.top_lse; > > + for (i = 0; i < num_labels; i++) > > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > > } > > > > if ((swkey->eth.type == htons(ETH_P_IP) || > > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > u8 mac_proto = ovs_key_mac_proto(key); > > const struct nlattr *a; > > int rem, err; > > + u32 mpls_label_count = 0; > > + > > + if (eth_p_mpls(eth_type)) > > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > > > The MPLS push and pop action could be part of nested actions in > sample, so the count needs to be global count across such nested > actions. have a look at validate_and_copy_sample(). > Embedding mpls_label_count in struct sw_flow_actions will not work for clone I guess we need to move the below code to ovs_nla_copy_actions and extend the arguments of __ovs_nla_copy_actions to take mpls_label_count also if (eth_p_mpls(eth_type)) mpls_label_count = hweight_long(key->mpls.num_labels_mask) > > nla_for_each_nested(a, attr, rem) { > > /* Expected argument lengths, (u32)-1 for variable length. */ > > @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > !eth_p_mpls(eth_type))) > > return -EINVAL; > > eth_type = mpls->mpls_ethertype; > > + mpls_label_count++; > > break; > > } > > > > - case OVS_ACTION_ATTR_POP_MPLS: > > + case OVS_ACTION_ATTR_POP_MPLS: { > > + __be16 proto; > > if (vlan_tci & htons(VLAN_CFI_MASK) || > > !eth_p_mpls(eth_type)) > > return -EINVAL; > > > > - /* Disallow subsequent L2.5+ set and mpls_pop actions > > - * as there is no check here to ensure that the new > > - * eth_type is valid and thus set actions could > > - * write off the end of the packet or otherwise > > - * corrupt it. > > + /* Disallow subsequent L2.5+ set actions as there is > > + * no check here to ensure that the new eth type is > > + * valid and thus set actions could write off the > > + * end of the packet or otherwise corrupt it. > > * > > * Support for these actions is planned using packet > > * recirculation. > > */ > This comment needs updated. > > > > - eth_type = htons(0); > > + proto = nla_get_be16(a); > > + mpls_label_count--; > > + > > + if (!eth_p_mpls(proto) || !mpls_label_count) > > + eth_type = htons(0); > > + else > > + eth_type = proto; > > + > > break; > > + } > > > > case OVS_ACTION_ATTR_SET: > > err = validate_set(a, key, sfa, > > -- > > 1.8.3.1 > >
On Tue, Oct 22, 2019 at 8:29 AM Martin Varghese <martinvarghesenokia@gmail.com> wrote: > > On Tue, Oct 22, 2019 at 12:03:49AM -0700, Pravin Shelar wrote: > > On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese > > <martinvarghesenokia@gmail.com> wrote: > > > > > > From: Martin Varghese <martin.varghese@nokia.com> > > > > > > The openvswitch was supporting a MPLS label depth of 1 in the ingress > > > direction though the userspace OVS supports a max depth of 3 labels. > > > This change enables openvswitch module to support a max depth of > > > 3 labels in the ingress. > > > > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > > > --- > > > Changes in v2 > > > - Moved MPLS count validation from datapath to configuration. > > > - Fixed set mpls function. > > > > > This patch looks pretty close now. > > > > > net/openvswitch/actions.c | 2 +- > > > net/openvswitch/flow.c | 20 ++++++++++----- > > > net/openvswitch/flow.h | 9 ++++--- > > > net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++--------- > > > 4 files changed, 66 insertions(+), 22 deletions(-) > > > > > ... > > > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c > > > index d7559c6..21de061 100644 > > > --- a/net/openvswitch/flow_netlink.c > > > +++ b/net/openvswitch/flow_netlink.c > > > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void) > > > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > > > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > > > .next = ovs_tunnel_key_lens, }, > > > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > > > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > > > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > > > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > > > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > > > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > > > > > if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { > > > const struct ovs_key_mpls *mpls_key; > > > + u32 hdr_len; > > > + u32 label_count, label_count_mask, i; > > > > > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > > > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > > > - mpls_key->mpls_lse, is_mask); > > > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > > > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > > > + > > > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > > > + hdr_len % sizeof(struct ovs_key_mpls)) > > > + return -EINVAL; > > > + > > > + label_count_mask = GENMASK(label_count - 1, 0); > > > + > > > + for (i = 0 ; i < label_count; i++) > > > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > > > + mpls_key[i].mpls_lse, is_mask); > > > + > > > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > > > + label_count_mask, is_mask); > > > > > > attrs &= ~(1 << OVS_KEY_ATTR_MPLS); > > > } > > > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > > > ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > > > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > > > } else if (eth_p_mpls(swkey->eth.type)) { > > > + u8 i, num_labels; > > > struct ovs_key_mpls *mpls_key; > > > > > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > > > + num_labels = hweight_long(output->mpls.num_labels_mask); > > > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > > > + num_labels * sizeof(*mpls_key)); > > > if (!nla) > > > goto nla_put_failure; > > > + > > > mpls_key = nla_data(nla); > > > - mpls_key->mpls_lse = output->mpls.top_lse; > > > + for (i = 0; i < num_labels; i++) > > > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > > > } > > > > > > if ((swkey->eth.type == htons(ETH_P_IP) || > > > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > > u8 mac_proto = ovs_key_mac_proto(key); > > > const struct nlattr *a; > > > int rem, err; > > > + u32 mpls_label_count = 0; > > > + > > > + if (eth_p_mpls(eth_type)) > > > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > > > > > The MPLS push and pop action could be part of nested actions in > > sample, so the count needs to be global count across such nested > > actions. have a look at validate_and_copy_sample(). > > > Embedding mpls_label_count in struct sw_flow_actions will not work for clone > > I guess we need to move the below code to ovs_nla_copy_actions and extend the arguments of __ovs_nla_copy_actions to take mpls_label_count also > if (eth_p_mpls(eth_type)) > mpls_label_count = hweight_long(key->mpls.num_labels_mask) > > I am not suggesting changing sw_flow_actions, You can define count variable in ovs_nla_copy_actions() and pass it as a pointer to nested function. That can be used to keep track of MPLS labels at all nested actions.
On Tue, Oct 22, 2019 at 08:59:46PM -0700, Pravin Shelar wrote: > On Tue, Oct 22, 2019 at 8:29 AM Martin Varghese > <martinvarghesenokia@gmail.com> wrote: > > > > On Tue, Oct 22, 2019 at 12:03:49AM -0700, Pravin Shelar wrote: > > > On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese > > > <martinvarghesenokia@gmail.com> wrote: > > > > > > > > From: Martin Varghese <martin.varghese@nokia.com> > > > > > > > > The openvswitch was supporting a MPLS label depth of 1 in the ingress > > > > direction though the userspace OVS supports a max depth of 3 labels. > > > > This change enables openvswitch module to support a max depth of > > > > 3 labels in the ingress. > > > > > > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > > > > --- > > > > Changes in v2 > > > > - Moved MPLS count validation from datapath to configuration. > > > > - Fixed set mpls function. > > > > > > > This patch looks pretty close now. > > > > > > > net/openvswitch/actions.c | 2 +- > > > > net/openvswitch/flow.c | 20 ++++++++++----- > > > > net/openvswitch/flow.h | 9 ++++--- > > > > net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++--------- > > > > 4 files changed, 66 insertions(+), 22 deletions(-) > > > > > > > ... > > > > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c > > > > index d7559c6..21de061 100644 > > > > --- a/net/openvswitch/flow_netlink.c > > > > +++ b/net/openvswitch/flow_netlink.c > > > > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void) > > > > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > > > > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > > > > .next = ovs_tunnel_key_lens, }, > > > > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > > > > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > > > > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > > > > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > > > > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > > > > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > > > > > > > if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { > > > > const struct ovs_key_mpls *mpls_key; > > > > + u32 hdr_len; > > > > + u32 label_count, label_count_mask, i; > > > > > > > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > > > > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > > > > - mpls_key->mpls_lse, is_mask); > > > > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > > > > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > > > > + > > > > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > > > > + hdr_len % sizeof(struct ovs_key_mpls)) > > > > + return -EINVAL; > > > > + > > > > + label_count_mask = GENMASK(label_count - 1, 0); > > > > + > > > > + for (i = 0 ; i < label_count; i++) > > > > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > > > > + mpls_key[i].mpls_lse, is_mask); > > > > + > > > > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > > > > + label_count_mask, is_mask); > > > > > > > > attrs &= ~(1 << OVS_KEY_ATTR_MPLS); > > > > } > > > > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > > > > ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > > > > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > > > > } else if (eth_p_mpls(swkey->eth.type)) { > > > > + u8 i, num_labels; > > > > struct ovs_key_mpls *mpls_key; > > > > > > > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > > > > + num_labels = hweight_long(output->mpls.num_labels_mask); > > > > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > > > > + num_labels * sizeof(*mpls_key)); > > > > if (!nla) > > > > goto nla_put_failure; > > > > + > > > > mpls_key = nla_data(nla); > > > > - mpls_key->mpls_lse = output->mpls.top_lse; > > > > + for (i = 0; i < num_labels; i++) > > > > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > > > > } > > > > > > > > if ((swkey->eth.type == htons(ETH_P_IP) || > > > > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > > > u8 mac_proto = ovs_key_mac_proto(key); > > > > const struct nlattr *a; > > > > int rem, err; > > > > + u32 mpls_label_count = 0; > > > > + > > > > + if (eth_p_mpls(eth_type)) > > > > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > > > > > > > The MPLS push and pop action could be part of nested actions in > > > sample, so the count needs to be global count across such nested > > > actions. have a look at validate_and_copy_sample(). > > > > > Embedding mpls_label_count in struct sw_flow_actions will not work for clone > > > > I guess we need to move the below code to ovs_nla_copy_actions and extend the arguments of __ovs_nla_copy_actions to take mpls_label_count also > > if (eth_p_mpls(eth_type)) > > mpls_label_count = hweight_long(key->mpls.num_labels_mask) > > > > > I am not suggesting changing sw_flow_actions, You can define count > variable in ovs_nla_copy_actions() and pass it as a pointer to nested > function. That can be used to keep track of MPLS labels at all nested > actions. Actions clone & sample does a clone of SKB correct? Hence shouldn't they maintain a seperate mpls_label count for each nested action set I assume instead of passing as pointer from ovs_nla_copy_actions ,if passed by value it should solve the problem.Need to try that though.
On Sun, Oct 20, 2019 at 07:41:42PM +0530, Martin Varghese wrote: > From: Martin Varghese <martin.varghese@nokia.com> > > The openvswitch was supporting a MPLS label depth of 1 in the ingress > direction though the userspace OVS supports a max depth of 3 labels. > This change enables openvswitch module to support a max depth of > 3 labels in the ingress. > Hi Martin, Thanks for the patch. I have one comment below. > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > --- > Changes in v2 > - Moved MPLS count validation from datapath to configuration. > - Fixed set mpls function. > > net/openvswitch/actions.c | 2 +- > net/openvswitch/flow.c | 20 ++++++++++----- > net/openvswitch/flow.h | 9 ++++--- > net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++--------- > 4 files changed, 66 insertions(+), 22 deletions(-) > > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c > index 3572e11..f3125d7 100644 > --- a/net/openvswitch/actions.c > +++ b/net/openvswitch/actions.c > @@ -199,7 +199,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, > if (err) > return err; > > - flow_key->mpls.top_lse = lse; > + flow_key->mpls.lse[0] = lse; > return 0; > } > > diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c > index dca3b1e..c101355 100644 > --- a/net/openvswitch/flow.c > +++ b/net/openvswitch/flow.c > @@ -699,27 +699,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) > memset(&key->ipv4, 0, sizeof(key->ipv4)); > } > } else if (eth_p_mpls(key->eth.type)) { > - size_t stack_len = MPLS_HLEN; > + u8 label_count = 1; > > + memset(&key->mpls, 0, sizeof(key->mpls)); > skb_set_inner_network_header(skb, skb->mac_len); > while (1) { > __be32 lse; > > - error = check_header(skb, skb->mac_len + stack_len); > + error = check_header(skb, skb->mac_len + > + label_count * MPLS_HLEN); > if (unlikely(error)) > return 0; > > memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); > > - if (stack_len == MPLS_HLEN) > - memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); > + if (label_count <= MPLS_LABEL_DEPTH) > + memcpy(&key->mpls.lse[label_count - 1], &lse, > + MPLS_HLEN); > > - skb_set_inner_network_header(skb, skb->mac_len + stack_len); > + skb_set_inner_network_header(skb, skb->mac_len + > + label_count * MPLS_HLEN); > if (lse & htonl(MPLS_LS_S_MASK)) > break; > > - stack_len += MPLS_HLEN; > + label_count++; > } > + if (label_count > MPLS_LABEL_DEPTH) > + label_count = MPLS_LABEL_DEPTH; > + > + key->mpls.num_labels_mask = GENMASK(label_count - 1, 0); > } else if (key->eth.type == htons(ETH_P_IPV6)) { > int nh_len; /* IPv6 Header + Extensions */ > > diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h > index 3e2cc22..d9eccbe 100644 > --- a/net/openvswitch/flow.h > +++ b/net/openvswitch/flow.h > @@ -30,6 +30,7 @@ enum sw_flow_mac_proto { > MAC_PROTO_ETHERNET, > }; > #define SW_FLOW_KEY_INVALID 0x80 > +#define MPLS_LABEL_DEPTH 3 > > /* Store options at the end of the array if they are less than the > * maximum size. This allows us to get the benefits of variable length > @@ -85,9 +86,6 @@ struct sw_flow_key { > */ > union { > struct { > - __be32 top_lse; /* top label stack entry */ > - } mpls; > - struct { > u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ > u8 tos; /* IP ToS. */ > u8 ttl; /* IP TTL/hop limit. */ > @@ -135,6 +133,11 @@ struct sw_flow_key { > } nd; > }; > } ipv6; > + struct { > + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ Why using a bitmap here? why not just num_labels? I saw that you have to convert it using hweight_long() to num_labels a couple places below. Regards, William > + __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */ > + } mpls; > + > struct ovs_key_nsh nsh; /* network service header */ > }; > struct { > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c > index d7559c6..21de061 100644 > --- a/net/openvswitch/flow_netlink.c > +++ b/net/openvswitch/flow_netlink.c > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void) > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > .next = ovs_tunnel_key_lens, }, > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { > const struct ovs_key_mpls *mpls_key; > + u32 hdr_len; > + u32 label_count, label_count_mask, i; > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > - mpls_key->mpls_lse, is_mask); > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > + > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > + hdr_len % sizeof(struct ovs_key_mpls)) > + return -EINVAL; > + > + label_count_mask = GENMASK(label_count - 1, 0); > + > + for (i = 0 ; i < label_count; i++) > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > + mpls_key[i].mpls_lse, is_mask); > + > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > + label_count_mask, is_mask); > > attrs &= ~(1 << OVS_KEY_ATTR_MPLS); > } > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > } else if (eth_p_mpls(swkey->eth.type)) { > + u8 i, num_labels; > struct ovs_key_mpls *mpls_key; > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > + num_labels = hweight_long(output->mpls.num_labels_mask); > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > + num_labels * sizeof(*mpls_key)); > if (!nla) > goto nla_put_failure; > + > mpls_key = nla_data(nla); > - mpls_key->mpls_lse = output->mpls.top_lse; > + for (i = 0; i < num_labels; i++) > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > } > > if ((swkey->eth.type == htons(ETH_P_IP) || > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > u8 mac_proto = ovs_key_mac_proto(key); > const struct nlattr *a; > int rem, err; > + u32 mpls_label_count = 0; > + > + if (eth_p_mpls(eth_type)) > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > > nla_for_each_nested(a, attr, rem) { > /* Expected argument lengths, (u32)-1 for variable length. */ > @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > !eth_p_mpls(eth_type))) > return -EINVAL; > eth_type = mpls->mpls_ethertype; > + mpls_label_count++; > break; > } > > - case OVS_ACTION_ATTR_POP_MPLS: > + case OVS_ACTION_ATTR_POP_MPLS: { > + __be16 proto; > if (vlan_tci & htons(VLAN_CFI_MASK) || > !eth_p_mpls(eth_type)) > return -EINVAL; > > - /* Disallow subsequent L2.5+ set and mpls_pop actions > - * as there is no check here to ensure that the new > - * eth_type is valid and thus set actions could > - * write off the end of the packet or otherwise > - * corrupt it. > + /* Disallow subsequent L2.5+ set actions as there is > + * no check here to ensure that the new eth type is > + * valid and thus set actions could write off the > + * end of the packet or otherwise corrupt it. > * > * Support for these actions is planned using packet > * recirculation. > */ > - eth_type = htons(0); > + proto = nla_get_be16(a); > + mpls_label_count--; > + > + if (!eth_p_mpls(proto) || !mpls_label_count) > + eth_type = htons(0); > + else > + eth_type = proto; > + > break; > + } > > case OVS_ACTION_ATTR_SET: > err = validate_set(a, key, sfa, > -- > 1.8.3.1 >
On Thu, Oct 24, 2019 at 01:47:40PM -0700, William Tu wrote: > On Sun, Oct 20, 2019 at 07:41:42PM +0530, Martin Varghese wrote: > > From: Martin Varghese <martin.varghese@nokia.com> > > > > The openvswitch was supporting a MPLS label depth of 1 in the ingress > > direction though the userspace OVS supports a max depth of 3 labels. > > This change enables openvswitch module to support a max depth of > > 3 labels in the ingress. > > > > Hi Martin, > Thanks for the patch. I have one comment below. > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > > --- > > Changes in v2 > > - Moved MPLS count validation from datapath to configuration. > > - Fixed set mpls function. > > > > net/openvswitch/actions.c | 2 +- > > net/openvswitch/flow.c | 20 ++++++++++----- > > net/openvswitch/flow.h | 9 ++++--- > > net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++--------- > > 4 files changed, 66 insertions(+), 22 deletions(-) > > > > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c > > index 3572e11..f3125d7 100644 > > --- a/net/openvswitch/actions.c > > +++ b/net/openvswitch/actions.c > > @@ -199,7 +199,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, > > if (err) > > return err; > > > > - flow_key->mpls.top_lse = lse; > > + flow_key->mpls.lse[0] = lse; > > return 0; > > } > > > > diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c > > index dca3b1e..c101355 100644 > > --- a/net/openvswitch/flow.c > > +++ b/net/openvswitch/flow.c > > @@ -699,27 +699,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) > > memset(&key->ipv4, 0, sizeof(key->ipv4)); > > } > > } else if (eth_p_mpls(key->eth.type)) { > > - size_t stack_len = MPLS_HLEN; > > + u8 label_count = 1; > > > > + memset(&key->mpls, 0, sizeof(key->mpls)); > > skb_set_inner_network_header(skb, skb->mac_len); > > while (1) { > > __be32 lse; > > > > - error = check_header(skb, skb->mac_len + stack_len); > > + error = check_header(skb, skb->mac_len + > > + label_count * MPLS_HLEN); > > if (unlikely(error)) > > return 0; > > > > memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); > > > > - if (stack_len == MPLS_HLEN) > > - memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); > > + if (label_count <= MPLS_LABEL_DEPTH) > > + memcpy(&key->mpls.lse[label_count - 1], &lse, > > + MPLS_HLEN); > > > > - skb_set_inner_network_header(skb, skb->mac_len + stack_len); > > + skb_set_inner_network_header(skb, skb->mac_len + > > + label_count * MPLS_HLEN); > > if (lse & htonl(MPLS_LS_S_MASK)) > > break; > > > > - stack_len += MPLS_HLEN; > > + label_count++; > > } > > + if (label_count > MPLS_LABEL_DEPTH) > > + label_count = MPLS_LABEL_DEPTH; > > + > > + key->mpls.num_labels_mask = GENMASK(label_count - 1, 0); > > > } else if (key->eth.type == htons(ETH_P_IPV6)) { > > int nh_len; /* IPv6 Header + Extensions */ > > > > diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h > > index 3e2cc22..d9eccbe 100644 > > --- a/net/openvswitch/flow.h > > +++ b/net/openvswitch/flow.h > > @@ -30,6 +30,7 @@ enum sw_flow_mac_proto { > > MAC_PROTO_ETHERNET, > > }; > > #define SW_FLOW_KEY_INVALID 0x80 > > +#define MPLS_LABEL_DEPTH 3 > > > > /* Store options at the end of the array if they are less than the > > * maximum size. This allows us to get the benefits of variable length > > @@ -85,9 +86,6 @@ struct sw_flow_key { > > */ > > union { > > struct { > > - __be32 top_lse; /* top label stack entry */ > > - } mpls; > > - struct { > > u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ > > u8 tos; /* IP ToS. */ > > u8 ttl; /* IP TTL/hop limit. */ > > @@ -135,6 +133,11 @@ struct sw_flow_key { > > } nd; > > }; > > } ipv6; > > + struct { > > + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ > > Why using a bitmap here? why not just num_labels? > I saw that you have to convert it using hweight_long() > to num_labels a couple places below. > num_labels will not work when used in flow_key for flow match. Assume a case where a packet with 3 labels are received and the configured flow has a match condition for the top most label only.Num_labels cannot be used in that case My original patch was with num_labels.And we found that it will not work for the above case. Jbenc@redhat.com proposed the idea of num_labels_mask. Thanks for checking the patch. > Regards, > William > > > + __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */ > > + } mpls; > > + > > struct ovs_key_nsh nsh; /* network service header */ > > }; > > struct { > > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c > > index d7559c6..21de061 100644 > > --- a/net/openvswitch/flow_netlink.c > > +++ b/net/openvswitch/flow_netlink.c > > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void) > > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > > .next = ovs_tunnel_key_lens, }, > > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > > > if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { > > const struct ovs_key_mpls *mpls_key; > > + u32 hdr_len; > > + u32 label_count, label_count_mask, i; > > > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > > - mpls_key->mpls_lse, is_mask); > > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > > + > > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > > + hdr_len % sizeof(struct ovs_key_mpls)) > > + return -EINVAL; > > + > > + label_count_mask = GENMASK(label_count - 1, 0); > > + > > + for (i = 0 ; i < label_count; i++) > > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > > + mpls_key[i].mpls_lse, is_mask); > > + > > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > > + label_count_mask, is_mask); > > > > attrs &= ~(1 << OVS_KEY_ATTR_MPLS); > > } > > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > > ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > > } else if (eth_p_mpls(swkey->eth.type)) { > > + u8 i, num_labels; > > struct ovs_key_mpls *mpls_key; > > > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > > + num_labels = hweight_long(output->mpls.num_labels_mask); > > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > > + num_labels * sizeof(*mpls_key)); > > if (!nla) > > goto nla_put_failure; > > + > > mpls_key = nla_data(nla); > > - mpls_key->mpls_lse = output->mpls.top_lse; > > + for (i = 0; i < num_labels; i++) > > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > > } > > > > if ((swkey->eth.type == htons(ETH_P_IP) || > > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > u8 mac_proto = ovs_key_mac_proto(key); > > const struct nlattr *a; > > int rem, err; > > + u32 mpls_label_count = 0; > > + > > + if (eth_p_mpls(eth_type)) > > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > > > > nla_for_each_nested(a, attr, rem) { > > /* Expected argument lengths, (u32)-1 for variable length. */ > > @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > !eth_p_mpls(eth_type))) > > return -EINVAL; > > eth_type = mpls->mpls_ethertype; > > + mpls_label_count++; > > break; > > } > > > > - case OVS_ACTION_ATTR_POP_MPLS: > > + case OVS_ACTION_ATTR_POP_MPLS: { > > + __be16 proto; > > if (vlan_tci & htons(VLAN_CFI_MASK) || > > !eth_p_mpls(eth_type)) > > return -EINVAL; > > > > - /* Disallow subsequent L2.5+ set and mpls_pop actions > > - * as there is no check here to ensure that the new > > - * eth_type is valid and thus set actions could > > - * write off the end of the packet or otherwise > > - * corrupt it. > > + /* Disallow subsequent L2.5+ set actions as there is > > + * no check here to ensure that the new eth type is > > + * valid and thus set actions could write off the > > + * end of the packet or otherwise corrupt it. > > * > > * Support for these actions is planned using packet > > * recirculation. > > */ > > - eth_type = htons(0); > > + proto = nla_get_be16(a); > > + mpls_label_count--; > > + > > + if (!eth_p_mpls(proto) || !mpls_label_count) > > + eth_type = htons(0); > > + else > > + eth_type = proto; > > + > > break; > > + } > > > > case OVS_ACTION_ATTR_SET: > > err = validate_set(a, key, sfa, > > -- > > 1.8.3.1 > >
On Tue, Oct 22, 2019 at 10:05 PM Martin Varghese <martinvarghesenokia@gmail.com> wrote: > > On Tue, Oct 22, 2019 at 08:59:46PM -0700, Pravin Shelar wrote: > > On Tue, Oct 22, 2019 at 8:29 AM Martin Varghese > > <martinvarghesenokia@gmail.com> wrote: > > > > > > On Tue, Oct 22, 2019 at 12:03:49AM -0700, Pravin Shelar wrote: > > > > On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese > > > > <martinvarghesenokia@gmail.com> wrote: > > > > > > > > > > From: Martin Varghese <martin.varghese@nokia.com> > > > > > > > > > > The openvswitch was supporting a MPLS label depth of 1 in the ingress > > > > > direction though the userspace OVS supports a max depth of 3 labels. > > > > > This change enables openvswitch module to support a max depth of > > > > > 3 labels in the ingress. > > > > > > > > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > > > > > --- > > > > > Changes in v2 > > > > > - Moved MPLS count validation from datapath to configuration. > > > > > - Fixed set mpls function. > > > > > > > > > This patch looks pretty close now. > > > > > > > > > net/openvswitch/actions.c | 2 +- > > > > > net/openvswitch/flow.c | 20 ++++++++++----- > > > > > net/openvswitch/flow.h | 9 ++++--- > > > > > net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++--------- > > > > > 4 files changed, 66 insertions(+), 22 deletions(-) > > > > > > > > > ... > > > > > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c > > > > > index d7559c6..21de061 100644 > > > > > --- a/net/openvswitch/flow_netlink.c > > > > > +++ b/net/openvswitch/flow_netlink.c > > > > > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void) > > > > > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > > > > > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > > > > > .next = ovs_tunnel_key_lens, }, > > > > > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > > > > > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > > > > > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > > > > > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > > > > > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > > > > > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > > > > > > > > > if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { > > > > > const struct ovs_key_mpls *mpls_key; > > > > > + u32 hdr_len; > > > > > + u32 label_count, label_count_mask, i; > > > > > > > > > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > > > > > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > > > > > - mpls_key->mpls_lse, is_mask); > > > > > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > > > > > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > > > > > + > > > > > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > > > > > + hdr_len % sizeof(struct ovs_key_mpls)) > > > > > + return -EINVAL; > > > > > + > > > > > + label_count_mask = GENMASK(label_count - 1, 0); > > > > > + > > > > > + for (i = 0 ; i < label_count; i++) > > > > > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > > > > > + mpls_key[i].mpls_lse, is_mask); > > > > > + > > > > > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > > > > > + label_count_mask, is_mask); > > > > > > > > > > attrs &= ~(1 << OVS_KEY_ATTR_MPLS); > > > > > } > > > > > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > > > > > ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > > > > > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > > > > > } else if (eth_p_mpls(swkey->eth.type)) { > > > > > + u8 i, num_labels; > > > > > struct ovs_key_mpls *mpls_key; > > > > > > > > > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > > > > > + num_labels = hweight_long(output->mpls.num_labels_mask); > > > > > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > > > > > + num_labels * sizeof(*mpls_key)); > > > > > if (!nla) > > > > > goto nla_put_failure; > > > > > + > > > > > mpls_key = nla_data(nla); > > > > > - mpls_key->mpls_lse = output->mpls.top_lse; > > > > > + for (i = 0; i < num_labels; i++) > > > > > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > > > > > } > > > > > > > > > > if ((swkey->eth.type == htons(ETH_P_IP) || > > > > > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > > > > u8 mac_proto = ovs_key_mac_proto(key); > > > > > const struct nlattr *a; > > > > > int rem, err; > > > > > + u32 mpls_label_count = 0; > > > > > + > > > > > + if (eth_p_mpls(eth_type)) > > > > > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > > > > > > > > > The MPLS push and pop action could be part of nested actions in > > > > sample, so the count needs to be global count across such nested > > > > actions. have a look at validate_and_copy_sample(). > > > > > > > Embedding mpls_label_count in struct sw_flow_actions will not work for clone > > > > > > I guess we need to move the below code to ovs_nla_copy_actions and extend the arguments of __ovs_nla_copy_actions to take mpls_label_count also > > > if (eth_p_mpls(eth_type)) > > > mpls_label_count = hweight_long(key->mpls.num_labels_mask) > > > > > > > > I am not suggesting changing sw_flow_actions, You can define count > > variable in ovs_nla_copy_actions() and pass it as a pointer to nested > > function. That can be used to keep track of MPLS labels at all nested > > actions. > > Actions clone & sample does a clone of SKB correct? You can pass pointer to a separate (cloned) variable when verifying those actions in those cases. > Hence shouldn't they maintain a seperate mpls_label count for each nested action set > I assume instead of passing as pointer from ovs_nla_copy_actions ,if passed by value it should > solve the problem.Need to try that though.
On Thu, Oct 24, 2019 at 7:34 PM Martin Varghese <martinvarghesenokia@gmail.com> wrote: > > On Thu, Oct 24, 2019 at 01:47:40PM -0700, William Tu wrote: > > On Sun, Oct 20, 2019 at 07:41:42PM +0530, Martin Varghese wrote: > > > From: Martin Varghese <martin.varghese@nokia.com> > > > > > > The openvswitch was supporting a MPLS label depth of 1 in the ingress > > > direction though the userspace OVS supports a max depth of 3 labels. > > > This change enables openvswitch module to support a max depth of > > > 3 labels in the ingress. > > > > > > > Hi Martin, > > Thanks for the patch. I have one comment below. > > > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > > > --- > > > Changes in v2 > > > - Moved MPLS count validation from datapath to configuration. > > > - Fixed set mpls function. > > > > > > net/openvswitch/actions.c | 2 +- > > > net/openvswitch/flow.c | 20 ++++++++++----- > > > net/openvswitch/flow.h | 9 ++++--- > > > net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++--------- > > > 4 files changed, 66 insertions(+), 22 deletions(-) > > > > > > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c > > > index 3572e11..f3125d7 100644 > > > --- a/net/openvswitch/actions.c > > > +++ b/net/openvswitch/actions.c > > > @@ -199,7 +199,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, > > > if (err) > > > return err; > > > > > > - flow_key->mpls.top_lse = lse; > > > + flow_key->mpls.lse[0] = lse; > > > return 0; > > > } > > > > > > diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c > > > index dca3b1e..c101355 100644 > > > --- a/net/openvswitch/flow.c > > > +++ b/net/openvswitch/flow.c > > > @@ -699,27 +699,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) > > > memset(&key->ipv4, 0, sizeof(key->ipv4)); > > > } > > > } else if (eth_p_mpls(key->eth.type)) { > > > - size_t stack_len = MPLS_HLEN; > > > + u8 label_count = 1; > > > > > > + memset(&key->mpls, 0, sizeof(key->mpls)); > > > skb_set_inner_network_header(skb, skb->mac_len); > > > while (1) { > > > __be32 lse; > > > > > > - error = check_header(skb, skb->mac_len + stack_len); > > > + error = check_header(skb, skb->mac_len + > > > + label_count * MPLS_HLEN); > > > if (unlikely(error)) > > > return 0; > > > > > > memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); > > > > > > - if (stack_len == MPLS_HLEN) > > > - memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); > > > + if (label_count <= MPLS_LABEL_DEPTH) > > > + memcpy(&key->mpls.lse[label_count - 1], &lse, > > > + MPLS_HLEN); > > > > > > - skb_set_inner_network_header(skb, skb->mac_len + stack_len); > > > + skb_set_inner_network_header(skb, skb->mac_len + > > > + label_count * MPLS_HLEN); > > > if (lse & htonl(MPLS_LS_S_MASK)) > > > break; > > > > > > - stack_len += MPLS_HLEN; > > > + label_count++; > > > } > > > + if (label_count > MPLS_LABEL_DEPTH) > > > + label_count = MPLS_LABEL_DEPTH; > > > + > > > + key->mpls.num_labels_mask = GENMASK(label_count - 1, 0); > > > > > } else if (key->eth.type == htons(ETH_P_IPV6)) { > > > int nh_len; /* IPv6 Header + Extensions */ > > > > > > diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h > > > index 3e2cc22..d9eccbe 100644 > > > --- a/net/openvswitch/flow.h > > > +++ b/net/openvswitch/flow.h > > > @@ -30,6 +30,7 @@ enum sw_flow_mac_proto { > > > MAC_PROTO_ETHERNET, > > > }; > > > #define SW_FLOW_KEY_INVALID 0x80 > > > +#define MPLS_LABEL_DEPTH 3 > > > > > > /* Store options at the end of the array if they are less than the > > > * maximum size. This allows us to get the benefits of variable length > > > @@ -85,9 +86,6 @@ struct sw_flow_key { > > > */ > > > union { > > > struct { > > > - __be32 top_lse; /* top label stack entry */ > > > - } mpls; > > > - struct { > > > u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ > > > u8 tos; /* IP ToS. */ > > > u8 ttl; /* IP TTL/hop limit. */ > > > @@ -135,6 +133,11 @@ struct sw_flow_key { > > > } nd; > > > }; > > > } ipv6; > > > + struct { > > > + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ > > > > Why using a bitmap here? why not just num_labels? > > I saw that you have to convert it using hweight_long() > > to num_labels a couple places below. > > > > num_labels will not work when used in flow_key for flow match. > Assume a case where a packet with 3 labels are received and the configured > flow has a match condition for the top most label only.Num_labels cannot be > used in that case > > My original patch was with num_labels.And we found that it will not work for > the above case. > Jbenc@redhat.com proposed the idea of num_labels_mask. > Thank you. Now I understand. William
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 3572e11..f3125d7 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -199,7 +199,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, if (err) return err; - flow_key->mpls.top_lse = lse; + flow_key->mpls.lse[0] = lse; return 0; } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index dca3b1e..c101355 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -699,27 +699,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->ipv4, 0, sizeof(key->ipv4)); } } else if (eth_p_mpls(key->eth.type)) { - size_t stack_len = MPLS_HLEN; + u8 label_count = 1; + memset(&key->mpls, 0, sizeof(key->mpls)); skb_set_inner_network_header(skb, skb->mac_len); while (1) { __be32 lse; - error = check_header(skb, skb->mac_len + stack_len); + error = check_header(skb, skb->mac_len + + label_count * MPLS_HLEN); if (unlikely(error)) return 0; memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); - if (stack_len == MPLS_HLEN) - memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); + if (label_count <= MPLS_LABEL_DEPTH) + memcpy(&key->mpls.lse[label_count - 1], &lse, + MPLS_HLEN); - skb_set_inner_network_header(skb, skb->mac_len + stack_len); + skb_set_inner_network_header(skb, skb->mac_len + + label_count * MPLS_HLEN); if (lse & htonl(MPLS_LS_S_MASK)) break; - stack_len += MPLS_HLEN; + label_count++; } + if (label_count > MPLS_LABEL_DEPTH) + label_count = MPLS_LABEL_DEPTH; + + key->mpls.num_labels_mask = GENMASK(label_count - 1, 0); } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 3e2cc22..d9eccbe 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -30,6 +30,7 @@ enum sw_flow_mac_proto { MAC_PROTO_ETHERNET, }; #define SW_FLOW_KEY_INVALID 0x80 +#define MPLS_LABEL_DEPTH 3 /* Store options at the end of the array if they are less than the * maximum size. This allows us to get the benefits of variable length @@ -85,9 +86,6 @@ struct sw_flow_key { */ union { struct { - __be32 top_lse; /* top label stack entry */ - } mpls; - struct { u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ u8 tos; /* IP ToS. */ u8 ttl; /* IP TTL/hop limit. */ @@ -135,6 +133,11 @@ struct sw_flow_key { } nd; }; } ipv6; + struct { + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ + __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */ + } mpls; + struct ovs_key_nsh nsh; /* network service header */ }; struct { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d7559c6..21de061 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void) [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, .next = ovs_tunnel_key_lens, }, - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { const struct ovs_key_mpls *mpls_key; + u32 hdr_len; + u32 label_count, label_count_mask, i; mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); - SW_FLOW_KEY_PUT(match, mpls.top_lse, - mpls_key->mpls_lse, is_mask); + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); + label_count = hdr_len / sizeof(struct ovs_key_mpls); + + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || + hdr_len % sizeof(struct ovs_key_mpls)) + return -EINVAL; + + label_count_mask = GENMASK(label_count - 1, 0); + + for (i = 0 ; i < label_count; i++) + SW_FLOW_KEY_PUT(match, mpls.lse[i], + mpls_key[i].mpls_lse, is_mask); + + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, + label_count_mask, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_MPLS); } @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); } else if (eth_p_mpls(swkey->eth.type)) { + u8 i, num_labels; struct ovs_key_mpls *mpls_key; - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); + num_labels = hweight_long(output->mpls.num_labels_mask); + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, + num_labels * sizeof(*mpls_key)); if (!nla) goto nla_put_failure; + mpls_key = nla_data(nla); - mpls_key->mpls_lse = output->mpls.top_lse; + for (i = 0; i < num_labels; i++) + mpls_key[i].mpls_lse = output->mpls.lse[i]; } if ((swkey->eth.type == htons(ETH_P_IP) || @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; + u32 mpls_label_count = 0; + + if (eth_p_mpls(eth_type)) + mpls_label_count = hweight_long(key->mpls.num_labels_mask); nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, !eth_p_mpls(eth_type))) return -EINVAL; eth_type = mpls->mpls_ethertype; + mpls_label_count++; break; } - case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_POP_MPLS: { + __be16 proto; if (vlan_tci & htons(VLAN_CFI_MASK) || !eth_p_mpls(eth_type)) return -EINVAL; - /* Disallow subsequent L2.5+ set and mpls_pop actions - * as there is no check here to ensure that the new - * eth_type is valid and thus set actions could - * write off the end of the packet or otherwise - * corrupt it. + /* Disallow subsequent L2.5+ set actions as there is + * no check here to ensure that the new eth type is + * valid and thus set actions could write off the + * end of the packet or otherwise corrupt it. * * Support for these actions is planned using packet * recirculation. */ - eth_type = htons(0); + proto = nla_get_be16(a); + mpls_label_count--; + + if (!eth_p_mpls(proto) || !mpls_label_count) + eth_type = htons(0); + else + eth_type = proto; + break; + } case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa,