Message ID | CAPWQB7HdWBtp9XKaZsWOH_ENLeiE_Tnbowqows_krmoRvfF=jg@mail.gmail.com |
---|---|
State | Not Applicable |
Headers | show |
> On Mar 2, 2017, at 5:57 PM, Joe Stringer <joe@ovn.org> wrote: > > On 28 February 2017 at 17:17, Jarno Rajahalme <jarno@ovn.org <mailto:jarno@ovn.org>> wrote: >> Upstream commit: >> >> commit 9dd7f8907c3705dc7a7a375d1c6e30b06e6daffc >> Author: Jarno Rajahalme <jarno@ovn.org> >> Date: Thu Feb 9 11:21:59 2017 -0800 >> >> openvswitch: Add original direction conntrack tuple to sw_flow_key. >> >> Add the fields of the conntrack original direction 5-tuple to struct >> sw_flow_key. The new fields are initially marked as non-existent, and >> are populated whenever a conntrack action is executed and either finds >> or generates a conntrack entry. This means that these fields exist >> for all packets that were not rejected by conntrack as untrackable. >> >> The original tuple fields in the sw_flow_key are filled from the >> original direction tuple of the conntrack entry relating to the >> current packet, or from the original direction tuple of the master >> conntrack entry, if the current conntrack entry has a master. >> Generally, expected connections of connections having an assigned >> helper (e.g., FTP), have a master conntrack entry. >> >> The main purpose of the new conntrack original tuple fields is to >> allow matching on them for policy decision purposes, with the premise >> that the admissibility of tracked connections' reply packets (as well >> as original direction packets), and both direction packets of any >> related connections may be based on ACL rules applying to the master >> connection's original direction 5-tuple. This also makes it easier to >> make policy decisions when the actual packet headers might have been >> transformed by NAT, as the original direction 5-tuple represents the >> packet headers before any such transformation. 
>> >> When using the original direction 5-tuple the admissibility of return >> and/or related packets need not be based on the mere existence of a >> conntrack entry, allowing separation of admission policy from the >> established conntrack state. While existence of a conntrack entry is >> required for admission of the return or related packets, policy >> changes can render connections that were initially admitted to be >> rejected or dropped afterwards. If the admission of the return and >> related packets was based on mere conntrack state (e.g., connection >> being in an established state), a policy change that would make the >> connection rejected or dropped would need to find and delete all >> conntrack entries affected by such a change. When using the original >> direction 5-tuple matching the affected conntrack entries can be >> allowed to time out instead, as the established state of the >> connection would not need to be the basis for packet admission any >> more. >> >> It should be noted that the directionality of related connections may >> be the same or different than that of the master connection, and >> neither the original direction 5-tuple nor the conntrack state bits >> carry this information. If needed, the directionality of the master >> connection can be stored in master's conntrack mark or labels, which >> are automatically inherited by the expected related connections. >> >> The fact that neither ARP nor ND packets are trackable by conntrack >> allows mutual exclusion between ARP/ND and the new conntrack original >> tuple fields. Hence, the IP addresses are overlaid in union with ARP >> and ND fields. This allows the sw_flow_key to not grow much due to >> this patch, but it also means that we must be careful to never use the >> new key fields with ARP or ND packets. ARP is easy to distinguish and >> keep mutually exclusive based on the ethernet type, but ND being an >> ICMPv6 protocol requires a bit more attention. 
>> >> Signed-off-by: Jarno Rajahalme <jarno@ovn.org> >> Acked-by: Joe Stringer <joe@ovn.org> >> Acked-by: Pravin B Shelar <pshelar@ovn.org> >> Signed-off-by: David S. Miller <davem@davemloft.net> >> >> Signed-off-by: Jarno Rajahalme <jarno@ovn.org> >> --- > > I had to roll in the following incremental (derived from your later > patch) to fix the build with this commit: > Right, I forgot to mention that I left these patches separate knowing that they will not compile individually. > diff --git a/lib/odp-execute.c b/lib/odp-execute.c > index 1f6812a6dd02..50bbafaa0231 100644 > --- a/lib/odp-execute.c > +++ b/lib/odp-execute.c > @@ -381,6 +381,8 @@ odp_execute_set_action(struct dp_packet *packet, > const struct nlattr *a) > case OVS_KEY_ATTR_VLAN: > case OVS_KEY_ATTR_TCP_FLAGS: > case OVS_KEY_ATTR_CT_STATE: > + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: > + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: > case OVS_KEY_ATTR_CT_ZONE: > case OVS_KEY_ATTR_CT_MARK: > case OVS_KEY_ATTR_CT_LABELS: > @@ -476,6 +478,8 @@ odp_execute_masked_set_action(struct dp_packet *packet, > case OVS_KEY_ATTR_CT_ZONE: > case OVS_KEY_ATTR_CT_MARK: > case OVS_KEY_ATTR_CT_LABELS: > + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: > + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: > case OVS_KEY_ATTR_ENCAP: > case OVS_KEY_ATTR_ETHERTYPE: > case OVS_KEY_ATTR_IN_PORT: > diff --git a/lib/odp-util.c b/lib/odp-util.c > index 41067385e821..1f1512ae47fd 100644 > --- a/lib/odp-util.c > +++ b/lib/odp-util.c > @@ -150,6 +150,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, > char *namebuf, size_t bufsize) > case OVS_KEY_ATTR_CT_ZONE: return "ct_zone"; > case OVS_KEY_ATTR_CT_MARK: return "ct_mark"; > case OVS_KEY_ATTR_CT_LABELS: return "ct_label"; > + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: return "ct_tuple4"; > + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: return "ct_tuple6"; > case OVS_KEY_ATTR_TUNNEL: return "tunnel"; > case OVS_KEY_ATTR_IN_PORT: return "in_port"; > case OVS_KEY_ATTR_ETHERNET: return "eth"; > @@ -1874,6 +1876,8 @@ static 
const struct attr_len_tbl > ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] = > [OVS_KEY_ATTR_CT_ZONE] = { .len = 2 }, > [OVS_KEY_ATTR_CT_MARK] = { .len = 4 }, > [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, > + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = { .len = sizeof(struct > ovs_key_ct_tuple_ipv4) }, > + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { .len = sizeof(struct > ovs_key_ct_tuple_ipv6) }, > }; > > /* Returns the correct length of the payload for a flow key attribute of the > @@ -2823,6 +2827,40 @@ format_odp_key_attr(const struct nlattr *a, > const struct nlattr *ma, > break; > } > > + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: { > + const struct ovs_key_ct_tuple_ipv4 *key = nl_attr_get(a); > + const struct ovs_key_ct_tuple_ipv4 *mask = ma ? nl_attr_get(ma) : NULL; > + > + format_ipv4(ds, "src", key->ipv4_src, MASK(mask, ipv4_src), verbose); > + format_ipv4(ds, "dst", key->ipv4_dst, MASK(mask, ipv4_dst), verbose); > + format_u8u(ds, "proto", key->ipv4_proto, MASK(mask, ipv4_proto), > + verbose); > + format_be16(ds, "tp_src", key->src_port, MASK(mask, src_port), > + verbose); > + format_be16(ds, "tp_dst", key->dst_port, MASK(mask, dst_port), > + verbose); > + ds_chomp(ds, ','); > + break; > + } > + > + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: { > + const struct ovs_key_ct_tuple_ipv6 *key = nl_attr_get(a); > + const struct ovs_key_ct_tuple_ipv6 *mask = ma ? 
nl_attr_get(ma) : NULL; > + > + format_in6_addr(ds, "src", &key->ipv6_src, MASK(mask, ipv6_src), > + verbose); > + format_in6_addr(ds, "dst", &key->ipv6_dst, MASK(mask, ipv6_dst), > + verbose); > + format_u8u(ds, "proto", key->ipv6_proto, MASK(mask, ipv6_proto), > + verbose); > + format_be16(ds, "src_port", key->src_port, MASK(mask, src_port), > + verbose); > + format_be16(ds, "dst_port", key->dst_port, MASK(mask, dst_port), > + verbose); > + ds_chomp(ds, ','); > + break; > + } > + > case OVS_KEY_ATTR_TUNNEL: > format_odp_tun_attr(a, ma, ds, verbose); > break; > diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c > index 520b8dd196bb..69cdf69f4b39 100644 > --- a/ofproto/ofproto-dpif-sflow.c > +++ b/ofproto/ofproto-dpif-sflow.c > @@ -1025,6 +1025,8 @@ sflow_read_set_action(const struct nlattr *attr, > case OVS_KEY_ATTR_CT_ZONE: > case OVS_KEY_ATTR_CT_MARK: > case OVS_KEY_ATTR_CT_LABELS: > + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: > + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: > case OVS_KEY_ATTR_UNSPEC: > case __OVS_KEY_ATTR_MAX: > default: > >> datapath/actions.c | 2 + >> datapath/conntrack.c | 86 +++++++++++++++++++++-- >> datapath/conntrack.h | 10 ++- >> datapath/flow.c | 34 +++++++-- >> datapath/flow.h | 49 ++++++++++--- >> datapath/flow_netlink.c | 85 ++++++++++++++++------ >> datapath/flow_netlink.h | 7 +- >> datapath/linux/compat/include/linux/openvswitch.h | 18 +++++ >> 8 files changed, 246 insertions(+), 45 deletions(-) >> >> diff --git a/datapath/actions.c b/datapath/actions.c >> index 82833d0..71ec14c 100644 >> --- a/datapath/actions.c >> +++ b/datapath/actions.c >> @@ -1011,6 +1011,8 @@ static int execute_masked_set_action(struct sk_buff *skb, >> case OVS_KEY_ATTR_CT_ZONE: >> case OVS_KEY_ATTR_CT_MARK: >> case OVS_KEY_ATTR_CT_LABELS: >> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: >> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: >> err = -EINVAL; >> break; >> } >> diff --git a/datapath/conntrack.c b/datapath/conntrack.c >> index 16a7773..d8309c9 
100644 >> --- a/datapath/conntrack.c >> +++ b/datapath/conntrack.c >> @@ -163,6 +163,20 @@ static void ovs_ct_get_labels(const struct nf_conn *ct, >> memset(labels, 0, OVS_CT_LABELS_LEN); >> } >> >> +static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key, >> + const struct nf_conntrack_tuple *orig, >> + u8 icmp_proto) >> +{ >> + key->ct.orig_proto = orig->dst.protonum; >> + if (orig->dst.protonum == icmp_proto) { >> + key->ct.orig_tp.src = htons(orig->dst.u.icmp.type); >> + key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code); >> + } else { >> + key->ct.orig_tp.src = orig->src.u.all; >> + key->ct.orig_tp.dst = orig->dst.u.all; >> + } >> +} >> + >> static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, >> const struct nf_conntrack_zone *zone, >> const struct nf_conn *ct) >> @@ -171,6 +185,35 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, >> key->ct.zone = zone->id; >> key->ct.mark = ovs_ct_get_mark(ct); >> ovs_ct_get_labels(ct, &key->ct.labels); >> + >> + if (ct) { >> + const struct nf_conntrack_tuple *orig; >> + >> + /* Use the master if we have one. */ >> + if (ct->master) >> + ct = ct->master; >> + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; >> + >> + /* IP version must match with the master connection. */ >> + if (key->eth.type == htons(ETH_P_IP) && >> + nf_ct_l3num(ct) == NFPROTO_IPV4) { >> + key->ipv4.ct_orig.src = orig->src.u3.ip; >> + key->ipv4.ct_orig.dst = orig->dst.u3.ip; >> + __ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP); >> + return; >> + } else if (key->eth.type == htons(ETH_P_IPV6) && >> + !sw_flow_key_is_nd(key) && >> + nf_ct_l3num(ct) == NFPROTO_IPV6) { >> + key->ipv6.ct_orig.src = orig->src.u3.in6; >> + key->ipv6.ct_orig.dst = orig->dst.u3.in6; >> + __ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP); >> + return; >> + } >> + } >> + /* Clear 'ct.orig_proto' to mark the non-existence of conntrack >> + * original direction key fields. 
>> + */ >> + key->ct.orig_proto = 0; >> } >> >> /* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has >> @@ -224,24 +267,55 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) >> ovs_ct_update_key(skb, NULL, key, false, false); >> } >> >> -int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) >> +#define IN6_ADDR_INITIALIZER(ADDR) \ >> + { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \ >> + (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] } >> + >> +int ovs_ct_put_key(const struct sw_flow_key *swkey, >> + const struct sw_flow_key *output, struct sk_buff *skb) >> { >> - if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) >> + if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct.state)) >> return -EMSGSIZE; >> >> if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && >> - nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone)) >> + nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct.zone)) >> return -EMSGSIZE; >> >> if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && >> - nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark)) >> + nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark)) >> return -EMSGSIZE; >> >> if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && >> - nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels), >> - &key->ct.labels)) >> + nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels), >> + &output->ct.labels)) >> return -EMSGSIZE; >> >> + if (swkey->ct.orig_proto) { >> + if (swkey->eth.type == htons(ETH_P_IP)) { >> + struct ovs_key_ct_tuple_ipv4 orig = { >> + output->ipv4.ct_orig.src, >> + output->ipv4.ct_orig.dst, >> + output->ct.orig_tp.src, >> + output->ct.orig_tp.dst, >> + output->ct.orig_proto, >> + }; >> + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, >> + sizeof(orig), &orig)) >> + return -EMSGSIZE; >> + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { >> + struct ovs_key_ct_tuple_ipv6 orig = { >> + IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src), >> + 
IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst), >> + output->ct.orig_tp.src, >> + output->ct.orig_tp.dst, >> + output->ct.orig_proto, >> + }; >> + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, >> + sizeof(orig), &orig)) >> + return -EMSGSIZE; >> + } >> + } >> + >> return 0; >> } >> >> diff --git a/datapath/conntrack.h b/datapath/conntrack.h >> index 15dbf0a..2bd753d 100644 >> --- a/datapath/conntrack.h >> +++ b/datapath/conntrack.h >> @@ -33,7 +33,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, >> const struct ovs_conntrack_info *); >> >> void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); >> -int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); >> +int ovs_ct_put_key(const struct sw_flow_key *swkey, >> + const struct sw_flow_key *output, struct sk_buff *skb); >> void ovs_ct_free_action(const struct nlattr *a); >> >> #define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ >> @@ -80,9 +81,14 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb, >> key->ct.zone = 0; >> key->ct.mark = 0; >> memset(&key->ct.labels, 0, sizeof(key->ct.labels)); >> + /* Clear 'ct.orig_proto' to mark the non-existence of original >> + * direction key fields. >> + */ >> + key->ct.orig_proto = 0; >> } >> >> -static inline int ovs_ct_put_key(const struct sw_flow_key *key, >> +static inline int ovs_ct_put_key(const struct sw_flow_key *swkey, >> + const struct sw_flow_key *output, >> struct sk_buff *skb) >> { >> return 0; >> diff --git a/datapath/flow.c b/datapath/flow.c >> index 390286c..d663960 100644 >> --- a/datapath/flow.c >> +++ b/datapath/flow.c >> @@ -696,6 +696,8 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) >> int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, >> struct sk_buff *skb, struct sw_flow_key *key) >> { >> + int err; >> + >> /* Extract metadata from packet. 
*/ >> if (tun_info) { >> key->tun_proto = ip_tunnel_info_af(tun_info); >> @@ -719,25 +721,49 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, >> key->phy.priority = skb->priority; >> key->phy.in_port = OVS_CB(skb)->input_vport->port_no; >> key->phy.skb_mark = skb->mark; >> - ovs_ct_fill_key(skb, key); >> key->ovs_flow_hash = 0; >> key->recirc_id = 0; >> >> - return key_extract(skb, key); >> + err = key_extract(skb, key); >> + if (!err) >> + ovs_ct_fill_key(skb, key); /* Must be after key_extract(). */ >> + return err; >> } >> >> int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, >> struct sk_buff *skb, >> struct sw_flow_key *key, bool log) >> { >> + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; >> + u64 attrs = 0; >> int err; >> >> + err = parse_flow_nlattrs(attr, a, &attrs, log); >> + if (err) >> + return -EINVAL; >> + >> memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); >> >> /* Extract metadata from netlink attributes. */ >> - err = ovs_nla_get_flow_metadata(net, attr, key, log); >> + err = ovs_nla_get_flow_metadata(net, a, attrs, key, log); >> if (err) >> return err; >> >> - return key_extract(skb, key); >> + err = key_extract(skb, key); >> + if (err) >> + return err; >> + >> + /* Check that we have conntrack original direction tuple metadata only >> + * for packets for which it makes sense. Otherwise the key may be >> + * corrupted due to overlapping key fields. >> + */ >> + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) && >> + key->eth.type != htons(ETH_P_IP)) >> + return -EINVAL; >> + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) && >> + (key->eth.type != htons(ETH_P_IPV6) || >> + sw_flow_key_is_nd(key))) >> + return -EINVAL; >> + >> + return 0; >> } >> diff --git a/datapath/flow.h b/datapath/flow.h >> index 2dd0696..d4124c6 100644 >> --- a/datapath/flow.h >> +++ b/datapath/flow.h >> @@ -1,5 +1,5 @@ >> /* >> - * Copyright (c) 2007-2015 Nicira, Inc. >> + * Copyright (c) 2007-2017 Nicira, Inc. 
>> * >> * This program is free software; you can redistribute it and/or >> * modify it under the terms of version 2 of the GNU General Public >> @@ -94,10 +94,16 @@ struct sw_flow_key { >> __be32 src; /* IP source address. */ >> __be32 dst; /* IP destination address. */ >> } addr; >> - struct { >> - u8 sha[ETH_ALEN]; /* ARP source hardware address. */ >> - u8 tha[ETH_ALEN]; /* ARP target hardware address. */ >> - } arp; >> + union { >> + struct { >> + __be32 src; >> + __be32 dst; >> + } ct_orig; /* Conntrack original direction fields. */ >> + struct { >> + u8 sha[ETH_ALEN]; /* ARP source hardware address. */ >> + u8 tha[ETH_ALEN]; /* ARP target hardware address. */ >> + } arp; >> + }; >> } ipv4; >> struct { >> struct { >> @@ -105,23 +111,44 @@ struct sw_flow_key { >> struct in6_addr dst; /* IPv6 destination address. */ >> } addr; >> __be32 label; /* IPv6 flow label. */ >> - struct { >> - struct in6_addr target; /* ND target address. */ >> - u8 sll[ETH_ALEN]; /* ND source link layer address. */ >> - u8 tll[ETH_ALEN]; /* ND target link layer address. */ >> - } nd; >> + union { >> + struct { >> + struct in6_addr src; >> + struct in6_addr dst; >> + } ct_orig; /* Conntrack original direction fields. */ >> + struct { >> + struct in6_addr target; /* ND target address. */ >> + u8 sll[ETH_ALEN]; /* ND source link layer address. */ >> + u8 tll[ETH_ALEN]; /* ND target link layer address. */ >> + } nd; >> + }; >> } ipv6; >> }; >> struct { >> /* Connection tracking fields. */ >> + u8 state; >> + u8 orig_proto; /* CT orig tuple IP protocol. */ >> u16 zone; >> u32 mark; >> - u8 state; >> + struct { >> + __be16 src; /* CT orig tuple tp src port. */ >> + __be16 dst; /* CT orig tuple tp dst port. */ >> + } orig_tp; >> + >> struct ovs_key_ct_labels labels; >> } ct; >> >> } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. 
*/ >> >> +static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key) >> +{ >> + return key->eth.type == htons(ETH_P_IPV6) && >> + key->ip.proto == NEXTHDR_ICMP && >> + key->tp.dst == 0 && >> + (key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || >> + key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)); >> +} >> + >> struct sw_flow_key_range { >> unsigned short int start; >> unsigned short int end; >> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c >> index 0f32664..5fac207 100644 >> --- a/datapath/flow_netlink.c >> +++ b/datapath/flow_netlink.c >> @@ -131,7 +131,9 @@ static bool match_validate(const struct sw_flow_match *match, >> * pass the validation tests. >> */ >> mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4) >> + | (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) >> | (1ULL << OVS_KEY_ATTR_IPV6) >> + | (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) >> | (1ULL << OVS_KEY_ATTR_TCP) >> | (1ULL << OVS_KEY_ATTR_TCP_FLAGS) >> | (1ULL << OVS_KEY_ATTR_UDP) >> @@ -163,8 +165,10 @@ static bool match_validate(const struct sw_flow_match *match, >> >> if (match->key->eth.type == htons(ETH_P_IP)) { >> key_expected |= 1ULL << OVS_KEY_ATTR_IPV4; >> - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) >> + if (match->mask && match->mask->key.eth.type == htons(0xffff)) { >> mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4; >> + mask_allowed |= 1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4; >> + } >> >> if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { >> if (match->key->ip.proto == IPPROTO_UDP) { >> @@ -198,8 +202,10 @@ static bool match_validate(const struct sw_flow_match *match, >> >> if (match->key->eth.type == htons(ETH_P_IPV6)) { >> key_expected |= 1ULL << OVS_KEY_ATTR_IPV6; >> - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) >> + if (match->mask && match->mask->key.eth.type == htons(0xffff)) { >> mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6; >> + mask_allowed |= 1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6; >> + } >> >> if (match->key->ip.frag != 
OVS_FRAG_TYPE_LATER) { >> if (match->key->ip.proto == IPPROTO_UDP) { >> @@ -232,6 +238,12 @@ static bool match_validate(const struct sw_flow_match *match, >> htons(NDISC_NEIGHBOUR_SOLICITATION) || >> match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { >> key_expected |= 1ULL << OVS_KEY_ATTR_ND; >> + /* Original direction conntrack tuple >> + * uses the same space as the ND fields >> + * in the key, so both are not allowed >> + * at the same time. >> + */ >> + mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); >> if (match->mask && (match->mask->key.tp.src == htons(0xff))) >> mask_allowed |= 1ULL << OVS_KEY_ATTR_ND; >> } >> @@ -284,7 +296,7 @@ size_t ovs_key_attr_size(void) >> /* Whenever adding new OVS_KEY_ FIELDS, we should consider >> * updating this function. >> */ >> - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26); >> + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); >> >> return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ >> + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ >> @@ -297,6 +309,7 @@ size_t ovs_key_attr_size(void) >> + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ >> + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ >> + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ >> + + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ >> + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ >> + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ >> + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ >> @@ -357,6 +370,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { >> [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, >> [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, >> [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, >> + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = { >> + .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, >> + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { >> + .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, >> }; >> >> static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) >> @@ -432,9 
+449,8 @@ static int parse_flow_mask_nlattrs(const struct nlattr *attr, >> return __parse_flow_nlattrs(attr, a, attrsp, log, true); >> } >> >> -static int parse_flow_nlattrs(const struct nlattr *attr, >> - const struct nlattr *a[], u64 *attrsp, >> - bool log) >> +int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], >> + u64 *attrsp, bool log) >> { >> return __parse_flow_nlattrs(attr, a, attrsp, log, false); >> } >> @@ -900,6 +916,34 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, >> sizeof(*cl), is_mask); >> *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); >> } >> + if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) { >> + const struct ovs_key_ct_tuple_ipv4 *ct; >> + >> + ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]); >> + >> + SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask); >> + SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask); >> + SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); >> + SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); >> + SW_FLOW_KEY_PUT(match, ct.orig_proto, ct->ipv4_proto, is_mask); >> + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4); >> + } >> + if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) { >> + const struct ovs_key_ct_tuple_ipv6 *ct; >> + >> + ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]); >> + >> + SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src, >> + sizeof(match->key->ipv6.ct_orig.src), >> + is_mask); >> + SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst, >> + sizeof(match->key->ipv6.ct_orig.dst), >> + is_mask); >> + SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); >> + SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); >> + SW_FLOW_KEY_PUT(match, ct.orig_proto, ct->ipv6_proto, is_mask); >> + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); >> + } >> return 0; >> } >> >> @@ -1377,9 +1421,12 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) >> 
>> /** >> * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. >> - * @key: Receives extracted in_port, priority, tun_key and skb_mark. >> - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute >> - * sequence. >> + * @net: Network namespace. >> + * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack >> + * metadata. >> + * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink >> + * attributes. >> + * @attrs: Bit mask for the netlink attributes included in @a. >> * @log: Boolean to allow kernel error logging. Normally true, but when >> * probing for feature compatibility this should be passed in as false to >> * suppress unnecessary error logging. >> @@ -1388,25 +1435,23 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) >> * take the same form accepted by flow_from_nlattrs(), but only enough of it to >> * get the metadata, that is, the parts of the flow key that cannot be >> * extracted from the packet itself. >> + * >> + * This must be called before the packet key fields are filled in 'key'. 
>> */ >> >> -int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr, >> - struct sw_flow_key *key, >> - bool log) >> +int ovs_nla_get_flow_metadata(struct net *net, >> + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1], >> + u64 attrs, struct sw_flow_key *key, bool log) >> { >> - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; >> struct sw_flow_match match; >> - u64 attrs = 0; >> - int err; >> - >> - err = parse_flow_nlattrs(attr, a, &attrs, log); >> - if (err) >> - return -EINVAL; >> >> memset(&match, 0, sizeof(match)); >> match.key = key; >> >> memset(&key->ct, 0, sizeof(key->ct)); >> + memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig)); >> + memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig)); >> + >> key->phy.in_port = DP_MAX_PORTS; >> >> return metadata_from_nlattrs(net, &match, &attrs, a, false, log); >> @@ -1455,7 +1500,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, >> if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) >> goto nla_put_failure; >> >> - if (ovs_ct_put_key(output, skb)) >> + if (ovs_ct_put_key(swkey, output, skb)) >> goto nla_put_failure; >> >> nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); >> diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h >> index 1c4208b..8d04d07 100644 >> --- a/datapath/flow_netlink.h >> +++ b/datapath/flow_netlink.h >> @@ -45,8 +45,11 @@ void ovs_match_init(struct sw_flow_match *match, >> >> int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, >> int attr, bool is_mask, struct sk_buff *); >> -int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *, >> - struct sw_flow_key *, bool log); >> +int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], >> + u64 *attrsp, bool log); >> +int ovs_nla_get_flow_metadata(struct net *net, >> + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1], >> + u64 attrs, struct sw_flow_key *key, bool log); >> >> int ovs_nla_put_identifier(const struct sw_flow 
*flow, struct sk_buff *skb); >> int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); >> diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h >> index d185860..23f8845 100644 >> --- a/datapath/linux/compat/include/linux/openvswitch.h >> +++ b/datapath/linux/compat/include/linux/openvswitch.h >> @@ -356,6 +356,8 @@ enum ovs_key_attr { >> OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ >> OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ >> OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking labels */ >> + OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ >> + OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ >> >> #ifdef __KERNEL__ >> /* Only used within kernel data path. */ >> @@ -496,6 +498,22 @@ struct ovs_key_ct_labels { >> >> #define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT) >> >> +struct ovs_key_ct_tuple_ipv4 { >> + __be32 ipv4_src; >> + __be32 ipv4_dst; >> + __be16 src_port; >> + __be16 dst_port; >> + __u8 ipv4_proto; >> +}; >> + >> +struct ovs_key_ct_tuple_ipv6 { >> + __be32 ipv6_src[4]; >> + __be32 ipv6_dst[4]; >> + __be16 src_port; >> + __be16 dst_port; >> + __u8 ipv6_proto; >> +}; >> + >> /** >> * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. >> * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow >> -- >> 2.1.4 >> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org <mailto:dev@openvswitch.org> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
diff --git a/lib/odp-execute.c b/lib/odp-execute.c index 1f6812a6dd02..50bbafaa0231 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -381,6 +381,8 @@ odp_execute_set_action(struct dp_packet *packet, const struct nlattr *a) case OVS_KEY_ATTR_VLAN: case OVS_KEY_ATTR_TCP_FLAGS: case OVS_KEY_ATTR_CT_STATE: + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: case OVS_KEY_ATTR_CT_ZONE: case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_CT_LABELS: @@ -476,6 +478,8 @@ odp_execute_masked_set_action(struct dp_packet *packet, case OVS_KEY_ATTR_CT_ZONE: case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_CT_LABELS: + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: case OVS_KEY_ATTR_ENCAP: case OVS_KEY_ATTR_ETHERTYPE: case OVS_KEY_ATTR_IN_PORT: diff --git a/lib/odp-util.c b/lib/odp-util.c index 41067385e821..1f1512ae47fd 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -150,6 +150,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize) case OVS_KEY_ATTR_CT_ZONE: return "ct_zone"; case OVS_KEY_ATTR_CT_MARK: return "ct_mark"; case OVS_KEY_ATTR_CT_LABELS: return "ct_label"; + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: return "ct_tuple4"; + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: return "ct_tuple6"; case OVS_KEY_ATTR_TUNNEL: return "tunnel"; case OVS_KEY_ATTR_IN_PORT: return "in_port"; case OVS_KEY_ATTR_ETHERNET: return "eth"; @@ -1874,6 +1876,8 @@ static const struct attr_len_tbl ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] = [OVS_KEY_ATTR_CT_ZONE] = { .len = 2 }, [OVS_KEY_ATTR_CT_MARK] = { .len = 4 }, [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = { .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, }; /* Returns the correct length of the payload for a flow key attribute of the @@ -2823,6 +2827,40 @@ format_odp_key_attr(const struct nlattr *a, const struct 
nlattr *ma, break; } + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: { + const struct ovs_key_ct_tuple_ipv4 *key = nl_attr_get(a); + const struct ovs_key_ct_tuple_ipv4 *mask = ma ? nl_attr_get(ma) : NULL; + + format_ipv4(ds, "src", key->ipv4_src, MASK(mask, ipv4_src), verbose); + format_ipv4(ds, "dst", key->ipv4_dst, MASK(mask, ipv4_dst), verbose); + format_u8u(ds, "proto", key->ipv4_proto, MASK(mask, ipv4_proto), + verbose); + format_be16(ds, "tp_src", key->src_port, MASK(mask, src_port), + verbose); + format_be16(ds, "tp_dst", key->dst_port, MASK(mask, dst_port), + verbose); + ds_chomp(ds, ','); + break; + } + + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: { + const struct ovs_key_ct_tuple_ipv6 *key = nl_attr_get(a); + const struct ovs_key_ct_tuple_ipv6 *mask = ma ? nl_attr_get(ma) : NULL; + + format_in6_addr(ds, "src", &key->ipv6_src, MASK(mask, ipv6_src), + verbose); + format_in6_addr(ds, "dst", &key->ipv6_dst, MASK(mask, ipv6_dst), + verbose); + format_u8u(ds, "proto", key->ipv6_proto, MASK(mask, ipv6_proto), + verbose); + format_be16(ds, "src_port", key->src_port, MASK(mask, src_port), + verbose); + format_be16(ds, "dst_port", key->dst_port, MASK(mask, dst_port), + verbose); + ds_chomp(ds, ','); + break; + } + case OVS_KEY_ATTR_TUNNEL: format_odp_tun_attr(a, ma, ds, verbose); break; diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index 520b8dd196bb..69cdf69f4b39 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -1025,6 +1025,8 @@ sflow_read_set_action(const struct nlattr *attr, case OVS_KEY_ATTR_CT_ZONE: case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_CT_LABELS: + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: case OVS_KEY_ATTR_UNSPEC: case __OVS_KEY_ATTR_MAX: default: