Message ID | 20221215124557.358665-2-amusil@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series | Add extension for partial CT flush | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/intel-ovs-compilation | fail | test: fail |
Ales Musil <amusil@redhat.com> writes: > Currently, the CT can be flushed by dpctl only be specifying > the whole 5-tuple. This is not very convenient when there are > only some fields known to the user of CT flush. Add new struct > ofputil_ct_match which represents the generic filtering that can > be done for CT flush. The match is done only on fields that are > non-zero with exception to the icmp fields. > > This allows the filtering just within dpctl, however > it is a preparation for OpenFlow extension. > > Reported-at: https://bugzilla.redhat.com/2120546 > Signed-off-by: Ales Musil <amusil@redhat.com> > --- > v3: Rebase on top of master. > Address the C99 comment and missing dpif_close call. > v2: Rebase on top of master. > Address comments from Paolo. > --- > NEWS | 2 + > include/openvswitch/ofp-util.h | 28 +++ > lib/automake.mk | 2 + > lib/ct-dpif.c | 201 +++++++++------------ > lib/ct-dpif.h | 4 +- > lib/dpctl.c | 45 +++-- > lib/dpctl.man | 3 +- > lib/ofp-ct-util.c | 311 +++++++++++++++++++++++++++++++++ > lib/ofp-ct-util.h | 34 ++++ > tests/system-traffic.at | 82 ++++++++- > 10 files changed, 568 insertions(+), 144 deletions(-) > create mode 100644 lib/ofp-ct-util.c > create mode 100644 lib/ofp-ct-util.h > > diff --git a/NEWS b/NEWS > index 265375e1c..ff8904b02 100644 > --- a/NEWS > +++ b/NEWS > @@ -14,6 +14,8 @@ Post-v3.0.0 > 10 Gbps link speed by default in case the actual link speed cannot be > determined. Previously it was 10 Mbps. Values can still be overridden > by specifying 'max-rate' or '[r]stp-path-cost' accordingly. > + - ovs-dpctl and related ovs-appctl commands: > + * "flush-conntrack" is capable of handling partial 5-tuple. > > > v3.0.0 - 15 Aug 2022 > diff --git a/include/openvswitch/ofp-util.h b/include/openvswitch/ofp-util.h > index 091a09cad..84937ae26 100644 > --- a/include/openvswitch/ofp-util.h > +++ b/include/openvswitch/ofp-util.h > @@ -19,6 +19,9 @@ > > #include <stdbool.h> > #include <stdint.h> > +#include <sys/types.h> > +#include <netinet/in.h> > + > #include "openvswitch/ofp-protocol.h" > > struct ofp_header; > @@ -27,6 +30,31 @@ struct ofp_header; > extern "C" { > #endif > > +struct ofputil_ct_tuple { > + struct in6_addr src; > + struct in6_addr dst; > + > + union { > + ovs_be16 src_port; > + ovs_be16 icmp_id; > + }; > + union { > + ovs_be16 dst_port; > + struct { > + uint8_t icmp_code; > + uint8_t icmp_type; > + }; > + }; > +}; > + > +struct ofputil_ct_match { > + uint8_t ip_proto; > + uint16_t l3_type; > + > + struct ofputil_ct_tuple tuple_orig; > + struct ofputil_ct_tuple tuple_reply; > +}; > + > bool ofputil_decode_hello(const struct ofp_header *, > uint32_t *allowed_versions); > struct ofpbuf *ofputil_encode_hello(uint32_t version_bitmap); > diff --git a/lib/automake.mk b/lib/automake.mk > index a0fabe38f..37135f118 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -227,6 +227,8 @@ lib_libopenvswitch_la_SOURCES = \ > lib/ofp-actions.c \ > lib/ofp-bundle.c \ > lib/ofp-connection.c \ > + lib/ofp-ct-util.c \ > + lib/ofp-ct-util.h \ > lib/ofp-ed-props.c \ > lib/ofp-errors.c \ > lib/ofp-flow.c \ > diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c > index 6f17a26b5..906e827c1 100644 > --- a/lib/ct-dpif.c > +++ b/lib/ct-dpif.c > @@ -20,6 +20,7 @@ > #include <errno.h> > > #include "ct-dpif.h" > +#include "ofp-ct-util.h" > #include "openvswitch/ofp-parse.h" > #include "openvswitch/vlog.h" > > @@ -71,6 +72,31 @@ ct_dpif_dump_start(struct dpif *dpif, struct ct_dpif_dump_state **dump, > return err; > } > > +static void > +ct_dpif_tuple_from_ofputil_ct_tuple(const struct ofputil_ct_tuple *ofp_tuple, > + struct ct_dpif_tuple *tuple, > + uint16_t l3_type, uint8_t ip_proto) > +{ > + if (l3_type == AF_INET) { > + tuple->src.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->src); > + tuple->dst.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->dst); > + } else { > + tuple->src.in6 = ofp_tuple->src; > + tuple->dst.in6 = ofp_tuple->dst; > + } > + > + tuple->l3_type = l3_type; > + tuple->ip_proto = ip_proto; > + tuple->src_port = ofp_tuple->src_port; > + > + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { > + tuple->icmp_code = ofp_tuple->icmp_code; > + tuple->icmp_type = ofp_tuple->icmp_type; > + } else { > + tuple->dst_port = ofp_tuple->dst_port; > + } > +} > + > /* Dump one connection from a tracker, and put it in 'entry'. > * > * 'dump' should have been initialized by ct_dpif_dump_start(). > @@ -100,7 +126,62 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump) > ? dpif->dpif_class->ct_dump_done(dpif, dump) > : EOPNOTSUPP); > } > - > + > +static int > +ct_dpif_flush_tuple(struct dpif *dpif, const uint16_t *zone, > + const struct ofputil_ct_match *match) { > + struct ct_dpif_dump_state *dump; > + struct ct_dpif_entry cte; > + int error; > + int tot_bkts; > + > + if (!dpif->dpif_class->ct_flush) { > + return EOPNOTSUPP; > + } > + > + if (VLOG_IS_DBG_ENABLED()) { > + struct ds ds = DS_EMPTY_INITIALIZER; > + ofputil_ct_match_format(&ds, match); > + VLOG_DBG("%s: ct_flush:%s in zone %d", dpif_name(dpif), ds_cstr(&ds), > + zone ? *zone : 0); > + ds_destroy(&ds); > + } > + > + /* If we have full five tuple in orig just do the flush over that > + * tuple directly. */ > + if (ofputil_ct_tuple_is_five_tuple(&match->tuple_orig, match->ip_proto)) { > + struct ct_dpif_tuple tuple; > + ct_dpif_tuple_from_ofputil_ct_tuple(&match->tuple_orig, &tuple, > + match->l3_type, match->ip_proto); > + return dpif->dpif_class->ct_flush(dpif, zone, &tuple); > + } > + > + error = ct_dpif_dump_start(dpif, &dump, zone, &tot_bkts); > + if (error) { > + return error; > + } > + > + while (!(error = ct_dpif_dump_next(dump, &cte))) { > + if (zone && *zone != cte.zone) { > + continue; > + } > + > + if (ofputil_ct_match_cmp(match, &cte)) { > + error = dpif->dpif_class->ct_flush(dpif, &cte.zone, > + &cte.tuple_orig); > + if (error) { > + break; > + } > + } > + } > + if (error == EOF) { > + error = 0; > + } > + > + ct_dpif_dump_done(dump); > + return error; > +} > + > /* Flush the entries in the connection tracker used by 'dpif'. The > * arguments have the following behavior: > * > @@ -111,14 +192,10 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump) > * in '*zone'. If 'zone' is NULL, use the default zone (zone 0). */ > int > ct_dpif_flush(struct dpif *dpif, const uint16_t *zone, > - const struct ct_dpif_tuple *tuple) > + const struct ofputil_ct_match *match) > { > - if (tuple) { > - struct ds ds = DS_EMPTY_INITIALIZER; > - ct_dpif_format_tuple(&ds, tuple); > - VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr(&ds), > - zone ? *zone : 0); > - ds_destroy(&ds); > + if (match) { > + return ct_dpif_flush_tuple(dpif, zone, match); > } else if (zone) { > VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone); > } else { > @@ -126,7 +203,7 @@ ct_dpif_flush(struct dpif *dpif, const uint16_t *zone, > } > > return (dpif->dpif_class->ct_flush > - ? dpif->dpif_class->ct_flush(dpif, zone, tuple) > + ? dpif->dpif_class->ct_flush(dpif, zone, NULL) > : EOPNOTSUPP); > } > > @@ -583,112 +660,6 @@ ct_dpif_format_tcp_stat(struct ds * ds, int tcp_state, int conn_per_state) > ds_put_format(ds, "=%u", conn_per_state); > } > > -/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'. > - * Returns true on success. Otherwise, returns false and puts the error > - * message in 'ds'. */ > -bool > -ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds *ds) > -{ > - char *pos, *key, *value, *copy; > - memset(tuple, 0, sizeof *tuple); > - > - pos = copy = xstrdup(s); > - while (ofputil_parse_key_value(&pos, &key, &value)) { > - if (!*value) { > - ds_put_format(ds, "field %s missing value", key); > - goto error; > - } > - > - if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) { > - if (tuple->l3_type && tuple->l3_type != AF_INET) { > - ds_put_cstr(ds, "L3 type set multiple times"); > - goto error; > - } else { > - tuple->l3_type = AF_INET; > - } > - if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip : > - &tuple->dst.ip)) { > - goto error_with_msg; > - } > - } else if (!strcmp(key, "ct_ipv6_src") || > - !strcmp(key, "ct_ipv6_dst")) { > - if (tuple->l3_type && tuple->l3_type != AF_INET6) { > - ds_put_cstr(ds, "L3 type set multiple times"); > - goto error; > - } else { > - tuple->l3_type = AF_INET6; > - } > - if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 : > - &tuple->dst.in6)) { > - goto error_with_msg; > - } > - } else if (!strcmp(key, "ct_nw_proto")) { > - char *err = str_to_u8(value, key, &tuple->ip_proto); > - if (err) { > - free(err); > - goto error_with_msg; > - } > - } else if (!strcmp(key, "ct_tp_src") || !strcmp(key,"ct_tp_dst")) { > - uint16_t port; > - char *err = str_to_u16(value, key, &port); > - if (err) { > - free(err); > - goto error_with_msg; > - } > - if (key[6] == 's') { > - tuple->src_port = htons(port); > - } else { > - tuple->dst_port = htons(port); > - } > - } else if (!strcmp(key, "icmp_type") || !strcmp(key, "icmp_code") || > - !strcmp(key, "icmp_id") ) { > - if (tuple->ip_proto != IPPROTO_ICMP && > - tuple->ip_proto != IPPROTO_ICMPV6) { > - ds_put_cstr(ds, "invalid L4 fields"); > - goto error; > - } > - uint16_t icmp_id; > - char *err; > - if (key[5] == 't') { > - err = str_to_u8(value, key, &tuple->icmp_type); > - } else if (key[5] == 'c') { > - err = str_to_u8(value, key, &tuple->icmp_code); > - } else { > - err = str_to_u16(value, key, &icmp_id); > - tuple->icmp_id = htons(icmp_id); > - } > - if (err) { > - free(err); > - goto error_with_msg; > - } > - } else { > - ds_put_format(ds, "invalid conntrack tuple field: %s", key); > - goto error; > - } > - } > - > - if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) || > - !tuple->ip_proto) { > - /* icmp_type, icmp_code, and icmp_id can be 0. */ > - if (tuple->ip_proto != IPPROTO_ICMP && > - tuple->ip_proto != IPPROTO_ICMPV6) { > - if (!tuple->src_port || !tuple->dst_port) { > - ds_put_cstr(ds, "at least one of the conntrack 5-tuple fields " > - "is missing."); > - goto error; > - } > - } > - } > - > - free(copy); > - return true; > - > -error_with_msg: > - ds_put_format(ds, "failed to parse field %s", key); > -error: > - free(copy); > - return false; > -} > > void > ct_dpif_push_zone_limit(struct ovs_list *zone_limits, uint16_t zone, > diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h > index 2848549b0..337c99dd4 100644 > --- a/lib/ct-dpif.h > +++ b/lib/ct-dpif.h > @@ -17,6 +17,7 @@ > #ifndef CT_DPIF_H > #define CT_DPIF_H > > +#include "openvswitch/ofp-util.h" > #include "openvswitch/types.h" > #include "packets.h" > > @@ -285,7 +286,7 @@ int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **, > int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *); > int ct_dpif_dump_done(struct ct_dpif_dump_state *); > int ct_dpif_flush(struct dpif *, const uint16_t *zone, > - const struct ct_dpif_tuple *); > + const struct ofputil_ct_match *); > int ct_dpif_set_maxconns(struct dpif *dpif, uint32_t maxconns); > int ct_dpif_get_maxconns(struct dpif *dpif, uint32_t *maxconns); > int ct_dpif_get_nconns(struct dpif *dpif, uint32_t *nconns); > @@ -311,7 +312,6 @@ void ct_dpif_format_ipproto(struct ds *ds, uint16_t ipproto); > void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *); > uint8_t ct_dpif_coalesce_tcp_state(uint8_t state); > void ct_dpif_format_tcp_stat(struct ds *, int, int); > -bool ct_dpif_parse_tuple(struct ct_dpif_tuple *, const char *s, struct ds *); > void ct_dpif_push_zone_limit(struct ovs_list *, uint16_t zone, uint32_t limit, > uint32_t count); > void ct_dpif_free_zone_limits(struct ovs_list *); > diff --git a/lib/dpctl.c b/lib/dpctl.c > index 29041fa3e..3cdedbe97 100644 > --- a/lib/dpctl.c > +++ b/lib/dpctl.c > @@ -40,6 +40,7 @@ > #include "netdev.h" > #include "netlink.h" > #include "odp-util.h" > +#include "ofp-ct-util.h" > #include "openvswitch/ofpbuf.h" > #include "packets.h" > #include "openvswitch/shash.h" > @@ -1707,47 +1708,41 @@ dpctl_flush_conntrack(int argc, const char *argv[], > struct dpctl_params *dpctl_p) > { > struct dpif *dpif = NULL; > - struct ct_dpif_tuple tuple, *ptuple = NULL; > - struct ds ds = DS_EMPTY_INITIALIZER; > - uint16_t zone, *pzone = NULL; > - int error; > + struct ofputil_ct_match match = {0}; > + uint16_t zone; > + bool with_zone = false; > int args = argc - 1; > > /* Parse ct tuple */ > - if (args && ct_dpif_parse_tuple(&tuple, argv[args], &ds)) { > - ptuple = &tuple; > + if (args) { > + struct ds ds = DS_EMPTY_INITIALIZER; > + if (!ofputil_ct_match_parse(&match, argv[args], &ds)) { > + dpctl_error(dpctl_p, EINVAL, "%s", ds_cstr(&ds)); > + ds_destroy(&ds); > + return EINVAL; > + } > args--; > } > > /* Parse zone */ > if (args && ovs_scan(argv[args], "zone=%"SCNu16, &zone)) { > - pzone = &zone; > + with_zone = true; > args--; > } > > - /* Report error if there are more than one unparsed argument. */ > - if (args > 1) { > - ds_put_cstr(&ds, "invalid arguments"); > - error = EINVAL; > - goto error; > - } > - > - error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif); > + int error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif); > if (error) { > - return error; > + dpctl_error(dpctl_p, error, "Cannot open dpif"); > + goto end; just returning error here is fine, right? > } > > - error = ct_dpif_flush(dpif, pzone, ptuple); > - if (!error) { > - dpif_close(dpif); > - return 0; > - } else { > - ds_put_cstr(&ds, "failed to flush conntrack"); > + error = ct_dpif_flush(dpif, with_zone ? &zone : NULL, &match); > + if (error) { > + dpctl_error(dpctl_p, error, "Failed to flush conntrack"); > + goto end; Given the above, the label could be removed, and so the goto here Other than that, the patch LGTM: Acked-by: Paolo Valerio <pvalerio@redhat.com> > } > > -error: > - dpctl_error(dpctl_p, error, "%s", ds_cstr(&ds)); > - ds_destroy(&ds); > +end: > dpif_close(dpif); > return error; > } > diff --git a/lib/dpctl.man b/lib/dpctl.man > index 87ea8087b..b0cabe05d 100644 > --- a/lib/dpctl.man > +++ b/lib/dpctl.man > @@ -312,7 +312,8 @@ If \fBzone\fR=\fIzone\fR is specified, only flushes the connections in > If \fIct-tuple\fR is provided, flushes the connection entry specified by > \fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not provided. > The userspace connection tracker requires flushing with the original pre-NATed > -tuple and a warning log will be otherwise generated. > +tuple and a warning log will be otherwise generated. The tuple can be partial > +and will remove all connections that are matching on the specified fields. > An example of an IPv4 ICMP \fIct-tuple\fR: > .IP > "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type=8,icmp_code=0,icmp_id=10" > diff --git a/lib/ofp-ct-util.c b/lib/ofp-ct-util.c > new file mode 100644 > index 000000000..9112305cc > --- /dev/null > +++ b/lib/ofp-ct-util.c > @@ -0,0 +1,311 @@ > + > +/* Copyright (c) 2022, Red Hat, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#include <config.h> > +#include <stdbool.h> > +#include <stdint.h> > +#include <sys/types.h> > +#include <netinet/in.h> > +#include <netinet/icmp6.h> > + > +#include "ct-dpif.h" > +#include "ofp-ct-util.h" > +#include "openvswitch/dynamic-string.h" > +#include "openvswitch/ofp-parse.h" > +#include "openvswitch/ofp-util.h" > +#include "openvswitch/packets.h" > + > +static inline bool > +ofputil_ct_inet_addr_cmp_partial(const struct in6_addr *partial, > + const union ct_dpif_inet_addr *addr, > + const uint16_t l3_type) > +{ > + if (ipv6_is_zero(partial)) { > + return true; > + } > + > + if (l3_type == AF_INET && in6_addr_get_mapped_ipv4(partial) != addr->ip) { > + return false; > + } > + > + if (l3_type == AF_INET6 && !ipv6_addr_equals(partial, &addr->in6)) { > + return false; > + } > + > + return true; > +} > + > +static inline bool > +ofputil_ct_tuple_ip_cmp_partial(const struct ofputil_ct_tuple *partial, > + const struct ct_dpif_tuple *tuple, > + const uint16_t l3_type, const uint8_t ip_proto) > +{ > + if (!ofputil_ct_inet_addr_cmp_partial(&partial->src, > + &tuple->src, l3_type)) { > + return false; > + } > + > + if (!ofputil_ct_inet_addr_cmp_partial(&partial->dst, > + &tuple->dst, l3_type)) { > + return false; > + } > + > + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { > + if (partial->icmp_id != tuple->icmp_id) { > + return false; > + } > + > + if (partial->icmp_type != tuple->icmp_type) { > + return false; > + } > + > + if (partial->icmp_code != tuple->icmp_code) { > + return false; > + } > + } else { > + if (partial->src_port && partial->src_port != tuple->src_port) { > + return false; > + } > + > + if (partial->dst_port && partial->dst_port != tuple->dst_port) { > + return false; > + } > + } > + > + return true; > +} > + > +/* Compares the non-zero members if they match. This is useful for clearing > + * up all connections specified by a partial tuples for orig/reply. */ > +bool > +ofputil_ct_match_cmp(const struct ofputil_ct_match *match, > + const struct ct_dpif_entry *entry) > +{ > + if (match->l3_type && match->l3_type != entry->tuple_orig.l3_type) { > + return false; > + } > + > + if (match->ip_proto && match->ip_proto != entry->tuple_orig.ip_proto) { > + return false; > + } > + > + if (!ofputil_ct_tuple_ip_cmp_partial(&match->tuple_orig, > + &entry->tuple_orig, > + match->l3_type, match->ip_proto)) { > + return false; > + } > + > + if (!ofputil_ct_tuple_ip_cmp_partial(&match->tuple_reply, > + &entry->tuple_reply, > + match->l3_type, match->ip_proto)) { > + return false; > + } > + > + return true; > +} > + > +static void > +ofputil_ct_tuple_format(struct ds *ds, const struct ofputil_ct_tuple *tuple, > + uint8_t ip_proto) > +{ > + ds_put_cstr(ds, "src="); > + ipv6_format_mapped(&tuple->src, ds); > + ds_put_cstr(ds, ",dst="); > + ipv6_format_mapped(&tuple->dst, ds); > + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { > + ds_put_format(ds, ",icmp_id=%u,icmp_type=%u,icmp_code=%u", > + ntohs(tuple->icmp_id), tuple->icmp_type, > + tuple->icmp_code); > + > + } else { > + ds_put_format(ds, ",src_port=%u,dst_port=%u", ntohs(tuple->src_port), > + ntohs(tuple->dst_port)); > + } > +} > + > +bool > +ofputil_ct_tuple_is_five_tuple(const struct ofputil_ct_tuple *tuple, > + uint8_t ip_proto) > +{ > + /* First check if we have address. */ > + bool five_tuple = !ipv6_is_zero(&tuple->src) && !ipv6_is_zero(&tuple->dst); > + > + if (!(ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6)) { > + five_tuple = five_tuple && tuple->src_port && tuple->dst_port; > + } > + > + return five_tuple; > +} > + > +void > +ofputil_ct_match_format(struct ds *ds, const struct ofputil_ct_match *match) > +{ > + ds_put_format(ds, " l3_type=%u,ip_proto=%u", match->l3_type, > + match->ip_proto); > + ds_put_cstr(ds, ",orig=("); > + ofputil_ct_tuple_format(ds, &match->tuple_orig, match->ip_proto); > + ds_put_cstr(ds, "),reply=("); > + ofputil_ct_tuple_format(ds, &match->tuple_reply, match->ip_proto); > + ds_put_cstr(ds, ")"); > +} > + > +static bool > +ofputil_ct_tuple_ip_parse(struct in6_addr *addr, char *value, uint16_t l3_type) > +{ > + if (!ipv6_is_zero(addr)) { > + return false; > + } > + > + if (l3_type == AF_INET) { > + ovs_be32 ip = 0; > + > + ip_parse(value, &ip); > + *addr = in6_addr_mapped_ipv4(ip); > + } else { > + ipv6_parse(value, addr); > + } > + > + return true; > +} > + > +/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'. > + * Returns true on success. Otherwise, returns false and puts the error > + * message in 'ds'. */ > +bool > +ofputil_ct_match_parse(struct ofputil_ct_match *match, const char *s, > + struct ds *ds) > +{ > + char *pos, *key, *value, *copy; > + > + memset(match, 0, sizeof *match); > + struct ofputil_ct_tuple *tuple = &match->tuple_orig; > + > + pos = copy = xstrdup(s); > + while (ofputil_parse_key_value(&pos, &key, &value)) { > + if (!*value) { > + ds_put_format(ds, "field %s missing value", key); > + goto error_with_msg; > + } > + > + if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst") > + || !strcmp(key, "ct_ipv6_src") || !strcmp(key, "ct_ipv6_dst")) { > + match->l3_type = key[6] == '6' ? AF_INET6 : AF_INET; > + uint8_t index = key[6] == '6' ? 8 : 6; > + struct in6_addr *addr = key[index] == 's' > + ? &tuple->src : &tuple->dst; > + > + if (!ofputil_ct_tuple_ip_parse(addr, value, match->l3_type)) { > + ds_put_format(ds, "%s is set multiple times", key); > + goto error; > + } > + } else if (!strcmp(key, "ct_nw_proto")) { > + char *err = str_to_u8(value, key, &match->ip_proto); > + > + if (err) { > + free(err); > + goto error_with_msg; > + } > + } else if (!strcmp(key, "ct_tp_src") || !strcmp(key, "ct_tp_dst")) { > + uint16_t port; > + char *err = str_to_u16(value, key, &port); > + > + if (err) { > + free(err); > + goto error_with_msg; > + } > + if (key[6] == 's') { > + tuple->src_port = htons(port); > + } else { > + tuple->dst_port = htons(port); > + } > + } else if (!strcmp(key, "icmp_type") || !strcmp(key, "icmp_code") || > + !strcmp(key, "icmp_id")) { > + if (match->ip_proto != IPPROTO_ICMP && > + match->ip_proto != IPPROTO_ICMPV6) { > + ds_put_cstr(ds, "invalid L4 fields"); > + goto error; > + } > + uint16_t icmp_id; > + char *err; > + > + if (key[5] == 't') { > + err = str_to_u8(value, key, &tuple->icmp_type); > + } else if (key[5] == 'c') { > + err = str_to_u8(value, key, &tuple->icmp_code); > + } else { > + err = str_to_u16(value, key, &icmp_id); > + tuple->icmp_id = htons(icmp_id); > + } > + if (err) { > + free(err); > + goto error_with_msg; > + } > + } else { > + ds_put_format(ds, "invalid conntrack tuple field: %s", key); > + goto error; > + } > + } > + > + if (!match->ip_proto && (tuple->src_port || tuple->dst_port)) { > + ds_put_cstr(ds, "port is set without protocol"); > + goto error; > + } > + > + /* For the filtering to work with icmp we need to fill the reply direction > + * with correct information. */ > + if (match->ip_proto == IPPROTO_ICMP) { > + switch (match->tuple_orig.icmp_type) { > + case ICMP4_ECHO_REQUEST: > + match->tuple_reply.icmp_type = ICMP4_ECHO_REPLY; > + break; > + case ICMP4_ECHO_REPLY: > + match->tuple_reply.icmp_type = ICMP4_ECHO_REQUEST; > + break; > + case ICMP4_TIMESTAMP: > + match->tuple_reply.icmp_type = ICMP4_TIMESTAMPREPLY; > + break; > + case ICMP4_TIMESTAMPREPLY: > + match->tuple_reply.icmp_type = ICMP4_TIMESTAMP; > + break; > + case ICMP4_INFOREQUEST: > + match->tuple_reply.icmp_type = ICMP4_INFOREPLY; > + break; > + case ICMP4_INFOREPLY: > + match->tuple_reply.icmp_type = ICMP4_INFOREQUEST; > + break; > + } > + match->tuple_reply.icmp_id = match->tuple_orig.icmp_id; > + } else if (match->ip_proto == IPPROTO_ICMPV6) { > + switch (match->tuple_orig.icmp_type) { > + case ICMP6_ECHO_REQUEST: > + match->tuple_reply.icmp_type = ICMP6_ECHO_REPLY; > + break; > + case ICMP6_ECHO_REPLY: > + match->tuple_reply.icmp_type = ICMP6_ECHO_REQUEST; > + break; > + } > + match->tuple_reply.icmp_id = match->tuple_orig.icmp_id; > + } > + > + free(copy); > + return true; > + > +error_with_msg: > + ds_put_format(ds, "failed to parse field %s", key); > +error: > + free(copy); > + return false; > +} > diff --git a/lib/ofp-ct-util.h b/lib/ofp-ct-util.h > new file mode 100644 > index 000000000..a53d73cb0 > --- /dev/null > +++ b/lib/ofp-ct-util.h > @@ -0,0 +1,34 @@ > +/* Copyright (c) 2022, Red Hat, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#ifndef OVS_OFP_CT_UTIL_H > +#define OVS_OFP_CT_UTIL_H > + > +#include "ct-dpif.h" > +#include "openvswitch/ofp-util.h" > + > +bool ofputil_ct_match_cmp(const struct ofputil_ct_match *match, > + const struct ct_dpif_entry *entry); > + > +bool ofputil_ct_tuple_is_five_tuple(const struct ofputil_ct_tuple *tuple, > + uint8_t ip_proto); > + > +void ofputil_ct_match_format(struct ds *ds, > + const struct ofputil_ct_match *match); > + > +bool ofputil_ct_match_parse(struct ofputil_ct_match *match, const char *s, > + struct ds *ds); > + > +#endif /* lib/ofp-ct-util.h */ > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index e5403519f..51903a658 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -2230,7 +2230,7 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10. > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > > -AT_SETUP([conntrack - ct flush by 5-tuple]) > +AT_SETUP([conntrack - ct flush]) > CHECK_CONNTRACK() > OVS_TRAFFIC_VSWITCHD_START() > > @@ -2291,6 +2291,86 @@ AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 $ICMP_TUPLE]) > AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"], [1], [dnl > ]) > > +dnl Test UDP from port 1 and 2, partial flush by src port > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"]) > + > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], [dnl > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=1']) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=2']) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > + > +dnl Test UDP from port 1 and 2, partial flush by dst port > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"]) > + > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], [dnl > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=2']) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=1']) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > + > +dnl Test UDP from port 1 and 2, partial flush by src address > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"]) > + > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], [dnl > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1']) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2']) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > + > +dnl Test UDP from port 1 and 2, partial flush by dst address > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"]) > + > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], [dnl > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.2']) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.1']) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > + > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > > -- > 2.38.1
On Thu, Dec 15, 2022 at 4:28 PM Paolo Valerio <pvalerio@redhat.com> wrote: > Ales Musil <amusil@redhat.com> writes: > > > Currently, the CT can be flushed by dpctl only be specifying > > the whole 5-tuple. This is not very convenient when there are > > only some fields known to the user of CT flush. Add new struct > > ofputil_ct_match which represents the generic filtering that can > > be done for CT flush. The match is done only on fields that are > > non-zero with exception to the icmp fields. > > > > This allows the filtering just within dpctl, however > > it is a preparation for OpenFlow extension. > > > > Reported-at: https://bugzilla.redhat.com/2120546 > > Signed-off-by: Ales Musil <amusil@redhat.com> > > --- > > v3: Rebase on top of master. > > Address the C99 comment and missing dpif_close call. > > v2: Rebase on top of master. > > Address comments from Paolo. > > --- > > NEWS | 2 + > > include/openvswitch/ofp-util.h | 28 +++ > > lib/automake.mk | 2 + > > lib/ct-dpif.c | 201 +++++++++------------ > > lib/ct-dpif.h | 4 +- > > lib/dpctl.c | 45 +++-- > > lib/dpctl.man | 3 +- > > lib/ofp-ct-util.c | 311 +++++++++++++++++++++++++++++++++ > > lib/ofp-ct-util.h | 34 ++++ > > tests/system-traffic.at | 82 ++++++++- > > 10 files changed, 568 insertions(+), 144 deletions(-) > > create mode 100644 lib/ofp-ct-util.c > > create mode 100644 lib/ofp-ct-util.h > > > > diff --git a/NEWS b/NEWS > > index 265375e1c..ff8904b02 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -14,6 +14,8 @@ Post-v3.0.0 > > 10 Gbps link speed by default in case the actual link speed cannot > be > > determined. Previously it was 10 Mbps. Values can still be > overridden > > by specifying 'max-rate' or '[r]stp-path-cost' accordingly. > > + - ovs-dpctl and related ovs-appctl commands: > > + * "flush-conntrack" is capable of handling partial 5-tuple. > > > > > > v3.0.0 - 15 Aug 2022 > > diff --git a/include/openvswitch/ofp-util.h > b/include/openvswitch/ofp-util.h > > index 091a09cad..84937ae26 100644 > > --- a/include/openvswitch/ofp-util.h > > +++ b/include/openvswitch/ofp-util.h > > @@ -19,6 +19,9 @@ > > > > #include <stdbool.h> > > #include <stdint.h> > > +#include <sys/types.h> > > +#include <netinet/in.h> > > + > > #include "openvswitch/ofp-protocol.h" > > > > struct ofp_header; > > @@ -27,6 +30,31 @@ struct ofp_header; > > extern "C" { > > #endif > > > > +struct ofputil_ct_tuple { > > + struct in6_addr src; > > + struct in6_addr dst; > > + > > + union { > > + ovs_be16 src_port; > > + ovs_be16 icmp_id; > > + }; > > + union { > > + ovs_be16 dst_port; > > + struct { > > + uint8_t icmp_code; > > + uint8_t icmp_type; > > + }; > > + }; > > +}; > > + > > +struct ofputil_ct_match { > > + uint8_t ip_proto; > > + uint16_t l3_type; > > + > > + struct ofputil_ct_tuple tuple_orig; > > + struct ofputil_ct_tuple tuple_reply; > > +}; > > + > > bool ofputil_decode_hello(const struct ofp_header *, > > uint32_t *allowed_versions); > > struct ofpbuf *ofputil_encode_hello(uint32_t version_bitmap); > > diff --git a/lib/automake.mk b/lib/automake.mk > > index a0fabe38f..37135f118 100644 > > --- a/lib/automake.mk > > +++ b/lib/automake.mk > > @@ -227,6 +227,8 @@ lib_libopenvswitch_la_SOURCES = \ > > lib/ofp-actions.c \ > > lib/ofp-bundle.c \ > > lib/ofp-connection.c \ > > + lib/ofp-ct-util.c \ > > + lib/ofp-ct-util.h \ > > lib/ofp-ed-props.c \ > > lib/ofp-errors.c \ > > lib/ofp-flow.c \ > > diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c > > index 6f17a26b5..906e827c1 100644 > > --- a/lib/ct-dpif.c > > +++ b/lib/ct-dpif.c > > @@ -20,6 +20,7 @@ > > #include <errno.h> > > > > #include "ct-dpif.h" > > +#include "ofp-ct-util.h" > > #include "openvswitch/ofp-parse.h" > > #include "openvswitch/vlog.h" > > > > @@ -71,6 +72,31 @@ ct_dpif_dump_start(struct dpif *dpif, struct > ct_dpif_dump_state **dump, > > return err; > > } > > > > +static void > > +ct_dpif_tuple_from_ofputil_ct_tuple(const struct ofputil_ct_tuple > *ofp_tuple, > > + struct ct_dpif_tuple *tuple, > > + uint16_t l3_type, uint8_t ip_proto) > > +{ > > + if (l3_type == AF_INET) { > > + tuple->src.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->src); > > + tuple->dst.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->dst); > > + } else { > > + tuple->src.in6 = ofp_tuple->src; > > + tuple->dst.in6 = ofp_tuple->dst; > > + } > > + > > + tuple->l3_type = l3_type; > > + tuple->ip_proto = ip_proto; > > + tuple->src_port = ofp_tuple->src_port; > > + > > + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { > > + tuple->icmp_code = ofp_tuple->icmp_code; > > + tuple->icmp_type = ofp_tuple->icmp_type; > > + } else { > > + tuple->dst_port = ofp_tuple->dst_port; > > + } > > +} > > + > > /* Dump one connection from a tracker, and put it in 'entry'. > > * > > * 'dump' should have been initialized by ct_dpif_dump_start(). > > @@ -100,7 +126,62 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump) > > ? dpif->dpif_class->ct_dump_done(dpif, dump) > > : EOPNOTSUPP); > > } > > - > > + > > +static int > > +ct_dpif_flush_tuple(struct dpif *dpif, const uint16_t *zone, > > + const struct ofputil_ct_match *match) { > > + struct ct_dpif_dump_state *dump; > > + struct ct_dpif_entry cte; > > + int error; > > + int tot_bkts; > > + > > + if (!dpif->dpif_class->ct_flush) { > > + return EOPNOTSUPP; > > + } > > + > > + if (VLOG_IS_DBG_ENABLED()) { > > + struct ds ds = DS_EMPTY_INITIALIZER; > > + ofputil_ct_match_format(&ds, match); > > + VLOG_DBG("%s: ct_flush:%s in zone %d", dpif_name(dpif), > ds_cstr(&ds), > > + zone ? *zone : 0); > > + ds_destroy(&ds); > > + } > > + > > + /* If we have full five tuple in orig just do the flush over that > > + * tuple directly. */ > > + if (ofputil_ct_tuple_is_five_tuple(&match->tuple_orig, > match->ip_proto)) { > > + struct ct_dpif_tuple tuple; > > + ct_dpif_tuple_from_ofputil_ct_tuple(&match->tuple_orig, &tuple, > > + match->l3_type, > match->ip_proto); > > + return dpif->dpif_class->ct_flush(dpif, zone, &tuple); > > + } > > + > > + error = ct_dpif_dump_start(dpif, &dump, zone, &tot_bkts); > > + if (error) { > > + return error; > > + } > > + > > + while (!(error = ct_dpif_dump_next(dump, &cte))) { > > + if (zone && *zone != cte.zone) { > > + continue; > > + } > > + > > + if (ofputil_ct_match_cmp(match, &cte)) { > > + error = dpif->dpif_class->ct_flush(dpif, &cte.zone, > > + &cte.tuple_orig); > > + if (error) { > > + break; > > + } > > + } > > + } > > + if (error == EOF) { > > + error = 0; > > + } > > + > > + ct_dpif_dump_done(dump); > > + return error; > > +} > > + > > /* Flush the entries in the connection tracker used by 'dpif'. The > > * arguments have the following behavior: > > * > > @@ -111,14 +192,10 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump) > > * in '*zone'. If 'zone' is NULL, use the default zone (zone 0). */ > > int > > ct_dpif_flush(struct dpif *dpif, const uint16_t *zone, > > - const struct ct_dpif_tuple *tuple) > > + const struct ofputil_ct_match *match) > > { > > - if (tuple) { > > - struct ds ds = DS_EMPTY_INITIALIZER; > > - ct_dpif_format_tuple(&ds, tuple); > > - VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), > ds_cstr(&ds), > > - zone ? *zone : 0); > > - ds_destroy(&ds); > > + if (match) { > > + return ct_dpif_flush_tuple(dpif, zone, match); > > } else if (zone) { > > VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone); > > } else { > > @@ -126,7 +203,7 @@ ct_dpif_flush(struct dpif *dpif, const uint16_t > *zone, > > } > > > > return (dpif->dpif_class->ct_flush > > - ? dpif->dpif_class->ct_flush(dpif, zone, tuple) > > + ? dpif->dpif_class->ct_flush(dpif, zone, NULL) > > : EOPNOTSUPP); > > } > > > > @@ -583,112 +660,6 @@ ct_dpif_format_tcp_stat(struct ds * ds, int > tcp_state, int conn_per_state) > > ds_put_format(ds, "=%u", conn_per_state); > > } > > > > -/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'. > > - * Returns true on success. Otherwise, returns false and puts the error > > - * message in 'ds'. */ > > -bool > > -ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct > ds *ds) > > -{ > > - char *pos, *key, *value, *copy; > > - memset(tuple, 0, sizeof *tuple); > > - > > - pos = copy = xstrdup(s); > > - while (ofputil_parse_key_value(&pos, &key, &value)) { > > - if (!*value) { > > - ds_put_format(ds, "field %s missing value", key); > > - goto error; > > - } > > - > > - if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) { > > - if (tuple->l3_type && tuple->l3_type != AF_INET) { > > - ds_put_cstr(ds, "L3 type set multiple times"); > > - goto error; > > - } else { > > - tuple->l3_type = AF_INET; > > - } > > - if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip : > > - &tuple->dst.ip)) { > > - goto error_with_msg; > > - } > > - } else if (!strcmp(key, "ct_ipv6_src") || > > - !strcmp(key, "ct_ipv6_dst")) { > > - if (tuple->l3_type && tuple->l3_type != AF_INET6) { > > - ds_put_cstr(ds, "L3 type set multiple times"); > > - goto error; > > - } else { > > - tuple->l3_type = AF_INET6; > > - } > > - if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 : > > - &tuple->dst.in6)) { > > - goto error_with_msg; > > - } > > - } else if (!strcmp(key, "ct_nw_proto")) { > > - char *err = str_to_u8(value, key, &tuple->ip_proto); > > - if (err) { > > - free(err); > > - goto error_with_msg; > > - } > > - } else if (!strcmp(key, "ct_tp_src") || > !strcmp(key,"ct_tp_dst")) { > > - uint16_t port; > > - char *err = str_to_u16(value, key, &port); > > - if (err) { > > - free(err); > > - goto error_with_msg; > > - } > > - if (key[6] == 's') { > > - tuple->src_port = htons(port); > > - } else { > > - tuple->dst_port = htons(port); > > - } > > - } else if (!strcmp(key, "icmp_type") || !strcmp(key, > "icmp_code") || > > - !strcmp(key, "icmp_id") ) { > > - if (tuple->ip_proto != IPPROTO_ICMP && > > - tuple->ip_proto != IPPROTO_ICMPV6) { > > - ds_put_cstr(ds, "invalid L4 fields"); > > - goto error; > > - } > > - uint16_t icmp_id; > > - char *err; > > - if (key[5] == 't') { > > - err = str_to_u8(value, key, &tuple->icmp_type); > > - } else if (key[5] == 'c') { > > - err = str_to_u8(value, key, &tuple->icmp_code); > > - } else { > > - err = str_to_u16(value, key, &icmp_id); > > - tuple->icmp_id = htons(icmp_id); > > - } > > - if (err) { > > - free(err); > > - goto error_with_msg; > > - } > > - } else { > > - ds_put_format(ds, "invalid conntrack tuple field: %s", key); > > - goto error; > > - } > > - } > > - > > - if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) > || > > - !tuple->ip_proto) { > > - /* icmp_type, icmp_code, and icmp_id can be 0. */ > > - if (tuple->ip_proto != IPPROTO_ICMP && > > - tuple->ip_proto != IPPROTO_ICMPV6) { > > - if (!tuple->src_port || !tuple->dst_port) { > > - ds_put_cstr(ds, "at least one of the conntrack 5-tuple > fields " > > - "is missing."); > > - goto error; > > - } > > - } > > - } > > - > > - free(copy); > > - return true; > > - > > -error_with_msg: > > - ds_put_format(ds, "failed to parse field %s", key); > > -error: > > - free(copy); > > - return false; > > -} > > > > void > > ct_dpif_push_zone_limit(struct ovs_list *zone_limits, uint16_t zone, > > diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h > > index 2848549b0..337c99dd4 100644 > > --- a/lib/ct-dpif.h > > +++ b/lib/ct-dpif.h > > @@ -17,6 +17,7 @@ > > #ifndef CT_DPIF_H > > #define CT_DPIF_H > > > > +#include "openvswitch/ofp-util.h" > > #include "openvswitch/types.h" > > #include "packets.h" > > > > @@ -285,7 +286,7 @@ int ct_dpif_dump_start(struct dpif *, struct > ct_dpif_dump_state **, > > int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry > *); > > int ct_dpif_dump_done(struct ct_dpif_dump_state *); > > int ct_dpif_flush(struct dpif *, const uint16_t *zone, > > - const struct ct_dpif_tuple *); > > + const struct ofputil_ct_match *); > > int ct_dpif_set_maxconns(struct dpif *dpif, uint32_t maxconns); > > int ct_dpif_get_maxconns(struct dpif *dpif, uint32_t *maxconns); > > int ct_dpif_get_nconns(struct dpif *dpif, uint32_t *nconns); > > @@ -311,7 +312,6 @@ void ct_dpif_format_ipproto(struct ds *ds, uint16_t > ipproto); > > void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *); > > uint8_t ct_dpif_coalesce_tcp_state(uint8_t state); > > void ct_dpif_format_tcp_stat(struct ds *, int, int); > > -bool ct_dpif_parse_tuple(struct ct_dpif_tuple *, const char *s, struct > ds *); > > void ct_dpif_push_zone_limit(struct ovs_list *, uint16_t zone, uint32_t > limit, > > uint32_t count); > > void ct_dpif_free_zone_limits(struct ovs_list *); > > diff --git a/lib/dpctl.c b/lib/dpctl.c > > index 29041fa3e..3cdedbe97 100644 > > --- a/lib/dpctl.c > > +++ b/lib/dpctl.c > > @@ -40,6 +40,7 @@ > > #include "netdev.h" > > #include "netlink.h" > > #include "odp-util.h" > > +#include "ofp-ct-util.h" > > #include "openvswitch/ofpbuf.h" > > #include "packets.h" > > #include "openvswitch/shash.h" > > @@ -1707,47 +1708,41 @@ dpctl_flush_conntrack(int argc, const char > *argv[], > > struct dpctl_params *dpctl_p) > > { > > struct dpif *dpif = NULL; > > - struct ct_dpif_tuple tuple, *ptuple = NULL; > > - struct ds ds = DS_EMPTY_INITIALIZER; > > - uint16_t zone, *pzone = NULL; > > - int error; > > + struct ofputil_ct_match match = {0}; > > + uint16_t zone; > > + bool with_zone = false; > > int args = argc - 1; > > > > /* Parse ct tuple */ > > - if (args && ct_dpif_parse_tuple(&tuple, argv[args], &ds)) { > > - ptuple = &tuple; > > + if (args) { > > + struct ds ds = DS_EMPTY_INITIALIZER; > > + if (!ofputil_ct_match_parse(&match, argv[args], &ds)) { > > + dpctl_error(dpctl_p, EINVAL, "%s", ds_cstr(&ds)); > > + ds_destroy(&ds); > > + return EINVAL; > > + } > > args--; > > } > > > > /* Parse zone */ > > if (args && ovs_scan(argv[args], "zone=%"SCNu16, &zone)) { > > - pzone = &zone; > > + with_zone = true; > > args--; > > } > > > > - /* Report error if there are more than one unparsed argument. */ > > - if (args > 1) { > > - ds_put_cstr(&ds, "invalid arguments"); > > - error = EINVAL; > > - goto error; > > - } > > - > > - error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif); > > + int error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif); > > if (error) { > > - return error; > > + dpctl_error(dpctl_p, error, "Cannot open dpif"); > > + goto end; > > just returning error here is fine, right? > I'm not sure if we are supposed to call close when the open errors out. If not the below diff could be applied. diff --git a/lib/dpctl.c b/lib/dpctl.c index 3cdedbe97..e669aa4c9 100644 --- a/lib/dpctl.c +++ b/lib/dpctl.c @@ -1733,16 +1733,14 @@ dpctl_flush_conntrack(int argc, const char *argv[], int error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif); if (error) { dpctl_error(dpctl_p, error, "Cannot open dpif"); - goto end; + return error; } error = ct_dpif_flush(dpif, with_zone ? &zone : NULL, &match); if (error) { dpctl_error(dpctl_p, error, "Failed to flush conntrack"); - goto end; } -end: dpif_close(dpif); return error; } > > > } > > > > - error = ct_dpif_flush(dpif, pzone, ptuple); > > - if (!error) { > > - dpif_close(dpif); > > - return 0; > > - } else { > > - ds_put_cstr(&ds, "failed to flush conntrack"); > > + error = ct_dpif_flush(dpif, with_zone ? &zone : NULL, &match); > > + if (error) { > > + dpctl_error(dpctl_p, error, "Failed to flush conntrack"); > > + goto end; > > Given the above, the label could be removed, and so the goto here > > Other than that, the patch LGTM: > > Acked-by: Paolo Valerio <pvalerio@redhat.com> > Thanks, Ales > > > } > > > > -error: > > - dpctl_error(dpctl_p, error, "%s", ds_cstr(&ds)); > > - ds_destroy(&ds); > > +end: > > dpif_close(dpif); > > return error; > > } > > diff --git a/lib/dpctl.man b/lib/dpctl.man > > index 87ea8087b..b0cabe05d 100644 > > --- a/lib/dpctl.man > > +++ b/lib/dpctl.man > > @@ -312,7 +312,8 @@ If \fBzone\fR=\fIzone\fR is specified, only flushes > the connections in > > If \fIct-tuple\fR is provided, flushes the connection entry specified by > > \fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not > provided. > > The userspace connection tracker requires flushing with the original > pre-NATed > > -tuple and a warning log will be otherwise generated. > > +tuple and a warning log will be otherwise generated. The tuple can be > partial > > +and will remove all connections that are matching on the specified > fields. > > An example of an IPv4 ICMP \fIct-tuple\fR: > > .IP > > > "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type=8,icmp_code=0,icmp_id=10" > > diff --git a/lib/ofp-ct-util.c b/lib/ofp-ct-util.c > > new file mode 100644 > > index 000000000..9112305cc > > --- /dev/null > > +++ b/lib/ofp-ct-util.c > > @@ -0,0 +1,311 @@ > > + > > +/* Copyright (c) 2022, Red Hat, Inc. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. > > + */ > > + > > +#include <config.h> > > +#include <stdbool.h> > > +#include <stdint.h> > > +#include <sys/types.h> > > +#include <netinet/in.h> > > +#include <netinet/icmp6.h> > > + > > +#include "ct-dpif.h" > > +#include "ofp-ct-util.h" > > +#include "openvswitch/dynamic-string.h" > > +#include "openvswitch/ofp-parse.h" > > +#include "openvswitch/ofp-util.h" > > +#include "openvswitch/packets.h" > > + > > +static inline bool > > +ofputil_ct_inet_addr_cmp_partial(const struct in6_addr *partial, > > + const union ct_dpif_inet_addr *addr, > > + const uint16_t l3_type) > > +{ > > + if (ipv6_is_zero(partial)) { > > + return true; > > + } > > + > > + if (l3_type == AF_INET && in6_addr_get_mapped_ipv4(partial) != > addr->ip) { > > + return false; > > + } > > + > > + if (l3_type == AF_INET6 && !ipv6_addr_equals(partial, &addr->in6)) { > > + return false; > > + } > > + > > + return true; > > +} > > + > > +static inline bool > > +ofputil_ct_tuple_ip_cmp_partial(const struct ofputil_ct_tuple *partial, > > + const struct ct_dpif_tuple *tuple, > > + const uint16_t l3_type, const uint8_t > ip_proto) > > +{ > > + if (!ofputil_ct_inet_addr_cmp_partial(&partial->src, > > + &tuple->src, l3_type)) { > > + return false; > > + } > > + > > + if (!ofputil_ct_inet_addr_cmp_partial(&partial->dst, > > + &tuple->dst, l3_type)) { > > + return false; > > + } > > + > > + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { > > + if (partial->icmp_id != tuple->icmp_id) { > > + return false; > > + } > > + > > + if (partial->icmp_type != tuple->icmp_type) { > > + return false; > > + } > > + > > + if (partial->icmp_code != tuple->icmp_code) { > > + return false; > > + } > > + } else { > > + if (partial->src_port && partial->src_port != tuple->src_port) { > > + return false; > > + } > > + > > + if (partial->dst_port && partial->dst_port != tuple->dst_port) { > > + return false; > > + } > > + } > > + > > + return true; > > +} > > + > > +/* Compares the non-zero members if they match. This is useful for > clearing > > + * up all connections specified by a partial tuples for orig/reply. */ > > +bool > > +ofputil_ct_match_cmp(const struct ofputil_ct_match *match, > > + const struct ct_dpif_entry *entry) > > +{ > > + if (match->l3_type && match->l3_type != entry->tuple_orig.l3_type) { > > + return false; > > + } > > + > > + if (match->ip_proto && match->ip_proto != > entry->tuple_orig.ip_proto) { > > + return false; > > + } > > + > > + if (!ofputil_ct_tuple_ip_cmp_partial(&match->tuple_orig, > > + &entry->tuple_orig, > > + match->l3_type, > match->ip_proto)) { > > + return false; > > + } > > + > > + if (!ofputil_ct_tuple_ip_cmp_partial(&match->tuple_reply, > > + &entry->tuple_reply, > > + match->l3_type, > match->ip_proto)) { > > + return false; > > + } > > + > > + return true; > > +} > > + > > +static void > > +ofputil_ct_tuple_format(struct ds *ds, const struct ofputil_ct_tuple > *tuple, > > + uint8_t ip_proto) > > +{ > > + ds_put_cstr(ds, "src="); > > + ipv6_format_mapped(&tuple->src, ds); > > + ds_put_cstr(ds, ",dst="); > > + ipv6_format_mapped(&tuple->dst, ds); > > + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { > > + ds_put_format(ds, ",icmp_id=%u,icmp_type=%u,icmp_code=%u", > > + ntohs(tuple->icmp_id), tuple->icmp_type, > > + tuple->icmp_code); > > + > > + } else { > > + ds_put_format(ds, ",src_port=%u,dst_port=%u", > ntohs(tuple->src_port), > > + ntohs(tuple->dst_port)); > > + } > > +} > > + > > +bool > > +ofputil_ct_tuple_is_five_tuple(const struct ofputil_ct_tuple *tuple, > > + uint8_t ip_proto) > > +{ > > + /* First check if we have address. */ > > + bool five_tuple = !ipv6_is_zero(&tuple->src) && > !ipv6_is_zero(&tuple->dst); > > + > > + if (!(ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6)) { > > + five_tuple = five_tuple && tuple->src_port && tuple->dst_port; > > + } > > + > > + return five_tuple; > > +} > > + > > +void > > +ofputil_ct_match_format(struct ds *ds, const struct ofputil_ct_match > *match) > > +{ > > + ds_put_format(ds, " l3_type=%u,ip_proto=%u", match->l3_type, > > + match->ip_proto); > > + ds_put_cstr(ds, ",orig=("); > > + ofputil_ct_tuple_format(ds, &match->tuple_orig, match->ip_proto); > > + ds_put_cstr(ds, "),reply=("); > > + ofputil_ct_tuple_format(ds, &match->tuple_reply, match->ip_proto); > > + ds_put_cstr(ds, ")"); > > +} > > + > > +static bool > > +ofputil_ct_tuple_ip_parse(struct in6_addr *addr, char *value, uint16_t > l3_type) > > +{ > > + if (!ipv6_is_zero(addr)) { > > + return false; > > + } > > + > > + if (l3_type == AF_INET) { > > + ovs_be32 ip = 0; > > + > > + ip_parse(value, &ip); > > + *addr = in6_addr_mapped_ipv4(ip); > > + } else { > > + ipv6_parse(value, addr); > > + } > > + > > + return true; > > +} > > + > > +/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'. > > + * Returns true on success. Otherwise, returns false and puts the error > > + * message in 'ds'. */ > > +bool > > +ofputil_ct_match_parse(struct ofputil_ct_match *match, const char *s, > > + struct ds *ds) > > +{ > > + char *pos, *key, *value, *copy; > > + > > + memset(match, 0, sizeof *match); > > + struct ofputil_ct_tuple *tuple = &match->tuple_orig; > > + > > + pos = copy = xstrdup(s); > > + while (ofputil_parse_key_value(&pos, &key, &value)) { > > + if (!*value) { > > + ds_put_format(ds, "field %s missing value", key); > > + goto error_with_msg; > > + } > > + > > + if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst") > > + || !strcmp(key, "ct_ipv6_src") || !strcmp(key, > "ct_ipv6_dst")) { > > + match->l3_type = key[6] == '6' ? AF_INET6 : AF_INET; > > + uint8_t index = key[6] == '6' ? 8 : 6; > > + struct in6_addr *addr = key[index] == 's' > > + ? &tuple->src : &tuple->dst; > > + > > + if (!ofputil_ct_tuple_ip_parse(addr, value, > match->l3_type)) { > > + ds_put_format(ds, "%s is set multiple times", key); > > + goto error; > > + } > > + } else if (!strcmp(key, "ct_nw_proto")) { > > + char *err = str_to_u8(value, key, &match->ip_proto); > > + > > + if (err) { > > + free(err); > > + goto error_with_msg; > > + } > > + } else if (!strcmp(key, "ct_tp_src") || !strcmp(key, > "ct_tp_dst")) { > > + uint16_t port; > > + char *err = str_to_u16(value, key, &port); > > + > > + if (err) { > > + free(err); > > + goto error_with_msg; > > + } > > + if (key[6] == 's') { > > + tuple->src_port = htons(port); > > + } else { > > + tuple->dst_port = htons(port); > > + } > > + } else if (!strcmp(key, "icmp_type") || !strcmp(key, > "icmp_code") || > > + !strcmp(key, "icmp_id")) { > > + if (match->ip_proto != IPPROTO_ICMP && > > + match->ip_proto != IPPROTO_ICMPV6) { > > + ds_put_cstr(ds, "invalid L4 fields"); > > + goto error; > > + } > > + uint16_t icmp_id; > > + char *err; > > + > > + if (key[5] == 't') { > > + err = str_to_u8(value, key, &tuple->icmp_type); > > + } else if (key[5] == 'c') { > > + err = str_to_u8(value, key, &tuple->icmp_code); > > + } else { > > + err = str_to_u16(value, key, &icmp_id); > > + tuple->icmp_id = htons(icmp_id); > > + } > > + if (err) { > > + free(err); > > + goto error_with_msg; > > + } > > + } else { > > + ds_put_format(ds, "invalid conntrack tuple field: %s", key); > > + goto error; > > + } > > + } > > + > > + if (!match->ip_proto && (tuple->src_port || tuple->dst_port)) { > > + ds_put_cstr(ds, "port is set without protocol"); > > + goto error; > > + } > > + > > + /* For the filtering to work with icmp we need to fill the reply > direction > > + * with correct information. */ > > + if (match->ip_proto == IPPROTO_ICMP) { > > + switch (match->tuple_orig.icmp_type) { > > + case ICMP4_ECHO_REQUEST: > > + match->tuple_reply.icmp_type = ICMP4_ECHO_REPLY; > > + break; > > + case ICMP4_ECHO_REPLY: > > + match->tuple_reply.icmp_type = ICMP4_ECHO_REQUEST; > > + break; > > + case ICMP4_TIMESTAMP: > > + match->tuple_reply.icmp_type = ICMP4_TIMESTAMPREPLY; > > + break; > > + case ICMP4_TIMESTAMPREPLY: > > + match->tuple_reply.icmp_type = ICMP4_TIMESTAMP; > > + break; > > + case ICMP4_INFOREQUEST: > > + match->tuple_reply.icmp_type = ICMP4_INFOREPLY; > > + break; > > + case ICMP4_INFOREPLY: > > + match->tuple_reply.icmp_type = ICMP4_INFOREQUEST; > > + break; > > + } > > + match->tuple_reply.icmp_id = match->tuple_orig.icmp_id; > > + } else if (match->ip_proto == IPPROTO_ICMPV6) { > > + switch (match->tuple_orig.icmp_type) { > > + case ICMP6_ECHO_REQUEST: > > + match->tuple_reply.icmp_type = ICMP6_ECHO_REPLY; > > + break; > > + case ICMP6_ECHO_REPLY: > > + match->tuple_reply.icmp_type = ICMP6_ECHO_REQUEST; > > + break; > > + } > > + match->tuple_reply.icmp_id = match->tuple_orig.icmp_id; > > + } > > + > > + free(copy); > > + return true; > > + > > +error_with_msg: > > + ds_put_format(ds, "failed to parse field %s", key); > > +error: > > + free(copy); > > + return false; > > +} > > diff --git a/lib/ofp-ct-util.h b/lib/ofp-ct-util.h > > new file mode 100644 > > index 000000000..a53d73cb0 > > --- /dev/null > > +++ b/lib/ofp-ct-util.h > > @@ -0,0 +1,34 @@ > > +/* Copyright (c) 2022, Red Hat, Inc. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. > > + */ > > + > > +#ifndef OVS_OFP_CT_UTIL_H > > +#define OVS_OFP_CT_UTIL_H > > + > > +#include "ct-dpif.h" > > +#include "openvswitch/ofp-util.h" > > + > > +bool ofputil_ct_match_cmp(const struct ofputil_ct_match *match, > > + const struct ct_dpif_entry *entry); > > + > > +bool ofputil_ct_tuple_is_five_tuple(const struct ofputil_ct_tuple > *tuple, > > + uint8_t ip_proto); > > + > > +void ofputil_ct_match_format(struct ds *ds, > > + const struct ofputil_ct_match *match); > > + > > +bool ofputil_ct_match_parse(struct ofputil_ct_match *match, const char > *s, > > + struct ds *ds); > > + > > +#endif /* lib/ofp-ct-util.h */ > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > > index e5403519f..51903a658 100644 > > --- a/tests/system-traffic.at > > +++ b/tests/system-traffic.at > > @@ -2230,7 +2230,7 @@ > udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10. > > OVS_TRAFFIC_VSWITCHD_STOP > > AT_CLEANUP > > > > -AT_SETUP([conntrack - ct flush by 5-tuple]) > > +AT_SETUP([conntrack - ct flush]) > > CHECK_CONNTRACK() > > OVS_TRAFFIC_VSWITCHD_START() > > > > @@ -2291,6 +2291,86 @@ AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 > $ICMP_TUPLE]) > > AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep > "orig=.src=10\.1\.1\.2,"], [1], [dnl > > ]) > > > > +dnl Test UDP from port 1 and 2, partial flush by src port > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], > [0], [dnl > > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack > 'ct_nw_proto=17,ct_tp_src=1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack > 'ct_nw_proto=17,ct_tp_src=2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > +dnl Test UDP from port 1 and 2, partial flush by dst port > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], > [0], [dnl > > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack > 'ct_nw_proto=17,ct_tp_dst=2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack > 'ct_nw_proto=17,ct_tp_dst=1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > +dnl Test UDP from port 1 and 2, partial flush by src address > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], > [0], [dnl > > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > +dnl Test UDP from port 1 and 2, partial flush by dst address > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], > [0], [dnl > > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > OVS_TRAFFIC_VSWITCHD_STOP > > AT_CLEANUP > > > > -- > > 2.38.1 > >
Ales Musil <amusil@redhat.com> writes: > On Thu, Dec 15, 2022 at 4:28 PM Paolo Valerio <pvalerio@redhat.com> wrote: > > Ales Musil <amusil@redhat.com> writes: > > > Currently, the CT can be flushed by dpctl only be specifying > > the whole 5-tuple. This is not very convenient when there are > > only some fields known to the user of CT flush. Add new struct > > ofputil_ct_match which represents the generic filtering that can > > be done for CT flush. The match is done only on fields that are > > non-zero with exception to the icmp fields. > > > > This allows the filtering just within dpctl, however > > it is a preparation for OpenFlow extension. > > > > Reported-at: https://bugzilla.redhat.com/2120546 > > Signed-off-by: Ales Musil <amusil@redhat.com> > > --- > > v3: Rebase on top of master. > > Address the C99 comment and missing dpif_close call. > > v2: Rebase on top of master. > > Address comments from Paolo. > > --- > > NEWS | 2 + > > include/openvswitch/ofp-util.h | 28 +++ > > lib/automake.mk | 2 + > > lib/ct-dpif.c | 201 +++++++++------------ > > lib/ct-dpif.h | 4 +- > > lib/dpctl.c | 45 +++-- > > lib/dpctl.man | 3 +- > > lib/ofp-ct-util.c | 311 +++++++++++++++++++++++++++++++++ > > lib/ofp-ct-util.h | 34 ++++ > > tests/system-traffic.at | 82 ++++++++- > > 10 files changed, 568 insertions(+), 144 deletions(-) > > create mode 100644 lib/ofp-ct-util.c > > create mode 100644 lib/ofp-ct-util.h > > > > diff --git a/NEWS b/NEWS > > index 265375e1c..ff8904b02 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -14,6 +14,8 @@ Post-v3.0.0 > > 10 Gbps link speed by default in case the actual link speed cannot > be > > determined. Previously it was 10 Mbps. Values can still be > overridden > > by specifying 'max-rate' or '[r]stp-path-cost' accordingly. > > + - ovs-dpctl and related ovs-appctl commands: > > + * "flush-conntrack" is capable of handling partial 5-tuple. > > > > > > v3.0.0 - 15 Aug 2022 > > diff --git a/include/openvswitch/ofp-util.h b/include/openvswitch/ > ofp-util.h > > index 091a09cad..84937ae26 100644 > > --- a/include/openvswitch/ofp-util.h > > +++ b/include/openvswitch/ofp-util.h > > @@ -19,6 +19,9 @@ > > > > #include <stdbool.h> > > #include <stdint.h> > > +#include <sys/types.h> > > +#include <netinet/in.h> > > + > > #include "openvswitch/ofp-protocol.h" > > > > struct ofp_header; > > @@ -27,6 +30,31 @@ struct ofp_header; > > extern "C" { > > #endif > > > > +struct ofputil_ct_tuple { > > + struct in6_addr src; > > + struct in6_addr dst; > > + > > + union { > > + ovs_be16 src_port; > > + ovs_be16 icmp_id; > > + }; > > + union { > > + ovs_be16 dst_port; > > + struct { > > + uint8_t icmp_code; > > + uint8_t icmp_type; > > + }; > > + }; > > +}; > > + > > +struct ofputil_ct_match { > > + uint8_t ip_proto; > > + uint16_t l3_type; > > + > > + struct ofputil_ct_tuple tuple_orig; > > + struct ofputil_ct_tuple tuple_reply; > > +}; > > + > > bool ofputil_decode_hello(const struct ofp_header *, > > uint32_t *allowed_versions); > > struct ofpbuf *ofputil_encode_hello(uint32_t version_bitmap); > > diff --git a/lib/automake.mk b/lib/automake.mk > > index a0fabe38f..37135f118 100644 > > --- a/lib/automake.mk > > +++ b/lib/automake.mk > > @@ -227,6 +227,8 @@ lib_libopenvswitch_la_SOURCES = \ > > lib/ofp-actions.c \ > > lib/ofp-bundle.c \ > > lib/ofp-connection.c \ > > + lib/ofp-ct-util.c \ > > + lib/ofp-ct-util.h \ > > lib/ofp-ed-props.c \ > > lib/ofp-errors.c \ > > lib/ofp-flow.c \ > > diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c > > index 6f17a26b5..906e827c1 100644 > > --- a/lib/ct-dpif.c > > +++ b/lib/ct-dpif.c > > @@ -20,6 +20,7 @@ > > #include <errno.h> > > > > #include "ct-dpif.h" > > +#include "ofp-ct-util.h" > > #include "openvswitch/ofp-parse.h" > > #include "openvswitch/vlog.h" > > > > @@ -71,6 +72,31 @@ ct_dpif_dump_start(struct dpif *dpif, struct > ct_dpif_dump_state **dump, > > return err; > > } > > > > +static void > > +ct_dpif_tuple_from_ofputil_ct_tuple(const struct ofputil_ct_tuple > *ofp_tuple, > > + struct ct_dpif_tuple *tuple, > > + uint16_t l3_type, uint8_t ip_proto) > > +{ > > + if (l3_type == AF_INET) { > > + tuple->src.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->src); > > + tuple->dst.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->dst); > > + } else { > > + tuple->src.in6 = ofp_tuple->src; > > + tuple->dst.in6 = ofp_tuple->dst; > > + } > > + > > + tuple->l3_type = l3_type; > > + tuple->ip_proto = ip_proto; > > + tuple->src_port = ofp_tuple->src_port; > > + > > + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { > > + tuple->icmp_code = ofp_tuple->icmp_code; > > + tuple->icmp_type = ofp_tuple->icmp_type; > > + } else { > > + tuple->dst_port = ofp_tuple->dst_port; > > + } > > +} > > + > > /* Dump one connection from a tracker, and put it in 'entry'. > > * > > * 'dump' should have been initialized by ct_dpif_dump_start(). > > @@ -100,7 +126,62 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump) > > ? dpif->dpif_class->ct_dump_done(dpif, dump) > > : EOPNOTSUPP); > > } > > - > > + > > +static int > > +ct_dpif_flush_tuple(struct dpif *dpif, const uint16_t *zone, > > + const struct ofputil_ct_match *match) { > > + struct ct_dpif_dump_state *dump; > > + struct ct_dpif_entry cte; > > + int error; > > + int tot_bkts; > > + > > + if (!dpif->dpif_class->ct_flush) { > > + return EOPNOTSUPP; > > + } > > + > > + if (VLOG_IS_DBG_ENABLED()) { > > + struct ds ds = DS_EMPTY_INITIALIZER; > > + ofputil_ct_match_format(&ds, match); > > + VLOG_DBG("%s: ct_flush:%s in zone %d", dpif_name(dpif), ds_cstr > (&ds), > > + zone ? *zone : 0); > > + ds_destroy(&ds); > > + } > > + > > + /* If we have full five tuple in orig just do the flush over that > > + * tuple directly. */ > > + if (ofputil_ct_tuple_is_five_tuple(&match->tuple_orig, match-> > ip_proto)) { > > + struct ct_dpif_tuple tuple; > > + ct_dpif_tuple_from_ofputil_ct_tuple(&match->tuple_orig, &tuple, > > + match->l3_type, match-> > ip_proto); > > + return dpif->dpif_class->ct_flush(dpif, zone, &tuple); > > + } > > + > > + error = ct_dpif_dump_start(dpif, &dump, zone, &tot_bkts); > > + if (error) { > > + return error; > > + } > > + > > + while (!(error = ct_dpif_dump_next(dump, &cte))) { > > + if (zone && *zone != cte.zone) { > > + continue; > > + } > > + > > + if (ofputil_ct_match_cmp(match, &cte)) { > > + error = dpif->dpif_class->ct_flush(dpif, &cte.zone, > > + &cte.tuple_orig); > > + if (error) { > > + break; > > + } > > + } > > + } > > + if (error == EOF) { > > + error = 0; > > + } > > + > > + ct_dpif_dump_done(dump); > > + return error; > > +} > > + > > /* Flush the entries in the connection tracker used by 'dpif'. The > > * arguments have the following behavior: > > * > > @@ -111,14 +192,10 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump) > > * in '*zone'. If 'zone' is NULL, use the default zone (zone 0). */ > > int > > ct_dpif_flush(struct dpif *dpif, const uint16_t *zone, > > - const struct ct_dpif_tuple *tuple) > > + const struct ofputil_ct_match *match) > > { > > - if (tuple) { > > - struct ds ds = DS_EMPTY_INITIALIZER; > > - ct_dpif_format_tuple(&ds, tuple); > > - VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr > (&ds), > > - zone ? *zone : 0); > > - ds_destroy(&ds); > > + if (match) { > > + return ct_dpif_flush_tuple(dpif, zone, match); > > } else if (zone) { > > VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone); > > } else { > > @@ -126,7 +203,7 @@ ct_dpif_flush(struct dpif *dpif, const uint16_t > *zone, > > } > > > > return (dpif->dpif_class->ct_flush > > - ? dpif->dpif_class->ct_flush(dpif, zone, tuple) > > + ? dpif->dpif_class->ct_flush(dpif, zone, NULL) > > : EOPNOTSUPP); > > } > > > > @@ -583,112 +660,6 @@ ct_dpif_format_tcp_stat(struct ds * ds, int > tcp_state, int conn_per_state) > > ds_put_format(ds, "=%u", conn_per_state); > > } > > > > -/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'. > > - * Returns true on success. Otherwise, returns false and puts the error > > - * message in 'ds'. */ > > -bool > > -ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct > ds *ds) > > -{ > > - char *pos, *key, *value, *copy; > > - memset(tuple, 0, sizeof *tuple); > > - > > - pos = copy = xstrdup(s); > > - while (ofputil_parse_key_value(&pos, &key, &value)) { > > - if (!*value) { > > - ds_put_format(ds, "field %s missing value", key); > > - goto error; > > - } > > - > > - if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) { > > - if (tuple->l3_type && tuple->l3_type != AF_INET) { > > - ds_put_cstr(ds, "L3 type set multiple times"); > > - goto error; > > - } else { > > - tuple->l3_type = AF_INET; > > - } > > - if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip : > > - &tuple->dst.ip)) { > > - goto error_with_msg; > > - } > > - } else if (!strcmp(key, "ct_ipv6_src") || > > - !strcmp(key, "ct_ipv6_dst")) { > > - if (tuple->l3_type && tuple->l3_type != AF_INET6) { > > - ds_put_cstr(ds, "L3 type set multiple times"); > > - goto error; > > - } else { > > - tuple->l3_type = AF_INET6; > > - } > > - if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 : > > - &tuple->dst.in6)) { > > - goto error_with_msg; > > - } > > - } else if (!strcmp(key, "ct_nw_proto")) { > > - char *err = str_to_u8(value, key, &tuple->ip_proto); > > - if (err) { > > - free(err); > > - goto error_with_msg; > > - } > > - } else if (!strcmp(key, "ct_tp_src") || !strcmp > (key,"ct_tp_dst")) { > > - uint16_t port; > > - char *err = str_to_u16(value, key, &port); > > - if (err) { > > - free(err); > > - goto error_with_msg; > > - } > > - if (key[6] == 's') { > > - tuple->src_port = htons(port); > > - } else { > > - tuple->dst_port = htons(port); > > - } > > - } else if (!strcmp(key, "icmp_type") || !strcmp(key, > "icmp_code") || > > - !strcmp(key, "icmp_id") ) { > > - if (tuple->ip_proto != IPPROTO_ICMP && > > - tuple->ip_proto != IPPROTO_ICMPV6) { > > - ds_put_cstr(ds, "invalid L4 fields"); > > - goto error; > > - } > > - uint16_t icmp_id; > > - char *err; > > - if (key[5] == 't') { > > - err = str_to_u8(value, key, &tuple->icmp_type); > > - } else if (key[5] == 'c') { > > - err = str_to_u8(value, key, &tuple->icmp_code); > > - } else { > > - err = str_to_u16(value, key, &icmp_id); > > - tuple->icmp_id = htons(icmp_id); > > - } > > - if (err) { > > - free(err); > > - goto error_with_msg; > > - } > > - } else { > > - ds_put_format(ds, "invalid conntrack tuple field: %s", key); > > - goto error; > > - } > > - } > > - > > - if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) | > | > > - !tuple->ip_proto) { > > - /* icmp_type, icmp_code, and icmp_id can be 0. */ > > - if (tuple->ip_proto != IPPROTO_ICMP && > > - tuple->ip_proto != IPPROTO_ICMPV6) { > > - if (!tuple->src_port || !tuple->dst_port) { > > - ds_put_cstr(ds, "at least one of the conntrack 5-tuple > fields " > > - "is missing."); > > - goto error; > > - } > > - } > > - } > > - > > - free(copy); > > - return true; > > - > > -error_with_msg: > > - ds_put_format(ds, "failed to parse field %s", key); > > -error: > > - free(copy); > > - return false; > > -} > > > > void > > ct_dpif_push_zone_limit(struct ovs_list *zone_limits, uint16_t zone, > > diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h > > index 2848549b0..337c99dd4 100644 > > --- a/lib/ct-dpif.h > > +++ b/lib/ct-dpif.h > > @@ -17,6 +17,7 @@ > > #ifndef CT_DPIF_H > > #define CT_DPIF_H > > > > +#include "openvswitch/ofp-util.h" > > #include "openvswitch/types.h" > > #include "packets.h" > > > > @@ -285,7 +286,7 @@ int ct_dpif_dump_start(struct dpif *, struct > ct_dpif_dump_state **, > > int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry > *); > > int ct_dpif_dump_done(struct ct_dpif_dump_state *); > > int ct_dpif_flush(struct dpif *, const uint16_t *zone, > > - const struct ct_dpif_tuple *); > > + const struct ofputil_ct_match *); > > int ct_dpif_set_maxconns(struct dpif *dpif, uint32_t maxconns); > > int ct_dpif_get_maxconns(struct dpif *dpif, uint32_t *maxconns); > > int ct_dpif_get_nconns(struct dpif *dpif, uint32_t *nconns); > > @@ -311,7 +312,6 @@ void ct_dpif_format_ipproto(struct ds *ds, uint16_t > ipproto); > > void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *); > > uint8_t ct_dpif_coalesce_tcp_state(uint8_t state); > > void ct_dpif_format_tcp_stat(struct ds *, int, int); > > -bool ct_dpif_parse_tuple(struct ct_dpif_tuple *, const char *s, struct > ds *); > > void ct_dpif_push_zone_limit(struct ovs_list *, uint16_t zone, uint32_t > limit, > > uint32_t count); > > void ct_dpif_free_zone_limits(struct ovs_list *); > > diff --git a/lib/dpctl.c b/lib/dpctl.c > > index 29041fa3e..3cdedbe97 100644 > > --- a/lib/dpctl.c > > +++ b/lib/dpctl.c > > @@ -40,6 +40,7 @@ > > #include "netdev.h" > > #include "netlink.h" > > #include "odp-util.h" > > +#include "ofp-ct-util.h" > > #include "openvswitch/ofpbuf.h" > > #include "packets.h" > > #include "openvswitch/shash.h" > > @@ -1707,47 +1708,41 @@ dpctl_flush_conntrack(int argc, const char *argv > [], > > struct dpctl_params *dpctl_p) > > { > > struct dpif *dpif = NULL; > > - struct ct_dpif_tuple tuple, *ptuple = NULL; > > - struct ds ds = DS_EMPTY_INITIALIZER; > > - uint16_t zone, *pzone = NULL; > > - int error; > > + struct ofputil_ct_match match = {0}; > > + uint16_t zone; > > + bool with_zone = false; > > int args = argc - 1; > > > > /* Parse ct tuple */ > > - if (args && ct_dpif_parse_tuple(&tuple, argv[args], &ds)) { > > - ptuple = &tuple; > > + if (args) { > > + struct ds ds = DS_EMPTY_INITIALIZER; > > + if (!ofputil_ct_match_parse(&match, argv[args], &ds)) { > > + dpctl_error(dpctl_p, EINVAL, "%s", ds_cstr(&ds)); > > + ds_destroy(&ds); > > + return EINVAL; > > + } > > args--; > > } > > > > /* Parse zone */ > > if (args && ovs_scan(argv[args], "zone=%"SCNu16, &zone)) { > > - pzone = &zone; > > + with_zone = true; > > args--; > > } > > > > - /* Report error if there are more than one unparsed argument. */ > > - if (args > 1) { > > - ds_put_cstr(&ds, "invalid arguments"); > > - error = EINVAL; > > - goto error; > > - } > > - > > - error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif); > > + int error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif); > > if (error) { > > - return error; > > + dpctl_error(dpctl_p, error, "Cannot open dpif"); > > + goto end; > > just returning error here is fine, right? > > > I'm not sure if we are supposed to call close when the open errors out. > If not the below diff could be applied. > this diff on top looks ok to me, thanks. > diff --git a/lib/dpctl.c b/lib/dpctl.c > index 3cdedbe97..e669aa4c9 100644 > --- a/lib/dpctl.c > +++ b/lib/dpctl.c > @@ -1733,16 +1733,14 @@ dpctl_flush_conntrack(int argc, const char *argv[], > int error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif); > if (error) { > dpctl_error(dpctl_p, error, "Cannot open dpif"); > - goto end; > + return error; > } > > error = ct_dpif_flush(dpif, with_zone ? &zone : NULL, &match); > if (error) { > dpctl_error(dpctl_p, error, "Failed to flush conntrack"); > - goto end; > } > > -end: > dpif_close(dpif); > return error; > } > > > > > } > > > > - error = ct_dpif_flush(dpif, pzone, ptuple); > > - if (!error) { > > - dpif_close(dpif); > > - return 0; > > - } else { > > - ds_put_cstr(&ds, "failed to flush conntrack"); > > + error = ct_dpif_flush(dpif, with_zone ? &zone : NULL, &match); > > + if (error) { > > + dpctl_error(dpctl_p, error, "Failed to flush conntrack"); > > + goto end; > > Given the above, the label could be removed, and so the goto here > > Other than that, the patch LGTM: > > Acked-by: Paolo Valerio <pvalerio@redhat.com> > > > Thanks, > Ales > > > > > } > > > > -error: > > - dpctl_error(dpctl_p, error, "%s", ds_cstr(&ds)); > > - ds_destroy(&ds); > > +end: > > dpif_close(dpif); > > return error; > > } > > diff --git a/lib/dpctl.man b/lib/dpctl.man > > index 87ea8087b..b0cabe05d 100644 > > --- a/lib/dpctl.man > > +++ b/lib/dpctl.man > > @@ -312,7 +312,8 @@ If \fBzone\fR=\fIzone\fR is specified, only flushes > the connections in > > If \fIct-tuple\fR is provided, flushes the connection entry specified by > > \fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not > provided. > > The userspace connection tracker requires flushing with the original > pre-NATed > > -tuple and a warning log will be otherwise generated. > > +tuple and a warning log will be otherwise generated. The tuple can be > partial > > +and will remove all connections that are matching on the specified > fields. > > An example of an IPv4 ICMP \fIct-tuple\fR: > > .IP > > "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type= > 8,icmp_code=0,icmp_id=10" > > diff --git a/lib/ofp-ct-util.c b/lib/ofp-ct-util.c > > new file mode 100644 > > index 000000000..9112305cc > > --- /dev/null > > +++ b/lib/ofp-ct-util.c > > @@ -0,0 +1,311 @@ > > + > > +/* Copyright (c) 2022, Red Hat, Inc. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. > > + */ > > + > > +#include <config.h> > > +#include <stdbool.h> > > +#include <stdint.h> > > +#include <sys/types.h> > > +#include <netinet/in.h> > > +#include <netinet/icmp6.h> > > + > > +#include "ct-dpif.h" > > +#include "ofp-ct-util.h" > > +#include "openvswitch/dynamic-string.h" > > +#include "openvswitch/ofp-parse.h" > > +#include "openvswitch/ofp-util.h" > > +#include "openvswitch/packets.h" > > + > > +static inline bool > > +ofputil_ct_inet_addr_cmp_partial(const struct in6_addr *partial, > > + const union ct_dpif_inet_addr *addr, > > + const uint16_t l3_type) > > +{ > > + if (ipv6_is_zero(partial)) { > > + return true; > > + } > > + > > + if (l3_type == AF_INET && in6_addr_get_mapped_ipv4(partial) != > addr->ip) { > > + return false; > > + } > > + > > + if (l3_type == AF_INET6 && !ipv6_addr_equals(partial, &addr->in6)) { > > + return false; > > + } > > + > > + return true; > > +} > > + > > +static inline bool > > +ofputil_ct_tuple_ip_cmp_partial(const struct ofputil_ct_tuple *partial, > > + const struct ct_dpif_tuple *tuple, > > + const uint16_t l3_type, const uint8_t > ip_proto) > > +{ > > + if (!ofputil_ct_inet_addr_cmp_partial(&partial->src, > > + &tuple->src, l3_type)) { > > + return false; > > + } > > + > > + if (!ofputil_ct_inet_addr_cmp_partial(&partial->dst, > > + &tuple->dst, l3_type)) { > > + return false; > > + } > > + > > + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { > > + if (partial->icmp_id != tuple->icmp_id) { > > + return false; > > + } > > + > > + if (partial->icmp_type != tuple->icmp_type) { > > + return false; > > + } > > + > > + if (partial->icmp_code != tuple->icmp_code) { > > + return false; > > + } > > + } else { > > + if (partial->src_port && partial->src_port != tuple->src_port) { > > + return false; > > + } > > + > > + if (partial->dst_port && partial->dst_port != tuple->dst_port) { > > + return false; > > + } > > + } > > + > > + return true; > > +} > > + > > +/* Compares the non-zero members if they match. This is useful for > clearing > > + * up all connections specified by a partial tuples for orig/reply. */ > > +bool > > +ofputil_ct_match_cmp(const struct ofputil_ct_match *match, > > + const struct ct_dpif_entry *entry) > > +{ > > + if (match->l3_type && match->l3_type != entry->tuple_orig.l3_type) { > > + return false; > > + } > > + > > + if (match->ip_proto && match->ip_proto != entry-> > tuple_orig.ip_proto) { > > + return false; > > + } > > + > > + if (!ofputil_ct_tuple_ip_cmp_partial(&match->tuple_orig, > > + &entry->tuple_orig, > > + match->l3_type, match-> > ip_proto)) { > > + return false; > > + } > > + > > + if (!ofputil_ct_tuple_ip_cmp_partial(&match->tuple_reply, > > + &entry->tuple_reply, > > + match->l3_type, match-> > ip_proto)) { > > + return false; > > + } > > + > > + return true; > > +} > > + > > +static void > > +ofputil_ct_tuple_format(struct ds *ds, const struct ofputil_ct_tuple > *tuple, > > + uint8_t ip_proto) > > +{ > > + ds_put_cstr(ds, "src="); > > + ipv6_format_mapped(&tuple->src, ds); > > + ds_put_cstr(ds, ",dst="); > > + ipv6_format_mapped(&tuple->dst, ds); > > + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { > > + ds_put_format(ds, ",icmp_id=%u,icmp_type=%u,icmp_code=%u", > > + ntohs(tuple->icmp_id), tuple->icmp_type, > > + tuple->icmp_code); > > + > > + } else { > > + ds_put_format(ds, ",src_port=%u,dst_port=%u", ntohs(tuple-> > src_port), > > + ntohs(tuple->dst_port)); > > + } > > +} > > + > > +bool > > +ofputil_ct_tuple_is_five_tuple(const struct ofputil_ct_tuple *tuple, > > + uint8_t ip_proto) > > +{ > > + /* First check if we have address. */ > > + bool five_tuple = !ipv6_is_zero(&tuple->src) && !ipv6_is_zero(& > tuple->dst); > > + > > + if (!(ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6)) { > > + five_tuple = five_tuple && tuple->src_port && tuple->dst_port; > > + } > > + > > + return five_tuple; > > +} > > + > > +void > > +ofputil_ct_match_format(struct ds *ds, const struct ofputil_ct_match > *match) > > +{ > > + ds_put_format(ds, " l3_type=%u,ip_proto=%u", match->l3_type, > > + match->ip_proto); > > + ds_put_cstr(ds, ",orig=("); > > + ofputil_ct_tuple_format(ds, &match->tuple_orig, match->ip_proto); > > + ds_put_cstr(ds, "),reply=("); > > + ofputil_ct_tuple_format(ds, &match->tuple_reply, match->ip_proto); > > + ds_put_cstr(ds, ")"); > > +} > > + > > +static bool > > +ofputil_ct_tuple_ip_parse(struct in6_addr *addr, char *value, uint16_t > l3_type) > > +{ > > + if (!ipv6_is_zero(addr)) { > > + return false; > > + } > > + > > + if (l3_type == AF_INET) { > > + ovs_be32 ip = 0; > > + > > + ip_parse(value, &ip); > > + *addr = in6_addr_mapped_ipv4(ip); > > + } else { > > + ipv6_parse(value, addr); > > + } > > + > > + return true; > > +} > > + > > +/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'. > > + * Returns true on success. Otherwise, returns false and puts the error > > + * message in 'ds'. */ > > +bool > > +ofputil_ct_match_parse(struct ofputil_ct_match *match, const char *s, > > + struct ds *ds) > > +{ > > + char *pos, *key, *value, *copy; > > + > > + memset(match, 0, sizeof *match); > > + struct ofputil_ct_tuple *tuple = &match->tuple_orig; > > + > > + pos = copy = xstrdup(s); > > + while (ofputil_parse_key_value(&pos, &key, &value)) { > > + if (!*value) { > > + ds_put_format(ds, "field %s missing value", key); > > + goto error_with_msg; > > + } > > + > > + if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst") > > + || !strcmp(key, "ct_ipv6_src") || !strcmp(key, > "ct_ipv6_dst")) { > > + match->l3_type = key[6] == '6' ? AF_INET6 : AF_INET; > > + uint8_t index = key[6] == '6' ? 8 : 6; > > + struct in6_addr *addr = key[index] == 's' > > + ? &tuple->src : &tuple->dst; > > + > > + if (!ofputil_ct_tuple_ip_parse(addr, value, match->l3_type)) > { > > + ds_put_format(ds, "%s is set multiple times", key); > > + goto error; > > + } > > + } else if (!strcmp(key, "ct_nw_proto")) { > > + char *err = str_to_u8(value, key, &match->ip_proto); > > + > > + if (err) { > > + free(err); > > + goto error_with_msg; > > + } > > + } else if (!strcmp(key, "ct_tp_src") || !strcmp(key, > "ct_tp_dst")) { > > + uint16_t port; > > + char *err = str_to_u16(value, key, &port); > > + > > + if (err) { > > + free(err); > > + goto error_with_msg; > > + } > > + if (key[6] == 's') { > > + tuple->src_port = htons(port); > > + } else { > > + tuple->dst_port = htons(port); > > + } > > + } else if (!strcmp(key, "icmp_type") || !strcmp(key, > "icmp_code") || > > + !strcmp(key, "icmp_id")) { > > + if (match->ip_proto != IPPROTO_ICMP && > > + match->ip_proto != IPPROTO_ICMPV6) { > > + ds_put_cstr(ds, "invalid L4 fields"); > > + goto error; > > + } > > + uint16_t icmp_id; > > + char *err; > > + > > + if (key[5] == 't') { > > + err = str_to_u8(value, key, &tuple->icmp_type); > > + } else if (key[5] == 'c') { > > + err = str_to_u8(value, key, &tuple->icmp_code); > > + } else { > > + err = str_to_u16(value, key, &icmp_id); > > + tuple->icmp_id = htons(icmp_id); > > + } > > + if (err) { > > + free(err); > > + goto error_with_msg; > > + } > > + } else { > > + ds_put_format(ds, "invalid conntrack tuple field: %s", key); > > + goto error; > > + } > > + } > > + > > + if (!match->ip_proto && (tuple->src_port || tuple->dst_port)) { > > + ds_put_cstr(ds, "port is set without protocol"); > > + goto error; > > + } > > + > > + /* For the filtering to work with icmp we need to fill the reply > direction > > + * with correct information. */ > > + if (match->ip_proto == IPPROTO_ICMP) { > > + switch (match->tuple_orig.icmp_type) { > > + case ICMP4_ECHO_REQUEST: > > + match->tuple_reply.icmp_type = ICMP4_ECHO_REPLY; > > + break; > > + case ICMP4_ECHO_REPLY: > > + match->tuple_reply.icmp_type = ICMP4_ECHO_REQUEST; > > + break; > > + case ICMP4_TIMESTAMP: > > + match->tuple_reply.icmp_type = ICMP4_TIMESTAMPREPLY; > > + break; > > + case ICMP4_TIMESTAMPREPLY: > > + match->tuple_reply.icmp_type = ICMP4_TIMESTAMP; > > + break; > > + case ICMP4_INFOREQUEST: > > + match->tuple_reply.icmp_type = ICMP4_INFOREPLY; > > + break; > > + case ICMP4_INFOREPLY: > > + match->tuple_reply.icmp_type = ICMP4_INFOREQUEST; > > + break; > > + } > > + match->tuple_reply.icmp_id = match->tuple_orig.icmp_id; > > + } else if (match->ip_proto == IPPROTO_ICMPV6) { > > + switch (match->tuple_orig.icmp_type) { > > + case ICMP6_ECHO_REQUEST: > > + match->tuple_reply.icmp_type = ICMP6_ECHO_REPLY; > > + break; > > + case ICMP6_ECHO_REPLY: > > + match->tuple_reply.icmp_type = ICMP6_ECHO_REQUEST; > > + break; > > + } > > + match->tuple_reply.icmp_id = match->tuple_orig.icmp_id; > > + } > > + > > + free(copy); > > + return true; > > + > > +error_with_msg: > > + ds_put_format(ds, "failed to parse field %s", key); > > +error: > > + free(copy); > > + return false; > > +} > > diff --git a/lib/ofp-ct-util.h b/lib/ofp-ct-util.h > > new file mode 100644 > > index 000000000..a53d73cb0 > > --- /dev/null > > +++ b/lib/ofp-ct-util.h > > @@ -0,0 +1,34 @@ > > +/* Copyright (c) 2022, Red Hat, Inc. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. > > + */ > > + > > +#ifndef OVS_OFP_CT_UTIL_H > > +#define OVS_OFP_CT_UTIL_H > > + > > +#include "ct-dpif.h" > > +#include "openvswitch/ofp-util.h" > > + > > +bool ofputil_ct_match_cmp(const struct ofputil_ct_match *match, > > + const struct ct_dpif_entry *entry); > > + > > +bool ofputil_ct_tuple_is_five_tuple(const struct ofputil_ct_tuple > *tuple, > > + uint8_t ip_proto); > > + > > +void ofputil_ct_match_format(struct ds *ds, > > + const struct ofputil_ct_match *match); > > + > > +bool ofputil_ct_match_parse(struct ofputil_ct_match *match, const char > *s, > > + struct ds *ds); > > + > > +#endif /* lib/ofp-ct-util.h */ > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > > index e5403519f..51903a658 100644 > > --- a/tests/system-traffic.at > > +++ b/tests/system-traffic.at > > @@ -2230,7 +2230,7 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport= > 2),reply=(src=10.1.1.2,dst=10. > > OVS_TRAFFIC_VSWITCHD_STOP > > AT_CLEANUP > > > > -AT_SETUP([conntrack - ct flush by 5-tuple]) > > +AT_SETUP([conntrack - ct flush]) > > CHECK_CONNTRACK() > > OVS_TRAFFIC_VSWITCHD_START() > > > > @@ -2291,6 +2291,86 @@ AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 > $ICMP_TUPLE]) > > AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1 > \.2,"], [1], [dnl > > ]) > > > > +dnl Test UDP from port 1 and 2, partial flush by src port > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet= > 50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet= > 50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], > [0], [dnl > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src= > 10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src= > 10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src= > 1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src= > 10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src= > 2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > +dnl Test UDP from port 1 and 2, partial flush by dst port > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet= > 50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet= > 50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], > [0], [dnl > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src= > 10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src= > 10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst= > 2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src= > 10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst= > 1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > +dnl Test UDP from port 1 and 2, partial flush by src address > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet= > 50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet= > 50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], > [0], [dnl > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src= > 10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src= > 10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src= > 10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > +dnl Test UDP from port 1 and 2, partial flush by dst address > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet= > 50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet= > 50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], > [0], [dnl > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src= > 10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src= > 10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src= > 10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > OVS_TRAFFIC_VSWITCHD_STOP > > AT_CLEANUP > > > > -- > > 2.38.1 > > > > > -- > > Ales Musil > > Senior Software Engineer - OVN Core > > Red Hat EMEA > > amusil@redhat.com IM: amusil > > [logo]
diff --git a/NEWS b/NEWS index 265375e1c..ff8904b02 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ Post-v3.0.0 10 Gbps link speed by default in case the actual link speed cannot be determined. Previously it was 10 Mbps. Values can still be overridden by specifying 'max-rate' or '[r]stp-path-cost' accordingly. + - ovs-dpctl and related ovs-appctl commands: + * "flush-conntrack" is capable of handling partial 5-tuple. v3.0.0 - 15 Aug 2022 diff --git a/include/openvswitch/ofp-util.h b/include/openvswitch/ofp-util.h index 091a09cad..84937ae26 100644 --- a/include/openvswitch/ofp-util.h +++ b/include/openvswitch/ofp-util.h @@ -19,6 +19,9 @@ #include <stdbool.h> #include <stdint.h> +#include <sys/types.h> +#include <netinet/in.h> + #include "openvswitch/ofp-protocol.h" struct ofp_header; @@ -27,6 +30,31 @@ struct ofp_header; extern "C" { #endif +struct ofputil_ct_tuple { + struct in6_addr src; + struct in6_addr dst; + + union { + ovs_be16 src_port; + ovs_be16 icmp_id; + }; + union { + ovs_be16 dst_port; + struct { + uint8_t icmp_code; + uint8_t icmp_type; + }; + }; +}; + +struct ofputil_ct_match { + uint8_t ip_proto; + uint16_t l3_type; + + struct ofputil_ct_tuple tuple_orig; + struct ofputil_ct_tuple tuple_reply; +}; + bool ofputil_decode_hello(const struct ofp_header *, uint32_t *allowed_versions); struct ofpbuf *ofputil_encode_hello(uint32_t version_bitmap); diff --git a/lib/automake.mk b/lib/automake.mk index a0fabe38f..37135f118 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -227,6 +227,8 @@ lib_libopenvswitch_la_SOURCES = \ lib/ofp-actions.c \ lib/ofp-bundle.c \ lib/ofp-connection.c \ + lib/ofp-ct-util.c \ + lib/ofp-ct-util.h \ lib/ofp-ed-props.c \ lib/ofp-errors.c \ lib/ofp-flow.c \ diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c index 6f17a26b5..906e827c1 100644 --- a/lib/ct-dpif.c +++ b/lib/ct-dpif.c @@ -20,6 +20,7 @@ #include <errno.h> #include "ct-dpif.h" +#include "ofp-ct-util.h" #include "openvswitch/ofp-parse.h" #include "openvswitch/vlog.h" @@ -71,6 +72,31 @@ ct_dpif_dump_start(struct dpif *dpif, struct ct_dpif_dump_state **dump, return err; } +static void +ct_dpif_tuple_from_ofputil_ct_tuple(const struct ofputil_ct_tuple *ofp_tuple, + struct ct_dpif_tuple *tuple, + uint16_t l3_type, uint8_t ip_proto) +{ + if (l3_type == AF_INET) { + tuple->src.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->src); + tuple->dst.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->dst); + } else { + tuple->src.in6 = ofp_tuple->src; + tuple->dst.in6 = ofp_tuple->dst; + } + + tuple->l3_type = l3_type; + tuple->ip_proto = ip_proto; + tuple->src_port = ofp_tuple->src_port; + + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { + tuple->icmp_code = ofp_tuple->icmp_code; + tuple->icmp_type = ofp_tuple->icmp_type; + } else { + tuple->dst_port = ofp_tuple->dst_port; + } +} + /* Dump one connection from a tracker, and put it in 'entry'. * * 'dump' should have been initialized by ct_dpif_dump_start(). @@ -100,7 +126,62 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump) ? dpif->dpif_class->ct_dump_done(dpif, dump) : EOPNOTSUPP); } - + +static int +ct_dpif_flush_tuple(struct dpif *dpif, const uint16_t *zone, + const struct ofputil_ct_match *match) { + struct ct_dpif_dump_state *dump; + struct ct_dpif_entry cte; + int error; + int tot_bkts; + + if (!dpif->dpif_class->ct_flush) { + return EOPNOTSUPP; + } + + if (VLOG_IS_DBG_ENABLED()) { + struct ds ds = DS_EMPTY_INITIALIZER; + ofputil_ct_match_format(&ds, match); + VLOG_DBG("%s: ct_flush:%s in zone %d", dpif_name(dpif), ds_cstr(&ds), + zone ? *zone : 0); + ds_destroy(&ds); + } + + /* If we have full five tuple in orig just do the flush over that + * tuple directly. */ + if (ofputil_ct_tuple_is_five_tuple(&match->tuple_orig, match->ip_proto)) { + struct ct_dpif_tuple tuple; + ct_dpif_tuple_from_ofputil_ct_tuple(&match->tuple_orig, &tuple, + match->l3_type, match->ip_proto); + return dpif->dpif_class->ct_flush(dpif, zone, &tuple); + } + + error = ct_dpif_dump_start(dpif, &dump, zone, &tot_bkts); + if (error) { + return error; + } + + while (!(error = ct_dpif_dump_next(dump, &cte))) { + if (zone && *zone != cte.zone) { + continue; + } + + if (ofputil_ct_match_cmp(match, &cte)) { + error = dpif->dpif_class->ct_flush(dpif, &cte.zone, + &cte.tuple_orig); + if (error) { + break; + } + } + } + if (error == EOF) { + error = 0; + } + + ct_dpif_dump_done(dump); + return error; +} + /* Flush the entries in the connection tracker used by 'dpif'. The * arguments have the following behavior: * @@ -111,14 +192,10 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump) * in '*zone'. If 'zone' is NULL, use the default zone (zone 0). */ int ct_dpif_flush(struct dpif *dpif, const uint16_t *zone, - const struct ct_dpif_tuple *tuple) + const struct ofputil_ct_match *match) { - if (tuple) { - struct ds ds = DS_EMPTY_INITIALIZER; - ct_dpif_format_tuple(&ds, tuple); - VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr(&ds), - zone ? *zone : 0); - ds_destroy(&ds); + if (match) { + return ct_dpif_flush_tuple(dpif, zone, match); } else if (zone) { VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone); } else { @@ -126,7 +203,7 @@ ct_dpif_flush(struct dpif *dpif, const uint16_t *zone, } return (dpif->dpif_class->ct_flush - ? dpif->dpif_class->ct_flush(dpif, zone, tuple) + ? dpif->dpif_class->ct_flush(dpif, zone, NULL) : EOPNOTSUPP); } @@ -583,112 +660,6 @@ ct_dpif_format_tcp_stat(struct ds * ds, int tcp_state, int conn_per_state) ds_put_format(ds, "=%u", conn_per_state); } -/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'. - * Returns true on success. Otherwise, returns false and puts the error - * message in 'ds'. */ -bool -ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds *ds) -{ - char *pos, *key, *value, *copy; - memset(tuple, 0, sizeof *tuple); - - pos = copy = xstrdup(s); - while (ofputil_parse_key_value(&pos, &key, &value)) { - if (!*value) { - ds_put_format(ds, "field %s missing value", key); - goto error; - } - - if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) { - if (tuple->l3_type && tuple->l3_type != AF_INET) { - ds_put_cstr(ds, "L3 type set multiple times"); - goto error; - } else { - tuple->l3_type = AF_INET; - } - if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip : - &tuple->dst.ip)) { - goto error_with_msg; - } - } else if (!strcmp(key, "ct_ipv6_src") || - !strcmp(key, "ct_ipv6_dst")) { - if (tuple->l3_type && tuple->l3_type != AF_INET6) { - ds_put_cstr(ds, "L3 type set multiple times"); - goto error; - } else { - tuple->l3_type = AF_INET6; - } - if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 : - &tuple->dst.in6)) { - goto error_with_msg; - } - } else if (!strcmp(key, "ct_nw_proto")) { - char *err = str_to_u8(value, key, &tuple->ip_proto); - if (err) { - free(err); - goto error_with_msg; - } - } else if (!strcmp(key, "ct_tp_src") || !strcmp(key,"ct_tp_dst")) { - uint16_t port; - char *err = str_to_u16(value, key, &port); - if (err) { - free(err); - goto error_with_msg; - } - if (key[6] == 's') { - tuple->src_port = htons(port); - } else { - tuple->dst_port = htons(port); - } - } else if (!strcmp(key, "icmp_type") || !strcmp(key, "icmp_code") || - !strcmp(key, "icmp_id") ) { - if (tuple->ip_proto != IPPROTO_ICMP && - tuple->ip_proto != IPPROTO_ICMPV6) { - ds_put_cstr(ds, "invalid L4 fields"); - goto error; - } - uint16_t icmp_id; - char *err; - if (key[5] == 't') { - err = str_to_u8(value, key, &tuple->icmp_type); - } else if (key[5] == 'c') { - err = str_to_u8(value, key, &tuple->icmp_code); - } else { - err = str_to_u16(value, key, &icmp_id); - tuple->icmp_id = htons(icmp_id); - } - if (err) { - free(err); - goto error_with_msg; - } - } else { - ds_put_format(ds, "invalid conntrack tuple field: %s", key); - goto error; - } - } - - if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) || - !tuple->ip_proto) { - /* icmp_type, icmp_code, and icmp_id can be 0. */ - if (tuple->ip_proto != IPPROTO_ICMP && - tuple->ip_proto != IPPROTO_ICMPV6) { - if (!tuple->src_port || !tuple->dst_port) { - ds_put_cstr(ds, "at least one of the conntrack 5-tuple fields " - "is missing."); - goto error; - } - } - } - - free(copy); - return true; - -error_with_msg: - ds_put_format(ds, "failed to parse field %s", key); -error: - free(copy); - return false; -} void ct_dpif_push_zone_limit(struct ovs_list *zone_limits, uint16_t zone, diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h index 2848549b0..337c99dd4 100644 --- a/lib/ct-dpif.h +++ b/lib/ct-dpif.h @@ -17,6 +17,7 @@ #ifndef CT_DPIF_H #define CT_DPIF_H +#include "openvswitch/ofp-util.h" #include "openvswitch/types.h" #include "packets.h" @@ -285,7 +286,7 @@ int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **, int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *); int ct_dpif_dump_done(struct ct_dpif_dump_state *); int ct_dpif_flush(struct dpif *, const uint16_t *zone, - const struct ct_dpif_tuple *); + const struct ofputil_ct_match *); int ct_dpif_set_maxconns(struct dpif *dpif, uint32_t maxconns); int ct_dpif_get_maxconns(struct dpif *dpif, uint32_t *maxconns); int ct_dpif_get_nconns(struct dpif *dpif, uint32_t *nconns); @@ -311,7 +312,6 @@ void ct_dpif_format_ipproto(struct ds *ds, uint16_t ipproto); void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *); uint8_t ct_dpif_coalesce_tcp_state(uint8_t state); void ct_dpif_format_tcp_stat(struct ds *, int, int); -bool ct_dpif_parse_tuple(struct ct_dpif_tuple *, const char *s, struct ds *); void ct_dpif_push_zone_limit(struct ovs_list *, uint16_t zone, uint32_t limit, uint32_t count); void ct_dpif_free_zone_limits(struct ovs_list *); diff --git a/lib/dpctl.c b/lib/dpctl.c index 29041fa3e..3cdedbe97 100644 --- a/lib/dpctl.c +++ b/lib/dpctl.c @@ -40,6 +40,7 @@ #include "netdev.h" #include "netlink.h" #include "odp-util.h" +#include "ofp-ct-util.h" #include "openvswitch/ofpbuf.h" #include "packets.h" #include "openvswitch/shash.h" @@ -1707,47 +1708,41 @@ dpctl_flush_conntrack(int argc, const char *argv[], struct dpctl_params *dpctl_p) { struct dpif *dpif = NULL; - struct ct_dpif_tuple tuple, *ptuple = NULL; - struct ds ds = DS_EMPTY_INITIALIZER; - uint16_t zone, *pzone = NULL; - int error; + struct ofputil_ct_match match = {0}; + uint16_t zone; + bool with_zone = false; int args = argc - 1; /* Parse ct tuple */ - if (args && ct_dpif_parse_tuple(&tuple, argv[args], &ds)) { - ptuple = &tuple; + if (args) { + struct ds ds = DS_EMPTY_INITIALIZER; + if (!ofputil_ct_match_parse(&match, argv[args], &ds)) { + dpctl_error(dpctl_p, EINVAL, "%s", ds_cstr(&ds)); + ds_destroy(&ds); + return EINVAL; + } args--; } /* Parse zone */ if (args && ovs_scan(argv[args], "zone=%"SCNu16, &zone)) { - pzone = &zone; + with_zone = true; args--; } - /* Report error if there are more than one unparsed argument. */ - if (args > 1) { - ds_put_cstr(&ds, "invalid arguments"); - error = EINVAL; - goto error; - } - - error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif); + int error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif); if (error) { - return error; + dpctl_error(dpctl_p, error, "Cannot open dpif"); + goto end; } - error = ct_dpif_flush(dpif, pzone, ptuple); - if (!error) { - dpif_close(dpif); - return 0; - } else { - ds_put_cstr(&ds, "failed to flush conntrack"); + error = ct_dpif_flush(dpif, with_zone ? &zone : NULL, &match); + if (error) { + dpctl_error(dpctl_p, error, "Failed to flush conntrack"); + goto end; } -error: - dpctl_error(dpctl_p, error, "%s", ds_cstr(&ds)); - ds_destroy(&ds); +end: dpif_close(dpif); return error; } diff --git a/lib/dpctl.man b/lib/dpctl.man index 87ea8087b..b0cabe05d 100644 --- a/lib/dpctl.man +++ b/lib/dpctl.man @@ -312,7 +312,8 @@ If \fBzone\fR=\fIzone\fR is specified, only flushes the connections in If \fIct-tuple\fR is provided, flushes the connection entry specified by \fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not provided. The userspace connection tracker requires flushing with the original pre-NATed -tuple and a warning log will be otherwise generated. +tuple and a warning log will be otherwise generated. The tuple can be partial +and will remove all connections that are matching on the specified fields. An example of an IPv4 ICMP \fIct-tuple\fR: .IP "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type=8,icmp_code=0,icmp_id=10" diff --git a/lib/ofp-ct-util.c b/lib/ofp-ct-util.c new file mode 100644 index 000000000..9112305cc --- /dev/null +++ b/lib/ofp-ct-util.c @@ -0,0 +1,311 @@ + +/* Copyright (c) 2022, Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> +#include <stdbool.h> +#include <stdint.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <netinet/icmp6.h> + +#include "ct-dpif.h" +#include "ofp-ct-util.h" +#include "openvswitch/dynamic-string.h" +#include "openvswitch/ofp-parse.h" +#include "openvswitch/ofp-util.h" +#include "openvswitch/packets.h" + +static inline bool +ofputil_ct_inet_addr_cmp_partial(const struct in6_addr *partial, + const union ct_dpif_inet_addr *addr, + const uint16_t l3_type) +{ + if (ipv6_is_zero(partial)) { + return true; + } + + if (l3_type == AF_INET && in6_addr_get_mapped_ipv4(partial) != addr->ip) { + return false; + } + + if (l3_type == AF_INET6 && !ipv6_addr_equals(partial, &addr->in6)) { + return false; + } + + return true; +} + +static inline bool +ofputil_ct_tuple_ip_cmp_partial(const struct ofputil_ct_tuple *partial, + const struct ct_dpif_tuple *tuple, + const uint16_t l3_type, const uint8_t ip_proto) +{ + if (!ofputil_ct_inet_addr_cmp_partial(&partial->src, + &tuple->src, l3_type)) { + return false; + } + + if (!ofputil_ct_inet_addr_cmp_partial(&partial->dst, + &tuple->dst, l3_type)) { + return false; + } + + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { + if (partial->icmp_id != tuple->icmp_id) { + return false; + } + + if (partial->icmp_type != tuple->icmp_type) { + return false; + } + + if (partial->icmp_code != tuple->icmp_code) { + return false; + } + } else { + if (partial->src_port && partial->src_port != tuple->src_port) { + return false; + } + + if (partial->dst_port && partial->dst_port != tuple->dst_port) { + return false; + } + } + + return true; +} + +/* Compares the non-zero members if they match. This is useful for clearing + * up all connections specified by a partial tuples for orig/reply. */ +bool +ofputil_ct_match_cmp(const struct ofputil_ct_match *match, + const struct ct_dpif_entry *entry) +{ + if (match->l3_type && match->l3_type != entry->tuple_orig.l3_type) { + return false; + } + + if (match->ip_proto && match->ip_proto != entry->tuple_orig.ip_proto) { + return false; + } + + if (!ofputil_ct_tuple_ip_cmp_partial(&match->tuple_orig, + &entry->tuple_orig, + match->l3_type, match->ip_proto)) { + return false; + } + + if (!ofputil_ct_tuple_ip_cmp_partial(&match->tuple_reply, + &entry->tuple_reply, + match->l3_type, match->ip_proto)) { + return false; + } + + return true; +} + +static void +ofputil_ct_tuple_format(struct ds *ds, const struct ofputil_ct_tuple *tuple, + uint8_t ip_proto) +{ + ds_put_cstr(ds, "src="); + ipv6_format_mapped(&tuple->src, ds); + ds_put_cstr(ds, ",dst="); + ipv6_format_mapped(&tuple->dst, ds); + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { + ds_put_format(ds, ",icmp_id=%u,icmp_type=%u,icmp_code=%u", + ntohs(tuple->icmp_id), tuple->icmp_type, + tuple->icmp_code); + + } else { + ds_put_format(ds, ",src_port=%u,dst_port=%u", ntohs(tuple->src_port), + ntohs(tuple->dst_port)); + } +} + +bool +ofputil_ct_tuple_is_five_tuple(const struct ofputil_ct_tuple *tuple, + uint8_t ip_proto) +{ + /* First check if we have address. */ + bool five_tuple = !ipv6_is_zero(&tuple->src) && !ipv6_is_zero(&tuple->dst); + + if (!(ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6)) { + five_tuple = five_tuple && tuple->src_port && tuple->dst_port; + } + + return five_tuple; +} + +void +ofputil_ct_match_format(struct ds *ds, const struct ofputil_ct_match *match) +{ + ds_put_format(ds, " l3_type=%u,ip_proto=%u", match->l3_type, + match->ip_proto); + ds_put_cstr(ds, ",orig=("); + ofputil_ct_tuple_format(ds, &match->tuple_orig, match->ip_proto); + ds_put_cstr(ds, "),reply=("); + ofputil_ct_tuple_format(ds, &match->tuple_reply, match->ip_proto); + ds_put_cstr(ds, ")"); +} + +static bool +ofputil_ct_tuple_ip_parse(struct in6_addr *addr, char *value, uint16_t l3_type) +{ + if (!ipv6_is_zero(addr)) { + return false; + } + + if (l3_type == AF_INET) { + ovs_be32 ip = 0; + + ip_parse(value, &ip); + *addr = in6_addr_mapped_ipv4(ip); + } else { + ipv6_parse(value, addr); + } + + return true; +} + +/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'. + * Returns true on success. Otherwise, returns false and puts the error + * message in 'ds'. */ +bool +ofputil_ct_match_parse(struct ofputil_ct_match *match, const char *s, + struct ds *ds) +{ + char *pos, *key, *value, *copy; + + memset(match, 0, sizeof *match); + struct ofputil_ct_tuple *tuple = &match->tuple_orig; + + pos = copy = xstrdup(s); + while (ofputil_parse_key_value(&pos, &key, &value)) { + if (!*value) { + ds_put_format(ds, "field %s missing value", key); + goto error_with_msg; + } + + if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst") + || !strcmp(key, "ct_ipv6_src") || !strcmp(key, "ct_ipv6_dst")) { + match->l3_type = key[6] == '6' ? AF_INET6 : AF_INET; + uint8_t index = key[6] == '6' ? 8 : 6; + struct in6_addr *addr = key[index] == 's' + ? &tuple->src : &tuple->dst; + + if (!ofputil_ct_tuple_ip_parse(addr, value, match->l3_type)) { + ds_put_format(ds, "%s is set multiple times", key); + goto error; + } + } else if (!strcmp(key, "ct_nw_proto")) { + char *err = str_to_u8(value, key, &match->ip_proto); + + if (err) { + free(err); + goto error_with_msg; + } + } else if (!strcmp(key, "ct_tp_src") || !strcmp(key, "ct_tp_dst")) { + uint16_t port; + char *err = str_to_u16(value, key, &port); + + if (err) { + free(err); + goto error_with_msg; + } + if (key[6] == 's') { + tuple->src_port = htons(port); + } else { + tuple->dst_port = htons(port); + } + } else if (!strcmp(key, "icmp_type") || !strcmp(key, "icmp_code") || + !strcmp(key, "icmp_id")) { + if (match->ip_proto != IPPROTO_ICMP && + match->ip_proto != IPPROTO_ICMPV6) { + ds_put_cstr(ds, "invalid L4 fields"); + goto error; + } + uint16_t icmp_id; + char *err; + + if (key[5] == 't') { + err = str_to_u8(value, key, &tuple->icmp_type); + } else if (key[5] == 'c') { + err = str_to_u8(value, key, &tuple->icmp_code); + } else { + err = str_to_u16(value, key, &icmp_id); + tuple->icmp_id = htons(icmp_id); + } + if (err) { + free(err); + goto error_with_msg; + } + } else { + ds_put_format(ds, "invalid conntrack tuple field: %s", key); + goto error; + } + } + + if (!match->ip_proto && (tuple->src_port || tuple->dst_port)) { + ds_put_cstr(ds, "port is set without protocol"); + goto error; + } + + /* For the filtering to work with icmp we need to fill the reply direction + * with correct information. */ + if (match->ip_proto == IPPROTO_ICMP) { + switch (match->tuple_orig.icmp_type) { + case ICMP4_ECHO_REQUEST: + match->tuple_reply.icmp_type = ICMP4_ECHO_REPLY; + break; + case ICMP4_ECHO_REPLY: + match->tuple_reply.icmp_type = ICMP4_ECHO_REQUEST; + break; + case ICMP4_TIMESTAMP: + match->tuple_reply.icmp_type = ICMP4_TIMESTAMPREPLY; + break; + case ICMP4_TIMESTAMPREPLY: + match->tuple_reply.icmp_type = ICMP4_TIMESTAMP; + break; + case ICMP4_INFOREQUEST: + match->tuple_reply.icmp_type = ICMP4_INFOREPLY; + break; + case ICMP4_INFOREPLY: + match->tuple_reply.icmp_type = ICMP4_INFOREQUEST; + break; + } + match->tuple_reply.icmp_id = match->tuple_orig.icmp_id; + } else if (match->ip_proto == IPPROTO_ICMPV6) { + switch (match->tuple_orig.icmp_type) { + case ICMP6_ECHO_REQUEST: + match->tuple_reply.icmp_type = ICMP6_ECHO_REPLY; + break; + case ICMP6_ECHO_REPLY: + match->tuple_reply.icmp_type = ICMP6_ECHO_REQUEST; + break; + } + match->tuple_reply.icmp_id = match->tuple_orig.icmp_id; + } + + free(copy); + return true; + +error_with_msg: + ds_put_format(ds, "failed to parse field %s", key); +error: + free(copy); + return false; +} diff --git a/lib/ofp-ct-util.h b/lib/ofp-ct-util.h new file mode 100644 index 000000000..a53d73cb0 --- /dev/null +++ b/lib/ofp-ct-util.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2022, Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef OVS_OFP_CT_UTIL_H +#define OVS_OFP_CT_UTIL_H + +#include "ct-dpif.h" +#include "openvswitch/ofp-util.h" + +bool ofputil_ct_match_cmp(const struct ofputil_ct_match *match, + const struct ct_dpif_entry *entry); + +bool ofputil_ct_tuple_is_five_tuple(const struct ofputil_ct_tuple *tuple, + uint8_t ip_proto); + +void ofputil_ct_match_format(struct ds *ds, + const struct ofputil_ct_match *match); + +bool ofputil_ct_match_parse(struct ofputil_ct_match *match, const char *s, + struct ds *ds); + +#endif /* lib/ofp-ct-util.h */ diff --git a/tests/system-traffic.at b/tests/system-traffic.at index e5403519f..51903a658 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -2230,7 +2230,7 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10. OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP -AT_SETUP([conntrack - ct flush by 5-tuple]) +AT_SETUP([conntrack - ct flush]) CHECK_CONNTRACK() OVS_TRAFFIC_VSWITCHD_START() @@ -2291,6 +2291,86 @@ AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 $ICMP_TUPLE]) AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"], [1], [dnl ]) +dnl Test UDP from port 1 and 2, partial flush by src port +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"]) + + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], [dnl +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=1']) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=2']) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) + +dnl Test UDP from port 1 and 2, partial flush by dst port +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"]) + + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], [dnl +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=2']) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=1']) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) + +dnl Test UDP from port 1 and 2, partial flush by src address +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"]) + + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], [dnl +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1']) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2']) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) + +dnl Test UDP from port 1 and 2, partial flush by dst address +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"]) + + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], [dnl +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.2']) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.1']) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP
Currently, the CT can be flushed by dpctl only be specifying the whole 5-tuple. This is not very convenient when there are only some fields known to the user of CT flush. Add new struct ofputil_ct_match which represents the generic filtering that can be done for CT flush. The match is done only on fields that are non-zero with exception to the icmp fields. This allows the filtering just within dpctl, however it is a preparation for OpenFlow extension. Reported-at: https://bugzilla.redhat.com/2120546 Signed-off-by: Ales Musil <amusil@redhat.com> --- v3: Rebase on top of master. Address the C99 comment and missing dpif_close call. v2: Rebase on top of master. Address comments from Paolo. --- NEWS | 2 + include/openvswitch/ofp-util.h | 28 +++ lib/automake.mk | 2 + lib/ct-dpif.c | 201 +++++++++------------ lib/ct-dpif.h | 4 +- lib/dpctl.c | 45 +++-- lib/dpctl.man | 3 +- lib/ofp-ct-util.c | 311 +++++++++++++++++++++++++++++++++ lib/ofp-ct-util.h | 34 ++++ tests/system-traffic.at | 82 ++++++++- 10 files changed, 568 insertions(+), 144 deletions(-) create mode 100644 lib/ofp-ct-util.c create mode 100644 lib/ofp-ct-util.h