@@ -118,7 +118,7 @@ Q: Are all features available with all datapaths?
========================== ============== ============== ========= =======
Connection tracking 4.3 2.5 2.6 YES
Conntrack Fragment Reass. 4.3 2.6 2.12 YES
- Conntrack Timeout Policies 5.2 2.12 NO NO
+ Conntrack Timeout Policies 5.2 2.12 2.13 NO
Conntrack Zone Limit 4.18 2.10 2.13 YES
Conntrack NAT 4.6 2.6 2.8 YES
Tunnel - LISP NO 2.11 NO NO
@@ -10,6 +10,8 @@ Post-v2.13.0
* Deprecated DPDK ring ports (dpdkr) are no longer supported.
- Linux datapath:
* Support for kernel versions up to 5.5.x.
+ - Userspace datapath:
+ * Add support for conntrack zone-based timeout policy.
v2.13.0 - 14 Feb 2020
@@ -53,6 +53,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/conntrack-icmp.c \
lib/conntrack-private.h \
lib/conntrack-tcp.c \
+ lib/conntrack-tp.c \
+ lib/conntrack-tp.h \
lib/conntrack-other.c \
lib/conntrack.c \
lib/conntrack.h \
@@ -22,6 +22,7 @@
#include <netinet/icmp6.h>
#include "conntrack-private.h"
+#include "conntrack-tp.h"
#include "dp-packet.h"
enum OVS_PACKED_ENUM icmp_state {
@@ -51,7 +52,8 @@ icmp_conn_update(struct conntrack *ct, struct conn *conn_,
{
struct conn_icmp *conn = conn_icmp_cast(conn_);
conn->state = reply ? ICMPS_REPLY : ICMPS_FIRST;
- conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
+ conn_update_expiration_with_tp(ct, &conn->up, icmp_timeouts[conn->state],
+ now);
return CT_UPDATE_VALID;
}
@@ -80,7 +82,8 @@ icmp_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED,
{
struct conn_icmp *conn = xzalloc(sizeof *conn);
conn->state = ICMPS_FIRST;
- conn_init_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
+ conn_init_expiration_with_tp(ct, &conn->up, icmp_timeouts[conn->state],
+ now);
return &conn->up;
}
@@ -17,6 +17,7 @@
#include <config.h>
#include "conntrack-private.h"
+#include "conntrack-tp.h"
#include "dp-packet.h"
enum OVS_PACKED_ENUM other_state {
@@ -56,8 +57,9 @@ other_conn_update(struct conntrack *ct, struct conn *conn_,
ret = CT_UPDATE_VALID_NEW;
}
- conn_update_expiration(ct, &conn->up, other_timeouts[conn->state], now);
-
+ conn_update_expiration_with_tp(ct, &conn->up,
+ other_timeouts[conn->state],
+ now);
return ret;
}
@@ -76,7 +78,8 @@ other_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED,
conn = xzalloc(sizeof *conn);
conn->state = OTHERS_FIRST;
- conn_init_expiration(ct, &conn->up, other_timeouts[conn->state], now);
+ conn_init_expiration_with_tp(ct, &conn->up, other_timeouts[conn->state],
+ now);
return &conn->up;
}
@@ -118,6 +118,8 @@ struct conn {
/* Immutable data. */
bool alg_related; /* True if alg data connection. */
enum ct_conn_type conn_type;
+
+ uint32_t tpid; /* Timeout policy ID. */
};
enum ct_update_res {
@@ -143,9 +145,9 @@ enum ct_update_res {
CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
-/* The smallest of the above values: it is used as an upper bound for the
- * interval between two rounds of cleanup of expired entries */
-#define CT_TM_MIN (30 * 1000)
+/* This is used as an upper bound for the interval between two
+ * rounds of cleanup of expired entries */
+#define CT_TM_MIN (1 * 1000)
#define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN);
CT_TIMEOUTS
@@ -163,6 +165,7 @@ struct conntrack {
struct cmap conns OVS_GUARDED;
struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
struct hmap zone_limits OVS_GUARDED;
+ struct hmap timeout_policies OVS_GUARDED;
uint32_t hash_basis; /* Salt for hashing a connection key. */
pthread_t clean_thread; /* Periodically cleans up connection tracker. */
struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */
@@ -212,16 +215,22 @@ extern long long ct_timeout_val[];
/* ct_lock must be held. */
static inline void
conn_init_expiration(struct conntrack *ct, struct conn *conn,
- enum ct_timeout tm, long long now)
+ enum ct_timeout tm, long long now,
+ uint32_t tp_value, bool use_default)
{
- conn->expiration = now + ct_timeout_val[tm];
+ if (use_default) {
+ conn->expiration = now + ct_timeout_val[tm];
+ } else {
+ conn->expiration = now + tp_value * 1000;
+ }
ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
}
/* The conn entry lock must be held on entry and exit. */
static inline void
conn_update_expiration(struct conntrack *ct, struct conn *conn,
- enum ct_timeout tm, long long now)
+ enum ct_timeout tm, long long now,
+ uint32_t tp_value, bool use_default)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
ovs_mutex_unlock(&conn->lock);
@@ -229,7 +238,11 @@ conn_update_expiration(struct conntrack *ct, struct conn *conn,
ovs_mutex_lock(&ct->ct_lock);
ovs_mutex_lock(&conn->lock);
if (!conn->cleaned) {
- conn->expiration = now + ct_timeout_val[tm];
+ if (use_default) {
+ conn->expiration = now + ct_timeout_val[tm];
+ } else {
+ conn->expiration = now + tp_value * 1000;
+ }
ovs_list_remove(&conn->exp_node);
ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
}
@@ -39,6 +39,7 @@
#include <config.h>
#include "conntrack-private.h"
+#include "conntrack-tp.h"
#include "coverage.h"
#include "ct-dpif.h"
#include "dp-packet.h"
@@ -188,7 +189,8 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_,
return CT_UPDATE_NEW;
} else if (src->state <= CT_DPIF_TCPS_SYN_SENT) {
src->state = CT_DPIF_TCPS_SYN_SENT;
- conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIRST_PACKET, now);
+ conn_update_expiration_with_tp(ct, &conn->up,
+ CT_TM_TCP_FIRST_PACKET, now);
return CT_UPDATE_VALID_NEW;
}
}
@@ -339,18 +341,23 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_,
if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2
&& dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
- conn_update_expiration(ct, &conn->up, CT_TM_TCP_CLOSED, now);
+ conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_CLOSED,
+ now);
} else if (src->state >= CT_DPIF_TCPS_CLOSING
&& dst->state >= CT_DPIF_TCPS_CLOSING) {
- conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIN_WAIT, now);
+ conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_FIN_WAIT,
+ now);
} else if (src->state < CT_DPIF_TCPS_ESTABLISHED
|| dst->state < CT_DPIF_TCPS_ESTABLISHED) {
- conn_update_expiration(ct, &conn->up, CT_TM_TCP_OPENING, now);
+ conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_OPENING,
+ now);
} else if (src->state >= CT_DPIF_TCPS_CLOSING
|| dst->state >= CT_DPIF_TCPS_CLOSING) {
- conn_update_expiration(ct, &conn->up, CT_TM_TCP_CLOSING, now);
+ conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_CLOSING,
+ now);
} else {
- conn_update_expiration(ct, &conn->up, CT_TM_TCP_ESTABLISHED, now);
+ conn_update_expiration_with_tp(ct, &conn->up,
+ CT_TM_TCP_ESTABLISHED, now);
}
} else if ((dst->state < CT_DPIF_TCPS_SYN_SENT
|| dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
@@ -471,7 +478,8 @@ tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now)
src->state = CT_DPIF_TCPS_SYN_SENT;
dst->state = CT_DPIF_TCPS_CLOSED;
- conn_init_expiration(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET, now);
+ conn_init_expiration_with_tp(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET,
+ now);
return &newconn->up;
}
new file mode 100644
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2020 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "conntrack-private.h"
+#include "conntrack-tp.h"
+#include "dp-packet.h"
+
+#include "openvswitch/vlog.h"
+VLOG_DEFINE_THIS_MODULE(conntrack_tp);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+static const char *ct_timeout_str[] = {
+#define CT_TIMEOUT(NAME, VALUE) #NAME
+ CT_TIMEOUTS
+#undef CT_TIMEOUT
+};
+
+static inline bool
+check_present_and_set(struct timeout_policy *tp, uint32_t *v,
+ enum ct_dpif_tp_attr attr)
+{
+ if (tp && (tp->p.present & (1 << attr))) {
+ *v = tp->p.attrs[attr];
+ return true;
+ }
+ return false;
+}
+
+#define TP_HAS_FUNC(ATTR) \
+static bool \
+tp_has_##ATTR(struct timeout_policy *tp, uint32_t *v) \
+{ \
+ return check_present_and_set(tp, v, CT_DPIF_TP_ATTR_##ATTR); \
+}
+
+/* These functions check whether the timeout value is set from the
+ * present bit. If true, then set the value to '*v'. For meaning
+ * of each value, see CT_Timeout_Policy table at ovs-vswitchd.conf.db(5).
+ *
+ * Note that ICMP_REPLY means receiving an ICMP error reply, and
+ * currently it is not implemented.
+ */
+TP_HAS_FUNC(TCP_SYN_SENT)
+TP_HAS_FUNC(TCP_SYN_RECV)
+TP_HAS_FUNC(TCP_ESTABLISHED)
+TP_HAS_FUNC(TCP_FIN_WAIT)
+TP_HAS_FUNC(TCP_TIME_WAIT)
+TP_HAS_FUNC(TCP_CLOSE)
+TP_HAS_FUNC(UDP_FIRST)
+TP_HAS_FUNC(UDP_SINGLE)
+TP_HAS_FUNC(UDP_MULTIPLE)
+TP_HAS_FUNC(ICMP_FIRST)
+/* TP_HAS_FUNC(ICMP_REPLY) not implemented. */
+
+static bool
+conn_update_expiration_with_policy(struct conntrack *ct, struct conn *conn,
+ enum ct_timeout tm, long long now,
+ struct timeout_policy *tp)
+{
+ uint32_t val;
+
+ switch (tm) {
+ case CT_TM_TCP_FIRST_PACKET:
+ if (tp_has_TCP_SYN_SENT(tp, &val)) {
+ goto update_with_val;
+ }
+ break;
+ case CT_TM_TCP_OPENING:
+ if (tp_has_TCP_SYN_RECV(tp, &val)) {
+ goto update_with_val;
+ }
+ break;
+ case CT_TM_TCP_ESTABLISHED:
+ if (tp_has_TCP_ESTABLISHED(tp, &val)) {
+ goto update_with_val;
+ }
+ break;
+ case CT_TM_TCP_CLOSING:
+ if (tp_has_TCP_FIN_WAIT(tp, &val)) {
+ goto update_with_val;
+ }
+ break;
+ case CT_TM_TCP_FIN_WAIT:
+ if (tp_has_TCP_TIME_WAIT(tp, &val)) {
+ goto update_with_val;
+ }
+ break;
+ case CT_TM_TCP_CLOSED:
+ if (tp_has_TCP_CLOSE(tp, &val)) {
+ goto update_with_val;
+ }
+ break;
+ case CT_TM_OTHER_FIRST:
+ if (tp_has_UDP_FIRST(tp, &val)) {
+ goto update_with_val;
+ }
+ break;
+ case CT_TM_OTHER_BIDIR:
+ if (tp_has_UDP_SINGLE(tp, &val)) {
+ goto update_with_val;
+ }
+ break;
+ case CT_TM_OTHER_MULTIPLE:
+ if (tp_has_UDP_MULTIPLE(tp, &val)) {
+ goto update_with_val;
+ }
+ break;
+ case CT_TM_ICMP_FIRST:
+ if (tp_has_ICMP_FIRST(tp, &val)) {
+ goto update_with_val;
+ }
+ break;
+ case CT_TM_ICMP_REPLY:
+ /* Not Implemented. */
+ return true;
+ case N_CT_TM:
+ default:
+ OVS_NOT_REACHED();
+ break;
+ }
+ return false;
+
+update_with_val:
+ conn_update_expiration(ct, conn, tm, now, val, false);
+ return true;
+}
+
+static bool
+conn_init_expiration_with_policy(struct conntrack *ct, struct conn *conn,
+ enum ct_timeout tm, long long now,
+ struct timeout_policy *tp)
+{
+ uint32_t val;
+
+ switch (tm) {
+ case CT_TM_TCP_FIRST_PACKET:
+ if (tp_has_TCP_SYN_SENT(tp, &val)) {
+ goto init_with_val;
+ }
+ break;
+ case CT_TM_TCP_OPENING:
+ if (tp_has_TCP_SYN_RECV(tp, &val)) {
+ goto init_with_val;
+ }
+ break;
+ case CT_TM_TCP_ESTABLISHED:
+ if (tp_has_TCP_ESTABLISHED(tp, &val)) {
+ goto init_with_val;
+ }
+ break;
+ case CT_TM_TCP_CLOSING:
+ if (tp_has_TCP_FIN_WAIT(tp, &val)) {
+ goto init_with_val;
+ }
+ break;
+ case CT_TM_TCP_FIN_WAIT:
+ if (tp_has_TCP_TIME_WAIT(tp, &val)) {
+ goto init_with_val;
+ }
+ break;
+ case CT_TM_TCP_CLOSED:
+ if (tp_has_TCP_CLOSE(tp, &val)) {
+ goto init_with_val;
+ }
+ break;
+ case CT_TM_OTHER_FIRST:
+ if (tp_has_UDP_FIRST(tp, &val)) {
+ goto init_with_val;
+ }
+ break;
+ case CT_TM_OTHER_BIDIR:
+ if (tp_has_UDP_SINGLE(tp, &val)) {
+ goto init_with_val;
+ }
+ break;
+ case CT_TM_OTHER_MULTIPLE:
+ if (tp_has_UDP_MULTIPLE(tp, &val)) {
+ goto init_with_val;
+ }
+ break;
+ case CT_TM_ICMP_FIRST:
+ if (tp_has_ICMP_FIRST(tp, &val)) {
+ goto init_with_val;
+ }
+ break;
+ case CT_TM_ICMP_REPLY:
+ /* Not Implemented. */
+ return true;
+ case N_CT_TM:
+ default:
+ OVS_NOT_REACHED();
+ break;
+ }
+ return false;
+
+init_with_val:
+ conn_init_expiration(ct, conn, tm, now, val, false);
+ return true;
+}
+
+void
+conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+ enum ct_timeout tm, long long now)
+{
+ struct timeout_policy *tp;
+
+ tp = timeout_policy_lookup(ct, conn->tpid);
+ if (tp && conn_init_expiration_with_policy(ct, conn, tm, now, tp)) {
+ VLOG_DBG_RL(&rl, "Init timeout %s with policy.",
+ ct_timeout_str[tm]);
+ } else {
+ /* Init with default. */
+ conn_init_expiration(ct, conn, tm, now, 0, true);
+ }
+}
+
+void
+conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+ enum ct_timeout tm, long long now)
+{
+ struct timeout_policy *tp;
+
+ tp = timeout_policy_lookup(ct, conn->tpid);
+ if (tp && conn_update_expiration_with_policy(ct, conn, tm, now, tp)) {
+ VLOG_DBG_RL(&rl, "Update timeout %s with policy.",
+ ct_timeout_str[tm]);
+ } else {
+ /* Update with default. */
+ conn_update_expiration(ct, conn, tm, now, 0, true);
+ }
+}
new file mode 100644
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2020 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONNTRACK_TP_H
+#define CONNTRACK_TP_H 1
+
+void conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+ enum ct_timeout tm, long long now);
+void conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+ enum ct_timeout tm, long long now);
+#endif
@@ -143,6 +143,12 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx,
static void
expectation_clean(struct conntrack *ct, const struct conn_key *master_key);
+static void timeout_policy_init(struct conntrack *ct);
+static int timeout_policy_create(struct conntrack *ct,
+ struct timeout_policy *tp);
+static void timeout_policy_clean(struct conntrack *ct,
+ struct timeout_policy *tp);
+
static struct ct_l4_proto *l4_protos[] = {
[IPPROTO_TCP] = &ct_proto_tcp,
[IPPROTO_UDP] = &ct_proto_other,
@@ -312,6 +318,7 @@ conntrack_init(void)
}
hmap_init(&ct->zone_limits);
ct->zone_limit_seq = 0;
+ timeout_policy_init(ct);
ovs_mutex_unlock(&ct->ct_lock);
ct->hash_basis = random_uint32();
@@ -430,6 +437,139 @@ zone_limit_delete(struct conntrack *ct, uint16_t zone)
return 0;
}
+struct timeout_policy *
+timeout_policy_get(struct conntrack *ct, int32_t tpid)
+{
+ struct timeout_policy *tp;
+
+ ovs_mutex_lock(&ct->ct_lock);
+ tp = timeout_policy_lookup(ct, tpid);
+ if (!tp) {
+ ovs_mutex_unlock(&ct->ct_lock);
+ return NULL;
+ }
+
+ ovs_mutex_unlock(&ct->ct_lock);
+ return tp;
+}
+
+struct timeout_policy *
+timeout_policy_lookup(struct conntrack *ct, int32_t tpid)
+ OVS_REQUIRES(ct->ct_lock)
+{
+ struct timeout_policy *tp;
+ uint32_t hash;
+
+ hash = zone_key_hash(tpid, ct->hash_basis);
+ HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
+ if (tp->p.id == tpid) {
+ return tp;
+ }
+ }
+ return NULL;
+}
+
+static bool
+is_valid_tpid(uint32_t tpid OVS_UNUSED)
+{
+ return true;
+}
+
+static int
+timeout_policy_create(struct conntrack *ct,
+ struct timeout_policy *new_tp)
+ OVS_REQUIRES(ct->ct_lock)
+{
+ uint32_t tpid = new_tp->p.id;
+ uint32_t hash;
+
+ if (is_valid_tpid(tpid)) {
+ struct timeout_policy *tp;
+
+ tp = xzalloc(sizeof *tp);
+ memcpy(&tp->p, &new_tp->p, sizeof tp->p);
+
+ hash = zone_key_hash(tpid, ct->hash_basis);
+ hmap_insert(&ct->timeout_policies, &tp->node, hash);
+
+ return 0;
+ } else {
+ return EINVAL;
+ }
+}
+
+static void
+update_existing_tp(struct timeout_policy *tp_dst,
+ struct timeout_policy *tp_src)
+{
+ struct ct_dpif_timeout_policy *dst, *src;
+ int i;
+
+ dst = &tp_dst->p;
+ src = &tp_src->p;
+
+ /* Set the value to dst if present bit in src is set. */
+ for (i = 0; i < ARRAY_SIZE(dst->attrs); i++) {
+ if (src->present & (1 << i)) {
+ dst->attrs[i] = src->attrs[i];
+ dst->present |= (1 << i);
+ }
+ }
+}
+
+int
+timeout_policy_update(struct conntrack *ct, struct timeout_policy *new_tp)
+{
+ int err = 0;
+ uint32_t tpid = new_tp->p.id;
+
+ ovs_mutex_lock(&ct->ct_lock);
+ struct timeout_policy *tp = timeout_policy_lookup(ct, tpid);
+ if (tp) {
+ VLOG_INFO("Changed timeout policy of existing tpid %d", tpid);
+ update_existing_tp(tp, new_tp);
+ } else {
+ err = timeout_policy_create(ct, new_tp);
+ if (err) {
+ VLOG_WARN("Request to create timeout policy failed");
+ } else {
+ VLOG_INFO("Created timeout policy tpid %d", tpid);
+ }
+ }
+ ovs_mutex_unlock(&ct->ct_lock);
+ return err;
+}
+
+static void
+timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp)
+ OVS_REQUIRES(ct->ct_lock)
+{
+ hmap_remove(&ct->timeout_policies, &tp->node);
+ free(tp);
+}
+
+int
+timeout_policy_delete(struct conntrack *ct, uint32_t tpid)
+{
+ ovs_mutex_lock(&ct->ct_lock);
+ struct timeout_policy *tp = timeout_policy_lookup(ct, tpid);
+ if (tp) {
+ VLOG_INFO("Deleted timeout policy for id %d", tpid);
+ timeout_policy_clean(ct, tp);
+ } else {
+ VLOG_INFO("Attempted delete of non-existent timeout policy: zone %d",
+ tpid);
+ }
+ ovs_mutex_unlock(&ct->ct_lock);
+ return 0;
+}
+
+void
+timeout_policy_init(struct conntrack *ct)
+{
+ hmap_init(&ct->timeout_policies);
+}
+
static void
conn_clean_cmn(struct conntrack *ct, struct conn *conn)
OVS_REQUIRES(ct->ct_lock)
@@ -502,6 +642,12 @@ conntrack_destroy(struct conntrack *ct)
}
hmap_destroy(&ct->zone_limits);
+ struct timeout_policy *tp;
+ HMAP_FOR_EACH_POP (tp, node, &ct->timeout_policies) {
+ free(tp);
+ }
+ hmap_destroy(&ct->timeout_policies);
+
ovs_mutex_unlock(&ct->ct_lock);
ovs_mutex_destroy(&ct->ct_lock);
@@ -1275,7 +1421,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
bool force, bool commit, long long now, const uint32_t *setmark,
const struct ovs_key_ct_labels *setlabel,
const struct nat_action_info_t *nat_action_info,
- ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper)
+ ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
+ uint32_t tpid)
{
/* Reset ct_state whenever entering a new zone. */
if (pkt->md.ct_state && pkt->md.ct_zone != zone) {
@@ -1323,6 +1470,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
nat_action_info,
ct_alg_ctl, now,
&create_new_conn))) {
+ conn->tpid = tpid;
create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
}
if (nat_action_info && !create_new_conn) {
@@ -1395,7 +1543,7 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
const struct ovs_key_ct_labels *setlabel,
ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
const struct nat_action_info_t *nat_action_info,
- long long now)
+ long long now, uint32_t tpid)
{
ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone,
ct->hash_basis);
@@ -1417,7 +1565,8 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
write_ct_md(packet, zone, NULL, NULL, NULL);
} else {
process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
- setlabel, nat_action_info, tp_src, tp_dst, helper);
+ setlabel, nat_action_info, tp_src, tp_dst, helper,
+ tpid);
}
}
@@ -1540,14 +1689,14 @@ conntrack_clean(struct conntrack *ct, long long now)
* - We want to reduce the number of wakeups and batch connection cleanup
* when the load is not very high. CT_CLEAN_INTERVAL ensures that if we
* are coping with the current cleanup tasks, then we wait at least
- * 5 seconds to do further cleanup.
+ * 1 seconds to do further cleanup.
*
* - We don't want to keep the map locked too long, as we might prevent
* traffic from flowing. CT_CLEAN_MIN_INTERVAL ensures that if cleanup is
* behind, there is at least some 200ms blocks of time when the map will be
* left alone, so the datapath can operate unhindered.
*/
-#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
+#define CT_CLEAN_INTERVAL 1000 /* 1 second */
#define CT_CLEAN_MIN_INTERVAL 200 /* 0.2 seconds */
static void *
@@ -20,6 +20,7 @@
#include <stdbool.h>
#include "cmap.h"
+#include "ct-dpif.h"
#include "latch.h"
#include "odp-netlink.h"
#include "openvswitch/hmap.h"
@@ -93,7 +94,7 @@ int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
const struct ovs_key_ct_labels *setlabel,
ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
const struct nat_action_info_t *nat_action_info,
- long long now);
+ long long now, uint32_t tpid);
void conntrack_clear(struct dp_packet *packet);
struct conntrack_dump {
@@ -111,6 +112,11 @@ struct conntrack_zone_limit {
uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
};
+struct timeout_policy {
+ struct hmap_node node;
+ struct ct_dpif_timeout_policy p;
+};
+
enum {
INVALID_ZONE = -2,
DEFAULT_ZONE = -1, /* Default zone for zone limit management. */
@@ -139,5 +145,11 @@ struct conntrack_zone_limit zone_limit_get(struct conntrack *ct,
int32_t zone);
int zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit);
int zone_limit_delete(struct conntrack *ct, uint16_t zone);
+int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp);
+int timeout_policy_delete(struct conntrack *ct, uint32_t tpid);
+struct timeout_policy *timeout_policy_get(struct conntrack *ct, int32_t tpid);
+struct timeout_policy *timeout_policy_lookup(struct conntrack *ct,
+ int32_t tpid);
+
#endif /* conntrack.h */
@@ -7337,6 +7337,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
bool commit = false;
unsigned int left;
uint16_t zone = 0;
+ uint32_t tpid = 0;
const char *helper = NULL;
const uint32_t *setmark = NULL;
const struct ovs_key_ct_labels *setlabel = NULL;
@@ -7372,8 +7373,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
* netlink events. */
break;
case OVS_CT_ATTR_TIMEOUT:
- /* Userspace datapath does not support customized timeout
- * policy yet. */
+ if (!str_to_uint(nl_attr_get_string(b), 10, &tpid)) {
+ VLOG_WARN("Invalid tpid %s.", nl_attr_get_string(b));
+ }
break;
case OVS_CT_ATTR_NAT: {
const struct nlattr *b_nest;
@@ -7459,7 +7461,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
conntrack_execute(dp->conntrack, packets_, aux->flow->dl_type, force,
commit, zone, setmark, setlabel, aux->flow->tp_src,
aux->flow->tp_dst, helper, nat_action_info_ref,
- pmd->ctx.now / 1000);
+ pmd->ctx.now / 1000, tpid);
break;
}
@@ -7693,6 +7695,64 @@ dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED,
}
static int
+dpif_netdev_ct_set_timeout_policy(struct dpif *dpif,
+ const struct ct_dpif_timeout_policy *dpif_tp)
+{
+ struct timeout_policy tp;
+ struct dp_netdev *dp;
+ int err = 0;
+
+ dp = get_dp_netdev(dpif);
+ memcpy(&tp.p, dpif_tp, sizeof tp.p);
+ err = timeout_policy_update(dp->conntrack, &tp);
+ return err;
+}
+
+static int
+dpif_netdev_ct_get_timeout_policy(struct dpif *dpif, uint32_t tp_id,
+ struct ct_dpif_timeout_policy *dpif_tp)
+{
+ struct timeout_policy *tp;
+ struct dp_netdev *dp;
+ int err = 0;
+
+ dp = get_dp_netdev(dpif);
+ tp = timeout_policy_get(dp->conntrack, tp_id);
+ if (!tp) {
+ return EINVAL;
+ }
+ memcpy(dpif_tp, &tp->p, sizeof tp->p);
+ return err;
+}
+
+static int
+dpif_netdev_ct_del_timeout_policy(struct dpif *dpif,
+ uint32_t tp_id)
+{
+ struct dp_netdev *dp;
+ int err = 0;
+
+ dp = get_dp_netdev(dpif);
+ err = timeout_policy_delete(dp->conntrack, tp_id);
+ return err;
+}
+
+static int
+dpif_netdev_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
+ uint32_t tp_id,
+ uint16_t dl_type OVS_UNUSED,
+ uint8_t nw_proto OVS_UNUSED,
+ char **tp_name, bool *is_generic)
+{
+ struct ds ds = DS_EMPTY_INITIALIZER;
+
+ ds_put_format(&ds, "%"PRIu32, tp_id);
+ *tp_name = ds_steal_cstr(&ds);
+ *is_generic = true;
+ return 0;
+}
+
+static int
dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
@@ -7802,13 +7862,13 @@ const struct dpif_class dpif_netdev_class = {
dpif_netdev_ct_set_limits,
dpif_netdev_ct_get_limits,
dpif_netdev_ct_del_limits,
- NULL, /* ct_set_timeout_policy */
- NULL, /* ct_get_timeout_policy */
- NULL, /* ct_del_timeout_policy */
+ dpif_netdev_ct_set_timeout_policy,
+ dpif_netdev_ct_get_timeout_policy,
+ dpif_netdev_ct_del_timeout_policy,
NULL, /* ct_timeout_policy_dump_start */
NULL, /* ct_timeout_policy_dump_next */
NULL, /* ct_timeout_policy_dump_done */
- NULL, /* ct_get_timeout_policy_name */
+ dpif_netdev_ct_get_timeout_policy_name,
dpif_netdev_ipf_set_enabled,
dpif_netdev_ipf_set_min_frag,
dpif_netdev_ipf_set_max_nfrags,
@@ -3301,8 +3301,9 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
AT_CHECK([ovs-appctl dpctl/flush-conntrack])
dnl Shorten the udp_single and icmp_first timeout in zone 5
+dnl Userspace datapath uses udp_first, and kernel datapath uses udp_single
VSCTL_ADD_DATAPATH_TABLE()
-AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3])
+AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_first=3 udp_single=3 icmp_first=3])
dnl Send ICMP and UDP traffic
NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
@@ -99,12 +99,8 @@ m4_define([CHECK_CONNTRACK_NAT])
# CHECK_CONNTRACK_TIMEOUT()
#
# Perform requirements checks for running conntrack customized timeout tests.
-* The userspace datapath does not support this feature yet.
#
-m4_define([CHECK_CONNTRACK_TIMEOUT],
-[
- AT_SKIP_IF([:])
-])
+m4_define([CHECK_CONNTRACK_TIMEOUT])
# CHECK_CT_DPIF_SET_GET_MAXCONNS()
#
@@ -90,7 +90,7 @@ ct_thread_main(void *aux_)
ovs_barrier_block(&barrier);
for (i = 0; i < n_pkts; i += batch_size) {
conntrack_execute(ct, pkt_batch, dl_type, false, true, 0, NULL, NULL,
- 0, 0, NULL, NULL, now);
+ 0, 0, NULL, NULL, now, 0);
}
ovs_barrier_block(&barrier);
destroy_packets(pkt_batch);
@@ -174,7 +174,7 @@ pcap_batch_execute_conntrack(struct conntrack *ct_,
if (flow.dl_type != dl_type) {
conntrack_execute(ct_, &new_batch, dl_type, false, true, 0,
- NULL, NULL, 0, 0, NULL, NULL, now);
+ NULL, NULL, 0, 0, NULL, NULL, now, 0);
dp_packet_batch_init(&new_batch);
}
dp_packet_batch_add(&new_batch, packet);
@@ -182,7 +182,7 @@ pcap_batch_execute_conntrack(struct conntrack *ct_,
if (!dp_packet_batch_is_empty(&new_batch)) {
conntrack_execute(ct_, &new_batch, dl_type, false, true, 0, NULL, NULL,
- 0, 0, NULL, NULL, now);
+ 0, 0, NULL, NULL, now, 0);
}
}
Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout policy support") adds conntrack timeout policy for kernel datapath. This patch enables support for the userspace datapath. I tested using the 'make check-system-userspace' which checks the timeout policies for ICMP and UDP cases. Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/671278549 Signed-off-by: William Tu <u9012063@gmail.com> --- Documentation/faq/releases.rst | 2 +- NEWS | 2 + lib/automake.mk | 2 + lib/conntrack-icmp.c | 7 +- lib/conntrack-other.c | 9 +- lib/conntrack-private.h | 27 +++-- lib/conntrack-tcp.c | 22 ++-- lib/conntrack-tp.c | 246 +++++++++++++++++++++++++++++++++++++++ lib/conntrack-tp.h | 24 ++++ lib/conntrack.c | 159 ++++++++++++++++++++++++- lib/conntrack.h | 14 ++- lib/dpif-netdev.c | 74 ++++++++++-- tests/system-traffic.at | 3 +- tests/system-userspace-macros.at | 6 +- tests/test-conntrack.c | 6 +- 15 files changed, 561 insertions(+), 42 deletions(-) create mode 100644 lib/conntrack-tp.c create mode 100644 lib/conntrack-tp.h