@@ -200,7 +200,7 @@ enum ct_ephemeral_range {
struct conntrack {
struct ovs_mutex ct_lock; /* Protects 2 following fields. */
- struct cmap conns OVS_GUARDED;
+ struct cmap conns[UINT16_MAX+1] OVS_GUARDED;
struct rculist exp_lists[N_EXP_LISTS];
struct cmap zone_limits OVS_GUARDED;
struct cmap timeout_policies OVS_GUARDED;
@@ -254,7 +254,9 @@ conntrack_init(void)
ovs_mutex_init_adaptive(&ct->ct_lock);
ovs_mutex_lock(&ct->ct_lock);
- cmap_init(&ct->conns);
+ for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) {
+ cmap_init(&ct->conns[i]);
+ }
for (unsigned i = 0; i < ARRAY_SIZE(ct->exp_lists); i++) {
rculist_init(&ct->exp_lists[i]);
}
@@ -427,12 +429,14 @@ conn_clean__(struct conntrack *ct, struct conn *conn)
}
hash = conn_key_hash(&conn->key_node[CT_DIR_FWD].key, ct->hash_basis);
- cmap_remove(&ct->conns, &conn->key_node[CT_DIR_FWD].cm_node, hash);
+ cmap_remove(&ct->conns[conn->key_node[CT_DIR_FWD].key.zone],
+ &conn->key_node[CT_DIR_FWD].cm_node, hash);
if (conn->nat_action) {
hash = conn_key_hash(&conn->key_node[CT_DIR_REV].key,
ct->hash_basis);
- cmap_remove(&ct->conns, &conn->key_node[CT_DIR_REV].cm_node, hash);
+ cmap_remove(&ct->conns[conn->key_node[CT_DIR_REV].key.zone],
+ &conn->key_node[CT_DIR_REV].cm_node, hash);
}
rculist_remove(&conn->node);
@@ -503,7 +507,9 @@ conntrack_destroy(struct conntrack *ct)
ovs_mutex_lock(&ct->ct_lock);
- cmap_destroy(&ct->conns);
+ for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) {
+ cmap_destroy(&ct->conns[i]);
+ }
cmap_destroy(&ct->zone_limits);
cmap_destroy(&ct->timeout_policies);
@@ -534,7 +540,7 @@ conn_key_lookup(struct conntrack *ct, const struct conn_key *key,
struct conn *conn = NULL;
bool found = false;
- CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash, &ct->conns) {
+ CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash, &ct->conns[key->zone]) {
if (keyn->dir == CT_DIR_FWD) {
conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
} else {
@@ -962,14 +968,16 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
nat_packet(pkt, nc, false, ctx->icmp_related);
uint32_t rev_hash = conn_key_hash(&rev_key_node->key,
ct->hash_basis);
- cmap_insert(&ct->conns, &rev_key_node->cm_node, rev_hash);
+ cmap_insert(&ct->conns[ctx->key.zone],
+ &rev_key_node->cm_node, rev_hash);
}
ovs_mutex_init_adaptive(&nc->lock);
atomic_flag_clear(&nc->reclaimed);
fwd_key_node->dir = CT_DIR_FWD;
rev_key_node->dir = CT_DIR_REV;
- cmap_insert(&ct->conns, &fwd_key_node->cm_node, ctx->hash);
+ cmap_insert(&ct->conns[ctx->key.zone],
+ &fwd_key_node->cm_node, ctx->hash);
conn_expire_push_front(ct, nc);
atomic_count_inc(&ct->n_conn);
ctx->conn = nc; /* For completeness. */
@@ -2649,11 +2657,12 @@ conntrack_dump_start(struct conntrack *ct, struct conntrack_dump *dump,
if (pzone) {
dump->zone = *pzone;
dump->filter_zone = true;
+ dump->current_zone = dump->zone;
}
dump->ct = ct;
*ptot_bkts = 1; /* Need to clean up the callers. */
- dump->cursor = cmap_cursor_start(&ct->conns);
+ dump->cursor = cmap_cursor_start(&dump->ct->conns[dump->current_zone]);
return 0;
}
@@ -2665,20 +2674,26 @@ conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
struct conn_key_node *keyn;
struct conn *conn;
- CMAP_CURSOR_FOR_EACH_CONTINUE (keyn, cm_node, &dump->cursor) {
- if (keyn->dir != CT_DIR_FWD) {
- continue;
- }
+ while (true) {
+ CMAP_CURSOR_FOR_EACH_CONTINUE (keyn, cm_node, &dump->cursor) {
+ if (keyn->dir != CT_DIR_FWD) {
+ continue;
+ }
- conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
- if (conn_expired(conn, now)) {
- continue;
- }
+ conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
+ if (conn_expired(conn, now)) {
+ continue;
+ }
- if (!dump->filter_zone || keyn->key.zone == dump->zone) {
conn_to_ct_dpif_entry(conn, entry, now);
return 0;
}
+
+ if (dump->filter_zone || dump->current_zone == UINT16_MAX) {
+ break;
+ }
+ dump->current_zone++;
+ dump->cursor = cmap_cursor_start(&dump->ct->conns[dump->current_zone]);
}
return EOF;
@@ -2756,20 +2771,32 @@ conntrack_exp_dump_done(struct conntrack_dump *dump OVS_UNUSED)
return 0;
}
+static int
+conntrack_flush_zone(struct conntrack *ct, const uint16_t zone)
+{
+ struct conn_key_node *keyn;
+ struct conn *conn;
+
+ CMAP_FOR_EACH (keyn, cm_node, &ct->conns[zone]) {
+ if (keyn->dir != CT_DIR_FWD) {
+ continue;
+ }
+ conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
+ conn_clean(ct, conn);
+ }
+
+ return 0;
+}
+
int
conntrack_flush(struct conntrack *ct, const uint16_t *zone)
{
- struct conn_key_node *keyn;
- struct conn *conn;
+ if (zone) {
+ return conntrack_flush_zone(ct, *zone);
+ }
- CMAP_FOR_EACH (keyn, cm_node, &ct->conns) {
- if (keyn->dir != CT_DIR_FWD) {
- continue;
- }
- conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
- if (!zone || *zone == keyn->key.zone) {
- conn_clean(ct, conn);
- }
+ for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) {
+ conntrack_flush_zone(ct, i);
}
return 0;
@@ -112,6 +112,7 @@ struct conntrack_dump {
};
bool filter_zone;
uint16_t zone;
+ uint16_t current_zone;
};
struct conntrack_zone_limit {
Currently conntrack uses a single large cmap for all connections stored. This cmap contains all connections for all conntrack zones which are completely separate from each other. By separating each zone to its own cmap we can significantly optimize the performance when using multiple zones. The change fixes a similar issue as [1] where slow conntrack zone flush operations significantly slow down OVN router failover. The difference is just that this fix is used whith dpdk, while [1] was when using the ovs kernel module. As we now need to store more cmap's the memory usage of struct conntrack increases by 524280 bytes. Additionally we need 65535 cmaps with 128 bytes each. This leads to a total memory increase of around 10MB. Running "./ovstest test-conntrack benchmark 4 33554432 32 1" shows no real difference in the multithreading behaviour against a single zone. Running the new "./ovstest test-conntrack benchmark-zones" show significant speedups as shown below. The values for "ct execute" are for acting on the complete zone with all its entries in total (so in the first case adding 10,000 new conntrack entries). All tests are run 1000 times. When running with 1,000 zones with 10,000 entries each we see the following results (all in microseconds): "./ovstest test-conntrack benchmark-zones 10000 1000 1000" +------+--------+---------+---------+ | Min | Max | 95%ile | Avg | +------------------------+------+--------+---------+---------+ | ct execute (commit) | | | | | | with commit | 2266 | 3505 | 2707.06 | 2592.06 | | without commit | 2411 | 12730 | 4432.50 | 2736.78 | +------------------------+------+--------+---------+---------+ | ct execute (no commit) | | | | | | with commit | 699 | 1238 | 886.15 | 722.67 | | without commit | 700 | 3377 | 1934.42 | 803.53 | +------------------------+------+--------+---------+---------+ | flush full zone | | | | | | with commit | 619 | 1122 | 901.36 | 679.15 | | without commit | 618 | 105078 | 64591 | 2886.46 | +------------------------+------+--------+---------+---------+ | flush empty zone | | | | | | with commit | 0 | 5 | 1.00 | 0.64 | | without commit | 54 | 87469 | 64520 | 2172.25 | +------------------------+------+--------+---------+---------+ When running with 10,000 zones with 1,000 entries each we see the following results (all in microseconds): "./ovstest test-conntrack benchmark-zones 1000 10000 1000" +------+--------+---------+---------+ | Min | Max | 95%ile | Avg | +------------------------+------+--------+---------+---------+ | ct execute (commit) | | | | | | with commit | 215 | 287 | 231.88 | 222.30 | | without commit | 214 | 1692 | 569.18 | 285.83 | +------------------------+------+--------+---------+---------+ | ct execute (no commit) | | | | | | with commit | 68 | 97 | 74.69 | 70.09 | | without commit | 68 | 300 | 158.40 | 82.06 | +------------------------+------+--------+---------+---------+ | flush full zone | | | | | | with commit | 47 | 211 | 56.34 | 50.34 | | without commit | 48 | 96330 | 63392 | 63923 | +------------------------+------+--------+---------+---------+ | flush empty zone | | | | | | with commit | 0 | 1 | 1.00 | 0.44 | | without commit | 3 | 109728 | 63923 | 3629.44 | +------------------------+------+--------+---------+---------+ Comparing the averages we see: * a moderate performance improvement for conntrack_execute with or without commiting of around 6% to 23% * a significant performance improvement for flushing a full zone of around 75% to 99% * an even more significant improvement for flushing empty zones since we no longer need to check any unrelated connections [1] 9ec849e8aa869b646c372fac552ae2609a4b5f66 Signed-off-by: Felix Huettner <felix.huettner@mail.schwarz> --- lib/conntrack-private.h | 2 +- lib/conntrack.c | 81 +++++++++++++++++++++++++++-------------- lib/conntrack.h | 1 + 3 files changed, 56 insertions(+), 28 deletions(-)