diff mbox series

[ovs-dev,2/2] conntrack: Key connections by zone.

Message ID 584e7ad36ea551d55e03c857e1536054b559e040.1713960663.git.felix.huettner@mail.schwarz
State Superseded
Headers show
Series [ovs-dev,1/2] test-conntrack: add per zone benchmark tool | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Felix Huettner April 24, 2024, 12:17 p.m. UTC
Currently conntrack uses a single large cmap for all connections stored.
This cmap contains all connections for all conntrack zones which are
completely separate from each other. By separating each zone to its own
cmap we can significantly optimize the performance when using multiple
zones.

The change fixes a similar issue as [1] where slow conntrack zone flush
operations significantly slow down OVN router failover. The difference is
just that this fix is used whith dpdk, while [1] was when using the ovs
kernel module.

As we now need to store more cmap's the memory usage of struct conntrack
increases by 524280 bytes. Additionally we need 65535 cmaps with 128
bytes each. This leads to a total memory increase of around 10MB.

Running "./ovstest test-conntrack benchmark 4 33554432 32 1" shows no
real difference in the multithreading behaviour against a single zone.

Running the new "./ovstest test-conntrack benchmark-zones" show
significant speedups as shown below. The values for "ct execute" are for
acting on the complete zone with all its entries in total (so in the
first case adding 10,000 new conntrack entries). All tests are run 1000
times.

When running with 1,000 zones with 10,000 entries each we see the
following results (all in microseconds):
"./ovstest test-conntrack benchmark-zones 10000 1000 1000"

                         +------+--------+---------+---------+
                         |  Min |   Max  |  95%ile |   Avg   |
+------------------------+------+--------+---------+---------+
| ct execute (commit)    |      |        |         |         |
|            with commit | 2266 |   3505 | 2707.06 | 2592.06 |
|         without commit | 2411 |  12730 | 4432.50 | 2736.78 |
+------------------------+------+--------+---------+---------+
| ct execute (no commit) |      |        |         |         |
|            with commit |  699 |   1238 |  886.15 |  722.67 |
|         without commit |  700 |   3377 | 1934.42 |  803.53 |
+------------------------+------+--------+---------+---------+
| flush full zone        |      |        |         |         |
|            with commit |  619 |   1122 |  901.36 |  679.15 |
|         without commit |  618 | 105078 |   64591 | 2886.46 |
+------------------------+------+--------+---------+---------+
| flush empty zone       |      |        |         |         |
|            with commit |    0 |      5 |    1.00 |    0.64 |
|         without commit |   54 |  87469 |   64520 | 2172.25 |
+------------------------+------+--------+---------+---------+

When running with 10,000 zones with 1,000 entries each we see the
following results (all in microseconds):
"./ovstest test-conntrack benchmark-zones 1000 10000 1000"

                         +------+--------+---------+---------+
                         |  Min |   Max  |  95%ile |   Avg   |
+------------------------+------+--------+---------+---------+
| ct execute (commit)    |      |        |         |         |
|            with commit |  215 |    287 |  231.88 |  222.30 |
|         without commit |  214 |   1692 |  569.18 |  285.83 |
+------------------------+------+--------+---------+---------+
| ct execute (no commit) |      |        |         |         |
|            with commit |   68 |     97 |   74.69 |   70.09 |
|         without commit |   68 |    300 |  158.40 |   82.06 |
+------------------------+------+--------+---------+---------+
| flush full zone        |      |        |         |         |
|            with commit |   47 |    211 |   56.34 |   50.34 |
|         without commit |   48 |  96330 |   63392 |   63923 |
+------------------------+------+--------+---------+---------+
| flush empty zone       |      |        |         |         |
|            with commit |    0 |      1 |    1.00 |    0.44 |
|         without commit |    3 | 109728 |   63923 | 3629.44 |
+------------------------+------+--------+---------+---------+

Comparing the averages we see:
* a moderate performance improvement for conntrack_execute with or
  without commiting of around 6% to 23%
* a significant performance improvement for flushing a full zone of
  around 75% to 99%
* an even more significant improvement for flushing empty zones since we
  no longer need to check any unrelated connections

[1] 9ec849e8aa869b646c372fac552ae2609a4b5f66

Signed-off-by: Felix Huettner <felix.huettner@mail.schwarz>
---
 lib/conntrack-private.h |  2 +-
 lib/conntrack.c         | 81 +++++++++++++++++++++++++++--------------
 lib/conntrack.h         |  1 +
 3 files changed, 56 insertions(+), 28 deletions(-)
diff mbox series

Patch

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 3fd5fccd3..19f1f42a0 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -200,7 +200,7 @@  enum ct_ephemeral_range {
 
 struct conntrack {
     struct ovs_mutex ct_lock; /* Protects 2 following fields. */
-    struct cmap conns OVS_GUARDED;
+    struct cmap conns[UINT16_MAX+1] OVS_GUARDED;
     struct rculist exp_lists[N_EXP_LISTS];
     struct cmap zone_limits OVS_GUARDED;
     struct cmap timeout_policies OVS_GUARDED;
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 7e3ed0ee0..16e1c8bb5 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -254,7 +254,9 @@  conntrack_init(void)
 
     ovs_mutex_init_adaptive(&ct->ct_lock);
     ovs_mutex_lock(&ct->ct_lock);
-    cmap_init(&ct->conns);
+    for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) {
+        cmap_init(&ct->conns[i]);
+    }
     for (unsigned i = 0; i < ARRAY_SIZE(ct->exp_lists); i++) {
         rculist_init(&ct->exp_lists[i]);
     }
@@ -427,12 +429,14 @@  conn_clean__(struct conntrack *ct, struct conn *conn)
     }
 
     hash = conn_key_hash(&conn->key_node[CT_DIR_FWD].key, ct->hash_basis);
-    cmap_remove(&ct->conns, &conn->key_node[CT_DIR_FWD].cm_node, hash);
+    cmap_remove(&ct->conns[conn->key_node[CT_DIR_FWD].key.zone],
+                &conn->key_node[CT_DIR_FWD].cm_node, hash);
 
     if (conn->nat_action) {
         hash = conn_key_hash(&conn->key_node[CT_DIR_REV].key,
                              ct->hash_basis);
-        cmap_remove(&ct->conns, &conn->key_node[CT_DIR_REV].cm_node, hash);
+        cmap_remove(&ct->conns[conn->key_node[CT_DIR_REV].key.zone],
+                    &conn->key_node[CT_DIR_REV].cm_node, hash);
     }
 
     rculist_remove(&conn->node);
@@ -503,7 +507,9 @@  conntrack_destroy(struct conntrack *ct)
 
     ovs_mutex_lock(&ct->ct_lock);
 
-    cmap_destroy(&ct->conns);
+    for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) {
+        cmap_destroy(&ct->conns[i]);
+    }
     cmap_destroy(&ct->zone_limits);
     cmap_destroy(&ct->timeout_policies);
 
@@ -534,7 +540,7 @@  conn_key_lookup(struct conntrack *ct, const struct conn_key *key,
     struct conn *conn = NULL;
     bool found = false;
 
-    CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash, &ct->conns) {
+    CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash, &ct->conns[key->zone]) {
         if (keyn->dir == CT_DIR_FWD) {
             conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
         } else {
@@ -962,14 +968,16 @@  conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
             nat_packet(pkt, nc, false, ctx->icmp_related);
             uint32_t rev_hash = conn_key_hash(&rev_key_node->key,
                                               ct->hash_basis);
-            cmap_insert(&ct->conns, &rev_key_node->cm_node, rev_hash);
+            cmap_insert(&ct->conns[ctx->key.zone],
+                        &rev_key_node->cm_node, rev_hash);
         }
 
         ovs_mutex_init_adaptive(&nc->lock);
         atomic_flag_clear(&nc->reclaimed);
         fwd_key_node->dir = CT_DIR_FWD;
         rev_key_node->dir = CT_DIR_REV;
-        cmap_insert(&ct->conns, &fwd_key_node->cm_node, ctx->hash);
+        cmap_insert(&ct->conns[ctx->key.zone],
+                    &fwd_key_node->cm_node, ctx->hash);
         conn_expire_push_front(ct, nc);
         atomic_count_inc(&ct->n_conn);
         ctx->conn = nc; /* For completeness. */
@@ -2649,11 +2657,12 @@  conntrack_dump_start(struct conntrack *ct, struct conntrack_dump *dump,
     if (pzone) {
         dump->zone = *pzone;
         dump->filter_zone = true;
+        dump->current_zone = dump->zone;
     }
 
     dump->ct = ct;
     *ptot_bkts = 1; /* Need to clean up the callers. */
-    dump->cursor = cmap_cursor_start(&ct->conns);
+    dump->cursor = cmap_cursor_start(&dump->ct->conns[dump->current_zone]);
     return 0;
 }
 
@@ -2665,20 +2674,26 @@  conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
     struct conn_key_node *keyn;
     struct conn *conn;
 
-    CMAP_CURSOR_FOR_EACH_CONTINUE (keyn, cm_node, &dump->cursor) {
-        if (keyn->dir != CT_DIR_FWD) {
-            continue;
-        }
+    while (true) {
+        CMAP_CURSOR_FOR_EACH_CONTINUE (keyn, cm_node, &dump->cursor) {
+            if (keyn->dir != CT_DIR_FWD) {
+                continue;
+            }
 
-        conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
-        if (conn_expired(conn, now)) {
-            continue;
-        }
+            conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
+            if (conn_expired(conn, now)) {
+                continue;
+            }
 
-        if (!dump->filter_zone || keyn->key.zone == dump->zone) {
             conn_to_ct_dpif_entry(conn, entry, now);
             return 0;
         }
+
+        if (dump->filter_zone || dump->current_zone == UINT16_MAX) {
+            break;
+        }
+        dump->current_zone++;
+        dump->cursor = cmap_cursor_start(&dump->ct->conns[dump->current_zone]);
     }
 
     return EOF;
@@ -2756,20 +2771,32 @@  conntrack_exp_dump_done(struct conntrack_dump *dump OVS_UNUSED)
     return 0;
 }
 
+static int
+conntrack_flush_zone(struct conntrack *ct, const uint16_t zone)
+{
+    struct conn_key_node *keyn;
+    struct conn *conn;
+
+    CMAP_FOR_EACH (keyn, cm_node, &ct->conns[zone]) {
+        if (keyn->dir != CT_DIR_FWD) {
+            continue;
+        }
+        conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
+        conn_clean(ct, conn);
+    }
+
+    return 0;
+}
+
 int
 conntrack_flush(struct conntrack *ct, const uint16_t *zone)
 {
-    struct conn_key_node *keyn;
-    struct conn *conn;
+    if (zone) {
+        return conntrack_flush_zone(ct, *zone);
+    }
 
-    CMAP_FOR_EACH (keyn, cm_node, &ct->conns) {
-        if (keyn->dir != CT_DIR_FWD) {
-            continue;
-        }
-        conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
-        if (!zone || *zone == keyn->key.zone) {
-            conn_clean(ct, conn);
-        }
+    for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) {
+        conntrack_flush_zone(ct, i);
     }
 
     return 0;
diff --git a/lib/conntrack.h b/lib/conntrack.h
index 8ab8b0017..13bb02ea9 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -112,6 +112,7 @@  struct conntrack_dump {
     };
     bool filter_zone;
     uint16_t zone;
+    uint16_t current_zone;
 };
 
 struct conntrack_zone_limit {