diff mbox series

[ovs-dev,v2,5/6] conntrack: Use a per zone default limit.

Message ID 20240930205034.65484-5-pvalerio@redhat.com
State Accepted
Commit b57c1da5c39a9520a13d671c56317c3409debb92
Delegated to: aaron conole
Headers show
Series [ovs-dev,v2,1/6] conntrack: Correctly annotate conntrack member. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Paolo Valerio Sept. 30, 2024, 8:50 p.m. UTC
Before this change the default limit, instead of being considered
per-zone, was considered as a global value that every new entry was
checked against during the creation. This was not the intended
behavior as the default limit should be inherited by each zone instead
of being an aggregate number.

This change corrects that by removing the default limit from the cmap
and making it global (atomic). Now, whenever a new connection needs to
be committed, if default_zone_limit is set and the entry for the zone
doesn't exist, a new entry for that zone is lazily created, marked as
default. All subsequent packets for that zone will undergo the regular
lookup process.
To distinguish between default and user-defined entries, the storage
for the limit member of struct conntrack_zone_limit has been changed
from a 32-bit unsigned integer to a 64-bit signed integer. The
negative value ZONE_LIMIT_CONN_DEFAULT now indicates a default entry.

Operations such as creation/deletion are modified accordingly taking
into account this new behavior.

Worth noting that OVS_REQUIRES(ct->ct_lock) is not a strict
requirement for zone_limit_lookup_or_default(), however since the
function operates under the lock and it can create an entry in the
slow path, the lock requirement is enforced in order to make thread
safety checks work. The function can still be moved outside the
creation lock or any lock, keeping the fastpath lockless (turning
zone_limit_lookup_protected() to its unprotected version) and locking
only in the slow path (replacing zone_limit_create__() with
zone_limit_create__().

The patch also extends `conntrack - limit by zone` test in order to
check the behavior, and while at it, update test's packet-out to use
compose-packet function.

Fixes: a7f33fdbfb67 ("conntrack: Support zone limits.")
Reported-at: https://issues.redhat.com/browse/FDP-122
Reported-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: Paolo Valerio <pvalerio@redhat.com>
---
v2:
Aaron:
- Added entry in NEWS
- updated commit message mentioning the storage change for limit
---
 NEWS                    |   5 +
 lib/conntrack-private.h |   7 +-
 lib/conntrack.c         | 233 +++++++++++++++++++++++++++++++---------
 tests/system-traffic.at |  64 +++++++----
 4 files changed, 236 insertions(+), 73 deletions(-)
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 7a9626bf4..48384ab1d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,10 @@ 
 Post-v3.4.0
 --------------------
+   - Userspace datapath:
+     * The default zone limit, if set, is now inherited by any zone
+       that does not have a specific value defined, rather than being
+       treated as a global value, aligning the behavior with that of
+       the kernel datapath.
 
 
 v3.4.0 - 15 Aug 2024
diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 2770470d1..46b212754 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -198,10 +198,11 @@  enum ct_ephemeral_range {
 #define FOR_EACH_PORT_IN_RANGE(curr, min, max) \
     FOR_EACH_PORT_IN_RANGE__(curr, min, max, OVS_JOIN(idx, __COUNTER__))
 
+#define ZONE_LIMIT_CONN_DEFAULT -1
 
 struct conntrack_zone_limit {
     int32_t zone;
-    atomic_uint32_t limit;
+    atomic_int64_t limit;
     atomic_count count;
     uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
 };
@@ -212,6 +213,9 @@  struct conntrack {
     struct rculist exp_lists[N_EXP_LISTS] OVS_GUARDED;
     struct cmap zone_limits OVS_GUARDED;
     struct cmap timeout_policies OVS_GUARDED;
+    uint32_t zone_limit_seq OVS_GUARDED; /* Used to disambiguate zone limit
+                                          * counts. */
+    atomic_uint32_t default_zone_limit;
 
     uint32_t hash_basis; /* Salt for hashing a connection key. */
     pthread_t clean_thread; /* Periodically cleans up connection tracker. */
@@ -234,7 +238,6 @@  struct conntrack {
                                                      * control context.  */
 
     struct ipf *ipf; /* Fragmentation handling context. */
-    uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
     atomic_bool tcp_seq_chk; /* Check TCP sequence numbers. */
     atomic_uint32_t sweep_ms; /* Next sweep interval. */
 };
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 3d19d37df..0061a5636 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -270,6 +270,7 @@  conntrack_init(void)
     atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT);
     atomic_init(&ct->tcp_seq_chk, true);
     atomic_init(&ct->sweep_ms, 20000);
+    atomic_init(&ct->default_zone_limit, 0);
     latch_init(&ct->clean_thread_exit);
     ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct);
     ct->ipf = ipf_init();
@@ -296,6 +297,28 @@  zone_key_hash(int32_t zone, uint32_t basis)
     return hash;
 }
 
+static int64_t
+zone_limit_get_limit__(struct conntrack_zone_limit *czl)
+{
+    int64_t limit;
+    atomic_read_relaxed(&czl->limit, &limit);
+
+    return limit;
+}
+
+static int64_t
+zone_limit_get_limit(struct conntrack *ct, struct conntrack_zone_limit *czl)
+{
+    int64_t limit = zone_limit_get_limit__(czl);
+
+    if (limit == ZONE_LIMIT_CONN_DEFAULT) {
+        atomic_read_relaxed(&ct->default_zone_limit, &limit);
+        limit = limit ? : -1;
+    }
+
+    return limit;
+}
+
 static struct zone_limit *
 zone_limit_lookup_protected(struct conntrack *ct, int32_t zone)
     OVS_REQUIRES(ct->ct_lock)
@@ -323,11 +346,56 @@  zone_limit_lookup(struct conntrack *ct, int32_t zone)
     return NULL;
 }
 
+static struct zone_limit *
+zone_limit_create__(struct conntrack *ct, int32_t zone, int64_t limit)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct zone_limit *zl = NULL;
+
+    if (zone > DEFAULT_ZONE && zone <= MAX_ZONE) {
+        zl = xmalloc(sizeof *zl);
+        atomic_init(&zl->czl.limit, limit);
+        atomic_count_init(&zl->czl.count, 0);
+        zl->czl.zone = zone;
+        zl->czl.zone_limit_seq = ct->zone_limit_seq++;
+        uint32_t hash = zone_key_hash(zone, ct->hash_basis);
+        cmap_insert(&ct->zone_limits, &zl->node, hash);
+    }
+
+    return zl;
+}
+
+static struct zone_limit *
+zone_limit_create(struct conntrack *ct, int32_t zone, int64_t limit)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct zone_limit *zl = zone_limit_lookup_protected(ct, zone);
+
+    if (zl) {
+        return zl;
+    }
+
+    return zone_limit_create__(ct, zone, limit);
+}
+
+/* Lazily creates a new entry in the zone_limits cmap if default limit
+ * is set and there's no entry for the zone. */
 static struct zone_limit *
 zone_limit_lookup_or_default(struct conntrack *ct, int32_t zone)
+    OVS_REQUIRES(ct->ct_lock)
 {
-    struct zone_limit *zl = zone_limit_lookup(ct, zone);
-    return zl ? zl : zone_limit_lookup(ct, DEFAULT_ZONE);
+    struct zone_limit *zl = zone_limit_lookup_protected(ct, zone);
+
+    if (!zl) {
+        uint32_t limit;
+        atomic_read_relaxed(&ct->default_zone_limit, &limit);
+
+        if (limit) {
+            zl = zone_limit_create__(ct, zone, ZONE_LIMIT_CONN_DEFAULT);
+        }
+    }
+
+    return zl;
 }
 
 struct conntrack_zone_info
@@ -338,89 +406,147 @@  zone_limit_get(struct conntrack *ct, int32_t zone)
         .limit = 0,
         .count = 0,
     };
-    struct zone_limit *zl = zone_limit_lookup_or_default(ct, zone);
+    struct zone_limit *zl = zone_limit_lookup(ct, zone);
     if (zl) {
-        czl.zone = zl->czl.zone;
-        atomic_read_relaxed(&zl->czl.limit, &czl.limit);
+        int64_t czl_limit = zone_limit_get_limit__(&zl->czl);
+        if (czl_limit > ZONE_LIMIT_CONN_DEFAULT) {
+            czl.zone = zl->czl.zone;
+            czl.limit = czl_limit;
+        } else {
+            atomic_read_relaxed(&ct->default_zone_limit, &czl.limit);
+        }
+
         czl.count = atomic_count_get(&zl->czl.count);
+    } else {
+        atomic_read_relaxed(&ct->default_zone_limit, &czl.limit);
     }
+
     return czl;
 }
 
-static int
-zone_limit_create(struct conntrack *ct, int32_t zone, uint32_t limit)
+static void
+zone_limit_clean__(struct conntrack *ct, struct zone_limit *zl)
     OVS_REQUIRES(ct->ct_lock)
 {
-    struct zone_limit *zl = zone_limit_lookup_protected(ct, zone);
+    uint32_t hash = zone_key_hash(zl->czl.zone, ct->hash_basis);
+    cmap_remove(&ct->zone_limits, &zl->node, hash);
+    ovsrcu_postpone(free, zl);
+}
 
-    if (zl) {
-        return 0;
-    }
+static void
+zone_limit_clean(struct conntrack *ct, struct zone_limit *zl)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    uint32_t limit;
 
-    if (zone >= DEFAULT_ZONE && zone <= MAX_ZONE) {
-        zl = xmalloc(sizeof *zl);
-        atomic_init(&zl->czl.limit, limit);
-        atomic_count_init(&zl->czl.count, 0);
-        zl->czl.zone = zone;
-        zl->czl.zone_limit_seq = ct->zone_limit_seq++;
-        uint32_t hash = zone_key_hash(zone, ct->hash_basis);
-        cmap_insert(&ct->zone_limits, &zl->node, hash);
-        return 0;
+    atomic_read_relaxed(&ct->default_zone_limit, &limit);
+    /* Do not remove the entry if the default limit is enabled, but
+     * simply move the limit to default. */
+    if (limit) {
+        atomic_store_relaxed(&zl->czl.limit, ZONE_LIMIT_CONN_DEFAULT);
     } else {
-        return EINVAL;
+        zone_limit_clean__(ct, zl);
     }
 }
 
-int
-zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit)
+static void
+zone_limit_clean_default(struct conntrack *ct)
+    OVS_REQUIRES(ct->ct_lock)
 {
-    int err = 0;
-    struct zone_limit *zl = zone_limit_lookup(ct, zone);
-    if (zl) {
-        atomic_store_relaxed(&zl->czl.limit, limit);
-        VLOG_INFO("Changed zone limit of %u for zone %d", limit, zone);
-    } else {
-        ovs_mutex_lock(&ct->ct_lock);
-        err = zone_limit_create(ct, zone, limit);
-        ovs_mutex_unlock(&ct->ct_lock);
-        if (!err) {
-            VLOG_INFO("Created zone limit of %u for zone %d", limit, zone);
-        } else {
-            VLOG_WARN("Request to create zone limit for invalid zone %d",
-                      zone);
+    struct zone_limit *zl;
+    int64_t czl_limit;
+
+    atomic_store_relaxed(&ct->default_zone_limit, 0);
+
+    CMAP_FOR_EACH (zl, node, &ct->zone_limits) {
+        atomic_read_relaxed(&zl->czl.limit, &czl_limit);
+        if (zone_limit_get_limit__(&zl->czl) == ZONE_LIMIT_CONN_DEFAULT) {
+            zone_limit_clean__(ct, zl);
         }
     }
-    return err;
 }
 
-static void
-zone_limit_clean(struct conntrack *ct, struct zone_limit *zl)
+static bool
+zone_limit_delete__(struct conntrack *ct, int32_t zone)
     OVS_REQUIRES(ct->ct_lock)
 {
-    uint32_t hash = zone_key_hash(zl->czl.zone, ct->hash_basis);
-    cmap_remove(&ct->zone_limits, &zl->node, hash);
-    ovsrcu_postpone(free, zl);
+    struct zone_limit *zl = NULL;
+
+    if (zone == DEFAULT_ZONE) {
+        zone_limit_clean_default(ct);
+    } else {
+        zl = zone_limit_lookup_protected(ct, zone);
+        if (zl) {
+            zone_limit_clean(ct, zl);
+        }
+    }
+
+    return zl != NULL;
 }
 
 int
 zone_limit_delete(struct conntrack *ct, int32_t zone)
 {
+    bool deleted;
+
     ovs_mutex_lock(&ct->ct_lock);
-    struct zone_limit *zl = zone_limit_lookup_protected(ct, zone);
-    if (zl) {
-        zone_limit_clean(ct, zl);
-    }
+    deleted = zone_limit_delete__(ct, zone);
+    ovs_mutex_unlock(&ct->ct_lock);
 
     if (zone != DEFAULT_ZONE) {
-        VLOG_INFO(zl ? "Deleted zone limit for zone %d"
-                     : "Attempted delete of non-existent zone limit: zone %d",
+        VLOG_INFO(deleted
+                  ? "Deleted zone limit for zone %d"
+                  : "Attempted delete of non-existent zone limit: zone %d",
                   zone);
     }
 
-    ovs_mutex_unlock(&ct->ct_lock);
     return 0;
 }
 
+static void
+zone_limit_update_default(struct conntrack *ct, int32_t zone, uint32_t limit)
+{
+    /* limit zero means delete default. */
+    if (limit == 0) {
+        ovs_mutex_lock(&ct->ct_lock);
+        zone_limit_delete__(ct, zone);
+        ovs_mutex_unlock(&ct->ct_lock);
+    } else {
+        atomic_store_relaxed(&ct->default_zone_limit, limit);
+    }
+}
+
+int
+zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit)
+{
+    struct zone_limit *zl;
+    int err = 0;
+
+    if (zone == DEFAULT_ZONE) {
+        zone_limit_update_default(ct, zone, limit);
+        VLOG_INFO("Set default zone limit to %u", limit);
+        return err;
+    }
+
+    zl = zone_limit_lookup(ct, zone);
+    if (zl) {
+        atomic_store_relaxed(&zl->czl.limit, limit);
+        VLOG_INFO("Changed zone limit of %u for zone %d", limit, zone);
+    } else {
+        ovs_mutex_lock(&ct->ct_lock);
+        err = zone_limit_create(ct, zone, limit) == NULL;
+        ovs_mutex_unlock(&ct->ct_lock);
+        if (!err) {
+            VLOG_INFO("Created zone limit of %u for zone %d", limit, zone);
+        } else {
+            VLOG_WARN("Request to create zone limit for invalid zone %d",
+                      zone);
+        }
+    }
+
+    return err;
+}
+
 static void
 conn_clean__(struct conntrack *ct, struct conn *conn)
     OVS_REQUIRES(ct->ct_lock)
@@ -917,13 +1043,14 @@  conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
     }
 
     if (commit) {
-        uint32_t czl_limit;
+        int64_t czl_limit;
         struct conn_key_node *fwd_key_node, *rev_key_node;
         struct zone_limit *zl = zone_limit_lookup_or_default(ct,
                                                              ctx->key.zone);
         if (zl) {
-            atomic_read_relaxed(&zl->czl.limit, &czl_limit);
-            if (atomic_count_get(&zl->czl.count) >= czl_limit) {
+            czl_limit = zone_limit_get_limit(ct, &zl->czl);
+            if (czl_limit >= 0 &&
+                atomic_count_get(&zl->czl.count) >= czl_limit) {
                 COVERAGE_INC(conntrack_zone_full);
                 return nc;
             }
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 158629f42..fe115d92b 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -5686,6 +5686,41 @@  priority=100,in_port=2,udp,action=ct(zone=3,commit),1
 
 AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
 
+m4_define([UDP_PKT], [m4_join([,],
+  [eth_src=50:54:00:00:00:0$1,eth_dst=50:54:00:00:00:0$2,dl_type=0x0800],
+  [nw_src=10.1.1.$1,nw_dst=10.1.1.$2],
+  [nw_proto=17,nw_ttl=64,nw_frag=no],
+  [udp_src=1,udp_dst=$3])])
+
+AT_CHECK([ovs-appctl dpctl/ct-set-limits default=3])
+AT_CHECK([ovs-appctl dpctl/ct-get-limits], [],[dnl
+default limit=3
+])
+
+dnl Send 5 packets from each port in order to hit the default zone limit.
+for i in $(seq 2 6); do
+    pkt=$(ovs-ofctl compose-packet --bare "UDP_PKT([1], [2], [$i])")
+    AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=${pkt} actions=resubmit(,0)"])
+done
+
+AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=1], [],[dnl
+default limit=3
+zone=1,limit=3,count=3
+])
+
+for i in $(seq 2 6); do
+    pkt=$(ovs-ofctl compose-packet --bare "UDP_PKT([3], [4], [$i])")
+    AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=${pkt} actions=resubmit(,0)"])
+done
+
+AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=1,3], [],[dnl
+default limit=3
+zone=1,limit=3,count=3
+zone=3,limit=3,count=3
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack])
+
 AT_CHECK([ovs-appctl dpctl/ct-set-limits default=10 zone=1,limit=5 zone=2,limit=3 zone=3,limit=3 zone=4,limit=15])
 AT_CHECK([ovs-appctl dpctl/ct-del-limits zone=2,4,5])
 AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=1,2,3,4], [],[dnl
@@ -5697,15 +5732,10 @@  zone=4,limit=10,count=0
 ])
 
 dnl Test UDP from port 1
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000300080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000500080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000600080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000700080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000800080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000900080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000a00080000 actions=resubmit(,0)"])
+for i in $(seq 2 10); do
+    pkt=$(ovs-ofctl compose-packet --bare "UDP_PKT([1], [2], [$i])")
+    AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=${pkt} actions=resubmit(,0)"])
+done
 
 AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=1,2,3,4,5], [0], [dnl
 default limit=10
@@ -5732,11 +5762,10 @@  udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=6),reply=(src=10.1.1.2,dst=10.
 ])
 
 dnl Test UDP from port 2
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000200080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000300080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000400080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000500080000 actions=resubmit(,0)"])
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000600080000 actions=resubmit(,0)"])
+for i in $(seq 2 6); do
+    pkt=$(ovs-ofctl compose-packet --bare "UDP_PKT([3], [4], [$i])")
+    AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=${pkt} actions=resubmit(,0)"])
+done
 
 AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=1,3], [0], [dnl
 default limit=10
@@ -5797,10 +5826,9 @@  default limit=10
 zone=1,limit=3,count=0
 zone=3,limit=3,count=0])
 
-for i in 2 3 4 5 6; do
-  packet="50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000${i}00080000"
-  AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 \
-              "in_port=2 packet=${packet} actions=resubmit(,0)"])
+for i in $(seq 2 6); do
+    pkt=$(ovs-ofctl compose-packet --bare "UDP_PKT([3], [4], [$i])")
+    AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=${pkt} actions=resubmit(,0)"])
 done
 
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.3," | sort ], [0], [dnl