diff mbox series

[ovs-dev,v4,4/4] Controller, northd: Add support for CT zone limits.

Message ID 20240626055616.516625-5-amusil@redhat.com
State Superseded
Headers show
Series Add ability to limit CT entries per LS/LR/LSP | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Ales Musil June 26, 2024, 5:56 a.m. UTC
Add support for limiting the CT zone usage per Ls, LR or LSP.
When the limit is configured on logical switch it will also implicitly
set limits for all ports in that logical switch. The port configuration
can be overwritten individually and has priority over the whole logical
switch configuration.

The value 0 means unlimited, when the value is not specified it is
derived from OvS default CT limit specified for given OvS datapath.

Reported-at: https://bugzilla.redhat.com/2189924
Signed-off-by: Ales Musil <amusil@redhat.com>
---
v4: Rebase on top of latest main.
    Change naming of the ct_zone_limit_sync to avoid potential confusion as suggested by Lorenzo.
v3: Rebase on top of latest main.
---
 NEWS                        |   3 +
 controller/ct-zone.c        | 175 ++++++++++++++++++++++++++++++++----
 controller/ct-zone.h        |  12 ++-
 controller/ovn-controller.c |  25 +++++-
 lib/ovn-util.c              |  17 ++++
 lib/ovn-util.h              |   3 +
 northd/northd.c             |   8 ++
 ovn-nb.xml                  |  29 ++++++
 tests/ovn-controller.at     |  99 ++++++++++++++++++++
 9 files changed, 349 insertions(+), 22 deletions(-)

Comments

Dumitru Ceara July 11, 2024, 8:53 p.m. UTC | #1
On 6/26/24 07:56, Ales Musil wrote:
> Add support for limiting the CT zone usage per Ls, LR or LSP.
> When the limit is configured on logical switch it will also implicitly
> set limits for all ports in that logical switch. The port configuration
> can be overwritten individually and has priority over the whole logical
> switch configuration.
> 
> The value 0 means unlimited, when the value is not specified it is
> derived from OvS default CT limit specified for given OvS datapath.
> 
> Reported-at: https://bugzilla.redhat.com/2189924
> Signed-off-by: Ales Musil <amusil@redhat.com>
> ---
> v4: Rebase on top of latest main.
>     Change naming of the ct_zone_limit_sync to avoid potential confusion as suggested by Lorenzo.
> v3: Rebase on top of latest main.
> ---

Hi Ales,

Sorry for reviewing this so late in the cycle.  I think the patch looks
OK.  I mostly have some performance related concerns, it would be great
to get some scale testing numbers.  The other comment I had is minor.

>  NEWS                        |   3 +
>  controller/ct-zone.c        | 175 ++++++++++++++++++++++++++++++++----
>  controller/ct-zone.h        |  12 ++-
>  controller/ovn-controller.c |  25 +++++-
>  lib/ovn-util.c              |  17 ++++
>  lib/ovn-util.h              |   3 +
>  northd/northd.c             |   8 ++
>  ovn-nb.xml                  |  29 ++++++
>  tests/ovn-controller.at     |  99 ++++++++++++++++++++
>  9 files changed, 349 insertions(+), 22 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index 01db77d57..f50d68a82 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -34,6 +34,9 @@ Post v24.03.0
>    - NATs can now be given an arbitrary match condition and priority. This
>      allows for conditional NATs to be configured. See the ovn-nb(5) man
>      page for more information.
> +  - Add support for CT zone limit that can be specified per LR
> +    (options:ct-zone-limit), LS (other_config:ct-zone-limit) or LSP
> +    (options:ct-zone-limit).
>  
>  OVN v24.03.0 - 01 Mar 2024
>  --------------------------
> diff --git a/controller/ct-zone.c b/controller/ct-zone.c
> index 6ed1e4108..97f0de6c3 100644
> --- a/controller/ct-zone.c
> +++ b/controller/ct-zone.c
> @@ -34,6 +34,18 @@ static bool ct_zone_assign_unused(struct ct_zone_ctx *ctx,
>  static bool ct_zone_remove(struct ct_zone_ctx *ctx, const char *name);
>  static void ct_zone_add(struct ct_zone_ctx *ctx, const char *name,
>                          uint16_t zone, bool set_pending);
> +static void
> +ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
> +                             const struct sbrec_datapath_binding *dp,
> +                             const char *name,
> +                             struct ovsdb_idl_index *pb_by_dp);
> +static void ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name,
> +                                 int64_t limit);
> +static int64_t ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp);
> +static int64_t ct_zone_get_pb_limit(const struct sbrec_port_binding *pb);
> +static int64_t ct_zone_limit_normalize(int64_t limit);
> +static struct ovsrec_ct_zone *
> +ct_zone_find_ovsrec(const struct ovsrec_datapath *dp, uint16_t zone_id);
>  
>  void
>  ct_zones_restore(struct ct_zone_ctx *ctx,
> @@ -196,11 +208,14 @@ ct_zones_update(const struct sset *local_lports,
>  
>  void
>  ct_zones_commit(const struct ovsrec_bridge *br_int,
> +                const struct ovsrec_datapath *ovs_dp,
> +                struct ovsdb_idl_txn *ovs_idl_txn,
>                  struct shash *pending_ct_zones)
>  {
>      struct shash_node *iter;
>      SHASH_FOR_EACH (iter, pending_ct_zones) {
>          struct ct_zone_pending_entry *ctzpe = iter->data;
> +        struct ct_zone *ct_zone = &ctzpe->ct_zone;
>  
>          /* The transaction is open, so any pending entries in the
>           * CT_ZONE_DB_QUEUED must be sent and any in CT_ZONE_DB_QUEUED
> @@ -212,7 +227,7 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
>  
>          char *user_str = xasprintf("ct-zone-%s", iter->name);
>          if (ctzpe->add) {
> -            char *zone_str = xasprintf("%"PRIu16, ctzpe->ct_zone.zone);
> +            char *zone_str = xasprintf("%"PRIu16, ct_zone->zone);
>              struct smap_node *node =
>                      smap_get_node(&br_int->external_ids, user_str);
>              if (!node || strcmp(node->value, zone_str)) {
> @@ -227,6 +242,19 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
>          }
>          free(user_str);
>  
> +        struct ovsrec_ct_zone *ovs_zone =
> +                ct_zone_find_ovsrec(ovs_dp, ct_zone->zone);

Does this create a measurable performance impact?  We're going to
potentially walk all already assigned CT zones every time a new port is
bound locally.  Should we cache the ovs_zone records and index them by
zone_id?  That probably also means that we might need to add incremental
processing support for ovsrec_ct_zone.

On the other hand, I tried a quick test of binding 500 ovs interfaces to
LSPs in a sandbox (ovs and ovn compiled with gcc and -O2 and debug
logging disabled) and I didn't see any noticeable performance degradation.

Just to be sure, would it be possible to do some e2e performance
measurements with ovn-heater with this patch?

> +        if ((!ctzpe->add || ct_zone->limit < 0) && ovs_zone) {
> +            ovsrec_datapath_update_ct_zones_delkey(ovs_dp, ct_zone->zone);
> +        } else if (ctzpe->add && ct_zone->limit >= 0) {
> +            if (!ovs_zone) {
> +                ovs_zone = ovsrec_ct_zone_insert(ovs_idl_txn);
> +                ovsrec_datapath_update_ct_zones_setkey(ovs_dp, ct_zone->zone,
> +                                                       ovs_zone);
> +            }
> +            ovsrec_ct_zone_set_limit(ovs_zone, &ct_zone->limit, 1);
> +        }
> +
>          ctzpe->state = CT_ZONE_DB_SENT;
>      }
>  }
> @@ -247,8 +275,19 @@ ct_zones_pending_clear_commited(struct shash *pending)
>  /* Returns "true" when there is no need for full recompute. */
>  bool
>  ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> -                         const struct sbrec_datapath_binding *dp)
> +                         const struct sbrec_datapath_binding *dp,
> +                         struct ovsdb_idl_index *pb_by_dp)
>  {
> +    const char *name = smap_get(&dp->external_ids, "name");
> +    if (!name) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> +        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
> +                    "zone check.", UUID_ARGS(&dp->header_.uuid));
> +        return true;
> +    }
> +
> +    ct_zone_limits_update_per_dp(ctx, dp, name, pb_by_dp);
> +
>      int req_snat_zone = ct_zone_get_snat(dp);
>      if (req_snat_zone == -1) {
>          /* datapath snat ct zone is not set.  This condition will also hit
> @@ -259,14 +298,6 @@ ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
>          return true;
>      }
>  
> -    const char *name = smap_get(&dp->external_ids, "name");
> -    if (!name) {
> -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> -        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
> -                    "zone check.", UUID_ARGS(&dp->header_.uuid));
> -        return true;
> -    }
> -
>      /* Check if the requested snat zone has changed for the datapath
>       * or not.  If so, then fall back to full recompute of
>       * ct_zone engine. */
> @@ -290,14 +321,18 @@ ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
>  
>  /* Returns "true" if there was an update to the context. */
>  bool
> -ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
> +ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
> +                           const struct sbrec_port_binding *pb,
>                             bool updated, int *scan_start)
>  {
> -    struct shash_node *node = shash_find(&ctx->current, name);
> -    if (updated && !node) {
> -        ct_zone_assign_unused(ctx, name, scan_start);
> +    struct shash_node *node = shash_find(&ctx->current, pb->logical_port);
> +    if (updated) {
> +        if (!node) {
> +            ct_zone_assign_unused(ctx, pb->logical_port, scan_start);
> +        }
> +        ct_zone_limit_update(ctx, pb->logical_port, ct_zone_get_pb_limit(pb));
>          return true;
> -    } else if (!updated && node && ct_zone_remove(ctx, node->name)) {
> +    } else if (node && ct_zone_remove(ctx, node->name)) {
>          return true;
>      }
>  
> @@ -311,6 +346,25 @@ ct_zone_find_zone(const struct shash *ct_zones, const char *name)
>      return ct_zone ? ct_zone->zone : 0;
>  }
>  
> +void
> +ct_zones_limits_sync(struct ct_zone_ctx *ctx,
> +                     const struct hmap *local_datapaths,
> +                     struct ovsdb_idl_index *pb_by_dp)
> +{
> +    const struct local_datapath *ld;
> +    HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> +        const char *name = smap_get(&ld->datapath->external_ids, "name");
> +        if (!name) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> +            VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' "
> +                        "skipping zone assignment.",
> +                        UUID_ARGS(&ld->datapath->header_.uuid));
> +            continue;
> +        }
> +
> +        ct_zone_limits_update_per_dp(ctx, ld->datapath, name, pb_by_dp);
> +    }
> +}
>  
>  static bool
>  ct_zone_assign_unused(struct ct_zone_ctx *ctx, const char *zone_name,
> @@ -363,7 +417,10 @@ ct_zone_add(struct ct_zone_ctx *ctx, const char *name, uint16_t zone,
>          shash_add(&ctx->current, name, ct_zone);
>      }
>  
> -    ct_zone->zone = zone;
> +    *ct_zone = (struct ct_zone) {
> +        .zone = zone,
> +        .limit = -1,
> +    };
>  
>      if (set_pending) {
>          ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
> @@ -446,6 +503,7 @@ ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
>  
>          struct ct_zone ct_zone = {
>              .zone = zone,
> +            .limit = -1,
>          };
>          /* Make sure we remove the uuid one in the next OvS DB commit without
>           * flush. */
> @@ -461,3 +519,88 @@ ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
>      ct_zone_add(ctx, current_name, zone, false);
>      free(new_name);
>  }
> +
> +static void
> +ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
> +                             const struct sbrec_datapath_binding *dp,
> +                             const char *name,
> +                             struct ovsdb_idl_index *pb_by_dp)
> +{
> +    if (smap_get(&dp->external_ids, "logical-switch")) {
> +        struct sbrec_port_binding *target =
> +                sbrec_port_binding_index_init_row(pb_by_dp);
> +        sbrec_port_binding_index_set_datapath(target, dp);
> +
> +        const struct sbrec_port_binding *pb;
> +        SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target, pb_by_dp) {
> +            ct_zone_limit_update(ctx, pb->logical_port,
> +                                 ct_zone_get_pb_limit(pb));
> +        }
> +
> +        sbrec_port_binding_index_destroy_row(target);
> +    }
> +
> +    int64_t dp_limit = ct_zone_get_dp_limit(dp);
> +    char *dnat = alloc_nat_zone_key(name, "dnat");
> +    char *snat = alloc_nat_zone_key(name, "snat");
> +
> +    ct_zone_limit_update(ctx, dnat, dp_limit);
> +    ct_zone_limit_update(ctx, snat, dp_limit);
> +
> +    free(dnat);
> +    free(snat);
> +}
> +
> +static void
> +ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name, int64_t limit)
> +{
> +    struct ct_zone *ct_zone = shash_find_data(&ctx->current, name);
> +
> +    if (!ct_zone) {
> +        return;
> +    }
> +
> +    if (ct_zone->limit != limit) {
> +        ct_zone->limit = limit;
> +        /* Add pending entry only for DB store to avoid flushing the zone. */
> +        ct_zone_add_pending(&ctx->pending, CT_ZONE_DB_QUEUED, ct_zone,
> +                            true, name);
> +        VLOG_DBG("setting ct zone %"PRIu16" limit to %"PRId64,
> +                 ct_zone->zone, ct_zone->limit);
> +    }
> +}
> +
> +static int64_t
> +ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp)
> +{
> +    int64_t limit = ovn_smap_get_llong(&dp->external_ids, "ct-zone-limit", -1);
> +    return ct_zone_limit_normalize(limit);
> +}
> +
> +static int64_t
> +ct_zone_get_pb_limit(const struct sbrec_port_binding *pb)
> +{
> +    int64_t dp_limit = ovn_smap_get_llong(&pb->datapath->external_ids,
> +                                          "ct-zone-limit", -1);
> +    int64_t limit = ovn_smap_get_llong(&pb->options,
> +                                       "ct-zone-limit", dp_limit);
> +    return ct_zone_limit_normalize(limit);
> +}
> +
> +static int64_t
> +ct_zone_limit_normalize(int64_t limit)
> +{
> +    return limit >= 0 && limit <= UINT32_MAX ? limit : -1;
> +}
> +
> +static struct ovsrec_ct_zone *
> +ct_zone_find_ovsrec(const struct ovsrec_datapath *dp, uint16_t zone_id)
> +{
> +    for (int i = 0; i < dp->n_ct_zones; i++) {

Nit: size_t

> +        if (dp->key_ct_zones[i] == zone_id) {
> +            return dp->value_ct_zones[i];
> +        }
> +    }
> +
> +    return NULL;
> +}
> diff --git a/controller/ct-zone.h b/controller/ct-zone.h
> index af68de2d6..5d7f66224 100644
> --- a/controller/ct-zone.h
> +++ b/controller/ct-zone.h
> @@ -43,6 +43,7 @@ struct ct_zone_ctx {
>  date
>  struct ct_zone {
>      uint16_t zone;
> +    int64_t limit;
>  };
>  
>  /* States to move through when a new conntrack zone has been allocated. */
> @@ -70,12 +71,19 @@ void ct_zones_update(const struct sset *local_lports,
>                       const struct hmap *local_datapaths,
>                       struct ct_zone_ctx *ctx);
>  void ct_zones_commit(const struct ovsrec_bridge *br_int,
> +                     const struct ovsrec_datapath *ovs_dp,
> +                     struct ovsdb_idl_txn *ovs_idl_txn,
>                       struct shash *pending_ct_zones);
>  void ct_zones_pending_clear_commited(struct shash *pending);
>  bool ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> -                              const struct sbrec_datapath_binding *dp);
> -bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
> +                              const struct sbrec_datapath_binding *dp,
> +                              struct ovsdb_idl_index *pb_by_dp);
> +bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
> +                                const struct sbrec_port_binding *pb,
>                                  bool updated, int *scan_start);
>  uint16_t ct_zone_find_zone(const struct shash *ct_zones, const char *name);
> +void ct_zones_limits_sync(struct ct_zone_ctx *ctx,
> +                          const struct hmap *local_datapaths,
> +                          struct ovsdb_idl_index *pb_by_dp);
>  
>  #endif /* controller/ct-zone.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index a47ed5d9a..1b4c89cda 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -798,6 +798,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
>      ovsdb_idl_add_column(ovs_idl, &ovsrec_ssl_col_private_key);
>      ovsdb_idl_add_table(ovs_idl, &ovsrec_table_datapath);
>      ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_capabilities);
> +    ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_ct_zones);
>      ovsdb_idl_add_table(ovs_idl, &ovsrec_table_flow_sample_collector_set);
>      ovsdb_idl_add_table(ovs_idl, &ovsrec_table_qos);
>      ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_other_config);
> @@ -807,6 +808,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
>      ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_other_config);
>      ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_external_ids);
>      ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_link_state);
> +    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_ct_zone);
> +    ovsdb_idl_add_column(ovs_idl, &ovsrec_ct_zone_col_limit);
>  
>      chassis_register_ovs_idl(ovs_idl);
>      encaps_register_ovs_idl(ovs_idl);
> @@ -2219,6 +2222,10 @@ en_ct_zones_run(struct engine_node *node, void *data)
>      struct ed_type_ct_zones *ct_zones_data = data;
>      struct ed_type_runtime_data *rt_data =
>          engine_get_input_data("runtime_data", node);
> +    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
> +            engine_ovsdb_node_get_index(
> +                    engine_get_input("SB_port_binding", node),
> +                    "datapath");
>  
>      const struct ovsrec_open_vswitch_table *ovs_table =
>          EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
> @@ -2232,6 +2239,8 @@ en_ct_zones_run(struct engine_node *node, void *data)
>      ct_zones_restore(&ct_zones_data->ctx, ovs_table, dp_table, br_int);
>      ct_zones_update(&rt_data->local_lports, &rt_data->local_datapaths,
>                      &ct_zones_data->ctx);
> +    ct_zones_limits_sync(&ct_zones_data->ctx, &rt_data->local_datapaths,
> +                         sbrec_port_binding_by_datapath);

Same question about potential performance impact here too.  Instead of
doing this in two steps wouldn't it make sense to actually store in the
ct_zone structure the requested limit already when we create the
structure (or the pending one)?  We'd avoid this O(n log n) - I think -
complexity.

But, as mentioned above, if there's no visible e2e performance impact we
can probably avoid the complication and keep it as is.

>  
>      ct_zones_data->recomputed = true;
>      engine_set_node_state(node, EN_UPDATED);
> @@ -2249,6 +2258,10 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
>          engine_get_input_data("runtime_data", node);
>      const struct sbrec_datapath_binding_table *dp_table =
>          EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
> +    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
> +            engine_ovsdb_node_get_index(
> +                    engine_get_input("SB_port_binding", node),
> +                    "datapath");
>  
>      SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
>          if (!get_local_datapath(&rt_data->local_datapaths,
> @@ -2262,7 +2275,8 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
>              return false;
>          }
>  
> -        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp)) {
> +        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp,
> +                                      sbrec_port_binding_by_datapath)) {
>              return false;
>          }
>      }
> @@ -2311,8 +2325,8 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data)
>                      t_lport->tracked_type == TRACKED_RESOURCE_NEW ||
>                      t_lport->tracked_type == TRACKED_RESOURCE_UPDATED;
>              updated |= ct_zone_handle_port_update(&ct_zones_data->ctx,
> -                                                  t_lport->pb->logical_port,
> -                                                  port_updated, &scan_start);
> +                                                  t_lport->pb, port_updated,
> +                                                  &scan_start);
>          }
>      }
>  
> @@ -5154,6 +5168,9 @@ main(int argc, char *argv[])
>                       ct_zones_datapath_binding_handler);
>      engine_add_input(&en_ct_zones, &en_runtime_data,
>                       ct_zones_runtime_data_handler);
> +    /* The CT node just needs the index for port bindings by name. The data
> +     * for ports are processed in the runtime handler. */
> +    engine_add_input(&en_ct_zones, &en_sb_port_binding, engine_noop_handler);
>  
>      engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
>                       ovs_interface_shadow_ovs_interface_handler);
> @@ -5558,7 +5575,7 @@ main(int argc, char *argv[])
>                          if (ct_zones_data) {
>                              stopwatch_start(CT_ZONE_COMMIT_STOPWATCH_NAME,
>                                              time_msec());
> -                            ct_zones_commit(br_int,
> +                            ct_zones_commit(br_int, br_int_dp, ovs_idl_txn,
>                                              &ct_zones_data->ctx.pending);
>                              stopwatch_stop(CT_ZONE_COMMIT_STOPWATCH_NAME,
>                                             time_msec());
> diff --git a/lib/ovn-util.c b/lib/ovn-util.c
> index 58e941193..1ad347419 100644
> --- a/lib/ovn-util.c
> +++ b/lib/ovn-util.c
> @@ -816,6 +816,23 @@ str_tolower(const char *orig)
>      return copy;
>  }
>  
> +/* This is a wrapper function which get the value associated with 'key' in
> + * 'smap' and converts it to a long long. If 'key' is not in 'smap' or a
> + * valid unsigned integer can't be parsed from its value, returns 'def'.
> + */
> +long long
> +ovn_smap_get_llong(const struct smap *smap, const char *key, long long def)
> +{
> +    const char *value = smap_get(smap, key);
> +    long long ll_value;
> +
> +    if (!value || !str_to_llong(value, 10, &ll_value)) {
> +        return def;
> +    }
> +
> +    return ll_value;
> +}
> +
>  /* For a 'key' of the form "IP:port" or just "IP", sets 'port',
>   * 'ip_address' and 'ip' ('struct in6_addr' IPv6 or IPv4 mapped address).
>   * The caller must free() the memory allocated for 'ip_address'.
> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
> index f75b821b6..ae971ce5a 100644
> --- a/lib/ovn-util.h
> +++ b/lib/ovn-util.h
> @@ -211,6 +211,9 @@ char *normalize_v46_prefix(const struct in6_addr *prefix, unsigned int plen);
>   */
>  char *str_tolower(const char *orig);
>  
> +long long ovn_smap_get_llong(const struct smap *smap, const char *key,
> +                             long long def);
> +
>  /* OVN daemon options. Taken from ovs/lib/daemon.h. */
>  #define OVN_DAEMON_OPTION_ENUMS                     \
>      OVN_OPT_DETACH,                                 \
> diff --git a/northd/northd.c b/northd/northd.c
> index 7e474a7b8..7d3ad63a1 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -737,6 +737,14 @@ ovn_datapath_update_external_ids(struct ovn_datapath *od)
>          smap_add(&ids, "name2", name2);
>      }
>  
> +    int64_t ct_zone_limit = ovn_smap_get_llong(od->nbs ?
> +                                               &od->nbs->other_config :
> +                                               &od->nbr->options,
> +                                               "ct-zone-limit", -1);
> +    if (ct_zone_limit > 0) {
> +        smap_add_format(&ids, "ct-zone-limit", "%"PRId64, ct_zone_limit);
> +    }
> +
>      /* Set interconn-ts. */
>      if (od->nbs) {
>          const char *ts = smap_get(&od->nbs->other_config, "interconn-ts");
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index b0f8d3687..53e697308 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -721,6 +721,17 @@
>          this timeout will be automatically removed. The value defaults
>          to 0, which means disabled.
>        </column>
> +
> +      <column name="other_config" key="ct-zone-limit"
> +              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
> +        CT zone <code>limit</code> value for given
> +        <ref table="Logical_Switch"/>. This value will be propagated to all
> +        <ref table="Logical_Switch_Port"/> when configured, but can be
> +        overwritten individually per <ref table="Logical_Switch_Port"/>. The
> +        value 0 means  unlimited, when the option is not present the limit
> +        is not set and the zone limit is derived from OvS default datapath
> +        limit.
> +      </column>
>      </group>
>  
>      <group title="IP Multicast Snooping Options">
> @@ -1122,6 +1133,16 @@
>            <code>false</code>.
>          </column>
>  
> +        <column name="options" key="ct-zone-limit"
> +                type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
> +          CT zone <code>limit</code> value for given
> +          <ref table="Logical_Switch_Port"/>. This value has priority over
> +          limit specified on <ref table="Logical_Switch"/> when configured.
> +          The value 0 means unlimited, when the option is not present the limit
> +          is not set and the zone limit is derived from OvS default datapath
> +          limit.
> +        </column>
> +
>        </group>
>  
>        <group title="Options for localnet ports">
> @@ -2785,6 +2806,14 @@ or
>          </p>
>  
>        </column>
> +
> +      <column name="options" key="ct-zone-limit"
> +              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
> +        CT zone <code>limit</code> value for given
> +        <ref table="Logical_Router"/>. The value 0 means unlimited, when the
> +        option is not present the limit is not set and the zone limit is
> +        derived from OvS default datapath limit.
> +      </column>
>      </group>
>  
>      <group title="Common Columns">
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index 2cb86dc98..0a20cbc09 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -3127,3 +3127,102 @@ OVS_WAIT_UNTIL([grep -q 'tcp:127.0.0.1:1235: connected' hv1/ovn-controller.log])
>  
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn-controller - CT zone limit])
> +ovn_start
> +
> +net_add n1
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +check ovn-appctl -t ovn-controller vlog/set dbg:ct_zone
> +
> +check ovs-vsctl add-port br-int lsp \
> +    -- set Interface lsp external-ids:iface-id=lsp
> +
> +check ovn-nbctl lr-add lr
> +
> +check ovn-nbctl ls-add ls
> +check ovn-nbctl lsp-add ls ls-lr
> +check ovn-nbctl lsp-set-type ls-lr router
> +check ovn-nbctl lsp-set-addresses ls-lr router
> +check ovn-nbctl lrp-add lr lr-ls 00:00:00:00:00:01 10.0.0.1
> +
> +check ovn-nbctl lsp-add ls lsp
> +check ovn-nbctl lsp-set-addresses lsp "00:00:00:00:00:02 10.0.0.2"
> +
> +check ovn-nbctl lrp-add lr lrp-gw 01:00:00:00:00:01 172.16.0.1
> +check ovn-nbctl lrp-set-gateway-chassis lrp-gw hv1
> +
> +wait_for_ports_up
> +check ovn-nbctl --wait=hv sync
> +
> +get_zone_num() {
> +    output=$1
> +    name=$2
> +
> +    printf "$output" | grep $name | cut -d ' ' -f 2
> +}
> +
> +check_ovs_ct_limit() {
> +    zone=$1
> +    limit=$2
> +
> +    AT_CHECK_UNQUOTED([ovs-appctl dpctl/ct-get-limits zone=$zone | sed "s/count=.*/count=?/;s/default limit=.*/default limit=?/" | sort], [0], [dnl
> +default limit=?
> +zone=$zone,limit=$limit,count=?
> +])
> +}
> +
> +wait_ovs_ct_limit_count() {
> +    count=$1
> +
> +    OVS_WAIT_UNTIL([test $count -eq $(ovs-vsctl --no-headings --format=table list CT_Zone | wc -l)])
> +}
> +
> +ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list)
> +lr_dnat=$(get_zone_num "$ct_zones" lr_dnat)
> +lr_snat=$(get_zone_num "$ct_zones" lr_snat)
> +
> +ls_dnat=$(get_zone_num "$ct_zones" ls_dnat)
> +ls_snat=$(get_zone_num "$ct_zones" ls_snat)
> +
> +lsp=$(get_zone_num "$ct_zones" lsp)
> +
> +wait_ovs_ct_limit_count 0
> +
> +check ovn-nbctl --wait=hv set Logical_Router lr options:ct-zone-limit=5
> +wait_ovs_ct_limit_count 2
> +check_ovs_ct_limit $lr_dnat 5
> +check_ovs_ct_limit $lr_snat 5
> +
> +check ovn-nbctl --wait=hv remove Logical_Router lr options ct-zone-limit
> +wait_ovs_ct_limit_count 0
> +
> +check ovn-nbctl --wait=hv set Logical_Switch ls other_config:ct-zone-limit=10
> +wait_ovs_ct_limit_count 3
> +check_ovs_ct_limit $ls_dnat 10
> +check_ovs_ct_limit $ls_snat 10
> +check_ovs_ct_limit $lsp 10
> +
> +check ovn-nbctl --wait=hv set Logical_Switch_Port lsp options:ct-zone-limit=5
> +wait_ovs_ct_limit_count 3
> +check_ovs_ct_limit $ls_dnat 10
> +check_ovs_ct_limit $ls_snat 10
> +check_ovs_ct_limit $lsp 5
> +
> +check ovn-nbctl --wait=hv remove Logical_Switch_Port lsp options ct-zone-limit
> +wait_ovs_ct_limit_count 3
> +check_ovs_ct_limit $ls_dnat 10
> +check_ovs_ct_limit $ls_snat 10
> +check_ovs_ct_limit $lsp 10
> +
> +check ovn-nbctl --wait=hv remove Logical_Switch ls other_config ct-zone-limit
> +wait_ovs_ct_limit_count 0
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])

Regards,
Dumitru
Ales Musil July 12, 2024, 2:30 p.m. UTC | #2
On Thu, Jul 11, 2024 at 10:54 PM Dumitru Ceara <dceara@redhat.com> wrote:

> On 6/26/24 07:56, Ales Musil wrote:
> > Add support for limiting the CT zone usage per Ls, LR or LSP.
> > When the limit is configured on logical switch it will also implicitly
> > set limits for all ports in that logical switch. The port configuration
> > can be overwritten individually and has priority over the whole logical
> > switch configuration.
> >
> > The value 0 means unlimited, when the value is not specified it is
> > derived from OvS default CT limit specified for given OvS datapath.
> >
> > Reported-at: https://bugzilla.redhat.com/2189924
> > Signed-off-by: Ales Musil <amusil@redhat.com>
> > ---
> > v4: Rebase on top of latest main.
> >     Change naming of the ct_zone_limit_sync to avoid potential confusion
> as suggested by Lorenzo.
> > v3: Rebase on top of latest main.
> > ---
>
> Hi Ales,
>
> Sorry for reviewing this so late in the cycle.  I think the patch looks
> OK.  I mostly have some performance related concerns, it would be great
> to get some scale testing numbers.  The other comment I had is minor.
>

Hi Dumitru,

thank you for the review.


>
> >  NEWS                        |   3 +
> >  controller/ct-zone.c        | 175 ++++++++++++++++++++++++++++++++----
> >  controller/ct-zone.h        |  12 ++-
> >  controller/ovn-controller.c |  25 +++++-
> >  lib/ovn-util.c              |  17 ++++
> >  lib/ovn-util.h              |   3 +
> >  northd/northd.c             |   8 ++
> >  ovn-nb.xml                  |  29 ++++++
> >  tests/ovn-controller.at     |  99 ++++++++++++++++++++
> >  9 files changed, 349 insertions(+), 22 deletions(-)
> >
> > diff --git a/NEWS b/NEWS
> > index 01db77d57..f50d68a82 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -34,6 +34,9 @@ Post v24.03.0
> >    - NATs can now be given an arbitrary match condition and priority.
> This
> >      allows for conditional NATs to be configured. See the ovn-nb(5) man
> >      page for more information.
> > +  - Add support for CT zone limit that can be specified per LR
> > +    (options:ct-zone-limit), LS (other_config:ct-zone-limit) or LSP
> > +    (options:ct-zone-limit).
> >
> >  OVN v24.03.0 - 01 Mar 2024
> >  --------------------------
> > diff --git a/controller/ct-zone.c b/controller/ct-zone.c
> > index 6ed1e4108..97f0de6c3 100644
> > --- a/controller/ct-zone.c
> > +++ b/controller/ct-zone.c
> > @@ -34,6 +34,18 @@ static bool ct_zone_assign_unused(struct ct_zone_ctx
> *ctx,
> >  static bool ct_zone_remove(struct ct_zone_ctx *ctx, const char *name);
> >  static void ct_zone_add(struct ct_zone_ctx *ctx, const char *name,
> >                          uint16_t zone, bool set_pending);
> > +static void
> > +ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
> > +                             const struct sbrec_datapath_binding *dp,
> > +                             const char *name,
> > +                             struct ovsdb_idl_index *pb_by_dp);
> > +static void ct_zone_limit_update(struct ct_zone_ctx *ctx, const char
> *name,
> > +                                 int64_t limit);
> > +static int64_t ct_zone_get_dp_limit(const struct sbrec_datapath_binding
> *dp);
> > +static int64_t ct_zone_get_pb_limit(const struct sbrec_port_binding
> *pb);
> > +static int64_t ct_zone_limit_normalize(int64_t limit);
> > +static struct ovsrec_ct_zone *
> > +ct_zone_find_ovsrec(const struct ovsrec_datapath *dp, uint16_t zone_id);
> >
> >  void
> >  ct_zones_restore(struct ct_zone_ctx *ctx,
> > @@ -196,11 +208,14 @@ ct_zones_update(const struct sset *local_lports,
> >
> >  void
> >  ct_zones_commit(const struct ovsrec_bridge *br_int,
> > +                const struct ovsrec_datapath *ovs_dp,
> > +                struct ovsdb_idl_txn *ovs_idl_txn,
> >                  struct shash *pending_ct_zones)
> >  {
> >      struct shash_node *iter;
> >      SHASH_FOR_EACH (iter, pending_ct_zones) {
> >          struct ct_zone_pending_entry *ctzpe = iter->data;
> > +        struct ct_zone *ct_zone = &ctzpe->ct_zone;
> >
> >          /* The transaction is open, so any pending entries in the
> >           * CT_ZONE_DB_QUEUED must be sent and any in CT_ZONE_DB_QUEUED
> > @@ -212,7 +227,7 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
> >
> >          char *user_str = xasprintf("ct-zone-%s", iter->name);
> >          if (ctzpe->add) {
> > -            char *zone_str = xasprintf("%"PRIu16, ctzpe->ct_zone.zone);
> > +            char *zone_str = xasprintf("%"PRIu16, ct_zone->zone);
> >              struct smap_node *node =
> >                      smap_get_node(&br_int->external_ids, user_str);
> >              if (!node || strcmp(node->value, zone_str)) {
> > @@ -227,6 +242,19 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
> >          }
> >          free(user_str);
> >
> > +        struct ovsrec_ct_zone *ovs_zone =
> > +                ct_zone_find_ovsrec(ovs_dp, ct_zone->zone);
>
> Does this create a measurable performance impact?  We're going to
> potentially walk all already assigned CT zones every time a new port is
> bound locally.  Should we cache the ovs_zone records and index them by
> zone_id?  That probably also means that we might need to add incremental
> processing support for ovsrec_ct_zone.
>
> On the other hand, I tried a quick test of binding 500 ovs interfaces to
> LSPs in a sandbox (ovs and ovn compiled with gcc and -O2 and debug
> logging disabled) and I didn't see any noticeable performance degradation.
>
> Just to be sure, would it be possible to do some e2e performance
> measurements with ovn-heater with this patch?
>

I made it into an array so we don't lookup every single pending port
and it is accessed via index instead.


>
> > +        if ((!ctzpe->add || ct_zone->limit < 0) && ovs_zone) {
> > +            ovsrec_datapath_update_ct_zones_delkey(ovs_dp,
> ct_zone->zone);
> > +        } else if (ctzpe->add && ct_zone->limit >= 0) {
> > +            if (!ovs_zone) {
> > +                ovs_zone = ovsrec_ct_zone_insert(ovs_idl_txn);
> > +                ovsrec_datapath_update_ct_zones_setkey(ovs_dp,
> ct_zone->zone,
> > +                                                       ovs_zone);
> > +            }
> > +            ovsrec_ct_zone_set_limit(ovs_zone, &ct_zone->limit, 1);
> > +        }
> > +
> >          ctzpe->state = CT_ZONE_DB_SENT;
> >      }
> >  }
> > @@ -247,8 +275,19 @@ ct_zones_pending_clear_commited(struct shash
> *pending)
> >  /* Returns "true" when there is no need for full recompute. */
> >  bool
> >  ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> > -                         const struct sbrec_datapath_binding *dp)
> > +                         const struct sbrec_datapath_binding *dp,
> > +                         struct ovsdb_idl_index *pb_by_dp)
> >  {
> > +    const char *name = smap_get(&dp->external_ids, "name");
> > +    if (!name) {
> > +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> > +        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"'
> skipping"
> > +                    "zone check.", UUID_ARGS(&dp->header_.uuid));
> > +        return true;
> > +    }
> > +
> > +    ct_zone_limits_update_per_dp(ctx, dp, name, pb_by_dp);
> > +
> >      int req_snat_zone = ct_zone_get_snat(dp);
> >      if (req_snat_zone == -1) {
> >          /* datapath snat ct zone is not set.  This condition will also
> hit
> > @@ -259,14 +298,6 @@ ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> >          return true;
> >      }
> >
> > -    const char *name = smap_get(&dp->external_ids, "name");
> > -    if (!name) {
> > -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> > -        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"'
> skipping"
> > -                    "zone check.", UUID_ARGS(&dp->header_.uuid));
> > -        return true;
> > -    }
> > -
> >      /* Check if the requested snat zone has changed for the datapath
> >       * or not.  If so, then fall back to full recompute of
> >       * ct_zone engine. */
> > @@ -290,14 +321,18 @@ ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> >
> >  /* Returns "true" if there was an update to the context. */
> >  bool
> > -ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
> > +ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
> > +                           const struct sbrec_port_binding *pb,
> >                             bool updated, int *scan_start)
> >  {
> > -    struct shash_node *node = shash_find(&ctx->current, name);
> > -    if (updated && !node) {
> > -        ct_zone_assign_unused(ctx, name, scan_start);
> > +    struct shash_node *node = shash_find(&ctx->current,
> pb->logical_port);
> > +    if (updated) {
> > +        if (!node) {
> > +            ct_zone_assign_unused(ctx, pb->logical_port, scan_start);
> > +        }
> > +        ct_zone_limit_update(ctx, pb->logical_port,
> ct_zone_get_pb_limit(pb));
> >          return true;
> > -    } else if (!updated && node && ct_zone_remove(ctx, node->name)) {
> > +    } else if (node && ct_zone_remove(ctx, node->name)) {
> >          return true;
> >      }
> >
> > @@ -311,6 +346,25 @@ ct_zone_find_zone(const struct shash *ct_zones,
> const char *name)
> >      return ct_zone ? ct_zone->zone : 0;
> >  }
> >
> > +void
> > +ct_zones_limits_sync(struct ct_zone_ctx *ctx,
> > +                     const struct hmap *local_datapaths,
> > +                     struct ovsdb_idl_index *pb_by_dp)
> > +{
> > +    const struct local_datapath *ld;
> > +    HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> > +        const char *name = smap_get(&ld->datapath->external_ids,
> "name");
> > +        if (!name) {
> > +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5,
> 1);
> > +            VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' "
> > +                        "skipping zone assignment.",
> > +                        UUID_ARGS(&ld->datapath->header_.uuid));
> > +            continue;
> > +        }
> > +
> > +        ct_zone_limits_update_per_dp(ctx, ld->datapath, name, pb_by_dp);
> > +    }
> > +}
> >
> >  static bool
> >  ct_zone_assign_unused(struct ct_zone_ctx *ctx, const char *zone_name,
> > @@ -363,7 +417,10 @@ ct_zone_add(struct ct_zone_ctx *ctx, const char
> *name, uint16_t zone,
> >          shash_add(&ctx->current, name, ct_zone);
> >      }
> >
> > -    ct_zone->zone = zone;
> > +    *ct_zone = (struct ct_zone) {
> > +        .zone = zone,
> > +        .limit = -1,
> > +    };
> >
> >      if (set_pending) {
> >          ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
> > @@ -446,6 +503,7 @@ ct_zone_restore(const struct
> sbrec_datapath_binding_table *dp_table,
> >
> >          struct ct_zone ct_zone = {
> >              .zone = zone,
> > +            .limit = -1,
> >          };
> >          /* Make sure we remove the uuid one in the next OvS DB commit
> without
> >           * flush. */
> > @@ -461,3 +519,88 @@ ct_zone_restore(const struct
> sbrec_datapath_binding_table *dp_table,
> >      ct_zone_add(ctx, current_name, zone, false);
> >      free(new_name);
> >  }
> > +
> > +static void
> > +ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
> > +                             const struct sbrec_datapath_binding *dp,
> > +                             const char *name,
> > +                             struct ovsdb_idl_index *pb_by_dp)
> > +{
> > +    if (smap_get(&dp->external_ids, "logical-switch")) {
> > +        struct sbrec_port_binding *target =
> > +                sbrec_port_binding_index_init_row(pb_by_dp);
> > +        sbrec_port_binding_index_set_datapath(target, dp);
> > +
> > +        const struct sbrec_port_binding *pb;
> > +        SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target, pb_by_dp) {
> > +            ct_zone_limit_update(ctx, pb->logical_port,
> > +                                 ct_zone_get_pb_limit(pb));
> > +        }
> > +
> > +        sbrec_port_binding_index_destroy_row(target);
> > +    }
> > +
> > +    int64_t dp_limit = ct_zone_get_dp_limit(dp);
> > +    char *dnat = alloc_nat_zone_key(name, "dnat");
> > +    char *snat = alloc_nat_zone_key(name, "snat");
> > +
> > +    ct_zone_limit_update(ctx, dnat, dp_limit);
> > +    ct_zone_limit_update(ctx, snat, dp_limit);
> > +
> > +    free(dnat);
> > +    free(snat);
> > +}
> > +
> > +static void
> > +ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name, int64_t
> limit)
> > +{
> > +    struct ct_zone *ct_zone = shash_find_data(&ctx->current, name);
> > +
> > +    if (!ct_zone) {
> > +        return;
> > +    }
> > +
> > +    if (ct_zone->limit != limit) {
> > +        ct_zone->limit = limit;
> > +        /* Add pending entry only for DB store to avoid flushing the
> zone. */
> > +        ct_zone_add_pending(&ctx->pending, CT_ZONE_DB_QUEUED, ct_zone,
> > +                            true, name);
> > +        VLOG_DBG("setting ct zone %"PRIu16" limit to %"PRId64,
> > +                 ct_zone->zone, ct_zone->limit);
> > +    }
> > +}
> > +
> > +static int64_t
> > +ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp)
> > +{
> > +    int64_t limit = ovn_smap_get_llong(&dp->external_ids,
> "ct-zone-limit", -1);
> > +    return ct_zone_limit_normalize(limit);
> > +}
> > +
> > +static int64_t
> > +ct_zone_get_pb_limit(const struct sbrec_port_binding *pb)
> > +{
> > +    int64_t dp_limit = ovn_smap_get_llong(&pb->datapath->external_ids,
> > +                                          "ct-zone-limit", -1);
> > +    int64_t limit = ovn_smap_get_llong(&pb->options,
> > +                                       "ct-zone-limit", dp_limit);
> > +    return ct_zone_limit_normalize(limit);
> > +}
> > +
> > +static int64_t
> > +ct_zone_limit_normalize(int64_t limit)
> > +{
> > +    return limit >= 0 && limit <= UINT32_MAX ? limit : -1;
> > +}
> > +
> > +static struct ovsrec_ct_zone *
> > +ct_zone_find_ovsrec(const struct ovsrec_datapath *dp, uint16_t zone_id)
> > +{
> > +    for (int i = 0; i < dp->n_ct_zones; i++) {
>
> Nit: size_t
>

Fixed in v5.


>
> > +        if (dp->key_ct_zones[i] == zone_id) {
> > +            return dp->value_ct_zones[i];
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > diff --git a/controller/ct-zone.h b/controller/ct-zone.h
> > index af68de2d6..5d7f66224 100644
> > --- a/controller/ct-zone.h
> > +++ b/controller/ct-zone.h
> > @@ -43,6 +43,7 @@ struct ct_zone_ctx {
> >  date
> >  struct ct_zone {
> >      uint16_t zone;
> > +    int64_t limit;
> >  };
> >
> >  /* States to move through when a new conntrack zone has been allocated.
> */
> > @@ -70,12 +71,19 @@ void ct_zones_update(const struct sset *local_lports,
> >                       const struct hmap *local_datapaths,
> >                       struct ct_zone_ctx *ctx);
> >  void ct_zones_commit(const struct ovsrec_bridge *br_int,
> > +                     const struct ovsrec_datapath *ovs_dp,
> > +                     struct ovsdb_idl_txn *ovs_idl_txn,
> >                       struct shash *pending_ct_zones);
> >  void ct_zones_pending_clear_commited(struct shash *pending);
> >  bool ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> > -                              const struct sbrec_datapath_binding *dp);
> > -bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char
> *name,
> > +                              const struct sbrec_datapath_binding *dp,
> > +                              struct ovsdb_idl_index *pb_by_dp);
> > +bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
> > +                                const struct sbrec_port_binding *pb,
> >                                  bool updated, int *scan_start);
> >  uint16_t ct_zone_find_zone(const struct shash *ct_zones, const char
> *name);
> > +void ct_zones_limits_sync(struct ct_zone_ctx *ctx,
> > +                          const struct hmap *local_datapaths,
> > +                          struct ovsdb_idl_index *pb_by_dp);
> >
> >  #endif /* controller/ct-zone.h */
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index a47ed5d9a..1b4c89cda 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -798,6 +798,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
> >      ovsdb_idl_add_column(ovs_idl, &ovsrec_ssl_col_private_key);
> >      ovsdb_idl_add_table(ovs_idl, &ovsrec_table_datapath);
> >      ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_capabilities);
> > +    ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_ct_zones);
> >      ovsdb_idl_add_table(ovs_idl,
> &ovsrec_table_flow_sample_collector_set);
> >      ovsdb_idl_add_table(ovs_idl, &ovsrec_table_qos);
> >      ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_other_config);
> > @@ -807,6 +808,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
> >      ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_other_config);
> >      ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_external_ids);
> >      ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_link_state);
> > +    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_ct_zone);
> > +    ovsdb_idl_add_column(ovs_idl, &ovsrec_ct_zone_col_limit);
> >
> >      chassis_register_ovs_idl(ovs_idl);
> >      encaps_register_ovs_idl(ovs_idl);
> > @@ -2219,6 +2222,10 @@ en_ct_zones_run(struct engine_node *node, void
> *data)
> >      struct ed_type_ct_zones *ct_zones_data = data;
> >      struct ed_type_runtime_data *rt_data =
> >          engine_get_input_data("runtime_data", node);
> > +    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
> > +            engine_ovsdb_node_get_index(
> > +                    engine_get_input("SB_port_binding", node),
> > +                    "datapath");
> >
> >      const struct ovsrec_open_vswitch_table *ovs_table =
> >          EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
> > @@ -2232,6 +2239,8 @@ en_ct_zones_run(struct engine_node *node, void
> *data)
> >      ct_zones_restore(&ct_zones_data->ctx, ovs_table, dp_table, br_int);
> >      ct_zones_update(&rt_data->local_lports, &rt_data->local_datapaths,
> >                      &ct_zones_data->ctx);
> > +    ct_zones_limits_sync(&ct_zones_data->ctx, &rt_data->local_datapaths,
> > +                         sbrec_port_binding_by_datapath);
>
> Same question about potential performance impact here too.  Instead of
> doing this in two steps wouldn't it make sense to actually store in the
> ct_zone structure the requested limit already when we create the
> structure (or the pending one)?  We'd avoid this O(n log n) - I think -
> complexity.
>
> But, as mentioned above, if there's no visible e2e performance impact we
> can probably avoid the complication and keep it as is.
>

I have tried to run some ovn-heater jobs, mainly 500 density-heavy
and 500-density-light. There wasn't any deviation from the baseline.


>
> >
> >      ct_zones_data->recomputed = true;
> >      engine_set_node_state(node, EN_UPDATED);
> > @@ -2249,6 +2258,10 @@ ct_zones_datapath_binding_handler(struct
> engine_node *node, void *data)
> >          engine_get_input_data("runtime_data", node);
> >      const struct sbrec_datapath_binding_table *dp_table =
> >          EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
> > +    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
> > +            engine_ovsdb_node_get_index(
> > +                    engine_get_input("SB_port_binding", node),
> > +                    "datapath");
> >
> >      SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
> >          if (!get_local_datapath(&rt_data->local_datapaths,
> > @@ -2262,7 +2275,8 @@ ct_zones_datapath_binding_handler(struct
> engine_node *node, void *data)
> >              return false;
> >          }
> >
> > -        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp)) {
> > +        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp,
> > +                                      sbrec_port_binding_by_datapath)) {
> >              return false;
> >          }
> >      }
> > @@ -2311,8 +2325,8 @@ ct_zones_runtime_data_handler(struct engine_node
> *node, void *data)
> >                      t_lport->tracked_type == TRACKED_RESOURCE_NEW ||
> >                      t_lport->tracked_type == TRACKED_RESOURCE_UPDATED;
> >              updated |= ct_zone_handle_port_update(&ct_zones_data->ctx,
> > -
> t_lport->pb->logical_port,
> > -                                                  port_updated,
> &scan_start);
> > +                                                  t_lport->pb,
> port_updated,
> > +                                                  &scan_start);
> >          }
> >      }
> >
> > @@ -5154,6 +5168,9 @@ main(int argc, char *argv[])
> >                       ct_zones_datapath_binding_handler);
> >      engine_add_input(&en_ct_zones, &en_runtime_data,
> >                       ct_zones_runtime_data_handler);
> > +    /* The CT node just needs the index for port bindings by name. The
> data
> > +     * for ports are processed in the runtime handler. */
> > +    engine_add_input(&en_ct_zones, &en_sb_port_binding,
> engine_noop_handler);
> >
> >      engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
> >                       ovs_interface_shadow_ovs_interface_handler);
> > @@ -5558,7 +5575,7 @@ main(int argc, char *argv[])
> >                          if (ct_zones_data) {
> >
> stopwatch_start(CT_ZONE_COMMIT_STOPWATCH_NAME,
> >                                              time_msec());
> > -                            ct_zones_commit(br_int,
> > +                            ct_zones_commit(br_int, br_int_dp,
> ovs_idl_txn,
> >
> &ct_zones_data->ctx.pending);
> >
> stopwatch_stop(CT_ZONE_COMMIT_STOPWATCH_NAME,
> >                                             time_msec());
> > diff --git a/lib/ovn-util.c b/lib/ovn-util.c
> > index 58e941193..1ad347419 100644
> > --- a/lib/ovn-util.c
> > +++ b/lib/ovn-util.c
> > @@ -816,6 +816,23 @@ str_tolower(const char *orig)
> >      return copy;
> >  }
> >
> > +/* This is a wrapper function which get the value associated with 'key'
> in
> > + * 'smap' and converts it to a long long. If 'key' is not in 'smap' or a
> > + * valid unsigned integer can't be parsed from its value, returns 'def'.
> > + */
> > +long long
> > +ovn_smap_get_llong(const struct smap *smap, const char *key, long long
> def)
> > +{
> > +    const char *value = smap_get(smap, key);
> > +    long long ll_value;
> > +
> > +    if (!value || !str_to_llong(value, 10, &ll_value)) {
> > +        return def;
> > +    }
> > +
> > +    return ll_value;
> > +}
> > +
> >  /* For a 'key' of the form "IP:port" or just "IP", sets 'port',
> >   * 'ip_address' and 'ip' ('struct in6_addr' IPv6 or IPv4 mapped
> address).
> >   * The caller must free() the memory allocated for 'ip_address'.
> > diff --git a/lib/ovn-util.h b/lib/ovn-util.h
> > index f75b821b6..ae971ce5a 100644
> > --- a/lib/ovn-util.h
> > +++ b/lib/ovn-util.h
> > @@ -211,6 +211,9 @@ char *normalize_v46_prefix(const struct in6_addr
> *prefix, unsigned int plen);
> >   */
> >  char *str_tolower(const char *orig);
> >
> > +long long ovn_smap_get_llong(const struct smap *smap, const char *key,
> > +                             long long def);
> > +
> >  /* OVN daemon options. Taken from ovs/lib/daemon.h. */
> >  #define OVN_DAEMON_OPTION_ENUMS                     \
> >      OVN_OPT_DETACH,                                 \
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 7e474a7b8..7d3ad63a1 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -737,6 +737,14 @@ ovn_datapath_update_external_ids(struct
> ovn_datapath *od)
> >          smap_add(&ids, "name2", name2);
> >      }
> >
> > +    int64_t ct_zone_limit = ovn_smap_get_llong(od->nbs ?
> > +                                               &od->nbs->other_config :
> > +                                               &od->nbr->options,
> > +                                               "ct-zone-limit", -1);
> > +    if (ct_zone_limit > 0) {
> > +        smap_add_format(&ids, "ct-zone-limit", "%"PRId64,
> ct_zone_limit);
> > +    }
> > +
> >      /* Set interconn-ts. */
> >      if (od->nbs) {
> >          const char *ts = smap_get(&od->nbs->other_config,
> "interconn-ts");
> > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > index b0f8d3687..53e697308 100644
> > --- a/ovn-nb.xml
> > +++ b/ovn-nb.xml
> > @@ -721,6 +721,17 @@
> >          this timeout will be automatically removed. The value defaults
> >          to 0, which means disabled.
> >        </column>
> > +
> > +      <column name="other_config" key="ct-zone-limit"
> > +              type='{"type": "integer", "minInteger": 0, "maxInteger":
> 4294967295}'>
> > +        CT zone <code>limit</code> value for given
> > +        <ref table="Logical_Switch"/>. This value will be propagated to
> all
> > +        <ref table="Logical_Switch_Port"/> when configured, but can be
> > +        overwritten individually per <ref
> table="Logical_Switch_Port"/>. The
> > +        value 0 means  unlimited, when the option is not present the
> limit
> > +        is not set and the zone limit is derived from OvS default
> datapath
> > +        limit.
> > +      </column>
> >      </group>
> >
> >      <group title="IP Multicast Snooping Options">
> > @@ -1122,6 +1133,16 @@
> >            <code>false</code>.
> >          </column>
> >
> > +        <column name="options" key="ct-zone-limit"
> > +                type='{"type": "integer", "minInteger": 0,
> "maxInteger": 4294967295}'>
> > +          CT zone <code>limit</code> value for given
> > +          <ref table="Logical_Switch_Port"/>. This value has priority
> over
> > +          limit specified on <ref table="Logical_Switch"/> when
> configured.
> > +          The value 0 means unlimited, when the option is not present
> the limit
> > +          is not set and the zone limit is derived from OvS default
> datapath
> > +          limit.
> > +        </column>
> > +
> >        </group>
> >
> >        <group title="Options for localnet ports">
> > @@ -2785,6 +2806,14 @@ or
> >          </p>
> >
> >        </column>
> > +
> > +      <column name="options" key="ct-zone-limit"
> > +              type='{"type": "integer", "minInteger": 0, "maxInteger":
> 4294967295}'>
> > +        CT zone <code>limit</code> value for given
> > +        <ref table="Logical_Router"/>. The value 0 means unlimited,
> when the
> > +        option is not present the limit is not set and the zone limit is
> > +        derived from OvS default datapath limit.
> > +      </column>
> >      </group>
> >
> >      <group title="Common Columns">
> > diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> > index 2cb86dc98..0a20cbc09 100644
> > --- a/tests/ovn-controller.at
> > +++ b/tests/ovn-controller.at
> > @@ -3127,3 +3127,102 @@ OVS_WAIT_UNTIL([grep -q 'tcp:127.0.0.1:1235:
> connected' hv1/ovn-controller.log])
> >
> >  OVN_CLEANUP([hv1])
> >  AT_CLEANUP
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([ovn-controller - CT zone limit])
> > +ovn_start
> > +
> > +net_add n1
> > +sim_add hv1
> > +as hv1
> > +check ovs-vsctl add-br br-phys
> > +ovn_attach n1 br-phys 192.168.0.1
> > +
> > +check ovn-appctl -t ovn-controller vlog/set dbg:ct_zone
> > +
> > +check ovs-vsctl add-port br-int lsp \
> > +    -- set Interface lsp external-ids:iface-id=lsp
> > +
> > +check ovn-nbctl lr-add lr
> > +
> > +check ovn-nbctl ls-add ls
> > +check ovn-nbctl lsp-add ls ls-lr
> > +check ovn-nbctl lsp-set-type ls-lr router
> > +check ovn-nbctl lsp-set-addresses ls-lr router
> > +check ovn-nbctl lrp-add lr lr-ls 00:00:00:00:00:01 10.0.0.1
> > +
> > +check ovn-nbctl lsp-add ls lsp
> > +check ovn-nbctl lsp-set-addresses lsp "00:00:00:00:00:02 10.0.0.2"
> > +
> > +check ovn-nbctl lrp-add lr lrp-gw 01:00:00:00:00:01 172.16.0.1
> > +check ovn-nbctl lrp-set-gateway-chassis lrp-gw hv1
> > +
> > +wait_for_ports_up
> > +check ovn-nbctl --wait=hv sync
> > +
> > +get_zone_num() {
> > +    output=$1
> > +    name=$2
> > +
> > +    printf "$output" | grep $name | cut -d ' ' -f 2
> > +}
> > +
> > +check_ovs_ct_limit() {
> > +    zone=$1
> > +    limit=$2
> > +
> > +    AT_CHECK_UNQUOTED([ovs-appctl dpctl/ct-get-limits zone=$zone | sed
> "s/count=.*/count=?/;s/default limit=.*/default limit=?/" | sort], [0], [dnl
> > +default limit=?
> > +zone=$zone,limit=$limit,count=?
> > +])
> > +}
> > +
> > +wait_ovs_ct_limit_count() {
> > +    count=$1
> > +
> > +    OVS_WAIT_UNTIL([test $count -eq $(ovs-vsctl --no-headings
> --format=table list CT_Zone | wc -l)])
> > +}
> > +
> > +ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list)
> > +lr_dnat=$(get_zone_num "$ct_zones" lr_dnat)
> > +lr_snat=$(get_zone_num "$ct_zones" lr_snat)
> > +
> > +ls_dnat=$(get_zone_num "$ct_zones" ls_dnat)
> > +ls_snat=$(get_zone_num "$ct_zones" ls_snat)
> > +
> > +lsp=$(get_zone_num "$ct_zones" lsp)
> > +
> > +wait_ovs_ct_limit_count 0
> > +
> > +check ovn-nbctl --wait=hv set Logical_Router lr options:ct-zone-limit=5
> > +wait_ovs_ct_limit_count 2
> > +check_ovs_ct_limit $lr_dnat 5
> > +check_ovs_ct_limit $lr_snat 5
> > +
> > +check ovn-nbctl --wait=hv remove Logical_Router lr options ct-zone-limit
> > +wait_ovs_ct_limit_count 0
> > +
> > +check ovn-nbctl --wait=hv set Logical_Switch ls
> other_config:ct-zone-limit=10
> > +wait_ovs_ct_limit_count 3
> > +check_ovs_ct_limit $ls_dnat 10
> > +check_ovs_ct_limit $ls_snat 10
> > +check_ovs_ct_limit $lsp 10
> > +
> > +check ovn-nbctl --wait=hv set Logical_Switch_Port lsp
> options:ct-zone-limit=5
> > +wait_ovs_ct_limit_count 3
> > +check_ovs_ct_limit $ls_dnat 10
> > +check_ovs_ct_limit $ls_snat 10
> > +check_ovs_ct_limit $lsp 5
> > +
> > +check ovn-nbctl --wait=hv remove Logical_Switch_Port lsp options
> ct-zone-limit
> > +wait_ovs_ct_limit_count 3
> > +check_ovs_ct_limit $ls_dnat 10
> > +check_ovs_ct_limit $ls_snat 10
> > +check_ovs_ct_limit $lsp 10
> > +
> > +check ovn-nbctl --wait=hv remove Logical_Switch ls other_config
> ct-zone-limit
> > +wait_ovs_ct_limit_count 0
> > +
> > +OVN_CLEANUP([hv1])
> > +AT_CLEANUP
> > +])
>
> Regards,
> Dumitru
>
>
Thanks,
Ales
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 01db77d57..f50d68a82 100644
--- a/NEWS
+++ b/NEWS
@@ -34,6 +34,9 @@  Post v24.03.0
   - NATs can now be given an arbitrary match condition and priority. This
     allows for conditional NATs to be configured. See the ovn-nb(5) man
     page for more information.
+  - Add support for CT zone limit that can be specified per LR
+    (options:ct-zone-limit), LS (other_config:ct-zone-limit) or LSP
+    (options:ct-zone-limit).
 
 OVN v24.03.0 - 01 Mar 2024
 --------------------------
diff --git a/controller/ct-zone.c b/controller/ct-zone.c
index 6ed1e4108..97f0de6c3 100644
--- a/controller/ct-zone.c
+++ b/controller/ct-zone.c
@@ -34,6 +34,18 @@  static bool ct_zone_assign_unused(struct ct_zone_ctx *ctx,
 static bool ct_zone_remove(struct ct_zone_ctx *ctx, const char *name);
 static void ct_zone_add(struct ct_zone_ctx *ctx, const char *name,
                         uint16_t zone, bool set_pending);
+static void
+ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
+                             const struct sbrec_datapath_binding *dp,
+                             const char *name,
+                             struct ovsdb_idl_index *pb_by_dp);
+static void ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name,
+                                 int64_t limit);
+static int64_t ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp);
+static int64_t ct_zone_get_pb_limit(const struct sbrec_port_binding *pb);
+static int64_t ct_zone_limit_normalize(int64_t limit);
+static struct ovsrec_ct_zone *
+ct_zone_find_ovsrec(const struct ovsrec_datapath *dp, uint16_t zone_id);
 
 void
 ct_zones_restore(struct ct_zone_ctx *ctx,
@@ -196,11 +208,14 @@  ct_zones_update(const struct sset *local_lports,
 
 void
 ct_zones_commit(const struct ovsrec_bridge *br_int,
+                const struct ovsrec_datapath *ovs_dp,
+                struct ovsdb_idl_txn *ovs_idl_txn,
                 struct shash *pending_ct_zones)
 {
     struct shash_node *iter;
     SHASH_FOR_EACH (iter, pending_ct_zones) {
         struct ct_zone_pending_entry *ctzpe = iter->data;
+        struct ct_zone *ct_zone = &ctzpe->ct_zone;
 
         /* The transaction is open, so any pending entries in the
          * CT_ZONE_DB_QUEUED must be sent and any in CT_ZONE_DB_QUEUED
@@ -212,7 +227,7 @@  ct_zones_commit(const struct ovsrec_bridge *br_int,
 
         char *user_str = xasprintf("ct-zone-%s", iter->name);
         if (ctzpe->add) {
-            char *zone_str = xasprintf("%"PRIu16, ctzpe->ct_zone.zone);
+            char *zone_str = xasprintf("%"PRIu16, ct_zone->zone);
             struct smap_node *node =
                     smap_get_node(&br_int->external_ids, user_str);
             if (!node || strcmp(node->value, zone_str)) {
@@ -227,6 +242,19 @@  ct_zones_commit(const struct ovsrec_bridge *br_int,
         }
         free(user_str);
 
+        struct ovsrec_ct_zone *ovs_zone =
+                ct_zone_find_ovsrec(ovs_dp, ct_zone->zone);
+        if ((!ctzpe->add || ct_zone->limit < 0) && ovs_zone) {
+            ovsrec_datapath_update_ct_zones_delkey(ovs_dp, ct_zone->zone);
+        } else if (ctzpe->add && ct_zone->limit >= 0) {
+            if (!ovs_zone) {
+                ovs_zone = ovsrec_ct_zone_insert(ovs_idl_txn);
+                ovsrec_datapath_update_ct_zones_setkey(ovs_dp, ct_zone->zone,
+                                                       ovs_zone);
+            }
+            ovsrec_ct_zone_set_limit(ovs_zone, &ct_zone->limit, 1);
+        }
+
         ctzpe->state = CT_ZONE_DB_SENT;
     }
 }
@@ -247,8 +275,19 @@  ct_zones_pending_clear_commited(struct shash *pending)
 /* Returns "true" when there is no need for full recompute. */
 bool
 ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
-                         const struct sbrec_datapath_binding *dp)
+                         const struct sbrec_datapath_binding *dp,
+                         struct ovsdb_idl_index *pb_by_dp)
 {
+    const char *name = smap_get(&dp->external_ids, "name");
+    if (!name) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
+                    "zone check.", UUID_ARGS(&dp->header_.uuid));
+        return true;
+    }
+
+    ct_zone_limits_update_per_dp(ctx, dp, name, pb_by_dp);
+
     int req_snat_zone = ct_zone_get_snat(dp);
     if (req_snat_zone == -1) {
         /* datapath snat ct zone is not set.  This condition will also hit
@@ -259,14 +298,6 @@  ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
         return true;
     }
 
-    const char *name = smap_get(&dp->external_ids, "name");
-    if (!name) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
-                    "zone check.", UUID_ARGS(&dp->header_.uuid));
-        return true;
-    }
-
     /* Check if the requested snat zone has changed for the datapath
      * or not.  If so, then fall back to full recompute of
      * ct_zone engine. */
@@ -290,14 +321,18 @@  ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
 
 /* Returns "true" if there was an update to the context. */
 bool
-ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
+ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
+                           const struct sbrec_port_binding *pb,
                            bool updated, int *scan_start)
 {
-    struct shash_node *node = shash_find(&ctx->current, name);
-    if (updated && !node) {
-        ct_zone_assign_unused(ctx, name, scan_start);
+    struct shash_node *node = shash_find(&ctx->current, pb->logical_port);
+    if (updated) {
+        if (!node) {
+            ct_zone_assign_unused(ctx, pb->logical_port, scan_start);
+        }
+        ct_zone_limit_update(ctx, pb->logical_port, ct_zone_get_pb_limit(pb));
         return true;
-    } else if (!updated && node && ct_zone_remove(ctx, node->name)) {
+    } else if (node && ct_zone_remove(ctx, node->name)) {
         return true;
     }
 
@@ -311,6 +346,25 @@  ct_zone_find_zone(const struct shash *ct_zones, const char *name)
     return ct_zone ? ct_zone->zone : 0;
 }
 
+void
+ct_zones_limits_sync(struct ct_zone_ctx *ctx,
+                     const struct hmap *local_datapaths,
+                     struct ovsdb_idl_index *pb_by_dp)
+{
+    const struct local_datapath *ld;
+    HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
+        const char *name = smap_get(&ld->datapath->external_ids, "name");
+        if (!name) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' "
+                        "skipping zone assignment.",
+                        UUID_ARGS(&ld->datapath->header_.uuid));
+            continue;
+        }
+
+        ct_zone_limits_update_per_dp(ctx, ld->datapath, name, pb_by_dp);
+    }
+}
 
 static bool
 ct_zone_assign_unused(struct ct_zone_ctx *ctx, const char *zone_name,
@@ -363,7 +417,10 @@  ct_zone_add(struct ct_zone_ctx *ctx, const char *name, uint16_t zone,
         shash_add(&ctx->current, name, ct_zone);
     }
 
-    ct_zone->zone = zone;
+    *ct_zone = (struct ct_zone) {
+        .zone = zone,
+        .limit = -1,
+    };
 
     if (set_pending) {
         ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
@@ -446,6 +503,7 @@  ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
 
         struct ct_zone ct_zone = {
             .zone = zone,
+            .limit = -1,
         };
         /* Make sure we remove the uuid one in the next OvS DB commit without
          * flush. */
@@ -461,3 +519,88 @@  ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
     ct_zone_add(ctx, current_name, zone, false);
     free(new_name);
 }
+
+static void
+ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
+                             const struct sbrec_datapath_binding *dp,
+                             const char *name,
+                             struct ovsdb_idl_index *pb_by_dp)
+{
+    if (smap_get(&dp->external_ids, "logical-switch")) {
+        struct sbrec_port_binding *target =
+                sbrec_port_binding_index_init_row(pb_by_dp);
+        sbrec_port_binding_index_set_datapath(target, dp);
+
+        const struct sbrec_port_binding *pb;
+        SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target, pb_by_dp) {
+            ct_zone_limit_update(ctx, pb->logical_port,
+                                 ct_zone_get_pb_limit(pb));
+        }
+
+        sbrec_port_binding_index_destroy_row(target);
+    }
+
+    int64_t dp_limit = ct_zone_get_dp_limit(dp);
+    char *dnat = alloc_nat_zone_key(name, "dnat");
+    char *snat = alloc_nat_zone_key(name, "snat");
+
+    ct_zone_limit_update(ctx, dnat, dp_limit);
+    ct_zone_limit_update(ctx, snat, dp_limit);
+
+    free(dnat);
+    free(snat);
+}
+
+static void
+ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name, int64_t limit)
+{
+    struct ct_zone *ct_zone = shash_find_data(&ctx->current, name);
+
+    if (!ct_zone) {
+        return;
+    }
+
+    if (ct_zone->limit != limit) {
+        ct_zone->limit = limit;
+        /* Add pending entry only for DB store to avoid flushing the zone. */
+        ct_zone_add_pending(&ctx->pending, CT_ZONE_DB_QUEUED, ct_zone,
+                            true, name);
+        VLOG_DBG("setting ct zone %"PRIu16" limit to %"PRId64,
+                 ct_zone->zone, ct_zone->limit);
+    }
+}
+
+static int64_t
+ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp)
+{
+    int64_t limit = ovn_smap_get_llong(&dp->external_ids, "ct-zone-limit", -1);
+    return ct_zone_limit_normalize(limit);
+}
+
+static int64_t
+ct_zone_get_pb_limit(const struct sbrec_port_binding *pb)
+{
+    int64_t dp_limit = ovn_smap_get_llong(&pb->datapath->external_ids,
+                                          "ct-zone-limit", -1);
+    int64_t limit = ovn_smap_get_llong(&pb->options,
+                                       "ct-zone-limit", dp_limit);
+    return ct_zone_limit_normalize(limit);
+}
+
+static int64_t
+ct_zone_limit_normalize(int64_t limit)
+{
+    return limit >= 0 && limit <= UINT32_MAX ? limit : -1;
+}
+
+static struct ovsrec_ct_zone *
+ct_zone_find_ovsrec(const struct ovsrec_datapath *dp, uint16_t zone_id)
+{
+    for (int i = 0; i < dp->n_ct_zones; i++) {
+        if (dp->key_ct_zones[i] == zone_id) {
+            return dp->value_ct_zones[i];
+        }
+    }
+
+    return NULL;
+}
diff --git a/controller/ct-zone.h b/controller/ct-zone.h
index af68de2d6..5d7f66224 100644
--- a/controller/ct-zone.h
+++ b/controller/ct-zone.h
@@ -43,6 +43,7 @@  struct ct_zone_ctx {
 
 struct ct_zone {
     uint16_t zone;
+    int64_t limit;
 };
 
 /* States to move through when a new conntrack zone has been allocated. */
@@ -70,12 +71,19 @@  void ct_zones_update(const struct sset *local_lports,
                      const struct hmap *local_datapaths,
                      struct ct_zone_ctx *ctx);
 void ct_zones_commit(const struct ovsrec_bridge *br_int,
+                     const struct ovsrec_datapath *ovs_dp,
+                     struct ovsdb_idl_txn *ovs_idl_txn,
                      struct shash *pending_ct_zones);
 void ct_zones_pending_clear_commited(struct shash *pending);
 bool ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
-                              const struct sbrec_datapath_binding *dp);
-bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
+                              const struct sbrec_datapath_binding *dp,
+                              struct ovsdb_idl_index *pb_by_dp);
+bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
+                                const struct sbrec_port_binding *pb,
                                 bool updated, int *scan_start);
 uint16_t ct_zone_find_zone(const struct shash *ct_zones, const char *name);
+void ct_zones_limits_sync(struct ct_zone_ctx *ctx,
+                          const struct hmap *local_datapaths,
+                          struct ovsdb_idl_index *pb_by_dp);
 
 #endif /* controller/ct-zone.h */
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index a47ed5d9a..1b4c89cda 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -798,6 +798,7 @@  ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
     ovsdb_idl_add_column(ovs_idl, &ovsrec_ssl_col_private_key);
     ovsdb_idl_add_table(ovs_idl, &ovsrec_table_datapath);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_capabilities);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_ct_zones);
     ovsdb_idl_add_table(ovs_idl, &ovsrec_table_flow_sample_collector_set);
     ovsdb_idl_add_table(ovs_idl, &ovsrec_table_qos);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_other_config);
@@ -807,6 +808,8 @@  ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
     ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_other_config);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_external_ids);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_link_state);
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_ct_zone);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_ct_zone_col_limit);
 
     chassis_register_ovs_idl(ovs_idl);
     encaps_register_ovs_idl(ovs_idl);
@@ -2219,6 +2222,10 @@  en_ct_zones_run(struct engine_node *node, void *data)
     struct ed_type_ct_zones *ct_zones_data = data;
     struct ed_type_runtime_data *rt_data =
         engine_get_input_data("runtime_data", node);
+    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
+            engine_ovsdb_node_get_index(
+                    engine_get_input("SB_port_binding", node),
+                    "datapath");
 
     const struct ovsrec_open_vswitch_table *ovs_table =
         EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
@@ -2232,6 +2239,8 @@  en_ct_zones_run(struct engine_node *node, void *data)
     ct_zones_restore(&ct_zones_data->ctx, ovs_table, dp_table, br_int);
     ct_zones_update(&rt_data->local_lports, &rt_data->local_datapaths,
                     &ct_zones_data->ctx);
+    ct_zones_limits_sync(&ct_zones_data->ctx, &rt_data->local_datapaths,
+                         sbrec_port_binding_by_datapath);
 
     ct_zones_data->recomputed = true;
     engine_set_node_state(node, EN_UPDATED);
@@ -2249,6 +2258,10 @@  ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
         engine_get_input_data("runtime_data", node);
     const struct sbrec_datapath_binding_table *dp_table =
         EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
+    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
+            engine_ovsdb_node_get_index(
+                    engine_get_input("SB_port_binding", node),
+                    "datapath");
 
     SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
         if (!get_local_datapath(&rt_data->local_datapaths,
@@ -2262,7 +2275,8 @@  ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
             return false;
         }
 
-        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp)) {
+        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp,
+                                      sbrec_port_binding_by_datapath)) {
             return false;
         }
     }
@@ -2311,8 +2325,8 @@  ct_zones_runtime_data_handler(struct engine_node *node, void *data)
                     t_lport->tracked_type == TRACKED_RESOURCE_NEW ||
                     t_lport->tracked_type == TRACKED_RESOURCE_UPDATED;
             updated |= ct_zone_handle_port_update(&ct_zones_data->ctx,
-                                                  t_lport->pb->logical_port,
-                                                  port_updated, &scan_start);
+                                                  t_lport->pb, port_updated,
+                                                  &scan_start);
         }
     }
 
@@ -5154,6 +5168,9 @@  main(int argc, char *argv[])
                      ct_zones_datapath_binding_handler);
     engine_add_input(&en_ct_zones, &en_runtime_data,
                      ct_zones_runtime_data_handler);
+    /* The CT node just needs the index for port bindings by name. The data
+     * for ports are processed in the runtime handler. */
+    engine_add_input(&en_ct_zones, &en_sb_port_binding, engine_noop_handler);
 
     engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
                      ovs_interface_shadow_ovs_interface_handler);
@@ -5558,7 +5575,7 @@  main(int argc, char *argv[])
                         if (ct_zones_data) {
                             stopwatch_start(CT_ZONE_COMMIT_STOPWATCH_NAME,
                                             time_msec());
-                            ct_zones_commit(br_int,
+                            ct_zones_commit(br_int, br_int_dp, ovs_idl_txn,
                                             &ct_zones_data->ctx.pending);
                             stopwatch_stop(CT_ZONE_COMMIT_STOPWATCH_NAME,
                                            time_msec());
diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index 58e941193..1ad347419 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -816,6 +816,23 @@  str_tolower(const char *orig)
     return copy;
 }
 
+/* This is a wrapper function which get the value associated with 'key' in
+ * 'smap' and converts it to a long long. If 'key' is not in 'smap' or a
+ * valid unsigned integer can't be parsed from its value, returns 'def'.
+ */
+long long
+ovn_smap_get_llong(const struct smap *smap, const char *key, long long def)
+{
+    const char *value = smap_get(smap, key);
+    long long ll_value;
+
+    if (!value || !str_to_llong(value, 10, &ll_value)) {
+        return def;
+    }
+
+    return ll_value;
+}
+
 /* For a 'key' of the form "IP:port" or just "IP", sets 'port',
  * 'ip_address' and 'ip' ('struct in6_addr' IPv6 or IPv4 mapped address).
  * The caller must free() the memory allocated for 'ip_address'.
diff --git a/lib/ovn-util.h b/lib/ovn-util.h
index f75b821b6..ae971ce5a 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
@@ -211,6 +211,9 @@  char *normalize_v46_prefix(const struct in6_addr *prefix, unsigned int plen);
  */
 char *str_tolower(const char *orig);
 
+long long ovn_smap_get_llong(const struct smap *smap, const char *key,
+                             long long def);
+
 /* OVN daemon options. Taken from ovs/lib/daemon.h. */
 #define OVN_DAEMON_OPTION_ENUMS                     \
     OVN_OPT_DETACH,                                 \
diff --git a/northd/northd.c b/northd/northd.c
index 7e474a7b8..7d3ad63a1 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -737,6 +737,14 @@  ovn_datapath_update_external_ids(struct ovn_datapath *od)
         smap_add(&ids, "name2", name2);
     }
 
+    int64_t ct_zone_limit = ovn_smap_get_llong(od->nbs ?
+                                               &od->nbs->other_config :
+                                               &od->nbr->options,
+                                               "ct-zone-limit", -1);
+    if (ct_zone_limit > 0) {
+        smap_add_format(&ids, "ct-zone-limit", "%"PRId64, ct_zone_limit);
+    }
+
     /* Set interconn-ts. */
     if (od->nbs) {
         const char *ts = smap_get(&od->nbs->other_config, "interconn-ts");
diff --git a/ovn-nb.xml b/ovn-nb.xml
index b0f8d3687..53e697308 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -721,6 +721,17 @@ 
         this timeout will be automatically removed. The value defaults
         to 0, which means disabled.
       </column>
+
+      <column name="other_config" key="ct-zone-limit"
+              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
+        CT zone <code>limit</code> value for given
+        <ref table="Logical_Switch"/>. This value will be propagated to all
+        <ref table="Logical_Switch_Port"/> when configured, but can be
+        overwritten individually per <ref table="Logical_Switch_Port"/>. The
+        value 0 means  unlimited, when the option is not present the limit
+        is not set and the zone limit is derived from OvS default datapath
+        limit.
+      </column>
     </group>
 
     <group title="IP Multicast Snooping Options">
@@ -1122,6 +1133,16 @@ 
           <code>false</code>.
         </column>
 
+        <column name="options" key="ct-zone-limit"
+                type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
+          CT zone <code>limit</code> value for given
+          <ref table="Logical_Switch_Port"/>. This value has priority over
+          limit specified on <ref table="Logical_Switch"/> when configured.
+          The value 0 means unlimited, when the option is not present the limit
+          is not set and the zone limit is derived from OvS default datapath
+          limit.
+        </column>
+
       </group>
 
       <group title="Options for localnet ports">
@@ -2785,6 +2806,14 @@  or
         </p>
 
       </column>
+
+      <column name="options" key="ct-zone-limit"
+              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
+        CT zone <code>limit</code> value for given
+        <ref table="Logical_Router"/>. The value 0 means unlimited, when the
+        option is not present the limit is not set and the zone limit is
+        derived from OvS default datapath limit.
+      </column>
     </group>
 
     <group title="Common Columns">
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 2cb86dc98..0a20cbc09 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -3127,3 +3127,102 @@  OVS_WAIT_UNTIL([grep -q 'tcp:127.0.0.1:1235: connected' hv1/ovn-controller.log])
 
 OVN_CLEANUP([hv1])
 AT_CLEANUP
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-controller - CT zone limit])
+ovn_start
+
+net_add n1
+sim_add hv1
+as hv1
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+check ovn-appctl -t ovn-controller vlog/set dbg:ct_zone
+
+check ovs-vsctl add-port br-int lsp \
+    -- set Interface lsp external-ids:iface-id=lsp
+
+check ovn-nbctl lr-add lr
+
+check ovn-nbctl ls-add ls
+check ovn-nbctl lsp-add ls ls-lr
+check ovn-nbctl lsp-set-type ls-lr router
+check ovn-nbctl lsp-set-addresses ls-lr router
+check ovn-nbctl lrp-add lr lr-ls 00:00:00:00:00:01 10.0.0.1
+
+check ovn-nbctl lsp-add ls lsp
+check ovn-nbctl lsp-set-addresses lsp "00:00:00:00:00:02 10.0.0.2"
+
+check ovn-nbctl lrp-add lr lrp-gw 01:00:00:00:00:01 172.16.0.1
+check ovn-nbctl lrp-set-gateway-chassis lrp-gw hv1
+
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+get_zone_num() {
+    output=$1
+    name=$2
+
+    printf "$output" | grep $name | cut -d ' ' -f 2
+}
+
+check_ovs_ct_limit() {
+    zone=$1
+    limit=$2
+
+    AT_CHECK_UNQUOTED([ovs-appctl dpctl/ct-get-limits zone=$zone | sed "s/count=.*/count=?/;s/default limit=.*/default limit=?/" | sort], [0], [dnl
+default limit=?
+zone=$zone,limit=$limit,count=?
+])
+}
+
+wait_ovs_ct_limit_count() {
+    count=$1
+
+    OVS_WAIT_UNTIL([test $count -eq $(ovs-vsctl --no-headings --format=table list CT_Zone | wc -l)])
+}
+
+ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list)
+lr_dnat=$(get_zone_num "$ct_zones" lr_dnat)
+lr_snat=$(get_zone_num "$ct_zones" lr_snat)
+
+ls_dnat=$(get_zone_num "$ct_zones" ls_dnat)
+ls_snat=$(get_zone_num "$ct_zones" ls_snat)
+
+lsp=$(get_zone_num "$ct_zones" lsp)
+
+wait_ovs_ct_limit_count 0
+
+check ovn-nbctl --wait=hv set Logical_Router lr options:ct-zone-limit=5
+wait_ovs_ct_limit_count 2
+check_ovs_ct_limit $lr_dnat 5
+check_ovs_ct_limit $lr_snat 5
+
+check ovn-nbctl --wait=hv remove Logical_Router lr options ct-zone-limit
+wait_ovs_ct_limit_count 0
+
+check ovn-nbctl --wait=hv set Logical_Switch ls other_config:ct-zone-limit=10
+wait_ovs_ct_limit_count 3
+check_ovs_ct_limit $ls_dnat 10
+check_ovs_ct_limit $ls_snat 10
+check_ovs_ct_limit $lsp 10
+
+check ovn-nbctl --wait=hv set Logical_Switch_Port lsp options:ct-zone-limit=5
+wait_ovs_ct_limit_count 3
+check_ovs_ct_limit $ls_dnat 10
+check_ovs_ct_limit $ls_snat 10
+check_ovs_ct_limit $lsp 5
+
+check ovn-nbctl --wait=hv remove Logical_Switch_Port lsp options ct-zone-limit
+wait_ovs_ct_limit_count 3
+check_ovs_ct_limit $ls_dnat 10
+check_ovs_ct_limit $ls_snat 10
+check_ovs_ct_limit $lsp 10
+
+check ovn-nbctl --wait=hv remove Logical_Switch ls other_config ct-zone-limit
+wait_ovs_ct_limit_count 0
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])