diff mbox series

[ovs-dev,v5,2/2] Controller, northd: Add support for CT zone limits.

Message ID 20240712143055.989919-3-amusil@redhat.com
State Superseded
Headers show
Series Add ability to limit CT entries per LS/LR/LSP | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Ales Musil July 12, 2024, 2:30 p.m. UTC
Add support for limiting the CT zone usage per Ls, LR or LSP.
When the limit is configured on logical switch it will also implicitly
set limits for all ports in that logical switch. The port configuration
can be overwritten individually and has priority over the whole logical
switch configuration.

The value 0 means unlimited, when the value is not specified it is
derived from OvS default CT limit specified for given OvS datapath.

Reported-at: https://bugzilla.redhat.com/2189924
Signed-off-by: Ales Musil <amusil@redhat.com>
---
v5: Rebase on top of latest main.
    Avoid OvS CT zone lookup in every loop of pending commit.
v4: Rebase on top of latest main.
    Change naming of the ct_zone_limit_sync to avoid potential confusion as suggested by Lorenzo.
v3: Rebase on top of latest main.
---
 NEWS                        |   3 +
 controller/ct-zone.c        | 176 ++++++++++++++++++++++++++++++++----
 controller/ct-zone.h        |  14 ++-
 controller/ovn-controller.c |  27 +++++-
 lib/ovn-util.c              |  17 ++++
 lib/ovn-util.h              |   3 +
 northd/northd.c             |   8 ++
 ovn-nb.xml                  |  29 ++++++
 tests/ovn-controller.at     |  99 ++++++++++++++++++++
 9 files changed, 350 insertions(+), 26 deletions(-)

Comments

Mark Michelson July 24, 2024, 6:36 p.m. UTC | #1
Thanks Ales.

Acked-by: Mark Michelson <mmichels@redhat.com>

I have some nits regarding the documentation below. I think these can be 
corrected when committing.

On 7/12/24 10:30, Ales Musil wrote:
> Add support for limiting the CT zone usage per Ls, LR or LSP.
> When the limit is configured on logical switch it will also implicitly
> set limits for all ports in that logical switch. The port configuration
> can be overwritten individually and has priority over the whole logical
> switch configuration.
> 
> The value 0 means unlimited, when the value is not specified it is
> derived from OvS default CT limit specified for given OvS datapath.
> 
> Reported-at: https://bugzilla.redhat.com/2189924
> Signed-off-by: Ales Musil <amusil@redhat.com>
> ---
> v5: Rebase on top of latest main.
>      Avoid OvS CT zone lookup in every loop of pending commit.
> v4: Rebase on top of latest main.
>      Change naming of the ct_zone_limit_sync to avoid potential confusion as suggested by Lorenzo.
> v3: Rebase on top of latest main.
> ---
>   NEWS                        |   3 +
>   controller/ct-zone.c        | 176 ++++++++++++++++++++++++++++++++----
>   controller/ct-zone.h        |  14 ++-
>   controller/ovn-controller.c |  27 +++++-
>   lib/ovn-util.c              |  17 ++++
>   lib/ovn-util.h              |   3 +
>   northd/northd.c             |   8 ++
>   ovn-nb.xml                  |  29 ++++++
>   tests/ovn-controller.at     |  99 ++++++++++++++++++++
>   9 files changed, 350 insertions(+), 26 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index 3e392ff08..8e725684e 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -38,6 +38,9 @@ Post v24.03.0
>       ability to disable "VXLAN mode" to extend available tunnel IDs space for
>       datapaths from 4095 to 16711680.  For more details see man ovn-nb(5) for
>       mentioned option.
> +  - Add support for CT zone limit that can be specified per LR
> +    (options:ct-zone-limit), LS (other_config:ct-zone-limit) or LSP
> +    (options:ct-zone-limit).
>   
>   OVN v24.03.0 - 01 Mar 2024
>   --------------------------
> diff --git a/controller/ct-zone.c b/controller/ct-zone.c
> index ab0eec9d0..edee893c8 100644
> --- a/controller/ct-zone.c
> +++ b/controller/ct-zone.c
> @@ -34,6 +34,16 @@ static bool ct_zone_assign_unused(struct ct_zone_ctx *ctx,
>   static bool ct_zone_remove(struct ct_zone_ctx *ctx, const char *name);
>   static void ct_zone_add(struct ct_zone_ctx *ctx, const char *name,
>                           uint16_t zone, bool set_pending);
> +static void
> +ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
> +                             const struct sbrec_datapath_binding *dp,
> +                             const char *name,
> +                             struct ovsdb_idl_index *pb_by_dp);
> +static void ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name,
> +                                 int64_t limit);
> +static int64_t ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp);
> +static int64_t ct_zone_get_pb_limit(const struct sbrec_port_binding *pb);
> +static int64_t ct_zone_limit_normalize(int64_t limit);
>   
>   void
>   ct_zone_ctx_init(struct ct_zone_ctx *ctx)
> @@ -210,11 +220,24 @@ ct_zones_update(const struct sset *local_lports,
>   
>   void
>   ct_zones_commit(const struct ovsrec_bridge *br_int,
> -                struct shash *pending_ct_zones)
> +                const struct ovsrec_datapath *ovs_dp,
> +                struct ovsdb_idl_txn *ovs_idl_txn,
> +                struct ct_zone_ctx *ctx)
>   {
> +    if (shash_is_empty(&ctx->pending)) {
> +        return;
> +    }
> +
> +    struct ovsrec_ct_zone **all_zones =
> +            xzalloc(sizeof *all_zones * (MAX_CT_ZONES + 1));
> +    for (size_t i = 0; i < ovs_dp->n_ct_zones; i++) {
> +        all_zones[ovs_dp->key_ct_zones[i]] = ovs_dp->value_ct_zones[i];
> +    }
> +
>       struct shash_node *iter;
> -    SHASH_FOR_EACH (iter, pending_ct_zones) {
> +    SHASH_FOR_EACH (iter, &ctx->pending) {
>           struct ct_zone_pending_entry *ctzpe = iter->data;
> +        struct ct_zone *ct_zone = &ctzpe->ct_zone;
>   
>           /* The transaction is open, so any pending entries in the
>            * CT_ZONE_DB_QUEUED must be sent and any in CT_ZONE_DB_QUEUED
> @@ -226,7 +249,7 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
>   
>           char *user_str = xasprintf("ct-zone-%s", iter->name);
>           if (ctzpe->add) {
> -            char *zone_str = xasprintf("%"PRIu16, ctzpe->ct_zone.zone);
> +            char *zone_str = xasprintf("%"PRIu16, ct_zone->zone);
>               struct smap_node *node =
>                       smap_get_node(&br_int->external_ids, user_str);
>               if (!node || strcmp(node->value, zone_str)) {
> @@ -241,8 +264,22 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
>           }
>           free(user_str);
>   
> +        struct ovsrec_ct_zone *ovs_zone = all_zones[ct_zone->zone];
> +        if ((!ctzpe->add || ct_zone->limit < 0) && ovs_zone) {
> +            ovsrec_datapath_update_ct_zones_delkey(ovs_dp, ct_zone->zone);
> +        } else if (ctzpe->add && ct_zone->limit >= 0) {
> +            if (!ovs_zone) {
> +                ovs_zone = ovsrec_ct_zone_insert(ovs_idl_txn);
> +                ovsrec_datapath_update_ct_zones_setkey(ovs_dp, ct_zone->zone,
> +                                                       ovs_zone);
> +            }
> +            ovsrec_ct_zone_set_limit(ovs_zone, &ct_zone->limit, 1);
> +        }
> +
>           ctzpe->state = CT_ZONE_DB_SENT;
>       }
> +
> +    free(all_zones);
>   }
>   
>   void
> @@ -261,8 +298,19 @@ ct_zones_pending_clear_commited(struct shash *pending)
>   /* Returns "true" when there is no need for full recompute. */
>   bool
>   ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> -                         const struct sbrec_datapath_binding *dp)
> +                         const struct sbrec_datapath_binding *dp,
> +                         struct ovsdb_idl_index *pb_by_dp)
>   {
> +    const char *name = smap_get(&dp->external_ids, "name");
> +    if (!name) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> +        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
> +                    "zone check.", UUID_ARGS(&dp->header_.uuid));
> +        return true;
> +    }
> +
> +    ct_zone_limits_update_per_dp(ctx, dp, name, pb_by_dp);
> +
>       int req_snat_zone = ct_zone_get_snat(dp);
>       if (req_snat_zone == -1) {
>           /* datapath snat ct zone is not set.  This condition will also hit
> @@ -273,14 +321,6 @@ ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
>           return true;
>       }
>   
> -    const char *name = smap_get(&dp->external_ids, "name");
> -    if (!name) {
> -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> -        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
> -                    "zone check.", UUID_ARGS(&dp->header_.uuid));
> -        return true;
> -    }
> -
>       /* Check if the requested snat zone has changed for the datapath
>        * or not.  If so, then fall back to full recompute of
>        * ct_zone engine. */
> @@ -304,14 +344,18 @@ ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
>   
>   /* Returns "true" if there was an update to the context. */
>   bool
> -ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
> +ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
> +                           const struct sbrec_port_binding *pb,
>                              bool updated, int *scan_start)
>   {
> -    struct shash_node *node = shash_find(&ctx->current, name);
> -    if (updated && !node) {
> -        ct_zone_assign_unused(ctx, name, scan_start);
> +    struct shash_node *node = shash_find(&ctx->current, pb->logical_port);
> +    if (updated) {
> +        if (!node) {
> +            ct_zone_assign_unused(ctx, pb->logical_port, scan_start);
> +        }
> +        ct_zone_limit_update(ctx, pb->logical_port, ct_zone_get_pb_limit(pb));
>           return true;
> -    } else if (!updated && node && ct_zone_remove(ctx, node->name)) {
> +    } else if (node && ct_zone_remove(ctx, node->name)) {
>           return true;
>       }
>   
> @@ -325,6 +369,25 @@ ct_zone_find_zone(const struct shash *ct_zones, const char *name)
>       return ct_zone ? ct_zone->zone : 0;
>   }
>   
> +void
> +ct_zones_limits_sync(struct ct_zone_ctx *ctx,
> +                     const struct hmap *local_datapaths,
> +                     struct ovsdb_idl_index *pb_by_dp)
> +{
> +    const struct local_datapath *ld;
> +    HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> +        const char *name = smap_get(&ld->datapath->external_ids, "name");
> +        if (!name) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> +            VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' "
> +                        "skipping zone assignment.",
> +                        UUID_ARGS(&ld->datapath->header_.uuid));
> +            continue;
> +        }
> +
> +        ct_zone_limits_update_per_dp(ctx, ld->datapath, name, pb_by_dp);
> +    }
> +}
>   
>   static bool
>   ct_zone_assign_unused(struct ct_zone_ctx *ctx, const char *zone_name,
> @@ -377,7 +440,10 @@ ct_zone_add(struct ct_zone_ctx *ctx, const char *name, uint16_t zone,
>           shash_add(&ctx->current, name, ct_zone);
>       }
>   
> -    ct_zone->zone = zone;
> +    *ct_zone = (struct ct_zone) {
> +        .zone = zone,
> +        .limit = -1,
> +    };
>   
>       if (set_pending) {
>           ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
> @@ -460,6 +526,7 @@ ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
>   
>           struct ct_zone ct_zone = {
>               .zone = zone,
> +            .limit = -1,
>           };
>           /* Make sure we remove the uuid one in the next OvS DB commit without
>            * flush. */
> @@ -475,3 +542,76 @@ ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
>       ct_zone_add(ctx, current_name, zone, false);
>       free(new_name);
>   }
> +
> +static void
> +ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
> +                             const struct sbrec_datapath_binding *dp,
> +                             const char *name,
> +                             struct ovsdb_idl_index *pb_by_dp)
> +{
> +    if (smap_get(&dp->external_ids, "logical-switch")) {
> +        struct sbrec_port_binding *target =
> +                sbrec_port_binding_index_init_row(pb_by_dp);
> +        sbrec_port_binding_index_set_datapath(target, dp);
> +
> +        const struct sbrec_port_binding *pb;
> +        SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target, pb_by_dp) {
> +            ct_zone_limit_update(ctx, pb->logical_port,
> +                                 ct_zone_get_pb_limit(pb));
> +        }
> +
> +        sbrec_port_binding_index_destroy_row(target);
> +    }
> +
> +    int64_t dp_limit = ct_zone_get_dp_limit(dp);
> +    char *dnat = alloc_nat_zone_key(name, "dnat");
> +    char *snat = alloc_nat_zone_key(name, "snat");
> +
> +    ct_zone_limit_update(ctx, dnat, dp_limit);
> +    ct_zone_limit_update(ctx, snat, dp_limit);
> +
> +    free(dnat);
> +    free(snat);
> +}
> +
> +static void
> +ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name, int64_t limit)
> +{
> +    struct ct_zone *ct_zone = shash_find_data(&ctx->current, name);
> +
> +    if (!ct_zone) {
> +        return;
> +    }
> +
> +    if (ct_zone->limit != limit) {
> +        ct_zone->limit = limit;
> +        /* Add pending entry only for DB store to avoid flushing the zone. */
> +        ct_zone_add_pending(&ctx->pending, CT_ZONE_DB_QUEUED, ct_zone,
> +                            true, name);
> +        VLOG_DBG("setting ct zone %"PRIu16" limit to %"PRId64,
> +                 ct_zone->zone, ct_zone->limit);
> +    }
> +}
> +
> +static int64_t
> +ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp)
> +{
> +    int64_t limit = ovn_smap_get_llong(&dp->external_ids, "ct-zone-limit", -1);
> +    return ct_zone_limit_normalize(limit);
> +}
> +
> +static int64_t
> +ct_zone_get_pb_limit(const struct sbrec_port_binding *pb)
> +{
> +    int64_t dp_limit = ovn_smap_get_llong(&pb->datapath->external_ids,
> +                                          "ct-zone-limit", -1);
> +    int64_t limit = ovn_smap_get_llong(&pb->options,
> +                                       "ct-zone-limit", dp_limit);
> +    return ct_zone_limit_normalize(limit);
> +}
> +
> +static int64_t
> +ct_zone_limit_normalize(int64_t limit)
> +{
> +    return limit >= 0 && limit <= UINT32_MAX ? limit : -1;
> +}
> diff --git a/controller/ct-zone.h b/controller/ct-zone.h
> index a7c2011a1..295ac84f3 100644
> --- a/controller/ct-zone.h
> +++ b/controller/ct-zone.h
> @@ -43,6 +43,7 @@ struct ct_zone_ctx {
>   
>   struct ct_zone {
>       uint16_t zone;
> +    int64_t limit;
>   };
>   
>   /* States to move through when a new conntrack zone has been allocated. */
> @@ -70,12 +71,19 @@ void ct_zones_update(const struct sset *local_lports,
>                        const struct hmap *local_datapaths,
>                        struct ct_zone_ctx *ctx);
>   void ct_zones_commit(const struct ovsrec_bridge *br_int,
> -                     struct shash *pending_ct_zones);
> +                     const struct ovsrec_datapath *ovs_dp,
> +                     struct ovsdb_idl_txn *ovs_idl_txn,
> +                     struct ct_zone_ctx *ctx);
>   void ct_zones_pending_clear_commited(struct shash *pending);
>   bool ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> -                              const struct sbrec_datapath_binding *dp);
> -bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
> +                              const struct sbrec_datapath_binding *dp,
> +                              struct ovsdb_idl_index *pb_by_dp);
> +bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
> +                                const struct sbrec_port_binding *pb,
>                                   bool updated, int *scan_start);
>   uint16_t ct_zone_find_zone(const struct shash *ct_zones, const char *name);
> +void ct_zones_limits_sync(struct ct_zone_ctx *ctx,
> +                          const struct hmap *local_datapaths,
> +                          struct ovsdb_idl_index *pb_by_dp);
>   
>   #endif /* controller/ct-zone.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 99a7c8617..a875977ad 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -798,6 +798,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
>       ovsdb_idl_add_column(ovs_idl, &ovsrec_ssl_col_private_key);
>       ovsdb_idl_add_table(ovs_idl, &ovsrec_table_datapath);
>       ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_capabilities);
> +    ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_ct_zones);
>       ovsdb_idl_add_table(ovs_idl, &ovsrec_table_flow_sample_collector_set);
>       ovsdb_idl_add_table(ovs_idl, &ovsrec_table_qos);
>       ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_other_config);
> @@ -807,6 +808,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
>       ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_other_config);
>       ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_external_ids);
>       ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_link_state);
> +    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_ct_zone);
> +    ovsdb_idl_add_column(ovs_idl, &ovsrec_ct_zone_col_limit);
>   
>       chassis_register_ovs_idl(ovs_idl);
>       encaps_register_ovs_idl(ovs_idl);
> @@ -2217,6 +2220,10 @@ en_ct_zones_run(struct engine_node *node, void *data)
>       struct ed_type_ct_zones *ct_zones_data = data;
>       struct ed_type_runtime_data *rt_data =
>           engine_get_input_data("runtime_data", node);
> +    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
> +            engine_ovsdb_node_get_index(
> +                    engine_get_input("SB_port_binding", node),
> +                    "datapath");
>   
>       const struct ovsrec_open_vswitch_table *ovs_table =
>           EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
> @@ -2230,6 +2237,8 @@ en_ct_zones_run(struct engine_node *node, void *data)
>       ct_zones_restore(&ct_zones_data->ctx, ovs_table, dp_table, br_int);
>       ct_zones_update(&rt_data->local_lports, &rt_data->local_datapaths,
>                       &ct_zones_data->ctx);
> +    ct_zones_limits_sync(&ct_zones_data->ctx, &rt_data->local_datapaths,
> +                         sbrec_port_binding_by_datapath);
>   
>       ct_zones_data->recomputed = true;
>       engine_set_node_state(node, EN_UPDATED);
> @@ -2247,6 +2256,10 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
>           engine_get_input_data("runtime_data", node);
>       const struct sbrec_datapath_binding_table *dp_table =
>           EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
> +    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
> +            engine_ovsdb_node_get_index(
> +                    engine_get_input("SB_port_binding", node),
> +                    "datapath");
>   
>       SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
>           if (!get_local_datapath(&rt_data->local_datapaths,
> @@ -2260,7 +2273,8 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
>               return false;
>           }
>   
> -        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp)) {
> +        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp,
> +                                      sbrec_port_binding_by_datapath)) {
>               return false;
>           }
>       }
> @@ -2309,8 +2323,8 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data)
>                       t_lport->tracked_type == TRACKED_RESOURCE_NEW ||
>                       t_lport->tracked_type == TRACKED_RESOURCE_UPDATED;
>               updated |= ct_zone_handle_port_update(&ct_zones_data->ctx,
> -                                                  t_lport->pb->logical_port,
> -                                                  port_updated, &scan_start);
> +                                                  t_lport->pb, port_updated,
> +                                                  &scan_start);
>           }
>       }
>   
> @@ -5151,6 +5165,9 @@ main(int argc, char *argv[])
>                        ct_zones_datapath_binding_handler);
>       engine_add_input(&en_ct_zones, &en_runtime_data,
>                        ct_zones_runtime_data_handler);
> +    /* The CT node just needs the index for port bindings by name. The data
> +     * for ports are processed in the runtime handler. */
> +    engine_add_input(&en_ct_zones, &en_sb_port_binding, engine_noop_handler);
>   
>       engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
>                        ovs_interface_shadow_ovs_interface_handler);
> @@ -5555,8 +5572,8 @@ main(int argc, char *argv[])
>                           if (ct_zones_data) {
>                               stopwatch_start(CT_ZONE_COMMIT_STOPWATCH_NAME,
>                                               time_msec());
> -                            ct_zones_commit(br_int,
> -                                            &ct_zones_data->ctx.pending);
> +                            ct_zones_commit(br_int, br_int_dp, ovs_idl_txn,
> +                                            &ct_zones_data->ctx);
>                               stopwatch_stop(CT_ZONE_COMMIT_STOPWATCH_NAME,
>                                              time_msec());
>                           }
> diff --git a/lib/ovn-util.c b/lib/ovn-util.c
> index 58e941193..1ad347419 100644
> --- a/lib/ovn-util.c
> +++ b/lib/ovn-util.c
> @@ -816,6 +816,23 @@ str_tolower(const char *orig)
>       return copy;
>   }
>   
> +/* This is a wrapper function which get the value associated with 'key' in
> + * 'smap' and converts it to a long long. If 'key' is not in 'smap' or a
> + * valid unsigned integer can't be parsed from its value, returns 'def'.
> + */
> +long long
> +ovn_smap_get_llong(const struct smap *smap, const char *key, long long def)
> +{
> +    const char *value = smap_get(smap, key);
> +    long long ll_value;
> +
> +    if (!value || !str_to_llong(value, 10, &ll_value)) {
> +        return def;
> +    }
> +
> +    return ll_value;
> +}
> +
>   /* For a 'key' of the form "IP:port" or just "IP", sets 'port',
>    * 'ip_address' and 'ip' ('struct in6_addr' IPv6 or IPv4 mapped address).
>    * The caller must free() the memory allocated for 'ip_address'.
> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
> index f75b821b6..ae971ce5a 100644
> --- a/lib/ovn-util.h
> +++ b/lib/ovn-util.h
> @@ -211,6 +211,9 @@ char *normalize_v46_prefix(const struct in6_addr *prefix, unsigned int plen);
>    */
>   char *str_tolower(const char *orig);
>   
> +long long ovn_smap_get_llong(const struct smap *smap, const char *key,
> +                             long long def);
> +
>   /* OVN daemon options. Taken from ovs/lib/daemon.h. */
>   #define OVN_DAEMON_OPTION_ENUMS                     \
>       OVN_OPT_DETACH,                                 \
> diff --git a/northd/northd.c b/northd/northd.c
> index 6898daa00..08208405b 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -741,6 +741,14 @@ ovn_datapath_update_external_ids(struct ovn_datapath *od)
>           smap_add(&ids, "name2", name2);
>       }
>   
> +    int64_t ct_zone_limit = ovn_smap_get_llong(od->nbs ?
> +                                               &od->nbs->other_config :
> +                                               &od->nbr->options,
> +                                               "ct-zone-limit", -1);
> +    if (ct_zone_limit > 0) {
> +        smap_add_format(&ids, "ct-zone-limit", "%"PRId64, ct_zone_limit);
> +    }
> +
>       /* Set interconn-ts. */
>       if (od->nbs) {
>           const char *ts = smap_get(&od->nbs->other_config, "interconn-ts");
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 9552534f6..8e369a850 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -731,6 +731,17 @@
>           this timeout will be automatically removed. The value defaults
>           to 0, which means disabled.
>         </column>
> +
> +      <column name="other_config" key="ct-zone-limit"
> +              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
> +        CT zone <code>limit</code> value for given
> +        <ref table="Logical_Switch"/>. This value will be propagated to all
> +        <ref table="Logical_Switch_Port"/> when configured, but can be
> +        overwritten individually per <ref table="Logical_Switch_Port"/>. The
> +        value 0 means  unlimited, when the option is not present the limit

There are two spaces between "means" and "unlimited" instead of one. 
Also, I think that instead of a comma after "unlimited" it should be a 
period/full stop. In total, the suggestion is

"value 0 means unlimited. When the option is not present the limit"

> +        is not set and the zone limit is derived from OvS default datapath
> +        limit.
> +      </column>
>       </group>
>   
>       <group title="IP Multicast Snooping Options">
> @@ -1132,6 +1143,16 @@
>             <code>false</code>.
>           </column>
>   
> +        <column name="options" key="ct-zone-limit"
> +                type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
> +          CT zone <code>limit</code> value for given
> +          <ref table="Logical_Switch_Port"/>. This value has priority over
> +          limit specified on <ref table="Logical_Switch"/> when configured.
> +          The value 0 means unlimited, when the option is not present the limit

Same suggestion here about ending the sentence after "unlimited"

> +          is not set and the zone limit is derived from OvS default datapath
> +          limit.
> +        </column>
> +
>         </group>
>   
>         <group title="Options for localnet ports">
> @@ -2795,6 +2816,14 @@ or
>           </p>
>   
>         </column>
> +
> +      <column name="options" key="ct-zone-limit"
> +              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
> +        CT zone <code>limit</code> value for given
> +        <ref table="Logical_Router"/>. The value 0 means unlimited, when the

And again here.

> +        option is not present the limit is not set and the zone limit is
> +        derived from OvS default datapath limit.
> +      </column>
>       </group>
>   
>       <group title="Common Columns">
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index 2cb86dc98..0a20cbc09 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -3127,3 +3127,102 @@ OVS_WAIT_UNTIL([grep -q 'tcp:127.0.0.1:1235: connected' hv1/ovn-controller.log])
>   
>   OVN_CLEANUP([hv1])
>   AT_CLEANUP
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn-controller - CT zone limit])
> +ovn_start
> +
> +net_add n1
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +check ovn-appctl -t ovn-controller vlog/set dbg:ct_zone
> +
> +check ovs-vsctl add-port br-int lsp \
> +    -- set Interface lsp external-ids:iface-id=lsp
> +
> +check ovn-nbctl lr-add lr
> +
> +check ovn-nbctl ls-add ls
> +check ovn-nbctl lsp-add ls ls-lr
> +check ovn-nbctl lsp-set-type ls-lr router
> +check ovn-nbctl lsp-set-addresses ls-lr router
> +check ovn-nbctl lrp-add lr lr-ls 00:00:00:00:00:01 10.0.0.1
> +
> +check ovn-nbctl lsp-add ls lsp
> +check ovn-nbctl lsp-set-addresses lsp "00:00:00:00:00:02 10.0.0.2"
> +
> +check ovn-nbctl lrp-add lr lrp-gw 01:00:00:00:00:01 172.16.0.1
> +check ovn-nbctl lrp-set-gateway-chassis lrp-gw hv1
> +
> +wait_for_ports_up
> +check ovn-nbctl --wait=hv sync
> +
> +get_zone_num() {
> +    output=$1
> +    name=$2
> +
> +    printf "$output" | grep $name | cut -d ' ' -f 2
> +}
> +
> +check_ovs_ct_limit() {
> +    zone=$1
> +    limit=$2
> +
> +    AT_CHECK_UNQUOTED([ovs-appctl dpctl/ct-get-limits zone=$zone | sed "s/count=.*/count=?/;s/default limit=.*/default limit=?/" | sort], [0], [dnl
> +default limit=?
> +zone=$zone,limit=$limit,count=?
> +])
> +}
> +
> +wait_ovs_ct_limit_count() {
> +    count=$1
> +
> +    OVS_WAIT_UNTIL([test $count -eq $(ovs-vsctl --no-headings --format=table list CT_Zone | wc -l)])
> +}
> +
> +ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list)
> +lr_dnat=$(get_zone_num "$ct_zones" lr_dnat)
> +lr_snat=$(get_zone_num "$ct_zones" lr_snat)
> +
> +ls_dnat=$(get_zone_num "$ct_zones" ls_dnat)
> +ls_snat=$(get_zone_num "$ct_zones" ls_snat)
> +
> +lsp=$(get_zone_num "$ct_zones" lsp)
> +
> +wait_ovs_ct_limit_count 0
> +
> +check ovn-nbctl --wait=hv set Logical_Router lr options:ct-zone-limit=5
> +wait_ovs_ct_limit_count 2
> +check_ovs_ct_limit $lr_dnat 5
> +check_ovs_ct_limit $lr_snat 5
> +
> +check ovn-nbctl --wait=hv remove Logical_Router lr options ct-zone-limit
> +wait_ovs_ct_limit_count 0
> +
> +check ovn-nbctl --wait=hv set Logical_Switch ls other_config:ct-zone-limit=10
> +wait_ovs_ct_limit_count 3
> +check_ovs_ct_limit $ls_dnat 10
> +check_ovs_ct_limit $ls_snat 10
> +check_ovs_ct_limit $lsp 10
> +
> +check ovn-nbctl --wait=hv set Logical_Switch_Port lsp options:ct-zone-limit=5
> +wait_ovs_ct_limit_count 3
> +check_ovs_ct_limit $ls_dnat 10
> +check_ovs_ct_limit $ls_snat 10
> +check_ovs_ct_limit $lsp 5
> +
> +check ovn-nbctl --wait=hv remove Logical_Switch_Port lsp options ct-zone-limit
> +wait_ovs_ct_limit_count 3
> +check_ovs_ct_limit $ls_dnat 10
> +check_ovs_ct_limit $ls_snat 10
> +check_ovs_ct_limit $lsp 10
> +
> +check ovn-nbctl --wait=hv remove Logical_Switch ls other_config ct-zone-limit
> +wait_ovs_ct_limit_count 0
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])
Numan Siddique July 24, 2024, 9:35 p.m. UTC | #2
On Wed, Jul 24, 2024 at 2:37 PM Mark Michelson <mmichels@redhat.com> wrote:
>
> Thanks Ales.
>
> Acked-by: Mark Michelson <mmichels@redhat.com>
>
> I have some nits regarding the documentation below. I think these can be
> corrected when committing.
>
> On 7/12/24 10:30, Ales Musil wrote:
> > Add support for limiting the CT zone usage per Ls, LR or LSP.
> > When the limit is configured on logical switch it will also implicitly
> > set limits for all ports in that logical switch. The port configuration
> > can be overwritten individually and has priority over the whole logical
> > switch configuration.
> >
> > The value 0 means unlimited, when the value is not specified it is
> > derived from OvS default CT limit specified for given OvS datapath.
> >
> > Reported-at: https://bugzilla.redhat.com/2189924
> > Signed-off-by: Ales Musil <amusil@redhat.com>

Hi Ales,

I've a few comments.  Please see below.


> > ---
> > v5: Rebase on top of latest main.
> >      Avoid OvS CT zone lookup in every loop of pending commit.
> > v4: Rebase on top of latest main.
> >      Change naming of the ct_zone_limit_sync to avoid potential confusion as suggested by Lorenzo.
> > v3: Rebase on top of latest main.
> > ---
> >   NEWS                        |   3 +
> >   controller/ct-zone.c        | 176 ++++++++++++++++++++++++++++++++----
> >   controller/ct-zone.h        |  14 ++-
> >   controller/ovn-controller.c |  27 +++++-
> >   lib/ovn-util.c              |  17 ++++
> >   lib/ovn-util.h              |   3 +
> >   northd/northd.c             |   8 ++
> >   ovn-nb.xml                  |  29 ++++++
> >   tests/ovn-controller.at     |  99 ++++++++++++++++++++
> >   9 files changed, 350 insertions(+), 26 deletions(-)
> >
> > diff --git a/NEWS b/NEWS
> > index 3e392ff08..8e725684e 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -38,6 +38,9 @@ Post v24.03.0
> >       ability to disable "VXLAN mode" to extend available tunnel IDs space for
> >       datapaths from 4095 to 16711680.  For more details see man ovn-nb(5) for
> >       mentioned option.
> > +  - Add support for CT zone limit that can be specified per LR
> > +    (options:ct-zone-limit), LS (other_config:ct-zone-limit) or LSP
> > +    (options:ct-zone-limit).
> >
> >   OVN v24.03.0 - 01 Mar 2024
> >   --------------------------
> > diff --git a/controller/ct-zone.c b/controller/ct-zone.c
> > index ab0eec9d0..edee893c8 100644
> > --- a/controller/ct-zone.c
> > +++ b/controller/ct-zone.c
> > @@ -34,6 +34,16 @@ static bool ct_zone_assign_unused(struct ct_zone_ctx *ctx,
> >   static bool ct_zone_remove(struct ct_zone_ctx *ctx, const char *name);
> >   static void ct_zone_add(struct ct_zone_ctx *ctx, const char *name,
> >                           uint16_t zone, bool set_pending);
> > +static void
> > +ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
> > +                             const struct sbrec_datapath_binding *dp,
> > +                             const char *name,
> > +                             struct ovsdb_idl_index *pb_by_dp);
> > +static void ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name,
> > +                                 int64_t limit);
> > +static int64_t ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp);
> > +static int64_t ct_zone_get_pb_limit(const struct sbrec_port_binding *pb);
> > +static int64_t ct_zone_limit_normalize(int64_t limit);
> >
> >   void
> >   ct_zone_ctx_init(struct ct_zone_ctx *ctx)
> > @@ -210,11 +220,24 @@ ct_zones_update(const struct sset *local_lports,
> >
> >   void
> >   ct_zones_commit(const struct ovsrec_bridge *br_int,
> > -                struct shash *pending_ct_zones)
> > +                const struct ovsrec_datapath *ovs_dp,
> > +                struct ovsdb_idl_txn *ovs_idl_txn,
> > +                struct ct_zone_ctx *ctx)
> >   {
> > +    if (shash_is_empty(&ctx->pending)) {
> > +        return;
> > +    }
> > +
> > +    struct ovsrec_ct_zone **all_zones =
> > +            xzalloc(sizeof *all_zones * (MAX_CT_ZONES + 1));
> > +    for (size_t i = 0; i < ovs_dp->n_ct_zones; i++) {
> > +        all_zones[ovs_dp->key_ct_zones[i]] = ovs_dp->value_ct_zones[i];
> > +    }
> > +
> >       struct shash_node *iter;
> > -    SHASH_FOR_EACH (iter, pending_ct_zones) {
> > +    SHASH_FOR_EACH (iter, &ctx->pending) {
> >           struct ct_zone_pending_entry *ctzpe = iter->data;
> > +        struct ct_zone *ct_zone = &ctzpe->ct_zone;
> >
> >           /* The transaction is open, so any pending entries in the
> >            * CT_ZONE_DB_QUEUED must be sent and any in CT_ZONE_DB_QUEUED
> > @@ -226,7 +249,7 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
> >
> >           char *user_str = xasprintf("ct-zone-%s", iter->name);
> >           if (ctzpe->add) {
> > -            char *zone_str = xasprintf("%"PRIu16, ctzpe->ct_zone.zone);
> > +            char *zone_str = xasprintf("%"PRIu16, ct_zone->zone);
> >               struct smap_node *node =
> >                       smap_get_node(&br_int->external_ids, user_str);
> >               if (!node || strcmp(node->value, zone_str)) {
> > @@ -241,8 +264,22 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
> >           }
> >           free(user_str);
> >
> > +        struct ovsrec_ct_zone *ovs_zone = all_zones[ct_zone->zone];
> > +        if ((!ctzpe->add || ct_zone->limit < 0) && ovs_zone) {
> > +            ovsrec_datapath_update_ct_zones_delkey(ovs_dp, ct_zone->zone);
> > +        } else if (ctzpe->add && ct_zone->limit >= 0) {
> > +            if (!ovs_zone) {
> > +                ovs_zone = ovsrec_ct_zone_insert(ovs_idl_txn);
> > +                ovsrec_datapath_update_ct_zones_setkey(ovs_dp, ct_zone->zone,
> > +                                                       ovs_zone);
> > +            }
> > +            ovsrec_ct_zone_set_limit(ovs_zone, &ct_zone->limit, 1);
> > +        }
> > +
> >           ctzpe->state = CT_ZONE_DB_SENT;
> >       }
> > +
> > +    free(all_zones);
> >   }
> >
> >   void
> > @@ -261,8 +298,19 @@ ct_zones_pending_clear_commited(struct shash *pending)
> >   /* Returns "true" when there is no need for full recompute. */
> >   bool
> >   ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> > -                         const struct sbrec_datapath_binding *dp)
> > +                         const struct sbrec_datapath_binding *dp,
> > +                         struct ovsdb_idl_index *pb_by_dp)
> >   {
> > +    const char *name = smap_get(&dp->external_ids, "name");
> > +    if (!name) {
> > +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> > +        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
> > +                    "zone check.", UUID_ARGS(&dp->header_.uuid));
> > +        return true;
> > +    }
> > +
> > +    ct_zone_limits_update_per_dp(ctx, dp, name, pb_by_dp);
> > +
> >       int req_snat_zone = ct_zone_get_snat(dp);
> >       if (req_snat_zone == -1) {
> >           /* datapath snat ct zone is not set.  This condition will also hit
> > @@ -273,14 +321,6 @@ ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> >           return true;
> >       }
> >
> > -    const char *name = smap_get(&dp->external_ids, "name");
> > -    if (!name) {
> > -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> > -        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
> > -                    "zone check.", UUID_ARGS(&dp->header_.uuid));
> > -        return true;
> > -    }
> > -
> >       /* Check if the requested snat zone has changed for the datapath
> >        * or not.  If so, then fall back to full recompute of
> >        * ct_zone engine. */
> > @@ -304,14 +344,18 @@ ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> >
> >   /* Returns "true" if there was an update to the context. */
> >   bool
> > -ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
> > +ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
> > +                           const struct sbrec_port_binding *pb,
> >                              bool updated, int *scan_start)
> >   {
> > -    struct shash_node *node = shash_find(&ctx->current, name);
> > -    if (updated && !node) {
> > -        ct_zone_assign_unused(ctx, name, scan_start);
> > +    struct shash_node *node = shash_find(&ctx->current, pb->logical_port);
> > +    if (updated) {
> > +        if (!node) {
> > +            ct_zone_assign_unused(ctx, pb->logical_port, scan_start);
> > +        }
> > +        ct_zone_limit_update(ctx, pb->logical_port, ct_zone_get_pb_limit(pb));
> >           return true;
> > -    } else if (!updated && node && ct_zone_remove(ctx, node->name)) {
> > +    } else if (node && ct_zone_remove(ctx, node->name)) {
> >           return true;
> >       }
> >
> > @@ -325,6 +369,25 @@ ct_zone_find_zone(const struct shash *ct_zones, const char *name)
> >       return ct_zone ? ct_zone->zone : 0;
> >   }
> >
> > +void
> > +ct_zones_limits_sync(struct ct_zone_ctx *ctx,
> > +                     const struct hmap *local_datapaths,
> > +                     struct ovsdb_idl_index *pb_by_dp)
> > +{
> > +    const struct local_datapath *ld;
> > +    HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> > +        const char *name = smap_get(&ld->datapath->external_ids, "name");
> > +        if (!name) {
> > +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> > +            VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' "
> > +                        "skipping zone assignment.",
> > +                        UUID_ARGS(&ld->datapath->header_.uuid));
> > +            continue;
> > +        }
> > +
> > +        ct_zone_limits_update_per_dp(ctx, ld->datapath, name, pb_by_dp);
> > +    }
> > +}
> >
> >   static bool
> >   ct_zone_assign_unused(struct ct_zone_ctx *ctx, const char *zone_name,
> > @@ -377,7 +440,10 @@ ct_zone_add(struct ct_zone_ctx *ctx, const char *name, uint16_t zone,
> >           shash_add(&ctx->current, name, ct_zone);
> >       }
> >
> > -    ct_zone->zone = zone;
> > +    *ct_zone = (struct ct_zone) {
> > +        .zone = zone,
> > +        .limit = -1,
> > +    };
> >
> >       if (set_pending) {
> >           ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
> > @@ -460,6 +526,7 @@ ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
> >
> >           struct ct_zone ct_zone = {
> >               .zone = zone,
> > +            .limit = -1,
> >           };
> >           /* Make sure we remove the uuid one in the next OvS DB commit without
> >            * flush. */
> > @@ -475,3 +542,76 @@ ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
> >       ct_zone_add(ctx, current_name, zone, false);
> >       free(new_name);
> >   }
> > +
> > +static void
> > +ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
> > +                             const struct sbrec_datapath_binding *dp,
> > +                             const char *name,
> > +                             struct ovsdb_idl_index *pb_by_dp)
> > +{
> > +    if (smap_get(&dp->external_ids, "logical-switch")) {
smap_get() lookup can be avoided if this function takes  'struct local_datapath'
instead of 'struct sbrec_datapath_binding'.  'struct local_datapath'
has already 'is_switch' .


> > +        struct sbrec_port_binding *target =
> > +                sbrec_port_binding_index_init_row(pb_by_dp);
> > +        sbrec_port_binding_index_set_datapath(target, dp);
> > +
> > +        const struct sbrec_port_binding *pb;
> > +        SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target, pb_by_dp) {
> > +            ct_zone_limit_update(ctx, pb->logical_port,
> > +                                 ct_zone_get_pb_limit(pb));
> > +        }
> > +
'
I think iterating through all the lports of a datapath can be a bit
inefficient, as
it would also iterate through the logical ports bound on other chassis
and logical ports
of other types (patch, external etc).   ct_zone_limit_update() will be
called for each
of these logical ports  resulting in unnecessary look up in the
ct_zones_ctx->current.
Instead I'd suggest using the runtime data 'lbinding_data'.  You can
iterate either
lbinding_data.bindings or lbinding_data.lports to update the ct zone
limit for an lport
bound on this chassis.

ct_zone_limits_update_per_dp() will also be called for any update to
the datapath binding,
results in running the above for loop - SBREC_PORT_BINDING_FOR_EACH_EQUAL
which seems unnecessary. I think you can decouple lport ct zone limit updates
from the dp zone limit update.

Thanks
Numan


> > +        sbrec_port_binding_index_destroy_row(target);
> > +    }
> > +
> > +    int64_t dp_limit = ct_zone_get_dp_limit(dp);
> > +    char *dnat = alloc_nat_zone_key(name, "dnat");
> > +    char *snat = alloc_nat_zone_key(name, "snat");
> > +
> > +    ct_zone_limit_update(ctx, dnat, dp_limit);
> > +    ct_zone_limit_update(ctx, snat, dp_limit);
> > +
> > +    free(dnat);
> > +    free(snat);
> > +}
> > +
> > +static void
> > +ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name, int64_t limit)
> > +{
> > +    struct ct_zone *ct_zone = shash_find_data(&ctx->current, name);
> > +
> > +    if (!ct_zone) {
> > +        return;
> > +    }
> > +
> > +    if (ct_zone->limit != limit) {
> > +        ct_zone->limit = limit;
> > +        /* Add pending entry only for DB store to avoid flushing the zone. */
> > +        ct_zone_add_pending(&ctx->pending, CT_ZONE_DB_QUEUED, ct_zone,
> > +                            true, name);
> > +        VLOG_DBG("setting ct zone %"PRIu16" limit to %"PRId64,
> > +                 ct_zone->zone, ct_zone->limit);
> > +    }
> > +}
> > +
> > +static int64_t
> > +ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp)
> > +{
> > +    int64_t limit = ovn_smap_get_llong(&dp->external_ids, "ct-zone-limit", -1);
> > +    return ct_zone_limit_normalize(limit);
> > +}
> > +
> > +static int64_t
> > +ct_zone_get_pb_limit(const struct sbrec_port_binding *pb)
> > +{
> > +    int64_t dp_limit = ovn_smap_get_llong(&pb->datapath->external_ids,
> > +                                          "ct-zone-limit", -1);
> > +    int64_t limit = ovn_smap_get_llong(&pb->options,
> > +                                       "ct-zone-limit", dp_limit);
> > +    return ct_zone_limit_normalize(limit);
> > +}
> > +
> > +static int64_t
> > +ct_zone_limit_normalize(int64_t limit)
> > +{
> > +    return limit >= 0 && limit <= UINT32_MAX ? limit : -1;
> > +}
> > diff --git a/controller/ct-zone.h b/controller/ct-zone.h
> > index a7c2011a1..295ac84f3 100644
> > --- a/controller/ct-zone.h
> > +++ b/controller/ct-zone.h
> > @@ -43,6 +43,7 @@ struct ct_zone_ctx {
> >
> >   struct ct_zone {
> >       uint16_t zone;
> > +    int64_t limit;
> >   };
> >
> >   /* States to move through when a new conntrack zone has been allocated. */
> > @@ -70,12 +71,19 @@ void ct_zones_update(const struct sset *local_lports,
> >                        const struct hmap *local_datapaths,
> >                        struct ct_zone_ctx *ctx);
> >   void ct_zones_commit(const struct ovsrec_bridge *br_int,
> > -                     struct shash *pending_ct_zones);
> > +                     const struct ovsrec_datapath *ovs_dp,
> > +                     struct ovsdb_idl_txn *ovs_idl_txn,
> > +                     struct ct_zone_ctx *ctx);
> >   void ct_zones_pending_clear_commited(struct shash *pending);
> >   bool ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> > -                              const struct sbrec_datapath_binding *dp);
> > -bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
> > +                              const struct sbrec_datapath_binding *dp,
> > +                              struct ovsdb_idl_index *pb_by_dp);
> > +bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
> > +                                const struct sbrec_port_binding *pb,
> >                                   bool updated, int *scan_start);
> >   uint16_t ct_zone_find_zone(const struct shash *ct_zones, const char *name);
> > +void ct_zones_limits_sync(struct ct_zone_ctx *ctx,
> > +                          const struct hmap *local_datapaths,
> > +                          struct ovsdb_idl_index *pb_by_dp);
> >
> >   #endif /* controller/ct-zone.h */
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 99a7c8617..a875977ad 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -798,6 +798,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
> >       ovsdb_idl_add_column(ovs_idl, &ovsrec_ssl_col_private_key);
> >       ovsdb_idl_add_table(ovs_idl, &ovsrec_table_datapath);
> >       ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_capabilities);
> > +    ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_ct_zones);
> >       ovsdb_idl_add_table(ovs_idl, &ovsrec_table_flow_sample_collector_set);
> >       ovsdb_idl_add_table(ovs_idl, &ovsrec_table_qos);
> >       ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_other_config);
> > @@ -807,6 +808,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
> >       ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_other_config);
> >       ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_external_ids);
> >       ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_link_state);
> > +    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_ct_zone);
> > +    ovsdb_idl_add_column(ovs_idl, &ovsrec_ct_zone_col_limit);
> >
> >       chassis_register_ovs_idl(ovs_idl);
> >       encaps_register_ovs_idl(ovs_idl);
> > @@ -2217,6 +2220,10 @@ en_ct_zones_run(struct engine_node *node, void *data)
> >       struct ed_type_ct_zones *ct_zones_data = data;
> >       struct ed_type_runtime_data *rt_data =
> >           engine_get_input_data("runtime_data", node);
> > +    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
> > +            engine_ovsdb_node_get_index(
> > +                    engine_get_input("SB_port_binding", node),
> > +                    "datapath");
> >
> >       const struct ovsrec_open_vswitch_table *ovs_table =
> >           EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
> > @@ -2230,6 +2237,8 @@ en_ct_zones_run(struct engine_node *node, void *data)
> >       ct_zones_restore(&ct_zones_data->ctx, ovs_table, dp_table, br_int);
> >       ct_zones_update(&rt_data->local_lports, &rt_data->local_datapaths,
> >                       &ct_zones_data->ctx);
> > +    ct_zones_limits_sync(&ct_zones_data->ctx, &rt_data->local_datapaths,
> > +                         sbrec_port_binding_by_datapath);
> >
> >       ct_zones_data->recomputed = true;
> >       engine_set_node_state(node, EN_UPDATED);
> > @@ -2247,6 +2256,10 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
> >           engine_get_input_data("runtime_data", node);
> >       const struct sbrec_datapath_binding_table *dp_table =
> >           EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
> > +    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
> > +            engine_ovsdb_node_get_index(
> > +                    engine_get_input("SB_port_binding", node),
> > +                    "datapath");
> >
> >       SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
> >           if (!get_local_datapath(&rt_data->local_datapaths,
> > @@ -2260,7 +2273,8 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
> >               return false;
> >           }
> >
> > -        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp)) {
> > +        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp,
> > +                                      sbrec_port_binding_by_datapath)) {
> >               return false;
> >           }
> >       }
> > @@ -2309,8 +2323,8 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data)
> >                       t_lport->tracked_type == TRACKED_RESOURCE_NEW ||
> >                       t_lport->tracked_type == TRACKED_RESOURCE_UPDATED;
> >               updated |= ct_zone_handle_port_update(&ct_zones_data->ctx,
> > -                                                  t_lport->pb->logical_port,
> > -                                                  port_updated, &scan_start);
> > +                                                  t_lport->pb, port_updated,
> > +                                                  &scan_start);
> >           }
> >       }
> >
> > @@ -5151,6 +5165,9 @@ main(int argc, char *argv[])
> >                        ct_zones_datapath_binding_handler);
> >       engine_add_input(&en_ct_zones, &en_runtime_data,
> >                        ct_zones_runtime_data_handler);
> > +    /* The CT node just needs the index for port bindings by name. The data
> > +     * for ports are processed in the runtime handler. */
> > +    engine_add_input(&en_ct_zones, &en_sb_port_binding, engine_noop_handler);
> >
> >       engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
> >                        ovs_interface_shadow_ovs_interface_handler);
> > @@ -5555,8 +5572,8 @@ main(int argc, char *argv[])
> >                           if (ct_zones_data) {
> >                               stopwatch_start(CT_ZONE_COMMIT_STOPWATCH_NAME,
> >                                               time_msec());
> > -                            ct_zones_commit(br_int,
> > -                                            &ct_zones_data->ctx.pending);
> > +                            ct_zones_commit(br_int, br_int_dp, ovs_idl_txn,
> > +                                            &ct_zones_data->ctx);
> >                               stopwatch_stop(CT_ZONE_COMMIT_STOPWATCH_NAME,
> >                                              time_msec());
> >                           }
> > diff --git a/lib/ovn-util.c b/lib/ovn-util.c
> > index 58e941193..1ad347419 100644
> > --- a/lib/ovn-util.c
> > +++ b/lib/ovn-util.c
> > @@ -816,6 +816,23 @@ str_tolower(const char *orig)
> >       return copy;
> >   }
> >
> > +/* This is a wrapper function which get the value associated with 'key' in
> > + * 'smap' and converts it to a long long. If 'key' is not in 'smap' or a
> > + * valid unsigned integer can't be parsed from its value, returns 'def'.
> > + */
> > +long long
> > +ovn_smap_get_llong(const struct smap *smap, const char *key, long long def)
> > +{
> > +    const char *value = smap_get(smap, key);
> > +    long long ll_value;
> > +
> > +    if (!value || !str_to_llong(value, 10, &ll_value)) {
> > +        return def;
> > +    }
> > +
> > +    return ll_value;
> > +}
> > +
> >   /* For a 'key' of the form "IP:port" or just "IP", sets 'port',
> >    * 'ip_address' and 'ip' ('struct in6_addr' IPv6 or IPv4 mapped address).
> >    * The caller must free() the memory allocated for 'ip_address'.
> > diff --git a/lib/ovn-util.h b/lib/ovn-util.h
> > index f75b821b6..ae971ce5a 100644
> > --- a/lib/ovn-util.h
> > +++ b/lib/ovn-util.h
> > @@ -211,6 +211,9 @@ char *normalize_v46_prefix(const struct in6_addr *prefix, unsigned int plen);
> >    */
> >   char *str_tolower(const char *orig);
> >
> > +long long ovn_smap_get_llong(const struct smap *smap, const char *key,
> > +                             long long def);
> > +
> >   /* OVN daemon options. Taken from ovs/lib/daemon.h. */
> >   #define OVN_DAEMON_OPTION_ENUMS                     \
> >       OVN_OPT_DETACH,                                 \
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 6898daa00..08208405b 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -741,6 +741,14 @@ ovn_datapath_update_external_ids(struct ovn_datapath *od)
> >           smap_add(&ids, "name2", name2);
> >       }
> >
> > +    int64_t ct_zone_limit = ovn_smap_get_llong(od->nbs ?
> > +                                               &od->nbs->other_config :
> > +                                               &od->nbr->options,
> > +                                               "ct-zone-limit", -1);
> > +    if (ct_zone_limit > 0) {
> > +        smap_add_format(&ids, "ct-zone-limit", "%"PRId64, ct_zone_limit);
> > +    }
> > +
> >       /* Set interconn-ts. */
> >       if (od->nbs) {
> >           const char *ts = smap_get(&od->nbs->other_config, "interconn-ts");
> > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > index 9552534f6..8e369a850 100644
> > --- a/ovn-nb.xml
> > +++ b/ovn-nb.xml
> > @@ -731,6 +731,17 @@
> >           this timeout will be automatically removed. The value defaults
> >           to 0, which means disabled.
> >         </column>
> > +
> > +      <column name="other_config" key="ct-zone-limit"
> > +              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
> > +        CT zone <code>limit</code> value for given
> > +        <ref table="Logical_Switch"/>. This value will be propagated to all
> > +        <ref table="Logical_Switch_Port"/> when configured, but can be
> > +        overwritten individually per <ref table="Logical_Switch_Port"/>. The
> > +        value 0 means  unlimited, when the option is not present the limit
>
> There are two spaces between "means" and "unlimited" instead of one.
> Also, I think that instead of a comma after "unlimited" it should be a
> period/full stop. In total, the suggestion is
>
> "value 0 means unlimited. When the option is not present the limit"
>
> > +        is not set and the zone limit is derived from OvS default datapath
> > +        limit.
> > +      </column>
> >       </group>
> >
> >       <group title="IP Multicast Snooping Options">
> > @@ -1132,6 +1143,16 @@
> >             <code>false</code>.
> >           </column>
> >
> > +        <column name="options" key="ct-zone-limit"
> > +                type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
> > +          CT zone <code>limit</code> value for given
> > +          <ref table="Logical_Switch_Port"/>. This value has priority over
> > +          limit specified on <ref table="Logical_Switch"/> when configured.
> > +          The value 0 means unlimited, when the option is not present the limit
>
> Same suggestion here about ending the sentence after "unlimited"
>
> > +          is not set and the zone limit is derived from OvS default datapath
> > +          limit.
> > +        </column>
> > +
> >         </group>
> >
> >         <group title="Options for localnet ports">
> > @@ -2795,6 +2816,14 @@ or
> >           </p>
> >
> >         </column>
> > +
> > +      <column name="options" key="ct-zone-limit"
> > +              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
> > +        CT zone <code>limit</code> value for given
> > +        <ref table="Logical_Router"/>. The value 0 means unlimited, when the
>
> And again here.
>
> > +        option is not present the limit is not set and the zone limit is
> > +        derived from OvS default datapath limit.
> > +      </column>
> >       </group>
> >
> >       <group title="Common Columns">
> > diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> > index 2cb86dc98..0a20cbc09 100644
> > --- a/tests/ovn-controller.at
> > +++ b/tests/ovn-controller.at
> > @@ -3127,3 +3127,102 @@ OVS_WAIT_UNTIL([grep -q 'tcp:127.0.0.1:1235: connected' hv1/ovn-controller.log])
> >
> >   OVN_CLEANUP([hv1])
> >   AT_CLEANUP
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([ovn-controller - CT zone limit])
> > +ovn_start
> > +
> > +net_add n1
> > +sim_add hv1
> > +as hv1
> > +check ovs-vsctl add-br br-phys
> > +ovn_attach n1 br-phys 192.168.0.1
> > +
> > +check ovn-appctl -t ovn-controller vlog/set dbg:ct_zone
> > +
> > +check ovs-vsctl add-port br-int lsp \
> > +    -- set Interface lsp external-ids:iface-id=lsp
> > +
> > +check ovn-nbctl lr-add lr
> > +
> > +check ovn-nbctl ls-add ls
> > +check ovn-nbctl lsp-add ls ls-lr
> > +check ovn-nbctl lsp-set-type ls-lr router
> > +check ovn-nbctl lsp-set-addresses ls-lr router
> > +check ovn-nbctl lrp-add lr lr-ls 00:00:00:00:00:01 10.0.0.1
> > +
> > +check ovn-nbctl lsp-add ls lsp
> > +check ovn-nbctl lsp-set-addresses lsp "00:00:00:00:00:02 10.0.0.2"
> > +
> > +check ovn-nbctl lrp-add lr lrp-gw 01:00:00:00:00:01 172.16.0.1
> > +check ovn-nbctl lrp-set-gateway-chassis lrp-gw hv1
> > +
> > +wait_for_ports_up
> > +check ovn-nbctl --wait=hv sync
> > +
> > +get_zone_num() {
> > +    output=$1
> > +    name=$2
> > +
> > +    printf "$output" | grep $name | cut -d ' ' -f 2
> > +}
> > +
> > +check_ovs_ct_limit() {
> > +    zone=$1
> > +    limit=$2
> > +
> > +    AT_CHECK_UNQUOTED([ovs-appctl dpctl/ct-get-limits zone=$zone | sed "s/count=.*/count=?/;s/default limit=.*/default limit=?/" | sort], [0], [dnl
> > +default limit=?
> > +zone=$zone,limit=$limit,count=?
> > +])
> > +}
> > +
> > +wait_ovs_ct_limit_count() {
> > +    count=$1
> > +
> > +    OVS_WAIT_UNTIL([test $count -eq $(ovs-vsctl --no-headings --format=table list CT_Zone | wc -l)])
> > +}
> > +
> > +ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list)
> > +lr_dnat=$(get_zone_num "$ct_zones" lr_dnat)
> > +lr_snat=$(get_zone_num "$ct_zones" lr_snat)
> > +
> > +ls_dnat=$(get_zone_num "$ct_zones" ls_dnat)
> > +ls_snat=$(get_zone_num "$ct_zones" ls_snat)
> > +
> > +lsp=$(get_zone_num "$ct_zones" lsp)
> > +
> > +wait_ovs_ct_limit_count 0
> > +
> > +check ovn-nbctl --wait=hv set Logical_Router lr options:ct-zone-limit=5
> > +wait_ovs_ct_limit_count 2
> > +check_ovs_ct_limit $lr_dnat 5
> > +check_ovs_ct_limit $lr_snat 5
> > +
> > +check ovn-nbctl --wait=hv remove Logical_Router lr options ct-zone-limit
> > +wait_ovs_ct_limit_count 0
> > +
> > +check ovn-nbctl --wait=hv set Logical_Switch ls other_config:ct-zone-limit=10
> > +wait_ovs_ct_limit_count 3
> > +check_ovs_ct_limit $ls_dnat 10
> > +check_ovs_ct_limit $ls_snat 10
> > +check_ovs_ct_limit $lsp 10
> > +
> > +check ovn-nbctl --wait=hv set Logical_Switch_Port lsp options:ct-zone-limit=5
> > +wait_ovs_ct_limit_count 3
> > +check_ovs_ct_limit $ls_dnat 10
> > +check_ovs_ct_limit $ls_snat 10
> > +check_ovs_ct_limit $lsp 5
> > +
> > +check ovn-nbctl --wait=hv remove Logical_Switch_Port lsp options ct-zone-limit
> > +wait_ovs_ct_limit_count 3
> > +check_ovs_ct_limit $ls_dnat 10
> > +check_ovs_ct_limit $ls_snat 10
> > +check_ovs_ct_limit $lsp 10
> > +
> > +check ovn-nbctl --wait=hv remove Logical_Switch ls other_config ct-zone-limit
> > +wait_ovs_ct_limit_count 0
> > +
> > +OVN_CLEANUP([hv1])
> > +AT_CLEANUP
> > +])
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Ales Musil July 25, 2024, 10:23 a.m. UTC | #3
On Wed, Jul 24, 2024 at 11:35 PM Numan Siddique <numans@ovn.org> wrote:

> On Wed, Jul 24, 2024 at 2:37 PM Mark Michelson <mmichels@redhat.com>
> wrote:
> >
> > Thanks Ales.
> >
> > Acked-by: Mark Michelson <mmichels@redhat.com>
> >
> > I have some nits regarding the documentation below. I think these can be
> > corrected when committing.
> >
> > On 7/12/24 10:30, Ales Musil wrote:
> > > Add support for limiting the CT zone usage per Ls, LR or LSP.
> > > When the limit is configured on logical switch it will also implicitly
> > > set limits for all ports in that logical switch. The port configuration
> > > can be overwritten individually and has priority over the whole logical
> > > switch configuration.
> > >
> > > The value 0 means unlimited, when the value is not specified it is
> > > derived from OvS default CT limit specified for given OvS datapath.
> > >
> > > Reported-at: https://bugzilla.redhat.com/2189924
> > > Signed-off-by: Ales Musil <amusil@redhat.com>
>
> Hi Ales,
>
> I've a few comments.  Please see below.
>

Hi Numan,

thank you for the review.



>
> > > ---
> > > v5: Rebase on top of latest main.
> > >      Avoid OvS CT zone lookup in every loop of pending commit.
> > > v4: Rebase on top of latest main.
> > >      Change naming of the ct_zone_limit_sync to avoid potential
> confusion as suggested by Lorenzo.
> > > v3: Rebase on top of latest main.
> > > ---
> > >   NEWS                        |   3 +
> > >   controller/ct-zone.c        | 176
> ++++++++++++++++++++++++++++++++----
> > >   controller/ct-zone.h        |  14 ++-
> > >   controller/ovn-controller.c |  27 +++++-
> > >   lib/ovn-util.c              |  17 ++++
> > >   lib/ovn-util.h              |   3 +
> > >   northd/northd.c             |   8 ++
> > >   ovn-nb.xml                  |  29 ++++++
> > >   tests/ovn-controller.at     |  99 ++++++++++++++++++++
> > >   9 files changed, 350 insertions(+), 26 deletions(-)
> > >
> > > diff --git a/NEWS b/NEWS
> > > index 3e392ff08..8e725684e 100644
> > > --- a/NEWS
> > > +++ b/NEWS
> > > @@ -38,6 +38,9 @@ Post v24.03.0
> > >       ability to disable "VXLAN mode" to extend available tunnel IDs
> space for
> > >       datapaths from 4095 to 16711680.  For more details see man
> ovn-nb(5) for
> > >       mentioned option.
> > > +  - Add support for CT zone limit that can be specified per LR
> > > +    (options:ct-zone-limit), LS (other_config:ct-zone-limit) or LSP
> > > +    (options:ct-zone-limit).
> > >
> > >   OVN v24.03.0 - 01 Mar 2024
> > >   --------------------------
> > > diff --git a/controller/ct-zone.c b/controller/ct-zone.c
> > > index ab0eec9d0..edee893c8 100644
> > > --- a/controller/ct-zone.c
> > > +++ b/controller/ct-zone.c
> > > @@ -34,6 +34,16 @@ static bool ct_zone_assign_unused(struct
> ct_zone_ctx *ctx,
> > >   static bool ct_zone_remove(struct ct_zone_ctx *ctx, const char
> *name);
> > >   static void ct_zone_add(struct ct_zone_ctx *ctx, const char *name,
> > >                           uint16_t zone, bool set_pending);
> > > +static void
> > > +ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
> > > +                             const struct sbrec_datapath_binding *dp,
> > > +                             const char *name,
> > > +                             struct ovsdb_idl_index *pb_by_dp);
> > > +static void ct_zone_limit_update(struct ct_zone_ctx *ctx, const char
> *name,
> > > +                                 int64_t limit);
> > > +static int64_t ct_zone_get_dp_limit(const struct
> sbrec_datapath_binding *dp);
> > > +static int64_t ct_zone_get_pb_limit(const struct sbrec_port_binding
> *pb);
> > > +static int64_t ct_zone_limit_normalize(int64_t limit);
> > >
> > >   void
> > >   ct_zone_ctx_init(struct ct_zone_ctx *ctx)
> > > @@ -210,11 +220,24 @@ ct_zones_update(const struct sset *local_lports,
> > >
> > >   void
> > >   ct_zones_commit(const struct ovsrec_bridge *br_int,
> > > -                struct shash *pending_ct_zones)
> > > +                const struct ovsrec_datapath *ovs_dp,
> > > +                struct ovsdb_idl_txn *ovs_idl_txn,
> > > +                struct ct_zone_ctx *ctx)
> > >   {
> > > +    if (shash_is_empty(&ctx->pending)) {
> > > +        return;
> > > +    }
> > > +
> > > +    struct ovsrec_ct_zone **all_zones =
> > > +            xzalloc(sizeof *all_zones * (MAX_CT_ZONES + 1));
> > > +    for (size_t i = 0; i < ovs_dp->n_ct_zones; i++) {
> > > +        all_zones[ovs_dp->key_ct_zones[i]] =
> ovs_dp->value_ct_zones[i];
> > > +    }
> > > +
> > >       struct shash_node *iter;
> > > -    SHASH_FOR_EACH (iter, pending_ct_zones) {
> > > +    SHASH_FOR_EACH (iter, &ctx->pending) {
> > >           struct ct_zone_pending_entry *ctzpe = iter->data;
> > > +        struct ct_zone *ct_zone = &ctzpe->ct_zone;
> > >
> > >           /* The transaction is open, so any pending entries in the
> > >            * CT_ZONE_DB_QUEUED must be sent and any in
> CT_ZONE_DB_QUEUED
> > > @@ -226,7 +249,7 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
> > >
> > >           char *user_str = xasprintf("ct-zone-%s", iter->name);
> > >           if (ctzpe->add) {
> > > -            char *zone_str = xasprintf("%"PRIu16,
> ctzpe->ct_zone.zone);
> > > +            char *zone_str = xasprintf("%"PRIu16, ct_zone->zone);
> > >               struct smap_node *node =
> > >                       smap_get_node(&br_int->external_ids, user_str);
> > >               if (!node || strcmp(node->value, zone_str)) {
> > > @@ -241,8 +264,22 @@ ct_zones_commit(const struct ovsrec_bridge
> *br_int,
> > >           }
> > >           free(user_str);
> > >
> > > +        struct ovsrec_ct_zone *ovs_zone = all_zones[ct_zone->zone];
> > > +        if ((!ctzpe->add || ct_zone->limit < 0) && ovs_zone) {
> > > +            ovsrec_datapath_update_ct_zones_delkey(ovs_dp,
> ct_zone->zone);
> > > +        } else if (ctzpe->add && ct_zone->limit >= 0) {
> > > +            if (!ovs_zone) {
> > > +                ovs_zone = ovsrec_ct_zone_insert(ovs_idl_txn);
> > > +                ovsrec_datapath_update_ct_zones_setkey(ovs_dp,
> ct_zone->zone,
> > > +                                                       ovs_zone);
> > > +            }
> > > +            ovsrec_ct_zone_set_limit(ovs_zone, &ct_zone->limit, 1);
> > > +        }
> > > +
> > >           ctzpe->state = CT_ZONE_DB_SENT;
> > >       }
> > > +
> > > +    free(all_zones);
> > >   }
> > >
> > >   void
> > > @@ -261,8 +298,19 @@ ct_zones_pending_clear_commited(struct shash
> *pending)
> > >   /* Returns "true" when there is no need for full recompute. */
> > >   bool
> > >   ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> > > -                         const struct sbrec_datapath_binding *dp)
> > > +                         const struct sbrec_datapath_binding *dp,
> > > +                         struct ovsdb_idl_index *pb_by_dp)
> > >   {
> > > +    const char *name = smap_get(&dp->external_ids, "name");
> > > +    if (!name) {
> > > +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> > > +        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"'
> skipping"
> > > +                    "zone check.", UUID_ARGS(&dp->header_.uuid));
> > > +        return true;
> > > +    }
> > > +
> > > +    ct_zone_limits_update_per_dp(ctx, dp, name, pb_by_dp);
> > > +
> > >       int req_snat_zone = ct_zone_get_snat(dp);
> > >       if (req_snat_zone == -1) {
> > >           /* datapath snat ct zone is not set.  This condition will
> also hit
> > > @@ -273,14 +321,6 @@ ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> > >           return true;
> > >       }
> > >
> > > -    const char *name = smap_get(&dp->external_ids, "name");
> > > -    if (!name) {
> > > -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> > > -        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"'
> skipping"
> > > -                    "zone check.", UUID_ARGS(&dp->header_.uuid));
> > > -        return true;
> > > -    }
> > > -
> > >       /* Check if the requested snat zone has changed for the datapath
> > >        * or not.  If so, then fall back to full recompute of
> > >        * ct_zone engine. */
> > > @@ -304,14 +344,18 @@ ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> > >
> > >   /* Returns "true" if there was an update to the context. */
> > >   bool
> > > -ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
> > > +ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
> > > +                           const struct sbrec_port_binding *pb,
> > >                              bool updated, int *scan_start)
> > >   {
> > > -    struct shash_node *node = shash_find(&ctx->current, name);
> > > -    if (updated && !node) {
> > > -        ct_zone_assign_unused(ctx, name, scan_start);
> > > +    struct shash_node *node = shash_find(&ctx->current,
> pb->logical_port);
> > > +    if (updated) {
> > > +        if (!node) {
> > > +            ct_zone_assign_unused(ctx, pb->logical_port, scan_start);
> > > +        }
> > > +        ct_zone_limit_update(ctx, pb->logical_port,
> ct_zone_get_pb_limit(pb));
> > >           return true;
> > > -    } else if (!updated && node && ct_zone_remove(ctx, node->name)) {
> > > +    } else if (node && ct_zone_remove(ctx, node->name)) {
> > >           return true;
> > >       }
> > >
> > > @@ -325,6 +369,25 @@ ct_zone_find_zone(const struct shash *ct_zones,
> const char *name)
> > >       return ct_zone ? ct_zone->zone : 0;
> > >   }
> > >
> > > +void
> > > +ct_zones_limits_sync(struct ct_zone_ctx *ctx,
> > > +                     const struct hmap *local_datapaths,
> > > +                     struct ovsdb_idl_index *pb_by_dp)
> > > +{
> > > +    const struct local_datapath *ld;
> > > +    HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> > > +        const char *name = smap_get(&ld->datapath->external_ids,
> "name");
> > > +        if (!name) {
> > > +            static struct vlog_rate_limit rl =
> VLOG_RATE_LIMIT_INIT(5, 1);
> > > +            VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' "
> > > +                        "skipping zone assignment.",
> > > +                        UUID_ARGS(&ld->datapath->header_.uuid));
> > > +            continue;
> > > +        }
> > > +
> > > +        ct_zone_limits_update_per_dp(ctx, ld->datapath, name,
> pb_by_dp);
> > > +    }
> > > +}
> > >
> > >   static bool
> > >   ct_zone_assign_unused(struct ct_zone_ctx *ctx, const char *zone_name,
> > > @@ -377,7 +440,10 @@ ct_zone_add(struct ct_zone_ctx *ctx, const char
> *name, uint16_t zone,
> > >           shash_add(&ctx->current, name, ct_zone);
> > >       }
> > >
> > > -    ct_zone->zone = zone;
> > > +    *ct_zone = (struct ct_zone) {
> > > +        .zone = zone,
> > > +        .limit = -1,
> > > +    };
> > >
> > >       if (set_pending) {
> > >           ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
> > > @@ -460,6 +526,7 @@ ct_zone_restore(const struct
> sbrec_datapath_binding_table *dp_table,
> > >
> > >           struct ct_zone ct_zone = {
> > >               .zone = zone,
> > > +            .limit = -1,
> > >           };
> > >           /* Make sure we remove the uuid one in the next OvS DB
> commit without
> > >            * flush. */
> > > @@ -475,3 +542,76 @@ ct_zone_restore(const struct
> sbrec_datapath_binding_table *dp_table,
> > >       ct_zone_add(ctx, current_name, zone, false);
> > >       free(new_name);
> > >   }
> > > +
> > > +static void
> > > +ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
> > > +                             const struct sbrec_datapath_binding *dp,
> > > +                             const char *name,
> > > +                             struct ovsdb_idl_index *pb_by_dp)
> > > +{
> > > +    if (smap_get(&dp->external_ids, "logical-switch")) {
> smap_get() lookup can be avoided if this function takes  'struct
> local_datapath'
> instead of 'struct sbrec_datapath_binding'.  'struct local_datapath'
> has already 'is_switch' .
>
>
> > > +        struct sbrec_port_binding *target =
> > > +                sbrec_port_binding_index_init_row(pb_by_dp);
> > > +        sbrec_port_binding_index_set_datapath(target, dp);
> > > +
> > > +        const struct sbrec_port_binding *pb;
> > > +        SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target, pb_by_dp) {
> > > +            ct_zone_limit_update(ctx, pb->logical_port,
> > > +                                 ct_zone_get_pb_limit(pb));
> > > +        }
> > > +
> '
> I think iterating through all the lports of a datapath can be a bit
> inefficient, as
> it would also iterate through the logical ports bound on other chassis
> and logical ports
> of other types (patch, external etc).   ct_zone_limit_update() will be
> called for each
> of these logical ports  resulting in unnecessary look up in the
> ct_zones_ctx->current.
> Instead I'd suggest using the runtime data 'lbinding_data'.  You can
> iterate either
> lbinding_data.bindings or lbinding_data.lports to update the ct zone
> limit for an lport
> bound on this chassis.
>

I have switched the loop to use lbinding_data.lports to iterate
only through local ports.


>
> ct_zone_limits_update_per_dp() will also be called for any update to
> the datapath binding,
> results in running the above for loop - SBREC_PORT_BINDING_FOR_EACH_EQUAL
> which seems unnecessary. I think you can decouple lport ct zone limit
> updates
> from the dp zone limit update.
>

It cannot be decoupled because limit per datapath
should be applied to all ports in that datapath, however
I have added optimization to skip the loop completely
if the limit didn't change for the datapath.


>
> Thanks
> Numan
>
>
> > > +        sbrec_port_binding_index_destroy_row(target);
> > > +    }
> > > +
> > > +    int64_t dp_limit = ct_zone_get_dp_limit(dp);
> > > +    char *dnat = alloc_nat_zone_key(name, "dnat");
> > > +    char *snat = alloc_nat_zone_key(name, "snat");
> > > +
> > > +    ct_zone_limit_update(ctx, dnat, dp_limit);
> > > +    ct_zone_limit_update(ctx, snat, dp_limit);
> > > +
> > > +    free(dnat);
> > > +    free(snat);
> > > +}
> > > +
> > > +static void
> > > +ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name,
> int64_t limit)
> > > +{
> > > +    struct ct_zone *ct_zone = shash_find_data(&ctx->current, name);
> > > +
> > > +    if (!ct_zone) {
> > > +        return;
> > > +    }
> > > +
> > > +    if (ct_zone->limit != limit) {
> > > +        ct_zone->limit = limit;
> > > +        /* Add pending entry only for DB store to avoid flushing the
> zone. */
> > > +        ct_zone_add_pending(&ctx->pending, CT_ZONE_DB_QUEUED, ct_zone,
> > > +                            true, name);
> > > +        VLOG_DBG("setting ct zone %"PRIu16" limit to %"PRId64,
> > > +                 ct_zone->zone, ct_zone->limit);
> > > +    }
> > > +}
> > > +
> > > +static int64_t
> > > +ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp)
> > > +{
> > > +    int64_t limit = ovn_smap_get_llong(&dp->external_ids,
> "ct-zone-limit", -1);
> > > +    return ct_zone_limit_normalize(limit);
> > > +}
> > > +
> > > +static int64_t
> > > +ct_zone_get_pb_limit(const struct sbrec_port_binding *pb)
> > > +{
> > > +    int64_t dp_limit = ovn_smap_get_llong(&pb->datapath->external_ids,
> > > +                                          "ct-zone-limit", -1);
> > > +    int64_t limit = ovn_smap_get_llong(&pb->options,
> > > +                                       "ct-zone-limit", dp_limit);
> > > +    return ct_zone_limit_normalize(limit);
> > > +}
> > > +
> > > +static int64_t
> > > +ct_zone_limit_normalize(int64_t limit)
> > > +{
> > > +    return limit >= 0 && limit <= UINT32_MAX ? limit : -1;
> > > +}
> > > diff --git a/controller/ct-zone.h b/controller/ct-zone.h
> > > index a7c2011a1..295ac84f3 100644
> > > --- a/controller/ct-zone.h
> > > +++ b/controller/ct-zone.h
> > > @@ -43,6 +43,7 @@ struct ct_zone_ctx {
> > >
> > >   struct ct_zone {
> > >       uint16_t zone;
> > > +    int64_t limit;
> > >   };
> > >
> > >   /* States to move through when a new conntrack zone has been
> allocated. */
> > > @@ -70,12 +71,19 @@ void ct_zones_update(const struct sset
> *local_lports,
> > >                        const struct hmap *local_datapaths,
> > >                        struct ct_zone_ctx *ctx);
> > >   void ct_zones_commit(const struct ovsrec_bridge *br_int,
> > > -                     struct shash *pending_ct_zones);
> > > +                     const struct ovsrec_datapath *ovs_dp,
> > > +                     struct ovsdb_idl_txn *ovs_idl_txn,
> > > +                     struct ct_zone_ctx *ctx);
> > >   void ct_zones_pending_clear_commited(struct shash *pending);
> > >   bool ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
> > > -                              const struct sbrec_datapath_binding
> *dp);
> > > -bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char
> *name,
> > > +                              const struct sbrec_datapath_binding *dp,
> > > +                              struct ovsdb_idl_index *pb_by_dp);
> > > +bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
> > > +                                const struct sbrec_port_binding *pb,
> > >                                   bool updated, int *scan_start);
> > >   uint16_t ct_zone_find_zone(const struct shash *ct_zones, const char
> *name);
> > > +void ct_zones_limits_sync(struct ct_zone_ctx *ctx,
> > > +                          const struct hmap *local_datapaths,
> > > +                          struct ovsdb_idl_index *pb_by_dp);
> > >
> > >   #endif /* controller/ct-zone.h */
> > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > > index 99a7c8617..a875977ad 100644
> > > --- a/controller/ovn-controller.c
> > > +++ b/controller/ovn-controller.c
> > > @@ -798,6 +798,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
> > >       ovsdb_idl_add_column(ovs_idl, &ovsrec_ssl_col_private_key);
> > >       ovsdb_idl_add_table(ovs_idl, &ovsrec_table_datapath);
> > >       ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_capabilities);
> > > +    ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_ct_zones);
> > >       ovsdb_idl_add_table(ovs_idl,
> &ovsrec_table_flow_sample_collector_set);
> > >       ovsdb_idl_add_table(ovs_idl, &ovsrec_table_qos);
> > >       ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_other_config);
> > > @@ -807,6 +808,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
> > >       ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_other_config);
> > >       ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_external_ids);
> > >       ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_link_state);
> > > +    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_ct_zone);
> > > +    ovsdb_idl_add_column(ovs_idl, &ovsrec_ct_zone_col_limit);
> > >
> > >       chassis_register_ovs_idl(ovs_idl);
> > >       encaps_register_ovs_idl(ovs_idl);
> > > @@ -2217,6 +2220,10 @@ en_ct_zones_run(struct engine_node *node, void
> *data)
> > >       struct ed_type_ct_zones *ct_zones_data = data;
> > >       struct ed_type_runtime_data *rt_data =
> > >           engine_get_input_data("runtime_data", node);
> > > +    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
> > > +            engine_ovsdb_node_get_index(
> > > +                    engine_get_input("SB_port_binding", node),
> > > +                    "datapath");
> > >
> > >       const struct ovsrec_open_vswitch_table *ovs_table =
> > >           EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
> > > @@ -2230,6 +2237,8 @@ en_ct_zones_run(struct engine_node *node, void
> *data)
> > >       ct_zones_restore(&ct_zones_data->ctx, ovs_table, dp_table,
> br_int);
> > >       ct_zones_update(&rt_data->local_lports,
> &rt_data->local_datapaths,
> > >                       &ct_zones_data->ctx);
> > > +    ct_zones_limits_sync(&ct_zones_data->ctx,
> &rt_data->local_datapaths,
> > > +                         sbrec_port_binding_by_datapath);
> > >
> > >       ct_zones_data->recomputed = true;
> > >       engine_set_node_state(node, EN_UPDATED);
> > > @@ -2247,6 +2256,10 @@ ct_zones_datapath_binding_handler(struct
> engine_node *node, void *data)
> > >           engine_get_input_data("runtime_data", node);
> > >       const struct sbrec_datapath_binding_table *dp_table =
> > >           EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
> > > +    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
> > > +            engine_ovsdb_node_get_index(
> > > +                    engine_get_input("SB_port_binding", node),
> > > +                    "datapath");
> > >
> > >       SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
> > >           if (!get_local_datapath(&rt_data->local_datapaths,
> > > @@ -2260,7 +2273,8 @@ ct_zones_datapath_binding_handler(struct
> engine_node *node, void *data)
> > >               return false;
> > >           }
> > >
> > > -        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp)) {
> > > +        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp,
> > > +
> sbrec_port_binding_by_datapath)) {
> > >               return false;
> > >           }
> > >       }
> > > @@ -2309,8 +2323,8 @@ ct_zones_runtime_data_handler(struct engine_node
> *node, void *data)
> > >                       t_lport->tracked_type == TRACKED_RESOURCE_NEW ||
> > >                       t_lport->tracked_type ==
> TRACKED_RESOURCE_UPDATED;
> > >               updated |=
> ct_zone_handle_port_update(&ct_zones_data->ctx,
> > > -
> t_lport->pb->logical_port,
> > > -                                                  port_updated,
> &scan_start);
> > > +                                                  t_lport->pb,
> port_updated,
> > > +                                                  &scan_start);
> > >           }
> > >       }
> > >
> > > @@ -5151,6 +5165,9 @@ main(int argc, char *argv[])
> > >                        ct_zones_datapath_binding_handler);
> > >       engine_add_input(&en_ct_zones, &en_runtime_data,
> > >                        ct_zones_runtime_data_handler);
> > > +    /* The CT node just needs the index for port bindings by name.
> The data
> > > +     * for ports are processed in the runtime handler. */
> > > +    engine_add_input(&en_ct_zones, &en_sb_port_binding,
> engine_noop_handler);
> > >
> > >       engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
> > >                        ovs_interface_shadow_ovs_interface_handler);
> > > @@ -5555,8 +5572,8 @@ main(int argc, char *argv[])
> > >                           if (ct_zones_data) {
> > >
>  stopwatch_start(CT_ZONE_COMMIT_STOPWATCH_NAME,
> > >                                               time_msec());
> > > -                            ct_zones_commit(br_int,
> > > -
> &ct_zones_data->ctx.pending);
> > > +                            ct_zones_commit(br_int, br_int_dp,
> ovs_idl_txn,
> > > +                                            &ct_zones_data->ctx);
> > >
>  stopwatch_stop(CT_ZONE_COMMIT_STOPWATCH_NAME,
> > >                                              time_msec());
> > >                           }
> > > diff --git a/lib/ovn-util.c b/lib/ovn-util.c
> > > index 58e941193..1ad347419 100644
> > > --- a/lib/ovn-util.c
> > > +++ b/lib/ovn-util.c
> > > @@ -816,6 +816,23 @@ str_tolower(const char *orig)
> > >       return copy;
> > >   }
> > >
> > > +/* This is a wrapper function which get the value associated with
> 'key' in
> > > + * 'smap' and converts it to a long long. If 'key' is not in 'smap'
> or a
> > > + * valid unsigned integer can't be parsed from its value, returns
> 'def'.
> > > + */
> > > +long long
> > > +ovn_smap_get_llong(const struct smap *smap, const char *key, long
> long def)
> > > +{
> > > +    const char *value = smap_get(smap, key);
> > > +    long long ll_value;
> > > +
> > > +    if (!value || !str_to_llong(value, 10, &ll_value)) {
> > > +        return def;
> > > +    }
> > > +
> > > +    return ll_value;
> > > +}
> > > +
> > >   /* For a 'key' of the form "IP:port" or just "IP", sets 'port',
> > >    * 'ip_address' and 'ip' ('struct in6_addr' IPv6 or IPv4 mapped
> address).
> > >    * The caller must free() the memory allocated for 'ip_address'.
> > > diff --git a/lib/ovn-util.h b/lib/ovn-util.h
> > > index f75b821b6..ae971ce5a 100644
> > > --- a/lib/ovn-util.h
> > > +++ b/lib/ovn-util.h
> > > @@ -211,6 +211,9 @@ char *normalize_v46_prefix(const struct in6_addr
> *prefix, unsigned int plen);
> > >    */
> > >   char *str_tolower(const char *orig);
> > >
> > > +long long ovn_smap_get_llong(const struct smap *smap, const char *key,
> > > +                             long long def);
> > > +
> > >   /* OVN daemon options. Taken from ovs/lib/daemon.h. */
> > >   #define OVN_DAEMON_OPTION_ENUMS                     \
> > >       OVN_OPT_DETACH,                                 \
> > > diff --git a/northd/northd.c b/northd/northd.c
> > > index 6898daa00..08208405b 100644
> > > --- a/northd/northd.c
> > > +++ b/northd/northd.c
> > > @@ -741,6 +741,14 @@ ovn_datapath_update_external_ids(struct
> ovn_datapath *od)
> > >           smap_add(&ids, "name2", name2);
> > >       }
> > >
> > > +    int64_t ct_zone_limit = ovn_smap_get_llong(od->nbs ?
> > > +                                               &od->nbs->other_config
> :
> > > +                                               &od->nbr->options,
> > > +                                               "ct-zone-limit", -1);
> > > +    if (ct_zone_limit > 0) {
> > > +        smap_add_format(&ids, "ct-zone-limit", "%"PRId64,
> ct_zone_limit);
> > > +    }
> > > +
> > >       /* Set interconn-ts. */
> > >       if (od->nbs) {
> > >           const char *ts = smap_get(&od->nbs->other_config,
> "interconn-ts");
> > > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > > index 9552534f6..8e369a850 100644
> > > --- a/ovn-nb.xml
> > > +++ b/ovn-nb.xml
> > > @@ -731,6 +731,17 @@
> > >           this timeout will be automatically removed. The value
> defaults
> > >           to 0, which means disabled.
> > >         </column>
> > > +
> > > +      <column name="other_config" key="ct-zone-limit"
> > > +              type='{"type": "integer", "minInteger": 0,
> "maxInteger": 4294967295}'>
> > > +        CT zone <code>limit</code> value for given
> > > +        <ref table="Logical_Switch"/>. This value will be propagated
> to all
> > > +        <ref table="Logical_Switch_Port"/> when configured, but can be
> > > +        overwritten individually per <ref
> table="Logical_Switch_Port"/>. The
> > > +        value 0 means  unlimited, when the option is not present the
> limit
> >
> > There are two spaces between "means" and "unlimited" instead of one.
> > Also, I think that instead of a comma after "unlimited" it should be a
> > period/full stop. In total, the suggestion is
> >
> > "value 0 means unlimited. When the option is not present the limit"
> >
> > > +        is not set and the zone limit is derived from OvS default
> datapath
> > > +        limit.
> > > +      </column>
> > >       </group>
> > >
> > >       <group title="IP Multicast Snooping Options">
> > > @@ -1132,6 +1143,16 @@
> > >             <code>false</code>.
> > >           </column>
> > >
> > > +        <column name="options" key="ct-zone-limit"
> > > +                type='{"type": "integer", "minInteger": 0,
> "maxInteger": 4294967295}'>
> > > +          CT zone <code>limit</code> value for given
> > > +          <ref table="Logical_Switch_Port"/>. This value has priority
> over
> > > +          limit specified on <ref table="Logical_Switch"/> when
> configured.
> > > +          The value 0 means unlimited, when the option is not present
> the limit
> >
> > Same suggestion here about ending the sentence after "unlimited"
> >
> > > +          is not set and the zone limit is derived from OvS default
> datapath
> > > +          limit.
> > > +        </column>
> > > +
> > >         </group>
> > >
> > >         <group title="Options for localnet ports">
> > > @@ -2795,6 +2816,14 @@ or
> > >           </p>
> > >
> > >         </column>
> > > +
> > > +      <column name="options" key="ct-zone-limit"
> > > +              type='{"type": "integer", "minInteger": 0,
> "maxInteger": 4294967295}'>
> > > +        CT zone <code>limit</code> value for given
> > > +        <ref table="Logical_Router"/>. The value 0 means unlimited,
> when the
> >
> > And again here.
> >
> > > +        option is not present the limit is not set and the zone limit
> is
> > > +        derived from OvS default datapath limit.
> > > +      </column>
> > >       </group>
> > >
> > >       <group title="Common Columns">
> > > diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> > > index 2cb86dc98..0a20cbc09 100644
> > > --- a/tests/ovn-controller.at
> > > +++ b/tests/ovn-controller.at
> > > @@ -3127,3 +3127,102 @@ OVS_WAIT_UNTIL([grep -q 'tcp:127.0.0.1:1235:
> connected' hv1/ovn-controller.log])
> > >
> > >   OVN_CLEANUP([hv1])
> > >   AT_CLEANUP
> > > +
> > > +OVN_FOR_EACH_NORTHD([
> > > +AT_SETUP([ovn-controller - CT zone limit])
> > > +ovn_start
> > > +
> > > +net_add n1
> > > +sim_add hv1
> > > +as hv1
> > > +check ovs-vsctl add-br br-phys
> > > +ovn_attach n1 br-phys 192.168.0.1
> > > +
> > > +check ovn-appctl -t ovn-controller vlog/set dbg:ct_zone
> > > +
> > > +check ovs-vsctl add-port br-int lsp \
> > > +    -- set Interface lsp external-ids:iface-id=lsp
> > > +
> > > +check ovn-nbctl lr-add lr
> > > +
> > > +check ovn-nbctl ls-add ls
> > > +check ovn-nbctl lsp-add ls ls-lr
> > > +check ovn-nbctl lsp-set-type ls-lr router
> > > +check ovn-nbctl lsp-set-addresses ls-lr router
> > > +check ovn-nbctl lrp-add lr lr-ls 00:00:00:00:00:01 10.0.0.1
> > > +
> > > +check ovn-nbctl lsp-add ls lsp
> > > +check ovn-nbctl lsp-set-addresses lsp "00:00:00:00:00:02 10.0.0.2"
> > > +
> > > +check ovn-nbctl lrp-add lr lrp-gw 01:00:00:00:00:01 172.16.0.1
> > > +check ovn-nbctl lrp-set-gateway-chassis lrp-gw hv1
> > > +
> > > +wait_for_ports_up
> > > +check ovn-nbctl --wait=hv sync
> > > +
> > > +get_zone_num() {
> > > +    output=$1
> > > +    name=$2
> > > +
> > > +    printf "$output" | grep $name | cut -d ' ' -f 2
> > > +}
> > > +
> > > +check_ovs_ct_limit() {
> > > +    zone=$1
> > > +    limit=$2
> > > +
> > > +    AT_CHECK_UNQUOTED([ovs-appctl dpctl/ct-get-limits zone=$zone |
> sed "s/count=.*/count=?/;s/default limit=.*/default limit=?/" | sort], [0],
> [dnl
> > > +default limit=?
> > > +zone=$zone,limit=$limit,count=?
> > > +])
> > > +}
> > > +
> > > +wait_ovs_ct_limit_count() {
> > > +    count=$1
> > > +
> > > +    OVS_WAIT_UNTIL([test $count -eq $(ovs-vsctl --no-headings
> --format=table list CT_Zone | wc -l)])
> > > +}
> > > +
> > > +ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list)
> > > +lr_dnat=$(get_zone_num "$ct_zones" lr_dnat)
> > > +lr_snat=$(get_zone_num "$ct_zones" lr_snat)
> > > +
> > > +ls_dnat=$(get_zone_num "$ct_zones" ls_dnat)
> > > +ls_snat=$(get_zone_num "$ct_zones" ls_snat)
> > > +
> > > +lsp=$(get_zone_num "$ct_zones" lsp)
> > > +
> > > +wait_ovs_ct_limit_count 0
> > > +
> > > +check ovn-nbctl --wait=hv set Logical_Router lr
> options:ct-zone-limit=5
> > > +wait_ovs_ct_limit_count 2
> > > +check_ovs_ct_limit $lr_dnat 5
> > > +check_ovs_ct_limit $lr_snat 5
> > > +
> > > +check ovn-nbctl --wait=hv remove Logical_Router lr options
> ct-zone-limit
> > > +wait_ovs_ct_limit_count 0
> > > +
> > > +check ovn-nbctl --wait=hv set Logical_Switch ls
> other_config:ct-zone-limit=10
> > > +wait_ovs_ct_limit_count 3
> > > +check_ovs_ct_limit $ls_dnat 10
> > > +check_ovs_ct_limit $ls_snat 10
> > > +check_ovs_ct_limit $lsp 10
> > > +
> > > +check ovn-nbctl --wait=hv set Logical_Switch_Port lsp
> options:ct-zone-limit=5
> > > +wait_ovs_ct_limit_count 3
> > > +check_ovs_ct_limit $ls_dnat 10
> > > +check_ovs_ct_limit $ls_snat 10
> > > +check_ovs_ct_limit $lsp 5
> > > +
> > > +check ovn-nbctl --wait=hv remove Logical_Switch_Port lsp options
> ct-zone-limit
> > > +wait_ovs_ct_limit_count 3
> > > +check_ovs_ct_limit $ls_dnat 10
> > > +check_ovs_ct_limit $ls_snat 10
> > > +check_ovs_ct_limit $lsp 10
> > > +
> > > +check ovn-nbctl --wait=hv remove Logical_Switch ls other_config
> ct-zone-limit
> > > +wait_ovs_ct_limit_count 0
> > > +
> > > +OVN_CLEANUP([hv1])
> > > +AT_CLEANUP
> > > +])
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
>
>
Thanks,
Ales
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 3e392ff08..8e725684e 100644
--- a/NEWS
+++ b/NEWS
@@ -38,6 +38,9 @@  Post v24.03.0
     ability to disable "VXLAN mode" to extend available tunnel IDs space for
     datapaths from 4095 to 16711680.  For more details see man ovn-nb(5) for
     mentioned option.
+  - Add support for CT zone limit that can be specified per LR
+    (options:ct-zone-limit), LS (other_config:ct-zone-limit) or LSP
+    (options:ct-zone-limit).
 
 OVN v24.03.0 - 01 Mar 2024
 --------------------------
diff --git a/controller/ct-zone.c b/controller/ct-zone.c
index ab0eec9d0..edee893c8 100644
--- a/controller/ct-zone.c
+++ b/controller/ct-zone.c
@@ -34,6 +34,16 @@  static bool ct_zone_assign_unused(struct ct_zone_ctx *ctx,
 static bool ct_zone_remove(struct ct_zone_ctx *ctx, const char *name);
 static void ct_zone_add(struct ct_zone_ctx *ctx, const char *name,
                         uint16_t zone, bool set_pending);
+static void
+ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
+                             const struct sbrec_datapath_binding *dp,
+                             const char *name,
+                             struct ovsdb_idl_index *pb_by_dp);
+static void ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name,
+                                 int64_t limit);
+static int64_t ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp);
+static int64_t ct_zone_get_pb_limit(const struct sbrec_port_binding *pb);
+static int64_t ct_zone_limit_normalize(int64_t limit);
 
 void
 ct_zone_ctx_init(struct ct_zone_ctx *ctx)
@@ -210,11 +220,24 @@  ct_zones_update(const struct sset *local_lports,
 
 void
 ct_zones_commit(const struct ovsrec_bridge *br_int,
-                struct shash *pending_ct_zones)
+                const struct ovsrec_datapath *ovs_dp,
+                struct ovsdb_idl_txn *ovs_idl_txn,
+                struct ct_zone_ctx *ctx)
 {
+    if (shash_is_empty(&ctx->pending)) {
+        return;
+    }
+
+    struct ovsrec_ct_zone **all_zones =
+            xzalloc(sizeof *all_zones * (MAX_CT_ZONES + 1));
+    for (size_t i = 0; i < ovs_dp->n_ct_zones; i++) {
+        all_zones[ovs_dp->key_ct_zones[i]] = ovs_dp->value_ct_zones[i];
+    }
+
     struct shash_node *iter;
-    SHASH_FOR_EACH (iter, pending_ct_zones) {
+    SHASH_FOR_EACH (iter, &ctx->pending) {
         struct ct_zone_pending_entry *ctzpe = iter->data;
+        struct ct_zone *ct_zone = &ctzpe->ct_zone;
 
         /* The transaction is open, so any pending entries in the
          * CT_ZONE_DB_QUEUED must be sent and any in CT_ZONE_DB_QUEUED
@@ -226,7 +249,7 @@  ct_zones_commit(const struct ovsrec_bridge *br_int,
 
         char *user_str = xasprintf("ct-zone-%s", iter->name);
         if (ctzpe->add) {
-            char *zone_str = xasprintf("%"PRIu16, ctzpe->ct_zone.zone);
+            char *zone_str = xasprintf("%"PRIu16, ct_zone->zone);
             struct smap_node *node =
                     smap_get_node(&br_int->external_ids, user_str);
             if (!node || strcmp(node->value, zone_str)) {
@@ -241,8 +264,22 @@  ct_zones_commit(const struct ovsrec_bridge *br_int,
         }
         free(user_str);
 
+        struct ovsrec_ct_zone *ovs_zone = all_zones[ct_zone->zone];
+        if ((!ctzpe->add || ct_zone->limit < 0) && ovs_zone) {
+            ovsrec_datapath_update_ct_zones_delkey(ovs_dp, ct_zone->zone);
+        } else if (ctzpe->add && ct_zone->limit >= 0) {
+            if (!ovs_zone) {
+                ovs_zone = ovsrec_ct_zone_insert(ovs_idl_txn);
+                ovsrec_datapath_update_ct_zones_setkey(ovs_dp, ct_zone->zone,
+                                                       ovs_zone);
+            }
+            ovsrec_ct_zone_set_limit(ovs_zone, &ct_zone->limit, 1);
+        }
+
         ctzpe->state = CT_ZONE_DB_SENT;
     }
+
+    free(all_zones);
 }
 
 void
@@ -261,8 +298,19 @@  ct_zones_pending_clear_commited(struct shash *pending)
 /* Returns "true" when there is no need for full recompute. */
 bool
 ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
-                         const struct sbrec_datapath_binding *dp)
+                         const struct sbrec_datapath_binding *dp,
+                         struct ovsdb_idl_index *pb_by_dp)
 {
+    const char *name = smap_get(&dp->external_ids, "name");
+    if (!name) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
+                    "zone check.", UUID_ARGS(&dp->header_.uuid));
+        return true;
+    }
+
+    ct_zone_limits_update_per_dp(ctx, dp, name, pb_by_dp);
+
     int req_snat_zone = ct_zone_get_snat(dp);
     if (req_snat_zone == -1) {
         /* datapath snat ct zone is not set.  This condition will also hit
@@ -273,14 +321,6 @@  ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
         return true;
     }
 
-    const char *name = smap_get(&dp->external_ids, "name");
-    if (!name) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-        VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
-                    "zone check.", UUID_ARGS(&dp->header_.uuid));
-        return true;
-    }
-
     /* Check if the requested snat zone has changed for the datapath
      * or not.  If so, then fall back to full recompute of
      * ct_zone engine. */
@@ -304,14 +344,18 @@  ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
 
 /* Returns "true" if there was an update to the context. */
 bool
-ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
+ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
+                           const struct sbrec_port_binding *pb,
                            bool updated, int *scan_start)
 {
-    struct shash_node *node = shash_find(&ctx->current, name);
-    if (updated && !node) {
-        ct_zone_assign_unused(ctx, name, scan_start);
+    struct shash_node *node = shash_find(&ctx->current, pb->logical_port);
+    if (updated) {
+        if (!node) {
+            ct_zone_assign_unused(ctx, pb->logical_port, scan_start);
+        }
+        ct_zone_limit_update(ctx, pb->logical_port, ct_zone_get_pb_limit(pb));
         return true;
-    } else if (!updated && node && ct_zone_remove(ctx, node->name)) {
+    } else if (node && ct_zone_remove(ctx, node->name)) {
         return true;
     }
 
@@ -325,6 +369,25 @@  ct_zone_find_zone(const struct shash *ct_zones, const char *name)
     return ct_zone ? ct_zone->zone : 0;
 }
 
+void
+ct_zones_limits_sync(struct ct_zone_ctx *ctx,
+                     const struct hmap *local_datapaths,
+                     struct ovsdb_idl_index *pb_by_dp)
+{
+    const struct local_datapath *ld;
+    HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
+        const char *name = smap_get(&ld->datapath->external_ids, "name");
+        if (!name) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' "
+                        "skipping zone assignment.",
+                        UUID_ARGS(&ld->datapath->header_.uuid));
+            continue;
+        }
+
+        ct_zone_limits_update_per_dp(ctx, ld->datapath, name, pb_by_dp);
+    }
+}
 
 static bool
 ct_zone_assign_unused(struct ct_zone_ctx *ctx, const char *zone_name,
@@ -377,7 +440,10 @@  ct_zone_add(struct ct_zone_ctx *ctx, const char *name, uint16_t zone,
         shash_add(&ctx->current, name, ct_zone);
     }
 
-    ct_zone->zone = zone;
+    *ct_zone = (struct ct_zone) {
+        .zone = zone,
+        .limit = -1,
+    };
 
     if (set_pending) {
         ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
@@ -460,6 +526,7 @@  ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
 
         struct ct_zone ct_zone = {
             .zone = zone,
+            .limit = -1,
         };
         /* Make sure we remove the uuid one in the next OvS DB commit without
          * flush. */
@@ -475,3 +542,76 @@  ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
     ct_zone_add(ctx, current_name, zone, false);
     free(new_name);
 }
+
+static void
+ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
+                             const struct sbrec_datapath_binding *dp,
+                             const char *name,
+                             struct ovsdb_idl_index *pb_by_dp)
+{
+    if (smap_get(&dp->external_ids, "logical-switch")) {
+        struct sbrec_port_binding *target =
+                sbrec_port_binding_index_init_row(pb_by_dp);
+        sbrec_port_binding_index_set_datapath(target, dp);
+
+        const struct sbrec_port_binding *pb;
+        SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target, pb_by_dp) {
+            ct_zone_limit_update(ctx, pb->logical_port,
+                                 ct_zone_get_pb_limit(pb));
+        }
+
+        sbrec_port_binding_index_destroy_row(target);
+    }
+
+    int64_t dp_limit = ct_zone_get_dp_limit(dp);
+    char *dnat = alloc_nat_zone_key(name, "dnat");
+    char *snat = alloc_nat_zone_key(name, "snat");
+
+    ct_zone_limit_update(ctx, dnat, dp_limit);
+    ct_zone_limit_update(ctx, snat, dp_limit);
+
+    free(dnat);
+    free(snat);
+}
+
+static void
+ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name, int64_t limit)
+{
+    struct ct_zone *ct_zone = shash_find_data(&ctx->current, name);
+
+    if (!ct_zone) {
+        return;
+    }
+
+    if (ct_zone->limit != limit) {
+        ct_zone->limit = limit;
+        /* Add pending entry only for DB store to avoid flushing the zone. */
+        ct_zone_add_pending(&ctx->pending, CT_ZONE_DB_QUEUED, ct_zone,
+                            true, name);
+        VLOG_DBG("setting ct zone %"PRIu16" limit to %"PRId64,
+                 ct_zone->zone, ct_zone->limit);
+    }
+}
+
+static int64_t
+ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp)
+{
+    int64_t limit = ovn_smap_get_llong(&dp->external_ids, "ct-zone-limit", -1);
+    return ct_zone_limit_normalize(limit);
+}
+
+static int64_t
+ct_zone_get_pb_limit(const struct sbrec_port_binding *pb)
+{
+    int64_t dp_limit = ovn_smap_get_llong(&pb->datapath->external_ids,
+                                          "ct-zone-limit", -1);
+    int64_t limit = ovn_smap_get_llong(&pb->options,
+                                       "ct-zone-limit", dp_limit);
+    return ct_zone_limit_normalize(limit);
+}
+
+static int64_t
+ct_zone_limit_normalize(int64_t limit)
+{
+    return limit >= 0 && limit <= UINT32_MAX ? limit : -1;
+}
diff --git a/controller/ct-zone.h b/controller/ct-zone.h
index a7c2011a1..295ac84f3 100644
--- a/controller/ct-zone.h
+++ b/controller/ct-zone.h
@@ -43,6 +43,7 @@  struct ct_zone_ctx {
 
 struct ct_zone {
     uint16_t zone;
+    int64_t limit;
 };
 
 /* States to move through when a new conntrack zone has been allocated. */
@@ -70,12 +71,19 @@  void ct_zones_update(const struct sset *local_lports,
                      const struct hmap *local_datapaths,
                      struct ct_zone_ctx *ctx);
 void ct_zones_commit(const struct ovsrec_bridge *br_int,
-                     struct shash *pending_ct_zones);
+                     const struct ovsrec_datapath *ovs_dp,
+                     struct ovsdb_idl_txn *ovs_idl_txn,
+                     struct ct_zone_ctx *ctx);
 void ct_zones_pending_clear_commited(struct shash *pending);
 bool ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
-                              const struct sbrec_datapath_binding *dp);
-bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx, const char *name,
+                              const struct sbrec_datapath_binding *dp,
+                              struct ovsdb_idl_index *pb_by_dp);
+bool ct_zone_handle_port_update(struct ct_zone_ctx *ctx,
+                                const struct sbrec_port_binding *pb,
                                 bool updated, int *scan_start);
 uint16_t ct_zone_find_zone(const struct shash *ct_zones, const char *name);
+void ct_zones_limits_sync(struct ct_zone_ctx *ctx,
+                          const struct hmap *local_datapaths,
+                          struct ovsdb_idl_index *pb_by_dp);
 
 #endif /* controller/ct-zone.h */
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 99a7c8617..a875977ad 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -798,6 +798,7 @@  ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
     ovsdb_idl_add_column(ovs_idl, &ovsrec_ssl_col_private_key);
     ovsdb_idl_add_table(ovs_idl, &ovsrec_table_datapath);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_capabilities);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_ct_zones);
     ovsdb_idl_add_table(ovs_idl, &ovsrec_table_flow_sample_collector_set);
     ovsdb_idl_add_table(ovs_idl, &ovsrec_table_qos);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_other_config);
@@ -807,6 +808,8 @@  ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
     ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_other_config);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_external_ids);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_link_state);
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_ct_zone);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_ct_zone_col_limit);
 
     chassis_register_ovs_idl(ovs_idl);
     encaps_register_ovs_idl(ovs_idl);
@@ -2217,6 +2220,10 @@  en_ct_zones_run(struct engine_node *node, void *data)
     struct ed_type_ct_zones *ct_zones_data = data;
     struct ed_type_runtime_data *rt_data =
         engine_get_input_data("runtime_data", node);
+    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
+            engine_ovsdb_node_get_index(
+                    engine_get_input("SB_port_binding", node),
+                    "datapath");
 
     const struct ovsrec_open_vswitch_table *ovs_table =
         EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
@@ -2230,6 +2237,8 @@  en_ct_zones_run(struct engine_node *node, void *data)
     ct_zones_restore(&ct_zones_data->ctx, ovs_table, dp_table, br_int);
     ct_zones_update(&rt_data->local_lports, &rt_data->local_datapaths,
                     &ct_zones_data->ctx);
+    ct_zones_limits_sync(&ct_zones_data->ctx, &rt_data->local_datapaths,
+                         sbrec_port_binding_by_datapath);
 
     ct_zones_data->recomputed = true;
     engine_set_node_state(node, EN_UPDATED);
@@ -2247,6 +2256,10 @@  ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
         engine_get_input_data("runtime_data", node);
     const struct sbrec_datapath_binding_table *dp_table =
         EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
+    struct ovsdb_idl_index *sbrec_port_binding_by_datapath =
+            engine_ovsdb_node_get_index(
+                    engine_get_input("SB_port_binding", node),
+                    "datapath");
 
     SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
         if (!get_local_datapath(&rt_data->local_datapaths,
@@ -2260,7 +2273,8 @@  ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
             return false;
         }
 
-        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp)) {
+        if (!ct_zone_handle_dp_update(&ct_zones_data->ctx, dp,
+                                      sbrec_port_binding_by_datapath)) {
             return false;
         }
     }
@@ -2309,8 +2323,8 @@  ct_zones_runtime_data_handler(struct engine_node *node, void *data)
                     t_lport->tracked_type == TRACKED_RESOURCE_NEW ||
                     t_lport->tracked_type == TRACKED_RESOURCE_UPDATED;
             updated |= ct_zone_handle_port_update(&ct_zones_data->ctx,
-                                                  t_lport->pb->logical_port,
-                                                  port_updated, &scan_start);
+                                                  t_lport->pb, port_updated,
+                                                  &scan_start);
         }
     }
 
@@ -5151,6 +5165,9 @@  main(int argc, char *argv[])
                      ct_zones_datapath_binding_handler);
     engine_add_input(&en_ct_zones, &en_runtime_data,
                      ct_zones_runtime_data_handler);
+    /* The CT node just needs the index for port bindings by name. The data
+     * for ports are processed in the runtime handler. */
+    engine_add_input(&en_ct_zones, &en_sb_port_binding, engine_noop_handler);
 
     engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
                      ovs_interface_shadow_ovs_interface_handler);
@@ -5555,8 +5572,8 @@  main(int argc, char *argv[])
                         if (ct_zones_data) {
                             stopwatch_start(CT_ZONE_COMMIT_STOPWATCH_NAME,
                                             time_msec());
-                            ct_zones_commit(br_int,
-                                            &ct_zones_data->ctx.pending);
+                            ct_zones_commit(br_int, br_int_dp, ovs_idl_txn,
+                                            &ct_zones_data->ctx);
                             stopwatch_stop(CT_ZONE_COMMIT_STOPWATCH_NAME,
                                            time_msec());
                         }
diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index 58e941193..1ad347419 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -816,6 +816,23 @@  str_tolower(const char *orig)
     return copy;
 }
 
+/* This is a wrapper function which get the value associated with 'key' in
+ * 'smap' and converts it to a long long. If 'key' is not in 'smap' or a
+ * valid unsigned integer can't be parsed from its value, returns 'def'.
+ */
+long long
+ovn_smap_get_llong(const struct smap *smap, const char *key, long long def)
+{
+    const char *value = smap_get(smap, key);
+    long long ll_value;
+
+    if (!value || !str_to_llong(value, 10, &ll_value)) {
+        return def;
+    }
+
+    return ll_value;
+}
+
 /* For a 'key' of the form "IP:port" or just "IP", sets 'port',
  * 'ip_address' and 'ip' ('struct in6_addr' IPv6 or IPv4 mapped address).
  * The caller must free() the memory allocated for 'ip_address'.
diff --git a/lib/ovn-util.h b/lib/ovn-util.h
index f75b821b6..ae971ce5a 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
@@ -211,6 +211,9 @@  char *normalize_v46_prefix(const struct in6_addr *prefix, unsigned int plen);
  */
 char *str_tolower(const char *orig);
 
+long long ovn_smap_get_llong(const struct smap *smap, const char *key,
+                             long long def);
+
 /* OVN daemon options. Taken from ovs/lib/daemon.h. */
 #define OVN_DAEMON_OPTION_ENUMS                     \
     OVN_OPT_DETACH,                                 \
diff --git a/northd/northd.c b/northd/northd.c
index 6898daa00..08208405b 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -741,6 +741,14 @@  ovn_datapath_update_external_ids(struct ovn_datapath *od)
         smap_add(&ids, "name2", name2);
     }
 
+    int64_t ct_zone_limit = ovn_smap_get_llong(od->nbs ?
+                                               &od->nbs->other_config :
+                                               &od->nbr->options,
+                                               "ct-zone-limit", -1);
+    if (ct_zone_limit > 0) {
+        smap_add_format(&ids, "ct-zone-limit", "%"PRId64, ct_zone_limit);
+    }
+
     /* Set interconn-ts. */
     if (od->nbs) {
         const char *ts = smap_get(&od->nbs->other_config, "interconn-ts");
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 9552534f6..8e369a850 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -731,6 +731,17 @@ 
         this timeout will be automatically removed. The value defaults
         to 0, which means disabled.
       </column>
+
+      <column name="other_config" key="ct-zone-limit"
+              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
+        CT zone <code>limit</code> value for given
+        <ref table="Logical_Switch"/>. This value will be propagated to all
+        <ref table="Logical_Switch_Port"/> when configured, but can be
+        overwritten individually per <ref table="Logical_Switch_Port"/>. The
+        value 0 means  unlimited, when the option is not present the limit
+        is not set and the zone limit is derived from OvS default datapath
+        limit.
+      </column>
     </group>
 
     <group title="IP Multicast Snooping Options">
@@ -1132,6 +1143,16 @@ 
           <code>false</code>.
         </column>
 
+        <column name="options" key="ct-zone-limit"
+                type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
+          CT zone <code>limit</code> value for given
+          <ref table="Logical_Switch_Port"/>. This value has priority over
+          limit specified on <ref table="Logical_Switch"/> when configured.
+          The value 0 means unlimited, when the option is not present the limit
+          is not set and the zone limit is derived from OvS default datapath
+          limit.
+        </column>
+
       </group>
 
       <group title="Options for localnet ports">
@@ -2795,6 +2816,14 @@  or
         </p>
 
       </column>
+
+      <column name="options" key="ct-zone-limit"
+              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
+        CT zone <code>limit</code> value for given
+        <ref table="Logical_Router"/>. The value 0 means unlimited, when the
+        option is not present the limit is not set and the zone limit is
+        derived from OvS default datapath limit.
+      </column>
     </group>
 
     <group title="Common Columns">
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 2cb86dc98..0a20cbc09 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -3127,3 +3127,102 @@  OVS_WAIT_UNTIL([grep -q 'tcp:127.0.0.1:1235: connected' hv1/ovn-controller.log])
 
 OVN_CLEANUP([hv1])
 AT_CLEANUP
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-controller - CT zone limit])
+ovn_start
+
+net_add n1
+sim_add hv1
+as hv1
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+check ovn-appctl -t ovn-controller vlog/set dbg:ct_zone
+
+check ovs-vsctl add-port br-int lsp \
+    -- set Interface lsp external-ids:iface-id=lsp
+
+check ovn-nbctl lr-add lr
+
+check ovn-nbctl ls-add ls
+check ovn-nbctl lsp-add ls ls-lr
+check ovn-nbctl lsp-set-type ls-lr router
+check ovn-nbctl lsp-set-addresses ls-lr router
+check ovn-nbctl lrp-add lr lr-ls 00:00:00:00:00:01 10.0.0.1
+
+check ovn-nbctl lsp-add ls lsp
+check ovn-nbctl lsp-set-addresses lsp "00:00:00:00:00:02 10.0.0.2"
+
+check ovn-nbctl lrp-add lr lrp-gw 01:00:00:00:00:01 172.16.0.1
+check ovn-nbctl lrp-set-gateway-chassis lrp-gw hv1
+
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+get_zone_num() {
+    output=$1
+    name=$2
+
+    printf "$output" | grep $name | cut -d ' ' -f 2
+}
+
+check_ovs_ct_limit() {
+    zone=$1
+    limit=$2
+
+    AT_CHECK_UNQUOTED([ovs-appctl dpctl/ct-get-limits zone=$zone | sed "s/count=.*/count=?/;s/default limit=.*/default limit=?/" | sort], [0], [dnl
+default limit=?
+zone=$zone,limit=$limit,count=?
+])
+}
+
+wait_ovs_ct_limit_count() {
+    count=$1
+
+    OVS_WAIT_UNTIL([test $count -eq $(ovs-vsctl --no-headings --format=table list CT_Zone | wc -l)])
+}
+
+ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list)
+lr_dnat=$(get_zone_num "$ct_zones" lr_dnat)
+lr_snat=$(get_zone_num "$ct_zones" lr_snat)
+
+ls_dnat=$(get_zone_num "$ct_zones" ls_dnat)
+ls_snat=$(get_zone_num "$ct_zones" ls_snat)
+
+lsp=$(get_zone_num "$ct_zones" lsp)
+
+wait_ovs_ct_limit_count 0
+
+check ovn-nbctl --wait=hv set Logical_Router lr options:ct-zone-limit=5
+wait_ovs_ct_limit_count 2
+check_ovs_ct_limit $lr_dnat 5
+check_ovs_ct_limit $lr_snat 5
+
+check ovn-nbctl --wait=hv remove Logical_Router lr options ct-zone-limit
+wait_ovs_ct_limit_count 0
+
+check ovn-nbctl --wait=hv set Logical_Switch ls other_config:ct-zone-limit=10
+wait_ovs_ct_limit_count 3
+check_ovs_ct_limit $ls_dnat 10
+check_ovs_ct_limit $ls_snat 10
+check_ovs_ct_limit $lsp 10
+
+check ovn-nbctl --wait=hv set Logical_Switch_Port lsp options:ct-zone-limit=5
+wait_ovs_ct_limit_count 3
+check_ovs_ct_limit $ls_dnat 10
+check_ovs_ct_limit $ls_snat 10
+check_ovs_ct_limit $lsp 5
+
+check ovn-nbctl --wait=hv remove Logical_Switch_Port lsp options ct-zone-limit
+wait_ovs_ct_limit_count 3
+check_ovs_ct_limit $ls_dnat 10
+check_ovs_ct_limit $ls_snat 10
+check_ovs_ct_limit $lsp 10
+
+check ovn-nbctl --wait=hv remove Logical_Switch ls other_config ct-zone-limit
+wait_ovs_ct_limit_count 0
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])