diff mbox series

[ovs-dev] northd: Add support for disabling vxlan mode.

Message ID 20240403103418.843302-1-odivlad@gmail.com
State Changes Requested, archived
Headers show
Series [ovs-dev] northd: Add support for disabling vxlan mode. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_ovn-kubernetes success github build: passed
ovsrobot/github-robot-_Build_and_Test fail github build: failed

Commit Message

Vladislav Odintsov April 3, 2024, 10:34 a.m. UTC
Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
for available tunnel IDs because of lack of space in VXLAN VNI.
In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
and 2047 logical switch ports per datapath.

Prior to this patch vxlan mode was enabled automatically if at least one
chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
only for HW VTEP (RAMP) switch, such limitation makes no sence.

This patch adds support for explicit disabling of vxlan mode via
Northbound database.

0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068

CC: Ihar Hrachyshka <ihrachys@redhat.com>
Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
---
 northd/en-global-config.c |  9 +++-
 northd/northd.c           | 90 ++++++++++++++++++---------------------
 northd/northd.h           |  6 ++-
 ovn-nb.xml                | 12 ++++++
 tests/ovn-northd.at       | 29 +++++++++++++
 5 files changed, 94 insertions(+), 52 deletions(-)

Comments

Vladislav Odintsov April 3, 2024, 12:55 p.m. UTC | #1
The failed new testcase assumes that patch [1] is applied.
Should I resend them both as a single patchset?

1: https://patchwork.ozlabs.org/project/ovn/patch/20240401121510.758326-1-odivlad@gmail.com/

> On 3 Apr 2024, at 13:34, Vladislav Odintsov <odivlad@gmail.com> wrote:
> 
> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
> for available tunnel IDs because of lack of space in VXLAN VNI.
> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
> and 2047 logical switch ports per datapath.
> 
> Prior to this patch vxlan mode was enabled automatically if at least one
> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
> only for HW VTEP (RAMP) switch, such limitation makes no sence.
> 
> This patch adds support for explicit disabling of vxlan mode via
> Northbound database.
> 
> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
> 
> CC: Ihar Hrachyshka <ihrachys@redhat.com>
> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
> ---
> northd/en-global-config.c |  9 +++-
> northd/northd.c           | 90 ++++++++++++++++++---------------------
> northd/northd.h           |  6 ++-
> ovn-nb.xml                | 12 ++++++
> tests/ovn-northd.at       | 29 +++++++++++++
> 5 files changed, 94 insertions(+), 52 deletions(-)
> 
> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
> index 34e393b33..9310c4575 100644
> --- a/northd/en-global-config.c
> +++ b/northd/en-global-config.c
> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void *data)
>                      config_data->svc_monitor_mac);
>     }
> 
> -    char *max_tunid = xasprintf("%d",
> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>     smap_replace(options, "max_tunid", max_tunid);
>     free(max_tunid);
> 
> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
>         return true;
>     }
> 
> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
> +                           "disable_vxlan_mode", false)) {
> +        return true;
> +    }
> +
>     return false;
> }
> 
> diff --git a/northd/northd.c b/northd/northd.c
> index c568f6360..859b233e8 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>  */
> static bool default_acl_drop;
> 
> +/* If this option is 'true' northd will use limited 24-bit space for datapath
> + * and ports tunnel key allocation (12 bits for each instead of default 16). */
> +static bool vxlan_mode;
> +
> #define MAX_OVN_TAGS 4096
> 
> 
> @@ -875,24 +879,31 @@ join_datapaths(const struct nbrec_logical_switch_table *nbrec_ls_table,
>     }
> }
> 
> -static bool
> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
> +void
> +init_vxlan_mode(const struct smap *nb_options,
> +                const struct sbrec_chassis_table *sbrec_chassis_table)
> {
> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
> +        vxlan_mode = false;
> +        return;
> +    }
> +
>     const struct sbrec_chassis *chassis;
>     SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>         for (int i = 0; i < chassis->n_encaps; i++) {
>             if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
> -                return true;
> +                vxlan_mode = true;
> +                return;
>             }
>         }
>     }
> -    return false;
> +    vxlan_mode = false;
> }
> 
> uint32_t
> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
> +get_ovn_max_dp_key_local(void)
> {
> -    if (is_vxlan_mode(sbrec_chassis_table)) {
> +    if (vxlan_mode) {
>         /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>         return OVN_MAX_DP_VXLAN_KEY;
>     }
> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
> }
> 
> static void
> -ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>                           struct ovn_datapath *od, uint32_t *hint)
> {
>     if (!od->tunnel_key) {
>         od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
> -                                    OVN_MIN_DP_KEY_LOCAL,
> -                                    get_ovn_max_dp_key_local(sbrec_ch_table),
> -                                    hint);
> +                                            OVN_MIN_DP_KEY_LOCAL,
> +                                            get_ovn_max_dp_key_local(),
> +                                            hint);
>         if (!od->tunnel_key) {
>             if (od->sb) {
>                 sbrec_datapath_binding_delete(od->sb);
> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
> 
> static void
> ovn_datapath_assign_requested_tnl_id(
> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>     struct hmap *dp_tnlids, struct ovn_datapath *od)
> {
>     const struct smap *other_config = (od->nbs
> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>     uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key", 0);
>     if (tunnel_key) {
>         const char *interconn_ts = smap_get(other_config, "interconn-ts");
> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
> -            tunnel_key >= 1 << 12) {
> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>             VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>                          "incompatible with VXLAN", tunnel_key,
> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>                 const struct nbrec_logical_switch_table *nbrec_ls_table,
>                 const struct nbrec_logical_router_table *nbrec_lr_table,
>                 const struct sbrec_datapath_binding_table *sbrec_dp_table,
> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>                 struct ovn_datapaths *ls_datapaths,
>                 struct ovn_datapaths *lr_datapaths,
>                 struct ovs_list *lr_list)
> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>     struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>     struct ovn_datapath *od;
>     LIST_FOR_EACH (od, list, &both) {
> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
> -                                             od);
> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>     }
>     LIST_FOR_EACH (od, list, &nb_only) {
> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
> -                                             od); }
> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
> +    }
> 
>     /* Keep nonconflicting tunnel IDs that are already assigned. */
>     LIST_FOR_EACH (od, list, &both) {
> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>     /* Assign new tunnel ids where needed. */
>     uint32_t hint = 0;
>     LIST_FOR_EACH_SAFE (od, list, &both) {
> -        ovn_datapath_allocate_key(sbrec_chassis_table,
> -                                  datapaths, &dp_tnlids, od, &hint);
> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>     }
>     LIST_FOR_EACH_SAFE (od, list, &nb_only) {
> -        ovn_datapath_allocate_key(sbrec_chassis_table,
> -                                  datapaths, &dp_tnlids, od, &hint);
> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>     }
> 
>     /* Sync tunnel ids from nb to sb. */
> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
>  * that the I-P engine can fallback to recompute if needed; otherwise return
>  * true (even if the key is not allocated). */
> static bool
> -ovn_port_assign_requested_tnl_id(
> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct ovn_port *op)
> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
> {
>     const struct smap *options = (op->nbsp
>                                   ? &op->nbsp->options
>                                   : &op->nbrp->options);
>     uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>     if (tunnel_key) {
> -        if (is_vxlan_mode(sbrec_chassis_table) &&
> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>             VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>                          "is incompatible with VXLAN",
> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
> }
> 
> static bool
> -ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
> -                      struct hmap *ports,
> -                      struct ovn_port *op)
> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
> {
>     if (!op->tunnel_key) {
> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>         op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>                                             1, (1u << (key_bits - 1)) - 1,
>                                             &op->od->port_key_hint);
> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
> static void
> build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>     const struct sbrec_port_binding_table *sbrec_port_binding_table,
> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>     const struct sbrec_mirror_table *sbrec_mirror_table,
>     const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>     const struct sbrec_ha_chassis_group_table *sbrec_ha_chassis_group_table,
> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>     /* Assign explicitly requested tunnel ids first. */
>     struct ovn_port *op;
>     LIST_FOR_EACH (op, list, &both) {
> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
> +        ovn_port_assign_requested_tnl_id(op);
>     }
>     LIST_FOR_EACH (op, list, &nb_only) {
> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
> +        ovn_port_assign_requested_tnl_id(op);
>     }
> 
>     /* Keep nonconflicting tunnel IDs that are already assigned. */
> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
> 
>     /* Assign new tunnel ids where needed. */
>     LIST_FOR_EACH_SAFE (op, list, &both) {
> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
> +        ovn_port_allocate_key(ports, op);
>     }
>     LIST_FOR_EACH_SAFE (op, list, &nb_only) {
> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
> +        ovn_port_allocate_key(ports, op);
>     }
> 
>     /* For logical ports that are in both databases, update the southbound
> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>              struct hmap *ls_ports, struct ovn_datapath *od,
>              const struct sbrec_port_binding *sb,
>              const struct sbrec_mirror_table *sbrec_mirror_table,
> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>              struct ovsdb_idl_index *sbrec_chassis_by_name,
>              struct ovsdb_idl_index *sbrec_chassis_by_hostname)
> {
>     op->od = od;
>     parse_lsp_addrs(op);
>     /* Assign explicitly requested tunnel ids first. */
> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>         return false;
>     }
>     if (sb) {
> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>         sbrec_port_binding_set_logical_port(op->sb, op->key);
>     }
>     /* Assign new tunnel ids where needed. */
> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>         return false;
>     }
>     ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn, struct hmap *ls_ports,
>                const char *key, const struct nbrec_logical_switch_port *nbsp,
>                struct ovn_datapath *od, const struct sbrec_port_binding *sb,
>                const struct sbrec_mirror_table *sbrec_mirror_table,
> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>                struct ovsdb_idl_index *sbrec_chassis_by_name,
>                struct ovsdb_idl_index *sbrec_chassis_by_hostname)
> {
>     struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>                                           NULL);
>     hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> -                      sbrec_mirror_table, sbrec_chassis_table,
> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>                       sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
>         ovn_port_destroy(ls_ports, op);
>         return NULL;
> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>                 struct ovn_datapath *od,
>                 const struct sbrec_port_binding *sb,
>                 const struct sbrec_mirror_table *sbrec_mirror_table,
> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>                 struct ovsdb_idl_index *sbrec_chassis_by_name,
>                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
> {
> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>     op->sb = sb;
>     ovn_port_set_nb(op, nbsp, nbrp);
>     op->l3dgw_port = op->cr_port = NULL;
> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> -                        sbrec_mirror_table, sbrec_chassis_table,
> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>                         sbrec_chassis_by_name, sbrec_chassis_by_hostname);
> }
> 
> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>             op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>                                 new_nbsp->name, new_nbsp, od, NULL,
>                                 ni->sbrec_mirror_table,
> -                                ni->sbrec_chassis_table,
>                                 ni->sbrec_chassis_by_name,
>                                 ni->sbrec_chassis_by_hostname);
>             if (!op) {
> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>             if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>                                 new_nbsp, NULL,
>                                 od, sb, ni->sbrec_mirror_table,
> -                                ni->sbrec_chassis_table,
>                                 ni->sbrec_chassis_by_name,
>                                 ni->sbrec_chassis_by_hostname)) {
>                 goto fail;
> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>     use_common_zone = smap_get_bool(input_data->nb_options, "use_common_zone",
>                                     false);
> 
> +    init_vxlan_mode(input_data->nb_options, input_data->sbrec_chassis_table);
> +
>     build_datapaths(ovnsb_txn,
>                     input_data->nbrec_logical_switch_table,
>                     input_data->nbrec_logical_router_table,
>                     input_data->sbrec_datapath_binding_table,
> -                    input_data->sbrec_chassis_table,
>                     &data->ls_datapaths,
>                     &data->lr_datapaths, &data->lr_list);
>     build_lb_datapaths(input_data->lbs, input_data->lbgrps,
> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>                        &data->lb_datapaths_map, &data->lb_group_datapaths_map);
>     build_ports(ovnsb_txn,
>                 input_data->sbrec_port_binding_table,
> -                input_data->sbrec_chassis_table,
>                 input_data->sbrec_mirror_table,
>                 input_data->sbrec_mac_binding_table,
>                 input_data->sbrec_ha_chassis_group_table,
> diff --git a/northd/northd.h b/northd/northd.h
> index 5e9fa4745..ed9d992fb 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath *od)
>     return od->n_l3dgw_ports > 1 && !od->is_gw_router;
> }
> 
> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
> +void
> +init_vxlan_mode(const struct smap *nb_options,
> +                const struct sbrec_chassis_table *sbrec_chassis_table);
> +
> +uint32_t get_ovn_max_dp_key_local(void);
> 
> #endif /* NORTHD_H */
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index b652046a7..9b2cb355e 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -381,6 +381,18 @@
>         of SB changes would be very noticeable.
>       </column>
> 
> +      <column name="options" key="disable_vxlan_mode">
> +        Be default if at least one chassis in OVN cluster has VXLAN encap,
> +        northd will run in a `vxlan mode`, which means it splits 24-bit
> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.  Maximum
> +        datapaths count in this mode is 4095 and maximum ports per datapath is
> +        2047.  Rest of IDs are used for multicast groups.
> +        In case VXLAN encaps are needed on chassis only to support HW VTEP
> +        functionality, and main encap type is GENEVE or STT, set this option to
> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS and 15
> +        bits for port tunnel IDs.
> +      </column>
> +
>       <group title="Options for configuring interconnection route advertisement">
>         <p>
>           These options control how routes are advertised between OVN
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index cd53755b2..f5d025852 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -2819,6 +2819,35 @@ AT_CHECK(
> get_tunnel_keys
> AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
> 
> +AT_CLEANUP
> +])
> +OVN_FOR_EACH_NORTHD_NO_HV([
> +AT_SETUP([check vxlan mode disabling])
> +ovn_start
> +
> +# Create a fake chassis with vxlan encap to implicitly enable vxlan mode.
> +ovn-sbctl \
> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \
> +    -- --id=@c create chassis name=hv1 encaps=@e
> +
> +cmd="ovn-nbctl --wait=sb"
> +for i in {1..4097..1}; do
> +    cmd="${cmd} -- ls-add lsw-${i}"
> +done
> +
> +eval $cmd
> +
> +check_row_count nb:Logical_Switch 4097
> +wait_row_count sb:Datapath_Binding 4095
> +
> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted" northd/ovn-northd.log])
> +
> +# Explicitly disable vxlan mode and check that two remaining datapaths were created.
> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
> +
> +check_row_count nb:Logical_Switch 4097
> +wait_row_count sb:Datapath_Binding 4097
> +
> AT_CLEANUP
> ])
> 
> -- 
> 2.44.0
> 


Regards,
Vladislav Odintsov
Ihar Hrachyshka April 3, 2024, 5:43 p.m. UTC | #2
Thank you Vladislav.

FYI it was reported in the past in
https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html
but fell through cracks then. Thanks for picking it up!

In your patch, you introduce a new config option to disable the
'vxlan-mode' behavior. This will definitely work. But I wonder if we can
automatically detect this scenario by ignoring the chassis that are VTEP
from consideration? I believe ovn-controller-vtep sets `is-vtep` in
other_config, so - would it work if we modify is_vxlan_mode to consider it
too?

Thanks again for looking into this.
Ihar

On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odivlad@gmail.com> wrote:

> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
> for available tunnel IDs because of lack of space in VXLAN VNI.
> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
> and 2047 logical switch ports per datapath.
>
> Prior to this patch vxlan mode was enabled automatically if at least one
> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>
> This patch adds support for explicit disabling of vxlan mode via
> Northbound database.
>
> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>
> CC: Ihar Hrachyshka <ihrachys@redhat.com>
> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
> ---
>  northd/en-global-config.c |  9 +++-
>  northd/northd.c           | 90 ++++++++++++++++++---------------------
>  northd/northd.h           |  6 ++-
>  ovn-nb.xml                | 12 ++++++
>  tests/ovn-northd.at       | 29 +++++++++++++
>  5 files changed, 94 insertions(+), 52 deletions(-)
>
> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
> index 34e393b33..9310c4575 100644
> --- a/northd/en-global-config.c
> +++ b/northd/en-global-config.c
> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void
> *data)
>                       config_data->svc_monitor_mac);
>      }
>
> -    char *max_tunid = xasprintf("%d",
> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>      smap_replace(options, "max_tunid", max_tunid);
>      free(max_tunid);
>
> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct
> nbrec_nb_global *nb,
>          return true;
>      }
>
> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
> +                           "disable_vxlan_mode", false)) {
> +        return true;
> +    }
> +
>      return false;
>  }
>
> diff --git a/northd/northd.c b/northd/northd.c
> index c568f6360..859b233e8 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>   */
>  static bool default_acl_drop;
>
> +/* If this option is 'true' northd will use limited 24-bit space for
> datapath
> + * and ports tunnel key allocation (12 bits for each instead of default
> 16). */
> +static bool vxlan_mode;
> +
>  #define MAX_OVN_TAGS 4096
>
>
> @@ -875,24 +879,31 @@ join_datapaths(const struct
> nbrec_logical_switch_table *nbrec_ls_table,
>      }
>  }
>
> -static bool
> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
> +void
> +init_vxlan_mode(const struct smap *nb_options,
> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>  {
> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
> +        vxlan_mode = false;
> +        return;
> +    }
> +
>      const struct sbrec_chassis *chassis;
>      SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>          for (int i = 0; i < chassis->n_encaps; i++) {
>              if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
> -                return true;
> +                vxlan_mode = true;
> +                return;
>              }
>          }
>      }
> -    return false;
> +    vxlan_mode = false;
>  }
>
>  uint32_t
> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table
> *sbrec_chassis_table)
> +get_ovn_max_dp_key_local(void)
>  {
> -    if (is_vxlan_mode(sbrec_chassis_table)) {
> +    if (vxlan_mode) {
>          /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>          return OVN_MAX_DP_VXLAN_KEY;
>      }
> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct
> sbrec_chassis_table *sbrec_chassis_table)
>  }
>
>  static void
> -ovn_datapath_allocate_key(const struct sbrec_chassis_table
> *sbrec_ch_table,
> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>                            struct ovn_datapath *od, uint32_t *hint)
>  {
>      if (!od->tunnel_key) {
>          od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
> -                                    OVN_MIN_DP_KEY_LOCAL,
> -
> get_ovn_max_dp_key_local(sbrec_ch_table),
> -                                    hint);
> +                                            OVN_MIN_DP_KEY_LOCAL,
> +                                            get_ovn_max_dp_key_local(),
> +                                            hint);
>          if (!od->tunnel_key) {
>              if (od->sb) {
>                  sbrec_datapath_binding_delete(od->sb);
> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct
> sbrec_chassis_table *sbrec_ch_table,
>
>  static void
>  ovn_datapath_assign_requested_tnl_id(
> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>      struct hmap *dp_tnlids, struct ovn_datapath *od)
>  {
>      const struct smap *other_config = (od->nbs
> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>      uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key",
> 0);
>      if (tunnel_key) {
>          const char *interconn_ts = smap_get(other_config, "interconn-ts");
> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
> -            tunnel_key >= 1 << 12) {
> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>                           "incompatible with VXLAN", tunnel_key,
> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>                  const struct nbrec_logical_switch_table *nbrec_ls_table,
>                  const struct nbrec_logical_router_table *nbrec_lr_table,
>                  const struct sbrec_datapath_binding_table *sbrec_dp_table,
> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>                  struct ovn_datapaths *ls_datapaths,
>                  struct ovn_datapaths *lr_datapaths,
>                  struct ovs_list *lr_list)
> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>      struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>      struct ovn_datapath *od;
>      LIST_FOR_EACH (od, list, &both) {
> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
> &dp_tnlids,
> -                                             od);
> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>      }
>      LIST_FOR_EACH (od, list, &nb_only) {
> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
> &dp_tnlids,
> -                                             od); }
> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
> +    }
>
>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>      LIST_FOR_EACH (od, list, &both) {
> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>      /* Assign new tunnel ids where needed. */
>      uint32_t hint = 0;
>      LIST_FOR_EACH_SAFE (od, list, &both) {
> -        ovn_datapath_allocate_key(sbrec_chassis_table,
> -                                  datapaths, &dp_tnlids, od, &hint);
> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>      }
>      LIST_FOR_EACH_SAFE (od, list, &nb_only) {
> -        ovn_datapath_allocate_key(sbrec_chassis_table,
> -                                  datapaths, &dp_tnlids, od, &hint);
> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>      }
>
>      /* Sync tunnel ids from nb to sb. */
> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t
> tunnel_key)
>   * that the I-P engine can fallback to recompute if needed; otherwise
> return
>   * true (even if the key is not allocated). */
>  static bool
> -ovn_port_assign_requested_tnl_id(
> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct
> ovn_port *op)
> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>  {
>      const struct smap *options = (op->nbsp
>                                    ? &op->nbsp->options
>                                    : &op->nbrp->options);
>      uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>      if (tunnel_key) {
> -        if (is_vxlan_mode(sbrec_chassis_table) &&
> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>                           "is incompatible with VXLAN",
> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>  }
>
>  static bool
> -ovn_port_allocate_key(const struct sbrec_chassis_table
> *sbrec_chassis_table,
> -                      struct hmap *ports,
> -                      struct ovn_port *op)
> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>  {
>      if (!op->tunnel_key) {
> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>          op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>                                              1, (1u << (key_bits - 1)) - 1,
>                                              &op->od->port_key_hint);
> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct
> sbrec_chassis_table *sbrec_chassis_table,
>  static void
>  build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>      const struct sbrec_port_binding_table *sbrec_port_binding_table,
> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>      const struct sbrec_mirror_table *sbrec_mirror_table,
>      const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>      const struct sbrec_ha_chassis_group_table
> *sbrec_ha_chassis_group_table,
> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>      /* Assign explicitly requested tunnel ids first. */
>      struct ovn_port *op;
>      LIST_FOR_EACH (op, list, &both) {
> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
> +        ovn_port_assign_requested_tnl_id(op);
>      }
>      LIST_FOR_EACH (op, list, &nb_only) {
> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
> +        ovn_port_assign_requested_tnl_id(op);
>      }
>
>      /* Keep nonconflicting tunnel IDs that are already assigned. */
> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>
>      /* Assign new tunnel ids where needed. */
>      LIST_FOR_EACH_SAFE (op, list, &both) {
> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
> +        ovn_port_allocate_key(ports, op);
>      }
>      LIST_FOR_EACH_SAFE (op, list, &nb_only) {
> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
> +        ovn_port_allocate_key(ports, op);
>      }
>
>      /* For logical ports that are in both databases, update the southbound
> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>               struct hmap *ls_ports, struct ovn_datapath *od,
>               const struct sbrec_port_binding *sb,
>               const struct sbrec_mirror_table *sbrec_mirror_table,
> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>  {
>      op->od = od;
>      parse_lsp_addrs(op);
>      /* Assign explicitly requested tunnel ids first. */
> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>          return false;
>      }
>      if (sb) {
> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>          sbrec_port_binding_set_logical_port(op->sb, op->key);
>      }
>      /* Assign new tunnel ids where needed. */
> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>          return false;
>      }
>      ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn,
> struct hmap *ls_ports,
>                 const char *key, const struct nbrec_logical_switch_port
> *nbsp,
>                 struct ovn_datapath *od, const struct sbrec_port_binding
> *sb,
>                 const struct sbrec_mirror_table *sbrec_mirror_table,
> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>                 struct ovsdb_idl_index *sbrec_chassis_by_name,
>                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>  {
>      struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>                                            NULL);
>      hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> -                      sbrec_mirror_table, sbrec_chassis_table,
> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>                        sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
>          ovn_port_destroy(ls_ports, op);
>          return NULL;
> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>                  struct ovn_datapath *od,
>                  const struct sbrec_port_binding *sb,
>                  const struct sbrec_mirror_table *sbrec_mirror_table,
> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>                  struct ovsdb_idl_index *sbrec_chassis_by_name,
>                  struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>  {
> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>      op->sb = sb;
>      ovn_port_set_nb(op, nbsp, nbrp);
>      op->l3dgw_port = op->cr_port = NULL;
> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> -                        sbrec_mirror_table, sbrec_chassis_table,
> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> sbrec_mirror_table,
>                          sbrec_chassis_by_name, sbrec_chassis_by_hostname);
>  }
>
> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
>              op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>                                  new_nbsp->name, new_nbsp, od, NULL,
>                                  ni->sbrec_mirror_table,
> -                                ni->sbrec_chassis_table,
>                                  ni->sbrec_chassis_by_name,
>                                  ni->sbrec_chassis_by_hostname);
>              if (!op) {
> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
>              if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>                                  new_nbsp, NULL,
>                                  od, sb, ni->sbrec_mirror_table,
> -                                ni->sbrec_chassis_table,
>                                  ni->sbrec_chassis_by_name,
>                                  ni->sbrec_chassis_by_hostname)) {
>                  goto fail;
> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>      use_common_zone = smap_get_bool(input_data->nb_options,
> "use_common_zone",
>                                      false);
>
> +    init_vxlan_mode(input_data->nb_options,
> input_data->sbrec_chassis_table);
> +
>      build_datapaths(ovnsb_txn,
>                      input_data->nbrec_logical_switch_table,
>                      input_data->nbrec_logical_router_table,
>                      input_data->sbrec_datapath_binding_table,
> -                    input_data->sbrec_chassis_table,
>                      &data->ls_datapaths,
>                      &data->lr_datapaths, &data->lr_list);
>      build_lb_datapaths(input_data->lbs, input_data->lbgrps,
> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>                         &data->lb_datapaths_map,
> &data->lb_group_datapaths_map);
>      build_ports(ovnsb_txn,
>                  input_data->sbrec_port_binding_table,
> -                input_data->sbrec_chassis_table,
>                  input_data->sbrec_mirror_table,
>                  input_data->sbrec_mac_binding_table,
>                  input_data->sbrec_ha_chassis_group_table,
> diff --git a/northd/northd.h b/northd/northd.h
> index 5e9fa4745..ed9d992fb 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath
> *od)
>      return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>  }
>
> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
> +void
> +init_vxlan_mode(const struct smap *nb_options,
> +                const struct sbrec_chassis_table *sbrec_chassis_table);
> +
> +uint32_t get_ovn_max_dp_key_local(void);
>
>  #endif /* NORTHD_H */
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index b652046a7..9b2cb355e 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -381,6 +381,18 @@
>          of SB changes would be very noticeable.
>        </column>
>
> +      <column name="options" key="disable_vxlan_mode">
> +        Be default if at least one chassis in OVN cluster has VXLAN encap,
> +        northd will run in a `vxlan mode`, which means it splits 24-bit
> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.
> Maximum
> +        datapaths count in this mode is 4095 and maximum ports per
> datapath is
> +        2047.  Rest of IDs are used for multicast groups.
> +        In case VXLAN encaps are needed on chassis only to support HW VTEP
> +        functionality, and main encap type is GENEVE or STT, set this
> option to
> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS
> and 15
> +        bits for port tunnel IDs.
> +      </column>
> +
>        <group title="Options for configuring interconnection route
> advertisement">
>          <p>
>            These options control how routes are advertised between OVN
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index cd53755b2..f5d025852 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -2819,6 +2819,35 @@ AT_CHECK(
>  get_tunnel_keys
>  AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
>
> +AT_CLEANUP
> +])
> +OVN_FOR_EACH_NORTHD_NO_HV([
> +AT_SETUP([check vxlan mode disabling])
> +ovn_start
> +
> +# Create a fake chassis with vxlan encap to implicitly enable vxlan mode.
> +ovn-sbctl \
> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \
> +    -- --id=@c create chassis name=hv1 encaps=@e
> +
> +cmd="ovn-nbctl --wait=sb"
> +for i in {1..4097..1}; do
> +    cmd="${cmd} -- ls-add lsw-${i}"
> +done
> +
> +eval $cmd
> +
> +check_row_count nb:Logical_Switch 4097
> +wait_row_count sb:Datapath_Binding 4095
> +
> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted"
> northd/ovn-northd.log])
> +
> +# Explicitly disable vxlan mode and check that two remaining datapaths
> were created.
> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
> +
> +check_row_count nb:Logical_Switch 4097
> +wait_row_count sb:Datapath_Binding 4097
> +
>  AT_CLEANUP
>  ])
>
> --
> 2.44.0
>
>
Vladislav Odintsov April 3, 2024, 8:05 p.m. UTC | #3
re-sending email because ovs list rejected previous its content for some reason:

Hi Ihar,

thanks for your quick reaction!
I didn’t see mentioned thread, but I think that it is not safe enough to have automatic detection of this scenario here.

Imagine: for VXLAN with HW VTEP scenario besides VXLAN encap one must configure also either GENEVE and/or STT encap(s) for HV chassis.

So, detection could be implemented like this:
Check all non-VTEP chassis' encaps and find "effective encap" for each of them. If we detect at least one chassis with "effective encap" == vxlan, then enable vxlan mode. Normal mode otherwise.
"effective encap" means that for 'vxlan,geneve,stt' encaps effective is geneve, for 'vxlan,stt' -> stt, for 'vxlan' -> vxlan.
Such behavior was my first idea.

But I decided that there possible flapping of modes if there is a problem/bug in deployment tooling and it is enough to have only one chassis with wrong encap set to affect vxlan mode for entire OVN cluster. Such mode flapping can result in problems with tunnel ids allocation.
So it seems that to have an option that statically sets vxlan mode is more resilient.
What do you think?


> On 3 Apr 2024, at 20:43, Ihar Hrachyshka <ihrachys@redhat.com> wrote:
> 
> Thank you Vladislav.
> 
> FYI it was reported in the past in https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html but fell through cracks then. Thanks for picking it up!
> 
> In your patch, you introduce a new config option to disable the 'vxlan-mode' behavior. This will definitely work. But I wonder if we can automatically detect this scenario by ignoring the chassis that are VTEP from consideration? I believe ovn-controller-vtep sets `is-vtep` in other_config, so - would it work if we modify is_vxlan_mode to consider it too?
> 
> Thanks again for looking into this.
> Ihar
> 
> On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
>> for available tunnel IDs because of lack of space in VXLAN VNI.
>> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
>> and 2047 logical switch ports per datapath.
>> 
>> Prior to this patch vxlan mode was enabled automatically if at least one
>> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
>> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>> 
>> This patch adds support for explicit disabling of vxlan mode via
>> Northbound database.
>> 
>> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>> 
>> CC: Ihar Hrachyshka <ihrachys@redhat.com <mailto:ihrachys@redhat.com>>
>> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>>
>> ---
>>  northd/en-global-config.c |  9 +++-
>>  northd/northd.c           | 90 ++++++++++++++++++---------------------
>>  northd/northd.h           |  6 ++-
>>  ovn-nb.xml                | 12 ++++++
>>  tests/ovn-northd.at <http://ovn-northd.at/>       | 29 +++++++++++++
>>  5 files changed, 94 insertions(+), 52 deletions(-)
>> 
>> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
>> index 34e393b33..9310c4575 100644
>> --- a/northd/en-global-config.c
>> +++ b/northd/en-global-config.c
>> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void *data)
>>                       config_data->svc_monitor_mac);
>>      }
>> 
>> -    char *max_tunid = xasprintf("%d",
>> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
>> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
>> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>>      smap_replace(options, "max_tunid", max_tunid);
>>      free(max_tunid);
>> 
>> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
>>          return true;
>>      }
>> 
>> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
>> +                           "disable_vxlan_mode", false)) {
>> +        return true;
>> +    }
>> +
>>      return false;
>>  }
>> 
>> diff --git a/northd/northd.c b/northd/northd.c
>> index c568f6360..859b233e8 100644
>> --- a/northd/northd.c
>> +++ b/northd/northd.c
>> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>>   */
>>  static bool default_acl_drop;
>> 
>> +/* If this option is 'true' northd will use limited 24-bit space for datapath
>> + * and ports tunnel key allocation (12 bits for each instead of default 16). */
>> +static bool vxlan_mode;
>> +
>>  #define MAX_OVN_TAGS 4096
>> 
>> 
>> @@ -875,24 +879,31 @@ join_datapaths(const struct nbrec_logical_switch_table *nbrec_ls_table,
>>      }
>>  }
>> 
>> -static bool
>> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
>> +void
>> +init_vxlan_mode(const struct smap *nb_options,
>> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>>  {
>> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
>> +        vxlan_mode = false;
>> +        return;
>> +    }
>> +
>>      const struct sbrec_chassis *chassis;
>>      SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>>          for (int i = 0; i < chassis->n_encaps; i++) {
>>              if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
>> -                return true;
>> +                vxlan_mode = true;
>> +                return;
>>              }
>>          }
>>      }
>> -    return false;
>> +    vxlan_mode = false;
>>  }
>> 
>>  uint32_t
>> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>> +get_ovn_max_dp_key_local(void)
>>  {
>> -    if (is_vxlan_mode(sbrec_chassis_table)) {
>> +    if (vxlan_mode) {
>>          /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>>          return OVN_MAX_DP_VXLAN_KEY;
>>      }
>> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>  }
>> 
>>  static void
>> -ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
>> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>>                            struct ovn_datapath *od, uint32_t *hint)
>>  {
>>      if (!od->tunnel_key) {
>>          od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
>> -                                    OVN_MIN_DP_KEY_LOCAL,
>> -                                    get_ovn_max_dp_key_local(sbrec_ch_table),
>> -                                    hint);
>> +                                            OVN_MIN_DP_KEY_LOCAL,
>> +                                            get_ovn_max_dp_key_local(),
>> +                                            hint);
>>          if (!od->tunnel_key) {
>>              if (od->sb) {
>>                  sbrec_datapath_binding_delete(od->sb);
>> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>> 
>>  static void
>>  ovn_datapath_assign_requested_tnl_id(
>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>      struct hmap *dp_tnlids, struct ovn_datapath *od)
>>  {
>>      const struct smap *other_config = (od->nbs
>> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>>      uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key", 0);
>>      if (tunnel_key) {
>>          const char *interconn_ts = smap_get(other_config, "interconn-ts");
>> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
>> -            tunnel_key >= 1 << 12) {
>> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>>                           "incompatible with VXLAN", tunnel_key,
>> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>                  const struct nbrec_logical_switch_table *nbrec_ls_table,
>>                  const struct nbrec_logical_router_table *nbrec_lr_table,
>>                  const struct sbrec_datapath_binding_table *sbrec_dp_table,
>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>                  struct ovn_datapaths *ls_datapaths,
>>                  struct ovn_datapaths *lr_datapaths,
>>                  struct ovs_list *lr_list)
>> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>      struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>>      struct ovn_datapath *od;
>>      LIST_FOR_EACH (od, list, &both) {
>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>> -                                             od);
>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>      }
>>      LIST_FOR_EACH (od, list, &nb_only) {
>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>> -                                             od); }
>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>> +    }
>> 
>>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>>      LIST_FOR_EACH (od, list, &both) {
>> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>      /* Assign new tunnel ids where needed. */
>>      uint32_t hint = 0;
>>      LIST_FOR_EACH_SAFE (od, list, &both) {
>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>> -                                  datapaths, &dp_tnlids, od, &hint);
>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>      }
>>      LIST_FOR_EACH_SAFE (od, list, &nb_only) {
>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>> -                                  datapaths, &dp_tnlids, od, &hint);
>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>      }
>> 
>>      /* Sync tunnel ids from nb to sb. */
>> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
>>   * that the I-P engine can fallback to recompute if needed; otherwise return
>>   * true (even if the key is not allocated). */
>>  static bool
>> -ovn_port_assign_requested_tnl_id(
>> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct ovn_port *op)
>> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>>  {
>>      const struct smap *options = (op->nbsp
>>                                    ? &op->nbsp->options
>>                                    : &op->nbrp->options);
>>      uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>>      if (tunnel_key) {
>> -        if (is_vxlan_mode(sbrec_chassis_table) &&
>> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>>                           "is incompatible with VXLAN",
>> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>>  }
>> 
>>  static bool
>> -ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>> -                      struct hmap *ports,
>> -                      struct ovn_port *op)
>> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>>  {
>>      if (!op->tunnel_key) {
>> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
>> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>>          op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>>                                              1, (1u << (key_bits - 1)) - 1,
>>                                              &op->od->port_key_hint);
>> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>  static void
>>  build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>      const struct sbrec_port_binding_table *sbrec_port_binding_table,
>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>      const struct sbrec_mirror_table *sbrec_mirror_table,
>>      const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>>      const struct sbrec_ha_chassis_group_table *sbrec_ha_chassis_group_table,
>> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>      /* Assign explicitly requested tunnel ids first. */
>>      struct ovn_port *op;
>>      LIST_FOR_EACH (op, list, &both) {
>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>> +        ovn_port_assign_requested_tnl_id(op);
>>      }
>>      LIST_FOR_EACH (op, list, &nb_only) {
>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>> +        ovn_port_assign_requested_tnl_id(op);
>>      }
>> 
>>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>> 
>>      /* Assign new tunnel ids where needed. */
>>      LIST_FOR_EACH_SAFE (op, list, &both) {
>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>> +        ovn_port_allocate_key(ports, op);
>>      }
>>      LIST_FOR_EACH_SAFE (op, list, &nb_only) {
>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>> +        ovn_port_allocate_key(ports, op);
>>      }
>> 
>>      /* For logical ports that are in both databases, update the southbound
>> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>               struct hmap *ls_ports, struct ovn_datapath *od,
>>               const struct sbrec_port_binding *sb,
>>               const struct sbrec_mirror_table *sbrec_mirror_table,
>> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>  {
>>      op->od = od;
>>      parse_lsp_addrs(op);
>>      /* Assign explicitly requested tunnel ids first. */
>> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
>> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>>          return false;
>>      }
>>      if (sb) {
>> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>          sbrec_port_binding_set_logical_port(op->sb, op->key);
>>      }
>>      /* Assign new tunnel ids where needed. */
>> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
>> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>>          return false;
>>      }
>>      ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
>> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn, struct hmap *ls_ports,
>>                 const char *key, const struct nbrec_logical_switch_port *nbsp,
>>                 struct ovn_datapath *od, const struct sbrec_port_binding *sb,
>>                 const struct sbrec_mirror_table *sbrec_mirror_table,
>> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>>                 struct ovsdb_idl_index *sbrec_chassis_by_name,
>>                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>  {
>>      struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>>                                            NULL);
>>      hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
>> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>> -                      sbrec_mirror_table, sbrec_chassis_table,
>> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>                        sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
>>          ovn_port_destroy(ls_ports, op);
>>          return NULL;
>> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>                  struct ovn_datapath *od,
>>                  const struct sbrec_port_binding *sb,
>>                  const struct sbrec_mirror_table *sbrec_mirror_table,
>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>                  struct ovsdb_idl_index *sbrec_chassis_by_name,
>>                  struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>  {
>> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>      op->sb = sb;
>>      ovn_port_set_nb(op, nbsp, nbrp);
>>      op->l3dgw_port = op->cr_port = NULL;
>> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>> -                        sbrec_mirror_table, sbrec_chassis_table,
>> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>                          sbrec_chassis_by_name, sbrec_chassis_by_hostname);
>>  }
>> 
>> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>              op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>>                                  new_nbsp->name, new_nbsp, od, NULL,
>>                                  ni->sbrec_mirror_table,
>> -                                ni->sbrec_chassis_table,
>>                                  ni->sbrec_chassis_by_name,
>>                                  ni->sbrec_chassis_by_hostname);
>>              if (!op) {
>> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>              if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>>                                  new_nbsp, NULL,
>>                                  od, sb, ni->sbrec_mirror_table,
>> -                                ni->sbrec_chassis_table,
>>                                  ni->sbrec_chassis_by_name,
>>                                  ni->sbrec_chassis_by_hostname)) {
>>                  goto fail;
>> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>>      use_common_zone = smap_get_bool(input_data->nb_options, "use_common_zone",
>>                                      false);
>> 
>> +    init_vxlan_mode(input_data->nb_options, input_data->sbrec_chassis_table);
>> +
>>      build_datapaths(ovnsb_txn,
>>                      input_data->nbrec_logical_switch_table,
>>                      input_data->nbrec_logical_router_table,
>>                      input_data->sbrec_datapath_binding_table,
>> -                    input_data->sbrec_chassis_table,
>>                      &data->ls_datapaths,
>>                      &data->lr_datapaths, &data->lr_list);
>>      build_lb_datapaths(input_data->lbs, input_data->lbgrps,
>> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>>                         &data->lb_datapaths_map, &data->lb_group_datapaths_map);
>>      build_ports(ovnsb_txn,
>>                  input_data->sbrec_port_binding_table,
>> -                input_data->sbrec_chassis_table,
>>                  input_data->sbrec_mirror_table,
>>                  input_data->sbrec_mac_binding_table,
>>                  input_data->sbrec_ha_chassis_group_table,
>> diff --git a/northd/northd.h b/northd/northd.h
>> index 5e9fa4745..ed9d992fb 100644
>> --- a/northd/northd.h
>> +++ b/northd/northd.h
>> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath *od)
>>      return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>>  }
>> 
>> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
>> +void
>> +init_vxlan_mode(const struct smap *nb_options,
>> +                const struct sbrec_chassis_table *sbrec_chassis_table);
>> +
>> +uint32_t get_ovn_max_dp_key_local(void);
>> 
>>  #endif /* NORTHD_H */
>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>> index b652046a7..9b2cb355e 100644
>> --- a/ovn-nb.xml
>> +++ b/ovn-nb.xml
>> @@ -381,6 +381,18 @@
>>          of SB changes would be very noticeable.
>>        </column>
>> 
>> +      <column name="options" key="disable_vxlan_mode">
>> +        Be default if at least one chassis in OVN cluster has VXLAN encap,
>> +        northd will run in a `vxlan mode`, which means it splits 24-bit
>> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.  Maximum
>> +        datapaths count in this mode is 4095 and maximum ports per datapath is
>> +        2047.  Rest of IDs are used for multicast groups.
>> +        In case VXLAN encaps are needed on chassis only to support HW VTEP
>> +        functionality, and main encap type is GENEVE or STT, set this option to
>> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS and 15
>> +        bits for port tunnel IDs.
>> +      </column>
>> +
>>        <group title="Options for configuring interconnection route advertisement">
>>          <p>
>>            These options control how routes are advertised between OVN
>> diff --git a/tests/ovn-northd.at <http://ovn-northd.at/> b/tests/ovn-northd.at <http://ovn-northd.at/>
>> index cd53755b2..f5d025852 100644
>> --- a/tests/ovn-northd.at <http://ovn-northd.at/>
>> +++ b/tests/ovn-northd.at <http://ovn-northd.at/>
>> @@ -2819,6 +2819,35 @@ AT_CHECK(
>>  get_tunnel_keys
>>  AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
>> 
>> +AT_CLEANUP
>> +])
>> +OVN_FOR_EACH_NORTHD_NO_HV([
>> +AT_SETUP([check vxlan mode disabling])
>> +ovn_start
>> +
>> +# Create a fake chassis with vxlan encap to implicitly enable vxlan mode.
>> +ovn-sbctl \
>> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \
>> +    -- --id=@c create chassis name=hv1 encaps=@e
>> +
>> +cmd="ovn-nbctl --wait=sb"
>> +for i in {1..4097..1}; do
>> +    cmd="${cmd} -- ls-add lsw-${i}"
>> +done
>> +
>> +eval $cmd
>> +
>> +check_row_count nb:Logical_Switch 4097
>> +wait_row_count sb:Datapath_Binding 4095
>> +
>> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted" northd/ovn-northd.log])
>> +
>> +# Explicitly disable vxlan mode and check that two remaining datapaths were created.
>> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
>> +
>> +check_row_count nb:Logical_Switch 4097
>> +wait_row_count sb:Datapath_Binding 4097
>> +
>>  AT_CLEANUP
>>  ])
>> 
>> -- 
>> 2.44.0
>> 


Regards,
Vladislav Odintsov
Dumitru Ceara April 4, 2024, 10:06 a.m. UTC | #4
On 4/3/24 22:05, Vladislav Odintsov wrote:
> re-sending email because ovs list rejected previous its content for some reason:
> 
> Hi Ihar,
> 

Hi Vladislav, Ihar,

> thanks for your quick reaction!
> I didn’t see mentioned thread, but I think that it is not safe enough to have automatic detection of this scenario here.
> 
> Imagine: for VXLAN with HW VTEP scenario besides VXLAN encap one must configure also either GENEVE and/or STT encap(s) for HV chassis.
> 
> So, detection could be implemented like this:
> Check all non-VTEP chassis' encaps and find "effective encap" for each of them. If we detect at least one chassis with "effective encap" == vxlan, then enable vxlan mode. Normal mode otherwise.
> "effective encap" means that for 'vxlan,geneve,stt' encaps effective is geneve, for 'vxlan,stt' -> stt, for 'vxlan' -> vxlan.
> Such behavior was my first idea.
> 
> But I decided that there possible flapping of modes if there is a problem/bug in deployment tooling and it is enough to have only one chassis with wrong encap set to affect vxlan mode for entire OVN cluster. Such mode flapping can result in problems with tunnel ids allocation.

These are valid points.

> So it seems that to have an option that statically sets vxlan mode is more resilient.

In general we try to avoid new config knobs.
.
> What do you think?
> 

But in this case it make actually be easier if we offload the work of
determining vxlan-mode to the CMS.

> 
>> On 3 Apr 2024, at 20:43, Ihar Hrachyshka <ihrachys@redhat.com> wrote:
>>
>> Thank you Vladislav.
>>
>> FYI it was reported in the past in https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html but fell through cracks then. Thanks for picking it up!
>>
>> In your patch, you introduce a new config option to disable the 'vxlan-mode' behavior. This will definitely work. But I wonder if we can automatically detect this scenario by ignoring the chassis that are VTEP from consideration? I believe ovn-controller-vtep sets `is-vtep` in other_config, so - would it work if we modify is_vxlan_mode to consider it too?
>>
>> Thanks again for looking into this.
>> Ihar
>>
>> On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>>> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
>>> for available tunnel IDs because of lack of space in VXLAN VNI.
>>> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
>>> and 2047 logical switch ports per datapath.
>>>
>>> Prior to this patch vxlan mode was enabled automatically if at least one
>>> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
>>> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>>>
>>> This patch adds support for explicit disabling of vxlan mode via
>>> Northbound database.
>>>
>>> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>>>
>>> CC: Ihar Hrachyshka <ihrachys@redhat.com <mailto:ihrachys@redhat.com>>
>>> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>>
>>> ---
>>>  northd/en-global-config.c |  9 +++-
>>>  northd/northd.c           | 90 ++++++++++++++++++---------------------
>>>  northd/northd.h           |  6 ++-
>>>  ovn-nb.xml                | 12 ++++++
>>>  tests/ovn-northd.at <http://ovn-northd.at/>       | 29 +++++++++++++
>>>  5 files changed, 94 insertions(+), 52 deletions(-)
>>>
>>> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
>>> index 34e393b33..9310c4575 100644
>>> --- a/northd/en-global-config.c
>>> +++ b/northd/en-global-config.c
>>> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void *data)
>>>                       config_data->svc_monitor_mac);
>>>      }
>>>
>>> -    char *max_tunid = xasprintf("%d",
>>> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
>>> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
>>> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>>>      smap_replace(options, "max_tunid", max_tunid);
>>>      free(max_tunid);
>>>
>>> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
>>>          return true;
>>>      }
>>>
>>> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
>>> +                           "disable_vxlan_mode", false)) {
>>> +        return true;
>>> +    }
>>> +
>>>      return false;
>>>  }
>>>
>>> diff --git a/northd/northd.c b/northd/northd.c
>>> index c568f6360..859b233e8 100644
>>> --- a/northd/northd.c
>>> +++ b/northd/northd.c
>>> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>>>   */
>>>  static bool default_acl_drop;
>>>
>>> +/* If this option is 'true' northd will use limited 24-bit space for datapath
>>> + * and ports tunnel key allocation (12 bits for each instead of default 16). */
>>> +static bool vxlan_mode;
>>> +
>>>  #define MAX_OVN_TAGS 4096
>>>
>>>
>>> @@ -875,24 +879,31 @@ join_datapaths(const struct nbrec_logical_switch_table *nbrec_ls_table,
>>>      }
>>>  }
>>>
>>> -static bool
>>> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
>>> +void
>>> +init_vxlan_mode(const struct smap *nb_options,
>>> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>>>  {
>>> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
>>> +        vxlan_mode = false;
>>> +        return;
>>> +    }
>>> +
>>>      const struct sbrec_chassis *chassis;
>>>      SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>>>          for (int i = 0; i < chassis->n_encaps; i++) {
>>>              if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
>>> -                return true;
>>> +                vxlan_mode = true;
>>> +                return;
>>>              }
>>>          }
>>>      }
>>> -    return false;
>>> +    vxlan_mode = false;
>>>  }
>>>
>>>  uint32_t
>>> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>> +get_ovn_max_dp_key_local(void)
>>>  {
>>> -    if (is_vxlan_mode(sbrec_chassis_table)) {
>>> +    if (vxlan_mode) {
>>>          /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>>>          return OVN_MAX_DP_VXLAN_KEY;
>>>      }
>>> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>  }
>>>
>>>  static void
>>> -ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>>> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
>>> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>>>                            struct ovn_datapath *od, uint32_t *hint)
>>>  {
>>>      if (!od->tunnel_key) {
>>>          od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
>>> -                                    OVN_MIN_DP_KEY_LOCAL,
>>> -                                    get_ovn_max_dp_key_local(sbrec_ch_table),
>>> -                                    hint);
>>> +                                            OVN_MIN_DP_KEY_LOCAL,
>>> +                                            get_ovn_max_dp_key_local(),
>>> +                                            hint);
>>>          if (!od->tunnel_key) {
>>>              if (od->sb) {
>>>                  sbrec_datapath_binding_delete(od->sb);
>>> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>>>
>>>  static void
>>>  ovn_datapath_assign_requested_tnl_id(
>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>      struct hmap *dp_tnlids, struct ovn_datapath *od)
>>>  {
>>>      const struct smap *other_config = (od->nbs
>>> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>>>      uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key", 0);
>>>      if (tunnel_key) {
>>>          const char *interconn_ts = smap_get(other_config, "interconn-ts");
>>> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
>>> -            tunnel_key >= 1 << 12) {
>>> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>>>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>>>                           "incompatible with VXLAN", tunnel_key,
>>> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>                  const struct nbrec_logical_switch_table *nbrec_ls_table,
>>>                  const struct nbrec_logical_router_table *nbrec_lr_table,
>>>                  const struct sbrec_datapath_binding_table *sbrec_dp_table,
>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>                  struct ovn_datapaths *ls_datapaths,
>>>                  struct ovn_datapaths *lr_datapaths,
>>>                  struct ovs_list *lr_list)
>>> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>      struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>>>      struct ovn_datapath *od;
>>>      LIST_FOR_EACH (od, list, &both) {
>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>>> -                                             od);
>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>      }
>>>      LIST_FOR_EACH (od, list, &nb_only) {
>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>>> -                                             od); }
>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>> +    }
>>>
>>>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>      LIST_FOR_EACH (od, list, &both) {
>>> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>      /* Assign new tunnel ids where needed. */
>>>      uint32_t hint = 0;
>>>      LIST_FOR_EACH_SAFE (od, list, &both) {
>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>      }
>>>      LIST_FOR_EACH_SAFE (od, list, &nb_only) {
>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>      }
>>>
>>>      /* Sync tunnel ids from nb to sb. */
>>> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
>>>   * that the I-P engine can fallback to recompute if needed; otherwise return
>>>   * true (even if the key is not allocated). */
>>>  static bool
>>> -ovn_port_assign_requested_tnl_id(
>>> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct ovn_port *op)
>>> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>>>  {
>>>      const struct smap *options = (op->nbsp
>>>                                    ? &op->nbsp->options
>>>                                    : &op->nbrp->options);
>>>      uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>>>      if (tunnel_key) {
>>> -        if (is_vxlan_mode(sbrec_chassis_table) &&
>>> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>>>                           "is incompatible with VXLAN",
>>> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>>>  }
>>>
>>>  static bool
>>> -ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>> -                      struct hmap *ports,
>>> -                      struct ovn_port *op)
>>> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>>>  {
>>>      if (!op->tunnel_key) {
>>> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
>>> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>>>          op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>>>                                              1, (1u << (key_bits - 1)) - 1,
>>>                                              &op->od->port_key_hint);
>>> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>>  static void
>>>  build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>      const struct sbrec_port_binding_table *sbrec_port_binding_table,
>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>      const struct sbrec_mirror_table *sbrec_mirror_table,
>>>      const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>>>      const struct sbrec_ha_chassis_group_table *sbrec_ha_chassis_group_table,
>>> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>      /* Assign explicitly requested tunnel ids first. */
>>>      struct ovn_port *op;
>>>      LIST_FOR_EACH (op, list, &both) {
>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>> +        ovn_port_assign_requested_tnl_id(op);
>>>      }
>>>      LIST_FOR_EACH (op, list, &nb_only) {
>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>> +        ovn_port_assign_requested_tnl_id(op);
>>>      }
>>>
>>>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>>> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>
>>>      /* Assign new tunnel ids where needed. */
>>>      LIST_FOR_EACH_SAFE (op, list, &both) {
>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>> +        ovn_port_allocate_key(ports, op);
>>>      }
>>>      LIST_FOR_EACH_SAFE (op, list, &nb_only) {
>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>> +        ovn_port_allocate_key(ports, op);
>>>      }
>>>
>>>      /* For logical ports that are in both databases, update the southbound
>>> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>               struct hmap *ls_ports, struct ovn_datapath *od,
>>>               const struct sbrec_port_binding *sb,
>>>               const struct sbrec_mirror_table *sbrec_mirror_table,
>>> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>>>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>  {
>>>      op->od = od;
>>>      parse_lsp_addrs(op);
>>>      /* Assign explicitly requested tunnel ids first. */
>>> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
>>> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>>>          return false;
>>>      }
>>>      if (sb) {
>>> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>          sbrec_port_binding_set_logical_port(op->sb, op->key);
>>>      }
>>>      /* Assign new tunnel ids where needed. */
>>> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
>>> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>>>          return false;
>>>      }
>>>      ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
>>> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn, struct hmap *ls_ports,
>>>                 const char *key, const struct nbrec_logical_switch_port *nbsp,
>>>                 struct ovn_datapath *od, const struct sbrec_port_binding *sb,
>>>                 const struct sbrec_mirror_table *sbrec_mirror_table,
>>> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>>>                 struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>  {
>>>      struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>>>                                            NULL);
>>>      hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
>>> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>> -                      sbrec_mirror_table, sbrec_chassis_table,
>>> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>>                        sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
>>>          ovn_port_destroy(ls_ports, op);
>>>          return NULL;
>>> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>                  struct ovn_datapath *od,
>>>                  const struct sbrec_port_binding *sb,
>>>                  const struct sbrec_mirror_table *sbrec_mirror_table,
>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>                  struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>                  struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>  {
>>> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>      op->sb = sb;
>>>      ovn_port_set_nb(op, nbsp, nbrp);
>>>      op->l3dgw_port = op->cr_port = NULL;
>>> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>> -                        sbrec_mirror_table, sbrec_chassis_table,
>>> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>>                          sbrec_chassis_by_name, sbrec_chassis_by_hostname);
>>>  }
>>>
>>> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>              op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>>>                                  new_nbsp->name, new_nbsp, od, NULL,
>>>                                  ni->sbrec_mirror_table,
>>> -                                ni->sbrec_chassis_table,
>>>                                  ni->sbrec_chassis_by_name,
>>>                                  ni->sbrec_chassis_by_hostname);
>>>              if (!op) {
>>> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>              if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>>>                                  new_nbsp, NULL,
>>>                                  od, sb, ni->sbrec_mirror_table,
>>> -                                ni->sbrec_chassis_table,
>>>                                  ni->sbrec_chassis_by_name,
>>>                                  ni->sbrec_chassis_by_hostname)) {
>>>                  goto fail;
>>> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>>>      use_common_zone = smap_get_bool(input_data->nb_options, "use_common_zone",
>>>                                      false);
>>>
>>> +    init_vxlan_mode(input_data->nb_options, input_data->sbrec_chassis_table);
>>> +
>>>      build_datapaths(ovnsb_txn,
>>>                      input_data->nbrec_logical_switch_table,
>>>                      input_data->nbrec_logical_router_table,
>>>                      input_data->sbrec_datapath_binding_table,
>>> -                    input_data->sbrec_chassis_table,
>>>                      &data->ls_datapaths,
>>>                      &data->lr_datapaths, &data->lr_list);
>>>      build_lb_datapaths(input_data->lbs, input_data->lbgrps,
>>> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>>>                         &data->lb_datapaths_map, &data->lb_group_datapaths_map);
>>>      build_ports(ovnsb_txn,
>>>                  input_data->sbrec_port_binding_table,
>>> -                input_data->sbrec_chassis_table,
>>>                  input_data->sbrec_mirror_table,
>>>                  input_data->sbrec_mac_binding_table,
>>>                  input_data->sbrec_ha_chassis_group_table,
>>> diff --git a/northd/northd.h b/northd/northd.h
>>> index 5e9fa4745..ed9d992fb 100644
>>> --- a/northd/northd.h
>>> +++ b/northd/northd.h
>>> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath *od)
>>>      return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>>>  }
>>>
>>> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
>>> +void
>>> +init_vxlan_mode(const struct smap *nb_options,
>>> +                const struct sbrec_chassis_table *sbrec_chassis_table);
>>> +
>>> +uint32_t get_ovn_max_dp_key_local(void);
>>>
>>>  #endif /* NORTHD_H */
>>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>>> index b652046a7..9b2cb355e 100644
>>> --- a/ovn-nb.xml
>>> +++ b/ovn-nb.xml
>>> @@ -381,6 +381,18 @@
>>>          of SB changes would be very noticeable.
>>>        </column>
>>>
>>> +      <column name="options" key="disable_vxlan_mode">
>>> +        Be default if at least one chassis in OVN cluster has VXLAN encap,
>>> +        northd will run in a `vxlan mode`, which means it splits 24-bit
>>> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.  Maximum
>>> +        datapaths count in this mode is 4095 and maximum ports per datapath is
>>> +        2047.  Rest of IDs are used for multicast groups.
>>> +        In case VXLAN encaps are needed on chassis only to support HW VTEP
>>> +        functionality, and main encap type is GENEVE or STT, set this option to
>>> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS and 15
>>> +        bits for port tunnel IDs.
>>> +      </column>
>>> +

Let's also add a NEWS entry for this one.  It's quite an important
user-visible change.

>>>        <group title="Options for configuring interconnection route advertisement">
>>>          <p>
>>>            These options control how routes are advertised between OVN
>>> diff --git a/tests/ovn-northd.at <http://ovn-northd.at/> b/tests/ovn-northd.at <http://ovn-northd.at/>
>>> index cd53755b2..f5d025852 100644
>>> --- a/tests/ovn-northd.at <http://ovn-northd.at/>
>>> +++ b/tests/ovn-northd.at <http://ovn-northd.at/>
>>> @@ -2819,6 +2819,35 @@ AT_CHECK(
>>>  get_tunnel_keys
>>>  AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
>>>
>>> +AT_CLEANUP
>>> +])
>>> +OVN_FOR_EACH_NORTHD_NO_HV([
>>> +AT_SETUP([check vxlan mode disabling])
>>> +ovn_start
>>> +
>>> +# Create a fake chassis with vxlan encap to implicitly enable vxlan mode.
>>> +ovn-sbctl \
>>> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \
>>> +    -- --id=@c create chassis name=hv1 encaps=@e
>>> +
>>> +cmd="ovn-nbctl --wait=sb"
>>> +for i in {1..4097..1}; do
>>> +    cmd="${cmd} -- ls-add lsw-${i}"
>>> +done
>>> +
>>> +eval $cmd
>>> +
>>> +check_row_count nb:Logical_Switch 4097
>>> +wait_row_count sb:Datapath_Binding 4095
>>> +
>>> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted" northd/ovn-northd.log])
>>> +
>>> +# Explicitly disable vxlan mode and check that two remaining datapaths were created.
>>> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
>>> +
>>> +check_row_count nb:Logical_Switch 4097
>>> +wait_row_count sb:Datapath_Binding 4097
>>> +
>>>  AT_CLEANUP
>>>  ])
>>>
>>> -- 
>>> 2.44.0
>>>
> 
> 
> Regards,
> Vladislav Odintsov

Regards,
Dumitru
Vladislav Odintsov April 4, 2024, 12:33 p.m. UTC | #5
Hi Dumitru,

thanks for your attention on this!

> On 4 Apr 2024, at 13:06, Dumitru Ceara <dceara@redhat.com> wrote:
> 
> On 4/3/24 22:05, Vladislav Odintsov wrote:
>> re-sending email because ovs list rejected previous its content for some reason:
>> 
>> Hi Ihar,
>> 
> 
> Hi Vladislav, Ihar,
> 
>> thanks for your quick reaction!
>> I didn’t see mentioned thread, but I think that it is not safe enough to have automatic detection of this scenario here.
>> 
>> Imagine: for VXLAN with HW VTEP scenario besides VXLAN encap one must configure also either GENEVE and/or STT encap(s) for HV chassis.
>> 
>> So, detection could be implemented like this:
>> Check all non-VTEP chassis' encaps and find "effective encap" for each of them. If we detect at least one chassis with "effective encap" == vxlan, then enable vxlan mode. Normal mode otherwise.
>> "effective encap" means that for 'vxlan,geneve,stt' encaps effective is geneve, for 'vxlan,stt' -> stt, for 'vxlan' -> vxlan.
>> Such behavior was my first idea.
>> 
>> But I decided that there possible flapping of modes if there is a problem/bug in deployment tooling and it is enough to have only one chassis with wrong encap set to affect vxlan mode for entire OVN cluster. Such mode flapping can result in problems with tunnel ids allocation.
> 
> These are valid points.
> 
>> So it seems that to have an option that statically sets vxlan mode is more resilient.
> 
> In general we try to avoid new config knobs.
> .
>> What do you think?
>> 
> 
> But in this case it make actually be easier if we offload the work of
> determining vxlan-mode to the CMS.
> 
>> 
>>> On 3 Apr 2024, at 20:43, Ihar Hrachyshka <ihrachys@redhat.com> wrote:
>>> 
>>> Thank you Vladislav.
>>> 
>>> FYI it was reported in the past in https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html but fell through cracks then. Thanks for picking it up!
>>> 
>>> In your patch, you introduce a new config option to disable the 'vxlan-mode' behavior. This will definitely work. But I wonder if we can automatically detect this scenario by ignoring the chassis that are VTEP from consideration? I believe ovn-controller-vtep sets `is-vtep` in other_config, so - would it work if we modify is_vxlan_mode to consider it too?
>>> 
>>> Thanks again for looking into this.
>>> Ihar
>>> 
>>> On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>>>> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
>>>> for available tunnel IDs because of lack of space in VXLAN VNI.
>>>> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
>>>> and 2047 logical switch ports per datapath.
>>>> 
>>>> Prior to this patch vxlan mode was enabled automatically if at least one
>>>> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
>>>> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>>>> 
>>>> This patch adds support for explicit disabling of vxlan mode via
>>>> Northbound database.
>>>> 
>>>> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>>>> 
>>>> CC: Ihar Hrachyshka <ihrachys@redhat.com <mailto:ihrachys@redhat.com>>
>>>> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>>
>>>> ---
>>>> northd/en-global-config.c |  9 +++-
>>>> northd/northd.c           | 90 ++++++++++++++++++---------------------
>>>> northd/northd.h           |  6 ++-
>>>> ovn-nb.xml                | 12 ++++++
>>>> tests/ovn-northd.at <http://ovn-northd.at/>       | 29 +++++++++++++
>>>> 5 files changed, 94 insertions(+), 52 deletions(-)
>>>> 
>>>> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
>>>> index 34e393b33..9310c4575 100644
>>>> --- a/northd/en-global-config.c
>>>> +++ b/northd/en-global-config.c
>>>> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void *data)
>>>>                      config_data->svc_monitor_mac);
>>>>     }
>>>> 
>>>> -    char *max_tunid = xasprintf("%d",
>>>> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
>>>> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
>>>> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>>>>     smap_replace(options, "max_tunid", max_tunid);
>>>>     free(max_tunid);
>>>> 
>>>> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
>>>>         return true;
>>>>     }
>>>> 
>>>> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
>>>> +                           "disable_vxlan_mode", false)) {
>>>> +        return true;
>>>> +    }
>>>> +
>>>>     return false;
>>>> }
>>>> 
>>>> diff --git a/northd/northd.c b/northd/northd.c
>>>> index c568f6360..859b233e8 100644
>>>> --- a/northd/northd.c
>>>> +++ b/northd/northd.c
>>>> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>>>>  */
>>>> static bool default_acl_drop;
>>>> 
>>>> +/* If this option is 'true' northd will use limited 24-bit space for datapath
>>>> + * and ports tunnel key allocation (12 bits for each instead of default 16). */
>>>> +static bool vxlan_mode;
>>>> +
>>>> #define MAX_OVN_TAGS 4096
>>>> 
>>>> 
>>>> @@ -875,24 +879,31 @@ join_datapaths(const struct nbrec_logical_switch_table *nbrec_ls_table,
>>>>     }
>>>> }
>>>> 
>>>> -static bool
>>>> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>> +void
>>>> +init_vxlan_mode(const struct smap *nb_options,
>>>> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>>>> {
>>>> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
>>>> +        vxlan_mode = false;
>>>> +        return;
>>>> +    }
>>>> +
>>>>     const struct sbrec_chassis *chassis;
>>>>     SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>>>>         for (int i = 0; i < chassis->n_encaps; i++) {
>>>>             if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
>>>> -                return true;
>>>> +                vxlan_mode = true;
>>>> +                return;
>>>>             }
>>>>         }
>>>>     }
>>>> -    return false;
>>>> +    vxlan_mode = false;
>>>> }
>>>> 
>>>> uint32_t
>>>> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>> +get_ovn_max_dp_key_local(void)
>>>> {
>>>> -    if (is_vxlan_mode(sbrec_chassis_table)) {
>>>> +    if (vxlan_mode) {
>>>>         /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>>>>         return OVN_MAX_DP_VXLAN_KEY;
>>>>     }
>>>> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>> }
>>>> 
>>>> static void
>>>> -ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>>>> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
>>>> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>>>>                           struct ovn_datapath *od, uint32_t *hint)
>>>> {
>>>>     if (!od->tunnel_key) {
>>>>         od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
>>>> -                                    OVN_MIN_DP_KEY_LOCAL,
>>>> -                                    get_ovn_max_dp_key_local(sbrec_ch_table),
>>>> -                                    hint);
>>>> +                                            OVN_MIN_DP_KEY_LOCAL,
>>>> +                                            get_ovn_max_dp_key_local(),
>>>> +                                            hint);
>>>>         if (!od->tunnel_key) {
>>>>             if (od->sb) {
>>>>                 sbrec_datapath_binding_delete(od->sb);
>>>> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>>>> 
>>>> static void
>>>> ovn_datapath_assign_requested_tnl_id(
>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>     struct hmap *dp_tnlids, struct ovn_datapath *od)
>>>> {
>>>>     const struct smap *other_config = (od->nbs
>>>> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>>>>     uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key", 0);
>>>>     if (tunnel_key) {
>>>>         const char *interconn_ts = smap_get(other_config, "interconn-ts");
>>>> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
>>>> -            tunnel_key >= 1 << 12) {
>>>> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>>>>             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>>             VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>>>>                          "incompatible with VXLAN", tunnel_key,
>>>> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>                 const struct nbrec_logical_switch_table *nbrec_ls_table,
>>>>                 const struct nbrec_logical_router_table *nbrec_lr_table,
>>>>                 const struct sbrec_datapath_binding_table *sbrec_dp_table,
>>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>                 struct ovn_datapaths *ls_datapaths,
>>>>                 struct ovn_datapaths *lr_datapaths,
>>>>                 struct ovs_list *lr_list)
>>>> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>     struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>>>>     struct ovn_datapath *od;
>>>>     LIST_FOR_EACH (od, list, &both) {
>>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>>>> -                                             od);
>>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>>     }
>>>>     LIST_FOR_EACH (od, list, &nb_only) {
>>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>>>> -                                             od); }
>>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>> +    }
>>>> 
>>>>     /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>>     LIST_FOR_EACH (od, list, &both) {
>>>> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>     /* Assign new tunnel ids where needed. */
>>>>     uint32_t hint = 0;
>>>>     LIST_FOR_EACH_SAFE (od, list, &both) {
>>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>>     }
>>>>     LIST_FOR_EACH_SAFE (od, list, &nb_only) {
>>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>>     }
>>>> 
>>>>     /* Sync tunnel ids from nb to sb. */
>>>> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
>>>>  * that the I-P engine can fallback to recompute if needed; otherwise return
>>>>  * true (even if the key is not allocated). */
>>>> static bool
>>>> -ovn_port_assign_requested_tnl_id(
>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct ovn_port *op)
>>>> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>>>> {
>>>>     const struct smap *options = (op->nbsp
>>>>                                   ? &op->nbsp->options
>>>>                                   : &op->nbrp->options);
>>>>     uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>>>>     if (tunnel_key) {
>>>> -        if (is_vxlan_mode(sbrec_chassis_table) &&
>>>> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>>             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>>             VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>>>>                          "is incompatible with VXLAN",
>>>> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>>>> }
>>>> 
>>>> static bool
>>>> -ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>>> -                      struct hmap *ports,
>>>> -                      struct ovn_port *op)
>>>> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>>>> {
>>>>     if (!op->tunnel_key) {
>>>> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
>>>> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>>>>         op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>>>>                                             1, (1u << (key_bits - 1)) - 1,
>>>>                                             &op->od->port_key_hint);
>>>> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>>> static void
>>>> build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>     const struct sbrec_port_binding_table *sbrec_port_binding_table,
>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>     const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>     const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>>>>     const struct sbrec_ha_chassis_group_table *sbrec_ha_chassis_group_table,
>>>> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>     /* Assign explicitly requested tunnel ids first. */
>>>>     struct ovn_port *op;
>>>>     LIST_FOR_EACH (op, list, &both) {
>>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>>> +        ovn_port_assign_requested_tnl_id(op);
>>>>     }
>>>>     LIST_FOR_EACH (op, list, &nb_only) {
>>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>>> +        ovn_port_assign_requested_tnl_id(op);
>>>>     }
>>>> 
>>>>     /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>> 
>>>>     /* Assign new tunnel ids where needed. */
>>>>     LIST_FOR_EACH_SAFE (op, list, &both) {
>>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>>> +        ovn_port_allocate_key(ports, op);
>>>>     }
>>>>     LIST_FOR_EACH_SAFE (op, list, &nb_only) {
>>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>>> +        ovn_port_allocate_key(ports, op);
>>>>     }
>>>> 
>>>>     /* For logical ports that are in both databases, update the southbound
>>>> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>              struct hmap *ls_ports, struct ovn_datapath *od,
>>>>              const struct sbrec_port_binding *sb,
>>>>              const struct sbrec_mirror_table *sbrec_mirror_table,
>>>> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>              struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>              struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>> {
>>>>     op->od = od;
>>>>     parse_lsp_addrs(op);
>>>>     /* Assign explicitly requested tunnel ids first. */
>>>> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
>>>> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>>>>         return false;
>>>>     }
>>>>     if (sb) {
>>>> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>         sbrec_port_binding_set_logical_port(op->sb, op->key);
>>>>     }
>>>>     /* Assign new tunnel ids where needed. */
>>>> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
>>>> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>>>>         return false;
>>>>     }
>>>>     ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
>>>> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn, struct hmap *ls_ports,
>>>>                const char *key, const struct nbrec_logical_switch_port *nbsp,
>>>>                struct ovn_datapath *od, const struct sbrec_port_binding *sb,
>>>>                const struct sbrec_mirror_table *sbrec_mirror_table,
>>>> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>                struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>                struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>> {
>>>>     struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>>>>                                           NULL);
>>>>     hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
>>>> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>>> -                      sbrec_mirror_table, sbrec_chassis_table,
>>>> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>>>                       sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
>>>>         ovn_port_destroy(ls_ports, op);
>>>>         return NULL;
>>>> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>                 struct ovn_datapath *od,
>>>>                 const struct sbrec_port_binding *sb,
>>>>                 const struct sbrec_mirror_table *sbrec_mirror_table,
>>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>                 struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>> {
>>>> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>     op->sb = sb;
>>>>     ovn_port_set_nb(op, nbsp, nbrp);
>>>>     op->l3dgw_port = op->cr_port = NULL;
>>>> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>>> -                        sbrec_mirror_table, sbrec_chassis_table,
>>>> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>>>                         sbrec_chassis_by_name, sbrec_chassis_by_hostname);
>>>> }
>>>> 
>>>> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>>             op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>>>>                                 new_nbsp->name, new_nbsp, od, NULL,
>>>>                                 ni->sbrec_mirror_table,
>>>> -                                ni->sbrec_chassis_table,
>>>>                                 ni->sbrec_chassis_by_name,
>>>>                                 ni->sbrec_chassis_by_hostname);
>>>>             if (!op) {
>>>> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>>             if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>>>>                                 new_nbsp, NULL,
>>>>                                 od, sb, ni->sbrec_mirror_table,
>>>> -                                ni->sbrec_chassis_table,
>>>>                                 ni->sbrec_chassis_by_name,
>>>>                                 ni->sbrec_chassis_by_hostname)) {
>>>>                 goto fail;
>>>> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>     use_common_zone = smap_get_bool(input_data->nb_options, "use_common_zone",
>>>>                                     false);
>>>> 
>>>> +    init_vxlan_mode(input_data->nb_options, input_data->sbrec_chassis_table);
>>>> +
>>>>     build_datapaths(ovnsb_txn,
>>>>                     input_data->nbrec_logical_switch_table,
>>>>                     input_data->nbrec_logical_router_table,
>>>>                     input_data->sbrec_datapath_binding_table,
>>>> -                    input_data->sbrec_chassis_table,
>>>>                     &data->ls_datapaths,
>>>>                     &data->lr_datapaths, &data->lr_list);
>>>>     build_lb_datapaths(input_data->lbs, input_data->lbgrps,
>>>> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>                        &data->lb_datapaths_map, &data->lb_group_datapaths_map);
>>>>     build_ports(ovnsb_txn,
>>>>                 input_data->sbrec_port_binding_table,
>>>> -                input_data->sbrec_chassis_table,
>>>>                 input_data->sbrec_mirror_table,
>>>>                 input_data->sbrec_mac_binding_table,
>>>>                 input_data->sbrec_ha_chassis_group_table,
>>>> diff --git a/northd/northd.h b/northd/northd.h
>>>> index 5e9fa4745..ed9d992fb 100644
>>>> --- a/northd/northd.h
>>>> +++ b/northd/northd.h
>>>> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath *od)
>>>>     return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>>>> }
>>>> 
>>>> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
>>>> +void
>>>> +init_vxlan_mode(const struct smap *nb_options,
>>>> +                const struct sbrec_chassis_table *sbrec_chassis_table);
>>>> +
>>>> +uint32_t get_ovn_max_dp_key_local(void);
>>>> 
>>>> #endif /* NORTHD_H */
>>>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>>>> index b652046a7..9b2cb355e 100644
>>>> --- a/ovn-nb.xml
>>>> +++ b/ovn-nb.xml
>>>> @@ -381,6 +381,18 @@
>>>>         of SB changes would be very noticeable.
>>>>       </column>
>>>> 
>>>> +      <column name="options" key="disable_vxlan_mode">
>>>> +        Be default if at least one chassis in OVN cluster has VXLAN encap,
>>>> +        northd will run in a `vxlan mode`, which means it splits 24-bit
>>>> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.  Maximum
>>>> +        datapaths count in this mode is 4095 and maximum ports per datapath is
>>>> +        2047.  Rest of IDs are used for multicast groups.
>>>> +        In case VXLAN encaps are needed on chassis only to support HW VTEP
>>>> +        functionality, and main encap type is GENEVE or STT, set this option to
>>>> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS and 15
>>>> +        bits for port tunnel IDs.
>>>> +      </column>
>>>> +
> 
> Let's also add a NEWS entry for this one.  It's quite an important
> user-visible change.

ACK, I’ll address this in v2.

Also, if my testcase in this current patch requires patch [1], should I send them both in v2?

> 
>>>>       <group title="Options for configuring interconnection route advertisement">
>>>>         <p>
>>>>           These options control how routes are advertised between OVN
>>>> diff --git a/tests/ovn-northd.at <http://ovn-northd.at/> b/tests/ovn-northd.at <http://ovn-northd.at/>
>>>> index cd53755b2..f5d025852 100644
>>>> --- a/tests/ovn-northd.at <http://ovn-northd.at/>
>>>> +++ b/tests/ovn-northd.at <http://ovn-northd.at/>
>>>> @@ -2819,6 +2819,35 @@ AT_CHECK(
>>>> get_tunnel_keys
>>>> AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
>>>> 
>>>> +AT_CLEANUP
>>>> +])
>>>> +OVN_FOR_EACH_NORTHD_NO_HV([
>>>> +AT_SETUP([check vxlan mode disabling])
>>>> +ovn_start
>>>> +
>>>> +# Create a fake chassis with vxlan encap to implicitly enable vxlan mode.
>>>> +ovn-sbctl \
>>>> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \
>>>> +    -- --id=@c create chassis name=hv1 encaps=@e
>>>> +
>>>> +cmd="ovn-nbctl --wait=sb"
>>>> +for i in {1..4097..1}; do
>>>> +    cmd="${cmd} -- ls-add lsw-${i}"
>>>> +done
>>>> +
>>>> +eval $cmd
>>>> +
>>>> +check_row_count nb:Logical_Switch 4097
>>>> +wait_row_count sb:Datapath_Binding 4095
>>>> +
>>>> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted" northd/ovn-northd.log])
>>>> +
>>>> +# Explicitly disable vxlan mode and check that two remaining datapaths were created.
>>>> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
>>>> +
>>>> +check_row_count nb:Logical_Switch 4097
>>>> +wait_row_count sb:Datapath_Binding 4097
>>>> +
>>>> AT_CLEANUP
>>>> ])
>>>> 
>>>> -- 
>>>> 2.44.0
>>>> 
>> 
>> 
>> Regards,
>> Vladislav Odintsov
> 
> Regards,
> Dumitru
> 


Regards,
Vladislav Odintsov
Vladislav Odintsov April 4, 2024, 12:38 p.m. UTC | #6
*Patch [1] is https://patchwork.ozlabs.org/project/ovn/patch/20240401121510.758326-1-odivlad@gmail.com/

> On 4 Apr 2024, at 15:33, Vladislav Odintsov <odivlad@gmail.com> wrote:
> 
> Hi Dumitru,
> 
> thanks for your attention on this!
> 
>> On 4 Apr 2024, at 13:06, Dumitru Ceara <dceara@redhat.com> wrote:
>> 
>> On 4/3/24 22:05, Vladislav Odintsov wrote:
>>> re-sending email because ovs list rejected previous its content for some reason:
>>> 
>>> Hi Ihar,
>>> 
>> 
>> Hi Vladislav, Ihar,
>> 
>>> thanks for your quick reaction!
>>> I didn’t see mentioned thread, but I think that it is not safe enough to have automatic detection of this scenario here.
>>> 
>>> Imagine: for VXLAN with HW VTEP scenario besides VXLAN encap one must configure also either GENEVE and/or STT encap(s) for HV chassis.
>>> 
>>> So, detection could be implemented like this:
>>> Check all non-VTEP chassis' encaps and find "effective encap" for each of them. If we detect at least one chassis with "effective encap" == vxlan, then enable vxlan mode. Normal mode otherwise.
>>> "effective encap" means that for 'vxlan,geneve,stt' encaps effective is geneve, for 'vxlan,stt' -> stt, for 'vxlan' -> vxlan.
>>> Such behavior was my first idea.
>>> 
>>> But I decided that there possible flapping of modes if there is a problem/bug in deployment tooling and it is enough to have only one chassis with wrong encap set to affect vxlan mode for entire OVN cluster. Such mode flapping can result in problems with tunnel ids allocation.
>> 
>> These are valid points.
>> 
>>> So it seems that to have an option that statically sets vxlan mode is more resilient.
>> 
>> In general we try to avoid new config knobs.
>> .
>>> What do you think?
>>> 
>> 
>> But in this case it make actually be easier if we offload the work of
>> determining vxlan-mode to the CMS.
>> 
>>> 
>>>> On 3 Apr 2024, at 20:43, Ihar Hrachyshka <ihrachys@redhat.com> wrote:
>>>> 
>>>> Thank you Vladislav.
>>>> 
>>>> FYI it was reported in the past in https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html but fell through cracks then. Thanks for picking it up!
>>>> 
>>>> In your patch, you introduce a new config option to disable the 'vxlan-mode' behavior. This will definitely work. But I wonder if we can automatically detect this scenario by ignoring the chassis that are VTEP from consideration? I believe ovn-controller-vtep sets `is-vtep` in other_config, so - would it work if we modify is_vxlan_mode to consider it too?
>>>> 
>>>> Thanks again for looking into this.
>>>> Ihar
>>>> 
>>>> On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>>>>> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
>>>>> for available tunnel IDs because of lack of space in VXLAN VNI.
>>>>> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
>>>>> and 2047 logical switch ports per datapath.
>>>>> 
>>>>> Prior to this patch vxlan mode was enabled automatically if at least one
>>>>> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
>>>>> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>>>>> 
>>>>> This patch adds support for explicit disabling of vxlan mode via
>>>>> Northbound database.
>>>>> 
>>>>> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>>>>> 
>>>>> CC: Ihar Hrachyshka <ihrachys@redhat.com <mailto:ihrachys@redhat.com>>
>>>>> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>>
>>>>> ---
>>>>> northd/en-global-config.c |  9 +++-
>>>>> northd/northd.c           | 90 ++++++++++++++++++---------------------
>>>>> northd/northd.h           |  6 ++-
>>>>> ovn-nb.xml                | 12 ++++++
>>>>> tests/ovn-northd.at <http://ovn-northd.at/>       | 29 +++++++++++++
>>>>> 5 files changed, 94 insertions(+), 52 deletions(-)
>>>>> 
>>>>> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
>>>>> index 34e393b33..9310c4575 100644
>>>>> --- a/northd/en-global-config.c
>>>>> +++ b/northd/en-global-config.c
>>>>> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void *data)
>>>>>                     config_data->svc_monitor_mac);
>>>>>    }
>>>>> 
>>>>> -    char *max_tunid = xasprintf("%d",
>>>>> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
>>>>> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
>>>>> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>>>>>    smap_replace(options, "max_tunid", max_tunid);
>>>>>    free(max_tunid);
>>>>> 
>>>>> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
>>>>>        return true;
>>>>>    }
>>>>> 
>>>>> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
>>>>> +                           "disable_vxlan_mode", false)) {
>>>>> +        return true;
>>>>> +    }
>>>>> +
>>>>>    return false;
>>>>> }
>>>>> 
>>>>> diff --git a/northd/northd.c b/northd/northd.c
>>>>> index c568f6360..859b233e8 100644
>>>>> --- a/northd/northd.c
>>>>> +++ b/northd/northd.c
>>>>> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>>>>> */
>>>>> static bool default_acl_drop;
>>>>> 
>>>>> +/* If this option is 'true' northd will use limited 24-bit space for datapath
>>>>> + * and ports tunnel key allocation (12 bits for each instead of default 16). */
>>>>> +static bool vxlan_mode;
>>>>> +
>>>>> #define MAX_OVN_TAGS 4096
>>>>> 
>>>>> 
>>>>> @@ -875,24 +879,31 @@ join_datapaths(const struct nbrec_logical_switch_table *nbrec_ls_table,
>>>>>    }
>>>>> }
>>>>> 
>>>>> -static bool
>>>>> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>> +void
>>>>> +init_vxlan_mode(const struct smap *nb_options,
>>>>> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>> {
>>>>> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
>>>>> +        vxlan_mode = false;
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>>    const struct sbrec_chassis *chassis;
>>>>>    SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>>>>>        for (int i = 0; i < chassis->n_encaps; i++) {
>>>>>            if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
>>>>> -                return true;
>>>>> +                vxlan_mode = true;
>>>>> +                return;
>>>>>            }
>>>>>        }
>>>>>    }
>>>>> -    return false;
>>>>> +    vxlan_mode = false;
>>>>> }
>>>>> 
>>>>> uint32_t
>>>>> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>> +get_ovn_max_dp_key_local(void)
>>>>> {
>>>>> -    if (is_vxlan_mode(sbrec_chassis_table)) {
>>>>> +    if (vxlan_mode) {
>>>>>        /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>>>>>        return OVN_MAX_DP_VXLAN_KEY;
>>>>>    }
>>>>> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>> }
>>>>> 
>>>>> static void
>>>>> -ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>>>>> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
>>>>> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>>>>>                          struct ovn_datapath *od, uint32_t *hint)
>>>>> {
>>>>>    if (!od->tunnel_key) {
>>>>>        od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
>>>>> -                                    OVN_MIN_DP_KEY_LOCAL,
>>>>> -                                    get_ovn_max_dp_key_local(sbrec_ch_table),
>>>>> -                                    hint);
>>>>> +                                            OVN_MIN_DP_KEY_LOCAL,
>>>>> +                                            get_ovn_max_dp_key_local(),
>>>>> +                                            hint);
>>>>>        if (!od->tunnel_key) {
>>>>>            if (od->sb) {
>>>>>                sbrec_datapath_binding_delete(od->sb);
>>>>> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>>>>> 
>>>>> static void
>>>>> ovn_datapath_assign_requested_tnl_id(
>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>    struct hmap *dp_tnlids, struct ovn_datapath *od)
>>>>> {
>>>>>    const struct smap *other_config = (od->nbs
>>>>> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>>>>>    uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key", 0);
>>>>>    if (tunnel_key) {
>>>>>        const char *interconn_ts = smap_get(other_config, "interconn-ts");
>>>>> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
>>>>> -            tunnel_key >= 1 << 12) {
>>>>> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>>>>>            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>>>            VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>>>>>                         "incompatible with VXLAN", tunnel_key,
>>>>> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>                const struct nbrec_logical_switch_table *nbrec_ls_table,
>>>>>                const struct nbrec_logical_router_table *nbrec_lr_table,
>>>>>                const struct sbrec_datapath_binding_table *sbrec_dp_table,
>>>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>                struct ovn_datapaths *ls_datapaths,
>>>>>                struct ovn_datapaths *lr_datapaths,
>>>>>                struct ovs_list *lr_list)
>>>>> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>    struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>>>>>    struct ovn_datapath *od;
>>>>>    LIST_FOR_EACH (od, list, &both) {
>>>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>>>>> -                                             od);
>>>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>>>    }
>>>>>    LIST_FOR_EACH (od, list, &nb_only) {
>>>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>>>>> -                                             od); }
>>>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>>> +    }
>>>>> 
>>>>>    /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>>>    LIST_FOR_EACH (od, list, &both) {
>>>>> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>    /* Assign new tunnel ids where needed. */
>>>>>    uint32_t hint = 0;
>>>>>    LIST_FOR_EACH_SAFE (od, list, &both) {
>>>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>>>    }
>>>>>    LIST_FOR_EACH_SAFE (od, list, &nb_only) {
>>>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>>>    }
>>>>> 
>>>>>    /* Sync tunnel ids from nb to sb. */
>>>>> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
>>>>> * that the I-P engine can fallback to recompute if needed; otherwise return
>>>>> * true (even if the key is not allocated). */
>>>>> static bool
>>>>> -ovn_port_assign_requested_tnl_id(
>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct ovn_port *op)
>>>>> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>>>>> {
>>>>>    const struct smap *options = (op->nbsp
>>>>>                                  ? &op->nbsp->options
>>>>>                                  : &op->nbrp->options);
>>>>>    uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>>>>>    if (tunnel_key) {
>>>>> -        if (is_vxlan_mode(sbrec_chassis_table) &&
>>>>> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>>> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>>>            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>>>            VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>>>>>                         "is incompatible with VXLAN",
>>>>> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>>>>> }
>>>>> 
>>>>> static bool
>>>>> -ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>> -                      struct hmap *ports,
>>>>> -                      struct ovn_port *op)
>>>>> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>>>>> {
>>>>>    if (!op->tunnel_key) {
>>>>> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
>>>>> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>>>>>        op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>>>>>                                            1, (1u << (key_bits - 1)) - 1,
>>>>>                                            &op->od->port_key_hint);
>>>>> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>> static void
>>>>> build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>    const struct sbrec_port_binding_table *sbrec_port_binding_table,
>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>    const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>>    const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>>>>>    const struct sbrec_ha_chassis_group_table *sbrec_ha_chassis_group_table,
>>>>> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>    /* Assign explicitly requested tunnel ids first. */
>>>>>    struct ovn_port *op;
>>>>>    LIST_FOR_EACH (op, list, &both) {
>>>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>>>> +        ovn_port_assign_requested_tnl_id(op);
>>>>>    }
>>>>>    LIST_FOR_EACH (op, list, &nb_only) {
>>>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>>>> +        ovn_port_assign_requested_tnl_id(op);
>>>>>    }
>>>>> 
>>>>>    /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>>> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>> 
>>>>>    /* Assign new tunnel ids where needed. */
>>>>>    LIST_FOR_EACH_SAFE (op, list, &both) {
>>>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>>>> +        ovn_port_allocate_key(ports, op);
>>>>>    }
>>>>>    LIST_FOR_EACH_SAFE (op, list, &nb_only) {
>>>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>>>> +        ovn_port_allocate_key(ports, op);
>>>>>    }
>>>>> 
>>>>>    /* For logical ports that are in both databases, update the southbound
>>>>> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>             struct hmap *ls_ports, struct ovn_datapath *od,
>>>>>             const struct sbrec_port_binding *sb,
>>>>>             const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>             struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>             struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>> {
>>>>>    op->od = od;
>>>>>    parse_lsp_addrs(op);
>>>>>    /* Assign explicitly requested tunnel ids first. */
>>>>> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
>>>>> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>>>>>        return false;
>>>>>    }
>>>>>    if (sb) {
>>>>> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>        sbrec_port_binding_set_logical_port(op->sb, op->key);
>>>>>    }
>>>>>    /* Assign new tunnel ids where needed. */
>>>>> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
>>>>> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>>>>>        return false;
>>>>>    }
>>>>>    ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
>>>>> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn, struct hmap *ls_ports,
>>>>>               const char *key, const struct nbrec_logical_switch_port *nbsp,
>>>>>               struct ovn_datapath *od, const struct sbrec_port_binding *sb,
>>>>>               const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>> {
>>>>>    struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>>>>>                                          NULL);
>>>>>    hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
>>>>> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>>>> -                      sbrec_mirror_table, sbrec_chassis_table,
>>>>> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>>>>                      sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
>>>>>        ovn_port_destroy(ls_ports, op);
>>>>>        return NULL;
>>>>> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>                struct ovn_datapath *od,
>>>>>                const struct sbrec_port_binding *sb,
>>>>>                const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>                struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>                struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>> {
>>>>> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>    op->sb = sb;
>>>>>    ovn_port_set_nb(op, nbsp, nbrp);
>>>>>    op->l3dgw_port = op->cr_port = NULL;
>>>>> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>>>> -                        sbrec_mirror_table, sbrec_chassis_table,
>>>>> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>>>>                        sbrec_chassis_by_name, sbrec_chassis_by_hostname);
>>>>> }
>>>>> 
>>>>> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>>>            op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>>>>>                                new_nbsp->name, new_nbsp, od, NULL,
>>>>>                                ni->sbrec_mirror_table,
>>>>> -                                ni->sbrec_chassis_table,
>>>>>                                ni->sbrec_chassis_by_name,
>>>>>                                ni->sbrec_chassis_by_hostname);
>>>>>            if (!op) {
>>>>> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>>>            if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>>>>>                                new_nbsp, NULL,
>>>>>                                od, sb, ni->sbrec_mirror_table,
>>>>> -                                ni->sbrec_chassis_table,
>>>>>                                ni->sbrec_chassis_by_name,
>>>>>                                ni->sbrec_chassis_by_hostname)) {
>>>>>                goto fail;
>>>>> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>>    use_common_zone = smap_get_bool(input_data->nb_options, "use_common_zone",
>>>>>                                    false);
>>>>> 
>>>>> +    init_vxlan_mode(input_data->nb_options, input_data->sbrec_chassis_table);
>>>>> +
>>>>>    build_datapaths(ovnsb_txn,
>>>>>                    input_data->nbrec_logical_switch_table,
>>>>>                    input_data->nbrec_logical_router_table,
>>>>>                    input_data->sbrec_datapath_binding_table,
>>>>> -                    input_data->sbrec_chassis_table,
>>>>>                    &data->ls_datapaths,
>>>>>                    &data->lr_datapaths, &data->lr_list);
>>>>>    build_lb_datapaths(input_data->lbs, input_data->lbgrps,
>>>>> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>>                       &data->lb_datapaths_map, &data->lb_group_datapaths_map);
>>>>>    build_ports(ovnsb_txn,
>>>>>                input_data->sbrec_port_binding_table,
>>>>> -                input_data->sbrec_chassis_table,
>>>>>                input_data->sbrec_mirror_table,
>>>>>                input_data->sbrec_mac_binding_table,
>>>>>                input_data->sbrec_ha_chassis_group_table,
>>>>> diff --git a/northd/northd.h b/northd/northd.h
>>>>> index 5e9fa4745..ed9d992fb 100644
>>>>> --- a/northd/northd.h
>>>>> +++ b/northd/northd.h
>>>>> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath *od)
>>>>>    return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>>>>> }
>>>>> 
>>>>> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
>>>>> +void
>>>>> +init_vxlan_mode(const struct smap *nb_options,
>>>>> +                const struct sbrec_chassis_table *sbrec_chassis_table);
>>>>> +
>>>>> +uint32_t get_ovn_max_dp_key_local(void);
>>>>> 
>>>>> #endif /* NORTHD_H */
>>>>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>>>>> index b652046a7..9b2cb355e 100644
>>>>> --- a/ovn-nb.xml
>>>>> +++ b/ovn-nb.xml
>>>>> @@ -381,6 +381,18 @@
>>>>>        of SB changes would be very noticeable.
>>>>>      </column>
>>>>> 
>>>>> +      <column name="options" key="disable_vxlan_mode">
>>>>> +        Be default if at least one chassis in OVN cluster has VXLAN encap,
>>>>> +        northd will run in a `vxlan mode`, which means it splits 24-bit
>>>>> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.  Maximum
>>>>> +        datapaths count in this mode is 4095 and maximum ports per datapath is
>>>>> +        2047.  Rest of IDs are used for multicast groups.
>>>>> +        In case VXLAN encaps are needed on chassis only to support HW VTEP
>>>>> +        functionality, and main encap type is GENEVE or STT, set this option to
>>>>> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS and 15
>>>>> +        bits for port tunnel IDs.
>>>>> +      </column>
>>>>> +
>> 
>> Let's also add a NEWS entry for this one.  It's quite an important
>> user-visible change.
> 
> ACK, I’ll address this in v2.
> 
> Also, if my testcase in this current patch requires patch [1], should I send them both in v2?
> 
>> 
>>>>>      <group title="Options for configuring interconnection route advertisement">
>>>>>        <p>
>>>>>          These options control how routes are advertised between OVN
>>>>> diff --git a/tests/ovn-northd.at <http://ovn-northd.at/> b/tests/ovn-northd.at <http://ovn-northd.at/>
>>>>> index cd53755b2..f5d025852 100644
>>>>> --- a/tests/ovn-northd.at <http://ovn-northd.at/>
>>>>> +++ b/tests/ovn-northd.at <http://ovn-northd.at/>
>>>>> @@ -2819,6 +2819,35 @@ AT_CHECK(
>>>>> get_tunnel_keys
>>>>> AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
>>>>> 
>>>>> +AT_CLEANUP
>>>>> +])
>>>>> +OVN_FOR_EACH_NORTHD_NO_HV([
>>>>> +AT_SETUP([check vxlan mode disabling])
>>>>> +ovn_start
>>>>> +
>>>>> +# Create a fake chassis with vxlan encap to implicitly enable vxlan mode.
>>>>> +ovn-sbctl \
>>>>> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \
>>>>> +    -- --id=@c create chassis name=hv1 encaps=@e
>>>>> +
>>>>> +cmd="ovn-nbctl --wait=sb"
>>>>> +for i in {1..4097..1}; do
>>>>> +    cmd="${cmd} -- ls-add lsw-${i}"
>>>>> +done
>>>>> +
>>>>> +eval $cmd
>>>>> +
>>>>> +check_row_count nb:Logical_Switch 4097
>>>>> +wait_row_count sb:Datapath_Binding 4095
>>>>> +
>>>>> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted" northd/ovn-northd.log])
>>>>> +
>>>>> +# Explicitly disable vxlan mode and check that two remaining datapaths were created.
>>>>> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
>>>>> +
>>>>> +check_row_count nb:Logical_Switch 4097
>>>>> +wait_row_count sb:Datapath_Binding 4097
>>>>> +
>>>>> AT_CLEANUP
>>>>> ])
>>>>> 
>>>>> -- 
>>>>> 2.44.0
>>>>> 
>>> 
>>> 
>>> Regards,
>>> Vladislav Odintsov
>> 
>> Regards,
>> Dumitru
>> 
> 
> 
> Regards,
> Vladislav Odintsov
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org <mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Regards,
Vladislav Odintsov
Dumitru Ceara April 4, 2024, 3:27 p.m. UTC | #7
On 4/4/24 14:38, Vladislav Odintsov wrote:
> *Patch [1] is https://patchwork.ozlabs.org/project/ovn/patch/20240401121510.758326-1-odivlad@gmail.com/
> 
>> On 4 Apr 2024, at 15:33, Vladislav Odintsov <odivlad@gmail.com> wrote:
>>
>> Hi Dumitru,
>>
>> thanks for your attention on this!
>>
>>> On 4 Apr 2024, at 13:06, Dumitru Ceara <dceara@redhat.com> wrote:
>>>
>>> On 4/3/24 22:05, Vladislav Odintsov wrote:
>>>> re-sending email because ovs list rejected previous its content for some reason:
>>>>
>>>> Hi Ihar,
>>>>
>>>
>>> Hi Vladislav, Ihar,
>>>
>>>> thanks for your quick reaction!
>>>> I didn’t see mentioned thread, but I think that it is not safe enough to have automatic detection of this scenario here.
>>>>
>>>> Imagine: for VXLAN with HW VTEP scenario besides VXLAN encap one must configure also either GENEVE and/or STT encap(s) for HV chassis.
>>>>
>>>> So, detection could be implemented like this:
>>>> Check all non-VTEP chassis' encaps and find "effective encap" for each of them. If we detect at least one chassis with "effective encap" == vxlan, then enable vxlan mode. Normal mode otherwise.
>>>> "effective encap" means that for 'vxlan,geneve,stt' encaps effective is geneve, for 'vxlan,stt' -> stt, for 'vxlan' -> vxlan.
>>>> Such behavior was my first idea.
>>>>
>>>> But I decided that there possible flapping of modes if there is a problem/bug in deployment tooling and it is enough to have only one chassis with wrong encap set to affect vxlan mode for entire OVN cluster. Such mode flapping can result in problems with tunnel ids allocation.
>>>
>>> These are valid points.
>>>
>>>> So it seems that to have an option that statically sets vxlan mode is more resilient.
>>>
>>> In general we try to avoid new config knobs.
>>> .
>>>> What do you think?
>>>>
>>>
>>> But in this case it make actually be easier if we offload the work of
>>> determining vxlan-mode to the CMS.
>>>
>>>>
>>>>> On 3 Apr 2024, at 20:43, Ihar Hrachyshka <ihrachys@redhat.com> wrote:
>>>>>
>>>>> Thank you Vladislav.
>>>>>
>>>>> FYI it was reported in the past in https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html but fell through cracks then. Thanks for picking it up!
>>>>>
>>>>> In your patch, you introduce a new config option to disable the 'vxlan-mode' behavior. This will definitely work. But I wonder if we can automatically detect this scenario by ignoring the chassis that are VTEP from consideration? I believe ovn-controller-vtep sets `is-vtep` in other_config, so - would it work if we modify is_vxlan_mode to consider it too?
>>>>>
>>>>> Thanks again for looking into this.
>>>>> Ihar
>>>>>
>>>>> On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>>>>>> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
>>>>>> for available tunnel IDs because of lack of space in VXLAN VNI.
>>>>>> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
>>>>>> and 2047 logical switch ports per datapath.
>>>>>>
>>>>>> Prior to this patch vxlan mode was enabled automatically if at least one
>>>>>> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
>>>>>> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>>>>>>
>>>>>> This patch adds support for explicit disabling of vxlan mode via
>>>>>> Northbound database.
>>>>>>
>>>>>> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>>>>>>
>>>>>> CC: Ihar Hrachyshka <ihrachys@redhat.com <mailto:ihrachys@redhat.com>>
>>>>>> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
>>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>>
>>>>>> ---
>>>>>> northd/en-global-config.c |  9 +++-
>>>>>> northd/northd.c           | 90 ++++++++++++++++++---------------------
>>>>>> northd/northd.h           |  6 ++-
>>>>>> ovn-nb.xml                | 12 ++++++
>>>>>> tests/ovn-northd.at <http://ovn-northd.at/>       | 29 +++++++++++++
>>>>>> 5 files changed, 94 insertions(+), 52 deletions(-)
>>>>>>
>>>>>> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
>>>>>> index 34e393b33..9310c4575 100644
>>>>>> --- a/northd/en-global-config.c
>>>>>> +++ b/northd/en-global-config.c
>>>>>> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void *data)
>>>>>>                     config_data->svc_monitor_mac);
>>>>>>    }
>>>>>>
>>>>>> -    char *max_tunid = xasprintf("%d",
>>>>>> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
>>>>>> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
>>>>>> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>>>>>>    smap_replace(options, "max_tunid", max_tunid);
>>>>>>    free(max_tunid);
>>>>>>
>>>>>> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
>>>>>>        return true;
>>>>>>    }
>>>>>>
>>>>>> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
>>>>>> +                           "disable_vxlan_mode", false)) {
>>>>>> +        return true;
>>>>>> +    }
>>>>>> +
>>>>>>    return false;
>>>>>> }
>>>>>>
>>>>>> diff --git a/northd/northd.c b/northd/northd.c
>>>>>> index c568f6360..859b233e8 100644
>>>>>> --- a/northd/northd.c
>>>>>> +++ b/northd/northd.c
>>>>>> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>>>>>> */
>>>>>> static bool default_acl_drop;
>>>>>>
>>>>>> +/* If this option is 'true' northd will use limited 24-bit space for datapath
>>>>>> + * and ports tunnel key allocation (12 bits for each instead of default 16). */
>>>>>> +static bool vxlan_mode;
>>>>>> +
>>>>>> #define MAX_OVN_TAGS 4096
>>>>>>
>>>>>>
>>>>>> @@ -875,24 +879,31 @@ join_datapaths(const struct nbrec_logical_switch_table *nbrec_ls_table,
>>>>>>    }
>>>>>> }
>>>>>>
>>>>>> -static bool
>>>>>> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>>> +void
>>>>>> +init_vxlan_mode(const struct smap *nb_options,
>>>>>> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>>> {
>>>>>> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
>>>>>> +        vxlan_mode = false;
>>>>>> +        return;
>>>>>> +    }
>>>>>> +
>>>>>>    const struct sbrec_chassis *chassis;
>>>>>>    SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>>>>>>        for (int i = 0; i < chassis->n_encaps; i++) {
>>>>>>            if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
>>>>>> -                return true;
>>>>>> +                vxlan_mode = true;
>>>>>> +                return;
>>>>>>            }
>>>>>>        }
>>>>>>    }
>>>>>> -    return false;
>>>>>> +    vxlan_mode = false;
>>>>>> }
>>>>>>
>>>>>> uint32_t
>>>>>> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>>> +get_ovn_max_dp_key_local(void)
>>>>>> {
>>>>>> -    if (is_vxlan_mode(sbrec_chassis_table)) {
>>>>>> +    if (vxlan_mode) {
>>>>>>        /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>>>>>>        return OVN_MAX_DP_VXLAN_KEY;
>>>>>>    }
>>>>>> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>>> }
>>>>>>
>>>>>> static void
>>>>>> -ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>>>>>> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
>>>>>> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>>>>>>                          struct ovn_datapath *od, uint32_t *hint)
>>>>>> {
>>>>>>    if (!od->tunnel_key) {
>>>>>>        od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
>>>>>> -                                    OVN_MIN_DP_KEY_LOCAL,
>>>>>> -                                    get_ovn_max_dp_key_local(sbrec_ch_table),
>>>>>> -                                    hint);
>>>>>> +                                            OVN_MIN_DP_KEY_LOCAL,
>>>>>> +                                            get_ovn_max_dp_key_local(),
>>>>>> +                                            hint);
>>>>>>        if (!od->tunnel_key) {
>>>>>>            if (od->sb) {
>>>>>>                sbrec_datapath_binding_delete(od->sb);
>>>>>> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>>>>>>
>>>>>> static void
>>>>>> ovn_datapath_assign_requested_tnl_id(
>>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>    struct hmap *dp_tnlids, struct ovn_datapath *od)
>>>>>> {
>>>>>>    const struct smap *other_config = (od->nbs
>>>>>> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>>>>>>    uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key", 0);
>>>>>>    if (tunnel_key) {
>>>>>>        const char *interconn_ts = smap_get(other_config, "interconn-ts");
>>>>>> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
>>>>>> -            tunnel_key >= 1 << 12) {
>>>>>> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>>>>>>            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>>>>            VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>>>>>>                         "incompatible with VXLAN", tunnel_key,
>>>>>> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>                const struct nbrec_logical_switch_table *nbrec_ls_table,
>>>>>>                const struct nbrec_logical_router_table *nbrec_lr_table,
>>>>>>                const struct sbrec_datapath_binding_table *sbrec_dp_table,
>>>>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>                struct ovn_datapaths *ls_datapaths,
>>>>>>                struct ovn_datapaths *lr_datapaths,
>>>>>>                struct ovs_list *lr_list)
>>>>>> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>    struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>>>>>>    struct ovn_datapath *od;
>>>>>>    LIST_FOR_EACH (od, list, &both) {
>>>>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>>>>>> -                                             od);
>>>>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>>>>    }
>>>>>>    LIST_FOR_EACH (od, list, &nb_only) {
>>>>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>>>>>> -                                             od); }
>>>>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>>>> +    }
>>>>>>
>>>>>>    /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>>>>    LIST_FOR_EACH (od, list, &both) {
>>>>>> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>    /* Assign new tunnel ids where needed. */
>>>>>>    uint32_t hint = 0;
>>>>>>    LIST_FOR_EACH_SAFE (od, list, &both) {
>>>>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>>>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>>>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>>>>    }
>>>>>>    LIST_FOR_EACH_SAFE (od, list, &nb_only) {
>>>>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>>>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>>>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>>>>    }
>>>>>>
>>>>>>    /* Sync tunnel ids from nb to sb. */
>>>>>> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
>>>>>> * that the I-P engine can fallback to recompute if needed; otherwise return
>>>>>> * true (even if the key is not allocated). */
>>>>>> static bool
>>>>>> -ovn_port_assign_requested_tnl_id(
>>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct ovn_port *op)
>>>>>> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>>>>>> {
>>>>>>    const struct smap *options = (op->nbsp
>>>>>>                                  ? &op->nbsp->options
>>>>>>                                  : &op->nbrp->options);
>>>>>>    uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>>>>>>    if (tunnel_key) {
>>>>>> -        if (is_vxlan_mode(sbrec_chassis_table) &&
>>>>>> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>>>> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>>>>            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>>>>            VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>>>>>>                         "is incompatible with VXLAN",
>>>>>> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>>>>>> }
>>>>>>
>>>>>> static bool
>>>>>> -ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>> -                      struct hmap *ports,
>>>>>> -                      struct ovn_port *op)
>>>>>> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>>>>>> {
>>>>>>    if (!op->tunnel_key) {
>>>>>> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
>>>>>> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>>>>>>        op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>>>>>>                                            1, (1u << (key_bits - 1)) - 1,
>>>>>>                                            &op->od->port_key_hint);
>>>>>> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>> static void
>>>>>> build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>    const struct sbrec_port_binding_table *sbrec_port_binding_table,
>>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>    const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>>>    const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>>>>>>    const struct sbrec_ha_chassis_group_table *sbrec_ha_chassis_group_table,
>>>>>> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>    /* Assign explicitly requested tunnel ids first. */
>>>>>>    struct ovn_port *op;
>>>>>>    LIST_FOR_EACH (op, list, &both) {
>>>>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>>>>> +        ovn_port_assign_requested_tnl_id(op);
>>>>>>    }
>>>>>>    LIST_FOR_EACH (op, list, &nb_only) {
>>>>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>>>>> +        ovn_port_assign_requested_tnl_id(op);
>>>>>>    }
>>>>>>
>>>>>>    /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>>>> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>
>>>>>>    /* Assign new tunnel ids where needed. */
>>>>>>    LIST_FOR_EACH_SAFE (op, list, &both) {
>>>>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>>>>> +        ovn_port_allocate_key(ports, op);
>>>>>>    }
>>>>>>    LIST_FOR_EACH_SAFE (op, list, &nb_only) {
>>>>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>>>>> +        ovn_port_allocate_key(ports, op);
>>>>>>    }
>>>>>>
>>>>>>    /* For logical ports that are in both databases, update the southbound
>>>>>> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>             struct hmap *ls_ports, struct ovn_datapath *od,
>>>>>>             const struct sbrec_port_binding *sb,
>>>>>>             const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>>> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>             struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>>             struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>>> {
>>>>>>    op->od = od;
>>>>>>    parse_lsp_addrs(op);
>>>>>>    /* Assign explicitly requested tunnel ids first. */
>>>>>> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
>>>>>> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>>>>>>        return false;
>>>>>>    }
>>>>>>    if (sb) {
>>>>>> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>        sbrec_port_binding_set_logical_port(op->sb, op->key);
>>>>>>    }
>>>>>>    /* Assign new tunnel ids where needed. */
>>>>>> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
>>>>>> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>>>>>>        return false;
>>>>>>    }
>>>>>>    ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
>>>>>> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn, struct hmap *ls_ports,
>>>>>>               const char *key, const struct nbrec_logical_switch_port *nbsp,
>>>>>>               struct ovn_datapath *od, const struct sbrec_port_binding *sb,
>>>>>>               const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>>> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>>> {
>>>>>>    struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>>>>>>                                          NULL);
>>>>>>    hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
>>>>>> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>>>>> -                      sbrec_mirror_table, sbrec_chassis_table,
>>>>>> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>>>>>                      sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
>>>>>>        ovn_port_destroy(ls_ports, op);
>>>>>>        return NULL;
>>>>>> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>                struct ovn_datapath *od,
>>>>>>                const struct sbrec_port_binding *sb,
>>>>>>                const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>                struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>>                struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>>> {
>>>>>> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>    op->sb = sb;
>>>>>>    ovn_port_set_nb(op, nbsp, nbrp);
>>>>>>    op->l3dgw_port = op->cr_port = NULL;
>>>>>> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>>>>> -                        sbrec_mirror_table, sbrec_chassis_table,
>>>>>> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>>>>>                        sbrec_chassis_by_name, sbrec_chassis_by_hostname);
>>>>>> }
>>>>>>
>>>>>> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>>>>            op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>>>>>>                                new_nbsp->name, new_nbsp, od, NULL,
>>>>>>                                ni->sbrec_mirror_table,
>>>>>> -                                ni->sbrec_chassis_table,
>>>>>>                                ni->sbrec_chassis_by_name,
>>>>>>                                ni->sbrec_chassis_by_hostname);
>>>>>>            if (!op) {
>>>>>> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>>>>            if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>>>>>>                                new_nbsp, NULL,
>>>>>>                                od, sb, ni->sbrec_mirror_table,
>>>>>> -                                ni->sbrec_chassis_table,
>>>>>>                                ni->sbrec_chassis_by_name,
>>>>>>                                ni->sbrec_chassis_by_hostname)) {
>>>>>>                goto fail;
>>>>>> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>>>    use_common_zone = smap_get_bool(input_data->nb_options, "use_common_zone",
>>>>>>                                    false);
>>>>>>
>>>>>> +    init_vxlan_mode(input_data->nb_options, input_data->sbrec_chassis_table);
>>>>>> +
>>>>>>    build_datapaths(ovnsb_txn,
>>>>>>                    input_data->nbrec_logical_switch_table,
>>>>>>                    input_data->nbrec_logical_router_table,
>>>>>>                    input_data->sbrec_datapath_binding_table,
>>>>>> -                    input_data->sbrec_chassis_table,
>>>>>>                    &data->ls_datapaths,
>>>>>>                    &data->lr_datapaths, &data->lr_list);
>>>>>>    build_lb_datapaths(input_data->lbs, input_data->lbgrps,
>>>>>> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>>>                       &data->lb_datapaths_map, &data->lb_group_datapaths_map);
>>>>>>    build_ports(ovnsb_txn,
>>>>>>                input_data->sbrec_port_binding_table,
>>>>>> -                input_data->sbrec_chassis_table,
>>>>>>                input_data->sbrec_mirror_table,
>>>>>>                input_data->sbrec_mac_binding_table,
>>>>>>                input_data->sbrec_ha_chassis_group_table,
>>>>>> diff --git a/northd/northd.h b/northd/northd.h
>>>>>> index 5e9fa4745..ed9d992fb 100644
>>>>>> --- a/northd/northd.h
>>>>>> +++ b/northd/northd.h
>>>>>> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath *od)
>>>>>>    return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>>>>>> }
>>>>>>
>>>>>> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
>>>>>> +void
>>>>>> +init_vxlan_mode(const struct smap *nb_options,
>>>>>> +                const struct sbrec_chassis_table *sbrec_chassis_table);
>>>>>> +
>>>>>> +uint32_t get_ovn_max_dp_key_local(void);
>>>>>>
>>>>>> #endif /* NORTHD_H */
>>>>>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>>>>>> index b652046a7..9b2cb355e 100644
>>>>>> --- a/ovn-nb.xml
>>>>>> +++ b/ovn-nb.xml
>>>>>> @@ -381,6 +381,18 @@
>>>>>>        of SB changes would be very noticeable.
>>>>>>      </column>
>>>>>>
>>>>>> +      <column name="options" key="disable_vxlan_mode">
>>>>>> +        Be default if at least one chassis in OVN cluster has VXLAN encap,
>>>>>> +        northd will run in a `vxlan mode`, which means it splits 24-bit
>>>>>> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.  Maximum
>>>>>> +        datapaths count in this mode is 4095 and maximum ports per datapath is
>>>>>> +        2047.  Rest of IDs are used for multicast groups.
>>>>>> +        In case VXLAN encaps are needed on chassis only to support HW VTEP
>>>>>> +        functionality, and main encap type is GENEVE or STT, set this option to
>>>>>> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS and 15
>>>>>> +        bits for port tunnel IDs.
>>>>>> +      </column>
>>>>>> +
>>>
>>> Let's also add a NEWS entry for this one.  It's quite an important
>>> user-visible change.
>>
>> ACK, I’ll address this in v2.
>>
>> Also, if my testcase in this current patch requires patch [1], should I send them both in v2?
>>

It was acked by Mark so I applied it and backported it now.

Thanks,
Dumitru
Vladislav Odintsov April 4, 2024, 4:07 p.m. UTC | #8
I’ve sent v2:
https://patchwork.ozlabs.org/project/ovn/patch/20240404160628.970615-1-odivlad@gmail.com/

> On 4 Apr 2024, at 18:27, Dumitru Ceara <dceara@redhat.com> wrote:
> 
> On 4/4/24 14:38, Vladislav Odintsov wrote:
>> *Patch [1] is https://patchwork.ozlabs.org/project/ovn/patch/20240401121510.758326-1-odivlad@gmail.com/
>> 
>>> On 4 Apr 2024, at 15:33, Vladislav Odintsov <odivlad@gmail.com> wrote:
>>> 
>>> Hi Dumitru,
>>> 
>>> thanks for your attention on this!
>>> 
>>>> On 4 Apr 2024, at 13:06, Dumitru Ceara <dceara@redhat.com> wrote:
>>>> 
>>>> On 4/3/24 22:05, Vladislav Odintsov wrote:
>>>>> re-sending email because ovs list rejected previous its content for some reason:
>>>>> 
>>>>> Hi Ihar,
>>>>> 
>>>> 
>>>> Hi Vladislav, Ihar,
>>>> 
>>>>> thanks for your quick reaction!
>>>>> I didn’t see mentioned thread, but I think that it is not safe enough to have automatic detection of this scenario here.
>>>>> 
>>>>> Imagine: for VXLAN with HW VTEP scenario besides VXLAN encap one must configure also either GENEVE and/or STT encap(s) for HV chassis.
>>>>> 
>>>>> So, detection could be implemented like this:
>>>>> Check all non-VTEP chassis' encaps and find "effective encap" for each of them. If we detect at least one chassis with "effective encap" == vxlan, then enable vxlan mode. Normal mode otherwise.
>>>>> "effective encap" means that for 'vxlan,geneve,stt' encaps effective is geneve, for 'vxlan,stt' -> stt, for 'vxlan' -> vxlan.
>>>>> Such behavior was my first idea.
>>>>> 
>>>>> But I decided that there possible flapping of modes if there is a problem/bug in deployment tooling and it is enough to have only one chassis with wrong encap set to affect vxlan mode for entire OVN cluster. Such mode flapping can result in problems with tunnel ids allocation.
>>>> 
>>>> These are valid points.
>>>> 
>>>>> So it seems that to have an option that statically sets vxlan mode is more resilient.
>>>> 
>>>> In general we try to avoid new config knobs.
>>>> .
>>>>> What do you think?
>>>>> 
>>>> 
>>>> But in this case it make actually be easier if we offload the work of
>>>> determining vxlan-mode to the CMS.
>>>> 
>>>>> 
>>>>>> On 3 Apr 2024, at 20:43, Ihar Hrachyshka <ihrachys@redhat.com> wrote:
>>>>>> 
>>>>>> Thank you Vladislav.
>>>>>> 
>>>>>> FYI it was reported in the past in https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html but fell through cracks then. Thanks for picking it up!
>>>>>> 
>>>>>> In your patch, you introduce a new config option to disable the 'vxlan-mode' behavior. This will definitely work. But I wonder if we can automatically detect this scenario by ignoring the chassis that are VTEP from consideration? I believe ovn-controller-vtep sets `is-vtep` in other_config, so - would it work if we modify is_vxlan_mode to consider it too?
>>>>>> 
>>>>>> Thanks again for looking into this.
>>>>>> Ihar
>>>>>> 
>>>>>> On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>>>>>>> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
>>>>>>> for available tunnel IDs because of lack of space in VXLAN VNI.
>>>>>>> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
>>>>>>> and 2047 logical switch ports per datapath.
>>>>>>> 
>>>>>>> Prior to this patch vxlan mode was enabled automatically if at least one
>>>>>>> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
>>>>>>> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>>>>>>> 
>>>>>>> This patch adds support for explicit disabling of vxlan mode via
>>>>>>> Northbound database.
>>>>>>> 
>>>>>>> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>>>>>>> 
>>>>>>> CC: Ihar Hrachyshka <ihrachys@redhat.com <mailto:ihrachys@redhat.com>>
>>>>>>> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
>>>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>>
>>>>>>> ---
>>>>>>> northd/en-global-config.c |  9 +++-
>>>>>>> northd/northd.c           | 90 ++++++++++++++++++---------------------
>>>>>>> northd/northd.h           |  6 ++-
>>>>>>> ovn-nb.xml                | 12 ++++++
>>>>>>> tests/ovn-northd.at <http://ovn-northd.at/>       | 29 +++++++++++++
>>>>>>> 5 files changed, 94 insertions(+), 52 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
>>>>>>> index 34e393b33..9310c4575 100644
>>>>>>> --- a/northd/en-global-config.c
>>>>>>> +++ b/northd/en-global-config.c
>>>>>>> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void *data)
>>>>>>>                    config_data->svc_monitor_mac);
>>>>>>>   }
>>>>>>> 
>>>>>>> -    char *max_tunid = xasprintf("%d",
>>>>>>> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
>>>>>>> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
>>>>>>> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>>>>>>>   smap_replace(options, "max_tunid", max_tunid);
>>>>>>>   free(max_tunid);
>>>>>>> 
>>>>>>> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
>>>>>>>       return true;
>>>>>>>   }
>>>>>>> 
>>>>>>> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
>>>>>>> +                           "disable_vxlan_mode", false)) {
>>>>>>> +        return true;
>>>>>>> +    }
>>>>>>> +
>>>>>>>   return false;
>>>>>>> }
>>>>>>> 
>>>>>>> diff --git a/northd/northd.c b/northd/northd.c
>>>>>>> index c568f6360..859b233e8 100644
>>>>>>> --- a/northd/northd.c
>>>>>>> +++ b/northd/northd.c
>>>>>>> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>>>>>>> */
>>>>>>> static bool default_acl_drop;
>>>>>>> 
>>>>>>> +/* If this option is 'true' northd will use limited 24-bit space for datapath
>>>>>>> + * and ports tunnel key allocation (12 bits for each instead of default 16). */
>>>>>>> +static bool vxlan_mode;
>>>>>>> +
>>>>>>> #define MAX_OVN_TAGS 4096
>>>>>>> 
>>>>>>> 
>>>>>>> @@ -875,24 +879,31 @@ join_datapaths(const struct nbrec_logical_switch_table *nbrec_ls_table,
>>>>>>>   }
>>>>>>> }
>>>>>>> 
>>>>>>> -static bool
>>>>>>> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>>>> +void
>>>>>>> +init_vxlan_mode(const struct smap *nb_options,
>>>>>>> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>>>> {
>>>>>>> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
>>>>>>> +        vxlan_mode = false;
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>>   const struct sbrec_chassis *chassis;
>>>>>>>   SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>>>>>>>       for (int i = 0; i < chassis->n_encaps; i++) {
>>>>>>>           if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
>>>>>>> -                return true;
>>>>>>> +                vxlan_mode = true;
>>>>>>> +                return;
>>>>>>>           }
>>>>>>>       }
>>>>>>>   }
>>>>>>> -    return false;
>>>>>>> +    vxlan_mode = false;
>>>>>>> }
>>>>>>> 
>>>>>>> uint32_t
>>>>>>> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>>>> +get_ovn_max_dp_key_local(void)
>>>>>>> {
>>>>>>> -    if (is_vxlan_mode(sbrec_chassis_table)) {
>>>>>>> +    if (vxlan_mode) {
>>>>>>>       /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>>>>>>>       return OVN_MAX_DP_VXLAN_KEY;
>>>>>>>   }
>>>>>>> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>>>> }
>>>>>>> 
>>>>>>> static void
>>>>>>> -ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>>>>>>> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
>>>>>>> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>>>>>>>                         struct ovn_datapath *od, uint32_t *hint)
>>>>>>> {
>>>>>>>   if (!od->tunnel_key) {
>>>>>>>       od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
>>>>>>> -                                    OVN_MIN_DP_KEY_LOCAL,
>>>>>>> -                                    get_ovn_max_dp_key_local(sbrec_ch_table),
>>>>>>> -                                    hint);
>>>>>>> +                                            OVN_MIN_DP_KEY_LOCAL,
>>>>>>> +                                            get_ovn_max_dp_key_local(),
>>>>>>> +                                            hint);
>>>>>>>       if (!od->tunnel_key) {
>>>>>>>           if (od->sb) {
>>>>>>>               sbrec_datapath_binding_delete(od->sb);
>>>>>>> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
>>>>>>> 
>>>>>>> static void
>>>>>>> ovn_datapath_assign_requested_tnl_id(
>>>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>>   struct hmap *dp_tnlids, struct ovn_datapath *od)
>>>>>>> {
>>>>>>>   const struct smap *other_config = (od->nbs
>>>>>>> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>>>>>>>   uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key", 0);
>>>>>>>   if (tunnel_key) {
>>>>>>>       const char *interconn_ts = smap_get(other_config, "interconn-ts");
>>>>>>> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
>>>>>>> -            tunnel_key >= 1 << 12) {
>>>>>>> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>>>>>>>           static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>>>>>           VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>>>>>>>                        "incompatible with VXLAN", tunnel_key,
>>>>>>> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>>               const struct nbrec_logical_switch_table *nbrec_ls_table,
>>>>>>>               const struct nbrec_logical_router_table *nbrec_lr_table,
>>>>>>>               const struct sbrec_datapath_binding_table *sbrec_dp_table,
>>>>>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>>               struct ovn_datapaths *ls_datapaths,
>>>>>>>               struct ovn_datapaths *lr_datapaths,
>>>>>>>               struct ovs_list *lr_list)
>>>>>>> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>>   struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>>>>>>>   struct ovn_datapath *od;
>>>>>>>   LIST_FOR_EACH (od, list, &both) {
>>>>>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>>>>>>> -                                             od);
>>>>>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>>>>>   }
>>>>>>>   LIST_FOR_EACH (od, list, &nb_only) {
>>>>>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
>>>>>>> -                                             od); }
>>>>>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>>>>> +    }
>>>>>>> 
>>>>>>>   /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>>>>>   LIST_FOR_EACH (od, list, &both) {
>>>>>>> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>>   /* Assign new tunnel ids where needed. */
>>>>>>>   uint32_t hint = 0;
>>>>>>>   LIST_FOR_EACH_SAFE (od, list, &both) {
>>>>>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>>>>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>>>>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>>>>>   }
>>>>>>>   LIST_FOR_EACH_SAFE (od, list, &nb_only) {
>>>>>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>>>>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>>>>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>>>>>   }
>>>>>>> 
>>>>>>>   /* Sync tunnel ids from nb to sb. */
>>>>>>> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
>>>>>>> * that the I-P engine can fallback to recompute if needed; otherwise return
>>>>>>> * true (even if the key is not allocated). */
>>>>>>> static bool
>>>>>>> -ovn_port_assign_requested_tnl_id(
>>>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct ovn_port *op)
>>>>>>> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>>>>>>> {
>>>>>>>   const struct smap *options = (op->nbsp
>>>>>>>                                 ? &op->nbsp->options
>>>>>>>                                 : &op->nbrp->options);
>>>>>>>   uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>>>>>>>   if (tunnel_key) {
>>>>>>> -        if (is_vxlan_mode(sbrec_chassis_table) &&
>>>>>>> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>>>>> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>>>>>           static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>>>>>>>           VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>>>>>>>                        "is incompatible with VXLAN",
>>>>>>> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>>>>>>> }
>>>>>>> 
>>>>>>> static bool
>>>>>>> -ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>> -                      struct hmap *ports,
>>>>>>> -                      struct ovn_port *op)
>>>>>>> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>>>>>>> {
>>>>>>>   if (!op->tunnel_key) {
>>>>>>> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
>>>>>>> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>>>>>>>       op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>>>>>>>                                           1, (1u << (key_bits - 1)) - 1,
>>>>>>>                                           &op->od->port_key_hint);
>>>>>>> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>> static void
>>>>>>> build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>>   const struct sbrec_port_binding_table *sbrec_port_binding_table,
>>>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>>   const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>>>>   const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>>>>>>>   const struct sbrec_ha_chassis_group_table *sbrec_ha_chassis_group_table,
>>>>>>> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>>   /* Assign explicitly requested tunnel ids first. */
>>>>>>>   struct ovn_port *op;
>>>>>>>   LIST_FOR_EACH (op, list, &both) {
>>>>>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>>>>>> +        ovn_port_assign_requested_tnl_id(op);
>>>>>>>   }
>>>>>>>   LIST_FOR_EACH (op, list, &nb_only) {
>>>>>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>>>>>> +        ovn_port_assign_requested_tnl_id(op);
>>>>>>>   }
>>>>>>> 
>>>>>>>   /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>>>>> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>> 
>>>>>>>   /* Assign new tunnel ids where needed. */
>>>>>>>   LIST_FOR_EACH_SAFE (op, list, &both) {
>>>>>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>>>>>> +        ovn_port_allocate_key(ports, op);
>>>>>>>   }
>>>>>>>   LIST_FOR_EACH_SAFE (op, list, &nb_only) {
>>>>>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>>>>>> +        ovn_port_allocate_key(ports, op);
>>>>>>>   }
>>>>>>> 
>>>>>>>   /* For logical ports that are in both databases, update the southbound
>>>>>>> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>>            struct hmap *ls_ports, struct ovn_datapath *od,
>>>>>>>            const struct sbrec_port_binding *sb,
>>>>>>>            const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>>>> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>>            struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>>>            struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>>>> {
>>>>>>>   op->od = od;
>>>>>>>   parse_lsp_addrs(op);
>>>>>>>   /* Assign explicitly requested tunnel ids first. */
>>>>>>> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
>>>>>>> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>>>>>>>       return false;
>>>>>>>   }
>>>>>>>   if (sb) {
>>>>>>> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>>       sbrec_port_binding_set_logical_port(op->sb, op->key);
>>>>>>>   }
>>>>>>>   /* Assign new tunnel ids where needed. */
>>>>>>> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
>>>>>>> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>>>>>>>       return false;
>>>>>>>   }
>>>>>>>   ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
>>>>>>> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn, struct hmap *ls_ports,
>>>>>>>              const char *key, const struct nbrec_logical_switch_port *nbsp,
>>>>>>>              struct ovn_datapath *od, const struct sbrec_port_binding *sb,
>>>>>>>              const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>>>> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>>              struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>>>              struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>>>> {
>>>>>>>   struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>>>>>>>                                         NULL);
>>>>>>>   hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
>>>>>>> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>>>>>> -                      sbrec_mirror_table, sbrec_chassis_table,
>>>>>>> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>>>>>>                     sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
>>>>>>>       ovn_port_destroy(ls_ports, op);
>>>>>>>       return NULL;
>>>>>>> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>>               struct ovn_datapath *od,
>>>>>>>               const struct sbrec_port_binding *sb,
>>>>>>>               const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>>>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>>>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>>>> {
>>>>>>> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
>>>>>>>   op->sb = sb;
>>>>>>>   ovn_port_set_nb(op, nbsp, nbrp);
>>>>>>>   op->l3dgw_port = op->cr_port = NULL;
>>>>>>> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>>>>>> -                        sbrec_mirror_table, sbrec_chassis_table,
>>>>>>> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>>>>>>>                       sbrec_chassis_by_name, sbrec_chassis_by_hostname);
>>>>>>> }
>>>>>>> 
>>>>>>> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>>>>>           op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>>>>>>>                               new_nbsp->name, new_nbsp, od, NULL,
>>>>>>>                               ni->sbrec_mirror_table,
>>>>>>> -                                ni->sbrec_chassis_table,
>>>>>>>                               ni->sbrec_chassis_by_name,
>>>>>>>                               ni->sbrec_chassis_by_hostname);
>>>>>>>           if (!op) {
>>>>>>> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>>>>>           if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>>>>>>>                               new_nbsp, NULL,
>>>>>>>                               od, sb, ni->sbrec_mirror_table,
>>>>>>> -                                ni->sbrec_chassis_table,
>>>>>>>                               ni->sbrec_chassis_by_name,
>>>>>>>                               ni->sbrec_chassis_by_hostname)) {
>>>>>>>               goto fail;
>>>>>>> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>>>>   use_common_zone = smap_get_bool(input_data->nb_options, "use_common_zone",
>>>>>>>                                   false);
>>>>>>> 
>>>>>>> +    init_vxlan_mode(input_data->nb_options, input_data->sbrec_chassis_table);
>>>>>>> +
>>>>>>>   build_datapaths(ovnsb_txn,
>>>>>>>                   input_data->nbrec_logical_switch_table,
>>>>>>>                   input_data->nbrec_logical_router_table,
>>>>>>>                   input_data->sbrec_datapath_binding_table,
>>>>>>> -                    input_data->sbrec_chassis_table,
>>>>>>>                   &data->ls_datapaths,
>>>>>>>                   &data->lr_datapaths, &data->lr_list);
>>>>>>>   build_lb_datapaths(input_data->lbs, input_data->lbgrps,
>>>>>>> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>>>>                      &data->lb_datapaths_map, &data->lb_group_datapaths_map);
>>>>>>>   build_ports(ovnsb_txn,
>>>>>>>               input_data->sbrec_port_binding_table,
>>>>>>> -                input_data->sbrec_chassis_table,
>>>>>>>               input_data->sbrec_mirror_table,
>>>>>>>               input_data->sbrec_mac_binding_table,
>>>>>>>               input_data->sbrec_ha_chassis_group_table,
>>>>>>> diff --git a/northd/northd.h b/northd/northd.h
>>>>>>> index 5e9fa4745..ed9d992fb 100644
>>>>>>> --- a/northd/northd.h
>>>>>>> +++ b/northd/northd.h
>>>>>>> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath *od)
>>>>>>>   return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>>>>>>> }
>>>>>>> 
>>>>>>> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
>>>>>>> +void
>>>>>>> +init_vxlan_mode(const struct smap *nb_options,
>>>>>>> +                const struct sbrec_chassis_table *sbrec_chassis_table);
>>>>>>> +
>>>>>>> +uint32_t get_ovn_max_dp_key_local(void);
>>>>>>> 
>>>>>>> #endif /* NORTHD_H */
>>>>>>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>>>>>>> index b652046a7..9b2cb355e 100644
>>>>>>> --- a/ovn-nb.xml
>>>>>>> +++ b/ovn-nb.xml
>>>>>>> @@ -381,6 +381,18 @@
>>>>>>>       of SB changes would be very noticeable.
>>>>>>>     </column>
>>>>>>> 
>>>>>>> +      <column name="options" key="disable_vxlan_mode">
>>>>>>> +        Be default if at least one chassis in OVN cluster has VXLAN encap,
>>>>>>> +        northd will run in a `vxlan mode`, which means it splits 24-bit
>>>>>>> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.  Maximum
>>>>>>> +        datapaths count in this mode is 4095 and maximum ports per datapath is
>>>>>>> +        2047.  Rest of IDs are used for multicast groups.
>>>>>>> +        In case VXLAN encaps are needed on chassis only to support HW VTEP
>>>>>>> +        functionality, and main encap type is GENEVE or STT, set this option to
>>>>>>> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS and 15
>>>>>>> +        bits for port tunnel IDs.
>>>>>>> +      </column>
>>>>>>> +
>>>> 
>>>> Let's also add a NEWS entry for this one.  It's quite an important
>>>> user-visible change.
>>> 
>>> ACK, I’ll address this in v2.
>>> 
>>> Also, if my testcase in this current patch requires patch [1], should I send them both in v2?
>>> 
> 
> It was acked by Mark so I applied it and backported it now.
> 
> Thanks,
> Dumitru
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org <mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Regards,
Vladislav Odintsov
Ihar Hrachyshka April 4, 2024, 7:23 p.m. UTC | #9
On Thu, Apr 4, 2024 at 6:06 AM Dumitru Ceara <dceara@redhat.com> wrote:

> On 4/3/24 22:05, Vladislav Odintsov wrote:
> > re-sending email because ovs list rejected previous its content for some
> reason:
> >
> > Hi Ihar,
> >
>
> Hi Vladislav, Ihar,
>
> > thanks for your quick reaction!
> > I didn’t see mentioned thread, but I think that it is not safe enough to
> have automatic detection of this scenario here.
> >
> > Imagine: for VXLAN with HW VTEP scenario besides VXLAN encap one must
> configure also either GENEVE and/or STT encap(s) for HV chassis.
> >
> > So, detection could be implemented like this:
> > Check all non-VTEP chassis' encaps and find "effective encap" for each
> of them. If we detect at least one chassis with "effective encap" == vxlan,
> then enable vxlan mode. Normal mode otherwise.
> > "effective encap" means that for 'vxlan,geneve,stt' encaps effective is
> geneve, for 'vxlan,stt' -> stt, for 'vxlan' -> vxlan.
> > Such behavior was my first idea.
> >
> > But I decided that there possible flapping of modes if there is a
> problem/bug in deployment tooling and it is enough to have only one chassis
> with wrong encap set to affect vxlan mode for entire OVN cluster. Such mode
> flapping can result in problems with tunnel ids allocation.
>
> These are valid points.
>
>
This is a valid concern. Should it perhaps be handled in the general case
then? Meaning: calculate the max tunid once and store it in db? (I am
thinking that maybe recalculating the max_tunid on every engine cycle - or
on every controller restart - is unsafe?)


> > So it seems that to have an option that statically sets vxlan mode is
> more resilient.
>
> In general we try to avoid new config knobs.
> .
> > What do you think?
> >
>
> But in this case it make actually be easier if we offload the work of
> determining vxlan-mode to the CMS.
>
> >
> >> On 3 Apr 2024, at 20:43, Ihar Hrachyshka <ihrachys@redhat.com> wrote:
> >>
> >> Thank you Vladislav.
> >>
> >> FYI it was reported in the past in
> https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html
> but fell through cracks then. Thanks for picking it up!
> >>
> >> In your patch, you introduce a new config option to disable the
> 'vxlan-mode' behavior. This will definitely work. But I wonder if we can
> automatically detect this scenario by ignoring the chassis that are VTEP
> from consideration? I believe ovn-controller-vtep sets `is-vtep` in
> other_config, so - would it work if we modify is_vxlan_mode to consider it
> too?
> >>
> >> Thanks again for looking into this.
> >> Ihar
> >>
> >> On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odivlad@gmail.com
> <mailto:odivlad@gmail.com>> wrote:
> >>> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
> >>> for available tunnel IDs because of lack of space in VXLAN VNI.
> >>> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
> >>> and 2047 logical switch ports per datapath.
> >>>
> >>> Prior to this patch vxlan mode was enabled automatically if at least
> one
> >>> chassis had encap of vxlan type.  In scenarios where one want to use
> VXLAN
> >>> only for HW VTEP (RAMP) switch, such limitation makes no sence.
> >>>
> >>> This patch adds support for explicit disabling of vxlan mode via
> >>> Northbound database.
> >>>
> >>> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
> >>>
> >>> CC: Ihar Hrachyshka <ihrachys@redhat.com <mailto:ihrachys@redhat.com>>
> >>> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath
> bindings")
> >>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com <mailto:
> odivlad@gmail.com>>
> >>> ---
> >>>  northd/en-global-config.c |  9 +++-
> >>>  northd/northd.c           | 90 ++++++++++++++++++---------------------
> >>>  northd/northd.h           |  6 ++-
> >>>  ovn-nb.xml                | 12 ++++++
> >>>  tests/ovn-northd.at <http://ovn-northd.at/>       | 29 +++++++++++++
> >>>  5 files changed, 94 insertions(+), 52 deletions(-)
> >>>
> >>> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
> >>> index 34e393b33..9310c4575 100644
> >>> --- a/northd/en-global-config.c
> >>> +++ b/northd/en-global-config.c
> >>> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node ,
> void *data)
> >>>                       config_data->svc_monitor_mac);
> >>>      }
> >>>
> >>> -    char *max_tunid = xasprintf("%d",
> >>> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
> >>> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
> >>> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
> >>>      smap_replace(options, "max_tunid", max_tunid);
> >>>      free(max_tunid);
> >>>
> >>> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct
> nbrec_nb_global *nb,
> >>>          return true;
> >>>      }
> >>>
> >>> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
> >>> +                           "disable_vxlan_mode", false)) {
> >>> +        return true;
> >>> +    }
> >>> +
> >>>      return false;
> >>>  }
> >>>
> >>> diff --git a/northd/northd.c b/northd/northd.c
> >>> index c568f6360..859b233e8 100644
> >>> --- a/northd/northd.c
> >>> +++ b/northd/northd.c
> >>> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
> >>>   */
> >>>  static bool default_acl_drop;
> >>>
> >>> +/* If this option is 'true' northd will use limited 24-bit space for
> datapath
> >>> + * and ports tunnel key allocation (12 bits for each instead of
> default 16). */
> >>> +static bool vxlan_mode;
> >>> +
> >>>  #define MAX_OVN_TAGS 4096
> >>>
> >>>
> >>> @@ -875,24 +879,31 @@ join_datapaths(const struct
> nbrec_logical_switch_table *nbrec_ls_table,
> >>>      }
> >>>  }
> >>>
> >>> -static bool
> >>> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
> >>> +void
> >>> +init_vxlan_mode(const struct smap *nb_options,
> >>> +                const struct sbrec_chassis_table *sbrec_chassis_table)
> >>>  {
> >>> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
> >>> +        vxlan_mode = false;
> >>> +        return;
> >>> +    }
> >>> +
> >>>      const struct sbrec_chassis *chassis;
> >>>      SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
> >>>          for (int i = 0; i < chassis->n_encaps; i++) {
> >>>              if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
> >>> -                return true;
> >>> +                vxlan_mode = true;
> >>> +                return;
> >>>              }
> >>>          }
> >>>      }
> >>> -    return false;
> >>> +    vxlan_mode = false;
> >>>  }
> >>>
> >>>  uint32_t
> >>> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table
> *sbrec_chassis_table)
> >>> +get_ovn_max_dp_key_local(void)
> >>>  {
> >>> -    if (is_vxlan_mode(sbrec_chassis_table)) {
> >>> +    if (vxlan_mode) {
> >>>          /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
> >>>          return OVN_MAX_DP_VXLAN_KEY;
> >>>      }
> >>> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct
> sbrec_chassis_table *sbrec_chassis_table)
> >>>  }
> >>>
> >>>  static void
> >>> -ovn_datapath_allocate_key(const struct sbrec_chassis_table
> *sbrec_ch_table,
> >>> -                          struct hmap *datapaths, struct hmap
> *dp_tnlids,
> >>> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap
> *dp_tnlids,
> >>>                            struct ovn_datapath *od, uint32_t *hint)
> >>>  {
> >>>      if (!od->tunnel_key) {
> >>>          od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
> >>> -                                    OVN_MIN_DP_KEY_LOCAL,
> >>> -
> get_ovn_max_dp_key_local(sbrec_ch_table),
> >>> -                                    hint);
> >>> +                                            OVN_MIN_DP_KEY_LOCAL,
> >>> +
> get_ovn_max_dp_key_local(),
> >>> +                                            hint);
> >>>          if (!od->tunnel_key) {
> >>>              if (od->sb) {
> >>>                  sbrec_datapath_binding_delete(od->sb);
> >>> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct
> sbrec_chassis_table *sbrec_ch_table,
> >>>
> >>>  static void
> >>>  ovn_datapath_assign_requested_tnl_id(
> >>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
> >>>      struct hmap *dp_tnlids, struct ovn_datapath *od)
> >>>  {
> >>>      const struct smap *other_config = (od->nbs
> >>> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
> >>>      uint32_t tunnel_key = smap_get_int(other_config,
> "requested-tnl-key", 0);
> >>>      if (tunnel_key) {
> >>>          const char *interconn_ts = smap_get(other_config,
> "interconn-ts");
> >>> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
> >>> -            tunnel_key >= 1 << 12) {
> >>> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
> >>>              static struct vlog_rate_limit rl =
> VLOG_RATE_LIMIT_INIT(1, 1);
> >>>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s
> is "
> >>>                           "incompatible with VXLAN", tunnel_key,
> >>> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
> >>>                  const struct nbrec_logical_switch_table
> *nbrec_ls_table,
> >>>                  const struct nbrec_logical_router_table
> *nbrec_lr_table,
> >>>                  const struct sbrec_datapath_binding_table
> *sbrec_dp_table,
> >>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
> >>>                  struct ovn_datapaths *ls_datapaths,
> >>>                  struct ovn_datapaths *lr_datapaths,
> >>>                  struct ovs_list *lr_list)
> >>> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
> >>>      struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
> >>>      struct ovn_datapath *od;
> >>>      LIST_FOR_EACH (od, list, &both) {
> >>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
> &dp_tnlids,
> >>> -                                             od);
> >>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
> >>>      }
> >>>      LIST_FOR_EACH (od, list, &nb_only) {
> >>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
> &dp_tnlids,
> >>> -                                             od); }
> >>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
> >>> +    }
> >>>
> >>>      /* Keep nonconflicting tunnel IDs that are already assigned. */
> >>>      LIST_FOR_EACH (od, list, &both) {
> >>> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn
> *ovnsb_txn,
> >>>      /* Assign new tunnel ids where needed. */
> >>>      uint32_t hint = 0;
> >>>      LIST_FOR_EACH_SAFE (od, list, &both) {
> >>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
> >>> -                                  datapaths, &dp_tnlids, od, &hint);
> >>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
> >>>      }
> >>>      LIST_FOR_EACH_SAFE (od, list, &nb_only) {
> >>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
> >>> -                                  datapaths, &dp_tnlids, od, &hint);
> >>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
> >>>      }
> >>>
> >>>      /* Sync tunnel ids from nb to sb. */
> >>> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op,
> uint32_t tunnel_key)
> >>>   * that the I-P engine can fallback to recompute if needed; otherwise
> return
> >>>   * true (even if the key is not allocated). */
> >>>  static bool
> >>> -ovn_port_assign_requested_tnl_id(
> >>> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct
> ovn_port *op)
> >>> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
> >>>  {
> >>>      const struct smap *options = (op->nbsp
> >>>                                    ? &op->nbsp->options
> >>>                                    : &op->nbrp->options);
> >>>      uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key",
> 0);
> >>>      if (tunnel_key) {
> >>> -        if (is_vxlan_mode(sbrec_chassis_table) &&
> >>> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
> >>> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
> >>>              static struct vlog_rate_limit rl =
> VLOG_RATE_LIMIT_INIT(1, 1);
> >>>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
> >>>                           "is incompatible with VXLAN",
> >>> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
> >>>  }
> >>>
> >>>  static bool
> >>> -ovn_port_allocate_key(const struct sbrec_chassis_table
> *sbrec_chassis_table,
> >>> -                      struct hmap *ports,
> >>> -                      struct ovn_port *op)
> >>> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
> >>>  {
> >>>      if (!op->tunnel_key) {
> >>> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 :
> 16;
> >>> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
> >>>          op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids,
> "port",
> >>>                                              1, (1u << (key_bits - 1))
> - 1,
> >>>                                              &op->od->port_key_hint);
> >>> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct
> sbrec_chassis_table *sbrec_chassis_table,
> >>>  static void
> >>>  build_ports(struct ovsdb_idl_txn *ovnsb_txn,
> >>>      const struct sbrec_port_binding_table *sbrec_port_binding_table,
> >>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
> >>>      const struct sbrec_mirror_table *sbrec_mirror_table,
> >>>      const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
> >>>      const struct sbrec_ha_chassis_group_table
> *sbrec_ha_chassis_group_table,
> >>> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
> >>>      /* Assign explicitly requested tunnel ids first. */
> >>>      struct ovn_port *op;
> >>>      LIST_FOR_EACH (op, list, &both) {
> >>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
> >>> +        ovn_port_assign_requested_tnl_id(op);
> >>>      }
> >>>      LIST_FOR_EACH (op, list, &nb_only) {
> >>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
> >>> +        ovn_port_assign_requested_tnl_id(op);
> >>>      }
> >>>
> >>>      /* Keep nonconflicting tunnel IDs that are already assigned. */
> >>> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
> >>>
> >>>      /* Assign new tunnel ids where needed. */
> >>>      LIST_FOR_EACH_SAFE (op, list, &both) {
> >>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
> >>> +        ovn_port_allocate_key(ports, op);
> >>>      }
> >>>      LIST_FOR_EACH_SAFE (op, list, &nb_only) {
> >>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
> >>> +        ovn_port_allocate_key(ports, op);
> >>>      }
> >>>
> >>>      /* For logical ports that are in both databases, update the
> southbound
> >>> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
> >>>               struct hmap *ls_ports, struct ovn_datapath *od,
> >>>               const struct sbrec_port_binding *sb,
> >>>               const struct sbrec_mirror_table *sbrec_mirror_table,
> >>> -             const struct sbrec_chassis_table *sbrec_chassis_table,
> >>>               struct ovsdb_idl_index *sbrec_chassis_by_name,
> >>>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
> >>>  {
> >>>      op->od = od;
> >>>      parse_lsp_addrs(op);
> >>>      /* Assign explicitly requested tunnel ids first. */
> >>> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
> >>> +    if (!ovn_port_assign_requested_tnl_id(op)) {
> >>>          return false;
> >>>      }
> >>>      if (sb) {
> >>> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
> >>>          sbrec_port_binding_set_logical_port(op->sb, op->key);
> >>>      }
> >>>      /* Assign new tunnel ids where needed. */
> >>> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
> >>> +    if (!ovn_port_allocate_key(ls_ports, op)) {
> >>>          return false;
> >>>      }
> >>>      ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
> >>> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn
> *ovnsb_txn, struct hmap *ls_ports,
> >>>                 const char *key, const struct
> nbrec_logical_switch_port *nbsp,
> >>>                 struct ovn_datapath *od, const struct
> sbrec_port_binding *sb,
> >>>                 const struct sbrec_mirror_table *sbrec_mirror_table,
> >>> -               const struct sbrec_chassis_table *sbrec_chassis_table,
> >>>                 struct ovsdb_idl_index *sbrec_chassis_by_name,
> >>>                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
> >>>  {
> >>>      struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
> >>>                                            NULL);
> >>>      hmap_insert(&od->ports, &op->dp_node,
> hmap_node_hash(&op->key_node));
> >>> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> >>> -                      sbrec_mirror_table, sbrec_chassis_table,
> >>> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> sbrec_mirror_table,
> >>>                        sbrec_chassis_by_name,
> sbrec_chassis_by_hostname)) {
> >>>          ovn_port_destroy(ls_ports, op);
> >>>          return NULL;
> >>> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
> >>>                  struct ovn_datapath *od,
> >>>                  const struct sbrec_port_binding *sb,
> >>>                  const struct sbrec_mirror_table *sbrec_mirror_table,
> >>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
> >>>                  struct ovsdb_idl_index *sbrec_chassis_by_name,
> >>>                  struct ovsdb_idl_index *sbrec_chassis_by_hostname)
> >>>  {
> >>> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
> >>>      op->sb = sb;
> >>>      ovn_port_set_nb(op, nbsp, nbrp);
> >>>      op->l3dgw_port = op->cr_port = NULL;
> >>> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> >>> -                        sbrec_mirror_table, sbrec_chassis_table,
> >>> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> sbrec_mirror_table,
> >>>                          sbrec_chassis_by_name,
> sbrec_chassis_by_hostname);
> >>>  }
> >>>
> >>> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> >>>              op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
> >>>                                  new_nbsp->name, new_nbsp, od, NULL,
> >>>                                  ni->sbrec_mirror_table,
> >>> -                                ni->sbrec_chassis_table,
> >>>                                  ni->sbrec_chassis_by_name,
> >>>                                  ni->sbrec_chassis_by_hostname);
> >>>              if (!op) {
> >>> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> >>>              if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
> >>>                                  new_nbsp, NULL,
> >>>                                  od, sb, ni->sbrec_mirror_table,
> >>> -                                ni->sbrec_chassis_table,
> >>>                                  ni->sbrec_chassis_by_name,
> >>>                                  ni->sbrec_chassis_by_hostname)) {
> >>>                  goto fail;
> >>> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
> >>>      use_common_zone = smap_get_bool(input_data->nb_options,
> "use_common_zone",
> >>>                                      false);
> >>>
> >>> +    init_vxlan_mode(input_data->nb_options,
> input_data->sbrec_chassis_table);
> >>> +
> >>>      build_datapaths(ovnsb_txn,
> >>>                      input_data->nbrec_logical_switch_table,
> >>>                      input_data->nbrec_logical_router_table,
> >>>                      input_data->sbrec_datapath_binding_table,
> >>> -                    input_data->sbrec_chassis_table,
> >>>                      &data->ls_datapaths,
> >>>                      &data->lr_datapaths, &data->lr_list);
> >>>      build_lb_datapaths(input_data->lbs, input_data->lbgrps,
> >>> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
> >>>                         &data->lb_datapaths_map,
> &data->lb_group_datapaths_map);
> >>>      build_ports(ovnsb_txn,
> >>>                  input_data->sbrec_port_binding_table,
> >>> -                input_data->sbrec_chassis_table,
> >>>                  input_data->sbrec_mirror_table,
> >>>                  input_data->sbrec_mac_binding_table,
> >>>                  input_data->sbrec_ha_chassis_group_table,
> >>> diff --git a/northd/northd.h b/northd/northd.h
> >>> index 5e9fa4745..ed9d992fb 100644
> >>> --- a/northd/northd.h
> >>> +++ b/northd/northd.h
> >>> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct
> ovn_datapath *od)
> >>>      return od->n_l3dgw_ports > 1 && !od->is_gw_router;
> >>>  }
> >>>
> >>> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
> >>> +void
> >>> +init_vxlan_mode(const struct smap *nb_options,
> >>> +                const struct sbrec_chassis_table
> *sbrec_chassis_table);
> >>> +
> >>> +uint32_t get_ovn_max_dp_key_local(void);
> >>>
> >>>  #endif /* NORTHD_H */
> >>> diff --git a/ovn-nb.xml b/ovn-nb.xml
> >>> index b652046a7..9b2cb355e 100644
> >>> --- a/ovn-nb.xml
> >>> +++ b/ovn-nb.xml
> >>> @@ -381,6 +381,18 @@
> >>>          of SB changes would be very noticeable.
> >>>        </column>
> >>>
> >>> +      <column name="options" key="disable_vxlan_mode">
> >>> +        Be default if at least one chassis in OVN cluster has VXLAN
> encap,
> >>> +        northd will run in a `vxlan mode`, which means it splits
> 24-bit
> >>> +        VXLAN VNI for datapath and port tunnel IDs allocation
> evenly.  Maximum
> >>> +        datapaths count in this mode is 4095 and maximum ports per
> datapath is
> >>> +        2047.  Rest of IDs are used for multicast groups.
> >>> +        In case VXLAN encaps are needed on chassis only to support HW
> VTEP
> >>> +        functionality, and main encap type is GENEVE or STT, set this
> option to
> >>> +        `false` to use defaults -- 16-bit space for datapath tunnel
> IDS and 15
> >>> +        bits for port tunnel IDs.
> >>> +      </column>
> >>> +
>
> Let's also add a NEWS entry for this one.  It's quite an important
> user-visible change.
>
> >>>        <group title="Options for configuring interconnection route
> advertisement">
> >>>          <p>
> >>>            These options control how routes are advertised between OVN
> >>> diff --git a/tests/ovn-northd.at <http://ovn-northd.at/> b/tests/
> ovn-northd.at <http://ovn-northd.at/>
> >>> index cd53755b2..f5d025852 100644
> >>> --- a/tests/ovn-northd.at <http://ovn-northd.at/>
> >>> +++ b/tests/ovn-northd.at <http://ovn-northd.at/>
> >>> @@ -2819,6 +2819,35 @@ AT_CHECK(
> >>>  get_tunnel_keys
> >>>  AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
> >>>
> >>> +AT_CLEANUP
> >>> +])
> >>> +OVN_FOR_EACH_NORTHD_NO_HV([
> >>> +AT_SETUP([check vxlan mode disabling])
> >>> +ovn_start
> >>> +
> >>> +# Create a fake chassis with vxlan encap to implicitly enable vxlan
> mode.
> >>> +ovn-sbctl \
> >>> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1"
> type="vxlan" \
> >>> +    -- --id=@c create chassis name=hv1 encaps=@e
> >>> +
> >>> +cmd="ovn-nbctl --wait=sb"
> >>> +for i in {1..4097..1}; do
> >>> +    cmd="${cmd} -- ls-add lsw-${i}"
> >>> +done
> >>> +
> >>> +eval $cmd
> >>> +
> >>> +check_row_count nb:Logical_Switch 4097
> >>> +wait_row_count sb:Datapath_Binding 4095
> >>> +
> >>> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted"
> northd/ovn-northd.log])
> >>> +
> >>> +# Explicitly disable vxlan mode and check that two remaining
> datapaths were created.
> >>> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
> >>> +
> >>> +check_row_count nb:Logical_Switch 4097
> >>> +wait_row_count sb:Datapath_Binding 4097
> >>> +
> >>>  AT_CLEANUP
> >>>  ])
> >>>
> >>> --
> >>> 2.44.0
> >>>
> >
> >
> > Regards,
> > Vladislav Odintsov
>
> Regards,
> Dumitru
>
>
Ihar Hrachyshka April 4, 2024, 11:13 p.m. UTC | #10
On Wed, Apr 3, 2024 at 3:01 PM Vladislav Odintsov <odivlad@gmail.com> wrote:

> Hi Ihar,
>
> thanks for your quick reaction!
> I didn’t see mentioned thread, but think that it is not safe enough to
> have automatic detection of this scenario here.
> Imagine: for VXLAN with HW VTEP scenario besides VXLAN encap one must
> configure also either GENEVE and/or STT encap(s) for HV chassis.
> So, detection could be implemented like this:
> Check all non-VTEP chassis’ encaps and find ‘effective’ encap for each of
> them. If we detect at least one chassis with ‘effective’ encap == vxlan,
> enable vxlan mode. Normal mode otherwise.
> ‘effective encap’ means that for ‘vxlan,geneve,stt’ encaps effective is
> geneve, for ‘vxlan,stt’ -> stt, for ‘vxlan’ -> vxlan.
> Such behavior was my first idea.
>
> But I decided that there possible flapping of modes if there is a
> problem/bug in deployment tooling and it is enough to have only one chassis
> with wrong encap set to affect vxlan mode for entire OVN cluster. Such mode
> flapping can result in problems with tunnel ids allocation.
> So it seems that to have an option that statically sets vxlan mode is more
> resilient.
> What do you think?
>

To close the loop here, I've looked into is-vtep auto-detection myself and
realized that Encaps do not reflect the *peer* chassis, so it's not as
trivial to detect the mode automatically. One would have to iterate over
port bindings and check that all "vxlan peer chassis" hosts only type=vtep
bindings... I agree that a config knob here is the way to go.


>
> regards,
> Vladislav Odintsov
>
> On 3 Apr 2024, at 20:44, Ihar Hrachyshka <ihrachys@redhat.com> wrote:
>
> 
> Thank you Vladislav.
>
> FYI it was reported in the past in
> https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html
> but fell through cracks then. Thanks for picking it up!
>
> In your patch, you introduce a new config option to disable the
> 'vxlan-mode' behavior. This will definitely work. But I wonder if we can
> automatically detect this scenario by ignoring the chassis that are VTEP
> from consideration? I believe ovn-controller-vtep sets `is-vtep` in
> other_config, so - would it work if we modify is_vxlan_mode to consider it
> too?
>
> Thanks again for looking into this.
> Ihar
>
> On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odivlad@gmail.com>
> wrote:
>
>> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
>> for available tunnel IDs because of lack of space in VXLAN VNI.
>> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
>> and 2047 logical switch ports per datapath.
>>
>> Prior to this patch vxlan mode was enabled automatically if at least one
>> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
>> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>>
>> This patch adds support for explicit disabling of vxlan mode via
>> Northbound database.
>>
>> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>>
>> CC: Ihar Hrachyshka <ihrachys@redhat.com>
>> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>> ---
>>  northd/en-global-config.c |  9 +++-
>>  northd/northd.c           | 90 ++++++++++++++++++---------------------
>>  northd/northd.h           |  6 ++-
>>  ovn-nb.xml                | 12 ++++++
>>  tests/ovn-northd.at       | 29 +++++++++++++
>>  5 files changed, 94 insertions(+), 52 deletions(-)
>>
>> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
>> index 34e393b33..9310c4575 100644
>> --- a/northd/en-global-config.c
>> +++ b/northd/en-global-config.c
>> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void
>> *data)
>>                       config_data->svc_monitor_mac);
>>      }
>>
>> -    char *max_tunid = xasprintf("%d",
>> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
>> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
>> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>>      smap_replace(options, "max_tunid", max_tunid);
>>      free(max_tunid);
>>
>> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct
>> nbrec_nb_global *nb,
>>          return true;
>>      }
>>
>> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
>> +                           "disable_vxlan_mode", false)) {
>> +        return true;
>> +    }
>> +
>>      return false;
>>  }
>>
>> diff --git a/northd/northd.c b/northd/northd.c
>> index c568f6360..859b233e8 100644
>> --- a/northd/northd.c
>> +++ b/northd/northd.c
>> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>>   */
>>  static bool default_acl_drop;
>>
>> +/* If this option is 'true' northd will use limited 24-bit space for
>> datapath
>> + * and ports tunnel key allocation (12 bits for each instead of default
>> 16). */
>> +static bool vxlan_mode;
>> +
>>  #define MAX_OVN_TAGS 4096
>>
>>
>> @@ -875,24 +879,31 @@ join_datapaths(const struct
>> nbrec_logical_switch_table *nbrec_ls_table,
>>      }
>>  }
>>
>> -static bool
>> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
>> +void
>> +init_vxlan_mode(const struct smap *nb_options,
>> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>>  {
>> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
>> +        vxlan_mode = false;
>> +        return;
>> +    }
>> +
>>      const struct sbrec_chassis *chassis;
>>      SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>>          for (int i = 0; i < chassis->n_encaps; i++) {
>>              if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
>> -                return true;
>> +                vxlan_mode = true;
>> +                return;
>>              }
>>          }
>>      }
>> -    return false;
>> +    vxlan_mode = false;
>>  }
>>
>>  uint32_t
>> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table
>> *sbrec_chassis_table)
>> +get_ovn_max_dp_key_local(void)
>>  {
>> -    if (is_vxlan_mode(sbrec_chassis_table)) {
>> +    if (vxlan_mode) {
>>          /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>>          return OVN_MAX_DP_VXLAN_KEY;
>>      }
>> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct
>> sbrec_chassis_table *sbrec_chassis_table)
>>  }
>>
>>  static void
>> -ovn_datapath_allocate_key(const struct sbrec_chassis_table
>> *sbrec_ch_table,
>> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
>> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>>                            struct ovn_datapath *od, uint32_t *hint)
>>  {
>>      if (!od->tunnel_key) {
>>          od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
>> -                                    OVN_MIN_DP_KEY_LOCAL,
>> -
>> get_ovn_max_dp_key_local(sbrec_ch_table),
>> -                                    hint);
>> +                                            OVN_MIN_DP_KEY_LOCAL,
>> +                                            get_ovn_max_dp_key_local(),
>> +                                            hint);
>>          if (!od->tunnel_key) {
>>              if (od->sb) {
>>                  sbrec_datapath_binding_delete(od->sb);
>> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct
>> sbrec_chassis_table *sbrec_ch_table,
>>
>>  static void
>>  ovn_datapath_assign_requested_tnl_id(
>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>      struct hmap *dp_tnlids, struct ovn_datapath *od)
>>  {
>>      const struct smap *other_config = (od->nbs
>> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>>      uint32_t tunnel_key = smap_get_int(other_config,
>> "requested-tnl-key", 0);
>>      if (tunnel_key) {
>>          const char *interconn_ts = smap_get(other_config,
>> "interconn-ts");
>> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
>> -            tunnel_key >= 1 << 12) {
>> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1,
>> 1);
>>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>>                           "incompatible with VXLAN", tunnel_key,
>> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>                  const struct nbrec_logical_switch_table *nbrec_ls_table,
>>                  const struct nbrec_logical_router_table *nbrec_lr_table,
>>                  const struct sbrec_datapath_binding_table
>> *sbrec_dp_table,
>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>                  struct ovn_datapaths *ls_datapaths,
>>                  struct ovn_datapaths *lr_datapaths,
>>                  struct ovs_list *lr_list)
>> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>      struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>>      struct ovn_datapath *od;
>>      LIST_FOR_EACH (od, list, &both) {
>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
>> &dp_tnlids,
>> -                                             od);
>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>      }
>>      LIST_FOR_EACH (od, list, &nb_only) {
>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
>> &dp_tnlids,
>> -                                             od); }
>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>> +    }
>>
>>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>>      LIST_FOR_EACH (od, list, &both) {
>> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>      /* Assign new tunnel ids where needed. */
>>      uint32_t hint = 0;
>>      LIST_FOR_EACH_SAFE (od, list, &both) {
>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>> -                                  datapaths, &dp_tnlids, od, &hint);
>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>      }
>>      LIST_FOR_EACH_SAFE (od, list, &nb_only) {
>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>> -                                  datapaths, &dp_tnlids, od, &hint);
>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>      }
>>
>>      /* Sync tunnel ids from nb to sb. */
>> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t
>> tunnel_key)
>>   * that the I-P engine can fallback to recompute if needed; otherwise
>> return
>>   * true (even if the key is not allocated). */
>>  static bool
>> -ovn_port_assign_requested_tnl_id(
>> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct
>> ovn_port *op)
>> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>>  {
>>      const struct smap *options = (op->nbsp
>>                                    ? &op->nbsp->options
>>                                    : &op->nbrp->options);
>>      uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>>      if (tunnel_key) {
>> -        if (is_vxlan_mode(sbrec_chassis_table) &&
>> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1,
>> 1);
>>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>>                           "is incompatible with VXLAN",
>> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>>  }
>>
>>  static bool
>> -ovn_port_allocate_key(const struct sbrec_chassis_table
>> *sbrec_chassis_table,
>> -                      struct hmap *ports,
>> -                      struct ovn_port *op)
>> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>>  {
>>      if (!op->tunnel_key) {
>> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
>> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>>          op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>>                                              1, (1u << (key_bits - 1)) -
>> 1,
>>                                              &op->od->port_key_hint);
>> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct
>> sbrec_chassis_table *sbrec_chassis_table,
>>  static void
>>  build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>      const struct sbrec_port_binding_table *sbrec_port_binding_table,
>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>      const struct sbrec_mirror_table *sbrec_mirror_table,
>>      const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>>      const struct sbrec_ha_chassis_group_table
>> *sbrec_ha_chassis_group_table,
>> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>      /* Assign explicitly requested tunnel ids first. */
>>      struct ovn_port *op;
>>      LIST_FOR_EACH (op, list, &both) {
>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>> +        ovn_port_assign_requested_tnl_id(op);
>>      }
>>      LIST_FOR_EACH (op, list, &nb_only) {
>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>> +        ovn_port_assign_requested_tnl_id(op);
>>      }
>>
>>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>
>>      /* Assign new tunnel ids where needed. */
>>      LIST_FOR_EACH_SAFE (op, list, &both) {
>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>> +        ovn_port_allocate_key(ports, op);
>>      }
>>      LIST_FOR_EACH_SAFE (op, list, &nb_only) {
>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>> +        ovn_port_allocate_key(ports, op);
>>      }
>>
>>      /* For logical ports that are in both databases, update the
>> southbound
>> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct
>> ovsdb_idl_txn *ovnsb_txn,
>>               struct hmap *ls_ports, struct ovn_datapath *od,
>>               const struct sbrec_port_binding *sb,
>>               const struct sbrec_mirror_table *sbrec_mirror_table,
>> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>  {
>>      op->od = od;
>>      parse_lsp_addrs(op);
>>      /* Assign explicitly requested tunnel ids first. */
>> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
>> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>>          return false;
>>      }
>>      if (sb) {
>> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct
>> ovsdb_idl_txn *ovnsb_txn,
>>          sbrec_port_binding_set_logical_port(op->sb, op->key);
>>      }
>>      /* Assign new tunnel ids where needed. */
>> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
>> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>>          return false;
>>      }
>>      ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
>> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn,
>> struct hmap *ls_ports,
>>                 const char *key, const struct nbrec_logical_switch_port
>> *nbsp,
>>                 struct ovn_datapath *od, const struct sbrec_port_binding
>> *sb,
>>                 const struct sbrec_mirror_table *sbrec_mirror_table,
>> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>>                 struct ovsdb_idl_index *sbrec_chassis_by_name,
>>                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>  {
>>      struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>>                                            NULL);
>>      hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
>> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>> -                      sbrec_mirror_table, sbrec_chassis_table,
>> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>> sbrec_mirror_table,
>>                        sbrec_chassis_by_name, sbrec_chassis_by_hostname))
>> {
>>          ovn_port_destroy(ls_ports, op);
>>          return NULL;
>> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct
>> ovsdb_idl_txn *ovnsb_txn,
>>                  struct ovn_datapath *od,
>>                  const struct sbrec_port_binding *sb,
>>                  const struct sbrec_mirror_table *sbrec_mirror_table,
>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>                  struct ovsdb_idl_index *sbrec_chassis_by_name,
>>                  struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>  {
>> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct
>> ovsdb_idl_txn *ovnsb_txn,
>>      op->sb = sb;
>>      ovn_port_set_nb(op, nbsp, nbrp);
>>      op->l3dgw_port = op->cr_port = NULL;
>> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>> -                        sbrec_mirror_table, sbrec_chassis_table,
>> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>> sbrec_mirror_table,
>>                          sbrec_chassis_by_name,
>> sbrec_chassis_by_hostname);
>>  }
>>
>> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
>> *ovnsb_idl_txn,
>>              op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>>                                  new_nbsp->name, new_nbsp, od, NULL,
>>                                  ni->sbrec_mirror_table,
>> -                                ni->sbrec_chassis_table,
>>                                  ni->sbrec_chassis_by_name,
>>                                  ni->sbrec_chassis_by_hostname);
>>              if (!op) {
>> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
>> *ovnsb_idl_txn,
>>              if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>>                                  new_nbsp, NULL,
>>                                  od, sb, ni->sbrec_mirror_table,
>> -                                ni->sbrec_chassis_table,
>>                                  ni->sbrec_chassis_by_name,
>>                                  ni->sbrec_chassis_by_hostname)) {
>>                  goto fail;
>> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>>      use_common_zone = smap_get_bool(input_data->nb_options,
>> "use_common_zone",
>>                                      false);
>>
>> +    init_vxlan_mode(input_data->nb_options,
>> input_data->sbrec_chassis_table);
>> +
>>      build_datapaths(ovnsb_txn,
>>                      input_data->nbrec_logical_switch_table,
>>                      input_data->nbrec_logical_router_table,
>>                      input_data->sbrec_datapath_binding_table,
>> -                    input_data->sbrec_chassis_table,
>>                      &data->ls_datapaths,
>>                      &data->lr_datapaths, &data->lr_list);
>>      build_lb_datapaths(input_data->lbs, input_data->lbgrps,
>> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>>                         &data->lb_datapaths_map,
>> &data->lb_group_datapaths_map);
>>      build_ports(ovnsb_txn,
>>                  input_data->sbrec_port_binding_table,
>> -                input_data->sbrec_chassis_table,
>>                  input_data->sbrec_mirror_table,
>>                  input_data->sbrec_mac_binding_table,
>>                  input_data->sbrec_ha_chassis_group_table,
>> diff --git a/northd/northd.h b/northd/northd.h
>> index 5e9fa4745..ed9d992fb 100644
>> --- a/northd/northd.h
>> +++ b/northd/northd.h
>> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath
>> *od)
>>      return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>>  }
>>
>> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
>> +void
>> +init_vxlan_mode(const struct smap *nb_options,
>> +                const struct sbrec_chassis_table *sbrec_chassis_table);
>> +
>> +uint32_t get_ovn_max_dp_key_local(void);
>>
>>  #endif /* NORTHD_H */
>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>> index b652046a7..9b2cb355e 100644
>> --- a/ovn-nb.xml
>> +++ b/ovn-nb.xml
>> @@ -381,6 +381,18 @@
>>          of SB changes would be very noticeable.
>>        </column>
>>
>> +      <column name="options" key="disable_vxlan_mode">
>> +        Be default if at least one chassis in OVN cluster has VXLAN
>> encap,
>> +        northd will run in a `vxlan mode`, which means it splits 24-bit
>> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.
>> Maximum
>> +        datapaths count in this mode is 4095 and maximum ports per
>> datapath is
>> +        2047.  Rest of IDs are used for multicast groups.
>> +        In case VXLAN encaps are needed on chassis only to support HW
>> VTEP
>> +        functionality, and main encap type is GENEVE or STT, set this
>> option to
>> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS
>> and 15
>> +        bits for port tunnel IDs.
>> +      </column>
>> +
>>        <group title="Options for configuring interconnection route
>> advertisement">
>>          <p>
>>            These options control how routes are advertised between OVN
>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
>> index cd53755b2..f5d025852 100644
>> --- a/tests/ovn-northd.at
>> +++ b/tests/ovn-northd.at
>> @@ -2819,6 +2819,35 @@ AT_CHECK(
>>  get_tunnel_keys
>>  AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
>>
>> +AT_CLEANUP
>> +])
>> +OVN_FOR_EACH_NORTHD_NO_HV([
>> +AT_SETUP([check vxlan mode disabling])
>> +ovn_start
>> +
>> +# Create a fake chassis with vxlan encap to implicitly enable vxlan mode.
>> +ovn-sbctl \
>> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \
>> +    -- --id=@c create chassis name=hv1 encaps=@e
>> +
>> +cmd="ovn-nbctl --wait=sb"
>> +for i in {1..4097..1}; do
>> +    cmd="${cmd} -- ls-add lsw-${i}"
>> +done
>> +
>> +eval $cmd
>> +
>> +check_row_count nb:Logical_Switch 4097
>> +wait_row_count sb:Datapath_Binding 4095
>> +
>> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted"
>> northd/ovn-northd.log])
>> +
>> +# Explicitly disable vxlan mode and check that two remaining datapaths
>> were created.
>> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
>> +
>> +check_row_count nb:Logical_Switch 4097
>> +wait_row_count sb:Datapath_Binding 4097
>> +
>>  AT_CLEANUP
>>  ])
>>
>> --
>> 2.44.0
>>
>>
Dumitru Ceara April 19, 2024, 8:27 a.m. UTC | #11
On 4/4/24 21:23, Ihar Hrachyshka wrote:
> On Thu, Apr 4, 2024 at 6:06 AM Dumitru Ceara <dceara@redhat.com> wrote:
> 
>> On 4/3/24 22:05, Vladislav Odintsov wrote:
>>> re-sending email because ovs list rejected previous its content for some
>> reason:
>>>
>>> Hi Ihar,
>>>
>>
>> Hi Vladislav, Ihar,
>>
>>> thanks for your quick reaction!
>>> I didn’t see mentioned thread, but I think that it is not safe enough to
>> have automatic detection of this scenario here.
>>>
>>> Imagine: for VXLAN with HW VTEP scenario besides VXLAN encap one must
>> configure also either GENEVE and/or STT encap(s) for HV chassis.
>>>
>>> So, detection could be implemented like this:
>>> Check all non-VTEP chassis' encaps and find "effective encap" for each
>> of them. If we detect at least one chassis with "effective encap" == vxlan,
>> then enable vxlan mode. Normal mode otherwise.
>>> "effective encap" means that for 'vxlan,geneve,stt' encaps effective is
>> geneve, for 'vxlan,stt' -> stt, for 'vxlan' -> vxlan.
>>> Such behavior was my first idea.
>>>
>>> But I decided that there possible flapping of modes if there is a
>> problem/bug in deployment tooling and it is enough to have only one chassis
>> with wrong encap set to affect vxlan mode for entire OVN cluster. Such mode
>> flapping can result in problems with tunnel ids allocation.
>>
>> These are valid points.
>>
>>
> This is a valid concern. Should it perhaps be handled in the general case
> then? Meaning: calculate the max tunid once and store it in db? (I am
> thinking that maybe recalculating the max_tunid on every engine cycle - or
> on every controller restart - is unsafe?)
> 

OTOH, this is a rather "static" characteristic of the cluster and the
CMS can easily know what kind of encapsulation it's configuring OVN to use.

I think I'd keep it simple (for OVN).

> 
>>> So it seems that to have an option that statically sets vxlan mode is
>> more resilient.
>>
>> In general we try to avoid new config knobs.
>> .
>>> What do you think?
>>>
>>
>> But in this case it make actually be easier if we offload the work of
>> determining vxlan-mode to the CMS.
>>
>>>
>>>> On 3 Apr 2024, at 20:43, Ihar Hrachyshka <ihrachys@redhat.com> wrote:
>>>>
>>>> Thank you Vladislav.
>>>>
>>>> FYI it was reported in the past in
>> https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html
>> but fell through cracks then. Thanks for picking it up!
>>>>
>>>> In your patch, you introduce a new config option to disable the
>> 'vxlan-mode' behavior. This will definitely work. But I wonder if we can
>> automatically detect this scenario by ignoring the chassis that are VTEP
>> from consideration? I believe ovn-controller-vtep sets `is-vtep` in
>> other_config, so - would it work if we modify is_vxlan_mode to consider it
>> too?
>>>>
>>>> Thanks again for looking into this.
>>>> Ihar
>>>>
>>>> On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odivlad@gmail.com
>> <mailto:odivlad@gmail.com>> wrote:
>>>>> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
>>>>> for available tunnel IDs because of lack of space in VXLAN VNI.
>>>>> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
>>>>> and 2047 logical switch ports per datapath.
>>>>>
>>>>> Prior to this patch vxlan mode was enabled automatically if at least
>> one
>>>>> chassis had encap of vxlan type.  In scenarios where one want to use
>> VXLAN
>>>>> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>>>>>
>>>>> This patch adds support for explicit disabling of vxlan mode via
>>>>> Northbound database.
>>>>>
>>>>> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>>>>>
>>>>> CC: Ihar Hrachyshka <ihrachys@redhat.com <mailto:ihrachys@redhat.com>>
>>>>> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath
>> bindings")
>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com <mailto:
>> odivlad@gmail.com>>
>>>>> ---
>>>>>  northd/en-global-config.c |  9 +++-
>>>>>  northd/northd.c           | 90 ++++++++++++++++++---------------------
>>>>>  northd/northd.h           |  6 ++-
>>>>>  ovn-nb.xml                | 12 ++++++
>>>>>  tests/ovn-northd.at <http://ovn-northd.at/>       | 29 +++++++++++++
>>>>>  5 files changed, 94 insertions(+), 52 deletions(-)
>>>>>
>>>>> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
>>>>> index 34e393b33..9310c4575 100644
>>>>> --- a/northd/en-global-config.c
>>>>> +++ b/northd/en-global-config.c
>>>>> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node ,
>> void *data)
>>>>>                       config_data->svc_monitor_mac);
>>>>>      }
>>>>>
>>>>> -    char *max_tunid = xasprintf("%d",
>>>>> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
>>>>> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
>>>>> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>>>>>      smap_replace(options, "max_tunid", max_tunid);
>>>>>      free(max_tunid);
>>>>>
>>>>> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct
>> nbrec_nb_global *nb,
>>>>>          return true;
>>>>>      }
>>>>>
>>>>> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
>>>>> +                           "disable_vxlan_mode", false)) {
>>>>> +        return true;
>>>>> +    }
>>>>> +
>>>>>      return false;
>>>>>  }
>>>>>
>>>>> diff --git a/northd/northd.c b/northd/northd.c
>>>>> index c568f6360..859b233e8 100644
>>>>> --- a/northd/northd.c
>>>>> +++ b/northd/northd.c
>>>>> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>>>>>   */
>>>>>  static bool default_acl_drop;
>>>>>
>>>>> +/* If this option is 'true' northd will use limited 24-bit space for
>> datapath
>>>>> + * and ports tunnel key allocation (12 bits for each instead of
>> default 16). */
>>>>> +static bool vxlan_mode;
>>>>> +
>>>>>  #define MAX_OVN_TAGS 4096
>>>>>
>>>>>
>>>>> @@ -875,24 +879,31 @@ join_datapaths(const struct
>> nbrec_logical_switch_table *nbrec_ls_table,
>>>>>      }
>>>>>  }
>>>>>
>>>>> -static bool
>>>>> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>> +void
>>>>> +init_vxlan_mode(const struct smap *nb_options,
>>>>> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>>>>>  {
>>>>> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
>>>>> +        vxlan_mode = false;
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>>      const struct sbrec_chassis *chassis;
>>>>>      SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>>>>>          for (int i = 0; i < chassis->n_encaps; i++) {
>>>>>              if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
>>>>> -                return true;
>>>>> +                vxlan_mode = true;
>>>>> +                return;
>>>>>              }
>>>>>          }
>>>>>      }
>>>>> -    return false;
>>>>> +    vxlan_mode = false;
>>>>>  }
>>>>>
>>>>>  uint32_t
>>>>> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table
>> *sbrec_chassis_table)
>>>>> +get_ovn_max_dp_key_local(void)
>>>>>  {
>>>>> -    if (is_vxlan_mode(sbrec_chassis_table)) {
>>>>> +    if (vxlan_mode) {
>>>>>          /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>>>>>          return OVN_MAX_DP_VXLAN_KEY;
>>>>>      }
>>>>> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct
>> sbrec_chassis_table *sbrec_chassis_table)
>>>>>  }
>>>>>
>>>>>  static void
>>>>> -ovn_datapath_allocate_key(const struct sbrec_chassis_table
>> *sbrec_ch_table,
>>>>> -                          struct hmap *datapaths, struct hmap
>> *dp_tnlids,
>>>>> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap
>> *dp_tnlids,
>>>>>                            struct ovn_datapath *od, uint32_t *hint)
>>>>>  {
>>>>>      if (!od->tunnel_key) {
>>>>>          od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
>>>>> -                                    OVN_MIN_DP_KEY_LOCAL,
>>>>> -
>> get_ovn_max_dp_key_local(sbrec_ch_table),
>>>>> -                                    hint);
>>>>> +                                            OVN_MIN_DP_KEY_LOCAL,
>>>>> +
>> get_ovn_max_dp_key_local(),
>>>>> +                                            hint);
>>>>>          if (!od->tunnel_key) {
>>>>>              if (od->sb) {
>>>>>                  sbrec_datapath_binding_delete(od->sb);
>>>>> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct
>> sbrec_chassis_table *sbrec_ch_table,
>>>>>
>>>>>  static void
>>>>>  ovn_datapath_assign_requested_tnl_id(
>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>      struct hmap *dp_tnlids, struct ovn_datapath *od)
>>>>>  {
>>>>>      const struct smap *other_config = (od->nbs
>>>>> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>>>>>      uint32_t tunnel_key = smap_get_int(other_config,
>> "requested-tnl-key", 0);
>>>>>      if (tunnel_key) {
>>>>>          const char *interconn_ts = smap_get(other_config,
>> "interconn-ts");
>>>>> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
>>>>> -            tunnel_key >= 1 << 12) {
>>>>> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>>>>>              static struct vlog_rate_limit rl =
>> VLOG_RATE_LIMIT_INIT(1, 1);
>>>>>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s
>> is "
>>>>>                           "incompatible with VXLAN", tunnel_key,
>>>>> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>                  const struct nbrec_logical_switch_table
>> *nbrec_ls_table,
>>>>>                  const struct nbrec_logical_router_table
>> *nbrec_lr_table,
>>>>>                  const struct sbrec_datapath_binding_table
>> *sbrec_dp_table,
>>>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>                  struct ovn_datapaths *ls_datapaths,
>>>>>                  struct ovn_datapaths *lr_datapaths,
>>>>>                  struct ovs_list *lr_list)
>>>>> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>      struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>>>>>      struct ovn_datapath *od;
>>>>>      LIST_FOR_EACH (od, list, &both) {
>>>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
>> &dp_tnlids,
>>>>> -                                             od);
>>>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>>>      }
>>>>>      LIST_FOR_EACH (od, list, &nb_only) {
>>>>> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
>> &dp_tnlids,
>>>>> -                                             od); }
>>>>> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>>>>> +    }
>>>>>
>>>>>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>>>      LIST_FOR_EACH (od, list, &both) {
>>>>> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn
>> *ovnsb_txn,
>>>>>      /* Assign new tunnel ids where needed. */
>>>>>      uint32_t hint = 0;
>>>>>      LIST_FOR_EACH_SAFE (od, list, &both) {
>>>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>>>      }
>>>>>      LIST_FOR_EACH_SAFE (od, list, &nb_only) {
>>>>> -        ovn_datapath_allocate_key(sbrec_chassis_table,
>>>>> -                                  datapaths, &dp_tnlids, od, &hint);
>>>>> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>>>>>      }
>>>>>
>>>>>      /* Sync tunnel ids from nb to sb. */
>>>>> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op,
>> uint32_t tunnel_key)
>>>>>   * that the I-P engine can fallback to recompute if needed; otherwise
>> return
>>>>>   * true (even if the key is not allocated). */
>>>>>  static bool
>>>>> -ovn_port_assign_requested_tnl_id(
>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct
>> ovn_port *op)
>>>>> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>>>>>  {
>>>>>      const struct smap *options = (op->nbsp
>>>>>                                    ? &op->nbsp->options
>>>>>                                    : &op->nbrp->options);
>>>>>      uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key",
>> 0);
>>>>>      if (tunnel_key) {
>>>>> -        if (is_vxlan_mode(sbrec_chassis_table) &&
>>>>> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>>> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>>>>>              static struct vlog_rate_limit rl =
>> VLOG_RATE_LIMIT_INIT(1, 1);
>>>>>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>>>>>                           "is incompatible with VXLAN",
>>>>> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>>>>>  }
>>>>>
>>>>>  static bool
>>>>> -ovn_port_allocate_key(const struct sbrec_chassis_table
>> *sbrec_chassis_table,
>>>>> -                      struct hmap *ports,
>>>>> -                      struct ovn_port *op)
>>>>> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>>>>>  {
>>>>>      if (!op->tunnel_key) {
>>>>> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 :
>> 16;
>>>>> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>>>>>          op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids,
>> "port",
>>>>>                                              1, (1u << (key_bits - 1))
>> - 1,
>>>>>                                              &op->od->port_key_hint);
>>>>> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct
>> sbrec_chassis_table *sbrec_chassis_table,
>>>>>  static void
>>>>>  build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>      const struct sbrec_port_binding_table *sbrec_port_binding_table,
>>>>> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>      const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>>      const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>>>>>      const struct sbrec_ha_chassis_group_table
>> *sbrec_ha_chassis_group_table,
>>>>> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>      /* Assign explicitly requested tunnel ids first. */
>>>>>      struct ovn_port *op;
>>>>>      LIST_FOR_EACH (op, list, &both) {
>>>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>>>> +        ovn_port_assign_requested_tnl_id(op);
>>>>>      }
>>>>>      LIST_FOR_EACH (op, list, &nb_only) {
>>>>> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
>>>>> +        ovn_port_assign_requested_tnl_id(op);
>>>>>      }
>>>>>
>>>>>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>>>>> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>>>>>
>>>>>      /* Assign new tunnel ids where needed. */
>>>>>      LIST_FOR_EACH_SAFE (op, list, &both) {
>>>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>>>> +        ovn_port_allocate_key(ports, op);
>>>>>      }
>>>>>      LIST_FOR_EACH_SAFE (op, list, &nb_only) {
>>>>> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
>>>>> +        ovn_port_allocate_key(ports, op);
>>>>>      }
>>>>>
>>>>>      /* For logical ports that are in both databases, update the
>> southbound
>>>>> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct
>> ovsdb_idl_txn *ovnsb_txn,
>>>>>               struct hmap *ls_ports, struct ovn_datapath *od,
>>>>>               const struct sbrec_port_binding *sb,
>>>>>               const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>>  {
>>>>>      op->od = od;
>>>>>      parse_lsp_addrs(op);
>>>>>      /* Assign explicitly requested tunnel ids first. */
>>>>> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
>>>>> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>>>>>          return false;
>>>>>      }
>>>>>      if (sb) {
>>>>> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct
>> ovsdb_idl_txn *ovnsb_txn,
>>>>>          sbrec_port_binding_set_logical_port(op->sb, op->key);
>>>>>      }
>>>>>      /* Assign new tunnel ids where needed. */
>>>>> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
>>>>> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>>>>>          return false;
>>>>>      }
>>>>>      ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
>>>>> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn
>> *ovnsb_txn, struct hmap *ls_ports,
>>>>>                 const char *key, const struct
>> nbrec_logical_switch_port *nbsp,
>>>>>                 struct ovn_datapath *od, const struct
>> sbrec_port_binding *sb,
>>>>>                 const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>                 struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>>  {
>>>>>      struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>>>>>                                            NULL);
>>>>>      hmap_insert(&od->ports, &op->dp_node,
>> hmap_node_hash(&op->key_node));
>>>>> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>>>> -                      sbrec_mirror_table, sbrec_chassis_table,
>>>>> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>> sbrec_mirror_table,
>>>>>                        sbrec_chassis_by_name,
>> sbrec_chassis_by_hostname)) {
>>>>>          ovn_port_destroy(ls_ports, op);
>>>>>          return NULL;
>>>>> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct
>> ovsdb_idl_txn *ovnsb_txn,
>>>>>                  struct ovn_datapath *od,
>>>>>                  const struct sbrec_port_binding *sb,
>>>>>                  const struct sbrec_mirror_table *sbrec_mirror_table,
>>>>> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>>>>>                  struct ovsdb_idl_index *sbrec_chassis_by_name,
>>>>>                  struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>>>>>  {
>>>>> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct
>> ovsdb_idl_txn *ovnsb_txn,
>>>>>      op->sb = sb;
>>>>>      ovn_port_set_nb(op, nbsp, nbrp);
>>>>>      op->l3dgw_port = op->cr_port = NULL;
>>>>> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>>>>> -                        sbrec_mirror_table, sbrec_chassis_table,
>>>>> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
>> sbrec_mirror_table,
>>>>>                          sbrec_chassis_by_name,
>> sbrec_chassis_by_hostname);
>>>>>  }
>>>>>
>>>>> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
>> *ovnsb_idl_txn,
>>>>>              op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>>>>>                                  new_nbsp->name, new_nbsp, od, NULL,
>>>>>                                  ni->sbrec_mirror_table,
>>>>> -                                ni->sbrec_chassis_table,
>>>>>                                  ni->sbrec_chassis_by_name,
>>>>>                                  ni->sbrec_chassis_by_hostname);
>>>>>              if (!op) {
>>>>> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
>> *ovnsb_idl_txn,
>>>>>              if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>>>>>                                  new_nbsp, NULL,
>>>>>                                  od, sb, ni->sbrec_mirror_table,
>>>>> -                                ni->sbrec_chassis_table,
>>>>>                                  ni->sbrec_chassis_by_name,
>>>>>                                  ni->sbrec_chassis_by_hostname)) {
>>>>>                  goto fail;
>>>>> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>>      use_common_zone = smap_get_bool(input_data->nb_options,
>> "use_common_zone",
>>>>>                                      false);
>>>>>
>>>>> +    init_vxlan_mode(input_data->nb_options,
>> input_data->sbrec_chassis_table);
>>>>> +
>>>>>      build_datapaths(ovnsb_txn,
>>>>>                      input_data->nbrec_logical_switch_table,
>>>>>                      input_data->nbrec_logical_router_table,
>>>>>                      input_data->sbrec_datapath_binding_table,
>>>>> -                    input_data->sbrec_chassis_table,
>>>>>                      &data->ls_datapaths,
>>>>>                      &data->lr_datapaths, &data->lr_list);
>>>>>      build_lb_datapaths(input_data->lbs, input_data->lbgrps,
>>>>> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>>                         &data->lb_datapaths_map,
>> &data->lb_group_datapaths_map);
>>>>>      build_ports(ovnsb_txn,
>>>>>                  input_data->sbrec_port_binding_table,
>>>>> -                input_data->sbrec_chassis_table,
>>>>>                  input_data->sbrec_mirror_table,
>>>>>                  input_data->sbrec_mac_binding_table,
>>>>>                  input_data->sbrec_ha_chassis_group_table,
>>>>> diff --git a/northd/northd.h b/northd/northd.h
>>>>> index 5e9fa4745..ed9d992fb 100644
>>>>> --- a/northd/northd.h
>>>>> +++ b/northd/northd.h
>>>>> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct
>> ovn_datapath *od)
>>>>>      return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>>>>>  }
>>>>>
>>>>> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
>>>>> +void
>>>>> +init_vxlan_mode(const struct smap *nb_options,
>>>>> +                const struct sbrec_chassis_table
>> *sbrec_chassis_table);
>>>>> +
>>>>> +uint32_t get_ovn_max_dp_key_local(void);
>>>>>
>>>>>  #endif /* NORTHD_H */
>>>>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>>>>> index b652046a7..9b2cb355e 100644
>>>>> --- a/ovn-nb.xml
>>>>> +++ b/ovn-nb.xml
>>>>> @@ -381,6 +381,18 @@
>>>>>          of SB changes would be very noticeable.
>>>>>        </column>
>>>>>
>>>>> +      <column name="options" key="disable_vxlan_mode">
>>>>> +        Be default if at least one chassis in OVN cluster has VXLAN
>> encap,
>>>>> +        northd will run in a `vxlan mode`, which means it splits
>> 24-bit
>>>>> +        VXLAN VNI for datapath and port tunnel IDs allocation
>> evenly.  Maximum
>>>>> +        datapaths count in this mode is 4095 and maximum ports per
>> datapath is
>>>>> +        2047.  Rest of IDs are used for multicast groups.
>>>>> +        In case VXLAN encaps are needed on chassis only to support HW
>> VTEP
>>>>> +        functionality, and main encap type is GENEVE or STT, set this
>> option to
>>>>> +        `false` to use defaults -- 16-bit space for datapath tunnel
>> IDS and 15
>>>>> +        bits for port tunnel IDs.
>>>>> +      </column>
>>>>> +
>>
>> Let's also add a NEWS entry for this one.  It's quite an important
>> user-visible change.
>>
>>>>>        <group title="Options for configuring interconnection route
>> advertisement">
>>>>>          <p>
>>>>>            These options control how routes are advertised between OVN
>>>>> diff --git a/tests/ovn-northd.at <http://ovn-northd.at/> b/tests/
>> ovn-northd.at <http://ovn-northd.at/>
>>>>> index cd53755b2..f5d025852 100644
>>>>> --- a/tests/ovn-northd.at <http://ovn-northd.at/>
>>>>> +++ b/tests/ovn-northd.at <http://ovn-northd.at/>
>>>>> @@ -2819,6 +2819,35 @@ AT_CHECK(
>>>>>  get_tunnel_keys
>>>>>  AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
>>>>>
>>>>> +AT_CLEANUP
>>>>> +])
>>>>> +OVN_FOR_EACH_NORTHD_NO_HV([
>>>>> +AT_SETUP([check vxlan mode disabling])
>>>>> +ovn_start
>>>>> +
>>>>> +# Create a fake chassis with vxlan encap to implicitly enable vxlan
>> mode.
>>>>> +ovn-sbctl \
>>>>> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1"
>> type="vxlan" \
>>>>> +    -- --id=@c create chassis name=hv1 encaps=@e
>>>>> +
>>>>> +cmd="ovn-nbctl --wait=sb"
>>>>> +for i in {1..4097..1}; do
>>>>> +    cmd="${cmd} -- ls-add lsw-${i}"
>>>>> +done
>>>>> +
>>>>> +eval $cmd
>>>>> +
>>>>> +check_row_count nb:Logical_Switch 4097
>>>>> +wait_row_count sb:Datapath_Binding 4095
>>>>> +
>>>>> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted"
>> northd/ovn-northd.log])
>>>>> +
>>>>> +# Explicitly disable vxlan mode and check that two remaining
>> datapaths were created.
>>>>> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
>>>>> +
>>>>> +check_row_count nb:Logical_Switch 4097
>>>>> +wait_row_count sb:Datapath_Binding 4097
>>>>> +
>>>>>  AT_CLEANUP
>>>>>  ])
>>>>>
>>>>> --
>>>>> 2.44.0
>>>>>
>>>
>>>
>>> Regards,
>>> Vladislav Odintsov
>>
>> Regards,
>> Dumitru
>>
>>
>
diff mbox series

Patch

diff --git a/northd/en-global-config.c b/northd/en-global-config.c
index 34e393b33..9310c4575 100644
--- a/northd/en-global-config.c
+++ b/northd/en-global-config.c
@@ -115,8 +115,8 @@  en_global_config_run(struct engine_node *node , void *data)
                      config_data->svc_monitor_mac);
     }
 
-    char *max_tunid = xasprintf("%d",
-        get_ovn_max_dp_key_local(sbrec_chassis_table));
+    init_vxlan_mode(&nb->options, sbrec_chassis_table);
+    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
     smap_replace(options, "max_tunid", max_tunid);
     free(max_tunid);
 
@@ -523,6 +523,11 @@  check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
         return true;
     }
 
+    if (config_out_of_sync(&nb->options, &config_data->nb_options,
+                           "disable_vxlan_mode", false)) {
+        return true;
+    }
+
     return false;
 }
 
diff --git a/northd/northd.c b/northd/northd.c
index c568f6360..859b233e8 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -90,6 +90,10 @@  static bool use_ct_inv_match = true;
  */
 static bool default_acl_drop;
 
+/* If this option is 'true' northd will use limited 24-bit space for datapath
+ * and ports tunnel key allocation (12 bits for each instead of default 16). */
+static bool vxlan_mode;
+
 #define MAX_OVN_TAGS 4096
 
 
@@ -875,24 +879,31 @@  join_datapaths(const struct nbrec_logical_switch_table *nbrec_ls_table,
     }
 }
 
-static bool
-is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
+void
+init_vxlan_mode(const struct smap *nb_options,
+                const struct sbrec_chassis_table *sbrec_chassis_table)
 {
+    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
+        vxlan_mode = false;
+        return;
+    }
+
     const struct sbrec_chassis *chassis;
     SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
         for (int i = 0; i < chassis->n_encaps; i++) {
             if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
-                return true;
+                vxlan_mode = true;
+                return;
             }
         }
     }
-    return false;
+    vxlan_mode = false;
 }
 
 uint32_t
-get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
+get_ovn_max_dp_key_local(void)
 {
-    if (is_vxlan_mode(sbrec_chassis_table)) {
+    if (vxlan_mode) {
         /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
         return OVN_MAX_DP_VXLAN_KEY;
     }
@@ -900,15 +911,14 @@  get_ovn_max_dp_key_local(const struct sbrec_chassis_table *sbrec_chassis_table)
 }
 
 static void
-ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
-                          struct hmap *datapaths, struct hmap *dp_tnlids,
+ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
                           struct ovn_datapath *od, uint32_t *hint)
 {
     if (!od->tunnel_key) {
         od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
-                                    OVN_MIN_DP_KEY_LOCAL,
-                                    get_ovn_max_dp_key_local(sbrec_ch_table),
-                                    hint);
+                                            OVN_MIN_DP_KEY_LOCAL,
+                                            get_ovn_max_dp_key_local(),
+                                            hint);
         if (!od->tunnel_key) {
             if (od->sb) {
                 sbrec_datapath_binding_delete(od->sb);
@@ -921,7 +931,6 @@  ovn_datapath_allocate_key(const struct sbrec_chassis_table *sbrec_ch_table,
 
 static void
 ovn_datapath_assign_requested_tnl_id(
-    const struct sbrec_chassis_table *sbrec_chassis_table,
     struct hmap *dp_tnlids, struct ovn_datapath *od)
 {
     const struct smap *other_config = (od->nbs
@@ -930,8 +939,7 @@  ovn_datapath_assign_requested_tnl_id(
     uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key", 0);
     if (tunnel_key) {
         const char *interconn_ts = smap_get(other_config, "interconn-ts");
-        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
-            tunnel_key >= 1 << 12) {
+        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
             VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
                          "incompatible with VXLAN", tunnel_key,
@@ -979,7 +987,6 @@  build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
                 const struct nbrec_logical_switch_table *nbrec_ls_table,
                 const struct nbrec_logical_router_table *nbrec_lr_table,
                 const struct sbrec_datapath_binding_table *sbrec_dp_table,
-                const struct sbrec_chassis_table *sbrec_chassis_table,
                 struct ovn_datapaths *ls_datapaths,
                 struct ovn_datapaths *lr_datapaths,
                 struct ovs_list *lr_list)
@@ -994,12 +1001,11 @@  build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
     struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
     struct ovn_datapath *od;
     LIST_FOR_EACH (od, list, &both) {
-        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
-                                             od);
+        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
     }
     LIST_FOR_EACH (od, list, &nb_only) {
-        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, &dp_tnlids,
-                                             od); }
+        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
+    }
 
     /* Keep nonconflicting tunnel IDs that are already assigned. */
     LIST_FOR_EACH (od, list, &both) {
@@ -1011,12 +1017,10 @@  build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
     /* Assign new tunnel ids where needed. */
     uint32_t hint = 0;
     LIST_FOR_EACH_SAFE (od, list, &both) {
-        ovn_datapath_allocate_key(sbrec_chassis_table,
-                                  datapaths, &dp_tnlids, od, &hint);
+        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
     }
     LIST_FOR_EACH_SAFE (od, list, &nb_only) {
-        ovn_datapath_allocate_key(sbrec_chassis_table,
-                                  datapaths, &dp_tnlids, od, &hint);
+        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
     }
 
     /* Sync tunnel ids from nb to sb. */
@@ -3976,16 +3980,14 @@  ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
  * that the I-P engine can fallback to recompute if needed; otherwise return
  * true (even if the key is not allocated). */
 static bool
-ovn_port_assign_requested_tnl_id(
-    const struct sbrec_chassis_table *sbrec_chassis_table, struct ovn_port *op)
+ovn_port_assign_requested_tnl_id(struct ovn_port *op)
 {
     const struct smap *options = (op->nbsp
                                   ? &op->nbsp->options
                                   : &op->nbrp->options);
     uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
     if (tunnel_key) {
-        if (is_vxlan_mode(sbrec_chassis_table) &&
-                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
+        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
             VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
                          "is incompatible with VXLAN",
@@ -4005,12 +4007,10 @@  ovn_port_assign_requested_tnl_id(
 }
 
 static bool
-ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
-                      struct hmap *ports,
-                      struct ovn_port *op)
+ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
 {
     if (!op->tunnel_key) {
-        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
+        uint8_t key_bits = vxlan_mode ? 12 : 16;
         op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
                                             1, (1u << (key_bits - 1)) - 1,
                                             &op->od->port_key_hint);
@@ -4035,7 +4035,6 @@  ovn_port_allocate_key(const struct sbrec_chassis_table *sbrec_chassis_table,
 static void
 build_ports(struct ovsdb_idl_txn *ovnsb_txn,
     const struct sbrec_port_binding_table *sbrec_port_binding_table,
-    const struct sbrec_chassis_table *sbrec_chassis_table,
     const struct sbrec_mirror_table *sbrec_mirror_table,
     const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
     const struct sbrec_ha_chassis_group_table *sbrec_ha_chassis_group_table,
@@ -4069,10 +4068,10 @@  build_ports(struct ovsdb_idl_txn *ovnsb_txn,
     /* Assign explicitly requested tunnel ids first. */
     struct ovn_port *op;
     LIST_FOR_EACH (op, list, &both) {
-        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
+        ovn_port_assign_requested_tnl_id(op);
     }
     LIST_FOR_EACH (op, list, &nb_only) {
-        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
+        ovn_port_assign_requested_tnl_id(op);
     }
 
     /* Keep nonconflicting tunnel IDs that are already assigned. */
@@ -4084,10 +4083,10 @@  build_ports(struct ovsdb_idl_txn *ovnsb_txn,
 
     /* Assign new tunnel ids where needed. */
     LIST_FOR_EACH_SAFE (op, list, &both) {
-        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
+        ovn_port_allocate_key(ports, op);
     }
     LIST_FOR_EACH_SAFE (op, list, &nb_only) {
-        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
+        ovn_port_allocate_key(ports, op);
     }
 
     /* For logical ports that are in both databases, update the southbound
@@ -4294,14 +4293,13 @@  ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
              struct hmap *ls_ports, struct ovn_datapath *od,
              const struct sbrec_port_binding *sb,
              const struct sbrec_mirror_table *sbrec_mirror_table,
-             const struct sbrec_chassis_table *sbrec_chassis_table,
              struct ovsdb_idl_index *sbrec_chassis_by_name,
              struct ovsdb_idl_index *sbrec_chassis_by_hostname)
 {
     op->od = od;
     parse_lsp_addrs(op);
     /* Assign explicitly requested tunnel ids first. */
-    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
+    if (!ovn_port_assign_requested_tnl_id(op)) {
         return false;
     }
     if (sb) {
@@ -4318,7 +4316,7 @@  ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
         sbrec_port_binding_set_logical_port(op->sb, op->key);
     }
     /* Assign new tunnel ids where needed. */
-    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
+    if (!ovn_port_allocate_key(ls_ports, op)) {
         return false;
     }
     ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
@@ -4332,15 +4330,13 @@  ls_port_create(struct ovsdb_idl_txn *ovnsb_txn, struct hmap *ls_ports,
                const char *key, const struct nbrec_logical_switch_port *nbsp,
                struct ovn_datapath *od, const struct sbrec_port_binding *sb,
                const struct sbrec_mirror_table *sbrec_mirror_table,
-               const struct sbrec_chassis_table *sbrec_chassis_table,
                struct ovsdb_idl_index *sbrec_chassis_by_name,
                struct ovsdb_idl_index *sbrec_chassis_by_hostname)
 {
     struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
                                           NULL);
     hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
-    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
-                      sbrec_mirror_table, sbrec_chassis_table,
+    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
                       sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
         ovn_port_destroy(ls_ports, op);
         return NULL;
@@ -4357,7 +4353,6 @@  ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
                 struct ovn_datapath *od,
                 const struct sbrec_port_binding *sb,
                 const struct sbrec_mirror_table *sbrec_mirror_table,
-                const struct sbrec_chassis_table *sbrec_chassis_table,
                 struct ovsdb_idl_index *sbrec_chassis_by_name,
                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
 {
@@ -4365,8 +4360,7 @@  ls_port_reinit(struct ovn_port *op, struct ovsdb_idl_txn *ovnsb_txn,
     op->sb = sb;
     ovn_port_set_nb(op, nbsp, nbrp);
     op->l3dgw_port = op->cr_port = NULL;
-    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
-                        sbrec_mirror_table, sbrec_chassis_table,
+    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
                         sbrec_chassis_by_name, sbrec_chassis_by_hostname);
 }
 
@@ -4511,7 +4505,6 @@  ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
             op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
                                 new_nbsp->name, new_nbsp, od, NULL,
                                 ni->sbrec_mirror_table,
-                                ni->sbrec_chassis_table,
                                 ni->sbrec_chassis_by_name,
                                 ni->sbrec_chassis_by_hostname);
             if (!op) {
@@ -4543,7 +4536,6 @@  ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
             if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
                                 new_nbsp, NULL,
                                 od, sb, ni->sbrec_mirror_table,
-                                ni->sbrec_chassis_table,
                                 ni->sbrec_chassis_by_name,
                                 ni->sbrec_chassis_by_hostname)) {
                 goto fail;
@@ -17300,11 +17292,12 @@  ovnnb_db_run(struct northd_input *input_data,
     use_common_zone = smap_get_bool(input_data->nb_options, "use_common_zone",
                                     false);
 
+    init_vxlan_mode(input_data->nb_options, input_data->sbrec_chassis_table);
+
     build_datapaths(ovnsb_txn,
                     input_data->nbrec_logical_switch_table,
                     input_data->nbrec_logical_router_table,
                     input_data->sbrec_datapath_binding_table,
-                    input_data->sbrec_chassis_table,
                     &data->ls_datapaths,
                     &data->lr_datapaths, &data->lr_list);
     build_lb_datapaths(input_data->lbs, input_data->lbgrps,
@@ -17312,7 +17305,6 @@  ovnnb_db_run(struct northd_input *input_data,
                        &data->lb_datapaths_map, &data->lb_group_datapaths_map);
     build_ports(ovnsb_txn,
                 input_data->sbrec_port_binding_table,
-                input_data->sbrec_chassis_table,
                 input_data->sbrec_mirror_table,
                 input_data->sbrec_mac_binding_table,
                 input_data->sbrec_ha_chassis_group_table,
diff --git a/northd/northd.h b/northd/northd.h
index 5e9fa4745..ed9d992fb 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -788,6 +788,10 @@  lr_has_multiple_gw_ports(const struct ovn_datapath *od)
     return od->n_l3dgw_ports > 1 && !od->is_gw_router;
 }
 
-uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
+void
+init_vxlan_mode(const struct smap *nb_options,
+                const struct sbrec_chassis_table *sbrec_chassis_table);
+
+uint32_t get_ovn_max_dp_key_local(void);
 
 #endif /* NORTHD_H */
diff --git a/ovn-nb.xml b/ovn-nb.xml
index b652046a7..9b2cb355e 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -381,6 +381,18 @@ 
         of SB changes would be very noticeable.
       </column>
 
+      <column name="options" key="disable_vxlan_mode">
+        Be default if at least one chassis in OVN cluster has VXLAN encap,
+        northd will run in a `vxlan mode`, which means it splits 24-bit
+        VXLAN VNI for datapath and port tunnel IDs allocation evenly.  Maximum
+        datapaths count in this mode is 4095 and maximum ports per datapath is
+        2047.  Rest of IDs are used for multicast groups.
+        In case VXLAN encaps are needed on chassis only to support HW VTEP
+        functionality, and main encap type is GENEVE or STT, set this option to
+        `false` to use defaults -- 16-bit space for datapath tunnel IDS and 15
+        bits for port tunnel IDs.
+      </column>
+
       <group title="Options for configuring interconnection route advertisement">
         <p>
           These options control how routes are advertised between OVN
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index cd53755b2..f5d025852 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -2819,6 +2819,35 @@  AT_CHECK(
 get_tunnel_keys
 AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
 
+AT_CLEANUP
+])
+OVN_FOR_EACH_NORTHD_NO_HV([
+AT_SETUP([check vxlan mode disabling])
+ovn_start
+
+# Create a fake chassis with vxlan encap to implicitly enable vxlan mode.
+ovn-sbctl \
+    --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \
+    -- --id=@c create chassis name=hv1 encaps=@e
+
+cmd="ovn-nbctl --wait=sb"
+for i in {1..4097..1}; do
+    cmd="${cmd} -- ls-add lsw-${i}"
+done
+
+eval $cmd
+
+check_row_count nb:Logical_Switch 4097
+wait_row_count sb:Datapath_Binding 4095
+
+OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted" northd/ovn-northd.log])
+
+# Explicitly disable vxlan mode and check that two remaining datapaths were created.
+check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
+
+check_row_count nb:Logical_Switch 4097
+wait_row_count sb:Datapath_Binding 4097
+
 AT_CLEANUP
 ])