diff mbox series

[ovs-dev,v6,1/3] northd: Introduce ECMP_Nexthop table in SB db.

Message ID bc2d9f99ae87b0da87e729d9ae34fcbe1b20f7ea.1721722209.git.lorenzo.bianconi@redhat.com
State Superseded
Headers show
Series Introduce ECMP_nexthop monitor in ovn-controller | expand

Checks

Context Check Description
ovsrobot/apply-robot fail apply and check: fail

Commit Message

Lorenzo Bianconi July 23, 2024, 8:15 a.m. UTC
Introduce ECMP_Nexthop table in the SB db in order to track active
ecmp-symmetric-reply connections and flush stale ones.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 northd/en-northd.c       | 35 ++++++++++++++++
 northd/en-northd.h       |  4 ++
 northd/inc-proc-northd.c |  7 +++-
 northd/northd.c          | 87 ++++++++++++++++++++++++++++++++++++++++
 northd/northd.h          | 10 +++++
 ovn-sb.ovsschema         | 18 ++++++++-
 ovn-sb.xml               | 31 ++++++++++++++
 tests/ovn-northd.at      |  4 ++
 8 files changed, 193 insertions(+), 3 deletions(-)

Comments

Ales Musil July 24, 2024, 7:14 a.m. UTC | #1
On Tue, Jul 23, 2024 at 10:17 AM Lorenzo Bianconi <
lorenzo.bianconi@redhat.com> wrote:

> Introduce ECMP_Nexthop table in the SB db in order to track active
> ecmp-symmetric-reply connections and flush stale ones.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---
>

Hi Lorenzo,

thank you for the patch, I have one comment down below.

 northd/en-northd.c       | 35 ++++++++++++++++
>  northd/en-northd.h       |  4 ++
>  northd/inc-proc-northd.c |  7 +++-
>  northd/northd.c          | 87 ++++++++++++++++++++++++++++++++++++++++
>  northd/northd.h          | 10 +++++
>  ovn-sb.ovsschema         | 18 ++++++++-
>  ovn-sb.xml               | 31 ++++++++++++++
>  tests/ovn-northd.at      |  4 ++
>  8 files changed, 193 insertions(+), 3 deletions(-)
>
> diff --git a/northd/en-northd.c b/northd/en-northd.c
> index 817a4736c..0d86fcb60 100644
> --- a/northd/en-northd.c
> +++ b/northd/en-northd.c
> @@ -402,6 +402,25 @@ en_bfd_sync_run(struct engine_node *node OVS_UNUSED,
>      engine_set_node_state(node, EN_UPDATED);
>  }
>
> +void
> +en_ecmp_nexthop_run(struct engine_node *node, void *data)
> +{
> +    const struct engine_context *eng_ctx = engine_get_context();
> +    struct static_routes_data *static_routes_data =
> +        engine_get_input_data("static_routes", node);
> +    struct ecmp_nexthop_data *enh_data = data;
> +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
> +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
> +
> +    ecmp_nexthop_destroy(data);
> +    ecmp_nexthop_init(data);
> +    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
> +                             &static_routes_data->parsed_routes,
> +                             &enh_data->nexthops,
> +                             sbrec_ecmp_nexthop_table);
> +    engine_set_node_state(node, EN_UPDATED);
> +}
> +
>  void
>  *en_northd_init(struct engine_node *node OVS_UNUSED,
>                  struct engine_arg *arg OVS_UNUSED)
> @@ -449,6 +468,16 @@ void *en_bfd_sync_init(struct engine_node *node
> OVS_UNUSED,
>      return NULL;
>  }
>
> +void
> +*en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> +                      struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct ecmp_nexthop_data *data = xzalloc(sizeof *data);
> +
> +    ecmp_nexthop_init(data);
> +    return data;
> +}
> +
>  void
>  en_northd_cleanup(void *data)
>  {
> @@ -484,3 +513,9 @@ void
>  en_bfd_sync_cleanup(void *data OVS_UNUSED)
>  {
>  }
> +
> +void
> +en_ecmp_nexthop_cleanup(void *data)
> +{
> +    ecmp_nexthop_destroy(data);
> +}
> diff --git a/northd/en-northd.h b/northd/en-northd.h
> index 68f7755f8..4f30ac335 100644
> --- a/northd/en-northd.h
> +++ b/northd/en-northd.h
> @@ -42,5 +42,9 @@ void *en_bfd_sync_init(struct engine_node *node
> OVS_UNUSED,
>  void en_bfd_sync_run(struct engine_node *node OVS_UNUSED,
>                       void *data OVS_UNUSED);
>  void en_bfd_sync_cleanup(void *data OVS_UNUSED);
> +void en_ecmp_nexthop_run(struct engine_node *node, void *data);
> +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> +                           struct engine_arg *arg OVS_UNUSED);
> +void en_ecmp_nexthop_cleanup(void *data);
>
>  #endif /* EN_NORTHD_H */
> diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> index b3baecaff..7416b1c43 100644
> --- a/northd/inc-proc-northd.c
> +++ b/northd/inc-proc-northd.c
> @@ -101,7 +101,8 @@ static unixctl_cb_func chassis_features_list;
>      SB_NODE(fdb, "fdb") \
>      SB_NODE(static_mac_binding, "static_mac_binding") \
>      SB_NODE(chassis_template_var, "chassis_template_var") \
> -    SB_NODE(logical_dp_group, "logical_dp_group")
> +    SB_NODE(logical_dp_group, "logical_dp_group") \
> +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
>
>  enum sb_engine_node {
>  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> @@ -159,6 +160,7 @@ static ENGINE_NODE(route_policies, "route_policies");
>  static ENGINE_NODE(static_routes, "static_routes");
>  static ENGINE_NODE(bfd, "bfd");
>  static ENGINE_NODE(bfd_sync, "bfd_sync");
> +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
>
>  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>                            struct ovsdb_idl_loop *sb)
> @@ -258,6 +260,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>      engine_add_input(&en_bfd_sync, &en_static_routes, NULL);
>      engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
>
> +    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
> +    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
> +
>      engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
>      engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
>      engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
> diff --git a/northd/northd.c b/northd/northd.c
> index 70bfb144e..b5b332536 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -9998,6 +9998,81 @@ build_bfd_map(const struct nbrec_bfd_table
> *nbrec_bfd_table,
>      }
>  }
>
> +#define NEXTHOP_IDS_LEN        65535
> +void
> +build_ecmp_nexthop_table(
> +        struct ovsdb_idl_txn *ovnsb_txn,
> +        struct hmap *routes,
> +        struct simap *nexthops,
> +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
> +{
> +    unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
> +
> +    if (!ovnsb_txn) {
> +        return;
> +    }
> +
> +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
> +                                       sbrec_ecmp_nexthop_table) {
> +        simap_put(nexthops, sb_ecmp_nexthop->nexthop,
> +                  sb_ecmp_nexthop->id);
> +        bitmap_set1(nexthop_ids, sb_ecmp_nexthop->id);
> +    }
> +
> +    struct parsed_route *pr;
> +    HMAP_FOR_EACH (pr, key_node, routes) {
> +        if (!pr->ecmp_symmetric_reply) {
> +            continue;
> +        }
> +
> +        const struct nbrec_logical_router_static_route *r = pr->route;
> +        if (!simap_contains(nexthops, r->nexthop)) {
> +            int id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
> +            if (id == NEXTHOP_IDS_LEN) {
> +                static struct vlog_rate_limit rl =
> VLOG_RATE_LIMIT_INIT(1, 1);
> +                VLOG_WARN_RL(&rl, "nexthop id address space is
> exhausted");
> +                continue;
> +            }
> +            bitmap_set1(nexthop_ids, id);
> +            simap_put(nexthops, r->nexthop, id);
> +
> +            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> +            sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
> +        }
> +    }
> +
> +    struct simap_node *n;
> +    SIMAP_FOR_EACH_SAFE (n, nexthops) {
> +        bool stale = true;
> +        HMAP_FOR_EACH (pr, key_node, routes) {
> +            if (!pr->ecmp_symmetric_reply) {
> +                continue;
> +            }
> +
> +            const struct nbrec_logical_router_static_route *r = pr->route;
> +            if (!strcmp(r->nexthop, n->name)) {
> +                stale = false;
> +                break;
> +            }
> +        }
> +
> +        if (stale) {
> +            simap_delete(nexthops, n);
> +        }
> +    }
> +
> +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH_SAFE (sb_ecmp_nexthop,
> +                                            sbrec_ecmp_nexthop_table) {
> +        if (!simap_contains(nexthops, sb_ecmp_nexthop->nexthop)) {
> +                sbrec_ecmp_nexthop_delete(sb_ecmp_nexthop);
> +        }
> +    }
> +
> +    bitmap_free(nexthop_ids);
> +}
>

The amount of loops in this could be greatly reduced by using sset as
representation what is in sync or not.

HMAP_FOR_EACH (pr, key_node, routes) would add everything into sset.
Then you could walk all SB entries, what is in sset would end up in the
simap
and bitmap. If it's not present in the sset we can remove it from the DB.
Last step would be to walk the sset and add an SB entry for everything that
is
in the sset but not in the simap WDYT?


> +
>  /* Returns a string of the IP address of the router port 'op' that
>   * overlaps with 'ip_s".  If one is not found, returns NULL.
>   *
> @@ -17791,6 +17866,12 @@ bfd_init(struct bfd_data *data)
>      hmap_init(&data->bfd_connections);
>  }
>
> +void
> +ecmp_nexthop_init(struct ecmp_nexthop_data *data)
> +{
> +    simap_init(&data->nexthops);
> +}
> +
>  void
>  northd_destroy(struct northd_data *data)
>  {
> @@ -17872,6 +17953,12 @@ static_routes_destroy(struct static_routes_data
> *data)
>      __bfd_destroy(&data->bfd_active_connections);
>  }
>
> +void
> +ecmp_nexthop_destroy(struct ecmp_nexthop_data *data)
> +{
> +    simap_destroy(&data->nexthops);
> +}
> +
>  void
>  ovnnb_db_run(struct northd_input *input_data,
>               struct northd_data *data,
> diff --git a/northd/northd.h b/northd/northd.h
> index 7ddb72eb7..205793e56 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -185,6 +185,10 @@ struct bfd_data {
>      struct hmap bfd_connections;
>  };
>
> +struct ecmp_nexthop_data {
> +    struct simap nexthops;
> +};
> +
>  struct lr_nat_table;
>
>  struct lflow_input {
> @@ -724,6 +728,12 @@ void static_routes_destroy(struct static_routes_data
> *);
>  void bfd_init(struct bfd_data *);
>  void bfd_destroy(struct bfd_data *);
>
> +void build_ecmp_nexthop_table(struct ovsdb_idl_txn *,
> +                              struct hmap *, struct simap *,
> +                              const struct sbrec_ecmp_nexthop_table *);
> +void ecmp_nexthop_init(struct ecmp_nexthop_data *);
> +void ecmp_nexthop_destroy(struct ecmp_nexthop_data *);
> +
>  struct lflow_table;
>  struct lr_stateful_tracked_data;
>  struct ls_stateful_tracked_data;
> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> index b6c051ae6..a4d1630b0 100644
> --- a/ovn-sb.ovsschema
> +++ b/ovn-sb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Southbound",
> -    "version": "20.34.0",
> -    "cksum": "2786607656 31376",
> +    "version": "20.35.0",
> +    "cksum": "1887835491 32037",
>      "tables": {
>          "SB_Global": {
>              "columns": {
> @@ -608,6 +608,20 @@
>                                            "refTable":
> "Datapath_Binding"}}}},
>              "indexes": [["logical_port", "ip"]],
>              "isRoot": true},
> +        "ECMP_Nexthop": {
> +            "columns": {
> +                "nexthop": {"type": "string"},
> +                "id": {"type": {"key": {"type": "integer",
> +                                        "minInteger": 0,
> +                                        "maxInteger": 65535}}},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}},
> +                "options": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "indexes": [["nexthop"]],
> +            "isRoot": true},
>          "Chassis_Template_Var": {
>              "columns": {
>                  "chassis": {"type": "string"},
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index 73a1be5ed..d84e956c5 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -5179,4 +5179,35 @@ tcp.flags = RST;
>        The set of variable values for a given chassis.
>      </column>
>    </table>
> +
> +  <table name="ECMP_Nexthop">
> +    <p>
> +      Each record in this table represents an active ECMP route committed
> by
> +      <code>ovn-northd</code> to <code>ovs</code> connection-tracking
> table.
> +      <code>ECMP_Nexthop</code> table is used by
> <code>ovn-controller</code>
> +      to track active ct entries and to flush stale ones.
> +    </p>
> +    <column name="nexthop">
> +      <p>
> +        Nexthop IP address for this ECMP route. Nexthop IP address should
> +        be the IP address of a connected router port or the IP address of
> +        an external device used as nexthop for the given destination.
> +      </p>
> +    </column>
> +
> +    <column name="id">
> +      <p>
> +        Nexthop unique identifier. Nexthop ID is used to track active
> +        ecmp-symmetric-reply connections and flush stale ones.
> +      </p>
> +    </column>
> +
> +    <column name="options">
> +      Reserved for future use.
> +    </column>
> +
> +    <column name="external_ids">
> +      See <em>External IDs</em> at the beginning of this document.
> +    </column>
> +  </table>
>  </database>
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index a84fb6fab..b815eb321 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -6775,6 +6775,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
>  check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
>
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> 192.168.0.10
> +check_row_count ECMP_Nexthop 1
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>
> @@ -6786,6 +6787,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
> ovn_strip_lflows], [0], [dn
>  ])
>
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> 192.168.0.20
> +check_row_count ECMP_Nexthop 2
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
> ovn_strip_lflows], [0], [dnl
> @@ -6822,6 +6824,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows
> | ovn_strip_lflows], [0], [
>
>  # add ecmp route with wrong nexthop
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> 192.168.1.20
> +check_row_count ECMP_Nexthop 2
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
> ovn_strip_lflows], [0], [dnl
> @@ -6836,6 +6839,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
> sed 's/192\.168\.0\..0/192.
>
>  check ovn-nbctl lr-route-del lr0
>  wait_row_count nb:Logical_Router_Static_Route 0
> +check_row_count ECMP_Nexthop 0
>
>  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>  ovn-sbctl dump-flows lr0 > lr0flows
> --
> 2.45.2
>
>
Thanks,
Ales
Lorenzo Bianconi July 24, 2024, 1:43 p.m. UTC | #2
> On Tue, Jul 23, 2024 at 10:17 AM Lorenzo Bianconi <
> lorenzo.bianconi@redhat.com> wrote:
> 
> > Introduce ECMP_Nexthop table in the SB db in order to track active
> > ecmp-symmetric-reply connections and flush stale ones.
> >
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> > ---
> >
> 
> Hi Lorenzo,
> 
> thank you for the patch, I have one comment down below.

Hi Ales,

thx for the review.

> 
>  northd/en-northd.c       | 35 ++++++++++++++++
> >  northd/en-northd.h       |  4 ++
> >  northd/inc-proc-northd.c |  7 +++-
> >  northd/northd.c          | 87 ++++++++++++++++++++++++++++++++++++++++
> >  northd/northd.h          | 10 +++++
> >  ovn-sb.ovsschema         | 18 ++++++++-
> >  ovn-sb.xml               | 31 ++++++++++++++
> >  tests/ovn-northd.at      |  4 ++
> >  8 files changed, 193 insertions(+), 3 deletions(-)
> >
> > diff --git a/northd/en-northd.c b/northd/en-northd.c
> > index 817a4736c..0d86fcb60 100644
> > --- a/northd/en-northd.c
> > +++ b/northd/en-northd.c
> > @@ -402,6 +402,25 @@ en_bfd_sync_run(struct engine_node *node OVS_UNUSED,
> >      engine_set_node_state(node, EN_UPDATED);
> >  }
> >
> > +void
> > +en_ecmp_nexthop_run(struct engine_node *node, void *data)
> > +{
> > +    const struct engine_context *eng_ctx = engine_get_context();
> > +    struct static_routes_data *static_routes_data =
> > +        engine_get_input_data("static_routes", node);
> > +    struct ecmp_nexthop_data *enh_data = data;
> > +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
> > +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
> > +
> > +    ecmp_nexthop_destroy(data);
> > +    ecmp_nexthop_init(data);
> > +    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
> > +                             &static_routes_data->parsed_routes,
> > +                             &enh_data->nexthops,
> > +                             sbrec_ecmp_nexthop_table);
> > +    engine_set_node_state(node, EN_UPDATED);
> > +}
> > +
> >  void
> >  *en_northd_init(struct engine_node *node OVS_UNUSED,
> >                  struct engine_arg *arg OVS_UNUSED)
> > @@ -449,6 +468,16 @@ void *en_bfd_sync_init(struct engine_node *node
> > OVS_UNUSED,
> >      return NULL;
> >  }
> >
> > +void
> > +*en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> > +                      struct engine_arg *arg OVS_UNUSED)
> > +{
> > +    struct ecmp_nexthop_data *data = xzalloc(sizeof *data);
> > +
> > +    ecmp_nexthop_init(data);
> > +    return data;
> > +}
> > +
> >  void
> >  en_northd_cleanup(void *data)
> >  {
> > @@ -484,3 +513,9 @@ void
> >  en_bfd_sync_cleanup(void *data OVS_UNUSED)
> >  {
> >  }
> > +
> > +void
> > +en_ecmp_nexthop_cleanup(void *data)
> > +{
> > +    ecmp_nexthop_destroy(data);
> > +}
> > diff --git a/northd/en-northd.h b/northd/en-northd.h
> > index 68f7755f8..4f30ac335 100644
> > --- a/northd/en-northd.h
> > +++ b/northd/en-northd.h
> > @@ -42,5 +42,9 @@ void *en_bfd_sync_init(struct engine_node *node
> > OVS_UNUSED,
> >  void en_bfd_sync_run(struct engine_node *node OVS_UNUSED,
> >                       void *data OVS_UNUSED);
> >  void en_bfd_sync_cleanup(void *data OVS_UNUSED);
> > +void en_ecmp_nexthop_run(struct engine_node *node, void *data);
> > +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> > +                           struct engine_arg *arg OVS_UNUSED);
> > +void en_ecmp_nexthop_cleanup(void *data);
> >
> >  #endif /* EN_NORTHD_H */
> > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > index b3baecaff..7416b1c43 100644
> > --- a/northd/inc-proc-northd.c
> > +++ b/northd/inc-proc-northd.c
> > @@ -101,7 +101,8 @@ static unixctl_cb_func chassis_features_list;
> >      SB_NODE(fdb, "fdb") \
> >      SB_NODE(static_mac_binding, "static_mac_binding") \
> >      SB_NODE(chassis_template_var, "chassis_template_var") \
> > -    SB_NODE(logical_dp_group, "logical_dp_group")
> > +    SB_NODE(logical_dp_group, "logical_dp_group") \
> > +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
> >
> >  enum sb_engine_node {
> >  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> > @@ -159,6 +160,7 @@ static ENGINE_NODE(route_policies, "route_policies");
> >  static ENGINE_NODE(static_routes, "static_routes");
> >  static ENGINE_NODE(bfd, "bfd");
> >  static ENGINE_NODE(bfd_sync, "bfd_sync");
> > +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
> >
> >  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >                            struct ovsdb_idl_loop *sb)
> > @@ -258,6 +260,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >      engine_add_input(&en_bfd_sync, &en_static_routes, NULL);
> >      engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
> >
> > +    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
> > +    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
> > +
> >      engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
> >      engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
> >      engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 70bfb144e..b5b332536 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -9998,6 +9998,81 @@ build_bfd_map(const struct nbrec_bfd_table
> > *nbrec_bfd_table,
> >      }
> >  }
> >
> > +#define NEXTHOP_IDS_LEN        65535
> > +void
> > +build_ecmp_nexthop_table(
> > +        struct ovsdb_idl_txn *ovnsb_txn,
> > +        struct hmap *routes,
> > +        struct simap *nexthops,
> > +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
> > +{
> > +    unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
> > +
> > +    if (!ovnsb_txn) {
> > +        return;
> > +    }
> > +
> > +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> > +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
> > +                                       sbrec_ecmp_nexthop_table) {
> > +        simap_put(nexthops, sb_ecmp_nexthop->nexthop,
> > +                  sb_ecmp_nexthop->id);
> > +        bitmap_set1(nexthop_ids, sb_ecmp_nexthop->id);
> > +    }
> > +
> > +    struct parsed_route *pr;
> > +    HMAP_FOR_EACH (pr, key_node, routes) {
> > +        if (!pr->ecmp_symmetric_reply) {
> > +            continue;
> > +        }
> > +
> > +        const struct nbrec_logical_router_static_route *r = pr->route;
> > +        if (!simap_contains(nexthops, r->nexthop)) {
> > +            int id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
> > +            if (id == NEXTHOP_IDS_LEN) {
> > +                static struct vlog_rate_limit rl =
> > VLOG_RATE_LIMIT_INIT(1, 1);
> > +                VLOG_WARN_RL(&rl, "nexthop id address space is
> > exhausted");
> > +                continue;
> > +            }
> > +            bitmap_set1(nexthop_ids, id);
> > +            simap_put(nexthops, r->nexthop, id);
> > +
> > +            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> > +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> > +            sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
> > +        }
> > +    }
> > +
> > +    struct simap_node *n;
> > +    SIMAP_FOR_EACH_SAFE (n, nexthops) {
> > +        bool stale = true;
> > +        HMAP_FOR_EACH (pr, key_node, routes) {
> > +            if (!pr->ecmp_symmetric_reply) {
> > +                continue;
> > +            }
> > +
> > +            const struct nbrec_logical_router_static_route *r = pr->route;
> > +            if (!strcmp(r->nexthop, n->name)) {
> > +                stale = false;
> > +                break;
> > +            }
> > +        }
> > +
> > +        if (stale) {
> > +            simap_delete(nexthops, n);
> > +        }
> > +    }
> > +
> > +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH_SAFE (sb_ecmp_nexthop,
> > +                                            sbrec_ecmp_nexthop_table) {
> > +        if (!simap_contains(nexthops, sb_ecmp_nexthop->nexthop)) {
> > +                sbrec_ecmp_nexthop_delete(sb_ecmp_nexthop);
> > +        }
> > +    }
> > +
> > +    bitmap_free(nexthop_ids);
> > +}
> >
> 
> The amount of loops in this could be greatly reduced by using sset as
> representation what is in sync or not.
> 
> HMAP_FOR_EACH (pr, key_node, routes) would add everything into sset.
> Then you could walk all SB entries, what is in sset would end up in the
> simap
> and bitmap. If it's not present in the sset we can remove it from the DB.
> Last step would be to walk the sset and add an SB entry for everything that
> is
> in the sset but not in the simap WDYT?

ack, I guess we can get rid of the third loop here (the one to look for stale
nexthops entries). I will fix it in the next version.

Regards,
Lorenzo

> 
> 
> > +
> >  /* Returns a string of the IP address of the router port 'op' that
> >   * overlaps with 'ip_s".  If one is not found, returns NULL.
> >   *
> > @@ -17791,6 +17866,12 @@ bfd_init(struct bfd_data *data)
> >      hmap_init(&data->bfd_connections);
> >  }
> >
> > +void
> > +ecmp_nexthop_init(struct ecmp_nexthop_data *data)
> > +{
> > +    simap_init(&data->nexthops);
> > +}
> > +
> >  void
> >  northd_destroy(struct northd_data *data)
> >  {
> > @@ -17872,6 +17953,12 @@ static_routes_destroy(struct static_routes_data
> > *data)
> >      __bfd_destroy(&data->bfd_active_connections);
> >  }
> >
> > +void
> > +ecmp_nexthop_destroy(struct ecmp_nexthop_data *data)
> > +{
> > +    simap_destroy(&data->nexthops);
> > +}
> > +
> >  void
> >  ovnnb_db_run(struct northd_input *input_data,
> >               struct northd_data *data,
> > diff --git a/northd/northd.h b/northd/northd.h
> > index 7ddb72eb7..205793e56 100644
> > --- a/northd/northd.h
> > +++ b/northd/northd.h
> > @@ -185,6 +185,10 @@ struct bfd_data {
> >      struct hmap bfd_connections;
> >  };
> >
> > +struct ecmp_nexthop_data {
> > +    struct simap nexthops;
> > +};
> > +
> >  struct lr_nat_table;
> >
> >  struct lflow_input {
> > @@ -724,6 +728,12 @@ void static_routes_destroy(struct static_routes_data
> > *);
> >  void bfd_init(struct bfd_data *);
> >  void bfd_destroy(struct bfd_data *);
> >
> > +void build_ecmp_nexthop_table(struct ovsdb_idl_txn *,
> > +                              struct hmap *, struct simap *,
> > +                              const struct sbrec_ecmp_nexthop_table *);
> > +void ecmp_nexthop_init(struct ecmp_nexthop_data *);
> > +void ecmp_nexthop_destroy(struct ecmp_nexthop_data *);
> > +
> >  struct lflow_table;
> >  struct lr_stateful_tracked_data;
> >  struct ls_stateful_tracked_data;
> > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> > index b6c051ae6..a4d1630b0 100644
> > --- a/ovn-sb.ovsschema
> > +++ b/ovn-sb.ovsschema
> > @@ -1,7 +1,7 @@
> >  {
> >      "name": "OVN_Southbound",
> > -    "version": "20.34.0",
> > -    "cksum": "2786607656 31376",
> > +    "version": "20.35.0",
> > +    "cksum": "1887835491 32037",
> >      "tables": {
> >          "SB_Global": {
> >              "columns": {
> > @@ -608,6 +608,20 @@
> >                                            "refTable":
> > "Datapath_Binding"}}}},
> >              "indexes": [["logical_port", "ip"]],
> >              "isRoot": true},
> > +        "ECMP_Nexthop": {
> > +            "columns": {
> > +                "nexthop": {"type": "string"},
> > +                "id": {"type": {"key": {"type": "integer",
> > +                                        "minInteger": 0,
> > +                                        "maxInteger": 65535}}},
> > +                "external_ids": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}},
> > +                "options": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}}},
> > +            "indexes": [["nexthop"]],
> > +            "isRoot": true},
> >          "Chassis_Template_Var": {
> >              "columns": {
> >                  "chassis": {"type": "string"},
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index 73a1be5ed..d84e956c5 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -5179,4 +5179,35 @@ tcp.flags = RST;
> >        The set of variable values for a given chassis.
> >      </column>
> >    </table>
> > +
> > +  <table name="ECMP_Nexthop">
> > +    <p>
> > +      Each record in this table represents an active ECMP route committed
> > by
> > +      <code>ovn-northd</code> to <code>ovs</code> connection-tracking
> > table.
> > +      <code>ECMP_Nexthop</code> table is used by
> > <code>ovn-controller</code>
> > +      to track active ct entries and to flush stale ones.
> > +    </p>
> > +    <column name="nexthop">
> > +      <p>
> > +        Nexthop IP address for this ECMP route. Nexthop IP address should
> > +        be the IP address of a connected router port or the IP address of
> > +        an external device used as nexthop for the given destination.
> > +      </p>
> > +    </column>
> > +
> > +    <column name="id">
> > +      <p>
> > +        Nexthop unique identifier. Nexthop ID is used to track active
> > +        ecmp-symmetric-reply connections and flush stale ones.
> > +      </p>
> > +    </column>
> > +
> > +    <column name="options">
> > +      Reserved for future use.
> > +    </column>
> > +
> > +    <column name="external_ids">
> > +      See <em>External IDs</em> at the beginning of this document.
> > +    </column>
> > +  </table>
> >  </database>
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index a84fb6fab..b815eb321 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -6775,6 +6775,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
> >  check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
> >
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> > 192.168.0.10
> > +check_row_count ECMP_Nexthop 1
> >
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >
> > @@ -6786,6 +6787,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
> > ovn_strip_lflows], [0], [dn
> >  ])
> >
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> > 192.168.0.20
> > +check_row_count ECMP_Nexthop 2
> >
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
> > ovn_strip_lflows], [0], [dnl
> > @@ -6822,6 +6824,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows
> > | ovn_strip_lflows], [0], [
> >
> >  # add ecmp route with wrong nexthop
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> > 192.168.1.20
> > +check_row_count ECMP_Nexthop 2
> >
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
> > ovn_strip_lflows], [0], [dnl
> > @@ -6836,6 +6839,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
> > sed 's/192\.168\.0\..0/192.
> >
> >  check ovn-nbctl lr-route-del lr0
> >  wait_row_count nb:Logical_Router_Static_Route 0
> > +check_row_count ECMP_Nexthop 0
> >
> >  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
> >  ovn-sbctl dump-flows lr0 > lr0flows
> > --
> > 2.45.2
> >
> >
> Thanks,
> Ales
> 
> -- 
> 
> Ales Musil
> 
> Senior Software Engineer - OVN Core
> 
> Red Hat EMEA <https://www.redhat.com>
> 
> amusil@redhat.com
> <https://red.ht/sig>
diff mbox series

Patch

diff --git a/northd/en-northd.c b/northd/en-northd.c
index 817a4736c..0d86fcb60 100644
--- a/northd/en-northd.c
+++ b/northd/en-northd.c
@@ -402,6 +402,25 @@  en_bfd_sync_run(struct engine_node *node OVS_UNUSED,
     engine_set_node_state(node, EN_UPDATED);
 }
 
+void
+en_ecmp_nexthop_run(struct engine_node *node, void *data)
+{
+    const struct engine_context *eng_ctx = engine_get_context();
+    struct static_routes_data *static_routes_data =
+        engine_get_input_data("static_routes", node);
+    struct ecmp_nexthop_data *enh_data = data;
+    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
+        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
+
+    ecmp_nexthop_destroy(data);
+    ecmp_nexthop_init(data);
+    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
+                             &static_routes_data->parsed_routes,
+                             &enh_data->nexthops,
+                             sbrec_ecmp_nexthop_table);
+    engine_set_node_state(node, EN_UPDATED);
+}
+
 void
 *en_northd_init(struct engine_node *node OVS_UNUSED,
                 struct engine_arg *arg OVS_UNUSED)
@@ -449,6 +468,16 @@  void *en_bfd_sync_init(struct engine_node *node OVS_UNUSED,
     return NULL;
 }
 
+void
+*en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
+                      struct engine_arg *arg OVS_UNUSED)
+{
+    struct ecmp_nexthop_data *data = xzalloc(sizeof *data);
+
+    ecmp_nexthop_init(data);
+    return data;
+}
+
 void
 en_northd_cleanup(void *data)
 {
@@ -484,3 +513,9 @@  void
 en_bfd_sync_cleanup(void *data OVS_UNUSED)
 {
 }
+
+void
+en_ecmp_nexthop_cleanup(void *data)
+{
+    ecmp_nexthop_destroy(data);
+}
diff --git a/northd/en-northd.h b/northd/en-northd.h
index 68f7755f8..4f30ac335 100644
--- a/northd/en-northd.h
+++ b/northd/en-northd.h
@@ -42,5 +42,9 @@  void *en_bfd_sync_init(struct engine_node *node OVS_UNUSED,
 void en_bfd_sync_run(struct engine_node *node OVS_UNUSED,
                      void *data OVS_UNUSED);
 void en_bfd_sync_cleanup(void *data OVS_UNUSED);
+void en_ecmp_nexthop_run(struct engine_node *node, void *data);
+void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
+                           struct engine_arg *arg OVS_UNUSED);
+void en_ecmp_nexthop_cleanup(void *data);
 
 #endif /* EN_NORTHD_H */
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index b3baecaff..7416b1c43 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -101,7 +101,8 @@  static unixctl_cb_func chassis_features_list;
     SB_NODE(fdb, "fdb") \
     SB_NODE(static_mac_binding, "static_mac_binding") \
     SB_NODE(chassis_template_var, "chassis_template_var") \
-    SB_NODE(logical_dp_group, "logical_dp_group")
+    SB_NODE(logical_dp_group, "logical_dp_group") \
+    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
 
 enum sb_engine_node {
 #define SB_NODE(NAME, NAME_STR) SB_##NAME,
@@ -159,6 +160,7 @@  static ENGINE_NODE(route_policies, "route_policies");
 static ENGINE_NODE(static_routes, "static_routes");
 static ENGINE_NODE(bfd, "bfd");
 static ENGINE_NODE(bfd_sync, "bfd_sync");
+static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
 
 void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
                           struct ovsdb_idl_loop *sb)
@@ -258,6 +260,9 @@  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
     engine_add_input(&en_bfd_sync, &en_static_routes, NULL);
     engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
 
+    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
+    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
+
     engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
     engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
     engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
diff --git a/northd/northd.c b/northd/northd.c
index 70bfb144e..b5b332536 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -9998,6 +9998,81 @@  build_bfd_map(const struct nbrec_bfd_table *nbrec_bfd_table,
     }
 }
 
+#define NEXTHOP_IDS_LEN	65535
+void
+build_ecmp_nexthop_table(
+        struct ovsdb_idl_txn *ovnsb_txn,
+        struct hmap *routes,
+        struct simap *nexthops,
+        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
+{
+    unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
+
+    if (!ovnsb_txn) {
+        return;
+    }
+
+    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
+    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
+                                       sbrec_ecmp_nexthop_table) {
+        simap_put(nexthops, sb_ecmp_nexthop->nexthop,
+                  sb_ecmp_nexthop->id);
+        bitmap_set1(nexthop_ids, sb_ecmp_nexthop->id);
+    }
+
+    struct parsed_route *pr;
+    HMAP_FOR_EACH (pr, key_node, routes) {
+        if (!pr->ecmp_symmetric_reply) {
+            continue;
+        }
+
+        const struct nbrec_logical_router_static_route *r = pr->route;
+        if (!simap_contains(nexthops, r->nexthop)) {
+            int id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
+            if (id == NEXTHOP_IDS_LEN) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+                VLOG_WARN_RL(&rl, "nexthop id address space is exhausted");
+                continue;
+            }
+            bitmap_set1(nexthop_ids, id);
+            simap_put(nexthops, r->nexthop, id);
+
+            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
+            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
+            sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
+        }
+    }
+
+    struct simap_node *n;
+    SIMAP_FOR_EACH_SAFE (n, nexthops) {
+        bool stale = true;
+        HMAP_FOR_EACH (pr, key_node, routes) {
+            if (!pr->ecmp_symmetric_reply) {
+                continue;
+            }
+
+            const struct nbrec_logical_router_static_route *r = pr->route;
+            if (!strcmp(r->nexthop, n->name)) {
+                stale = false;
+                break;
+            }
+        }
+
+        if (stale) {
+            simap_delete(nexthops, n);
+        }
+    }
+
+    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH_SAFE (sb_ecmp_nexthop,
+                                            sbrec_ecmp_nexthop_table) {
+        if (!simap_contains(nexthops, sb_ecmp_nexthop->nexthop)) {
+                sbrec_ecmp_nexthop_delete(sb_ecmp_nexthop);
+        }
+    }
+
+    bitmap_free(nexthop_ids);
+}
+
 /* Returns a string of the IP address of the router port 'op' that
  * overlaps with 'ip_s".  If one is not found, returns NULL.
  *
@@ -17791,6 +17866,12 @@  bfd_init(struct bfd_data *data)
     hmap_init(&data->bfd_connections);
 }
 
+void
+ecmp_nexthop_init(struct ecmp_nexthop_data *data)
+{
+    simap_init(&data->nexthops);
+}
+
 void
 northd_destroy(struct northd_data *data)
 {
@@ -17872,6 +17953,12 @@  static_routes_destroy(struct static_routes_data *data)
     __bfd_destroy(&data->bfd_active_connections);
 }
 
+void
+ecmp_nexthop_destroy(struct ecmp_nexthop_data *data)
+{
+    simap_destroy(&data->nexthops);
+}
+
 void
 ovnnb_db_run(struct northd_input *input_data,
              struct northd_data *data,
diff --git a/northd/northd.h b/northd/northd.h
index 7ddb72eb7..205793e56 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -185,6 +185,10 @@  struct bfd_data {
     struct hmap bfd_connections;
 };
 
+struct ecmp_nexthop_data {
+    struct simap nexthops;
+};
+
 struct lr_nat_table;
 
 struct lflow_input {
@@ -724,6 +728,12 @@  void static_routes_destroy(struct static_routes_data *);
 void bfd_init(struct bfd_data *);
 void bfd_destroy(struct bfd_data *);
 
+void build_ecmp_nexthop_table(struct ovsdb_idl_txn *,
+                              struct hmap *, struct simap *,
+                              const struct sbrec_ecmp_nexthop_table *);
+void ecmp_nexthop_init(struct ecmp_nexthop_data *);
+void ecmp_nexthop_destroy(struct ecmp_nexthop_data *);
+
 struct lflow_table;
 struct lr_stateful_tracked_data;
 struct ls_stateful_tracked_data;
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index b6c051ae6..a4d1630b0 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@ 
 {
     "name": "OVN_Southbound",
-    "version": "20.34.0",
-    "cksum": "2786607656 31376",
+    "version": "20.35.0",
+    "cksum": "1887835491 32037",
     "tables": {
         "SB_Global": {
             "columns": {
@@ -608,6 +608,20 @@ 
                                           "refTable": "Datapath_Binding"}}}},
             "indexes": [["logical_port", "ip"]],
             "isRoot": true},
+        "ECMP_Nexthop": {
+            "columns": {
+                "nexthop": {"type": "string"},
+                "id": {"type": {"key": {"type": "integer",
+                                        "minInteger": 0,
+                                        "maxInteger": 65535}}},
+                "external_ids": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}},
+                "options": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}}},
+            "indexes": [["nexthop"]],
+            "isRoot": true},
         "Chassis_Template_Var": {
             "columns": {
                 "chassis": {"type": "string"},
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 73a1be5ed..d84e956c5 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -5179,4 +5179,35 @@  tcp.flags = RST;
       The set of variable values for a given chassis.
     </column>
   </table>
+
+  <table name="ECMP_Nexthop">
+    <p>
+      Each record in this table represents an active ECMP route committed by
+      <code>ovn-northd</code> to <code>ovs</code> connection-tracking table.
+      <code>ECMP_Nexthop</code> table is used by <code>ovn-controller</code>
+      to track active ct entries and to flush stale ones.
+    </p>
+    <column name="nexthop">
+      <p>
+        Nexthop IP address for this ECMP route. Nexthop IP address should
+        be the IP address of a connected router port or the IP address of
+        an external device used as nexthop for the given destination.
+      </p>
+    </column>
+
+    <column name="id">
+      <p>
+        Nexthop unique identifier. Nexthop ID is used to track active
+        ecmp-symmetric-reply connections and flush stale ones.
+      </p>
+    </column>
+
+    <column name="options">
+      Reserved for future use.
+    </column>
+
+    <column name="external_ids">
+      See <em>External IDs</em> at the beginning of this document.
+    </column>
+  </table>
 </database>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index a84fb6fab..b815eb321 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -6775,6 +6775,7 @@  check ovn-nbctl lsp-set-addresses public-lr0 router
 check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
 
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10
+check_row_count ECMP_Nexthop 1
 
 ovn-sbctl dump-flows lr0 > lr0flows
 
@@ -6786,6 +6787,7 @@  AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn
 ])
 
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20
+check_row_count ECMP_Nexthop 2
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl
@@ -6822,6 +6824,7 @@  AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [
 
 # add ecmp route with wrong nexthop
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20
+check_row_count ECMP_Nexthop 2
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl
@@ -6836,6 +6839,7 @@  AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.
 
 check ovn-nbctl lr-route-del lr0
 wait_row_count nb:Logical_Router_Static_Route 0
+check_row_count ECMP_Nexthop 0
 
 check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
 ovn-sbctl dump-flows lr0 > lr0flows