diff mbox series

[ovs-dev,1/4] northd: Introduce ECMP_Nexthop table in SB db.

Message ID 78017c6fdb5acfb516405e6486b852f26a7754cd.1731495611.git.lorenzo.bianconi@redhat.com
State Changes Requested
Delegated to: Dumitru Ceara
Headers show
Series Introduce ECMP_nexthop monitor in ovn-controller | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Lorenzo Bianconi Nov. 13, 2024, 11:04 a.m. UTC
Introduce ECMP_Nexthop table in the SB db in order to track active
ecmp-symmetric-reply connections and flush stale ones. ECMP_Nexthop
table contains ip and mac address for each active nexthop.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 northd/en-northd.c       |  29 +++++++++++
 northd/en-northd.h       |   4 ++
 northd/inc-proc-northd.c |   9 +++-
 northd/northd.c          | 104 ++++++++++++++++++++++++++++++++++++++-
 northd/northd.h          |   6 +++
 ovn-sb.ovsschema         |  17 ++++++-
 ovn-sb.xml               |  31 ++++++++++++
 tests/ovn-northd.at      |  33 ++++++++++---
 8 files changed, 222 insertions(+), 11 deletions(-)

Comments

Dumitru Ceara Dec. 18, 2024, 12:11 p.m. UTC | #1
On 11/13/24 12:04 PM, Lorenzo Bianconi wrote:
> Introduce ECMP_Nexthop table in the SB db in order to track active
> ecmp-symmetric-reply connections and flush stale ones. ECMP_Nexthop
> table contains ip and mac address for each active nexthop.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---

Hi Lorenzo,

Thanks for the patch!

>  northd/en-northd.c       |  29 +++++++++++
>  northd/en-northd.h       |   4 ++
>  northd/inc-proc-northd.c |   9 +++-
>  northd/northd.c          | 104 ++++++++++++++++++++++++++++++++++++++-
>  northd/northd.h          |   6 +++
>  ovn-sb.ovsschema         |  17 ++++++-
>  ovn-sb.xml               |  31 ++++++++++++
>  tests/ovn-northd.at      |  33 ++++++++++---
>  8 files changed, 222 insertions(+), 11 deletions(-)
> 
> diff --git a/northd/en-northd.c b/northd/en-northd.c
> index 24ed31517..165af44a0 100644
> --- a/northd/en-northd.c
> +++ b/northd/en-northd.c
> @@ -404,6 +404,23 @@ en_bfd_sync_run(struct engine_node *node, void *data)
>      engine_set_node_state(node, EN_UPDATED);
>  }
>  
> +void
> +en_ecmp_nexthop_run(struct engine_node *node, void *data OVS_UNUSED)

We started splitting out independent incremental processing engine nodes
into their own files.  See northd/en-*.[ch].  I think it makes sense to
move the en_ecmp_nexthop*() functions and all other related static
functions to a new northd/en-ecmp-nexthop.[ch] module.

> +{
> +    const struct engine_context *eng_ctx = engine_get_context();
> +    struct northd_data *northd_data = engine_get_input_data("northd", node);
> +    struct static_routes_data *static_routes_data =
> +        engine_get_input_data("static_routes", node);
> +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
> +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
> +
> +    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
> +                             &northd_data->lr_ports,
> +                             &static_routes_data->parsed_routes,
> +                             sbrec_ecmp_nexthop_table);
> +    engine_set_node_state(node, EN_UPDATED);
> +}
> +
>  void
>  *en_northd_init(struct engine_node *node OVS_UNUSED,
>                  struct engine_arg *arg OVS_UNUSED)
> @@ -454,6 +471,13 @@ void
>      return data;
>  }
>  
> +void *
> +en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> +                     struct engine_arg *arg OVS_UNUSED)
> +{
> +    return NULL;
> +}
> +
>  void
>  en_northd_cleanup(void *data)
>  {
> @@ -526,3 +550,8 @@ en_bfd_sync_cleanup(void *data)
>  {
>      bfd_sync_destroy(data);
>  }
> +
> +void
> +en_ecmp_nexthop_cleanup(void *data OVS_UNUSED)
> +{
> +}
> diff --git a/northd/en-northd.h b/northd/en-northd.h
> index 631a7c17a..2666cc67e 100644
> --- a/northd/en-northd.h
> +++ b/northd/en-northd.h
> @@ -42,5 +42,9 @@ bool bfd_sync_northd_change_handler(struct engine_node *node,
>                                      void *data OVS_UNUSED);
>  void en_bfd_sync_run(struct engine_node *node, void *data);
>  void en_bfd_sync_cleanup(void *data OVS_UNUSED);
> +void en_ecmp_nexthop_run(struct engine_node *node, void *data);
> +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> +                           struct engine_arg *arg OVS_UNUSED);
> +void en_ecmp_nexthop_cleanup(void *data);
>  
>  #endif /* EN_NORTHD_H */
> diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> index 8c834facb..8e16fde80 100644
> --- a/northd/inc-proc-northd.c
> +++ b/northd/inc-proc-northd.c
> @@ -103,7 +103,8 @@ static unixctl_cb_func chassis_features_list;
>      SB_NODE(fdb, "fdb") \
>      SB_NODE(static_mac_binding, "static_mac_binding") \
>      SB_NODE(chassis_template_var, "chassis_template_var") \
> -    SB_NODE(logical_dp_group, "logical_dp_group")
> +    SB_NODE(logical_dp_group, "logical_dp_group") \
> +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
>  
>  enum sb_engine_node {
>  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> @@ -162,6 +163,7 @@ static ENGINE_NODE(route_policies, "route_policies");
>  static ENGINE_NODE(static_routes, "static_routes");
>  static ENGINE_NODE(bfd, "bfd");
>  static ENGINE_NODE(bfd_sync, "bfd_sync");
> +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
>  
>  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>                            struct ovsdb_idl_loop *sb)
> @@ -264,6 +266,10 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>      engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
>      engine_add_input(&en_bfd_sync, &en_northd, bfd_sync_northd_change_handler);
>  
> +    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);

To avoid unnecessary processing, we also need to omit notifications for
SB.ECMP_Nexthop records in ovn-northd.c.

Here's an example for meters:
https://github.com/ovn-org/ovn/blob/ebe5d70122ce0f74067858f5cb19276c852a81da/northd/ovn-northd.c#L929-L931

> +    engine_add_input(&en_ecmp_nexthop, &en_northd, NULL);
> +    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
> +
>      engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
>      engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
>      engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
> @@ -334,6 +340,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>      engine_add_input(&en_sync_from_sb, &en_sb_port_binding, NULL);
>      engine_add_input(&en_sync_from_sb, &en_sb_ha_chassis_group, NULL);
>  
> +    engine_add_input(&en_northd_output, &en_ecmp_nexthop, NULL);
>      engine_add_input(&en_northd_output, &en_sync_from_sb, NULL);
>      engine_add_input(&en_northd_output, &en_sync_to_sb,
>                       northd_output_sync_to_sb_handler);
> diff --git a/northd/northd.c b/northd/northd.c
> index 64b2e3859..d54fbf14e 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -10720,6 +10720,106 @@ build_bfd_map(const struct nbrec_bfd_table *nbrec_bfd_table,
>      }
>  }
>  
> +struct ecmp_nexthop_data {
> +    struct hmap_node hmap_node;
> +    const struct sbrec_ecmp_nexthop *sb_ecmp_nh;
> +    bool stale;

We don't need this 'stale' field, please see the comment related to
build_ecmp_nexthop_table() below.

> +};
> +
> +static struct ecmp_nexthop_data *
> +ecmp_nexthop_alloc_entry(const struct sbrec_ecmp_nexthop *sb_ecmp_nh,
> +                         struct hmap *map)

The name is misleading.  We actually insert the entry.  I'd call this
ecmp_nexthop_insert_entry().

> +{
> +    struct ecmp_nexthop_data *e = xmalloc(sizeof *e);
> +    e->sb_ecmp_nh = sb_ecmp_nh;
> +
> +    const char *sb_port = sb_ecmp_nh->port->logical_port;
> +    const char *sb_nexthop = sb_ecmp_nh->nexthop;
> +
> +    uint32_t hash = hash_string(sb_nexthop, 0);
> +    hash = hash_add(hash, hash_string(sb_port, 0));

Port names can be long.  What about hashing the port binding key instead?

We could rewrite this as:

    uint32_t hash = hash_string(sb_ecmp_nh->nexthop, 0);
    hash = hash_int(sb_ecmp_nh->port->tunnel_key, hash);
    hmap_insert(map, &e->hmap_node, hash);

> +    hmap_insert(map, &e->hmap_node, hash);
> +
> +    return e;
> +}
> +
> +static struct ecmp_nexthop_data *
> +ecmp_nexthop_find_entry(const char *nexthop, const char *port,
> +                        struct hmap *map)
> +{
> +    uint32_t hash = hash_string(nexthop, 0);
> +    hash = hash_add(hash, hash_string(port, 0));
> +

If we change the hash function to use the port binding key, this should
also change to something like:

static struct ecmp_nexthop_data *
ecmp_nexthop_find_entry(const char *nexthop,
                        const struct sbrec_port_binding *pb,
                        struct hmap *map)
{
    uint32_t hash = hash_string(nexthop, 0);
    hash = hash_int(pb->tunnel_key, hash);

> +    struct ecmp_nexthop_data *e;
> +    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) {
> +        const char *sb_port = e->sb_ecmp_nh->port->logical_port;
> +        const char *sb_nexthop = e->sb_ecmp_nh->nexthop;
> +        if (!strcmp(sb_nexthop, nexthop) && !strcmp(sb_port, port)) {
> +            return e;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +void
> +build_ecmp_nexthop_table(
> +        struct ovsdb_idl_txn *ovnsb_txn,
> +        const struct hmap *lr_ports, const struct hmap *routes,
> +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
> +{
> +    if (!ovnsb_txn) {
> +        return;
> +    }
> +
> +    struct hmap sb_nexthops_map = HMAP_INITIALIZER(&sb_nexthops_map);
> +
> +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
> +                                       sbrec_ecmp_nexthop_table) {
> +        struct ecmp_nexthop_data *e = ecmp_nexthop_alloc_entry(
> +                sb_ecmp_nexthop, &sb_nexthops_map);
> +        e->stale = true;
> +    }
> +
> +    struct parsed_route *pr;
> +    HMAP_FOR_EACH (pr, key_node, routes) {
> +        if (!pr->ecmp_symmetric_reply) {
> +            continue;
> +        }
> +
> +        if (!pr->out_port) {
> +            continue;
> +        }
> +
> +        struct ovn_port *out_port = ovn_port_find(lr_ports, pr->out_port->key);

Why do we lookup again here?  pr->out_port is the ovn_port already.

> +        if (!out_port || !out_port->sb) {
> +            continue;
> +        }
> +
> +        const struct nbrec_logical_router_static_route *r = pr->route;
> +        const char *pb_name = out_port->sb->logical_port;
> +
> +        struct ecmp_nexthop_data *e = ecmp_nexthop_find_entry(
> +                r->nexthop, pb_name, &sb_nexthops_map);
> +        if (!e) {
> +            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> +            sbrec_ecmp_nexthop_set_port(sb_ecmp_nexthop, out_port->sb);
> +            e = ecmp_nexthop_alloc_entry(sb_ecmp_nexthop, &sb_nexthops_map);

Instead of adding the entry to sb_nexthops_map here we can just remove
'e' from sb_nexthops_map and free it if found (else branch of this if) ...

> +        }
> +        e->stale = false;
> +    }
> +
> +    struct ecmp_nexthop_data *e;
> +    HMAP_FOR_EACH_POP (e, hmap_node, &sb_nexthops_map) {
> +        if (e->stale) {
> +            sbrec_ecmp_nexthop_delete(e->sb_ecmp_nh);

... then here all remaining items in sb_nexthops_map are stale.

> +        }
> +        free(e);
> +    }
> +    hmap_destroy(&sb_nexthops_map);
> +}
> +
>  /* Returns a string of the IP address of the router port 'op' that
>   * overlaps with 'ip_s".  If one is not found, returns NULL.
>   *
> @@ -11160,10 +11260,11 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports,
>      }
>  
>      /* Verify that ip_prefix and nexthop are on the same network. */
> +    struct ovn_port *out_port = NULL;
>      if (!is_discard_route &&
>          !find_static_route_outport(od, lr_ports, route,
>                                     IN6_IS_ADDR_V4MAPPED(&prefix),
> -                                   NULL, NULL)) {
> +                                   NULL, &out_port)) {
>          return;
>      }
>  
> @@ -11206,6 +11307,7 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports,
>      new_pr->hash = route_hash(new_pr);
>      new_pr->route = route;
>      new_pr->nbr = od->nbr;
> +    new_pr->out_port = out_port;
>      new_pr->ecmp_symmetric_reply = smap_get_bool(&route->options,
>                                                   "ecmp_symmetric_reply",
>                                                   false);
> diff --git a/northd/northd.h b/northd/northd.h
> index c1442ff40..3bd2e29e3 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -703,6 +703,7 @@ struct parsed_route {
>      uint32_t route_table_id;
>      uint32_t hash;
>      const struct nbrec_logical_router_static_route *route;
> +    struct ovn_port *out_port;
>      bool ecmp_symmetric_reply;
>      bool is_discard_route;
>      const struct nbrec_logical_router *nbr;
> @@ -746,6 +747,11 @@ void bfd_destroy(struct bfd_data *);
>  void bfd_sync_init(struct bfd_sync_data *);
>  void bfd_sync_destroy(struct bfd_sync_data *);
>  
> +void build_ecmp_nexthop_table(
> +        struct ovsdb_idl_txn *ovnsb_txn,
> +        const struct hmap *lr_ports, const struct hmap *routes,
> +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table);
> +
>  struct lflow_table;
>  struct lr_stateful_tracked_data;
>  struct ls_stateful_tracked_data;
> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> index 73abf2c8d..864cb0ed6 100644
> --- a/ovn-sb.ovsschema
> +++ b/ovn-sb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Southbound",
> -    "version": "20.37.0",
> -    "cksum": "1950136776 31493",
> +    "version": "20.38.0",
> +    "cksum": "466210938 32119",
>      "tables": {
>          "SB_Global": {
>              "columns": {
> @@ -610,6 +610,19 @@
>                                            "refTable": "Datapath_Binding"}}}},
>              "indexes": [["logical_port", "ip"]],
>              "isRoot": true},
> +        "ECMP_Nexthop": {
> +            "columns": {
> +                "nexthop": {"type": "string"},
> +                "port": {"type": {"key": {"type": "uuid",
> +                                          "refTable": "Port_Binding",
> +                                          "refType": "strong"},
> +                                   "min": 0, "max": 1}},

I thought there's never a case when we don't have a port for the
next-hop.  I think this shouldn't be optional.

> +                "mac": {"type": "string"},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "indexes": [["nexthop", "port"]],
> +            "isRoot": true},
>          "Chassis_Template_Var": {
>              "columns": {
>                  "chassis": {"type": "string"},
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index ea4adc1c3..ea1d484a7 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -5217,4 +5217,35 @@ tcp.flags = RST;
>        The set of variable values for a given chassis.
>      </column>
>    </table>
> +
> +  <table name="ECMP_Nexthop">
> +    <p>
> +      Each record in this table represents an active ECMP route committed by

This seems wrong, it's not a route, it's a next hop.  And the next hop
can be shared by multiple routes.

Also, we only create ECMP_Nexthop records for routes that have
--ecmp-symmetric-reply set.  It's probably good to mention that here in
case users expect other ECMP nexthops to be created in the SB.

> +      <code>ovn-northd</code> to <code>ovs</code> connection-tracking table.

Nit: maybe rephrase this to "to the <code>ovs</code> connection tracker".

> +      <code>ECMP_Nexthop</code> table is used by <code>ovn-controller</code>

Nit: The <code>ECMP_Nexthop</code> table.

> +      to track active ct entries and to flush stale ones.
> +    </p>
> +    <column name="nexthop">
> +      <p>
> +        Nexthop IP address for this ECMP route. Nexthop IP address should
> +        be the IP address of a connected router port or the IP address of
> +        an external device used as nexthop for the given destination.
> +      </p>
> +    </column>
> +    <column name="port">
> +      <p>
> +        The reference to <ref table="Port_Binding"/> table for the port used
> +        to connect to the configured next-hop.
> +      </p>
> +    </column>
> +    <column name="mac">
> +      <p>
> +        Nexthop mac address.
> +      </p>
> +    </column>
> +
> +    <column name="external_ids">
> +      See <em>External IDs</em> at the beginning of this document.
> +    </column>
> +  </table>
>  </database>
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 8477e4250..1e01c2614 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -3886,7 +3886,7 @@ wait_row_count bfd 1 logical_port=r0-sw3 detect_mult=5 dst_ip=192.168.3.2 \
>  check_engine_stats northd norecompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3901,7 +3901,7 @@ wait_row_count bfd 1 logical_port=r0-sw1 min_rx=1000 min_tx=1000 detect_mult=100
>  check_engine_stats northd norecompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3913,7 +3913,7 @@ check_engine_stats northd recompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats static_routes recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3929,7 +3929,7 @@ check_engine_stats northd recompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats static_routes recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3941,7 +3941,7 @@ check_engine_stats northd recompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats route_policies recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3976,7 +3976,7 @@ check_engine_stats northd recompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats static_routes recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3991,7 +3991,7 @@ check_engine_stats northd recompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats route_policies recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -6816,6 +6816,10 @@ check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
>  
>  # ECMP flows will be added even if there is only one next-hop.
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10
> +check_row_count ECMP_Nexthop 1

As far as I can tell we only do positive checks, i.e., check that next
hop records are created in the SB for routes with --ecmp-symmetric-reply
set.

We could also add a test to ensure that no ECMP_Nexthop is created for
routes that have --ecmp set but no --ecmp-symmetric-reply.

> +uuid=$(fetch_column Port_Binding _uuid logical_port=lr0-public)
> +check_column 192.168.0.10 ECMP_Nexthop nexthop
> +check_column "$uuid" ECMP_Nexthop port
>  
>  ovn-sbctl dump-flows lr0 > lr0flows
>  
> @@ -6835,6 +6839,13 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn
>  ])
>  
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20
> +check_row_count ECMP_Nexthop 2
> +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl
> +192.168.0.10
> +])
> +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl
> +192.168.0.20
> +])
>  
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
> @@ -6864,6 +6875,13 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [
>  
>  # add ecmp route with wrong nexthop
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20
> +check_row_count ECMP_Nexthop 2
> +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl
> +192.168.0.10
> +])
> +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl
> +192.168.0.20
> +])
>  
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
> @@ -6883,6 +6901,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.
>  
>  check ovn-nbctl lr-route-del lr0
>  wait_row_count nb:Logical_Router_Static_Route 0
> +check_row_count ECMP_Nexthop 0
>  
>  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>  ovn-sbctl dump-flows lr0 > lr0flows

Regards,
Dumitru
Lorenzo Bianconi Dec. 19, 2024, 3 p.m. UTC | #2
> On 11/13/24 12:04 PM, Lorenzo Bianconi wrote:
> > Introduce ECMP_Nexthop table in the SB db in order to track active
> > ecmp-symmetric-reply connections and flush stale ones. ECMP_Nexthop
> > table contains ip and mac address for each active nexthop.
> > 
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> > ---
> 
> Hi Lorenzo,
> 
> Thanks for the patch!

Hi Dumitru,

thx for the review.

> 
> >  northd/en-northd.c       |  29 +++++++++++
> >  northd/en-northd.h       |   4 ++
> >  northd/inc-proc-northd.c |   9 +++-
> >  northd/northd.c          | 104 ++++++++++++++++++++++++++++++++++++++-
> >  northd/northd.h          |   6 +++
> >  ovn-sb.ovsschema         |  17 ++++++-
> >  ovn-sb.xml               |  31 ++++++++++++
> >  tests/ovn-northd.at      |  33 ++++++++++---
> >  8 files changed, 222 insertions(+), 11 deletions(-)
> > 
> > diff --git a/northd/en-northd.c b/northd/en-northd.c
> > index 24ed31517..165af44a0 100644
> > --- a/northd/en-northd.c
> > +++ b/northd/en-northd.c
> > @@ -404,6 +404,23 @@ en_bfd_sync_run(struct engine_node *node, void *data)
> >      engine_set_node_state(node, EN_UPDATED);
> >  }
> >  
> > +void
> > +en_ecmp_nexthop_run(struct engine_node *node, void *data OVS_UNUSED)
> 
> We started splitting out independent incremental processing engine nodes
> into their own files.  See northd/en-*.[ch].  I think it makes sense to
> move the en_ecmp_nexthop*() functions and all other related static
> functions to a new northd/en-ecmp-nexthop.[ch] module.

ack, I will fix it.

> 
> > +{
> > +    const struct engine_context *eng_ctx = engine_get_context();
> > +    struct northd_data *northd_data = engine_get_input_data("northd", node);
> > +    struct static_routes_data *static_routes_data =
> > +        engine_get_input_data("static_routes", node);
> > +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
> > +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
> > +
> > +    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
> > +                             &northd_data->lr_ports,
> > +                             &static_routes_data->parsed_routes,
> > +                             sbrec_ecmp_nexthop_table);
> > +    engine_set_node_state(node, EN_UPDATED);
> > +}
> > +
> >  void
> >  *en_northd_init(struct engine_node *node OVS_UNUSED,
> >                  struct engine_arg *arg OVS_UNUSED)
> > @@ -454,6 +471,13 @@ void
> >      return data;
> >  }
> >  
> > +void *
> > +en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> > +                     struct engine_arg *arg OVS_UNUSED)
> > +{
> > +    return NULL;
> > +}
> > +
> >  void
> >  en_northd_cleanup(void *data)
> >  {
> > @@ -526,3 +550,8 @@ en_bfd_sync_cleanup(void *data)
> >  {
> >      bfd_sync_destroy(data);
> >  }
> > +
> > +void
> > +en_ecmp_nexthop_cleanup(void *data OVS_UNUSED)
> > +{
> > +}
> > diff --git a/northd/en-northd.h b/northd/en-northd.h
> > index 631a7c17a..2666cc67e 100644
> > --- a/northd/en-northd.h
> > +++ b/northd/en-northd.h
> > @@ -42,5 +42,9 @@ bool bfd_sync_northd_change_handler(struct engine_node *node,
> >                                      void *data OVS_UNUSED);
> >  void en_bfd_sync_run(struct engine_node *node, void *data);
> >  void en_bfd_sync_cleanup(void *data OVS_UNUSED);
> > +void en_ecmp_nexthop_run(struct engine_node *node, void *data);
> > +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> > +                           struct engine_arg *arg OVS_UNUSED);
> > +void en_ecmp_nexthop_cleanup(void *data);
> >  
> >  #endif /* EN_NORTHD_H */
> > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > index 8c834facb..8e16fde80 100644
> > --- a/northd/inc-proc-northd.c
> > +++ b/northd/inc-proc-northd.c
> > @@ -103,7 +103,8 @@ static unixctl_cb_func chassis_features_list;
> >      SB_NODE(fdb, "fdb") \
> >      SB_NODE(static_mac_binding, "static_mac_binding") \
> >      SB_NODE(chassis_template_var, "chassis_template_var") \
> > -    SB_NODE(logical_dp_group, "logical_dp_group")
> > +    SB_NODE(logical_dp_group, "logical_dp_group") \
> > +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
> >  
> >  enum sb_engine_node {
> >  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> > @@ -162,6 +163,7 @@ static ENGINE_NODE(route_policies, "route_policies");
> >  static ENGINE_NODE(static_routes, "static_routes");
> >  static ENGINE_NODE(bfd, "bfd");
> >  static ENGINE_NODE(bfd_sync, "bfd_sync");
> > +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
> >  
> >  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >                            struct ovsdb_idl_loop *sb)
> > @@ -264,6 +266,10 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >      engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
> >      engine_add_input(&en_bfd_sync, &en_northd, bfd_sync_northd_change_handler);
> >  
> > +    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
> 
> To avoid unnecessary processing, we also need to omit notifications for
> SB.ECMP_Nexthop records in ovn-northd.c.
> 
> Here's an example for meters:
> https://github.com/ovn-org/ovn/blob/ebe5d70122ce0f74067858f5cb19276c852a81da/northd/ovn-northd.c#L929-L931

ack, I will fix it.

> 
> > +    engine_add_input(&en_ecmp_nexthop, &en_northd, NULL);
> > +    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
> > +
> >      engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
> >      engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
> >      engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
> > @@ -334,6 +340,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >      engine_add_input(&en_sync_from_sb, &en_sb_port_binding, NULL);
> >      engine_add_input(&en_sync_from_sb, &en_sb_ha_chassis_group, NULL);
> >  
> > +    engine_add_input(&en_northd_output, &en_ecmp_nexthop, NULL);
> >      engine_add_input(&en_northd_output, &en_sync_from_sb, NULL);
> >      engine_add_input(&en_northd_output, &en_sync_to_sb,
> >                       northd_output_sync_to_sb_handler);
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 64b2e3859..d54fbf14e 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -10720,6 +10720,106 @@ build_bfd_map(const struct nbrec_bfd_table *nbrec_bfd_table,
> >      }
> >  }
> >  
> > +struct ecmp_nexthop_data {
> > +    struct hmap_node hmap_node;
> > +    const struct sbrec_ecmp_nexthop *sb_ecmp_nh;
> > +    bool stale;
> 
> We don't need this 'stale' field, please see the comment related to
> build_ecmp_nexthop_table() below.

ack, I will fix it.

> 
> > +};
> > +
> > +static struct ecmp_nexthop_data *
> > +ecmp_nexthop_alloc_entry(const struct sbrec_ecmp_nexthop *sb_ecmp_nh,
> > +                         struct hmap *map)
> 
> The name is misleading.  We actually insert the entry.  I'd call this
> ecmp_nexthop_insert_entry().

ack, I will fix it.

> 
> > +{
> > +    struct ecmp_nexthop_data *e = xmalloc(sizeof *e);
> > +    e->sb_ecmp_nh = sb_ecmp_nh;
> > +
> > +    const char *sb_port = sb_ecmp_nh->port->logical_port;
> > +    const char *sb_nexthop = sb_ecmp_nh->nexthop;
> > +
> > +    uint32_t hash = hash_string(sb_nexthop, 0);
> > +    hash = hash_add(hash, hash_string(sb_port, 0));
> 
> Port names can be long.  What about hashing the port binding key instead?
> 
> We could rewrite this as:
> 
>     uint32_t hash = hash_string(sb_ecmp_nh->nexthop, 0);
>     hash = hash_int(sb_ecmp_nh->port->tunnel_key, hash);
>     hmap_insert(map, &e->hmap_node, hash);

ack, I will fix it.

> 
> > +    hmap_insert(map, &e->hmap_node, hash);
> > +
> > +    return e;
> > +}
> > +
> > +static struct ecmp_nexthop_data *
> > +ecmp_nexthop_find_entry(const char *nexthop, const char *port,
> > +                        struct hmap *map)
> > +{
> > +    uint32_t hash = hash_string(nexthop, 0);
> > +    hash = hash_add(hash, hash_string(port, 0));
> > +
> 
> If we change the hash function to use the port binding key, this should
> also change to something like:
> 
> static struct ecmp_nexthop_data *
> ecmp_nexthop_find_entry(const char *nexthop,
>                         const struct sbrec_port_binding *pb,
>                         struct hmap *map)
> {
>     uint32_t hash = hash_string(nexthop, 0);
>     hash = hash_int(pb->tunnel_key, hash);

ack

> 
> > +    struct ecmp_nexthop_data *e;
> > +    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) {
> > +        const char *sb_port = e->sb_ecmp_nh->port->logical_port;
> > +        const char *sb_nexthop = e->sb_ecmp_nh->nexthop;
> > +        if (!strcmp(sb_nexthop, nexthop) && !strcmp(sb_port, port)) {
> > +            return e;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +void
> > +build_ecmp_nexthop_table(
> > +        struct ovsdb_idl_txn *ovnsb_txn,
> > +        const struct hmap *lr_ports, const struct hmap *routes,
> > +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
> > +{
> > +    if (!ovnsb_txn) {
> > +        return;
> > +    }
> > +
> > +    struct hmap sb_nexthops_map = HMAP_INITIALIZER(&sb_nexthops_map);
> > +
> > +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> > +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
> > +                                       sbrec_ecmp_nexthop_table) {
> > +        struct ecmp_nexthop_data *e = ecmp_nexthop_alloc_entry(
> > +                sb_ecmp_nexthop, &sb_nexthops_map);
> > +        e->stale = true;
> > +    }
> > +
> > +    struct parsed_route *pr;
> > +    HMAP_FOR_EACH (pr, key_node, routes) {
> > +        if (!pr->ecmp_symmetric_reply) {
> > +            continue;
> > +        }
> > +
> > +        if (!pr->out_port) {
> > +            continue;
> > +        }
> > +
> > +        struct ovn_port *out_port = ovn_port_find(lr_ports, pr->out_port->key);
> 
> Why do we lookup again here?  pr->out_port is the ovn_port already.

I think it is a leftover, I will fix it

> 
> > +        if (!out_port || !out_port->sb) {
> > +            continue;
> > +        }
> > +
> > +        const struct nbrec_logical_router_static_route *r = pr->route;
> > +        const char *pb_name = out_port->sb->logical_port;
> > +
> > +        struct ecmp_nexthop_data *e = ecmp_nexthop_find_entry(
> > +                r->nexthop, pb_name, &sb_nexthops_map);
> > +        if (!e) {
> > +            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> > +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> > +            sbrec_ecmp_nexthop_set_port(sb_ecmp_nexthop, out_port->sb);
> > +            e = ecmp_nexthop_alloc_entry(sb_ecmp_nexthop, &sb_nexthops_map);
> 
> Instead of adding the entry to sb_nexthops_map here we can just remove
> 'e' from sb_nexthops_map and free it if found (else branch of this if) ...

ack, I will fix it.

> 
> > +        }
> > +        e->stale = false;
> > +    }
> > +
> > +    struct ecmp_nexthop_data *e;
> > +    HMAP_FOR_EACH_POP (e, hmap_node, &sb_nexthops_map) {
> > +        if (e->stale) {
> > +            sbrec_ecmp_nexthop_delete(e->sb_ecmp_nh);
> 
> ... then here all remaining items in sb_nexthops_map are stale.
> 
> > +        }
> > +        free(e);
> > +    }
> > +    hmap_destroy(&sb_nexthops_map);
> > +}
> > +
> >  /* Returns a string of the IP address of the router port 'op' that
> >   * overlaps with 'ip_s".  If one is not found, returns NULL.
> >   *
> > @@ -11160,10 +11260,11 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports,
> >      }
> >  
> >      /* Verify that ip_prefix and nexthop are on the same network. */
> > +    struct ovn_port *out_port = NULL;
> >      if (!is_discard_route &&
> >          !find_static_route_outport(od, lr_ports, route,
> >                                     IN6_IS_ADDR_V4MAPPED(&prefix),
> > -                                   NULL, NULL)) {
> > +                                   NULL, &out_port)) {
> >          return;
> >      }
> >  
> > @@ -11206,6 +11307,7 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports,
> >      new_pr->hash = route_hash(new_pr);
> >      new_pr->route = route;
> >      new_pr->nbr = od->nbr;
> > +    new_pr->out_port = out_port;
> >      new_pr->ecmp_symmetric_reply = smap_get_bool(&route->options,
> >                                                   "ecmp_symmetric_reply",
> >                                                   false);
> > diff --git a/northd/northd.h b/northd/northd.h
> > index c1442ff40..3bd2e29e3 100644
> > --- a/northd/northd.h
> > +++ b/northd/northd.h
> > @@ -703,6 +703,7 @@ struct parsed_route {
> >      uint32_t route_table_id;
> >      uint32_t hash;
> >      const struct nbrec_logical_router_static_route *route;
> > +    struct ovn_port *out_port;
> >      bool ecmp_symmetric_reply;
> >      bool is_discard_route;
> >      const struct nbrec_logical_router *nbr;
> > @@ -746,6 +747,11 @@ void bfd_destroy(struct bfd_data *);
> >  void bfd_sync_init(struct bfd_sync_data *);
> >  void bfd_sync_destroy(struct bfd_sync_data *);
> >  
> > +void build_ecmp_nexthop_table(
> > +        struct ovsdb_idl_txn *ovnsb_txn,
> > +        const struct hmap *lr_ports, const struct hmap *routes,
> > +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table);
> > +
> >  struct lflow_table;
> >  struct lr_stateful_tracked_data;
> >  struct ls_stateful_tracked_data;
> > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> > index 73abf2c8d..864cb0ed6 100644
> > --- a/ovn-sb.ovsschema
> > +++ b/ovn-sb.ovsschema
> > @@ -1,7 +1,7 @@
> >  {
> >      "name": "OVN_Southbound",
> > -    "version": "20.37.0",
> > -    "cksum": "1950136776 31493",
> > +    "version": "20.38.0",
> > +    "cksum": "466210938 32119",
> >      "tables": {
> >          "SB_Global": {
> >              "columns": {
> > @@ -610,6 +610,19 @@
> >                                            "refTable": "Datapath_Binding"}}}},
> >              "indexes": [["logical_port", "ip"]],
> >              "isRoot": true},
> > +        "ECMP_Nexthop": {
> > +            "columns": {
> > +                "nexthop": {"type": "string"},
> > +                "port": {"type": {"key": {"type": "uuid",
> > +                                          "refTable": "Port_Binding",
> > +                                          "refType": "strong"},
> > +                                   "min": 0, "max": 1}},
> 
> I thought there's never a case when we don't have a port for the
> next-hop.  I think this shouldn't be optional.

ack, I will fix it.

> 
> > +                "mac": {"type": "string"},
> > +                "external_ids": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}}},
> > +            "indexes": [["nexthop", "port"]],
> > +            "isRoot": true},
> >          "Chassis_Template_Var": {
> >              "columns": {
> >                  "chassis": {"type": "string"},
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index ea4adc1c3..ea1d484a7 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -5217,4 +5217,35 @@ tcp.flags = RST;
> >        The set of variable values for a given chassis.
> >      </column>
> >    </table>
> > +
> > +  <table name="ECMP_Nexthop">
> > +    <p>
> > +      Each record in this table represents an active ECMP route committed by
> 
> This seems wrong, it's not a route, it's a next hop.  And the next hop
> can be shared by multiple routes.
> 
> Also, we only create ECMP_Nexthop records for routes that have
> --ecmp-symmetric-reply set.  It's probably good to mention that here in
> case users expect other ECMP nexthops to be created in the SB.

ack, I will fix it.

> 
> > +      <code>ovn-northd</code> to <code>ovs</code> connection-tracking table.
> 
> Nit: maybe rephrase this to "to the <code>ovs</code> connection tracker".
> 
> > +      <code>ECMP_Nexthop</code> table is used by <code>ovn-controller</code>
> 
> Nit: The <code>ECMP_Nexthop</code> table.
> 
> > +      to track active ct entries and to flush stale ones.
> > +    </p>
> > +    <column name="nexthop">
> > +      <p>
> > +        Nexthop IP address for this ECMP route. Nexthop IP address should
> > +        be the IP address of a connected router port or the IP address of
> > +        an external device used as nexthop for the given destination.
> > +      </p>
> > +    </column>
> > +    <column name="port">
> > +      <p>
> > +        The reference to <ref table="Port_Binding"/> table for the port used
> > +        to connect to the configured next-hop.
> > +      </p>
> > +    </column>
> > +    <column name="mac">
> > +      <p>
> > +        Nexthop mac address.
> > +      </p>
> > +    </column>
> > +
> > +    <column name="external_ids">
> > +      See <em>External IDs</em> at the beginning of this document.
> > +    </column>
> > +  </table>
> >  </database>
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index 8477e4250..1e01c2614 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -3886,7 +3886,7 @@ wait_row_count bfd 1 logical_port=r0-sw3 detect_mult=5 dst_ip=192.168.3.2 \
> >  check_engine_stats northd norecompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3901,7 +3901,7 @@ wait_row_count bfd 1 logical_port=r0-sw1 min_rx=1000 min_tx=1000 detect_mult=100
> >  check_engine_stats northd norecompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3913,7 +3913,7 @@ check_engine_stats northd recompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats static_routes recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3929,7 +3929,7 @@ check_engine_stats northd recompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats static_routes recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3941,7 +3941,7 @@ check_engine_stats northd recompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats route_policies recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3976,7 +3976,7 @@ check_engine_stats northd recompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats static_routes recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3991,7 +3991,7 @@ check_engine_stats northd recompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats route_policies recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -6816,6 +6816,10 @@ check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
> >  
> >  # ECMP flows will be added even if there is only one next-hop.
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10
> > +check_row_count ECMP_Nexthop 1
> 
> As far as I can tell we only do positive checks, i.e., check that next
> hop records are created in the SB for routes with --ecmp-symmetric-reply
> set.
> 
> We could also add a test to ensure that no ECMP_Nexthop is created for
> routes that have --ecmp set but no --ecmp-symmetric-reply.

ack, I will fix it.

Regards,
Lorenzo

> 
> > +uuid=$(fetch_column Port_Binding _uuid logical_port=lr0-public)
> > +check_column 192.168.0.10 ECMP_Nexthop nexthop
> > +check_column "$uuid" ECMP_Nexthop port
> >  
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >  
> > @@ -6835,6 +6839,13 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn
> >  ])
> >  
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20
> > +check_row_count ECMP_Nexthop 2
> > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl
> > +192.168.0.10
> > +])
> > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl
> > +192.168.0.20
> > +])
> >  
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >  AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
> > @@ -6864,6 +6875,13 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [
> >  
> >  # add ecmp route with wrong nexthop
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20
> > +check_row_count ECMP_Nexthop 2
> > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl
> > +192.168.0.10
> > +])
> > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl
> > +192.168.0.20
> > +])
> >  
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >  AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
> > @@ -6883,6 +6901,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.
> >  
> >  check ovn-nbctl lr-route-del lr0
> >  wait_row_count nb:Logical_Router_Static_Route 0
> > +check_row_count ECMP_Nexthop 0
> >  
> >  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
> >  ovn-sbctl dump-flows lr0 > lr0flows
> 
> Regards,
> Dumitru
>
diff mbox series

Patch

diff --git a/northd/en-northd.c b/northd/en-northd.c
index 24ed31517..165af44a0 100644
--- a/northd/en-northd.c
+++ b/northd/en-northd.c
@@ -404,6 +404,23 @@  en_bfd_sync_run(struct engine_node *node, void *data)
     engine_set_node_state(node, EN_UPDATED);
 }
 
+void
+en_ecmp_nexthop_run(struct engine_node *node, void *data OVS_UNUSED)
+{
+    const struct engine_context *eng_ctx = engine_get_context();
+    struct northd_data *northd_data = engine_get_input_data("northd", node);
+    struct static_routes_data *static_routes_data =
+        engine_get_input_data("static_routes", node);
+    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
+        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
+
+    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
+                             &northd_data->lr_ports,
+                             &static_routes_data->parsed_routes,
+                             sbrec_ecmp_nexthop_table);
+    engine_set_node_state(node, EN_UPDATED);
+}
+
 void
 *en_northd_init(struct engine_node *node OVS_UNUSED,
                 struct engine_arg *arg OVS_UNUSED)
@@ -454,6 +471,13 @@  void
     return data;
 }
 
+void *
+en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
+                     struct engine_arg *arg OVS_UNUSED)
+{
+    return NULL;
+}
+
 void
 en_northd_cleanup(void *data)
 {
@@ -526,3 +550,8 @@  en_bfd_sync_cleanup(void *data)
 {
     bfd_sync_destroy(data);
 }
+
+void
+en_ecmp_nexthop_cleanup(void *data OVS_UNUSED)
+{
+}
diff --git a/northd/en-northd.h b/northd/en-northd.h
index 631a7c17a..2666cc67e 100644
--- a/northd/en-northd.h
+++ b/northd/en-northd.h
@@ -42,5 +42,9 @@  bool bfd_sync_northd_change_handler(struct engine_node *node,
                                     void *data OVS_UNUSED);
 void en_bfd_sync_run(struct engine_node *node, void *data);
 void en_bfd_sync_cleanup(void *data OVS_UNUSED);
+void en_ecmp_nexthop_run(struct engine_node *node, void *data);
+void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
+                           struct engine_arg *arg OVS_UNUSED);
+void en_ecmp_nexthop_cleanup(void *data);
 
 #endif /* EN_NORTHD_H */
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index 8c834facb..8e16fde80 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -103,7 +103,8 @@  static unixctl_cb_func chassis_features_list;
     SB_NODE(fdb, "fdb") \
     SB_NODE(static_mac_binding, "static_mac_binding") \
     SB_NODE(chassis_template_var, "chassis_template_var") \
-    SB_NODE(logical_dp_group, "logical_dp_group")
+    SB_NODE(logical_dp_group, "logical_dp_group") \
+    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
 
 enum sb_engine_node {
 #define SB_NODE(NAME, NAME_STR) SB_##NAME,
@@ -162,6 +163,7 @@  static ENGINE_NODE(route_policies, "route_policies");
 static ENGINE_NODE(static_routes, "static_routes");
 static ENGINE_NODE(bfd, "bfd");
 static ENGINE_NODE(bfd_sync, "bfd_sync");
+static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
 
 void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
                           struct ovsdb_idl_loop *sb)
@@ -264,6 +266,10 @@  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
     engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
     engine_add_input(&en_bfd_sync, &en_northd, bfd_sync_northd_change_handler);
 
+    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
+    engine_add_input(&en_ecmp_nexthop, &en_northd, NULL);
+    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
+
     engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
     engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
     engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
@@ -334,6 +340,7 @@  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
     engine_add_input(&en_sync_from_sb, &en_sb_port_binding, NULL);
     engine_add_input(&en_sync_from_sb, &en_sb_ha_chassis_group, NULL);
 
+    engine_add_input(&en_northd_output, &en_ecmp_nexthop, NULL);
     engine_add_input(&en_northd_output, &en_sync_from_sb, NULL);
     engine_add_input(&en_northd_output, &en_sync_to_sb,
                      northd_output_sync_to_sb_handler);
diff --git a/northd/northd.c b/northd/northd.c
index 64b2e3859..d54fbf14e 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -10720,6 +10720,106 @@  build_bfd_map(const struct nbrec_bfd_table *nbrec_bfd_table,
     }
 }
 
+struct ecmp_nexthop_data {
+    struct hmap_node hmap_node;
+    const struct sbrec_ecmp_nexthop *sb_ecmp_nh;
+    bool stale;
+};
+
+static struct ecmp_nexthop_data *
+ecmp_nexthop_alloc_entry(const struct sbrec_ecmp_nexthop *sb_ecmp_nh,
+                         struct hmap *map)
+{
+    struct ecmp_nexthop_data *e = xmalloc(sizeof *e);
+    e->sb_ecmp_nh = sb_ecmp_nh;
+
+    const char *sb_port = sb_ecmp_nh->port->logical_port;
+    const char *sb_nexthop = sb_ecmp_nh->nexthop;
+
+    uint32_t hash = hash_string(sb_nexthop, 0);
+    hash = hash_add(hash, hash_string(sb_port, 0));
+    hmap_insert(map, &e->hmap_node, hash);
+
+    return e;
+}
+
+static struct ecmp_nexthop_data *
+ecmp_nexthop_find_entry(const char *nexthop, const char *port,
+                        struct hmap *map)
+{
+    uint32_t hash = hash_string(nexthop, 0);
+    hash = hash_add(hash, hash_string(port, 0));
+
+    struct ecmp_nexthop_data *e;
+    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) {
+        const char *sb_port = e->sb_ecmp_nh->port->logical_port;
+        const char *sb_nexthop = e->sb_ecmp_nh->nexthop;
+        if (!strcmp(sb_nexthop, nexthop) && !strcmp(sb_port, port)) {
+            return e;
+        }
+    }
+    return NULL;
+}
+
+void
+build_ecmp_nexthop_table(
+        struct ovsdb_idl_txn *ovnsb_txn,
+        const struct hmap *lr_ports, const struct hmap *routes,
+        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
+{
+    if (!ovnsb_txn) {
+        return;
+    }
+
+    struct hmap sb_nexthops_map = HMAP_INITIALIZER(&sb_nexthops_map);
+
+    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
+    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
+                                       sbrec_ecmp_nexthop_table) {
+        struct ecmp_nexthop_data *e = ecmp_nexthop_alloc_entry(
+                sb_ecmp_nexthop, &sb_nexthops_map);
+        e->stale = true;
+    }
+
+    struct parsed_route *pr;
+    HMAP_FOR_EACH (pr, key_node, routes) {
+        if (!pr->ecmp_symmetric_reply) {
+            continue;
+        }
+
+        if (!pr->out_port) {
+            continue;
+        }
+
+        struct ovn_port *out_port = ovn_port_find(lr_ports, pr->out_port->key);
+        if (!out_port || !out_port->sb) {
+            continue;
+        }
+
+        const struct nbrec_logical_router_static_route *r = pr->route;
+        const char *pb_name = out_port->sb->logical_port;
+
+        struct ecmp_nexthop_data *e = ecmp_nexthop_find_entry(
+                r->nexthop, pb_name, &sb_nexthops_map);
+        if (!e) {
+            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
+            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
+            sbrec_ecmp_nexthop_set_port(sb_ecmp_nexthop, out_port->sb);
+            e = ecmp_nexthop_alloc_entry(sb_ecmp_nexthop, &sb_nexthops_map);
+        }
+        e->stale = false;
+    }
+
+    struct ecmp_nexthop_data *e;
+    HMAP_FOR_EACH_POP (e, hmap_node, &sb_nexthops_map) {
+        if (e->stale) {
+            sbrec_ecmp_nexthop_delete(e->sb_ecmp_nh);
+        }
+        free(e);
+    }
+    hmap_destroy(&sb_nexthops_map);
+}
+
 /* Returns a string of the IP address of the router port 'op' that
  * overlaps with 'ip_s".  If one is not found, returns NULL.
  *
@@ -11160,10 +11260,11 @@  parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports,
     }
 
     /* Verify that ip_prefix and nexthop are on the same network. */
+    struct ovn_port *out_port = NULL;
     if (!is_discard_route &&
         !find_static_route_outport(od, lr_ports, route,
                                    IN6_IS_ADDR_V4MAPPED(&prefix),
-                                   NULL, NULL)) {
+                                   NULL, &out_port)) {
         return;
     }
 
@@ -11206,6 +11307,7 @@  parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports,
     new_pr->hash = route_hash(new_pr);
     new_pr->route = route;
     new_pr->nbr = od->nbr;
+    new_pr->out_port = out_port;
     new_pr->ecmp_symmetric_reply = smap_get_bool(&route->options,
                                                  "ecmp_symmetric_reply",
                                                  false);
diff --git a/northd/northd.h b/northd/northd.h
index c1442ff40..3bd2e29e3 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -703,6 +703,7 @@  struct parsed_route {
     uint32_t route_table_id;
     uint32_t hash;
     const struct nbrec_logical_router_static_route *route;
+    struct ovn_port *out_port;
     bool ecmp_symmetric_reply;
     bool is_discard_route;
     const struct nbrec_logical_router *nbr;
@@ -746,6 +747,11 @@  void bfd_destroy(struct bfd_data *);
 void bfd_sync_init(struct bfd_sync_data *);
 void bfd_sync_destroy(struct bfd_sync_data *);
 
+void build_ecmp_nexthop_table(
+        struct ovsdb_idl_txn *ovnsb_txn,
+        const struct hmap *lr_ports, const struct hmap *routes,
+        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table);
+
 struct lflow_table;
 struct lr_stateful_tracked_data;
 struct ls_stateful_tracked_data;
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index 73abf2c8d..864cb0ed6 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@ 
 {
     "name": "OVN_Southbound",
-    "version": "20.37.0",
-    "cksum": "1950136776 31493",
+    "version": "20.38.0",
+    "cksum": "466210938 32119",
     "tables": {
         "SB_Global": {
             "columns": {
@@ -610,6 +610,19 @@ 
                                           "refTable": "Datapath_Binding"}}}},
             "indexes": [["logical_port", "ip"]],
             "isRoot": true},
+        "ECMP_Nexthop": {
+            "columns": {
+                "nexthop": {"type": "string"},
+                "port": {"type": {"key": {"type": "uuid",
+                                          "refTable": "Port_Binding",
+                                          "refType": "strong"},
+                                   "min": 0, "max": 1}},
+                "mac": {"type": "string"},
+                "external_ids": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}}},
+            "indexes": [["nexthop", "port"]],
+            "isRoot": true},
         "Chassis_Template_Var": {
             "columns": {
                 "chassis": {"type": "string"},
diff --git a/ovn-sb.xml b/ovn-sb.xml
index ea4adc1c3..ea1d484a7 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -5217,4 +5217,35 @@  tcp.flags = RST;
       The set of variable values for a given chassis.
     </column>
   </table>
+
+  <table name="ECMP_Nexthop">
+    <p>
+      Each record in this table represents an active ECMP route committed by
+      <code>ovn-northd</code> to <code>ovs</code> connection-tracking table.
+      <code>ECMP_Nexthop</code> table is used by <code>ovn-controller</code>
+      to track active ct entries and to flush stale ones.
+    </p>
+    <column name="nexthop">
+      <p>
+        Nexthop IP address for this ECMP route. Nexthop IP address should
+        be the IP address of a connected router port or the IP address of
+        an external device used as nexthop for the given destination.
+      </p>
+    </column>
+    <column name="port">
+      <p>
+        The reference to <ref table="Port_Binding"/> table for the port used
+        to connect to the configured next-hop.
+      </p>
+    </column>
+    <column name="mac">
+      <p>
+        Nexthop mac address.
+      </p>
+    </column>
+
+    <column name="external_ids">
+      See <em>External IDs</em> at the beginning of this document.
+    </column>
+  </table>
 </database>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 8477e4250..1e01c2614 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -3886,7 +3886,7 @@  wait_row_count bfd 1 logical_port=r0-sw3 detect_mult=5 dst_ip=192.168.3.2 \
 check_engine_stats northd norecompute nocompute
 check_engine_stats bfd recompute nocompute
 check_engine_stats lflow recompute nocompute
-check_engine_stats northd_output norecompute compute
+check_engine_stats northd_output recompute nocompute
 CHECK_NO_CHANGE_AFTER_RECOMPUTE
 check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
 
@@ -3901,7 +3901,7 @@  wait_row_count bfd 1 logical_port=r0-sw1 min_rx=1000 min_tx=1000 detect_mult=100
 check_engine_stats northd norecompute nocompute
 check_engine_stats bfd recompute nocompute
 check_engine_stats lflow recompute nocompute
-check_engine_stats northd_output norecompute compute
+check_engine_stats northd_output recompute nocompute
 CHECK_NO_CHANGE_AFTER_RECOMPUTE
 check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
 
@@ -3913,7 +3913,7 @@  check_engine_stats northd recompute nocompute
 check_engine_stats bfd recompute nocompute
 check_engine_stats static_routes recompute nocompute
 check_engine_stats lflow recompute nocompute
-check_engine_stats northd_output norecompute compute
+check_engine_stats northd_output recompute nocompute
 CHECK_NO_CHANGE_AFTER_RECOMPUTE
 check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
 
@@ -3929,7 +3929,7 @@  check_engine_stats northd recompute nocompute
 check_engine_stats bfd recompute nocompute
 check_engine_stats static_routes recompute nocompute
 check_engine_stats lflow recompute nocompute
-check_engine_stats northd_output norecompute compute
+check_engine_stats northd_output recompute nocompute
 CHECK_NO_CHANGE_AFTER_RECOMPUTE
 check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
 
@@ -3941,7 +3941,7 @@  check_engine_stats northd recompute nocompute
 check_engine_stats bfd recompute nocompute
 check_engine_stats route_policies recompute nocompute
 check_engine_stats lflow recompute nocompute
-check_engine_stats northd_output norecompute compute
+check_engine_stats northd_output recompute nocompute
 CHECK_NO_CHANGE_AFTER_RECOMPUTE
 check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
 
@@ -3976,7 +3976,7 @@  check_engine_stats northd recompute nocompute
 check_engine_stats bfd recompute nocompute
 check_engine_stats static_routes recompute nocompute
 check_engine_stats lflow recompute nocompute
-check_engine_stats northd_output norecompute compute
+check_engine_stats northd_output recompute nocompute
 CHECK_NO_CHANGE_AFTER_RECOMPUTE
 check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
 
@@ -3991,7 +3991,7 @@  check_engine_stats northd recompute nocompute
 check_engine_stats bfd recompute nocompute
 check_engine_stats route_policies recompute nocompute
 check_engine_stats lflow recompute nocompute
-check_engine_stats northd_output norecompute compute
+check_engine_stats northd_output recompute nocompute
 CHECK_NO_CHANGE_AFTER_RECOMPUTE
 check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
 
@@ -6816,6 +6816,10 @@  check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
 
 # ECMP flows will be added even if there is only one next-hop.
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10
+check_row_count ECMP_Nexthop 1
+uuid=$(fetch_column Port_Binding _uuid logical_port=lr0-public)
+check_column 192.168.0.10 ECMP_Nexthop nexthop
+check_column "$uuid" ECMP_Nexthop port
 
 ovn-sbctl dump-flows lr0 > lr0flows
 
@@ -6835,6 +6839,13 @@  AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn
 ])
 
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20
+check_row_count ECMP_Nexthop 2
+AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl
+192.168.0.10
+])
+AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl
+192.168.0.20
+])
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
@@ -6864,6 +6875,13 @@  AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [
 
 # add ecmp route with wrong nexthop
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20
+check_row_count ECMP_Nexthop 2
+AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl
+192.168.0.10
+])
+AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl
+192.168.0.20
+])
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
@@ -6883,6 +6901,7 @@  AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.
 
 check ovn-nbctl lr-route-del lr0
 wait_row_count nb:Logical_Router_Static_Route 0
+check_row_count ECMP_Nexthop 0
 
 check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
 ovn-sbctl dump-flows lr0 > lr0flows