diff mbox series

[ovs-dev,4/4] ofctrl: Introduce ecmp_nexthop_monitor.

Message ID a242e61072e6587779a5bc5ca243dcc22808eb23.1731495611.git.lorenzo.bianconi@redhat.com
State Changes Requested
Delegated to: Dumitru Ceara
Headers show
Series Introduce ECMP_nexthop monitor in ovn-controller | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Lorenzo Bianconi Nov. 13, 2024, 11:05 a.m. UTC
Introduce ecmp_nexthop_monitor in ovn-controller in order to track and
flush ecmp-symmetric reply ct entries when requested by the CMS (e.g.
removing the related static ecmp routes). CT entries are flushed using
the ethernet mac address stored in ct_label.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 NEWS                               |   2 +
 controller/automake.mk             |   4 +-
 controller/ecmp-next-hop-monitor.c | 184 ++++++++++
 controller/ecmp-next-hop-monitor.h |  25 ++
 controller/ofctrl.c                |   7 +
 controller/ofctrl.h                |   3 +
 controller/ovn-controller.c        |   3 +
 include/ovn/logical-fields.h       |   3 +
 tests/system-ovn.at                | 526 +++++++++++++++++++++++++++++
 9 files changed, 756 insertions(+), 1 deletion(-)
 create mode 100644 controller/ecmp-next-hop-monitor.c
 create mode 100644 controller/ecmp-next-hop-monitor.h

Comments

Dumitru Ceara Dec. 18, 2024, 12:12 p.m. UTC | #1
On 11/13/24 12:05 PM, Lorenzo Bianconi wrote:
> Introduce ecmp_nexthop_monitor in ovn-controller in order to track and
> flush ecmp-symmetric reply ct entries when requested by the CMS (e.g.
> removing the related static ecmp routes). CT entries are flushed using
> the ethernet mac address stored in ct_label.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---

Hi Lorenzo,

Thanks for the patch!

>  NEWS                               |   2 +
>  controller/automake.mk             |   4 +-
>  controller/ecmp-next-hop-monitor.c | 184 ++++++++++
>  controller/ecmp-next-hop-monitor.h |  25 ++
>  controller/ofctrl.c                |   7 +
>  controller/ofctrl.h                |   3 +
>  controller/ovn-controller.c        |   3 +
>  include/ovn/logical-fields.h       |   3 +
>  tests/system-ovn.at                | 526 +++++++++++++++++++++++++++++
>  9 files changed, 756 insertions(+), 1 deletion(-)
>  create mode 100644 controller/ecmp-next-hop-monitor.c
>  create mode 100644 controller/ecmp-next-hop-monitor.h
> 
> diff --git a/NEWS b/NEWS
> index 1f8f54d5d..f46285d32 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -9,6 +9,8 @@ Post v24.09.0
>       ECMP-nexthop.
>       By default ovn-controller continuously sends ARP/ND packets for
>       ECMP-nexthop.
> +   - Introduce ovn-controller ECMP_nexthop monitor in order to flush stale ct

While we (OVN developers) know what "ct" stands for, the NEWS file
targets users.  I think we should rephrase this to:

"Auto flush ECMP symmetric reply connection states when an ECMP route is
removed by the CMS."

> +     entries when related ecmp routes are removed by the CMS.
>  
>  OVN v24.09.0 - 13 Sep 2024
>  --------------------------
> diff --git a/controller/automake.mk b/controller/automake.mk
> index bb0bf2d33..766e36382 100644
> --- a/controller/automake.mk
> +++ b/controller/automake.mk
> @@ -51,7 +51,9 @@ controller_ovn_controller_SOURCES = \
>  	controller/ct-zone.h \
>  	controller/ct-zone.c \
>  	controller/ovn-dns.c \
> -	controller/ovn-dns.h
> +	controller/ovn-dns.h \
> +	controller/ecmp-next-hop-monitor.h \
> +	controller/ecmp-next-hop-monitor.c
>  
>  controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la
>  man_MANS += controller/ovn-controller.8
> diff --git a/controller/ecmp-next-hop-monitor.c b/controller/ecmp-next-hop-monitor.c
> new file mode 100644
> index 000000000..bafe9750f
> --- /dev/null
> +++ b/controller/ecmp-next-hop-monitor.c
> @@ -0,0 +1,184 @@
> +/* Copyright (c) 2024, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include "ct-zone.h"
> +#include "lib/ovn-util.h"
> +#include "lib/simap.h"
> +#include "openvswitch/hmap.h"
> +#include "openvswitch/ofp-ct.h"
> +#include "openvswitch/rconn.h"
> +#include "openvswitch/vlog.h"
> +#include "ovn/logical-fields.h"
> +#include "ovn-sb-idl.h"
> +#include "controller/ecmp-next-hop-monitor.h"
> +
> +VLOG_DEFINE_THIS_MODULE(ecmp_next_hop_monitor);
> +

We don't log anything in this module.  Should we?  If not, we can remove
the line above.

> +static struct hmap ecmp_nexthop;
> +
> +struct ecmp_nexthop_data {
> +    struct hmap_node hmap_node;
> +    uint16_t zone_id;
> +    char *nexthop;
> +    char *mac;
> +};
> +
> +void ecmp_nexthop_init(void)
> +{
> +    hmap_init(&ecmp_nexthop);
> +}
> +
> +static void
> +ecmp_nexthop_erase_entry(struct ecmp_nexthop_data *e)

Nit: I see you used "erase" on other occasions in the past for other
features, but I think in most places in the code we call these kinds of
functions *_destroy().  We could call this ecmp_nexthop_destroy_entry(),
wdyt?

> +{
> +    free(e->nexthop);
> +    free(e->mac);
> +    free(e);
> +}
> +
> +static void
> +ecmp_nexthop_destroy_map(struct hmap *map)
> +{
> +    struct ecmp_nexthop_data *e;
> +    HMAP_FOR_EACH_POP (e, hmap_node, map) {
> +        ecmp_nexthop_erase_entry(e);
> +    }
> +    hmap_destroy(map);
> +}
> +
> +void ecmp_nexthop_destroy(void)
> +{
> +    ecmp_nexthop_destroy_map(&ecmp_nexthop);
> +}
> +
> +static struct ecmp_nexthop_data *
> +ecmp_nexthop_alloc_entry(const char *nexthop, const char *mac,
> +                         const uint16_t zone_id, struct hmap *map)
> +{
> +    struct ecmp_nexthop_data *e = xmalloc(sizeof *e);
> +    e->nexthop = xstrdup(nexthop);
> +    e->mac = xstrdup(mac);
> +    e->zone_id = zone_id;
> +
> +    uint32_t hash = hash_string(nexthop, 0);
> +    hash = hash_add(hash, hash_string(mac, 0));
> +    hash = hash_add(hash, zone_id);
> +    hmap_insert(map, &e->hmap_node, hash);
> +
> +    return e;
> +}
> +
> +static struct ecmp_nexthop_data *
> +ecmp_nexthop_find_entry(const char *nexthop, const char *mac,
> +                        const uint16_t zone_id, struct hmap *map)
> +{
> +    uint32_t hash = hash_string(nexthop, 0);
> +    hash = hash_add(hash, hash_string(mac, 0));
> +    hash = hash_add(hash, zone_id);
> +
> +    struct ecmp_nexthop_data *e;
> +    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) {
> +        if (!strcmp(e->nexthop, nexthop) &&
> +            !strcmp(e->mac, mac) && e->zone_id == zone_id) {
> +            return e;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static void
> +ecmp_nexthop_monitor_flush_ct_entry(const struct rconn *swconn,
> +                                    const char *mac, uint16_t zone_id,
> +                                    struct ovs_list *msgs)
> +{
> +    struct eth_addr ea;
> +    if (!ovs_scan(mac, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
> +        return;
> +    }
> +
> +    ovs_u128 mask = {
> +        /* ct_label.ecmp_reply_eth BITS[32-79] */
> +        .u64.hi = OVN_CT_ECMP_ETH_HIGH,
> +        .u64.lo = OVN_CT_ECMP_ETH_LOW,
> +    };
> +
> +    ovs_be32 lo = get_unaligned_be32((void *)&ea.be16[1]);
> +    ovs_u128 nexthop = {
> +        .u64.hi = ntohs(ea.be16[0]),
> +        .u64.lo = (uint64_t) ntohl(lo) << 32,
> +    };
> +
> +    struct ofp_ct_match match = {
> +        .labels = nexthop,
> +        .labels_mask = mask,
> +    };
> +    struct ofpbuf *msg = ofp_ct_match_encode(&match, &zone_id,
> +                                             rconn_get_version(swconn));
> +    ovs_list_push_back(msgs, &msg->list_node);
> +}
> +
> +void
> +ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,
> +                         const struct shash *current_ct_zones,
> +                         const struct rconn *swconn, struct ovs_list *msgs)
> +{
> +    struct hmap sb_ecmp_nexthop = HMAP_INITIALIZER(&sb_ecmp_nexthop);
> +
> +    const struct sbrec_ecmp_nexthop *sbrec_ecmp_nexthop;
> +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sbrec_ecmp_nexthop, enh_table) {

We should filter out non-local datapaths.

> +        struct sbrec_port_binding *pb = sbrec_ecmp_nexthop->port;
> +        if (!pb) {

Can this ever happen?  The port is a strong reference in the SB and it
should be mandatory (see comment in patch 1/4).

> +            continue;
> +        }
> +
> +        const char *dp_name = smap_get(&pb->datapath->external_ids, "name");
> +        if (!dp_name) {
> +            continue;
> +        }
> +
> +        char *name = xasprintf("%s_dnat", dp_name);

This makes assumptions about how the zone name is created in ct-zone.c.

We have the alloc_nat_zone_key(), please use that instead.

> +        struct ct_zone *ct_zone = shash_find_data(current_ct_zones, name);
> +        free(name);
> +
> +        if (!ct_zone) {
> +            continue;
> +        }
> +
> +        if (!ecmp_nexthop_find_entry(sbrec_ecmp_nexthop->nexthop,
> +                                     sbrec_ecmp_nexthop->mac, ct_zone->zone,
> +                                     &ecmp_nexthop)) {
> +            ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop,
> +                                     sbrec_ecmp_nexthop->mac,
> +                                     ct_zone->zone, &ecmp_nexthop);
> +        }
> +        ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop,
> +                                 sbrec_ecmp_nexthop->mac, ct_zone->zone,
> +                                 &sb_ecmp_nexthop);
> +    }
> +
> +    struct ecmp_nexthop_data *e;
> +    HMAP_FOR_EACH_SAFE (e, hmap_node, &ecmp_nexthop) {
> +        if (!ecmp_nexthop_find_entry(e->nexthop, e->mac, e->zone_id,
> +                                     &sb_ecmp_nexthop)) {
> +            ecmp_nexthop_monitor_flush_ct_entry(swconn, e->mac,
> +                                                e->zone_id, msgs);
> +            hmap_remove(&ecmp_nexthop, &e->hmap_node);
> +            ecmp_nexthop_erase_entry(e);
> +        }
> +    }
> +
> +    ecmp_nexthop_destroy_map(&sb_ecmp_nexthop);
> +}
> diff --git a/controller/ecmp-next-hop-monitor.h b/controller/ecmp-next-hop-monitor.h
> new file mode 100644
> index 000000000..ee8278e3b
> --- /dev/null
> +++ b/controller/ecmp-next-hop-monitor.h
> @@ -0,0 +1,25 @@
> +/* Copyright (c) 2024, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef OVN_CMP_NEXT_HOP_MONITOR_H
> +#define OVN_CMP_NEXT_HOP_MONITOR_H

OVN_CMP_NEXT_HOP_MONITOR_H?

Did you mean OVN_ECMP_NEXT_HOP_MONITOR_H instead?  Same thing in the
comment at the end of the file.

> +
> +void ecmp_nexthop_init(void);
> +void ecmp_nexthop_destroy(void);
> +void ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,
> +                              const struct shash *current_ct_zones,
> +                              const struct rconn *swconn,
> +                              struct ovs_list *msgs);
> +#endif /* OVN_CMP_NEXT_HOP_MONITOR_H */
> diff --git a/controller/ofctrl.c b/controller/ofctrl.c
> index f9387d375..e44da749d 100644
> --- a/controller/ofctrl.c
> +++ b/controller/ofctrl.c
> @@ -54,6 +54,7 @@
>  #include "vswitch-idl.h"
>  #include "ovn-sb-idl.h"
>  #include "ct-zone.h"
> +#include "ecmp-next-hop-monitor.h"
>  
>  VLOG_DEFINE_THIS_MODULE(ofctrl);
>  
> @@ -425,6 +426,7 @@ ofctrl_init(struct ovn_extend_table *group_table,
>      tx_counter = rconn_packet_counter_create();
>      hmap_init(&installed_lflows);
>      hmap_init(&installed_pflows);
> +    ecmp_nexthop_init();
>      ovs_list_init(&flow_updates);
>      ovn_init_symtab(&symtab);
>      groups = group_table;
> @@ -877,6 +879,7 @@ ofctrl_destroy(void)
>      expr_symtab_destroy(&symtab);
>      shash_destroy(&symtab);
>      ofctrl_meter_bands_destroy();
> +    ecmp_nexthop_destroy();
>  }
>  
>  uint64_t
> @@ -2662,8 +2665,10 @@ void
>  ofctrl_put(struct ovn_desired_flow_table *lflow_table,
>             struct ovn_desired_flow_table *pflow_table,
>             struct shash *pending_ct_zones,
> +           struct shash *current_ct_zones,
>             struct hmap *pending_lb_tuples,
>             struct ovsdb_idl_index *sbrec_meter_by_name,
> +           const struct sbrec_ecmp_nexthop_table *enh_table,
>             uint64_t req_cfg,
>             bool lflows_changed,
>             bool pflows_changed)
> @@ -2704,6 +2709,8 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table,
>      /* OpenFlow messages to send to the switch to bring it up-to-date. */
>      struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs);
>  
> +    ecmp_nexthop_monitor_run(enh_table, current_ct_zones, swconn, &msgs);
> +
>      /* Iterate through ct zones that need to be flushed. */
>      struct shash_node *iter;
>      SHASH_FOR_EACH(iter, pending_ct_zones) {
> diff --git a/controller/ofctrl.h b/controller/ofctrl.h
> index 129e3b6ad..5735cd553 100644
> --- a/controller/ofctrl.h
> +++ b/controller/ofctrl.h
> @@ -31,6 +31,7 @@ struct ofpbuf;
>  struct ovsrec_bridge;
>  struct ovsrec_open_vswitch_table;
>  struct sbrec_meter_table;
> +struct sbrec_ecmp_nexthop_table;
>  struct shash;
>  
>  struct ovn_desired_flow_table {
> @@ -57,8 +58,10 @@ enum mf_field_id ofctrl_get_mf_field_id(void);
>  void ofctrl_put(struct ovn_desired_flow_table *lflow_table,
>                  struct ovn_desired_flow_table *pflow_table,
>                  struct shash *pending_ct_zones,
> +                struct shash *current_ct_zones,
>                  struct hmap *pending_lb_tuples,
>                  struct ovsdb_idl_index *sbrec_meter_by_name,
> +                const struct sbrec_ecmp_nexthop_table *enh_table,
>                  uint64_t nb_cfg,
>                  bool lflow_changed,
>                  bool pflow_changed);
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 6cee6450d..4b05077d3 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -5821,8 +5821,11 @@ main(int argc, char *argv[])
>                          ofctrl_put(&lflow_output_data->flow_table,
>                                     &pflow_output_data->flow_table,
>                                     &ct_zones_data->ctx.pending,
> +                                   &ct_zones_data->ctx.current,
>                                     &lb_data->removed_tuples,
>                                     sbrec_meter_by_name,
> +                                   sbrec_ecmp_nexthop_table_get(
> +                                        ovnsb_idl_loop.idl),
>                                     ofctrl_seqno_get_req_cfg(),
>                                     engine_node_changed(&en_lflow_output),
>                                     engine_node_changed(&en_pflow_output));
> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
> index d563e044c..a024b0cd3 100644
> --- a/include/ovn/logical-fields.h
> +++ b/include/ovn/logical-fields.h
> @@ -212,6 +212,9 @@ const struct ovn_field *ovn_field_from_name(const char *name);
>  #define OVN_CT_ECMP_ETH_1ST_BIT 32
>  #define OVN_CT_ECMP_ETH_END_BIT 79
>  
> +#define OVN_CT_ECMP_ETH_LOW     (((1ULL << OVN_CT_ECMP_ETH_1ST_BIT) - 1) << 32)
> +#define OVN_CT_ECMP_ETH_HIGH    ((1ULL << (OVN_CT_ECMP_ETH_END_BIT - 63)) - 1)
> +

These two defines are only relevant to
ecmp_nexthop_monitor_flush_ct_entry() and we only need them because we
pass the labels_mask to ovs as a set of two 64-bit values.  I'd move the
defines just above the ecmp_nexthop_monitor_flush_ct_entry() function
definition so it's clear what we're doing.

>  #define OVN_CT_STR(LABEL_VALUE) OVS_STRINGIZE(LABEL_VALUE)
>  #define OVN_CT_MASKED_STR(LABEL_VALUE) \
>      OVS_STRINGIZE(LABEL_VALUE) "/" OVS_STRINGIZE(LABEL_VALUE)
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index 6dfc3055a..e9d15898f 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -14002,3 +14002,529 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
>  /.*terminating with signal 15.*/d"])
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ECMP Flush CT entries - IPv4])
> +AT_KEYWORDS([ecmp])
> +ovn_start
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +ADD_BR([br-int])
> +ADD_BR([br-ext])
> +ADD_BR([br-ecmp])
> +
> +ovs-ofctl add-flow br-ext action=normal
> +ovs-ofctl add-flow br-ecmp action=normal

Missing check?

> +# Set external-ids in br-int needed for ovn-controller
> +ovs-vsctl \

Missing check.

> +        -- set Open_vSwitch . external-ids:system-id=hv1 \
> +        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> +        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
> +
> +# Start ovn-controller
> +start_daemon ovn-controller
> +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1
> +

'arp-max-timeout-sec' doesn't exist as an actual option for
ovn-controller.  Also, missing check.

> +check ovn-nbctl lr-add R1
> +check ovn-nbctl set logical_router R1 options:chassis=hv1
> +check ovn-nbctl lr-add R2
> +check ovn-nbctl set logical_router R2 options:chassis=hv1
> +
> +check ovn-nbctl ls-add sw0
> +check ovn-nbctl ls-add sw1
> +check ovn-nbctl ls-add public
> +
> +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24
> +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 172.16.1.1/24
> +
> +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 192.168.2.1/24
> +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 172.16.1.5/24
> +
> +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
> +    type=router options:router-port=rp-sw0 \
> +    -- lsp-set-addresses sw0-rp router
> +
> +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
> +    type=router options:router-port=rp-sw1 \
> +    -- lsp-set-addresses sw1-rp router
> +
> +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \
> +    type=router options:router-port=rp-public1 \
> +    -- lsp-set-addresses public-rp1 router
> +
> +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \
> +    type=router options:router-port=rp-public2 \
> +    -- lsp-set-addresses public-rp2 router
> +
> +ADD_NAMESPACES(alice)
> +ADD_VETH(alice, alice, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
> +         "192.168.1.1")
> +check ovn-nbctl lsp-add sw0 alice \
> +    -- lsp-set-addresses alice "f0:00:00:01:02:03 192.168.1.2"
> +
> +ADD_NAMESPACES(peter)
> +ADD_VETH(peter, peter, br-int, "192.168.2.2/24", "f0:00:02:01:02:03", \
> +         "192.168.2.1")
> +check ovn-nbctl lsp-add sw1 peter \
> +    -- lsp-set-addresses peter "f0:00:02:01:02:03 192.168.2.2"
> +
> +check ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext
> +check ovn-nbctl lsp-add public public1 \
> +        -- lsp-set-addresses public1 unknown \
> +        -- lsp-set-type public1 localnet \
> +        -- lsp-set-options public1 network_name=phynet
> +
> +ADD_NAMESPACES(ecmp-path0)
> +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "172.16.1.2/24", "f0:00:00:01:02:04", "172.16.1.1")
> +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "172.16.2.2/24", "f0:00:00:01:03:04")
> +
> +ADD_NAMESPACES(ecmp-path1)
> +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "172.16.1.3/24", "f0:00:00:01:02:05", "172.16.1.1")
> +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "172.16.2.3/24", "f0:00:00:01:03:05")
> +
> +ADD_NAMESPACES(bob)
> +ADD_VETH(bob, bob, br-ecmp, "172.16.2.10/24", "f0:00:00:01:02:06", "172.16.2.2")
> +
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3
> +
> +wait_for_ports_up
> +check ovn-nbctl --wait=hv sync
> +NETNS_DAEMONIZE([alice], [nc -l -k 80], [alice.pid])
> +NETNS_DAEMONIZE([peter], [nc -l -k 80], [peter.pid])
> +
> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
> +
> +wait_row_count ECMP_Nexthop 2
> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
> +])
> +
> +# Change bob default IP address
> +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.2])
> +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.3])
> +
> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
> +
> +wait_row_count ECMP_Nexthop 2
> +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> +])
> +
> +# Remove first ECMP route
> +check ovn-nbctl lr-route-del  R1 172.16.2.0/24 172.16.1.2
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 1
> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
> +
> +ovn-sbctl list ECMP_Nexthop > /tmp/ecmp-nh
> +

Debugging leftover?

> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> +])
> +
> +# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2
> +wait_row_count ECMP_Nexthop 2
> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
> +
> +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05])
> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.2'
> +
> +# Change bob default IP address
> +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.3])
> +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.2])
> +
> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> +])
> +
> +# Remove first ECMP route
> +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 1
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +])
> +
> +# Remove second ECMP route
> +check ovn-nbctl lr-route-del R1
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 0
> +
> +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06])
> +
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3
> +
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.2
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.3
> +
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 4
> +
> +NS_CHECK_EXEC([ecmp-path0], [ip route add 192.168.2.2/32 via 172.16.1.5])
> +NS_CHECK_EXEC([ecmp-path1], [ip route add 192.168.2.2/32 via 172.16.1.5])
> +
> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
> +
> +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +NS_CHECK_EXEC([bob], [nc -z 192.168.2.2 80], [0])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> +])
> +
> +check ovn-nbctl lr-route-del R1
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 2
> +wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='172.16.1.2'
> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> +])
> +
> +check ovn-nbctl lr-route-del R2
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 0
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +])
> +
> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +
> +as ovn-sb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as ovn-nb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as northd
> +OVS_APP_EXIT_AND_WAIT([ovn-northd])
> +
> +as
> +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
> +/.*terminating with signal 15.*/d"])
> +AT_CLEANUP
> +])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ECMP Flush CT entries - IPv6])
> +AT_KEYWORDS([ecmp])
> +ovn_start
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +ADD_BR([br-int])
> +ADD_BR([br-ext])
> +ADD_BR([br-ecmp])
> +
> +ovs-ofctl add-flow br-ext action=normal
> +ovs-ofctl add-flow br-ecmp action=normal
> +# Set external-ids in br-int needed for ovn-controller
> +ovs-vsctl \
> +        -- set Open_vSwitch . external-ids:system-id=hv1 \
> +        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> +        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true

Missing checks for ovs-ofctl and ovs-vsctl.

> +
> +# Start ovn-controller
> +start_daemon ovn-controller
> +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1
> +

Wrong option name and missing check.

> +check ovn-nbctl lr-add R1
> +check ovn-nbctl set logical_router R1 options:chassis=hv1
> +check ovn-nbctl lr-add R2
> +check ovn-nbctl set logical_router R2 options:chassis=hv1
> +
> +check ovn-nbctl ls-add sw0
> +check ovn-nbctl ls-add sw1
> +check ovn-nbctl ls-add public
> +
> +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 fd11::1/64
> +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 fd12::1/64
> +
> +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 fd14::1/64
> +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 fd12::5/64
> +
> +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
> +    type=router options:router-port=rp-sw0 \
> +    -- lsp-set-addresses sw0-rp router
> +
> +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
> +    type=router options:router-port=rp-sw1 \
> +    -- lsp-set-addresses sw1-rp router
> +
> +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \
> +    type=router options:router-port=rp-public1 \
> +    -- lsp-set-addresses public-rp1 router
> +
> +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \
> +    type=router options:router-port=rp-public2 \
> +    -- lsp-set-addresses public-rp2 router
> +
> +ADD_NAMESPACES(alice)
> +ADD_VETH(alice, alice, br-int, "fd11::2/64", "f0:00:00:01:02:03", "fd11::1", "nodad")
> +check ovn-nbctl lsp-add sw0 alice -- lsp-set-addresses alice "f0:00:00:01:02:03 fd11::2"
> +
> +ADD_NAMESPACES(peter)
> +ADD_VETH(peter, peter, br-int, "fd14::2/64", "f0:00:02:01:02:03", "fd14::1", "nodad")
> +check ovn-nbctl lsp-add sw1 peter -- lsp-set-addresses peter "f0:00:02:01:02:03 fd14::2"
> +
> +check ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext
> +check ovn-nbctl lsp-add public public1 \
> +        -- lsp-set-addresses public1 unknown \
> +        -- lsp-set-type public1 localnet \
> +        -- lsp-set-options public1 network_name=phynet
> +
> +ADD_NAMESPACES(ecmp-path0)
> +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "fd12::2/64", "f0:00:00:01:02:04", "fd12::1", "nodad")
> +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "fd13::2/64", "f0:00:00:01:03:04")
> +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path0], [ip a show dev ecmp-p02 | grep "fe80::" | grep -v tentative])])
> +
> +ADD_NAMESPACES(ecmp-path1)
> +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "fd12::3/64", "f0:00:00:01:02:05", "fd12::1", "nodad")
> +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "fd13::3/64", "f0:00:00:01:03:05")
> +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path1], [ip a show dev ecmp-p12 | grep "fe80::" | grep -v tentative])])
> +
> +ADD_NAMESPACES(bob)
> +ADD_VETH(bob, bob, br-ecmp, "fd13::a/64", "f0:00:00:01:02:06", "fd13::2", "nodad")
> +
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3
> +
> +NS_CHECK_EXEC([ecmp-path0], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl
> +net.ipv6.conf.all.forwarding = 1
> +])
> +NS_CHECK_EXEC([ecmp-path1], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl
> +net.ipv6.conf.all.forwarding = 1
> +])
> +
> +ovn-nbctl --wait=hv sync
> +NETNS_DAEMONIZE([alice], [nc -6 -l -k 80], [alice.pid])
> +NETNS_DAEMONIZE([peter], [nc -6 -l -k 80], [peter.pid])
> +
> +NS_CHECK_EXEC([bob], [ping6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
> +
> +wait_row_count ECMP_Nexthop 2
> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
> +])
> +
> +# Change bob default IP address
> +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::2])
> +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::3])
> +
> +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
> +
> +wait_row_count ECMP_Nexthop 2
> +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> +])
> +
> +# Remove first ECMP route
> +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 1
> +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> +])
> +
> +# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
> +wait_row_count ECMP_Nexthop 2
> +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
> +#
> +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05])
> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::2"'
> +
> +# Change bob default IP address
> +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::3])
> +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::2])
> +
> +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> +])
> +
> +# Remove first ECMP route
> +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 1
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +])
> +
> +# Remove second ECMP route
> +check ovn-nbctl lr-route-del R1
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 0
> +
> +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06])
> +
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3
> +
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::2
> +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::3
> +
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 4
> +
> +NS_CHECK_EXEC([ecmp-path0], [ip route add fd14::2/128 via fd12::5])
> +NS_CHECK_EXEC([ecmp-path1], [ip route add fd14::2/128 via fd12::5])
> +
> +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
> +
> +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd14::2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +NS_CHECK_EXEC([bob], [nc -6 -z fd14::2 80], [0])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> +tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> +])
> +
> +# Remove second ECMP route
> +check ovn-nbctl lr-route-del R1
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 2
> +wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='"fd12::2"'
> +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> +tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> +])
> +
> +check ovn-nbctl lr-route-del R2
> +check ovn-nbctl --wait=hv sync
> +wait_row_count ECMP_Nexthop 0
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> +])
> +
> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +
> +as ovn-sb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as ovn-nb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as northd
> +OVS_APP_EXIT_AND_WAIT([ovn-northd])
> +
> +as
> +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
> +/.*terminating with signal 15.*/d"])
> +AT_CLEANUP
> +])

Regards,
Dumitru
Lorenzo Bianconi Dec. 19, 2024, 4:04 p.m. UTC | #2
On Dec 18, Dumitru Ceara wrote:
> On 11/13/24 12:05 PM, Lorenzo Bianconi wrote:
> > Introduce ecmp_nexthop_monitor in ovn-controller in order to track and
> > flush ecmp-symmetric reply ct entries when requested by the CMS (e.g.
> > removing the related static ecmp routes). CT entries are flushed using
> > the ethernet mac address stored in ct_label.
> > 
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> > ---
> 
> Hi Lorenzo,
> 
> Thanks for the patch!

Hi Dumitru,

Thanks for the review.

> 
> >  NEWS                               |   2 +
> >  controller/automake.mk             |   4 +-
> >  controller/ecmp-next-hop-monitor.c | 184 ++++++++++
> >  controller/ecmp-next-hop-monitor.h |  25 ++
> >  controller/ofctrl.c                |   7 +
> >  controller/ofctrl.h                |   3 +
> >  controller/ovn-controller.c        |   3 +
> >  include/ovn/logical-fields.h       |   3 +
> >  tests/system-ovn.at                | 526 +++++++++++++++++++++++++++++
> >  9 files changed, 756 insertions(+), 1 deletion(-)
> >  create mode 100644 controller/ecmp-next-hop-monitor.c
> >  create mode 100644 controller/ecmp-next-hop-monitor.h
> > 
> > diff --git a/NEWS b/NEWS
> > index 1f8f54d5d..f46285d32 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -9,6 +9,8 @@ Post v24.09.0
> >       ECMP-nexthop.
> >       By default ovn-controller continuously sends ARP/ND packets for
> >       ECMP-nexthop.
> > +   - Introduce ovn-controller ECMP_nexthop monitor in order to flush stale ct
> 
> While we (OVN developers) know what "ct" stands for, the NEWS file
> targets users.  I think we should rephrase this to:
> 
> "Auto flush ECMP symmetric reply connection states when an ECMP route is
> removed by the CMS."

ack, I will fix it.

> 
> > +     entries when related ecmp routes are removed by the CMS.
> >  
> >  OVN v24.09.0 - 13 Sep 2024
> >  --------------------------
> > diff --git a/controller/automake.mk b/controller/automake.mk
> > index bb0bf2d33..766e36382 100644
> > --- a/controller/automake.mk
> > +++ b/controller/automake.mk
> > @@ -51,7 +51,9 @@ controller_ovn_controller_SOURCES = \
> >  	controller/ct-zone.h \
> >  	controller/ct-zone.c \
> >  	controller/ovn-dns.c \
> > -	controller/ovn-dns.h
> > +	controller/ovn-dns.h \
> > +	controller/ecmp-next-hop-monitor.h \
> > +	controller/ecmp-next-hop-monitor.c
> >  
> >  controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la
> >  man_MANS += controller/ovn-controller.8
> > diff --git a/controller/ecmp-next-hop-monitor.c b/controller/ecmp-next-hop-monitor.c
> > new file mode 100644
> > index 000000000..bafe9750f
> > --- /dev/null
> > +++ b/controller/ecmp-next-hop-monitor.c
> > @@ -0,0 +1,184 @@
> > +/* Copyright (c) 2024, Red Hat, Inc.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#include <config.h>
> > +#include "ct-zone.h"
> > +#include "lib/ovn-util.h"
> > +#include "lib/simap.h"
> > +#include "openvswitch/hmap.h"
> > +#include "openvswitch/ofp-ct.h"
> > +#include "openvswitch/rconn.h"
> > +#include "openvswitch/vlog.h"
> > +#include "ovn/logical-fields.h"
> > +#include "ovn-sb-idl.h"
> > +#include "controller/ecmp-next-hop-monitor.h"
> > +
> > +VLOG_DEFINE_THIS_MODULE(ecmp_next_hop_monitor);
> > +
> 
> We don't log anything in this module.  Should we?  If not, we can remove
> the line above.

ack, I will remove it.

> 
> > +static struct hmap ecmp_nexthop;
> > +
> > +struct ecmp_nexthop_data {
> > +    struct hmap_node hmap_node;
> > +    uint16_t zone_id;
> > +    char *nexthop;
> > +    char *mac;
> > +};
> > +
> > +void ecmp_nexthop_init(void)
> > +{
> > +    hmap_init(&ecmp_nexthop);
> > +}
> > +
> > +static void
> > +ecmp_nexthop_erase_entry(struct ecmp_nexthop_data *e)
> 
> > Nit: I see you used "erase" on other occasions in the past for other
> > features, but I think in most places in the code we call these kinds of
> > functions *_destroy().  We could call this ecmp_nexthop_destroy_entry(),
> > wdyt?

ack

> 
> > +{
> > +    free(e->nexthop);
> > +    free(e->mac);
> > +    free(e);
> > +}
> > +
> > +static void
> > +ecmp_nexthop_destroy_map(struct hmap *map)
> > +{
> > +    struct ecmp_nexthop_data *e;
> > +    HMAP_FOR_EACH_POP (e, hmap_node, map) {
> > +        ecmp_nexthop_erase_entry(e);
> > +    }
> > +    hmap_destroy(map);
> > +}
> > +
> > +void ecmp_nexthop_destroy(void)
> > +{
> > +    ecmp_nexthop_destroy_map(&ecmp_nexthop);
> > +}
> > +
> > +static struct ecmp_nexthop_data *
> > +ecmp_nexthop_alloc_entry(const char *nexthop, const char *mac,
> > +                         const uint16_t zone_id, struct hmap *map)
> > +{
> > +    struct ecmp_nexthop_data *e = xmalloc(sizeof *e);
> > +    e->nexthop = xstrdup(nexthop);
> > +    e->mac = xstrdup(mac);
> > +    e->zone_id = zone_id;
> > +
> > +    uint32_t hash = hash_string(nexthop, 0);
> > +    hash = hash_add(hash, hash_string(mac, 0));
> > +    hash = hash_add(hash, zone_id);
> > +    hmap_insert(map, &e->hmap_node, hash);
> > +
> > +    return e;
> > +}
> > +
> > +static struct ecmp_nexthop_data *
> > +ecmp_nexthop_find_entry(const char *nexthop, const char *mac,
> > +                        const uint16_t zone_id, struct hmap *map)
> > +{
> > +    uint32_t hash = hash_string(nexthop, 0);
> > +    hash = hash_add(hash, hash_string(mac, 0));
> > +    hash = hash_add(hash, zone_id);
> > +
> > +    struct ecmp_nexthop_data *e;
> > +    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) {
> > +        if (!strcmp(e->nexthop, nexthop) &&
> > +            !strcmp(e->mac, mac) && e->zone_id == zone_id) {
> > +            return e;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +static void
> > +ecmp_nexthop_monitor_flush_ct_entry(const struct rconn *swconn,
> > +                                    const char *mac, uint16_t zone_id,
> > +                                    struct ovs_list *msgs)
> > +{
> > +    struct eth_addr ea;
> > +    if (!ovs_scan(mac, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
> > +        return;
> > +    }
> > +
> > +    ovs_u128 mask = {
> > +        /* ct_label.ecmp_reply_eth BITS[32-79] */
> > +        .u64.hi = OVN_CT_ECMP_ETH_HIGH,
> > +        .u64.lo = OVN_CT_ECMP_ETH_LOW,
> > +    };
> > +
> > +    ovs_be32 lo = get_unaligned_be32((void *)&ea.be16[1]);
> > +    ovs_u128 nexthop = {
> > +        .u64.hi = ntohs(ea.be16[0]),
> > +        .u64.lo = (uint64_t) ntohl(lo) << 32,
> > +    };
> > +
> > +    struct ofp_ct_match match = {
> > +        .labels = nexthop,
> > +        .labels_mask = mask,
> > +    };
> > +    struct ofpbuf *msg = ofp_ct_match_encode(&match, &zone_id,
> > +                                             rconn_get_version(swconn));
> > +    ovs_list_push_back(msgs, &msg->list_node);
> > +}
> > +
> > +void
> > +ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,
> > +                         const struct shash *current_ct_zones,
> > +                         const struct rconn *swconn, struct ovs_list *msgs)
> > +{
> > +    struct hmap sb_ecmp_nexthop = HMAP_INITIALIZER(&sb_ecmp_nexthop);
> > +
> > +    const struct sbrec_ecmp_nexthop *sbrec_ecmp_nexthop;
> > +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sbrec_ecmp_nexthop, enh_table) {
> 
> We should filter out non-local datapaths.

ack, I will fix it

> 
> > +        struct sbrec_port_binding *pb = sbrec_ecmp_nexthop->port;
> > +        if (!pb) {
> 
> Can this ever happen?  The port is a strong reference in the SB and it
> should be mandatory (see comment in patch 1/4).

ack, I will fix it

> 
> > +            continue;
> > +        }
> > +
> > +        const char *dp_name = smap_get(&pb->datapath->external_ids, "name");
> > +        if (!dp_name) {
> > +            continue;
> > +        }
> > +
> > +        char *name = xasprintf("%s_dnat", dp_name);
> 
> This makes assumptions about how the zone name is created in ct-zone.c.
> 
> We have the alloc_nat_zone_key(), please use that instead.

ack

> 
> > +        struct ct_zone *ct_zone = shash_find_data(current_ct_zones, name);
> > +        free(name);
> > +
> > +        if (!ct_zone) {
> > +            continue;
> > +        }
> > +
> > +        if (!ecmp_nexthop_find_entry(sbrec_ecmp_nexthop->nexthop,
> > +                                     sbrec_ecmp_nexthop->mac, ct_zone->zone,
> > +                                     &ecmp_nexthop)) {
> > +            ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop,
> > +                                     sbrec_ecmp_nexthop->mac,
> > +                                     ct_zone->zone, &ecmp_nexthop);
> > +        }
> > +        ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop,
> > +                                 sbrec_ecmp_nexthop->mac, ct_zone->zone,
> > +                                 &sb_ecmp_nexthop);
> > +    }
> > +
> > +    struct ecmp_nexthop_data *e;
> > +    HMAP_FOR_EACH_SAFE (e, hmap_node, &ecmp_nexthop) {
> > +        if (!ecmp_nexthop_find_entry(e->nexthop, e->mac, e->zone_id,
> > +                                     &sb_ecmp_nexthop)) {
> > +            ecmp_nexthop_monitor_flush_ct_entry(swconn, e->mac,
> > +                                                e->zone_id, msgs);
> > +            hmap_remove(&ecmp_nexthop, &e->hmap_node);
> > +            ecmp_nexthop_erase_entry(e);
> > +        }
> > +    }
> > +
> > +    ecmp_nexthop_destroy_map(&sb_ecmp_nexthop);
> > +}
> > diff --git a/controller/ecmp-next-hop-monitor.h b/controller/ecmp-next-hop-monitor.h
> > new file mode 100644
> > index 000000000..ee8278e3b
> > --- /dev/null
> > +++ b/controller/ecmp-next-hop-monitor.h
> > @@ -0,0 +1,25 @@
> > +/* Copyright (c) 2024, Red Hat, Inc.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#ifndef OVN_CMP_NEXT_HOP_MONITOR_H
> > +#define OVN_CMP_NEXT_HOP_MONITOR_H
> 
> OVN_CMP_NEXT_HOP_MONITOR_H?
> 
> Did you mean OVN_ECMP_NEXT_HOP_MONITOR_H instead?  Same thing in the
> comment at the end of the file.

ack, I will fix it

> 
> > +
> > +void ecmp_nexthop_init(void);
> > +void ecmp_nexthop_destroy(void);
> > +void ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,
> > +                              const struct shash *current_ct_zones,
> > +                              const struct rconn *swconn,
> > +                              struct ovs_list *msgs);
> > +#endif /* OVN_CMP_NEXT_HOP_MONITOR_H */
> > diff --git a/controller/ofctrl.c b/controller/ofctrl.c
> > index f9387d375..e44da749d 100644
> > --- a/controller/ofctrl.c
> > +++ b/controller/ofctrl.c
> > @@ -54,6 +54,7 @@
> >  #include "vswitch-idl.h"
> >  #include "ovn-sb-idl.h"
> >  #include "ct-zone.h"
> > +#include "ecmp-next-hop-monitor.h"
> >  
> >  VLOG_DEFINE_THIS_MODULE(ofctrl);
> >  
> > @@ -425,6 +426,7 @@ ofctrl_init(struct ovn_extend_table *group_table,
> >      tx_counter = rconn_packet_counter_create();
> >      hmap_init(&installed_lflows);
> >      hmap_init(&installed_pflows);
> > +    ecmp_nexthop_init();
> >      ovs_list_init(&flow_updates);
> >      ovn_init_symtab(&symtab);
> >      groups = group_table;
> > @@ -877,6 +879,7 @@ ofctrl_destroy(void)
> >      expr_symtab_destroy(&symtab);
> >      shash_destroy(&symtab);
> >      ofctrl_meter_bands_destroy();
> > +    ecmp_nexthop_destroy();
> >  }
> >  
> >  uint64_t
> > @@ -2662,8 +2665,10 @@ void
> >  ofctrl_put(struct ovn_desired_flow_table *lflow_table,
> >             struct ovn_desired_flow_table *pflow_table,
> >             struct shash *pending_ct_zones,
> > +           struct shash *current_ct_zones,
> >             struct hmap *pending_lb_tuples,
> >             struct ovsdb_idl_index *sbrec_meter_by_name,
> > +           const struct sbrec_ecmp_nexthop_table *enh_table,
> >             uint64_t req_cfg,
> >             bool lflows_changed,
> >             bool pflows_changed)
> > @@ -2704,6 +2709,8 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table,
> >      /* OpenFlow messages to send to the switch to bring it up-to-date. */
> >      struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs);
> >  
> > +    ecmp_nexthop_monitor_run(enh_table, current_ct_zones, swconn, &msgs);
> > +
> >      /* Iterate through ct zones that need to be flushed. */
> >      struct shash_node *iter;
> >      SHASH_FOR_EACH(iter, pending_ct_zones) {
> > diff --git a/controller/ofctrl.h b/controller/ofctrl.h
> > index 129e3b6ad..5735cd553 100644
> > --- a/controller/ofctrl.h
> > +++ b/controller/ofctrl.h
> > @@ -31,6 +31,7 @@ struct ofpbuf;
> >  struct ovsrec_bridge;
> >  struct ovsrec_open_vswitch_table;
> >  struct sbrec_meter_table;
> > +struct sbrec_ecmp_nexthop_table;
> >  struct shash;
> >  
> >  struct ovn_desired_flow_table {
> > @@ -57,8 +58,10 @@ enum mf_field_id ofctrl_get_mf_field_id(void);
> >  void ofctrl_put(struct ovn_desired_flow_table *lflow_table,
> >                  struct ovn_desired_flow_table *pflow_table,
> >                  struct shash *pending_ct_zones,
> > +                struct shash *current_ct_zones,
> >                  struct hmap *pending_lb_tuples,
> >                  struct ovsdb_idl_index *sbrec_meter_by_name,
> > +                const struct sbrec_ecmp_nexthop_table *enh_table,
> >                  uint64_t nb_cfg,
> >                  bool lflow_changed,
> >                  bool pflow_changed);
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 6cee6450d..4b05077d3 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -5821,8 +5821,11 @@ main(int argc, char *argv[])
> >                          ofctrl_put(&lflow_output_data->flow_table,
> >                                     &pflow_output_data->flow_table,
> >                                     &ct_zones_data->ctx.pending,
> > +                                   &ct_zones_data->ctx.current,
> >                                     &lb_data->removed_tuples,
> >                                     sbrec_meter_by_name,
> > +                                   sbrec_ecmp_nexthop_table_get(
> > +                                        ovnsb_idl_loop.idl),
> >                                     ofctrl_seqno_get_req_cfg(),
> >                                     engine_node_changed(&en_lflow_output),
> >                                     engine_node_changed(&en_pflow_output));
> > diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
> > index d563e044c..a024b0cd3 100644
> > --- a/include/ovn/logical-fields.h
> > +++ b/include/ovn/logical-fields.h
> > @@ -212,6 +212,9 @@ const struct ovn_field *ovn_field_from_name(const char *name);
> >  #define OVN_CT_ECMP_ETH_1ST_BIT 32
> >  #define OVN_CT_ECMP_ETH_END_BIT 79
> >  
> > +#define OVN_CT_ECMP_ETH_LOW     (((1ULL << OVN_CT_ECMP_ETH_1ST_BIT) - 1) << 32)
> > +#define OVN_CT_ECMP_ETH_HIGH    ((1ULL << (OVN_CT_ECMP_ETH_END_BIT - 63)) - 1)
> > +
> 
> These two defines are only relevant to
> ecmp_nexthop_monitor_flush_ct_entry() and we only need them because we
> pass the labels_mask to ovs as a set of two 64bit values.  I'd move the
> defines just above the ecmp_nexthop_monitor_flush_ct_entry() function
> definition so it's clear what we're doing.

ack, I will move them above ecmp_nexthop_monitor_flush_ct_entry()

> 
> >  #define OVN_CT_STR(LABEL_VALUE) OVS_STRINGIZE(LABEL_VALUE)
> >  #define OVN_CT_MASKED_STR(LABEL_VALUE) \
> >      OVS_STRINGIZE(LABEL_VALUE) "/" OVS_STRINGIZE(LABEL_VALUE)
> > diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> > index 6dfc3055a..e9d15898f 100644
> > --- a/tests/system-ovn.at
> > +++ b/tests/system-ovn.at
> > @@ -14002,3 +14002,529 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
> >  /.*terminating with signal 15.*/d"])
> >  AT_CLEANUP
> >  ])
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([ECMP Flush CT entries - IPv4])
> > +AT_KEYWORDS([ecmp])
> > +ovn_start
> > +OVS_TRAFFIC_VSWITCHD_START()
> > +
> > +ADD_BR([br-int])
> > +ADD_BR([br-ext])
> > +ADD_BR([br-ecmp])
> > +
> > +ovs-ofctl add-flow br-ext action=normal
> > +ovs-ofctl add-flow br-ecmp action=normal
> 
> Missing check?
> 
> > +# Set external-ids in br-int needed for ovn-controller
> > +ovs-vsctl \
> 
> Missing check.
> 
> > +        -- set Open_vSwitch . external-ids:system-id=hv1 \
> > +        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> > +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> > +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> > +        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
> > +
> > +# Start ovn-controller
> > +start_daemon ovn-controller
> > +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1
> > +
> 
> > 'arp-max-timeout-sec' doesn't exist as an actual option for
> > ovn-controller.  Also, missing check.

it is arp-nd-max-timeout-sec, I will fix it.

> 
> > +check ovn-nbctl lr-add R1
> > +check ovn-nbctl set logical_router R1 options:chassis=hv1
> > +check ovn-nbctl lr-add R2
> > +check ovn-nbctl set logical_router R2 options:chassis=hv1
> > +
> > +check ovn-nbctl ls-add sw0
> > +check ovn-nbctl ls-add sw1
> > +check ovn-nbctl ls-add public
> > +
> > +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24
> > +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 172.16.1.1/24
> > +
> > +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 192.168.2.1/24
> > +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 172.16.1.5/24
> > +
> > +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
> > +    type=router options:router-port=rp-sw0 \
> > +    -- lsp-set-addresses sw0-rp router
> > +
> > +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
> > +    type=router options:router-port=rp-sw1 \
> > +    -- lsp-set-addresses sw1-rp router
> > +
> > +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \
> > +    type=router options:router-port=rp-public1 \
> > +    -- lsp-set-addresses public-rp1 router
> > +
> > +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \
> > +    type=router options:router-port=rp-public2 \
> > +    -- lsp-set-addresses public-rp2 router
> > +
> > +ADD_NAMESPACES(alice)
> > +ADD_VETH(alice, alice, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
> > +         "192.168.1.1")
> > +check ovn-nbctl lsp-add sw0 alice \
> > +    -- lsp-set-addresses alice "f0:00:00:01:02:03 192.168.1.2"
> > +
> > +ADD_NAMESPACES(peter)
> > +ADD_VETH(peter, peter, br-int, "192.168.2.2/24", "f0:00:02:01:02:03", \
> > +         "192.168.2.1")
> > +check ovn-nbctl lsp-add sw1 peter \
> > +    -- lsp-set-addresses peter "f0:00:02:01:02:03 192.168.2.2"
> > +
> > +check ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext
> > +check ovn-nbctl lsp-add public public1 \
> > +        -- lsp-set-addresses public1 unknown \
> > +        -- lsp-set-type public1 localnet \
> > +        -- lsp-set-options public1 network_name=phynet
> > +
> > +ADD_NAMESPACES(ecmp-path0)
> > +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "172.16.1.2/24", "f0:00:00:01:02:04", "172.16.1.1")
> > +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "172.16.2.2/24", "f0:00:00:01:03:04")
> > +
> > +ADD_NAMESPACES(ecmp-path1)
> > +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "172.16.1.3/24", "f0:00:00:01:02:05", "172.16.1.1")
> > +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "172.16.2.3/24", "f0:00:00:01:03:05")
> > +
> > +ADD_NAMESPACES(bob)
> > +ADD_VETH(bob, bob, br-ecmp, "172.16.2.10/24", "f0:00:00:01:02:06", "172.16.2.2")
> > +
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3
> > +
> > +wait_for_ports_up
> > +check ovn-nbctl --wait=hv sync
> > +NETNS_DAEMONIZE([alice], [nc -l -k 80], [alice.pid])
> > +NETNS_DAEMONIZE([peter], [nc -l -k 80], [peter.pid])
> > +
> > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
> > +
> > +wait_row_count ECMP_Nexthop 2
> > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
> > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
> > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +# Switch bob's default route to the other next hop (172.16.2.3)
> > +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.2])
> > +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.3])
> > +
> > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
> > +
> > +wait_row_count ECMP_Nexthop 2
> > +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
> > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
> > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
> > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +# Remove first ECMP route
> > +check ovn-nbctl lr-route-del  R1 172.16.2.0/24 172.16.1.2
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 1
> > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
> > +
> > +ovn-sbctl list ECMP_Nexthop > /tmp/ecmp-nh
> > +
> 
> Debugging leftover?

yep :)

> 
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2
> > +wait_row_count ECMP_Nexthop 2
> > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
> > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
> > +
> > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05])
> > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.2'
> > +
> > +# Switch bob's default route back to the first next hop (172.16.2.2)
> > +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.3])
> > +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.2])
> > +
> > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +# Remove first ECMP route
> > +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 1
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +])
> > +
> > +# Remove second ECMP route
> > +check ovn-nbctl lr-route-del R1
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 0
> > +
> > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06])
> > +
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3
> > +
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.2
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.3
> > +
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 4
> > +
> > +NS_CHECK_EXEC([ecmp-path0], [ip route add 192.168.2.2/32 via 172.16.1.5])
> > +NS_CHECK_EXEC([ecmp-path1], [ip route add 192.168.2.2/32 via 172.16.1.5])
> > +
> > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
> > +
> > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +NS_CHECK_EXEC([bob], [nc -z 192.168.2.2 80], [0])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> > +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> > +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +check ovn-nbctl lr-route-del R1
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 2
> > +wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='172.16.1.2'
> > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> > +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +check ovn-nbctl lr-route-del R2
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 0
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +])
> > +
> > +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> > +
> > +as ovn-sb
> > +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> > +
> > +as ovn-nb
> > +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> > +
> > +as northd
> > +OVS_APP_EXIT_AND_WAIT([ovn-northd])
> > +
> > +as
> > +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
> > +/.*terminating with signal 15.*/d"])
> > +AT_CLEANUP
> > +])
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([ECMP Flush CT entries - IPv6])
> > +AT_KEYWORDS([ecmp])
> > +ovn_start
> > +OVS_TRAFFIC_VSWITCHD_START()
> > +
> > +ADD_BR([br-int])
> > +ADD_BR([br-ext])
> > +ADD_BR([br-ecmp])
> > +
> > +ovs-ofctl add-flow br-ext action=normal
> > +ovs-ofctl add-flow br-ecmp action=normal
> > +# Set external-ids in br-int needed for ovn-controller
> > +ovs-vsctl \
> > +        -- set Open_vSwitch . external-ids:system-id=hv1 \
> > +        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> > +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> > +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> > +        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
> 
> Missing checks for ovs-ofctl and ovs-vsctl.

ack, I will fix it

> 
> > +
> > +# Start ovn-controller
> > +start_daemon ovn-controller
> > +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1
> > +
> 
> Wrong option name and missing check.

ack, I will fix it

Regards,
Lorenzo

> 
> > +check ovn-nbctl lr-add R1
> > +check ovn-nbctl set logical_router R1 options:chassis=hv1
> > +check ovn-nbctl lr-add R2
> > +check ovn-nbctl set logical_router R2 options:chassis=hv1
> > +
> > +check ovn-nbctl ls-add sw0
> > +check ovn-nbctl ls-add sw1
> > +check ovn-nbctl ls-add public
> > +
> > +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 fd11::1/64
> > +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 fd12::1/64
> > +
> > +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 fd14::1/64
> > +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 fd12::5/64
> > +
> > +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
> > +    type=router options:router-port=rp-sw0 \
> > +    -- lsp-set-addresses sw0-rp router
> > +
> > +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
> > +    type=router options:router-port=rp-sw1 \
> > +    -- lsp-set-addresses sw1-rp router
> > +
> > +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \
> > +    type=router options:router-port=rp-public1 \
> > +    -- lsp-set-addresses public-rp1 router
> > +
> > +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \
> > +    type=router options:router-port=rp-public2 \
> > +    -- lsp-set-addresses public-rp2 router
> > +
> > +ADD_NAMESPACES(alice)
> > +ADD_VETH(alice, alice, br-int, "fd11::2/64", "f0:00:00:01:02:03", "fd11::1", "nodad")
> > +check ovn-nbctl lsp-add sw0 alice -- lsp-set-addresses alice "f0:00:00:01:02:03 fd11::2"
> > +
> > +ADD_NAMESPACES(peter)
> > +ADD_VETH(peter, peter, br-int, "fd14::2/64", "f0:00:02:01:02:03", "fd14::1", "nodad")
> > +check ovn-nbctl lsp-add sw1 peter -- lsp-set-addresses peter "f0:00:02:01:02:03 fd14::2"
> > +
> > +check ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext
> > +check ovn-nbctl lsp-add public public1 \
> > +        -- lsp-set-addresses public1 unknown \
> > +        -- lsp-set-type public1 localnet \
> > +        -- lsp-set-options public1 network_name=phynet
> > +
> > +ADD_NAMESPACES(ecmp-path0)
> > +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "fd12::2/64", "f0:00:00:01:02:04", "fd12::1", "nodad")
> > +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "fd13::2/64", "f0:00:00:01:03:04")
> > +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path0], [ip a show dev ecmp-p02 | grep "fe80::" | grep -v tentative])])
> > +
> > +ADD_NAMESPACES(ecmp-path1)
> > +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "fd12::3/64", "f0:00:00:01:02:05", "fd12::1", "nodad")
> > +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "fd13::3/64", "f0:00:00:01:03:05")
> > +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path1], [ip a show dev ecmp-p12 | grep "fe80::" | grep -v tentative])])
> > +
> > +ADD_NAMESPACES(bob)
> > +ADD_VETH(bob, bob, br-ecmp, "fd13::a/64", "f0:00:00:01:02:06", "fd13::2", "nodad")
> > +
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3
> > +
> > +NS_CHECK_EXEC([ecmp-path0], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl
> > +net.ipv6.conf.all.forwarding = 1
> > +])
> > +NS_CHECK_EXEC([ecmp-path1], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl
> > +net.ipv6.conf.all.forwarding = 1
> > +])
> > +
> > +ovn-nbctl --wait=hv sync
> > +NETNS_DAEMONIZE([alice], [nc -6 -l -k 80], [alice.pid])
> > +NETNS_DAEMONIZE([peter], [nc -6 -l -k 80], [peter.pid])
> > +
> > +NS_CHECK_EXEC([bob], [ping6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
> > +
> > +wait_row_count ECMP_Nexthop 2
> > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
> > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
> > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +# Switch bob's default route to the other next hop (fd13::3)
> > +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::2])
> > +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::3])
> > +
> > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
> > +
> > +wait_row_count ECMP_Nexthop 2
> > +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
> > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
> > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
> > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +# Remove first ECMP route
> > +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 1
> > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
> > +wait_row_count ECMP_Nexthop 2
> > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
> > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
> > +
> > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05])
> > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::2"'
> > +
> > +# Switch bob's default route back to the first next hop (fd13::2)
> > +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::3])
> > +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::2])
> > +
> > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
> > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +# Remove first ECMP route
> > +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 1
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +])
> > +
> > +# Remove second ECMP route
> > +check ovn-nbctl lr-route-del R1
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 0
> > +
> > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06])
> > +
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3
> > +
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::2
> > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::3
> > +
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 4
> > +
> > +NS_CHECK_EXEC([ecmp-path0], [ip route add fd14::2/128 via fd12::5])
> > +NS_CHECK_EXEC([ecmp-path1], [ip route add fd14::2/128 via fd12::5])
> > +
> > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
> > +
> > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd14::2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +NS_CHECK_EXEC([bob], [nc -6 -z fd14::2 80], [0])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> > +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> > +tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +# Remove all ECMP routes from R1 (R2 routes must keep their ct entries)
> > +check ovn-nbctl lr-route-del R1
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 2
> > +wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='"fd12::2"'
> > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
> > +tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
> > +])
> > +
> > +check ovn-nbctl lr-route-del R2
> > +check ovn-nbctl --wait=hv sync
> > +wait_row_count ECMP_Nexthop 0
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
> > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
> > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
> > +])
> > +
> > +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> > +
> > +as ovn-sb
> > +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> > +
> > +as ovn-nb
> > +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> > +
> > +as northd
> > +OVS_APP_EXIT_AND_WAIT([ovn-northd])
> > +
> > +as
> > +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
> > +/.*terminating with signal 15.*/d"])
> > +AT_CLEANUP
> > +])
> 
> Regards,
> Dumitru
>
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 1f8f54d5d..f46285d32 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,8 @@  Post v24.09.0
      ECMP-nexthop.
      By default ovn-controller continuously sends ARP/ND packets for
      ECMP-nexthop.
+   - Introduce ovn-controller ECMP_nexthop monitor in order to flush stale ct
+     entries when related ecmp routes are removed by the CMS.
 
 OVN v24.09.0 - 13 Sep 2024
 --------------------------
diff --git a/controller/automake.mk b/controller/automake.mk
index bb0bf2d33..766e36382 100644
--- a/controller/automake.mk
+++ b/controller/automake.mk
@@ -51,7 +51,9 @@  controller_ovn_controller_SOURCES = \
 	controller/ct-zone.h \
 	controller/ct-zone.c \
 	controller/ovn-dns.c \
-	controller/ovn-dns.h
+	controller/ovn-dns.h \
+	controller/ecmp-next-hop-monitor.h \
+	controller/ecmp-next-hop-monitor.c
 
 controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la
 man_MANS += controller/ovn-controller.8
diff --git a/controller/ecmp-next-hop-monitor.c b/controller/ecmp-next-hop-monitor.c
new file mode 100644
index 000000000..bafe9750f
--- /dev/null
+++ b/controller/ecmp-next-hop-monitor.c
@@ -0,0 +1,184 @@ 
+/* Copyright (c) 2024, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "ct-zone.h"
+#include "lib/ovn-util.h"
+#include "lib/simap.h"
+#include "openvswitch/hmap.h"
+#include "openvswitch/ofp-ct.h"
+#include "openvswitch/rconn.h"
+#include "openvswitch/vlog.h"
+#include "ovn/logical-fields.h"
+#include "ovn-sb-idl.h"
+#include "controller/ecmp-next-hop-monitor.h"
+
+VLOG_DEFINE_THIS_MODULE(ecmp_next_hop_monitor);
+
+static struct hmap ecmp_nexthop;
+
+struct ecmp_nexthop_data {
+    struct hmap_node hmap_node;
+    uint16_t zone_id;
+    char *nexthop;
+    char *mac;
+};
+
+void ecmp_nexthop_init(void)
+{
+    hmap_init(&ecmp_nexthop);
+}
+
+static void
+ecmp_nexthop_erase_entry(struct ecmp_nexthop_data *e)
+{
+    free(e->nexthop);
+    free(e->mac);
+    free(e);
+}
+
+static void
+ecmp_nexthop_destroy_map(struct hmap *map)
+{
+    struct ecmp_nexthop_data *e;
+    HMAP_FOR_EACH_POP (e, hmap_node, map) {
+        ecmp_nexthop_erase_entry(e);
+    }
+    hmap_destroy(map);
+}
+
+void ecmp_nexthop_destroy(void)
+{
+    ecmp_nexthop_destroy_map(&ecmp_nexthop);
+}
+
+static struct ecmp_nexthop_data *
+ecmp_nexthop_alloc_entry(const char *nexthop, const char *mac,
+                         const uint16_t zone_id, struct hmap *map)
+{
+    struct ecmp_nexthop_data *e = xmalloc(sizeof *e);
+    e->nexthop = xstrdup(nexthop);
+    e->mac = xstrdup(mac);
+    e->zone_id = zone_id;
+
+    uint32_t hash = hash_string(nexthop, 0);
+    hash = hash_add(hash, hash_string(mac, 0));
+    hash = hash_add(hash, zone_id);
+    hmap_insert(map, &e->hmap_node, hash);
+
+    return e;
+}
+
+static struct ecmp_nexthop_data *
+ecmp_nexthop_find_entry(const char *nexthop, const char *mac,
+                        const uint16_t zone_id, struct hmap *map)
+{
+    uint32_t hash = hash_string(nexthop, 0);
+    hash = hash_add(hash, hash_string(mac, 0));
+    hash = hash_add(hash, zone_id);
+
+    struct ecmp_nexthop_data *e;
+    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) {
+        if (!strcmp(e->nexthop, nexthop) &&
+            !strcmp(e->mac, mac) && e->zone_id == zone_id) {
+            return e;
+        }
+    }
+    return NULL;
+}
+
+static void
+ecmp_nexthop_monitor_flush_ct_entry(const struct rconn *swconn,
+                                    const char *mac, uint16_t zone_id,
+                                    struct ovs_list *msgs)
+{
+    struct eth_addr ea;
+    if (!ovs_scan(mac, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
+        return;
+    }
+
+    ovs_u128 mask = {
+        /* ct_label.ecmp_reply_eth BITS[32-79] */
+        .u64.hi = OVN_CT_ECMP_ETH_HIGH,
+        .u64.lo = OVN_CT_ECMP_ETH_LOW,
+    };
+
+    ovs_be32 lo = get_unaligned_be32((void *)&ea.be16[1]);
+    ovs_u128 nexthop = {
+        .u64.hi = ntohs(ea.be16[0]),
+        .u64.lo = (uint64_t) ntohl(lo) << 32,
+    };
+
+    struct ofp_ct_match match = {
+        .labels = nexthop,
+        .labels_mask = mask,
+    };
+    struct ofpbuf *msg = ofp_ct_match_encode(&match, &zone_id,
+                                             rconn_get_version(swconn));
+    ovs_list_push_back(msgs, &msg->list_node);
+}
+
+void
+ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,
+                         const struct shash *current_ct_zones,
+                         const struct rconn *swconn, struct ovs_list *msgs)
+{
+    struct hmap sb_ecmp_nexthop = HMAP_INITIALIZER(&sb_ecmp_nexthop);
+
+    const struct sbrec_ecmp_nexthop *sbrec_ecmp_nexthop;
+    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sbrec_ecmp_nexthop, enh_table) {
+        struct sbrec_port_binding *pb = sbrec_ecmp_nexthop->port;
+        if (!pb) {
+            continue;
+        }
+
+        const char *dp_name = smap_get(&pb->datapath->external_ids, "name");
+        if (!dp_name) {
+            continue;
+        }
+
+        char *name = xasprintf("%s_dnat", dp_name);
+        struct ct_zone *ct_zone = shash_find_data(current_ct_zones, name);
+        free(name);
+
+        if (!ct_zone) {
+            continue;
+        }
+
+        if (!ecmp_nexthop_find_entry(sbrec_ecmp_nexthop->nexthop,
+                                     sbrec_ecmp_nexthop->mac, ct_zone->zone,
+                                     &ecmp_nexthop)) {
+            ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop,
+                                     sbrec_ecmp_nexthop->mac,
+                                     ct_zone->zone, &ecmp_nexthop);
+        }
+        ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop,
+                                 sbrec_ecmp_nexthop->mac, ct_zone->zone,
+                                 &sb_ecmp_nexthop);
+    }
+
+    struct ecmp_nexthop_data *e;
+    HMAP_FOR_EACH_SAFE (e, hmap_node, &ecmp_nexthop) {
+        if (!ecmp_nexthop_find_entry(e->nexthop, e->mac, e->zone_id,
+                                     &sb_ecmp_nexthop)) {
+            ecmp_nexthop_monitor_flush_ct_entry(swconn, e->mac,
+                                                e->zone_id, msgs);
+            hmap_remove(&ecmp_nexthop, &e->hmap_node);
+            ecmp_nexthop_erase_entry(e);
+        }
+    }
+
+    ecmp_nexthop_destroy_map(&sb_ecmp_nexthop);
+}
diff --git a/controller/ecmp-next-hop-monitor.h b/controller/ecmp-next-hop-monitor.h
new file mode 100644
index 000000000..ee8278e3b
--- /dev/null
+++ b/controller/ecmp-next-hop-monitor.h
@@ -0,0 +1,25 @@ 
+/* Copyright (c) 2024, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_CMP_NEXT_HOP_MONITOR_H
+#define OVN_CMP_NEXT_HOP_MONITOR_H
+
+void ecmp_nexthop_init(void);
+void ecmp_nexthop_destroy(void);
+void ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,
+                              const struct shash *current_ct_zones,
+                              const struct rconn *swconn,
+                              struct ovs_list *msgs);
+#endif /* OVN_CMP_NEXT_HOP_MONITOR_H */
diff --git a/controller/ofctrl.c b/controller/ofctrl.c
index f9387d375..e44da749d 100644
--- a/controller/ofctrl.c
+++ b/controller/ofctrl.c
@@ -54,6 +54,7 @@ 
 #include "vswitch-idl.h"
 #include "ovn-sb-idl.h"
 #include "ct-zone.h"
+#include "ecmp-next-hop-monitor.h"
 
 VLOG_DEFINE_THIS_MODULE(ofctrl);
 
@@ -425,6 +426,7 @@  ofctrl_init(struct ovn_extend_table *group_table,
     tx_counter = rconn_packet_counter_create();
     hmap_init(&installed_lflows);
     hmap_init(&installed_pflows);
+    ecmp_nexthop_init();
     ovs_list_init(&flow_updates);
     ovn_init_symtab(&symtab);
     groups = group_table;
@@ -877,6 +879,7 @@  ofctrl_destroy(void)
     expr_symtab_destroy(&symtab);
     shash_destroy(&symtab);
     ofctrl_meter_bands_destroy();
+    ecmp_nexthop_destroy();
 }
 
 uint64_t
@@ -2662,8 +2665,10 @@  void
 ofctrl_put(struct ovn_desired_flow_table *lflow_table,
            struct ovn_desired_flow_table *pflow_table,
            struct shash *pending_ct_zones,
+           struct shash *current_ct_zones,
            struct hmap *pending_lb_tuples,
            struct ovsdb_idl_index *sbrec_meter_by_name,
+           const struct sbrec_ecmp_nexthop_table *enh_table,
            uint64_t req_cfg,
            bool lflows_changed,
            bool pflows_changed)
@@ -2704,6 +2709,8 @@  ofctrl_put(struct ovn_desired_flow_table *lflow_table,
     /* OpenFlow messages to send to the switch to bring it up-to-date. */
     struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs);
 
+    ecmp_nexthop_monitor_run(enh_table, current_ct_zones, swconn, &msgs);
+
     /* Iterate through ct zones that need to be flushed. */
     struct shash_node *iter;
     SHASH_FOR_EACH(iter, pending_ct_zones) {
diff --git a/controller/ofctrl.h b/controller/ofctrl.h
index 129e3b6ad..5735cd553 100644
--- a/controller/ofctrl.h
+++ b/controller/ofctrl.h
@@ -31,6 +31,7 @@  struct ofpbuf;
 struct ovsrec_bridge;
 struct ovsrec_open_vswitch_table;
 struct sbrec_meter_table;
+struct sbrec_ecmp_nexthop_table;
 struct shash;
 
 struct ovn_desired_flow_table {
@@ -57,8 +58,10 @@  enum mf_field_id ofctrl_get_mf_field_id(void);
 void ofctrl_put(struct ovn_desired_flow_table *lflow_table,
                 struct ovn_desired_flow_table *pflow_table,
                 struct shash *pending_ct_zones,
+                struct shash *current_ct_zones,
                 struct hmap *pending_lb_tuples,
                 struct ovsdb_idl_index *sbrec_meter_by_name,
+                const struct sbrec_ecmp_nexthop_table *enh_table,
                 uint64_t nb_cfg,
                 bool lflow_changed,
                 bool pflow_changed);
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 6cee6450d..4b05077d3 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -5821,8 +5821,11 @@  main(int argc, char *argv[])
                         ofctrl_put(&lflow_output_data->flow_table,
                                    &pflow_output_data->flow_table,
                                    &ct_zones_data->ctx.pending,
+                                   &ct_zones_data->ctx.current,
                                    &lb_data->removed_tuples,
                                    sbrec_meter_by_name,
+                                   sbrec_ecmp_nexthop_table_get(
+                                        ovnsb_idl_loop.idl),
                                    ofctrl_seqno_get_req_cfg(),
                                    engine_node_changed(&en_lflow_output),
                                    engine_node_changed(&en_pflow_output));
diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
index d563e044c..a024b0cd3 100644
--- a/include/ovn/logical-fields.h
+++ b/include/ovn/logical-fields.h
@@ -212,6 +212,9 @@  const struct ovn_field *ovn_field_from_name(const char *name);
 #define OVN_CT_ECMP_ETH_1ST_BIT 32
 #define OVN_CT_ECMP_ETH_END_BIT 79
 
+#define OVN_CT_ECMP_ETH_LOW     (((1ULL << OVN_CT_ECMP_ETH_1ST_BIT) - 1) << 32)
+#define OVN_CT_ECMP_ETH_HIGH    ((1ULL << (OVN_CT_ECMP_ETH_END_BIT - 63)) - 1)
+
 #define OVN_CT_STR(LABEL_VALUE) OVS_STRINGIZE(LABEL_VALUE)
 #define OVN_CT_MASKED_STR(LABEL_VALUE) \
     OVS_STRINGIZE(LABEL_VALUE) "/" OVS_STRINGIZE(LABEL_VALUE)
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 6dfc3055a..e9d15898f 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -14002,3 +14002,529 @@  OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
 /.*terminating with signal 15.*/d"])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ECMP Flush CT entries - IPv4])
+AT_KEYWORDS([ecmp])
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_BR([br-int])
+ADD_BR([br-ext])
+ADD_BR([br-ecmp])
+
+ovs-ofctl add-flow br-ext action=normal
+ovs-ofctl add-flow br-ecmp action=normal
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1
+
+check ovn-nbctl lr-add R1
+check ovn-nbctl set logical_router R1 options:chassis=hv1
+check ovn-nbctl lr-add R2
+check ovn-nbctl set logical_router R2 options:chassis=hv1
+
+check ovn-nbctl ls-add sw0
+check ovn-nbctl ls-add sw1
+check ovn-nbctl ls-add public
+
+check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24
+check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 172.16.1.1/24
+
+check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 192.168.2.1/24
+check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 172.16.1.5/24
+
+check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
+    type=router options:router-port=rp-sw0 \
+    -- lsp-set-addresses sw0-rp router
+
+check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
+    type=router options:router-port=rp-sw1 \
+    -- lsp-set-addresses sw1-rp router
+
+check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \
+    type=router options:router-port=rp-public1 \
+    -- lsp-set-addresses public-rp1 router
+
+check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \
+    type=router options:router-port=rp-public2 \
+    -- lsp-set-addresses public-rp2 router
+
+ADD_NAMESPACES(alice)
+ADD_VETH(alice, alice, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
+         "192.168.1.1")
+check ovn-nbctl lsp-add sw0 alice \
+    -- lsp-set-addresses alice "f0:00:00:01:02:03 192.168.1.2"
+
+ADD_NAMESPACES(peter)
+ADD_VETH(peter, peter, br-int, "192.168.2.2/24", "f0:00:02:01:02:03", \
+         "192.168.2.1")
+check ovn-nbctl lsp-add sw1 peter \
+    -- lsp-set-addresses peter "f0:00:02:01:02:03 192.168.2.2"
+
+check ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext
+check ovn-nbctl lsp-add public public1 \
+        -- lsp-set-addresses public1 unknown \
+        -- lsp-set-type public1 localnet \
+        -- lsp-set-options public1 network_name=phynet
+
+ADD_NAMESPACES(ecmp-path0)
+ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "172.16.1.2/24", "f0:00:00:01:02:04", "172.16.1.1")
+ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "172.16.2.2/24", "f0:00:00:01:03:04")
+
+ADD_NAMESPACES(ecmp-path1)
+ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "172.16.1.3/24", "f0:00:00:01:02:05", "172.16.1.1")
+ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "172.16.2.3/24", "f0:00:00:01:03:05")
+
+ADD_NAMESPACES(bob)
+ADD_VETH(bob, bob, br-ecmp, "172.16.2.10/24", "f0:00:00:01:02:06", "172.16.2.2")
+
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3
+
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+NETNS_DAEMONIZE([alice], [nc -l -k 80], [alice.pid])
+NETNS_DAEMONIZE([peter], [nc -l -k 80], [peter.pid])
+
+NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
+
+wait_row_count ECMP_Nexthop 2
+wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
+])
+
+# Switch bob's default route to the second ECMP path (via 172.16.2.3)
+NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.2])
+NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.3])
+
+NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
+
+wait_row_count ECMP_Nexthop 2
+check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
+check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
+])
+
+# Remove first ECMP route
+check ovn-nbctl lr-route-del  R1 172.16.2.0/24 172.16.1.2
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 1
+check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
+
+ovn-sbctl list ECMP_Nexthop > /tmp/ecmp-nh
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
+])
+
+# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2
+wait_row_count ECMP_Nexthop 2
+wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2'
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
+
+NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05])
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.2'
+
+# Switch bob's default route back to the first ECMP path (via 172.16.2.2)
+NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.3])
+NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.2])
+
+NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
+])
+
+# Remove first ECMP route
+check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 1
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+])
+
+# Remove second ECMP route
+check ovn-nbctl lr-route-del R1
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 0
+
+NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06])
+
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3
+
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.2
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.3
+
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 4
+
+NS_CHECK_EXEC([ecmp-path0], [ip route add 192.168.2.2/32 via 172.16.1.5])
+NS_CHECK_EXEC([ecmp-path1], [ip route add 192.168.2.2/32 via 172.16.1.5])
+
+NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0])
+
+NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -z 192.168.2.2 80], [0])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
+icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
+tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
+])
+
+check ovn-nbctl lr-route-del R1
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 2
+wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='172.16.1.2'
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3'
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
+tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
+])
+
+check ovn-nbctl lr-route-del R2
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 0
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
+/.*terminating with signal 15.*/d"])
+AT_CLEANUP
+])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ECMP Flush CT entries - IPv6])
+AT_KEYWORDS([ecmp])
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_BR([br-int])
+ADD_BR([br-ext])
+ADD_BR([br-ecmp])
+
+ovs-ofctl add-flow br-ext action=normal
+ovs-ofctl add-flow br-ecmp action=normal
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1
+
+check ovn-nbctl lr-add R1
+check ovn-nbctl set logical_router R1 options:chassis=hv1
+check ovn-nbctl lr-add R2
+check ovn-nbctl set logical_router R2 options:chassis=hv1
+
+check ovn-nbctl ls-add sw0
+check ovn-nbctl ls-add sw1
+check ovn-nbctl ls-add public
+
+check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 fd11::1/64
+check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 fd12::1/64
+
+check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 fd14::1/64
+check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 fd12::5/64
+
+check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
+    type=router options:router-port=rp-sw0 \
+    -- lsp-set-addresses sw0-rp router
+
+check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
+    type=router options:router-port=rp-sw1 \
+    -- lsp-set-addresses sw1-rp router
+
+check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \
+    type=router options:router-port=rp-public1 \
+    -- lsp-set-addresses public-rp1 router
+
+check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \
+    type=router options:router-port=rp-public2 \
+    -- lsp-set-addresses public-rp2 router
+
+ADD_NAMESPACES(alice)
+ADD_VETH(alice, alice, br-int, "fd11::2/64", "f0:00:00:01:02:03", "fd11::1", "nodad")
+check ovn-nbctl lsp-add sw0 alice -- lsp-set-addresses alice "f0:00:00:01:02:03 fd11::2"
+
+ADD_NAMESPACES(peter)
+ADD_VETH(peter, peter, br-int, "fd14::2/64", "f0:00:02:01:02:03", "fd14::1", "nodad")
+check ovn-nbctl lsp-add sw1 peter -- lsp-set-addresses peter "f0:00:02:01:02:03 fd14::2"
+
+check ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext
+check ovn-nbctl lsp-add public public1 \
+        -- lsp-set-addresses public1 unknown \
+        -- lsp-set-type public1 localnet \
+        -- lsp-set-options public1 network_name=phynet
+
+ADD_NAMESPACES(ecmp-path0)
+ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "fd12::2/64", "f0:00:00:01:02:04", "fd12::1", "nodad")
+ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "fd13::2/64", "f0:00:00:01:03:04")
+OVS_WAIT_UNTIL([NS_EXEC([ecmp-path0], [ip a show dev ecmp-p02 | grep "fe80::" | grep -v tentative])])
+
+ADD_NAMESPACES(ecmp-path1)
+ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "fd12::3/64", "f0:00:00:01:02:05", "fd12::1", "nodad")
+ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "fd13::3/64", "f0:00:00:01:03:05")
+OVS_WAIT_UNTIL([NS_EXEC([ecmp-path1], [ip a show dev ecmp-p12 | grep "fe80::" | grep -v tentative])])
+
+ADD_NAMESPACES(bob)
+ADD_VETH(bob, bob, br-ecmp, "fd13::a/64", "f0:00:00:01:02:06", "fd13::2", "nodad")
+
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3
+
+NS_CHECK_EXEC([ecmp-path0], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl
+net.ipv6.conf.all.forwarding = 1
+])
+NS_CHECK_EXEC([ecmp-path1], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl
+net.ipv6.conf.all.forwarding = 1
+])
+
+ovn-nbctl --wait=hv sync
+NETNS_DAEMONIZE([alice], [nc -6 -l -k 80], [alice.pid])
+NETNS_DAEMONIZE([peter], [nc -6 -l -k 80], [peter.pid])
+
+NS_CHECK_EXEC([bob], [ping6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
+
+wait_row_count ECMP_Nexthop 2
+wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
+])
+
+# Switch bob's default route to the second ECMP path (via fd13::3)
+NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::2])
+NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::3])
+
+NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
+
+wait_row_count ECMP_Nexthop 2
+check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
+check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000
+icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>)
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
+])
+
+# Remove first ECMP route
+check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 1
+check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
+])
+
+# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
+wait_row_count ECMP_Nexthop 2
+wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"'
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
+
+NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05])
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::2"'
+
+# Switch bob's default route back to the first ECMP path (via fd13::2)
+NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::3])
+NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::2])
+
+NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>)
+])
+
+# Remove first ECMP route
+check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 1
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+])
+
+# Remove second ECMP route
+check ovn-nbctl lr-route-del R1
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 0
+
+NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06])
+
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3
+
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::2
+check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::3
+
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 4
+
+NS_CHECK_EXEC([ecmp-path0], [ip route add fd14::2/128 via fd12::5])
+NS_CHECK_EXEC([ecmp-path1], [ip route add fd14::2/128 via fd12::5])
+
+NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0])
+
+NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd14::2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([bob], [nc -6 -z fd14::2 80], [0])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
+icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
+tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
+])
+
+# Remove all ECMP routes from R1; R2's two next-hops must remain
+check ovn-nbctl lr-route-del R1
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 2
+wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='"fd12::2"'
+wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"'
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000
+tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>)
+])
+
+check ovn-nbctl lr-route-del R2
+check ovn-nbctl --wait=hv sync
+wait_row_count ECMP_Nexthop 0
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
+/.*terminating with signal 15.*/d"])
+AT_CLEANUP
+])