diff mbox series

[ovs-dev,RFC,v2,2/4] pinctrl: Send periodic arp/nd to ecmp next-hops.

Message ID 6b8ae1e72ade79c9901e638de570034a8402e512.1726765502.git.lorenzo.bianconi@redhat.com
State New
Headers show
Series Introduce ECMP_nexthop monitor in ovn-controller | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Lorenzo Bianconi Sept. 19, 2024, 5:08 p.m. UTC
Introduce the capbility to periodically send ARP/ND packets for
ECMP nexthops in order to resolve their L2 address. This is a preliminary
patch to introduce the capability to flush stale ECMP CT entries.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 controller/ovn-controller.8.xml |  11 ++
 controller/ovn-controller.c     |   2 +
 controller/pinctrl.c            | 269 +++++++++++++++++++++++++++++---
 controller/pinctrl.h            |   2 +
 4 files changed, 261 insertions(+), 23 deletions(-)
diff mbox series

Patch

diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
index aeaa374c1..c7fcad1fa 100644
--- a/controller/ovn-controller.8.xml
+++ b/controller/ovn-controller.8.xml
@@ -385,6 +385,17 @@ 
         cap for the exponential backoff used by <code>ovn-controller</code>
         to send GARPs packets.
       </dd>
+      <dt><code>external_ids:arp-nd-max-timeout-sec</code></dt>
+      <dd>
+        When used, this configuration value specifies the maximum timeout
+        (in seconds) between two consecutive ARP/ND packets sent by
+        <code>ovn-controller</code> to resolve ECMP nexthop mac address.
+        <code>ovn-controller</code> by default sends just 4 ARP/ND packets
+        with an exponential backoff timeout.
+        Setting <code>external_ids:arp-nd-max-timeout-sec</code> allows to
+        cap for the exponential backoff used by <code>ovn-controller</code>
+        to send ARPs/NDs packets.
+      </dd>
       <dt><code>external_ids:ovn-bridge-remote</code></dt>
       <dd>
         <p>
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 64be08ce3..e72d28731 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -5653,6 +5653,8 @@  main(int argc, char *argv[])
                                     sbrec_mac_binding_table_get(
                                         ovnsb_idl_loop.idl),
                                     sbrec_bfd_table_get(ovnsb_idl_loop.idl),
+                                    sbrec_ecmp_nexthop_table_get(
+                                        ovnsb_idl_loop.idl),
                                     br_int, chassis,
                                     &runtime_data->local_datapaths,
                                     &runtime_data->active_tunnels,
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index c86b4f940..daf0bfa41 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -170,6 +170,9 @@  static struct seq *pinctrl_main_seq;
 static long long int garp_rarp_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT;
 static bool garp_rarp_continuous;
 
+static long long int arp_nd_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT;
+static bool arp_nd_continuous;
+
 static void *pinctrl_handler(void *arg);
 
 struct pinctrl {
@@ -229,13 +232,17 @@  static void run_activated_ports(
     const struct sbrec_chassis *chassis);
 
 static void init_send_garps_rarps(void);
+static void init_send_arps_nds(void);
 static void destroy_send_garps_rarps(void);
+static void destroy_send_arps_nds(void);
 static void send_garp_rarp_wait(long long int send_garp_rarp_time);
+static void send_arp_nd_wait(long long int send_arp_nd_time);
 static void send_garp_rarp_prepare(
     struct ovsdb_idl_txn *ovnsb_idl_txn,
     struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
     struct ovsdb_idl_index *sbrec_port_binding_by_name,
     struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+    const struct sbrec_ecmp_nexthop_table *ecmp_nh_table,
     const struct ovsrec_bridge *,
     const struct sbrec_chassis *,
     const struct hmap *local_datapaths,
@@ -245,6 +252,9 @@  static void send_garp_rarp_prepare(
 static void send_garp_rarp_run(struct rconn *swconn,
                                long long int *send_garp_rarp_time)
     OVS_REQUIRES(pinctrl_mutex);
+static void send_arp_nd_run(struct rconn *swconn,
+                            long long int *send_arp_nd_time)
+    OVS_REQUIRES(pinctrl_mutex);
 static void pinctrl_handle_nd_na(struct rconn *swconn,
                                  const struct flow *ip_flow,
                                  const struct match *md,
@@ -554,6 +564,7 @@  pinctrl_init(void)
 {
     init_put_mac_bindings();
     init_send_garps_rarps();
+    init_send_arps_nds();
     init_ipv6_ras();
     init_ipv6_prefixd();
     init_buffered_packets_ctx();
@@ -4000,6 +4011,7 @@  pinctrl_handler(void *arg_)
     static long long int send_ipv6_ra_time = LLONG_MAX;
     /* Next GARP/RARP announcement in ms. */
     static long long int send_garp_rarp_time = LLONG_MAX;
+    static long long int send_arp_nd_time = LLONG_MAX;
     /* Next multicast query (IGMP) in ms. */
     static long long int send_mcast_query_time = LLONG_MAX;
     static long long int svc_monitors_next_run_time = LLONG_MAX;
@@ -4037,6 +4049,7 @@  pinctrl_handler(void *arg_)
             if (may_inject_pkts()) {
                 ovs_mutex_lock(&pinctrl_mutex);
                 send_garp_rarp_run(swconn, &send_garp_rarp_time);
+                send_arp_nd_run(swconn, &send_arp_nd_time);
                 send_ipv6_ras(swconn, &send_ipv6_ra_time);
                 send_ipv6_prefixd(swconn, &send_prefixd_time);
                 send_mac_binding_buffered_pkts(swconn);
@@ -4055,6 +4068,7 @@  pinctrl_handler(void *arg_)
         rconn_recv_wait(swconn);
         if (rconn_is_connected(swconn)) {
             send_garp_rarp_wait(send_garp_rarp_time);
+            send_arp_nd_wait(send_arp_nd_time);
             ipv6_ra_wait(send_ipv6_ra_time);
             ip_mcast_querier_wait(send_mcast_query_time);
             svc_monitors_wait(svc_monitors_next_run_time);
@@ -4151,6 +4165,7 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
             const struct sbrec_service_monitor_table *svc_mon_table,
             const struct sbrec_mac_binding_table *mac_binding_table,
             const struct sbrec_bfd_table *bfd_table,
+            const struct sbrec_ecmp_nexthop_table *ecmp_nh_table,
             const struct ovsrec_bridge *br_int,
             const struct sbrec_chassis *chassis,
             const struct hmap *local_datapaths,
@@ -4167,8 +4182,9 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                            sbrec_port_binding_by_key, chassis);
     send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath,
                            sbrec_port_binding_by_name,
-                           sbrec_mac_binding_by_lport_ip, br_int, chassis,
-                           local_datapaths, active_tunnels, ovs_table);
+                           sbrec_mac_binding_by_lport_ip, ecmp_nh_table,
+                           br_int, chassis, local_datapaths, active_tunnels,
+                           ovs_table);
     prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
     prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
                          local_active_ports_ipv6_pd, chassis,
@@ -4702,6 +4718,7 @@  pinctrl_destroy(void)
     latch_destroy(&pinctrl.pinctrl_thread_exit);
     rconn_destroy(pinctrl.swconn);
     destroy_send_garps_rarps();
+    destroy_send_arps_nds();
     destroy_ipv6_ras();
     destroy_ipv6_prefixd();
     destroy_buffered_packets_ctx();
@@ -5029,7 +5046,8 @@  wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn)
  */
 struct garp_rarp_data {
     struct eth_addr ea;          /* Ethernet address of port. */
-    ovs_be32 ipv4;               /* Ipv4 address of port. */
+    struct in6_addr src_ip;      /* IP address of port. */
+    struct in6_addr dst_ip;      /* Destination IP address */
     long long int announce_time; /* Next announcement in ms. */
     int backoff;                 /* Backoff timeout for the next
                                   * announcement (in msecs). */
@@ -5052,19 +5070,37 @@  destroy_send_garps_rarps(void)
     shash_destroy_free_data(&send_garp_rarp_data);
 }
 
+/* Contains ARPs data to be sent to track ECMP next-hop mac address. Protected
+ * by pinctrl_mutex. */
+static struct shash send_arp_nd_data;
+
+static void
+init_send_arps_nds(void)
+{
+    shash_init(&send_arp_nd_data);
+}
+
+static void
+destroy_send_arps_nds(void)
+{
+    shash_destroy_free_data(&send_arp_nd_data);
+}
+
 /* Runs with in the main ovn-controller thread context. */
 static void
-add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
-              uint32_t dp_key, uint32_t port_key)
+add_garp_rarp(const char *name, const struct eth_addr ea,
+              struct in6_addr src_ip, struct in6_addr dst_ip,
+              uint32_t dp_key, uint32_t port_key, struct shash *shash)
 {
     struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
     garp_rarp->ea = ea;
-    garp_rarp->ipv4 = ip;
+    garp_rarp->src_ip = src_ip;
+    garp_rarp->dst_ip = dst_ip;
     garp_rarp->announce_time = time_msec() + 1000;
     garp_rarp->backoff = 1000; /* msec. */
     garp_rarp->dp_key = dp_key;
     garp_rarp->port_key = port_key;
-    shash_add(&send_garp_rarp_data, name, garp_rarp);
+    shash_add(shash, name, garp_rarp);
 
     /* Notify pinctrl_handler so that it can wakeup and process
      * these GARP/RARP requests. */
@@ -5113,9 +5149,10 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                     }
                 } else if (ovnsb_idl_txn) {
                     add_garp_rarp(name, laddrs->ea,
-                                  laddrs->ipv4_addrs[i].addr,
-                                  binding_rec->datapath->tunnel_key,
-                                  binding_rec->tunnel_key);
+                            in6_addr_mapped_ipv4(laddrs->ipv4_addrs[i].addr),
+                            in6_addr_mapped_ipv4(laddrs->ipv4_addrs[i].addr),
+                            binding_rec->datapath->tunnel_key,
+                            binding_rec->tunnel_key, &send_garp_rarp_data);
                     send_garp_locally(ovnsb_idl_txn,
                                       sbrec_mac_binding_by_lport_ip,
                                       local_datapaths, binding_rec, laddrs->ea,
@@ -5142,9 +5179,11 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                             garp_rarp->backoff = 1000; /* msec. */
                         }
                     } else {
-                        add_garp_rarp(name, laddrs->ea,
-                                      0, binding_rec->datapath->tunnel_key,
-                                      binding_rec->tunnel_key);
+                        struct in6_addr ip = {};
+                        add_garp_rarp(name, laddrs->ea, ip, ip,
+                                      binding_rec->datapath->tunnel_key,
+                                      binding_rec->tunnel_key,
+                                      &send_garp_rarp_data);
                     }
                     free(name);
             }
@@ -5182,10 +5221,10 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
             ip = laddrs.ipv4_addrs[0].addr;
         }
 
-        add_garp_rarp(binding_rec->logical_port,
-                      laddrs.ea, ip,
+        add_garp_rarp(binding_rec->logical_port, laddrs.ea,
+                      in6_addr_mapped_ipv4(ip), in6_addr_mapped_ipv4(ip),
                       binding_rec->datapath->tunnel_key,
-                      binding_rec->tunnel_key);
+                      binding_rec->tunnel_key, &send_garp_rarp_data);
         if (ip) {
             send_garp_locally(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
                               local_datapaths, binding_rec, laddrs.ea, ip);
@@ -5196,12 +5235,52 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
     }
 }
 
+/* Add or update a vif for which ARPs need to be announced. */
+static void
+send_arp_nd_update(const struct sbrec_port_binding *pb, const char *nexthop,
+                   long long int max_arp_timeout, bool continuous_arp_nd)
+{
+    volatile struct garp_rarp_data *e = shash_find_data(&send_arp_nd_data,
+                                                        nexthop);
+    if (e) {
+        e->dp_key = pb->datapath->tunnel_key;
+        e->port_key = pb->tunnel_key;
+        if (max_arp_timeout != arp_nd_max_timeout ||
+            continuous_arp_nd != arp_nd_continuous) {
+            /* reset backoff */
+            e->announce_time = time_msec() + 1000;
+            e->backoff = 1000; /* msec. */
+        }
+    } else {
+        struct lport_addresses laddrs;
+        if (!extract_lsp_addresses(pb->mac[0], &laddrs)) {
+            return;
+        }
+
+        if (laddrs.n_ipv4_addrs) {
+            ovs_be32 dst_ip;
+            inet_pton(AF_INET, nexthop, &dst_ip);
+            add_garp_rarp(nexthop, laddrs.ea,
+                          in6_addr_mapped_ipv4(laddrs.ipv4_addrs[0].addr),
+                          in6_addr_mapped_ipv4(dst_ip),
+                          pb->datapath->tunnel_key, pb->tunnel_key,
+                          &send_arp_nd_data);
+        } else if (laddrs.n_ipv6_addrs) {
+            struct in6_addr dst_ip;
+            inet_pton(AF_INET6, nexthop, &dst_ip);
+            add_garp_rarp(nexthop, laddrs.ea, laddrs.ipv6_addrs[0].addr,
+                          dst_ip, pb->datapath->tunnel_key, pb->tunnel_key,
+                          &send_arp_nd_data);
+        }
+        destroy_lport_addresses(&laddrs);
+    }
+}
+
 /* Remove a vif from GARP announcements. */
 static void
-send_garp_rarp_delete(const char *lport)
+send_garp_rarp_delete(struct shash *shash, const char *lport)
 {
-    struct garp_rarp_data *garp_rarp = shash_find_and_delete
-                                       (&send_garp_rarp_data, lport);
+    struct garp_rarp_data *garp_rarp = shash_find_and_delete(shash, lport);
     free(garp_rarp);
     notify_pinctrl_handler();
 }
@@ -5220,9 +5299,11 @@  send_garp_rarp(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
     uint64_t packet_stub[128 / 8];
     struct dp_packet packet;
     dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
-    if (garp_rarp->ipv4) {
+    ovs_be32 src_ipv4 = in6_addr_get_mapped_ipv4(&garp_rarp->src_ip);
+    if (src_ipv4) {
         compose_arp(&packet, ARP_OP_REQUEST, garp_rarp->ea, eth_addr_zero,
-                    true, garp_rarp->ipv4, garp_rarp->ipv4);
+                    true, src_ipv4,
+                    in6_addr_get_mapped_ipv4(&garp_rarp->dst_ip));
     } else {
         compose_rarp(&packet, garp_rarp->ea);
     }
@@ -6524,6 +6605,25 @@  get_nat_addresses_and_keys(struct ovsdb_idl_index *sbrec_port_binding_by_name,
     }
 }
 
+static void
+get_local_ecmp_nexthop_map(
+        const struct sbrec_ecmp_nexthop_table *ecmp_nh_table,
+        struct ovsdb_idl_index *sbrec_port_binding_by_name,
+        const struct sbrec_chassis *chassis,
+        struct smap *local_ecmp_nexthop_map)
+{
+    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
+    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, ecmp_nh_table) {
+       const struct sbrec_port_binding *pb =
+           lport_lookup_by_name(sbrec_port_binding_by_name,
+                                sb_ecmp_nexthop->port);
+       if (pb && !strcmp(pb->type, "l3gateway") && pb->chassis == chassis) {
+           smap_add_once(local_ecmp_nexthop_map, sb_ecmp_nexthop->nexthop,
+                         sb_ecmp_nexthop->port);
+       }
+    }
+}
+
 static void
 send_garp_rarp_wait(long long int send_garp_rarp_time)
 {
@@ -6534,6 +6634,16 @@  send_garp_rarp_wait(long long int send_garp_rarp_time)
     }
 }
 
+static void
+send_arp_nd_wait(long long int send_arp_nd_time)
+{
+    /* Set the poll timer for next arp packet only if there is data to
+     * be sent. */
+    if (!shash_is_empty(&send_arp_nd_data)) {
+        poll_timer_wait_until(send_arp_nd_time);
+    }
+}
+
 /* Called with in the pinctrl_handler thread context. */
 static void
 send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
@@ -6556,6 +6666,86 @@  send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
     }
 }
 
+static long long int
+send_arp_nd(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
+            long long int current_time)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    if (current_time < garp_rarp->announce_time) {
+        return garp_rarp->announce_time;
+    }
+
+    /* Compose a ARP request packet. */
+    uint64_t packet_stub[128 / 8];
+    struct dp_packet packet;
+    dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
+    if (IN6_IS_ADDR_V4MAPPED(&garp_rarp->src_ip)) {
+        compose_arp(&packet, ARP_OP_REQUEST, garp_rarp->ea, eth_addr_zero,
+                    true, in6_addr_get_mapped_ipv4(&garp_rarp->src_ip),
+                    in6_addr_get_mapped_ipv4(&garp_rarp->dst_ip));
+    } else {
+        compose_nd_ns(&packet, garp_rarp->ea, &garp_rarp->src_ip,
+                      &garp_rarp->dst_ip);
+    }
+
+    /* Inject ARP request. */
+    uint64_t ofpacts_stub[4096 / 8];
+    struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
+    enum ofp_version version = rconn_get_version(swconn);
+    put_load(garp_rarp->dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
+    put_load(garp_rarp->port_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
+    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
+    resubmit->in_port = OFPP_CONTROLLER;
+    resubmit->table_id = OFTABLE_LOCAL_OUTPUT;
+
+    struct ofputil_packet_out po = {
+        .packet = dp_packet_data(&packet),
+        .packet_len = dp_packet_size(&packet),
+        .buffer_id = UINT32_MAX,
+        .ofpacts = ofpacts.data,
+        .ofpacts_len = ofpacts.size,
+    };
+    match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+    queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
+    dp_packet_uninit(&packet);
+    ofpbuf_uninit(&ofpacts);
+
+    /* Set the next announcement.  At most 5 announcements are sent for a
+     * vif if arp_nd_max_timeout is not specified otherwise cap the max
+     * timeout to arp_nd_max_timeout. */
+    if (arp_nd_continuous || garp_rarp->backoff < arp_nd_max_timeout) {
+        garp_rarp->announce_time = current_time + garp_rarp->backoff;
+    } else {
+        garp_rarp->announce_time = LLONG_MAX;
+    }
+    garp_rarp->backoff = MIN(arp_nd_max_timeout, garp_rarp->backoff * 2);
+
+    return garp_rarp->announce_time;
+}
+
+static void
+send_arp_nd_run(struct rconn *swconn, long long int *send_arp_nd_time)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    if (shash_is_empty(&send_arp_nd_data)) {
+        return;
+    }
+
+    /* Send ARPs, and update the next announcement. */
+    long long int current_time = time_msec();
+    *send_arp_nd_time = LLONG_MAX;
+
+    struct shash_node *iter;
+    SHASH_FOR_EACH (iter, &send_arp_nd_data) {
+        long long int next_announce = send_arp_nd(swconn, iter->data,
+                                                  current_time);
+        if (*send_arp_nd_time > next_announce) {
+            *send_arp_nd_time = next_announce;
+        }
+    }
+}
+
 /* Called by pinctrl_run(). Runs with in the main ovn-controller
  * thread context. */
 static void
@@ -6563,6 +6753,7 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
                        struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
                        struct ovsdb_idl_index *sbrec_port_binding_by_name,
                        struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+                       const struct sbrec_ecmp_nexthop_table *ecmp_nh_table,
                        const struct ovsrec_bridge *br_int,
                        const struct sbrec_chassis *chassis,
                        const struct hmap *local_datapaths,
@@ -6572,10 +6763,13 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
 {
     struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
     struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
+    struct smap local_ecmp_nexthop_map =
+        SMAP_INITIALIZER(&local_ecmp_nexthop_map);
     struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
     struct shash nat_addresses;
     unsigned long long garp_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT;
-    bool garp_continuous = false;
+    unsigned long long max_arp_nd_timeout = GARP_RARP_DEF_MAX_TIMEOUT;
+    bool garp_continuous = false, continuous_arp_nd = true;
     const struct ovsrec_open_vswitch *cfg =
         ovsrec_open_vswitch_table_first(ovs_table);
     if (cfg) {
@@ -6585,6 +6779,11 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
         if (!garp_max_timeout) {
             garp_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT;
         }
+
+        max_arp_nd_timeout = smap_get_ullong(
+                &cfg->external_ids, "arp-nd-max-timeout-sec",
+                GARP_RARP_DEF_MAX_TIMEOUT / 1000) * 1000;
+        continuous_arp_nd = !!max_arp_nd_timeout;
     }
 
     shash_init(&nat_addresses);
@@ -6598,13 +6797,23 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
                                &nat_ip_keys, &local_l3gw_ports,
                                chassis, active_tunnels,
                                &nat_addresses);
+
+    get_local_ecmp_nexthop_map(ecmp_nh_table, sbrec_port_binding_by_name,
+                               chassis, &local_ecmp_nexthop_map);
+
     /* For deleted ports and deleted nat ips, remove from
      * send_garp_rarp_data. */
     struct shash_node *iter;
     SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
         if (!sset_contains(&localnet_vifs, iter->name) &&
             !sset_contains(&nat_ip_keys, iter->name)) {
-            send_garp_rarp_delete(iter->name);
+            send_garp_rarp_delete(&send_garp_rarp_data, iter->name);
+        }
+    }
+
+    SHASH_FOR_EACH_SAFE (iter, &send_arp_nd_data) {
+        if (!smap_get(&local_ecmp_nexthop_map, iter->name)) {
+            send_garp_rarp_delete(&send_arp_nd_data, iter->name);
         }
     }
 
@@ -6633,10 +6842,21 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
         }
     }
 
+    struct smap_node *node;
+    SMAP_FOR_EACH (node, &local_ecmp_nexthop_map) {
+        const struct sbrec_port_binding *pb = lport_lookup_by_name(
+            sbrec_port_binding_by_name, node->value);
+        if (pb) {
+            send_arp_nd_update(pb, node->key, max_arp_nd_timeout,
+                               continuous_arp_nd);
+        }
+    }
+
     /* pinctrl_handler thread will send the GARPs. */
 
     sset_destroy(&localnet_vifs);
     sset_destroy(&local_l3gw_ports);
+    smap_destroy(&local_ecmp_nexthop_map);
 
     SHASH_FOR_EACH_SAFE (iter, &nat_addresses) {
         struct lport_addresses *laddrs = iter->data;
@@ -6650,6 +6870,9 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
 
     garp_rarp_max_timeout = garp_max_timeout;
     garp_rarp_continuous = garp_continuous;
+
+    arp_nd_max_timeout = max_arp_nd_timeout;
+    arp_nd_continuous = continuous_arp_nd;
 }
 
 static bool
diff --git a/controller/pinctrl.h b/controller/pinctrl.h
index 3462b670c..5c8ea7aea 100644
--- a/controller/pinctrl.h
+++ b/controller/pinctrl.h
@@ -36,6 +36,7 @@  struct sbrec_dns_table;
 struct sbrec_controller_event_table;
 struct sbrec_service_monitor_table;
 struct sbrec_bfd_table;
+struct sbrec_ecmp_nexthop_table;
 struct sbrec_port_binding;
 struct sbrec_mac_binding_table;
 
@@ -54,6 +55,7 @@  void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                  const struct sbrec_service_monitor_table *,
                  const struct sbrec_mac_binding_table *,
                  const struct sbrec_bfd_table *,
+                 const struct sbrec_ecmp_nexthop_table *,
                  const struct ovsrec_bridge *, const struct sbrec_chassis *,
                  const struct hmap *local_datapaths,
                  const struct sset *active_tunnels,