diff mbox series

[ovs-dev] controller: Cleanup FDB entries when a VIF goes down.

Message ID 20240802150728.130030-1-shibir.basak@nutanix.com
State Changes Requested
Delegated to: Dumitru Ceara
Headers show
Series [ovs-dev] controller: Cleanup FDB entries when a VIF goes down. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Shibir Basak Aug. 2, 2024, 3:07 p.m. UTC
When a VIF port goes down, all FDB entries associated
with it are deleted.
This helps is reducing traffic outage due to stale FDB
entries present until the lsp is removed.

Acked-by: Naveen Yerramneni <naveen.yerramneni@nutanix.com>
Signed-off-by: Shibir Basak <shibir.basak@nutanix.com>
---
 controller/ovn-controller.c |  5 ++++
 controller/pinctrl.c        | 40 ++++++++++++++++++++------
 controller/pinctrl.h        |  1 +
 lib/automake.mk             |  4 ++-
 lib/fdb.c                   | 33 +++++++++++++++++++++
 lib/fdb.h                   | 26 +++++++++++++++++
 tests/ovn.at                | 57 +++++++++++++++++++++++++++++++++++++
 7 files changed, 157 insertions(+), 9 deletions(-)
 create mode 100644 lib/fdb.c
 create mode 100644 lib/fdb.h

Comments

Dumitru Ceara Aug. 7, 2024, 11:28 a.m. UTC | #1
On 8/2/24 17:07, Shibir Basak wrote:
> When a VIF port goes down, all FDB entries associated
> with it are deleted.
> This helps is reducing traffic outage due to stale FDB
> entries present until the lsp is removed.
> 
> Acked-by: Naveen Yerramneni <naveen.yerramneni@nutanix.com>
> Signed-off-by: Shibir Basak <shibir.basak@nutanix.com>
> ---

Hi Shibir,

Thanks for the patch!

As this is a bug fix it probably needs a:
Fixes: dd94f1266ca4 ("northd: MAC learning: Add logical flows for fdb.")

I have a few more comments below.

>  controller/ovn-controller.c |  5 ++++
>  controller/pinctrl.c        | 40 ++++++++++++++++++++------
>  controller/pinctrl.h        |  1 +
>  lib/automake.mk             |  4 ++-
>  lib/fdb.c                   | 33 +++++++++++++++++++++
>  lib/fdb.h                   | 26 +++++++++++++++++
>  tests/ovn.at                | 57 +++++++++++++++++++++++++++++++++++++
>  7 files changed, 157 insertions(+), 9 deletions(-)
>  create mode 100644 lib/fdb.c
>  create mode 100644 lib/fdb.h
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 6a7cca673..2acdddd09 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -4936,6 +4936,10 @@ main(int argc, char *argv[])
>          = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
>                                    &sbrec_fdb_col_mac,
>                                    &sbrec_fdb_col_dp_key);
> +    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port
> +        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
> +                                  &sbrec_fdb_col_port_key,
> +                                  &sbrec_fdb_col_dp_key);
>      struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
>          = mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
>      struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
> @@ -5645,6 +5649,7 @@ main(int argc, char *argv[])
>                                      sbrec_igmp_group,
>                                      sbrec_ip_multicast,
>                                      sbrec_fdb_by_dp_key_mac,
> +                                    sbrec_fdb_by_dp_and_port,
>                                      sbrec_dns_table_get(ovnsb_idl_loop.idl),
>                                      sbrec_controller_event_table_get(
>                                          ovnsb_idl_loop.idl),
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 7cbb0cf81..9e37e1693 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -62,6 +62,7 @@
>  #include "lflow.h"
>  #include "ip-mcast.h"
>  #include "ovn-sb-idl.h"
> +#include "lib/fdb.h"
>  
>  VLOG_DEFINE_THIS_MODULE(pinctrl);
>  
> @@ -236,6 +237,7 @@ static void send_garp_rarp_prepare(
>      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
>      struct ovsdb_idl_index *sbrec_port_binding_by_name,
>      struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> +    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>      const struct ovsrec_bridge *,
>      const struct sbrec_chassis *,
>      const struct hmap *local_datapaths,
> @@ -4145,6 +4147,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>              struct ovsdb_idl_index *sbrec_igmp_groups,
>              struct ovsdb_idl_index *sbrec_ip_multicast_opts,
>              struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
> +            struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>              const struct sbrec_dns_table *dns_table,
>              const struct sbrec_controller_event_table *ce_table,
>              const struct sbrec_service_monitor_table *svc_mon_table,
> @@ -4166,7 +4169,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                             sbrec_port_binding_by_key, chassis);
>      send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath,
>                             sbrec_port_binding_by_name,
> -                           sbrec_mac_binding_by_lport_ip, br_int, chassis,
> +                           sbrec_mac_binding_by_lport_ip,
> +                           sbrec_fdb_by_dp_and_port, br_int, chassis,
>                             local_datapaths, active_tunnels, ovs_table);
>      prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
>      prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
> @@ -5017,7 +5021,6 @@ wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn)
>      }
>  }
>  
> -

Unrelated change.  This line feed should not be removed.

>  /*
>   * Send gratuitous/reverse ARP for vif on localnet.
>   *
> @@ -5034,6 +5037,7 @@ struct garp_rarp_data {
>                                    * announcement (in msecs). */
>      uint32_t dp_key;             /* Datapath used to output this GARP. */
>      uint32_t port_key;           /* Port to inject the GARP into. */
> +    bool enabled;                /* is garp/rarp announcement enabled */
>  };
>  
>  /* Contains GARPs/RARPs to be sent. Protected by pinctrl_mutex*/
> @@ -5054,7 +5058,7 @@ destroy_send_garps_rarps(void)
>  /* Runs with in the main ovn-controller thread context. */
>  static void
>  add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
> -              uint32_t dp_key, uint32_t port_key)
> +              uint32_t dp_key, uint32_t port_key, bool enabled)
>  {
>      struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
>      garp_rarp->ea = ea;
> @@ -5063,6 +5067,7 @@ add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
>      garp_rarp->backoff = 1000; /* msec. */
>      garp_rarp->dp_key = dp_key;
>      garp_rarp->port_key = port_key;
> +    garp_rarp->enabled = enabled;
>      shash_add(&send_garp_rarp_data, name, garp_rarp);
>  
>      /* Notify pinctrl_handler so that it can wakeup and process
> @@ -5081,6 +5086,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                        bool garp_continuous)
>  {
>      volatile struct garp_rarp_data *garp_rarp = NULL;
> +    bool is_garp_rarp_enabled = !smap_get_bool(&binding_rec->options,
> +                                               "disable_garp_rarp", false);
>  
>      /* Skip localports as they don't need to be announced */
>      if (!strcmp(binding_rec->type, "localport")) {
> @@ -5104,6 +5111,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                  if (garp_rarp) {
>                      garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
>                      garp_rarp->port_key = binding_rec->tunnel_key;
> +                    garp_rarp->enabled = is_garp_rarp_enabled;
>                      if (garp_max_timeout != garp_rarp_max_timeout ||
>                          garp_continuous != garp_rarp_continuous) {
>                          /* reset backoff */
> @@ -5114,7 +5122,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                      add_garp_rarp(name, laddrs->ea,
>                                    laddrs->ipv4_addrs[i].addr,
>                                    binding_rec->datapath->tunnel_key,
> -                                  binding_rec->tunnel_key);
> +                                  binding_rec->tunnel_key,
> +                                  is_garp_rarp_enabled);
>                      send_garp_locally(ovnsb_idl_txn,
>                                        sbrec_mac_binding_by_lport_ip,
>                                        local_datapaths, binding_rec, laddrs->ea,
> @@ -5134,6 +5143,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                      if (garp_rarp) {
>                          garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
>                          garp_rarp->port_key = binding_rec->tunnel_key;
> +                        garp_rarp->enabled = is_garp_rarp_enabled;
>                          if (garp_max_timeout != garp_rarp_max_timeout ||
>                              garp_continuous != garp_rarp_continuous) {
>                              /* reset backoff */
> @@ -5143,7 +5153,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                      } else {
>                          add_garp_rarp(name, laddrs->ea,
>                                        0, binding_rec->datapath->tunnel_key,
> -                                      binding_rec->tunnel_key);
> +                                      binding_rec->tunnel_key,
> +                                      is_garp_rarp_enabled);
>                      }
>                      free(name);
>              }
> @@ -5159,6 +5170,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
>      if (garp_rarp) {
>          garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
>          garp_rarp->port_key = binding_rec->tunnel_key;
> +        garp_rarp->enabled = is_garp_rarp_enabled;
>          if (garp_max_timeout != garp_rarp_max_timeout ||
>              garp_continuous != garp_rarp_continuous) {
>              /* reset backoff */
> @@ -5184,7 +5196,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
>          add_garp_rarp(binding_rec->logical_port,
>                        laddrs.ea, ip,
>                        binding_rec->datapath->tunnel_key,
> -                      binding_rec->tunnel_key);
> +                      binding_rec->tunnel_key,
> +                      is_garp_rarp_enabled);
>          if (ip) {
>              send_garp_locally(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
>                                local_datapaths, binding_rec, laddrs.ea, ip);
> @@ -6547,6 +6560,10 @@ send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
>      long long int current_time = time_msec();
>      *send_garp_rarp_time = LLONG_MAX;
>      SHASH_FOR_EACH (iter, &send_garp_rarp_data) {
> +        struct garp_rarp_data *garp_rarp = iter->data;
> +        if (!garp_rarp->enabled) {
> +            continue;
> +        }
>          long long int next_announce = send_garp_rarp(swconn, iter->data,
>                                                       current_time);
>          if (*send_garp_rarp_time > next_announce) {
> @@ -6562,6 +6579,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                         struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
>                         struct ovsdb_idl_index *sbrec_port_binding_by_name,
>                         struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> +                       struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>                         const struct ovsrec_bridge *br_int,
>                         const struct sbrec_chassis *chassis,
>                         const struct hmap *local_datapaths,
> @@ -6597,12 +6615,18 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                                 &nat_ip_keys, &local_l3gw_ports,
>                                 chassis, active_tunnels,
>                                 &nat_addresses);
> +
>      /* For deleted ports and deleted nat ips, remove from
>       * send_garp_rarp_data. */
>      struct shash_node *iter;
>      SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
>          if (!sset_contains(&localnet_vifs, iter->name) &&
>              !sset_contains(&nat_ip_keys, iter->name)) {
> +            struct garp_rarp_data *data = iter->data;
> +            /* Cleanup FDB entries for inactive ports */
> +            delete_fdb_entries(sbrec_fdb_by_dp_and_port,
> +                               data->dp_key,
> +                               data->port_key);

It feels wrong to use the garp/rarp mechanism to cleanup FDB on port
deletion.  It also forces the addition of the 'is_garp_rarp_enabled'
flag and creating 'garp_rarp_data' structures for cases in which we
don't really need them (we send no garps/rarps).

What if we do this in a different way?

In binding.c we set port_binding.up to "false" whenever ports are
removed.  Should we cleanup stale FDB entries there instead?

There are two places where we do that:
binding_lport_set_down()
port_binding_set_down()

What do you think?

>              send_garp_rarp_delete(iter->name);
>          }
>      }
> @@ -6612,7 +6636,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
>      SSET_FOR_EACH (iface_id, &localnet_vifs) {
>          const struct sbrec_port_binding *pb = lport_lookup_by_name(
>              sbrec_port_binding_by_name, iface_id);
> -        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp", false)) {
> +        if (pb) {
>              send_garp_rarp_update(ovnsb_idl_txn,
>                                    sbrec_mac_binding_by_lport_ip,
>                                    local_datapaths, pb, &nat_addresses,
> @@ -6625,7 +6649,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
>      SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
>          const struct sbrec_port_binding *pb
>              = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
> -        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp", false)) {
> +        if (pb) {
>              send_garp_rarp_update(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
>                                    local_datapaths, pb, &nat_addresses,
>                                    garp_max_timeout, garp_continuous);
> diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> index 3462b670c..6613c87e4 100644
> --- a/controller/pinctrl.h
> +++ b/controller/pinctrl.h
> @@ -49,6 +49,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                   struct ovsdb_idl_index *sbrec_igmp_groups,
>                   struct ovsdb_idl_index *sbrec_ip_multicast_opts,
>                   struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
> +                 struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>                   const struct sbrec_dns_table *,
>                   const struct sbrec_controller_event_table *,
>                   const struct sbrec_service_monitor_table *,
> diff --git a/lib/automake.mk b/lib/automake.mk
> index b69e854b0..3ff098331 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -49,7 +49,9 @@ lib_libovn_la_SOURCES = \
>  	lib/stopwatch-names.h \
>  	lib/vif-plug-provider.h \
>  	lib/vif-plug-provider.c \
> -	lib/vif-plug-providers/dummy/vif-plug-dummy.c
> +	lib/vif-plug-providers/dummy/vif-plug-dummy.c \
> +	lib/fdb.c \
> +	lib/fdb.h
>  nodist_lib_libovn_la_SOURCES = \
>  	lib/ovn-dirs.c \
>  	lib/ovn-nb-idl.c \
> diff --git a/lib/fdb.c b/lib/fdb.c
> new file mode 100644
> index 000000000..00e3ac902
> --- /dev/null
> +++ b/lib/fdb.c
> @@ -0,0 +1,33 @@
> +/* Copyright (c) 2024, Red Hat, Inc.

This is probably a copy/paste error.  Should it say Nutanix?

> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include "lib/fdb.h"
> +
> +void
> +delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
> +                   uint32_t dp_key, uint32_t port_key)
> +{
> +    struct sbrec_fdb *target =
> +        sbrec_fdb_index_init_row(sbrec_fdb_by_dp_and_port);
> +    sbrec_fdb_index_set_dp_key(target, dp_key);
> +    sbrec_fdb_index_set_port_key(target, port_key);
> +
> +    struct sbrec_fdb *fdb_e;
> +    SBREC_FDB_FOR_EACH_EQUAL (fdb_e, target, sbrec_fdb_by_dp_and_port) {
> +        sbrec_fdb_delete(fdb_e);
> +    }
> +    sbrec_fdb_index_destroy_row(target);
> +}
> diff --git a/lib/fdb.h b/lib/fdb.h
> new file mode 100644
> index 000000000..7b64bc7f2
> --- /dev/null
> +++ b/lib/fdb.h
> @@ -0,0 +1,26 @@
> +/* Copyright (c) 2024, Red Hat, Inc.

This is probably a copy/paste error.  Should it say Nutanix?

> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef OVN_FDB_H
> +#define OVN_FDB_H 1
> +
> +#include "ovsdb-idl.h"
> +#include "ovn-sb-idl.h"
> +
> +void
> +delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
> +                   uint32_t dp_key, uint32_t port_key);
> +
> +#endif
> diff --git a/tests/ovn.at b/tests/ovn.at
> index b31afbfb3..a0df87196 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -31706,6 +31706,63 @@ OVN_CLEANUP([hv1], [hv2], [hv3])
>  AT_CLEANUP
>  ])
>  
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([OVN FDB cleanup when vif goes down])

No real need for "OVN" in the test case name.  When running
it looks like this:

OVN end-to-end tests

405: OVN FDB cleanup when vif goes down -- parallelization=yes -- ovn_monitor_all=yes ok

> +ovn_start
> +
> +net_add n1
> +
> +AT_CHECK([ovn-nbctl ls-add ls0])
> +
> +AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port options:localnet_learn_fdb=true])
> +
> +AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
> +AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "00:00:00:00:10:10 192.168.10.10" unknown])

All these "AT_CHECK" can be replaced with "check".

> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +ovs-vsctl -- add-port br-int vif0 -- \
> +    set interface vif0 external-ids:iface-id=vif0 \
> +    options:tx_pcap=hv1/vif0-tx.pcap \
> +    options:rxq_pcap=hv1/vif0-rx.pcap \
> +    ofport-request=1
> +ovs-vsctl -- add-port br-phys ext0 -- \
> +    set interface ext0 \
> +    options:tx_pcap=hv1/ext0-tx.pcap \
> +    options:rxq_pcap=hv1/ext0-rx.pcap \
> +    ofport-request=2
> +ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
> +
> +
> +wait_for_ports_up

You should probably add a sync here:

check ovn-nbctl --wait=hv sync

> +ls0_key=$(fetch_column datapath_binding tunnel_key external_ids:name=ls0)
> +vif0_key=$(fetch_column port_binding tunnel_key logical_port=vif0)
> +
> +ovn-sbctl create FDB mac="00\:00\:00\:00\:10\:10" dp_key=$ls0_key port_key=$vif0_key
> +ovn-sbctl create FDB mac="00\:00\:00\:00\:20\:20" dp_key=$ls0_key port_key=$vif0_key
> +
> +ovn-sbctl list fdb

This is not really needed for the test, it can probably be removed.

> +# vif going down should purge all relevant FDB entries
> +ovs-vsctl del-port br-int vif0
> +OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up vif0` = xdown])
> +AT_CHECK([ovn-nbctl --wait=hv sync])

Please replace AT_CHECK with "check".

> +ovn-sbctl list fdb

This one can be removed.

> +OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:10:10") = 0])
> +OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:20:20") = 0])> +
> +# vif0 should not be a local datapath in hv1 as vif0 is down
> +# Hence ln_port should not be a related port.
> +OVN_CLEANUP([hv1
> +ln_port])> +AT_CLEANUP
> +])
> +
>  OVN_FOR_EACH_NORTHD([
>  AT_SETUP([container port changed to normal port and then deleted])
>  ovn_start

Regards,
Dumitru
Shibir Basak Aug. 14, 2024, 11:24 a.m. UTC | #2
Hi Dumitru,

Thanks for your comments.
I have responded inline.

On 07-Aug-2024, at 4:58 PM, Dumitru Ceara <dceara@redhat.com> wrote:

!-------------------------------------------------------------------|
 CAUTION: External Email

|-------------------------------------------------------------------!

On 8/2/24 17:07, Shibir Basak wrote:
When a VIF port goes down, all FDB entries associated
with it are deleted.
This helps is reducing traffic outage due to stale FDB
entries present until the lsp is removed.

Acked-by: Naveen Yerramneni <naveen.yerramneni@nutanix.com>
Signed-off-by: Shibir Basak <shibir.basak@nutanix.com>
---

Hi Shibir,

Thanks for the patch!

As this is a bug fix it probably needs a:
Fixes: dd94f1266ca4 ("northd: MAC learning: Add logical flows for fdb.”)
Sure

I have a few more comments below.

controller/ovn-controller.c |  5 ++++
controller/pinctrl.c        | 40 ++++++++++++++++++++------
controller/pinctrl.h        |  1 +
lib/automake.mk             |  4 ++-
lib/fdb.c                   | 33 +++++++++++++++++++++
lib/fdb.h                   | 26 +++++++++++++++++
tests/ovn.at                | 57 +++++++++++++++++++++++++++++++++++++
7 files changed, 157 insertions(+), 9 deletions(-)
create mode 100644 lib/fdb.c
create mode 100644 lib/fdb.h

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 6a7cca673..2acdddd09 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -4936,6 +4936,10 @@ main(int argc, char *argv[])
        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
                                  &sbrec_fdb_col_mac,
                                  &sbrec_fdb_col_dp_key);
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port
+        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
+                                  &sbrec_fdb_col_port_key,
+                                  &sbrec_fdb_col_dp_key);
    struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
        = mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
    struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
@@ -5645,6 +5649,7 @@ main(int argc, char *argv[])
                                    sbrec_igmp_group,
                                    sbrec_ip_multicast,
                                    sbrec_fdb_by_dp_key_mac,
+                                    sbrec_fdb_by_dp_and_port,
                                    sbrec_dns_table_get(ovnsb_idl_loop.idl),
                                    sbrec_controller_event_table_get(
                                        ovnsb_idl_loop.idl),
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 7cbb0cf81..9e37e1693 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -62,6 +62,7 @@
#include "lflow.h"
#include "ip-mcast.h"
#include "ovn-sb-idl.h"
+#include "lib/fdb.h"

VLOG_DEFINE_THIS_MODULE(pinctrl);

@@ -236,6 +237,7 @@ static void send_garp_rarp_prepare(
    struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
    struct ovsdb_idl_index *sbrec_port_binding_by_name,
    struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
    const struct ovsrec_bridge *,
    const struct sbrec_chassis *,
    const struct hmap *local_datapaths,
@@ -4145,6 +4147,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
            struct ovsdb_idl_index *sbrec_igmp_groups,
            struct ovsdb_idl_index *sbrec_ip_multicast_opts,
            struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
+            struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
            const struct sbrec_dns_table *dns_table,
            const struct sbrec_controller_event_table *ce_table,
            const struct sbrec_service_monitor_table *svc_mon_table,
@@ -4166,7 +4169,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                           sbrec_port_binding_by_key, chassis);
    send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath,
                           sbrec_port_binding_by_name,
-                           sbrec_mac_binding_by_lport_ip, br_int, chassis,
+                           sbrec_mac_binding_by_lport_ip,
+                           sbrec_fdb_by_dp_and_port, br_int, chassis,
                           local_datapaths, active_tunnels, ovs_table);
    prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
    prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
@@ -5017,7 +5021,6 @@ wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn)
    }
}

-

Unrelated change.  This line feed should not be removed.
Sure, will revert in v2.

/*
 * Send gratuitous/reverse ARP for vif on localnet.
 *
@@ -5034,6 +5037,7 @@ struct garp_rarp_data {
                                  * announcement (in msecs). */
    uint32_t dp_key;             /* Datapath used to output this GARP. */
    uint32_t port_key;           /* Port to inject the GARP into. */
+    bool enabled;                /* is garp/rarp announcement enabled */
};

/* Contains GARPs/RARPs to be sent. Protected by pinctrl_mutex*/
@@ -5054,7 +5058,7 @@ destroy_send_garps_rarps(void)
/* Runs with in the main ovn-controller thread context. */
static void
add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
-              uint32_t dp_key, uint32_t port_key)
+              uint32_t dp_key, uint32_t port_key, bool enabled)
{
    struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
    garp_rarp->ea = ea;
@@ -5063,6 +5067,7 @@ add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
    garp_rarp->backoff = 1000; /* msec. */
    garp_rarp->dp_key = dp_key;
    garp_rarp->port_key = port_key;
+    garp_rarp->enabled = enabled;
    shash_add(&send_garp_rarp_data, name, garp_rarp);

    /* Notify pinctrl_handler so that it can wakeup and process
@@ -5081,6 +5086,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                      bool garp_continuous)
{
    volatile struct garp_rarp_data *garp_rarp = NULL;
+    bool is_garp_rarp_enabled = !smap_get_bool(&binding_rec->options,
+                                               "disable_garp_rarp", false);

    /* Skip localports as they don't need to be announced */
    if (!strcmp(binding_rec->type, "localport")) {
@@ -5104,6 +5111,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                if (garp_rarp) {
                    garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
                    garp_rarp->port_key = binding_rec->tunnel_key;
+                    garp_rarp->enabled = is_garp_rarp_enabled;
                    if (garp_max_timeout != garp_rarp_max_timeout ||
                        garp_continuous != garp_rarp_continuous) {
                        /* reset backoff */
@@ -5114,7 +5122,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                    add_garp_rarp(name, laddrs->ea,
                                  laddrs->ipv4_addrs[i].addr,
                                  binding_rec->datapath->tunnel_key,
-                                  binding_rec->tunnel_key);
+                                  binding_rec->tunnel_key,
+                                  is_garp_rarp_enabled);
                    send_garp_locally(ovnsb_idl_txn,
                                      sbrec_mac_binding_by_lport_ip,
                                      local_datapaths, binding_rec, laddrs->ea,
@@ -5134,6 +5143,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                    if (garp_rarp) {
                        garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
                        garp_rarp->port_key = binding_rec->tunnel_key;
+                        garp_rarp->enabled = is_garp_rarp_enabled;
                        if (garp_max_timeout != garp_rarp_max_timeout ||
                            garp_continuous != garp_rarp_continuous) {
                            /* reset backoff */
@@ -5143,7 +5153,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                    } else {
                        add_garp_rarp(name, laddrs->ea,
                                      0, binding_rec->datapath->tunnel_key,
-                                      binding_rec->tunnel_key);
+                                      binding_rec->tunnel_key,
+                                      is_garp_rarp_enabled);
                    }
                    free(name);
            }
@@ -5159,6 +5170,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
    if (garp_rarp) {
        garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
        garp_rarp->port_key = binding_rec->tunnel_key;
+        garp_rarp->enabled = is_garp_rarp_enabled;
        if (garp_max_timeout != garp_rarp_max_timeout ||
            garp_continuous != garp_rarp_continuous) {
            /* reset backoff */
@@ -5184,7 +5196,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
        add_garp_rarp(binding_rec->logical_port,
                      laddrs.ea, ip,
                      binding_rec->datapath->tunnel_key,
-                      binding_rec->tunnel_key);
+                      binding_rec->tunnel_key,
+                      is_garp_rarp_enabled);
        if (ip) {
            send_garp_locally(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
                              local_datapaths, binding_rec, laddrs.ea, ip);
@@ -6547,6 +6560,10 @@ send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
    long long int current_time = time_msec();
    *send_garp_rarp_time = LLONG_MAX;
    SHASH_FOR_EACH (iter, &send_garp_rarp_data) {
+        struct garp_rarp_data *garp_rarp = iter->data;
+        if (!garp_rarp->enabled) {
+            continue;
+        }
        long long int next_announce = send_garp_rarp(swconn, iter->data,
                                                     current_time);
        if (*send_garp_rarp_time > next_announce) {
@@ -6562,6 +6579,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
                       struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
                       struct ovsdb_idl_index *sbrec_port_binding_by_name,
                       struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+                       struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
                       const struct ovsrec_bridge *br_int,
                       const struct sbrec_chassis *chassis,
                       const struct hmap *local_datapaths,
@@ -6597,12 +6615,18 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
                               &nat_ip_keys, &local_l3gw_ports,
                               chassis, active_tunnels,
                               &nat_addresses);
+
    /* For deleted ports and deleted nat ips, remove from
     * send_garp_rarp_data. */
    struct shash_node *iter;
    SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
        if (!sset_contains(&localnet_vifs, iter->name) &&
            !sset_contains(&nat_ip_keys, iter->name)) {
+            struct garp_rarp_data *data = iter->data;
+            /* Cleanup FDB entries for inactive ports */
+            delete_fdb_entries(sbrec_fdb_by_dp_and_port,
+                               data->dp_key,
+                               data->port_key);

It feels wrong to use the garp/rarp mechanism to cleanup FDB on port
deletion.  It also forces the addition of the 'is_garp_rarp_enabled'
flag and creating 'garp_rarp_data' structures for cases in which we
don't really need them (we send no garps/rarps).

What if we do this in a different way?

In binding.c we set port_binding.up to "false" whenever ports are
removed.  Should we cleanup stale FDB entries there instead?

There are two places where we do that:
binding_lport_set_down()
port_binding_set_down()

What do you think?

Here, we are also trying to address the scenario where the vif port link state goes down
and not only handle scenarios such as port removal or iface-id detachment.

For example, when a VM migration is completed and the hypervisor is in process of removing
the VM on the source side, the VIF port state goes down. We need to detect the port link state
down event and delete the FDB entries as soon as possible to reduce traffic loss.

We clubbed it with garp_rarp code because this code is already tracking the VLAN bridged ports.


            send_garp_rarp_delete(iter->name);
        }
    }
@@ -6612,7 +6636,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
    SSET_FOR_EACH (iface_id, &localnet_vifs) {
        const struct sbrec_port_binding *pb = lport_lookup_by_name(
            sbrec_port_binding_by_name, iface_id);
-        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp", false)) {
+        if (pb) {
            send_garp_rarp_update(ovnsb_idl_txn,
                                  sbrec_mac_binding_by_lport_ip,
                                  local_datapaths, pb, &nat_addresses,
@@ -6625,7 +6649,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
        const struct sbrec_port_binding *pb
            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
-        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp", false)) {
+        if (pb) {
            send_garp_rarp_update(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
                                  local_datapaths, pb, &nat_addresses,
                                  garp_max_timeout, garp_continuous);
diff --git a/controller/pinctrl.h b/controller/pinctrl.h
index 3462b670c..6613c87e4 100644
--- a/controller/pinctrl.h
+++ b/controller/pinctrl.h
@@ -49,6 +49,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                 struct ovsdb_idl_index *sbrec_igmp_groups,
                 struct ovsdb_idl_index *sbrec_ip_multicast_opts,
                 struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
+                 struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
                 const struct sbrec_dns_table *,
                 const struct sbrec_controller_event_table *,
                 const struct sbrec_service_monitor_table *,
diff --git a/lib/automake.mk b/lib/automake.mk
index b69e854b0..3ff098331 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -49,7 +49,9 @@ lib_libovn_la_SOURCES = \
lib/stopwatch-names.h \
lib/vif-plug-provider.h \
lib/vif-plug-provider.c \
- lib/vif-plug-providers/dummy/vif-plug-dummy.c
+ lib/vif-plug-providers/dummy/vif-plug-dummy.c \
+ lib/fdb.c \
+ lib/fdb.h
nodist_lib_libovn_la_SOURCES = \
lib/ovn-dirs.c \
lib/ovn-nb-idl.c \
diff --git a/lib/fdb.c b/lib/fdb.c
new file mode 100644
index 000000000..00e3ac902
--- /dev/null
+++ b/lib/fdb.c
@@ -0,0 +1,33 @@
+/* Copyright (c) 2024, Red Hat, Inc.

This is probably a copy/paste error.  Should it say Nutanix?
Sure, we will update appropriately in v2.

+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "lib/fdb.h"
+
+void
+delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
+                   uint32_t dp_key, uint32_t port_key)
+{
+    struct sbrec_fdb *target =
+        sbrec_fdb_index_init_row(sbrec_fdb_by_dp_and_port);
+    sbrec_fdb_index_set_dp_key(target, dp_key);
+    sbrec_fdb_index_set_port_key(target, port_key);
+
+    struct sbrec_fdb *fdb_e;
+    SBREC_FDB_FOR_EACH_EQUAL (fdb_e, target, sbrec_fdb_by_dp_and_port) {
+        sbrec_fdb_delete(fdb_e);
+    }
+    sbrec_fdb_index_destroy_row(target);
+}
diff --git a/lib/fdb.h b/lib/fdb.h
new file mode 100644
index 000000000..7b64bc7f2
--- /dev/null
+++ b/lib/fdb.h
@@ -0,0 +1,26 @@
+/* Copyright (c) 2024, Red Hat, Inc.

This is probably a copy/paste error.  Should it say Nutanix?
Sure, we will update appropriately in v2.

+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_FDB_H
+#define OVN_FDB_H 1
+
+#include "ovsdb-idl.h"
+#include "ovn-sb-idl.h"
+
+void
+delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
+                   uint32_t dp_key, uint32_t port_key);
+
+#endif
diff --git a/tests/ovn.at<http://ovn.at/> b/tests/ovn.at<http://ovn.at/>
index b31afbfb3..a0df87196 100644
--- a/tests/ovn.at<http://ovn.at/>
+++ b/tests/ovn.at<http://ovn.at/>
@@ -31706,6 +31706,63 @@ OVN_CLEANUP([hv1], [hv2], [hv3])
AT_CLEANUP
])

+OVN_FOR_EACH_NORTHD([
+AT_SETUP([OVN FDB cleanup when vif goes down])

No real need for "OVN" in the test case name.  When running
it looks like this:

OVN end-to-end tests

405: OVN FDB cleanup when vif goes down -- parallelization=yes -- ovn_monitor_all=yes ok
Sure, we will update appropriately in v2.

+ovn_start
+
+net_add n1
+
+AT_CHECK([ovn-nbctl ls-add ls0])
+
+AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
+AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
+AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
+AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
+AT_CHECK([ovn-nbctl set logical_switch_port ln_port options:localnet_learn_fdb=true])
+
+AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
+AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "00:00:00:00:10:10 192.168.10.10" unknown])

All these "AT_CHECK" can be replaced with "check".
Sure, we will address test related comments appropriately in v2.

+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int vif0 -- \
+    set interface vif0 external-ids:iface-id=vif0 \
+    options:tx_pcap=hv1/vif0-tx.pcap \
+    options:rxq_pcap=hv1/vif0-rx.pcap \
+    ofport-request=1
+ovs-vsctl -- add-port br-phys ext0 -- \
+    set interface ext0 \
+    options:tx_pcap=hv1/ext0-tx.pcap \
+    options:rxq_pcap=hv1/ext0-rx.pcap \
+    ofport-request=2
+ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
+
+
+wait_for_ports_up

You should probably add a sync here:

check ovn-nbctl --wait=hv sync

+ls0_key=$(fetch_column datapath_binding tunnel_key external_ids:name=ls0)
+vif0_key=$(fetch_column port_binding tunnel_key logical_port=vif0)
+
+ovn-sbctl create FDB mac="00\:00\:00\:00\:10\:10" dp_key=$ls0_key port_key=$vif0_key
+ovn-sbctl create FDB mac="00\:00\:00\:00\:20\:20" dp_key=$ls0_key port_key=$vif0_key
+
+ovn-sbctl list fdb

This is not really needed for the test, it can probably be removed.

+# vif going down should purge all relevant FDB entries
+ovs-vsctl del-port br-int vif0
+OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up vif0` = xdown])
+AT_CHECK([ovn-nbctl --wait=hv sync])

Please replace AT_CHECK with "check".

+ovn-sbctl list fdb

This one can be removed.

+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:10:10") = 0])
+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:20:20") = 0])> +
+# vif0 should not be a local datapath in hv1 as vif0 is down
+# Hence ln_port should not be a related port.
+OVN_CLEANUP([hv1
+ln_port])> +AT_CLEANUP
+])
+
OVN_FOR_EACH_NORTHD([
AT_SETUP([container port changed to normal port and then deleted])
ovn_start

Regards,
Dumitru
Dumitru Ceara Aug. 14, 2024, 7:50 p.m. UTC | #3
On 8/14/24 13:24, Shibir Basak wrote:
> Hi Dumitru,
> 
> Thanks for your comments.
> I have responded inline.
> 
>> On 07-Aug-2024, at 4:58 PM, Dumitru Ceara <dceara@redhat.com> wrote:
>>
>> !-------------------------------------------------------------------|
>>  CAUTION: External Email
>>
>> |-------------------------------------------------------------------!
>>
>> On 8/2/24 17:07, Shibir Basak wrote:
>>> When a VIF port goes down, all FDB entries associated
>>> with it are deleted.
>>> This helps is reducing traffic outage due to stale FDB
>>> entries present until the lsp is removed.
>>>
>>> Acked-by: Naveen Yerramneni <naveen.yerramneni@nutanix.com>
>>> Signed-off-by: Shibir Basak <shibir.basak@nutanix.com>
>>> ---
>>
>> Hi Shibir,
>>
>> Thanks for the patch!
>>
>> As this is a bug fix it probably needs a:
>> Fixes: dd94f1266ca4 ("northd: MAC learning: Add logical flows for fdb.”)
> Sure
>>
>> I have a few more comments below.
>>
>>> controller/ovn-controller.c |  5 ++++
>>> controller/pinctrl.c        | 40 ++++++++++++++++++++------
>>> controller/pinctrl.h        |  1 +
>>> lib/automake.mk             |  4 ++-
>>> lib/fdb.c                   | 33 +++++++++++++++++++++
>>> lib/fdb.h                   | 26 +++++++++++++++++
>>> tests/ovn.at                | 57 +++++++++++++++++++++++++++++++++++++
>>> 7 files changed, 157 insertions(+), 9 deletions(-)
>>> create mode 100644 lib/fdb.c
>>> create mode 100644 lib/fdb.h
>>>
>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>>> index 6a7cca673..2acdddd09 100644
>>> --- a/controller/ovn-controller.c
>>> +++ b/controller/ovn-controller.c
>>> @@ -4936,6 +4936,10 @@ main(int argc, char *argv[])
>>>         = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
>>>                                   &sbrec_fdb_col_mac,
>>>                                   &sbrec_fdb_col_dp_key);
>>> +    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port
>>> +        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
>>> +                                  &sbrec_fdb_col_port_key,
>>> +                                  &sbrec_fdb_col_dp_key);
>>>     struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
>>>         = mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
>>>     struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
>>> @@ -5645,6 +5649,7 @@ main(int argc, char *argv[])
>>>                                     sbrec_igmp_group,
>>>                                     sbrec_ip_multicast,
>>>                                     sbrec_fdb_by_dp_key_mac,
>>> +                                    sbrec_fdb_by_dp_and_port,
>>>                                     sbrec_dns_table_get(ovnsb_idl_loop.idl),
>>>                                     sbrec_controller_event_table_get(
>>>                                         ovnsb_idl_loop.idl),
>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
>>> index 7cbb0cf81..9e37e1693 100644
>>> --- a/controller/pinctrl.c
>>> +++ b/controller/pinctrl.c
>>> @@ -62,6 +62,7 @@
>>> #include "lflow.h"
>>> #include "ip-mcast.h"
>>> #include "ovn-sb-idl.h"
>>> +#include "lib/fdb.h"
>>>
>>> VLOG_DEFINE_THIS_MODULE(pinctrl);
>>>
>>> @@ -236,6 +237,7 @@ static void send_garp_rarp_prepare(
>>>     struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
>>>     struct ovsdb_idl_index *sbrec_port_binding_by_name,
>>>     struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
>>> +    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>>     const struct ovsrec_bridge *,
>>>     const struct sbrec_chassis *,
>>>     const struct hmap *local_datapaths,
>>> @@ -4145,6 +4147,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>             struct ovsdb_idl_index *sbrec_igmp_groups,
>>>             struct ovsdb_idl_index *sbrec_ip_multicast_opts,
>>>             struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
>>> +            struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>>             const struct sbrec_dns_table *dns_table,
>>>             const struct sbrec_controller_event_table *ce_table,
>>>             const struct sbrec_service_monitor_table *svc_mon_table,
>>> @@ -4166,7 +4169,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>                            sbrec_port_binding_by_key, chassis);
>>>     send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath,
>>>                            sbrec_port_binding_by_name,
>>> -                           sbrec_mac_binding_by_lport_ip, br_int,
>>> chassis,
>>> +                           sbrec_mac_binding_by_lport_ip,
>>> +                           sbrec_fdb_by_dp_and_port, br_int, chassis,
>>>                            local_datapaths, active_tunnels, ovs_table);
>>>     prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
>>>     prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
>>> @@ -5017,7 +5021,6 @@ wait_put_mac_bindings(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn)
>>>     }
>>> }
>>>
>>> -
>>
>> Unrelated change.  This line feed should not be removed.
> Sure, will revert in v2.
>>
>>> /*
>>>  * Send gratuitous/reverse ARP for vif on localnet.
>>>  *
>>> @@ -5034,6 +5037,7 @@ struct garp_rarp_data {
>>>                                   * announcement (in msecs). */
>>>     uint32_t dp_key;             /* Datapath used to output this GARP. */
>>>     uint32_t port_key;           /* Port to inject the GARP into. */
>>> +    bool enabled;                /* is garp/rarp announcement enabled */
>>> };
>>>
>>> /* Contains GARPs/RARPs to be sent. Protected by pinctrl_mutex*/
>>> @@ -5054,7 +5058,7 @@ destroy_send_garps_rarps(void)
>>> /* Runs with in the main ovn-controller thread context. */
>>> static void
>>> add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
>>> -              uint32_t dp_key, uint32_t port_key)
>>> +              uint32_t dp_key, uint32_t port_key, bool enabled)
>>> {
>>>     struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
>>>     garp_rarp->ea = ea;
>>> @@ -5063,6 +5067,7 @@ add_garp_rarp(const char *name, const struct
>>> eth_addr ea, ovs_be32 ip,
>>>     garp_rarp->backoff = 1000; /* msec. */
>>>     garp_rarp->dp_key = dp_key;
>>>     garp_rarp->port_key = port_key;
>>> +    garp_rarp->enabled = enabled;
>>>     shash_add(&send_garp_rarp_data, name, garp_rarp);
>>>
>>>     /* Notify pinctrl_handler so that it can wakeup and process
>>> @@ -5081,6 +5086,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>                       bool garp_continuous)
>>> {
>>>     volatile struct garp_rarp_data *garp_rarp = NULL;
>>> +    bool is_garp_rarp_enabled = !smap_get_bool(&binding_rec->options,
>>> +                                               "disable_garp_rarp",
>>> false);
>>>
>>>     /* Skip localports as they don't need to be announced */
>>>     if (!strcmp(binding_rec->type, "localport")) {
>>> @@ -5104,6 +5111,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>                 if (garp_rarp) {
>>>                     garp_rarp->dp_key =
>>> binding_rec->datapath->tunnel_key;
>>>                     garp_rarp->port_key = binding_rec->tunnel_key;
>>> +                    garp_rarp->enabled = is_garp_rarp_enabled;
>>>                     if (garp_max_timeout != garp_rarp_max_timeout ||
>>>                         garp_continuous != garp_rarp_continuous) {
>>>                         /* reset backoff */
>>> @@ -5114,7 +5122,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>                     add_garp_rarp(name, laddrs->ea,
>>>                                   laddrs->ipv4_addrs[i].addr,
>>>                                   binding_rec->datapath->tunnel_key,
>>> -                                  binding_rec->tunnel_key);
>>> +                                  binding_rec->tunnel_key,
>>> +                                  is_garp_rarp_enabled);
>>>                     send_garp_locally(ovnsb_idl_txn,
>>>                                       sbrec_mac_binding_by_lport_ip,
>>>                                       local_datapaths, binding_rec,
>>> laddrs->ea,
>>> @@ -5134,6 +5143,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>                     if (garp_rarp) {
>>>                         garp_rarp->dp_key =
>>> binding_rec->datapath->tunnel_key;
>>>                         garp_rarp->port_key = binding_rec->tunnel_key;
>>> +                        garp_rarp->enabled = is_garp_rarp_enabled;
>>>                         if (garp_max_timeout != garp_rarp_max_timeout ||
>>>                             garp_continuous != garp_rarp_continuous) {
>>>                             /* reset backoff */
>>> @@ -5143,7 +5153,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>                     } else {
>>>                         add_garp_rarp(name, laddrs->ea,
>>>                                       0,
>>> binding_rec->datapath->tunnel_key,
>>> -                                      binding_rec->tunnel_key);
>>> +                                      binding_rec->tunnel_key,
>>> +                                      is_garp_rarp_enabled);
>>>                     }
>>>                     free(name);
>>>             }
>>> @@ -5159,6 +5170,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>     if (garp_rarp) {
>>>         garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
>>>         garp_rarp->port_key = binding_rec->tunnel_key;
>>> +        garp_rarp->enabled = is_garp_rarp_enabled;
>>>         if (garp_max_timeout != garp_rarp_max_timeout ||
>>>             garp_continuous != garp_rarp_continuous) {
>>>             /* reset backoff */
>>> @@ -5184,7 +5196,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>         add_garp_rarp(binding_rec->logical_port,
>>>                       laddrs.ea, ip,
>>>                       binding_rec->datapath->tunnel_key,
>>> -                      binding_rec->tunnel_key);
>>> +                      binding_rec->tunnel_key,
>>> +                      is_garp_rarp_enabled);
>>>         if (ip) {
>>>             send_garp_locally(ovnsb_idl_txn,
>>> sbrec_mac_binding_by_lport_ip,
>>>                               local_datapaths, binding_rec,
>>> laddrs.ea, ip);
>>> @@ -6547,6 +6560,10 @@ send_garp_rarp_run(struct rconn *swconn, long
>>> long int *send_garp_rarp_time)
>>>     long long int current_time = time_msec();
>>>     *send_garp_rarp_time = LLONG_MAX;
>>>     SHASH_FOR_EACH (iter, &send_garp_rarp_data) {
>>> +        struct garp_rarp_data *garp_rarp = iter->data;
>>> +        if (!garp_rarp->enabled) {
>>> +            continue;
>>> +        }
>>>         long long int next_announce = send_garp_rarp(swconn, iter->data,
>>>                                                      current_time);
>>>         if (*send_garp_rarp_time > next_announce) {
>>> @@ -6562,6 +6579,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>                        struct ovsdb_idl_index
>>> *sbrec_port_binding_by_datapath,
>>>                        struct ovsdb_idl_index
>>> *sbrec_port_binding_by_name,
>>>                        struct ovsdb_idl_index
>>> *sbrec_mac_binding_by_lport_ip,
>>> +                       struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>>                        const struct ovsrec_bridge *br_int,
>>>                        const struct sbrec_chassis *chassis,
>>>                        const struct hmap *local_datapaths,
>>> @@ -6597,12 +6615,18 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>                                &nat_ip_keys, &local_l3gw_ports,
>>>                                chassis, active_tunnels,
>>>                                &nat_addresses);
>>> +
>>>     /* For deleted ports and deleted nat ips, remove from
>>>      * send_garp_rarp_data. */
>>>     struct shash_node *iter;
>>>     SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
>>>         if (!sset_contains(&localnet_vifs, iter->name) &&
>>>             !sset_contains(&nat_ip_keys, iter->name)) {
>>> +            struct garp_rarp_data *data = iter->data;
>>> +            /* Cleanup FDB entries for inactive ports */
>>> +            delete_fdb_entries(sbrec_fdb_by_dp_and_port,
>>> +                               data->dp_key,
>>> +                               data->port_key);
>>
>> It feels wrong to use the garp/rarp mechanism to cleanup FDB on port
>> deletion.  It also forces the addition of the 'is_garp_rarp_enabled'
>> flag and creating 'garp_rarp_data' structures for cases in which we
>> don't really need them (we send no garps/rarps).
>>
>> What if we do this in a different way?
>>
>> In binding.c we set port_binding.up to "false" whenever ports are
>> removed.  Should we cleanup stale FDB entries there instead?
>>
>> There are two places where we do that:
>> binding_lport_set_down()
>> port_binding_set_down()
>>
>> What do you think?
> 
> Here, we are also trying to address the scenario where the vif port link
> state goes down 
> and not only handle scenarios such as port removal or iface-id detachment. 
> 
> For example, when a VM migration is completed and the hypervisor is in
> process of removing 
> the VM on the source side, the VIF port state goes down. We need to
> detect the port link state 
> down event and delete the FDB entries as soon as possible to reduce
> traffic loss. 
> 
> We clubbed it with garp_rarp code because this code is already tracking
> the VLAN bridged ports.
> 

Sorry but I still don't really understand how you're addressing the VIF
port link going down.  Do you mind detailing that part, please?

Would it make sense to add a system test (system-ovn.at) for this case
too then?

I still think we should try to not overload the semantics of the
garp/rarp code in pinctrl.c but maybe I'm misunderstanding the situation.

Thanks,
Dumitru

>>
>>>             send_garp_rarp_delete(iter->name);
>>>         }
>>>     }
>>> @@ -6612,7 +6636,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>     SSET_FOR_EACH (iface_id, &localnet_vifs) {
>>>         const struct sbrec_port_binding *pb = lport_lookup_by_name(
>>>             sbrec_port_binding_by_name, iface_id);
>>> -        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp",
>>> false)) {
>>> +        if (pb) {
>>>             send_garp_rarp_update(ovnsb_idl_txn,
>>>                                   sbrec_mac_binding_by_lport_ip,
>>>                                   local_datapaths, pb, &nat_addresses,
>>> @@ -6625,7 +6649,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>     SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
>>>         const struct sbrec_port_binding *pb
>>>             = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
>>> -        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp",
>>> false)) {
>>> +        if (pb) {
>>>             send_garp_rarp_update(ovnsb_idl_txn,
>>> sbrec_mac_binding_by_lport_ip,
>>>                                   local_datapaths, pb, &nat_addresses,
>>>                                   garp_max_timeout, garp_continuous);
>>> diff --git a/controller/pinctrl.h b/controller/pinctrl.h
>>> index 3462b670c..6613c87e4 100644
>>> --- a/controller/pinctrl.h
>>> +++ b/controller/pinctrl.h
>>> @@ -49,6 +49,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>                  struct ovsdb_idl_index *sbrec_igmp_groups,
>>>                  struct ovsdb_idl_index *sbrec_ip_multicast_opts,
>>>                  struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
>>> +                 struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>>                  const struct sbrec_dns_table *,
>>>                  const struct sbrec_controller_event_table *,
>>>                  const struct sbrec_service_monitor_table *,
>>> diff --git a/lib/automake.mk b/lib/automake.mk
>>> index b69e854b0..3ff098331 100644
>>> --- a/lib/automake.mk
>>> +++ b/lib/automake.mk
>>> @@ -49,7 +49,9 @@ lib_libovn_la_SOURCES = \
>>> lib/stopwatch-names.h \
>>> lib/vif-plug-provider.h \
>>> lib/vif-plug-provider.c \
>>> -lib/vif-plug-providers/dummy/vif-plug-dummy.c
>>> +lib/vif-plug-providers/dummy/vif-plug-dummy.c \
>>> +lib/fdb.c \
>>> +lib/fdb.h
>>> nodist_lib_libovn_la_SOURCES = \
>>> lib/ovn-dirs.c \
>>> lib/ovn-nb-idl.c \
>>> diff --git a/lib/fdb.c b/lib/fdb.c
>>> new file mode 100644
>>> index 000000000..00e3ac902
>>> --- /dev/null
>>> +++ b/lib/fdb.c
>>> @@ -0,0 +1,33 @@
>>> +/* Copyright (c) 2024, Red Hat, Inc.
>>
>> This is probably a copy/paste error.  Should it say Nutanix?
> Sure, we will update appropriately in v2.
>>
>>> + *
>>> + * Licensed under the Apache License, Version 2.0 (the "License");
>>> + * you may not use this file except in compliance with the License.
>>> + * You may obtain a copy of the License at:
>>> + *
>>> + *
>>>     https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=> 
>>> + *
>>> + * Unless required by applicable law or agreed to in writing, software
>>> + * distributed under the License is distributed on an "AS IS" BASIS,
>>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>>> implied.
>>> + * See the License for the specific language governing permissions and
>>> + * limitations under the License.
>>> + */
>>> +
>>> +#include <config.h>
>>> +#include "lib/fdb.h"
>>> +
>>> +void
>>> +delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>> +                   uint32_t dp_key, uint32_t port_key)
>>> +{
>>> +    struct sbrec_fdb *target =
>>> +        sbrec_fdb_index_init_row(sbrec_fdb_by_dp_and_port);
>>> +    sbrec_fdb_index_set_dp_key(target, dp_key);
>>> +    sbrec_fdb_index_set_port_key(target, port_key);
>>> +
>>> +    struct sbrec_fdb *fdb_e;
>>> +    SBREC_FDB_FOR_EACH_EQUAL (fdb_e, target, sbrec_fdb_by_dp_and_port) {
>>> +        sbrec_fdb_delete(fdb_e);
>>> +    }
>>> +    sbrec_fdb_index_destroy_row(target);
>>> +}
>>> diff --git a/lib/fdb.h b/lib/fdb.h
>>> new file mode 100644
>>> index 000000000..7b64bc7f2
>>> --- /dev/null
>>> +++ b/lib/fdb.h
>>> @@ -0,0 +1,26 @@
>>> +/* Copyright (c) 2024, Red Hat, Inc.
>>
>> This is probably a copy/paste error.  Should it say Nutanix?
> Sure, we will update appropriately in v2.
>>
>>> + *
>>> + * Licensed under the Apache License, Version 2.0 (the "License");
>>> + * you may not use this file except in compliance with the License.
>>> + * You may obtain a copy of the License at:
>>> + *
>>> + *
>>>     https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=> 
>>> + *
>>> + * Unless required by applicable law or agreed to in writing, software
>>> + * distributed under the License is distributed on an "AS IS" BASIS,
>>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>>> implied.
>>> + * See the License for the specific language governing permissions and
>>> + * limitations under the License.
>>> + */
>>> +
>>> +#ifndef OVN_FDB_H
>>> +#define OVN_FDB_H 1
>>> +
>>> +#include "ovsdb-idl.h"
>>> +#include "ovn-sb-idl.h"
>>> +
>>> +void
>>> +delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>> +                   uint32_t dp_key, uint32_t port_key);
>>> +
>>> +#endif
>>> diff --git a/tests/ovn.at <http://ovn.at/> b/tests/ovn.at
>>> <http://ovn.at/>
>>> index b31afbfb3..a0df87196 100644
>>> --- a/tests/ovn.at <http://ovn.at/>
>>> +++ b/tests/ovn.at <http://ovn.at/>
>>> @@ -31706,6 +31706,63 @@ OVN_CLEANUP([hv1], [hv2], [hv3])
>>> AT_CLEANUP
>>> ])
>>>
>>> +OVN_FOR_EACH_NORTHD([
>>> +AT_SETUP([OVN FDB cleanup when vif goes down])
>>
>> No real need for "OVN" in the test case name.  When running
>> it looks like this:
>>
>> OVN end-to-end tests
>>
>> 405: OVN FDB cleanup when vif goes down -- parallelization=yes --
>> ovn_monitor_all=yes ok
> Sure, we will update appropriately in v2.
>>
>>> +ovn_start
>>> +
>>> +net_add n1
>>> +
>>> +AT_CHECK([ovn-nbctl ls-add ls0])
>>> +
>>> +AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
>>> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
>>> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
>>> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
>>> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port
>>> options:localnet_learn_fdb=true])
>>> +
>>> +AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
>>> +AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "00:00:00:00:10:10
>>> 192.168.10.10" unknown])
>>
>> All these "AT_CHECK" can be replaced with "check".
> Sure, we will address test related comments appropriately in v2.
>>
>>> +
>>> +sim_add hv1
>>> +as hv1
>>> +ovs-vsctl add-br br-phys
>>> +ovn_attach n1 br-phys 192.168.0.1
>>> +ovs-vsctl -- add-port br-int vif0 -- \
>>> +    set interface vif0 external-ids:iface-id=vif0 \
>>> +    options:tx_pcap=hv1/vif0-tx.pcap \
>>> +    options:rxq_pcap=hv1/vif0-rx.pcap \
>>> +    ofport-request=1
>>> +ovs-vsctl -- add-port br-phys ext0 -- \
>>> +    set interface ext0 \
>>> +    options:tx_pcap=hv1/ext0-tx.pcap \
>>> +    options:rxq_pcap=hv1/ext0-rx.pcap \
>>> +    ofport-request=2
>>> +ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
>>> +
>>> +
>>> +wait_for_ports_up
>>
>> You should probably add a sync here:
>>
>> check ovn-nbctl --wait=hv sync
>>
>>> +ls0_key=$(fetch_column datapath_binding tunnel_key
>>> external_ids:name=ls0)
>>> +vif0_key=$(fetch_column port_binding tunnel_key logical_port=vif0)
>>> +
>>> +ovn-sbctl create FDB mac="00\:00\:00\:00\:10\:10" dp_key=$ls0_key
>>> port_key=$vif0_key
>>> +ovn-sbctl create FDB mac="00\:00\:00\:00\:20\:20" dp_key=$ls0_key
>>> port_key=$vif0_key
>>> +
>>> +ovn-sbctl list fdb
>>
>> This is not really needed for the test, it can probably be removed.
>>
>>> +# vif going down should purge all relevant FDB entries
>>> +ovs-vsctl del-port br-int vif0
>>> +OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up vif0` = xdown])
>>> +AT_CHECK([ovn-nbctl --wait=hv sync])
>>
>> Please replace AT_CHECK with "check".
>>
>>> +ovn-sbctl list fdb
>>
>> This one can be removed.
>>
>>> +OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c
>>> "00:00:00:00:10:10") = 0])
>>> +OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c
>>> "00:00:00:00:20:20") = 0])> +
>>> +# vif0 should not be a local datapath in hv1 as vif0 is down
>>> +# Hence ln_port should not be a related port.
>>> +OVN_CLEANUP([hv1
>>> +ln_port])> +AT_CLEANUP
>>> +])
>>> +
>>> OVN_FOR_EACH_NORTHD([
>>> AT_SETUP([container port changed to normal port and then deleted])
>>> ovn_start
>>
>> Regards,
>> Dumitru
>
Shibir Basak Aug. 16, 2024, 2:06 p.m. UTC | #4
On 15-Aug-2024, at 1:20 AM, Dumitru Ceara <dceara@redhat.com> wrote:

!-------------------------------------------------------------------|
 CAUTION: External Email

|-------------------------------------------------------------------!

On 8/14/24 13:24, Shibir Basak wrote:
Hi Dumitru,

Thanks for your comments.
I have responded inline.

On 07-Aug-2024, at 4:58 PM, Dumitru Ceara <dceara@redhat.com> wrote:

!-------------------------------------------------------------------|
 CAUTION: External Email

|-------------------------------------------------------------------!

On 8/2/24 17:07, Shibir Basak wrote:
When a VIF port goes down, all FDB entries associated
with it are deleted.
This helps is reducing traffic outage due to stale FDB
entries present until the lsp is removed.

Acked-by: Naveen Yerramneni <naveen.yerramneni@nutanix.com>
Signed-off-by: Shibir Basak <shibir.basak@nutanix.com>
---

Hi Shibir,

Thanks for the patch!

As this is a bug fix it probably needs a:
Fixes: dd94f1266ca4 ("northd: MAC learning: Add logical flows for fdb.”)
Sure

I have a few more comments below.

controller/ovn-controller.c |  5 ++++
controller/pinctrl.c        | 40 ++++++++++++++++++++------
controller/pinctrl.h        |  1 +
lib/automake.mk             |  4 ++-
lib/fdb.c                   | 33 +++++++++++++++++++++
lib/fdb.h                   | 26 +++++++++++++++++
tests/ovn.at                | 57 +++++++++++++++++++++++++++++++++++++
7 files changed, 157 insertions(+), 9 deletions(-)
create mode 100644 lib/fdb.c
create mode 100644 lib/fdb.h

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 6a7cca673..2acdddd09 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -4936,6 +4936,10 @@ main(int argc, char *argv[])
        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
                                  &sbrec_fdb_col_mac,
                                  &sbrec_fdb_col_dp_key);
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port
+        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
+                                  &sbrec_fdb_col_port_key,
+                                  &sbrec_fdb_col_dp_key);
    struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
        = mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
    struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
@@ -5645,6 +5649,7 @@ main(int argc, char *argv[])
                                    sbrec_igmp_group,
                                    sbrec_ip_multicast,
                                    sbrec_fdb_by_dp_key_mac,
+                                    sbrec_fdb_by_dp_and_port,
                                    sbrec_dns_table_get(ovnsb_idl_loop.idl),
                                    sbrec_controller_event_table_get(
                                        ovnsb_idl_loop.idl),
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 7cbb0cf81..9e37e1693 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -62,6 +62,7 @@
#include "lflow.h"
#include "ip-mcast.h"
#include "ovn-sb-idl.h"
+#include "lib/fdb.h"

VLOG_DEFINE_THIS_MODULE(pinctrl);

@@ -236,6 +237,7 @@ static void send_garp_rarp_prepare(
    struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
    struct ovsdb_idl_index *sbrec_port_binding_by_name,
    struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
    const struct ovsrec_bridge *,
    const struct sbrec_chassis *,
    const struct hmap *local_datapaths,
@@ -4145,6 +4147,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
            struct ovsdb_idl_index *sbrec_igmp_groups,
            struct ovsdb_idl_index *sbrec_ip_multicast_opts,
            struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
+            struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
            const struct sbrec_dns_table *dns_table,
            const struct sbrec_controller_event_table *ce_table,
            const struct sbrec_service_monitor_table *svc_mon_table,
@@ -4166,7 +4169,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                           sbrec_port_binding_by_key, chassis);
    send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath,
                           sbrec_port_binding_by_name,
-                           sbrec_mac_binding_by_lport_ip, br_int,
chassis,
+                           sbrec_mac_binding_by_lport_ip,
+                           sbrec_fdb_by_dp_and_port, br_int, chassis,
                           local_datapaths, active_tunnels, ovs_table);
    prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
    prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
@@ -5017,7 +5021,6 @@ wait_put_mac_bindings(struct ovsdb_idl_txn
*ovnsb_idl_txn)
    }
}

-

Unrelated change.  This line feed should not be removed.
Sure, will revert in v2.

/*
 * Send gratuitous/reverse ARP for vif on localnet.
 *
@@ -5034,6 +5037,7 @@ struct garp_rarp_data {
                                  * announcement (in msecs). */
    uint32_t dp_key;             /* Datapath used to output this GARP. */
    uint32_t port_key;           /* Port to inject the GARP into. */
+    bool enabled;                /* is garp/rarp announcement enabled */
};

/* Contains GARPs/RARPs to be sent. Protected by pinctrl_mutex*/
@@ -5054,7 +5058,7 @@ destroy_send_garps_rarps(void)
/* Runs with in the main ovn-controller thread context. */
static void
add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
-              uint32_t dp_key, uint32_t port_key)
+              uint32_t dp_key, uint32_t port_key, bool enabled)
{
    struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
    garp_rarp->ea = ea;
@@ -5063,6 +5067,7 @@ add_garp_rarp(const char *name, const struct
eth_addr ea, ovs_be32 ip,
    garp_rarp->backoff = 1000; /* msec. */
    garp_rarp->dp_key = dp_key;
    garp_rarp->port_key = port_key;
+    garp_rarp->enabled = enabled;
    shash_add(&send_garp_rarp_data, name, garp_rarp);

    /* Notify pinctrl_handler so that it can wakeup and process
@@ -5081,6 +5086,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                      bool garp_continuous)
{
    volatile struct garp_rarp_data *garp_rarp = NULL;
+    bool is_garp_rarp_enabled = !smap_get_bool(&binding_rec->options,
+                                               "disable_garp_rarp",
false);

    /* Skip localports as they don't need to be announced */
    if (!strcmp(binding_rec->type, "localport")) {
@@ -5104,6 +5111,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                if (garp_rarp) {
                    garp_rarp->dp_key =
binding_rec->datapath->tunnel_key;
                    garp_rarp->port_key = binding_rec->tunnel_key;
+                    garp_rarp->enabled = is_garp_rarp_enabled;
                    if (garp_max_timeout != garp_rarp_max_timeout ||
                        garp_continuous != garp_rarp_continuous) {
                        /* reset backoff */
@@ -5114,7 +5122,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                    add_garp_rarp(name, laddrs->ea,
                                  laddrs->ipv4_addrs[i].addr,
                                  binding_rec->datapath->tunnel_key,
-                                  binding_rec->tunnel_key);
+                                  binding_rec->tunnel_key,
+                                  is_garp_rarp_enabled);
                    send_garp_locally(ovnsb_idl_txn,
                                      sbrec_mac_binding_by_lport_ip,
                                      local_datapaths, binding_rec,
laddrs->ea,
@@ -5134,6 +5143,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                    if (garp_rarp) {
                        garp_rarp->dp_key =
binding_rec->datapath->tunnel_key;
                        garp_rarp->port_key = binding_rec->tunnel_key;
+                        garp_rarp->enabled = is_garp_rarp_enabled;
                        if (garp_max_timeout != garp_rarp_max_timeout ||
                            garp_continuous != garp_rarp_continuous) {
                            /* reset backoff */
@@ -5143,7 +5153,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                    } else {
                        add_garp_rarp(name, laddrs->ea,
                                      0,
binding_rec->datapath->tunnel_key,
-                                      binding_rec->tunnel_key);
+                                      binding_rec->tunnel_key,
+                                      is_garp_rarp_enabled);
                    }
                    free(name);
            }
@@ -5159,6 +5170,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
    if (garp_rarp) {
        garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
        garp_rarp->port_key = binding_rec->tunnel_key;
+        garp_rarp->enabled = is_garp_rarp_enabled;
        if (garp_max_timeout != garp_rarp_max_timeout ||
            garp_continuous != garp_rarp_continuous) {
            /* reset backoff */
@@ -5184,7 +5196,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
        add_garp_rarp(binding_rec->logical_port,
                      laddrs.ea, ip,
                      binding_rec->datapath->tunnel_key,
-                      binding_rec->tunnel_key);
+                      binding_rec->tunnel_key,
+                      is_garp_rarp_enabled);
        if (ip) {
            send_garp_locally(ovnsb_idl_txn,
sbrec_mac_binding_by_lport_ip,
                              local_datapaths, binding_rec,
laddrs.ea, ip);
@@ -6547,6 +6560,10 @@ send_garp_rarp_run(struct rconn *swconn, long
long int *send_garp_rarp_time)
    long long int current_time = time_msec();
    *send_garp_rarp_time = LLONG_MAX;
    SHASH_FOR_EACH (iter, &send_garp_rarp_data) {
+        struct garp_rarp_data *garp_rarp = iter->data;
+        if (!garp_rarp->enabled) {
+            continue;
+        }
        long long int next_announce = send_garp_rarp(swconn, iter->data,
                                                     current_time);
        if (*send_garp_rarp_time > next_announce) {
@@ -6562,6 +6579,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                       struct ovsdb_idl_index
*sbrec_port_binding_by_datapath,
                       struct ovsdb_idl_index
*sbrec_port_binding_by_name,
                       struct ovsdb_idl_index
*sbrec_mac_binding_by_lport_ip,
+                       struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
                       const struct ovsrec_bridge *br_int,
                       const struct sbrec_chassis *chassis,
                       const struct hmap *local_datapaths,
@@ -6597,12 +6615,18 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                               &nat_ip_keys, &local_l3gw_ports,
                               chassis, active_tunnels,
                               &nat_addresses);
+
    /* For deleted ports and deleted nat ips, remove from
     * send_garp_rarp_data. */
    struct shash_node *iter;
    SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
        if (!sset_contains(&localnet_vifs, iter->name) &&
            !sset_contains(&nat_ip_keys, iter->name)) {
+            struct garp_rarp_data *data = iter->data;
+            /* Cleanup FDB entries for inactive ports */
+            delete_fdb_entries(sbrec_fdb_by_dp_and_port,
+                               data->dp_key,
+                               data->port_key);

It feels wrong to use the garp/rarp mechanism to cleanup FDB on port
deletion.  It also forces the addition of the 'is_garp_rarp_enabled'
flag and creating 'garp_rarp_data' structures for cases in which we
don't really need them (we send no garps/rarps).

What if we do this in a different way?

In binding.c we set port_binding.up to "false" whenever ports are
removed.  Should we cleanup stale FDB entries there instead?

There are two places where we do that:
binding_lport_set_down()
port_binding_set_down()

What do you think?

Here, we are also trying to address the scenario where the vif port link
state goes down
and not only handle scenarios such as port removal or iface-id detachment.

For example, when a VM migration is completed and the hypervisor is in
process of removing
the VM on the source side, the VIF port state goes down. We need to
detect the port link state
down event and delete the FDB entries as soon as possible to reduce
traffic loss.

We clubbed it with garp_rarp code because this code is already tracking
the VLAN bridged ports.


Sorry but I still don't really understand how you're addressing the VIF
port link going down.  Do you mind detailing that part, please?

Would it make sense to add a system test (system-ovn.at<http://system-ovn.at/>) for this case
too then?

I still think we should try to not overload the semantics of the
garp/rarp code in pinctrl.c but maybe I'm misunderstanding the situation.

Thanks,
Dumitru

Sometime back as part of the commit d34509941ea6c9b5e5847a9f96ea5f235f56cccd we
had registered for interface link-state events in ovn-controller.
Hence, in case of a link state change, send_garp_rarp_prepare() is triggered which
gets the latest active ports via get_localnet_vifs_l3gwports(), and compares with the stored
set of ports in the send_garp_rarp_data and takes action (send/delete) accordingly.

We planned to reuse these garp_rarp data structures to avoid a lot of code duplication.

Presently we have added a test case which is closely simulating a link down event, if not exactly.
"test x`ovn-nbctl lsp-get-up vif0` = xdown”
Perhaps, I will check in system-on.at if there is a better way.

Do let me know your views or if you have any further queries.

Thanks,
Shibir


            send_garp_rarp_delete(iter->name);
        }
    }
@@ -6612,7 +6636,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
    SSET_FOR_EACH (iface_id, &localnet_vifs) {
        const struct sbrec_port_binding *pb = lport_lookup_by_name(
            sbrec_port_binding_by_name, iface_id);
-        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp",
false)) {
+        if (pb) {
            send_garp_rarp_update(ovnsb_idl_txn,
                                  sbrec_mac_binding_by_lport_ip,
                                  local_datapaths, pb, &nat_addresses,
@@ -6625,7 +6649,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
        const struct sbrec_port_binding *pb
            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
-        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp",
false)) {
+        if (pb) {
            send_garp_rarp_update(ovnsb_idl_txn,
sbrec_mac_binding_by_lport_ip,
                                  local_datapaths, pb, &nat_addresses,
                                  garp_max_timeout, garp_continuous);
diff --git a/controller/pinctrl.h b/controller/pinctrl.h
index 3462b670c..6613c87e4 100644
--- a/controller/pinctrl.h
+++ b/controller/pinctrl.h
@@ -49,6 +49,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                 struct ovsdb_idl_index *sbrec_igmp_groups,
                 struct ovsdb_idl_index *sbrec_ip_multicast_opts,
                 struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
+                 struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
                 const struct sbrec_dns_table *,
                 const struct sbrec_controller_event_table *,
                 const struct sbrec_service_monitor_table *,
diff --git a/lib/automake.mk b/lib/automake.mk
index b69e854b0..3ff098331 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -49,7 +49,9 @@ lib_libovn_la_SOURCES = \
lib/stopwatch-names.h \
lib/vif-plug-provider.h \
lib/vif-plug-provider.c \
-lib/vif-plug-providers/dummy/vif-plug-dummy.c
+lib/vif-plug-providers/dummy/vif-plug-dummy.c \
+lib/fdb.c \
+lib/fdb.h
nodist_lib_libovn_la_SOURCES = \
lib/ovn-dirs.c \
lib/ovn-nb-idl.c \
diff --git a/lib/fdb.c b/lib/fdb.c
new file mode 100644
index 000000000..00e3ac902
--- /dev/null
+++ b/lib/fdb.c
@@ -0,0 +1,33 @@
+/* Copyright (c) 2024, Red Hat, Inc.

This is probably a copy/paste error.  Should it say Nutanix?
Sure, we will update appropriately in v2.

+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *
    https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=>
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "lib/fdb.h"
+
+void
+delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
+                   uint32_t dp_key, uint32_t port_key)
+{
+    struct sbrec_fdb *target =
+        sbrec_fdb_index_init_row(sbrec_fdb_by_dp_and_port);
+    sbrec_fdb_index_set_dp_key(target, dp_key);
+    sbrec_fdb_index_set_port_key(target, port_key);
+
+    struct sbrec_fdb *fdb_e;
+    SBREC_FDB_FOR_EACH_EQUAL (fdb_e, target, sbrec_fdb_by_dp_and_port) {
+        sbrec_fdb_delete(fdb_e);
+    }
+    sbrec_fdb_index_destroy_row(target);
+}
diff --git a/lib/fdb.h b/lib/fdb.h
new file mode 100644
index 000000000..7b64bc7f2
--- /dev/null
+++ b/lib/fdb.h
@@ -0,0 +1,26 @@
+/* Copyright (c) 2024, Red Hat, Inc.

This is probably a copy/paste error.  Should it say Nutanix?
Sure, we will update appropriately in v2.

+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *
    https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=>
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_FDB_H
+#define OVN_FDB_H 1
+
+#include "ovsdb-idl.h"
+#include "ovn-sb-idl.h"
+
+void
+delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
+                   uint32_t dp_key, uint32_t port_key);
+
+#endif
diff --git a/tests/ovn.at<http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= > b/tests/ovn.at<http://ovn.at/>
<https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= >
index b31afbfb3..a0df87196 100644
--- a/tests/ovn.at<http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= >
+++ b/tests/ovn.at<http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= >
@@ -31706,6 +31706,63 @@ OVN_CLEANUP([hv1], [hv2], [hv3])
AT_CLEANUP
])

+OVN_FOR_EACH_NORTHD([
+AT_SETUP([OVN FDB cleanup when vif goes down])

No real need for "OVN" in the test case name.  When running
it looks like this:

OVN end-to-end tests

405: OVN FDB cleanup when vif goes down -- parallelization=yes --
ovn_monitor_all=yes ok
Sure, we will update appropriately in v2.

+ovn_start
+
+net_add n1
+
+AT_CHECK([ovn-nbctl ls-add ls0])
+
+AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
+AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
+AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
+AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
+AT_CHECK([ovn-nbctl set logical_switch_port ln_port
options:localnet_learn_fdb=true])
+
+AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
+AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "00:00:00:00:10:10
192.168.10.10" unknown])

All these "AT_CHECK" can be replaced with "check".
Sure, we will address test related comments appropriately in v2.

+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int vif0 -- \
+    set interface vif0 external-ids:iface-id=vif0 \
+    options:tx_pcap=hv1/vif0-tx.pcap \
+    options:rxq_pcap=hv1/vif0-rx.pcap \
+    ofport-request=1
+ovs-vsctl -- add-port br-phys ext0 -- \
+    set interface ext0 \
+    options:tx_pcap=hv1/ext0-tx.pcap \
+    options:rxq_pcap=hv1/ext0-rx.pcap \
+    ofport-request=2
+ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
+
+
+wait_for_ports_up

You should probably add a sync here:

check ovn-nbctl --wait=hv sync

+ls0_key=$(fetch_column datapath_binding tunnel_key
external_ids:name=ls0)
+vif0_key=$(fetch_column port_binding tunnel_key logical_port=vif0)
+
+ovn-sbctl create FDB mac="00\:00\:00\:00\:10\:10" dp_key=$ls0_key
port_key=$vif0_key
+ovn-sbctl create FDB mac="00\:00\:00\:00\:20\:20" dp_key=$ls0_key
port_key=$vif0_key
+
+ovn-sbctl list fdb

This is not really needed for the test, it can probably be removed.

+# vif going down should purge all relevant FDB entries
+ovs-vsctl del-port br-int vif0
+OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up vif0` = xdown])
+AT_CHECK([ovn-nbctl --wait=hv sync])

Please replace AT_CHECK with "check".

+ovn-sbctl list fdb

This one can be removed.

+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c
"00:00:00:00:10:10") = 0])
+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c
"00:00:00:00:20:20") = 0])> +
+# vif0 should not be a local datapath in hv1 as vif0 is down
+# Hence ln_port should not be a related port.
+OVN_CLEANUP([hv1
+ln_port])> +AT_CLEANUP
+])
+
OVN_FOR_EACH_NORTHD([
AT_SETUP([container port changed to normal port and then deleted])
ovn_start

Regards,
Dumitru
Shibir Basak Aug. 26, 2024, 7:26 a.m. UTC | #5
Hi Dumitru,

I have sent a v2 patch on top of this with some changes along with rebase.
Please feel free add review comments or suggestions.

Thanks,
Shibir

On 16-Aug-2024, at 7:35 PM, Shibir Basak <shibir.basak@nutanix.com> wrote:



On 15-Aug-2024, at 1:20 AM, Dumitru Ceara <dceara@redhat.com> wrote:

!-------------------------------------------------------------------|
 CAUTION: External Email

|-------------------------------------------------------------------!

On 8/14/24 13:24, Shibir Basak wrote:
Hi Dumitru,

Thanks for your comments.
I have responded inline.

On 07-Aug-2024, at 4:58 PM, Dumitru Ceara <dceara@redhat.com> wrote:

!-------------------------------------------------------------------|
 CAUTION: External Email

|-------------------------------------------------------------------!

On 8/2/24 17:07, Shibir Basak wrote:
When a VIF port goes down, all FDB entries associated
with it are deleted.
This helps is reducing traffic outage due to stale FDB
entries present until the lsp is removed.

Acked-by: Naveen Yerramneni <naveen.yerramneni@nutanix.com>
Signed-off-by: Shibir Basak <shibir.basak@nutanix.com>
---

Hi Shibir,

Thanks for the patch!

As this is a bug fix it probably needs a:
Fixes: dd94f1266ca4 ("northd: MAC learning: Add logical flows for fdb.”)
Sure

I have a few more comments below.

controller/ovn-controller.c |  5 ++++
controller/pinctrl.c        | 40 ++++++++++++++++++++------
controller/pinctrl.h        |  1 +
lib/automake.mk             |  4 ++-
lib/fdb.c                   | 33 +++++++++++++++++++++
lib/fdb.h                   | 26 +++++++++++++++++
tests/ovn.at                | 57 +++++++++++++++++++++++++++++++++++++
7 files changed, 157 insertions(+), 9 deletions(-)
create mode 100644 lib/fdb.c
create mode 100644 lib/fdb.h

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 6a7cca673..2acdddd09 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -4936,6 +4936,10 @@ main(int argc, char *argv[])
        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
                                  &sbrec_fdb_col_mac,
                                  &sbrec_fdb_col_dp_key);
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port
+        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
+                                  &sbrec_fdb_col_port_key,
+                                  &sbrec_fdb_col_dp_key);
    struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
        = mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
    struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
@@ -5645,6 +5649,7 @@ main(int argc, char *argv[])
                                    sbrec_igmp_group,
                                    sbrec_ip_multicast,
                                    sbrec_fdb_by_dp_key_mac,
+                                    sbrec_fdb_by_dp_and_port,
                                    sbrec_dns_table_get(ovnsb_idl_loop.idl),
                                    sbrec_controller_event_table_get(
                                        ovnsb_idl_loop.idl),
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 7cbb0cf81..9e37e1693 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -62,6 +62,7 @@
#include "lflow.h"
#include "ip-mcast.h"
#include "ovn-sb-idl.h"
+#include "lib/fdb.h"

VLOG_DEFINE_THIS_MODULE(pinctrl);

@@ -236,6 +237,7 @@ static void send_garp_rarp_prepare(
    struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
    struct ovsdb_idl_index *sbrec_port_binding_by_name,
    struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
    const struct ovsrec_bridge *,
    const struct sbrec_chassis *,
    const struct hmap *local_datapaths,
@@ -4145,6 +4147,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
            struct ovsdb_idl_index *sbrec_igmp_groups,
            struct ovsdb_idl_index *sbrec_ip_multicast_opts,
            struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
+            struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
            const struct sbrec_dns_table *dns_table,
            const struct sbrec_controller_event_table *ce_table,
            const struct sbrec_service_monitor_table *svc_mon_table,
@@ -4166,7 +4169,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                           sbrec_port_binding_by_key, chassis);
    send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath,
                           sbrec_port_binding_by_name,
-                           sbrec_mac_binding_by_lport_ip, br_int,
chassis,
+                           sbrec_mac_binding_by_lport_ip,
+                           sbrec_fdb_by_dp_and_port, br_int, chassis,
                           local_datapaths, active_tunnels, ovs_table);
    prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
    prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
@@ -5017,7 +5021,6 @@ wait_put_mac_bindings(struct ovsdb_idl_txn
*ovnsb_idl_txn)
    }
}

-

Unrelated change.  This line feed should not be removed.
Sure, will revert in v2.

/*
 * Send gratuitous/reverse ARP for vif on localnet.
 *
@@ -5034,6 +5037,7 @@ struct garp_rarp_data {
                                  * announcement (in msecs). */
    uint32_t dp_key;             /* Datapath used to output this GARP. */
    uint32_t port_key;           /* Port to inject the GARP into. */
+    bool enabled;                /* is garp/rarp announcement enabled */
};

/* Contains GARPs/RARPs to be sent. Protected by pinctrl_mutex*/
@@ -5054,7 +5058,7 @@ destroy_send_garps_rarps(void)
/* Runs with in the main ovn-controller thread context. */
static void
add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
-              uint32_t dp_key, uint32_t port_key)
+              uint32_t dp_key, uint32_t port_key, bool enabled)
{
    struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
    garp_rarp->ea = ea;
@@ -5063,6 +5067,7 @@ add_garp_rarp(const char *name, const struct
eth_addr ea, ovs_be32 ip,
    garp_rarp->backoff = 1000; /* msec. */
    garp_rarp->dp_key = dp_key;
    garp_rarp->port_key = port_key;
+    garp_rarp->enabled = enabled;
    shash_add(&send_garp_rarp_data, name, garp_rarp);

    /* Notify pinctrl_handler so that it can wakeup and process
@@ -5081,6 +5086,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                      bool garp_continuous)
{
    volatile struct garp_rarp_data *garp_rarp = NULL;
+    bool is_garp_rarp_enabled = !smap_get_bool(&binding_rec->options,
+                                               "disable_garp_rarp",
false);

    /* Skip localports as they don't need to be announced */
    if (!strcmp(binding_rec->type, "localport")) {
@@ -5104,6 +5111,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                if (garp_rarp) {
                    garp_rarp->dp_key =
binding_rec->datapath->tunnel_key;
                    garp_rarp->port_key = binding_rec->tunnel_key;
+                    garp_rarp->enabled = is_garp_rarp_enabled;
                    if (garp_max_timeout != garp_rarp_max_timeout ||
                        garp_continuous != garp_rarp_continuous) {
                        /* reset backoff */
@@ -5114,7 +5122,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                    add_garp_rarp(name, laddrs->ea,
                                  laddrs->ipv4_addrs[i].addr,
                                  binding_rec->datapath->tunnel_key,
-                                  binding_rec->tunnel_key);
+                                  binding_rec->tunnel_key,
+                                  is_garp_rarp_enabled);
                    send_garp_locally(ovnsb_idl_txn,
                                      sbrec_mac_binding_by_lport_ip,
                                      local_datapaths, binding_rec,
laddrs->ea,
@@ -5134,6 +5143,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                    if (garp_rarp) {
                        garp_rarp->dp_key =
binding_rec->datapath->tunnel_key;
                        garp_rarp->port_key = binding_rec->tunnel_key;
+                        garp_rarp->enabled = is_garp_rarp_enabled;
                        if (garp_max_timeout != garp_rarp_max_timeout ||
                            garp_continuous != garp_rarp_continuous) {
                            /* reset backoff */
@@ -5143,7 +5153,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                    } else {
                        add_garp_rarp(name, laddrs->ea,
                                      0,
binding_rec->datapath->tunnel_key,
-                                      binding_rec->tunnel_key);
+                                      binding_rec->tunnel_key,
+                                      is_garp_rarp_enabled);
                    }
                    free(name);
            }
@@ -5159,6 +5170,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
    if (garp_rarp) {
        garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
        garp_rarp->port_key = binding_rec->tunnel_key;
+        garp_rarp->enabled = is_garp_rarp_enabled;
        if (garp_max_timeout != garp_rarp_max_timeout ||
            garp_continuous != garp_rarp_continuous) {
            /* reset backoff */
@@ -5184,7 +5196,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
        add_garp_rarp(binding_rec->logical_port,
                      laddrs.ea, ip,
                      binding_rec->datapath->tunnel_key,
-                      binding_rec->tunnel_key);
+                      binding_rec->tunnel_key,
+                      is_garp_rarp_enabled);
        if (ip) {
            send_garp_locally(ovnsb_idl_txn,
sbrec_mac_binding_by_lport_ip,
                              local_datapaths, binding_rec,
laddrs.ea, ip);
@@ -6547,6 +6560,10 @@ send_garp_rarp_run(struct rconn *swconn, long
long int *send_garp_rarp_time)
    long long int current_time = time_msec();
    *send_garp_rarp_time = LLONG_MAX;
    SHASH_FOR_EACH (iter, &send_garp_rarp_data) {
+        struct garp_rarp_data *garp_rarp = iter->data;
+        if (!garp_rarp->enabled) {
+            continue;
+        }
        long long int next_announce = send_garp_rarp(swconn, iter->data,
                                                     current_time);
        if (*send_garp_rarp_time > next_announce) {
@@ -6562,6 +6579,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                       struct ovsdb_idl_index
*sbrec_port_binding_by_datapath,
                       struct ovsdb_idl_index
*sbrec_port_binding_by_name,
                       struct ovsdb_idl_index
*sbrec_mac_binding_by_lport_ip,
+                       struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
                       const struct ovsrec_bridge *br_int,
                       const struct sbrec_chassis *chassis,
                       const struct hmap *local_datapaths,
@@ -6597,12 +6615,18 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                               &nat_ip_keys, &local_l3gw_ports,
                               chassis, active_tunnels,
                               &nat_addresses);
+
    /* For deleted ports and deleted nat ips, remove from
     * send_garp_rarp_data. */
    struct shash_node *iter;
    SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
        if (!sset_contains(&localnet_vifs, iter->name) &&
            !sset_contains(&nat_ip_keys, iter->name)) {
+            struct garp_rarp_data *data = iter->data;
+            /* Cleanup FDB entries for inactive ports */
+            delete_fdb_entries(sbrec_fdb_by_dp_and_port,
+                               data->dp_key,
+                               data->port_key);

It feels wrong to use the garp/rarp mechanism to cleanup FDB on port
deletion.  It also forces the addition of the 'is_garp_rarp_enabled'
flag and creating 'garp_rarp_data' structures for cases in which we
don't really need them (we send no garps/rarps).

What if we do this in a different way?

In binding.c we set port_binding.up to "false" whenever ports are
removed.  Should we cleanup stale FDB entries there instead?

There are two places where we do that:
binding_lport_set_down()
port_binding_set_down()

What do you think?

Here, we are also trying to address the scenario where the vif port link
state goes down
and not only handle scenarios such as port removal or iface-id detachment.

For example, when a VM migration is completed and the hypervisor is in
process of removing
the VM on the source side, the VIF port state goes down. We need to
detect the port link state
down event and delete the FDB entries as soon as possible to reduce
traffic loss.

We clubbed it with garp_rarp code because this code is already tracking
the VLAN bridged ports.


Sorry but I still don't really understand how you're addressing the VIF
port link going down.  Do you mind detailing that part, please?

Would it make sense to add a system test (system-ovn.at<http://system-ovn.at/>) for this case
too then?

I still think we should try to not overload the semantics of the
garp/rarp code in pinctrl.c but maybe I'm misunderstanding the situation.

Thanks,
Dumitru

Sometime back as part of the commit d34509941ea6c9b5e5847a9f96ea5f235f56cccd we
had registered for interface link-state events in ovn-controller.
Hence, in case of a link state change, send_garp_rarp_prepare() is triggered which
gets the latest active ports via get_localnet_vifs_l3gwports(), and compares with the stored
set of ports in the send_garp_rarp_data and takes action (send/delete) accordingly.

We planned to reuse these garp_rarp data structures to avoid a lot of code duplication.

Presently we have added a test case which is closely simulating a link down event, if not exactly.
"test x`ovn-nbctl lsp-get-up vif0` = xdown”
Perhaps, I will check in system-on.at<http://system-on.at/> if there is a better way.

Do let me know your views or if you have any further queries.

Thanks,
Shibir


            send_garp_rarp_delete(iter->name);
        }
    }
@@ -6612,7 +6636,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
    SSET_FOR_EACH (iface_id, &localnet_vifs) {
        const struct sbrec_port_binding *pb = lport_lookup_by_name(
            sbrec_port_binding_by_name, iface_id);
-        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp",
false)) {
+        if (pb) {
            send_garp_rarp_update(ovnsb_idl_txn,
                                  sbrec_mac_binding_by_lport_ip,
                                  local_datapaths, pb, &nat_addresses,
@@ -6625,7 +6649,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
        const struct sbrec_port_binding *pb
            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
-        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp",
false)) {
+        if (pb) {
            send_garp_rarp_update(ovnsb_idl_txn,
sbrec_mac_binding_by_lport_ip,
                                  local_datapaths, pb, &nat_addresses,
                                  garp_max_timeout, garp_continuous);
diff --git a/controller/pinctrl.h b/controller/pinctrl.h
index 3462b670c..6613c87e4 100644
--- a/controller/pinctrl.h
+++ b/controller/pinctrl.h
@@ -49,6 +49,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                 struct ovsdb_idl_index *sbrec_igmp_groups,
                 struct ovsdb_idl_index *sbrec_ip_multicast_opts,
                 struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
+                 struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
                 const struct sbrec_dns_table *,
                 const struct sbrec_controller_event_table *,
                 const struct sbrec_service_monitor_table *,
diff --git a/lib/automake.mk b/lib/automake.mk
index b69e854b0..3ff098331 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -49,7 +49,9 @@ lib_libovn_la_SOURCES = \
lib/stopwatch-names.h \
lib/vif-plug-provider.h \
lib/vif-plug-provider.c \
-lib/vif-plug-providers/dummy/vif-plug-dummy.c
+lib/vif-plug-providers/dummy/vif-plug-dummy.c \
+lib/fdb.c \
+lib/fdb.h
nodist_lib_libovn_la_SOURCES = \
lib/ovn-dirs.c \
lib/ovn-nb-idl.c \
diff --git a/lib/fdb.c b/lib/fdb.c
new file mode 100644
index 000000000..00e3ac902
--- /dev/null
+++ b/lib/fdb.c
@@ -0,0 +1,33 @@
+/* Copyright (c) 2024, Red Hat, Inc.

This is probably a copy/paste error.  Should it say Nutanix?
Sure, we will update appropriately in v2.

+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *
    https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=>
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "lib/fdb.h"
+
+void
+delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
+                   uint32_t dp_key, uint32_t port_key)
+{
+    struct sbrec_fdb *target =
+        sbrec_fdb_index_init_row(sbrec_fdb_by_dp_and_port);
+    sbrec_fdb_index_set_dp_key(target, dp_key);
+    sbrec_fdb_index_set_port_key(target, port_key);
+
+    struct sbrec_fdb *fdb_e;
+    SBREC_FDB_FOR_EACH_EQUAL (fdb_e, target, sbrec_fdb_by_dp_and_port) {
+        sbrec_fdb_delete(fdb_e);
+    }
+    sbrec_fdb_index_destroy_row(target);
+}
diff --git a/lib/fdb.h b/lib/fdb.h
new file mode 100644
index 000000000..7b64bc7f2
--- /dev/null
+++ b/lib/fdb.h
@@ -0,0 +1,26 @@
+/* Copyright (c) 2024, Red Hat, Inc.

This is probably a copy/paste error.  Should it say Nutanix?
Sure, we will update appropriately in v2.

+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *
    https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=>
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_FDB_H
+#define OVN_FDB_H 1
+
+#include "ovsdb-idl.h"
+#include "ovn-sb-idl.h"
+
+void
+delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
+                   uint32_t dp_key, uint32_t port_key);
+
+#endif
diff --git a/tests/ovn.at<http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= > b/tests/ovn.at<http://ovn.at/>
<https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= >
index b31afbfb3..a0df87196 100644
--- a/tests/ovn.at<http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= >
+++ b/tests/ovn.at<http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= >
@@ -31706,6 +31706,63 @@ OVN_CLEANUP([hv1], [hv2], [hv3])
AT_CLEANUP
])

+OVN_FOR_EACH_NORTHD([
+AT_SETUP([OVN FDB cleanup when vif goes down])

No real need for "OVN" in the test case name.  When running
it looks like this:

OVN end-to-end tests

405: OVN FDB cleanup when vif goes down -- parallelization=yes --
ovn_monitor_all=yes ok
Sure, we will update appropriately in v2.

+ovn_start
+
+net_add n1
+
+AT_CHECK([ovn-nbctl ls-add ls0])
+
+AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
+AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
+AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
+AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
+AT_CHECK([ovn-nbctl set logical_switch_port ln_port
options:localnet_learn_fdb=true])
+
+AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
+AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "00:00:00:00:10:10
192.168.10.10" unknown])

All these "AT_CHECK" can be replaced with "check".
Sure, we will address test related comments appropriately in v2.

+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int vif0 -- \
+    set interface vif0 external-ids:iface-id=vif0 \
+    options:tx_pcap=hv1/vif0-tx.pcap \
+    options:rxq_pcap=hv1/vif0-rx.pcap \
+    ofport-request=1
+ovs-vsctl -- add-port br-phys ext0 -- \
+    set interface ext0 \
+    options:tx_pcap=hv1/ext0-tx.pcap \
+    options:rxq_pcap=hv1/ext0-rx.pcap \
+    ofport-request=2
+ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
+
+
+wait_for_ports_up

You should probably add a sync here:

check ovn-nbctl --wait=hv sync

+ls0_key=$(fetch_column datapath_binding tunnel_key
external_ids:name=ls0)
+vif0_key=$(fetch_column port_binding tunnel_key logical_port=vif0)
+
+ovn-sbctl create FDB mac="00\:00\:00\:00\:10\:10" dp_key=$ls0_key
port_key=$vif0_key
+ovn-sbctl create FDB mac="00\:00\:00\:00\:20\:20" dp_key=$ls0_key
port_key=$vif0_key
+
+ovn-sbctl list fdb

This is not really needed for the test, it can probably be removed.

+# vif going down should purge all relevant FDB entries
+ovs-vsctl del-port br-int vif0
+OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up vif0` = xdown])
+AT_CHECK([ovn-nbctl --wait=hv sync])

Please replace AT_CHECK with "check".

+ovn-sbctl list fdb

This one can be removed.

+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c
"00:00:00:00:10:10") = 0])
+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c
"00:00:00:00:20:20") = 0])> +
+# vif0 should not be a local datapath in hv1 as vif0 is down
+# Hence ln_port should not be a related port.
+OVN_CLEANUP([hv1
+ln_port])> +AT_CLEANUP
+])
+
OVN_FOR_EACH_NORTHD([
AT_SETUP([container port changed to normal port and then deleted])
ovn_start

Regards,
Dumitru
Shibir Basak Sept. 13, 2024, 4:20 p.m. UTC | #6
Hi Dumitru/team,

Please feel free to take a look and share your comments (if any) on this v2 patch.

Thanks,
Shibir

On 26-Aug-2024, at 12:56 PM, Shibir Basak <shibir.basak@nutanix.com> wrote:

Hi Dumitru,

I have sent a v2 patch on top of this with some changes along with rebase.
Please feel free add review comments or suggestions.

Thanks,
Shibir

On 16-Aug-2024, at 7:35 PM, Shibir Basak <shibir.basak@nutanix.com> wrote:



On 15-Aug-2024, at 1:20 AM, Dumitru Ceara <dceara@redhat.com> wrote:

!-------------------------------------------------------------------|
 CAUTION: External Email

|-------------------------------------------------------------------!

On 8/14/24 13:24, Shibir Basak wrote:
Hi Dumitru,

Thanks for your comments.
I have responded inline.

On 07-Aug-2024, at 4:58 PM, Dumitru Ceara <dceara@redhat.com> wrote:

!-------------------------------------------------------------------|
 CAUTION: External Email

|-------------------------------------------------------------------!

On 8/2/24 17:07, Shibir Basak wrote:
When a VIF port goes down, all FDB entries associated
with it are deleted.
This helps is reducing traffic outage due to stale FDB
entries present until the lsp is removed.

Acked-by: Naveen Yerramneni <naveen.yerramneni@nutanix.com>
Signed-off-by: Shibir Basak <shibir.basak@nutanix.com>
---

Hi Shibir,

Thanks for the patch!

As this is a bug fix it probably needs a:
Fixes: dd94f1266ca4 ("northd: MAC learning: Add logical flows for fdb.”)
Sure

I have a few more comments below.

controller/ovn-controller.c |  5 ++++
controller/pinctrl.c        | 40 ++++++++++++++++++++------
controller/pinctrl.h        |  1 +
lib/automake.mk             |  4 ++-
lib/fdb.c                   | 33 +++++++++++++++++++++
lib/fdb.h                   | 26 +++++++++++++++++
tests/ovn.at                | 57 +++++++++++++++++++++++++++++++++++++
7 files changed, 157 insertions(+), 9 deletions(-)
create mode 100644 lib/fdb.c
create mode 100644 lib/fdb.h

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 6a7cca673..2acdddd09 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -4936,6 +4936,10 @@ main(int argc, char *argv[])
        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
                                  &sbrec_fdb_col_mac,
                                  &sbrec_fdb_col_dp_key);
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port
+        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
+                                  &sbrec_fdb_col_port_key,
+                                  &sbrec_fdb_col_dp_key);
    struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
        = mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
    struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
@@ -5645,6 +5649,7 @@ main(int argc, char *argv[])
                                    sbrec_igmp_group,
                                    sbrec_ip_multicast,
                                    sbrec_fdb_by_dp_key_mac,
+                                    sbrec_fdb_by_dp_and_port,
                                    sbrec_dns_table_get(ovnsb_idl_loop.idl),
                                    sbrec_controller_event_table_get(
                                        ovnsb_idl_loop.idl),
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 7cbb0cf81..9e37e1693 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -62,6 +62,7 @@
#include "lflow.h"
#include "ip-mcast.h"
#include "ovn-sb-idl.h"
+#include "lib/fdb.h"

VLOG_DEFINE_THIS_MODULE(pinctrl);

@@ -236,6 +237,7 @@ static void send_garp_rarp_prepare(
    struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
    struct ovsdb_idl_index *sbrec_port_binding_by_name,
    struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
    const struct ovsrec_bridge *,
    const struct sbrec_chassis *,
    const struct hmap *local_datapaths,
@@ -4145,6 +4147,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
            struct ovsdb_idl_index *sbrec_igmp_groups,
            struct ovsdb_idl_index *sbrec_ip_multicast_opts,
            struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
+            struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
            const struct sbrec_dns_table *dns_table,
            const struct sbrec_controller_event_table *ce_table,
            const struct sbrec_service_monitor_table *svc_mon_table,
@@ -4166,7 +4169,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                           sbrec_port_binding_by_key, chassis);
    send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath,
                           sbrec_port_binding_by_name,
-                           sbrec_mac_binding_by_lport_ip, br_int,
chassis,
+                           sbrec_mac_binding_by_lport_ip,
+                           sbrec_fdb_by_dp_and_port, br_int, chassis,
                           local_datapaths, active_tunnels, ovs_table);
    prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
    prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
@@ -5017,7 +5021,6 @@ wait_put_mac_bindings(struct ovsdb_idl_txn
*ovnsb_idl_txn)
    }
}

-

Unrelated change.  This line feed should not be removed.
Sure, will revert in v2.

/*
 * Send gratuitous/reverse ARP for vif on localnet.
 *
@@ -5034,6 +5037,7 @@ struct garp_rarp_data {
                                  * announcement (in msecs). */
    uint32_t dp_key;             /* Datapath used to output this GARP. */
    uint32_t port_key;           /* Port to inject the GARP into. */
+    bool enabled;                /* is garp/rarp announcement enabled */
};

/* Contains GARPs/RARPs to be sent. Protected by pinctrl_mutex*/
@@ -5054,7 +5058,7 @@ destroy_send_garps_rarps(void)
/* Runs with in the main ovn-controller thread context. */
static void
add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
-              uint32_t dp_key, uint32_t port_key)
+              uint32_t dp_key, uint32_t port_key, bool enabled)
{
    struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
    garp_rarp->ea = ea;
@@ -5063,6 +5067,7 @@ add_garp_rarp(const char *name, const struct
eth_addr ea, ovs_be32 ip,
    garp_rarp->backoff = 1000; /* msec. */
    garp_rarp->dp_key = dp_key;
    garp_rarp->port_key = port_key;
+    garp_rarp->enabled = enabled;
    shash_add(&send_garp_rarp_data, name, garp_rarp);

    /* Notify pinctrl_handler so that it can wakeup and process
@@ -5081,6 +5086,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                      bool garp_continuous)
{
    volatile struct garp_rarp_data *garp_rarp = NULL;
+    bool is_garp_rarp_enabled = !smap_get_bool(&binding_rec->options,
+                                               "disable_garp_rarp",
false);

    /* Skip localports as they don't need to be announced */
    if (!strcmp(binding_rec->type, "localport")) {
@@ -5104,6 +5111,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                if (garp_rarp) {
                    garp_rarp->dp_key =
binding_rec->datapath->tunnel_key;
                    garp_rarp->port_key = binding_rec->tunnel_key;
+                    garp_rarp->enabled = is_garp_rarp_enabled;
                    if (garp_max_timeout != garp_rarp_max_timeout ||
                        garp_continuous != garp_rarp_continuous) {
                        /* reset backoff */
@@ -5114,7 +5122,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                    add_garp_rarp(name, laddrs->ea,
                                  laddrs->ipv4_addrs[i].addr,
                                  binding_rec->datapath->tunnel_key,
-                                  binding_rec->tunnel_key);
+                                  binding_rec->tunnel_key,
+                                  is_garp_rarp_enabled);
                    send_garp_locally(ovnsb_idl_txn,
                                      sbrec_mac_binding_by_lport_ip,
                                      local_datapaths, binding_rec,
laddrs->ea,
@@ -5134,6 +5143,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                    if (garp_rarp) {
                        garp_rarp->dp_key =
binding_rec->datapath->tunnel_key;
                        garp_rarp->port_key = binding_rec->tunnel_key;
+                        garp_rarp->enabled = is_garp_rarp_enabled;
                        if (garp_max_timeout != garp_rarp_max_timeout ||
                            garp_continuous != garp_rarp_continuous) {
                            /* reset backoff */
@@ -5143,7 +5153,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                    } else {
                        add_garp_rarp(name, laddrs->ea,
                                      0,
binding_rec->datapath->tunnel_key,
-                                      binding_rec->tunnel_key);
+                                      binding_rec->tunnel_key,
+                                      is_garp_rarp_enabled);
                    }
                    free(name);
            }
@@ -5159,6 +5170,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
    if (garp_rarp) {
        garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
        garp_rarp->port_key = binding_rec->tunnel_key;
+        garp_rarp->enabled = is_garp_rarp_enabled;
        if (garp_max_timeout != garp_rarp_max_timeout ||
            garp_continuous != garp_rarp_continuous) {
            /* reset backoff */
@@ -5184,7 +5196,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
*ovnsb_idl_txn,
        add_garp_rarp(binding_rec->logical_port,
                      laddrs.ea, ip,
                      binding_rec->datapath->tunnel_key,
-                      binding_rec->tunnel_key);
+                      binding_rec->tunnel_key,
+                      is_garp_rarp_enabled);
        if (ip) {
            send_garp_locally(ovnsb_idl_txn,
sbrec_mac_binding_by_lport_ip,
                              local_datapaths, binding_rec,
laddrs.ea, ip);
@@ -6547,6 +6560,10 @@ send_garp_rarp_run(struct rconn *swconn, long
long int *send_garp_rarp_time)
    long long int current_time = time_msec();
    *send_garp_rarp_time = LLONG_MAX;
    SHASH_FOR_EACH (iter, &send_garp_rarp_data) {
+        struct garp_rarp_data *garp_rarp = iter->data;
+        if (!garp_rarp->enabled) {
+            continue;
+        }
        long long int next_announce = send_garp_rarp(swconn, iter->data,
                                                     current_time);
        if (*send_garp_rarp_time > next_announce) {
@@ -6562,6 +6579,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                       struct ovsdb_idl_index
*sbrec_port_binding_by_datapath,
                       struct ovsdb_idl_index
*sbrec_port_binding_by_name,
                       struct ovsdb_idl_index
*sbrec_mac_binding_by_lport_ip,
+                       struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
                       const struct ovsrec_bridge *br_int,
                       const struct sbrec_chassis *chassis,
                       const struct hmap *local_datapaths,
@@ -6597,12 +6615,18 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
                               &nat_ip_keys, &local_l3gw_ports,
                               chassis, active_tunnels,
                               &nat_addresses);
+
    /* For deleted ports and deleted nat ips, remove from
     * send_garp_rarp_data. */
    struct shash_node *iter;
    SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
        if (!sset_contains(&localnet_vifs, iter->name) &&
            !sset_contains(&nat_ip_keys, iter->name)) {
+            struct garp_rarp_data *data = iter->data;
+            /* Cleanup FDB entries for inactive ports */
+            delete_fdb_entries(sbrec_fdb_by_dp_and_port,
+                               data->dp_key,
+                               data->port_key);

It feels wrong to use the garp/rarp mechanism to cleanup FDB on port
deletion.  It also forces the addition of the 'is_garp_rarp_enabled'
flag and creating 'garp_rarp_data' structures for cases in which we
don't really need them (we send no garps/rarps).

What if we do this in a different way?

In binding.c we set port_binding.up to "false" whenever ports are
removed.  Should we cleanup stale FDB entries there instead?

There are two places where we do that:
binding_lport_set_down()
port_binding_set_down()

What do you think?

Here, we are also trying to address the scenario where the vif port link
state goes down
and not only handle scenarios such as port removal or iface-id detachment.

For example, when a VM migration is completed and the hypervisor is in
process of removing
the VM on the source side, the VIF port state goes down. We need to
detect the port link state
down event and delete the FDB entries as soon as possible to reduce
traffic loss.

We clubbed it with garp_rarp code because this code is already tracking
the VLAN bridged ports.


Sorry but I still don't really understand how you're addressing the VIF
port link going down.  Do you mind detailing that part, please?

Would it make sense to add a system test (system-ovn.at<http://system-ovn.at/>) for this case
too then?

I still think we should try to not overload the semantics of the
garp/rarp code in pinctrl.c but maybe I'm misunderstanding the situation.

Thanks,
Dumitru

Sometime back as part of the commit d34509941ea6c9b5e5847a9f96ea5f235f56cccd we
had registered for interface link-state events in ovn-controller.
Hence, in case of a link state change, send_garp_rarp_prepare() is triggered which
gets the latest active ports via get_localnet_vifs_l3gwports(), and compares with the stored
set of ports in the send_garp_rarp_data and takes action (send/delete) accordingly.

We planned to reuse these garp_rarp data structures to avoid a lot of code duplication.

Presently we have added a test case which is closely simulating a link down event, if not exactly.
"test x`ovn-nbctl lsp-get-up vif0` = xdown”
Perhaps, I will check in system-on.at<http://system-on.at/> if there is a better way.

Do let me know your views or if you have any further queries.

Thanks,
Shibir


            send_garp_rarp_delete(iter->name);
        }
    }
@@ -6612,7 +6636,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
    SSET_FOR_EACH (iface_id, &localnet_vifs) {
        const struct sbrec_port_binding *pb = lport_lookup_by_name(
            sbrec_port_binding_by_name, iface_id);
-        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp",
false)) {
+        if (pb) {
            send_garp_rarp_update(ovnsb_idl_txn,
                                  sbrec_mac_binding_by_lport_ip,
                                  local_datapaths, pb, &nat_addresses,
@@ -6625,7 +6649,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
*ovnsb_idl_txn,
    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
        const struct sbrec_port_binding *pb
            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
-        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp",
false)) {
+        if (pb) {
            send_garp_rarp_update(ovnsb_idl_txn,
sbrec_mac_binding_by_lport_ip,
                                  local_datapaths, pb, &nat_addresses,
                                  garp_max_timeout, garp_continuous);
diff --git a/controller/pinctrl.h b/controller/pinctrl.h
index 3462b670c..6613c87e4 100644
--- a/controller/pinctrl.h
+++ b/controller/pinctrl.h
@@ -49,6 +49,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                 struct ovsdb_idl_index *sbrec_igmp_groups,
                 struct ovsdb_idl_index *sbrec_ip_multicast_opts,
                 struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
+                 struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
                 const struct sbrec_dns_table *,
                 const struct sbrec_controller_event_table *,
                 const struct sbrec_service_monitor_table *,
diff --git a/lib/automake.mk b/lib/automake.mk
index b69e854b0..3ff098331 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -49,7 +49,9 @@ lib_libovn_la_SOURCES = \
lib/stopwatch-names.h \
lib/vif-plug-provider.h \
lib/vif-plug-provider.c \
-lib/vif-plug-providers/dummy/vif-plug-dummy.c
+lib/vif-plug-providers/dummy/vif-plug-dummy.c \
+lib/fdb.c \
+lib/fdb.h
nodist_lib_libovn_la_SOURCES = \
lib/ovn-dirs.c \
lib/ovn-nb-idl.c \
diff --git a/lib/fdb.c b/lib/fdb.c
new file mode 100644
index 000000000..00e3ac902
--- /dev/null
+++ b/lib/fdb.c
@@ -0,0 +1,33 @@
+/* Copyright (c) 2024, Red Hat, Inc.

This is probably a copy/paste error.  Should it say Nutanix?
Sure, we will update appropriately in v2.

+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *
    https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=>
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "lib/fdb.h"
+
+void
+delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
+                   uint32_t dp_key, uint32_t port_key)
+{
+    struct sbrec_fdb *target =
+        sbrec_fdb_index_init_row(sbrec_fdb_by_dp_and_port);
+    sbrec_fdb_index_set_dp_key(target, dp_key);
+    sbrec_fdb_index_set_port_key(target, port_key);
+
+    struct sbrec_fdb *fdb_e;
+    SBREC_FDB_FOR_EACH_EQUAL (fdb_e, target, sbrec_fdb_by_dp_and_port) {
+        sbrec_fdb_delete(fdb_e);
+    }
+    sbrec_fdb_index_destroy_row(target);
+}
diff --git a/lib/fdb.h b/lib/fdb.h
new file mode 100644
index 000000000..7b64bc7f2
--- /dev/null
+++ b/lib/fdb.h
@@ -0,0 +1,26 @@
+/* Copyright (c) 2024, Red Hat, Inc.

This is probably a copy/paste error.  Should it say Nutanix?
Sure, we will update appropriately in v2.

+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *
    https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=>
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_FDB_H
+#define OVN_FDB_H 1
+
+#include "ovsdb-idl.h"
+#include "ovn-sb-idl.h"
+
+void
+delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
+                   uint32_t dp_key, uint32_t port_key);
+
+#endif
diff --git a/tests/ovn.at<http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= > b/tests/ovn.at<http://ovn.at/>
<https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= >
index b31afbfb3..a0df87196 100644
--- a/tests/ovn.at<http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= >
+++ b/tests/ovn.at<http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= >
@@ -31706,6 +31706,63 @@ OVN_CLEANUP([hv1], [hv2], [hv3])
AT_CLEANUP
])

+OVN_FOR_EACH_NORTHD([
+AT_SETUP([OVN FDB cleanup when vif goes down])

No real need for "OVN" in the test case name.  When running
it looks like this:

OVN end-to-end tests

405: OVN FDB cleanup when vif goes down -- parallelization=yes --
ovn_monitor_all=yes ok
Sure, we will update appropriately in v2.

+ovn_start
+
+net_add n1
+
+AT_CHECK([ovn-nbctl ls-add ls0])
+
+AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
+AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
+AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
+AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
+AT_CHECK([ovn-nbctl set logical_switch_port ln_port
options:localnet_learn_fdb=true])
+
+AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
+AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "00:00:00:00:10:10
192.168.10.10" unknown])

All these "AT_CHECK" can be replaced with "check".
Sure, we will address test related comments appropriately in v2.

+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int vif0 -- \
+    set interface vif0 external-ids:iface-id=vif0 \
+    options:tx_pcap=hv1/vif0-tx.pcap \
+    options:rxq_pcap=hv1/vif0-rx.pcap \
+    ofport-request=1
+ovs-vsctl -- add-port br-phys ext0 -- \
+    set interface ext0 \
+    options:tx_pcap=hv1/ext0-tx.pcap \
+    options:rxq_pcap=hv1/ext0-rx.pcap \
+    ofport-request=2
+ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
+
+
+wait_for_ports_up

You should probably add a sync here:

check ovn-nbctl --wait=hv sync

+ls0_key=$(fetch_column datapath_binding tunnel_key
external_ids:name=ls0)
+vif0_key=$(fetch_column port_binding tunnel_key logical_port=vif0)
+
+ovn-sbctl create FDB mac="00\:00\:00\:00\:10\:10" dp_key=$ls0_key
port_key=$vif0_key
+ovn-sbctl create FDB mac="00\:00\:00\:00\:20\:20" dp_key=$ls0_key
port_key=$vif0_key
+
+ovn-sbctl list fdb

This is not really needed for the test, it can probably be removed.

+# vif going down should purge all relevant FDB entries
+ovs-vsctl del-port br-int vif0
+OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up vif0` = xdown])
+AT_CHECK([ovn-nbctl --wait=hv sync])

Please replace AT_CHECK with "check".

+ovn-sbctl list fdb

This one can be removed.

+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c
"00:00:00:00:10:10") = 0])
+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c
"00:00:00:00:20:20") = 0])> +
+# vif0 should not be a local datapath in hv1 as vif0 is down
+# Hence ln_port should not be a related port.
+OVN_CLEANUP([hv1
+ln_port])> +AT_CLEANUP
+])
+
OVN_FOR_EACH_NORTHD([
AT_SETUP([container port changed to normal port and then deleted])
ovn_start

Regards,
Dumitru
Dumitru Ceara Sept. 16, 2024, 10:10 a.m. UTC | #7
On 9/13/24 18:20, Shibir Basak wrote:
> Hi Dumitru/team,
> 

Hi Shibir,

> Please feel free to take a look and share your comments (if any) on this
> v2 patch.
> 

Sorry for the delay in reviewing the v2 patch.  I'm starting now.

Regards,
Dumitru

> Thanks,
> Shibir
> 
>> On 26-Aug-2024, at 12:56 PM, Shibir Basak <shibir.basak@nutanix.com>
>> wrote:
>>
>> Hi Dumitru,
>>
>> I have sent a v2 patch on top of this with some changes along with rebase.
>> Please feel free add review comments or suggestions.
>>
>> Thanks,
>> Shibir
>>
>>> On 16-Aug-2024, at 7:35 PM, Shibir Basak <shibir.basak@nutanix.com>
>>> wrote:
>>>
>>>
>>>
>>>> On 15-Aug-2024, at 1:20 AM, Dumitru Ceara <dceara@redhat.com> wrote:
>>>>
>>>> !-------------------------------------------------------------------|
>>>>  CAUTION: External Email
>>>>
>>>> |-------------------------------------------------------------------!
>>>>
>>>> On 8/14/24 13:24, Shibir Basak wrote:
>>>>> Hi Dumitru,
>>>>>
>>>>> Thanks for your comments.
>>>>> I have responded inline.
>>>>>
>>>>>> On 07-Aug-2024, at 4:58 PM, Dumitru Ceara <dceara@redhat.com> wrote:
>>>>>>
>>>>>> !-------------------------------------------------------------------|
>>>>>>  CAUTION: External Email
>>>>>>
>>>>>> |-------------------------------------------------------------------!
>>>>>>
>>>>>> On 8/2/24 17:07, Shibir Basak wrote:
>>>>>>> When a VIF port goes down, all FDB entries associated
>>>>>>> with it are deleted.
>>>>>>> This helps is reducing traffic outage due to stale FDB
>>>>>>> entries present until the lsp is removed.
>>>>>>>
>>>>>>> Acked-by: Naveen Yerramneni <naveen.yerramneni@nutanix.com>
>>>>>>> Signed-off-by: Shibir Basak <shibir.basak@nutanix.com>
>>>>>>> ---
>>>>>>
>>>>>> Hi Shibir,
>>>>>>
>>>>>> Thanks for the patch!
>>>>>>
>>>>>> As this is a bug fix it probably needs a:
>>>>>> Fixes: dd94f1266ca4 ("northd: MAC learning: Add logical flows for
>>>>>> fdb.”)
>>>>> Sure
>>>>>>
>>>>>> I have a few more comments below.
>>>>>>
>>>>>>> controller/ovn-controller.c |  5 ++++
>>>>>>> controller/pinctrl.c        | 40 ++++++++++++++++++++------
>>>>>>> controller/pinctrl.h        |  1 +
>>>>>>> lib/automake.mk             |  4 ++-
>>>>>>> lib/fdb.c                   | 33 +++++++++++++++++++++
>>>>>>> lib/fdb.h                   | 26 +++++++++++++++++
>>>>>>> tests/ovn.at                | 57
>>>>>>> +++++++++++++++++++++++++++++++++++++
>>>>>>> 7 files changed, 157 insertions(+), 9 deletions(-)
>>>>>>> create mode 100644 lib/fdb.c
>>>>>>> create mode 100644 lib/fdb.h
>>>>>>>
>>>>>>> diff --git a/controller/ovn-controller.c
>>>>>>> b/controller/ovn-controller.c
>>>>>>> index 6a7cca673..2acdddd09 100644
>>>>>>> --- a/controller/ovn-controller.c
>>>>>>> +++ b/controller/ovn-controller.c
>>>>>>> @@ -4936,6 +4936,10 @@ main(int argc, char *argv[])
>>>>>>>         = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
>>>>>>>                                   &sbrec_fdb_col_mac,
>>>>>>>                                   &sbrec_fdb_col_dp_key);
>>>>>>> +    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port
>>>>>>> +        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
>>>>>>> +                                  &sbrec_fdb_col_port_key,
>>>>>>> +                                  &sbrec_fdb_col_dp_key);
>>>>>>>     struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
>>>>>>>         = mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
>>>>>>>     struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
>>>>>>> @@ -5645,6 +5649,7 @@ main(int argc, char *argv[])
>>>>>>>                                     sbrec_igmp_group,
>>>>>>>                                     sbrec_ip_multicast,
>>>>>>>                                     sbrec_fdb_by_dp_key_mac,
>>>>>>> +                                    sbrec_fdb_by_dp_and_port,
>>>>>>>                                     sbrec_dns_table_get(ovnsb_idl_loop.idl),
>>>>>>>                                     sbrec_controller_event_table_get(
>>>>>>>                                         ovnsb_idl_loop.idl),
>>>>>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
>>>>>>> index 7cbb0cf81..9e37e1693 100644
>>>>>>> --- a/controller/pinctrl.c
>>>>>>> +++ b/controller/pinctrl.c
>>>>>>> @@ -62,6 +62,7 @@
>>>>>>> #include "lflow.h"
>>>>>>> #include "ip-mcast.h"
>>>>>>> #include "ovn-sb-idl.h"
>>>>>>> +#include "lib/fdb.h"
>>>>>>>
>>>>>>> VLOG_DEFINE_THIS_MODULE(pinctrl);
>>>>>>>
>>>>>>> @@ -236,6 +237,7 @@ static void send_garp_rarp_prepare(
>>>>>>>     struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
>>>>>>>     struct ovsdb_idl_index *sbrec_port_binding_by_name,
>>>>>>>     struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
>>>>>>> +    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>>>>>>     const struct ovsrec_bridge *,
>>>>>>>     const struct sbrec_chassis *,
>>>>>>>     const struct hmap *local_datapaths,
>>>>>>> @@ -4145,6 +4147,7 @@ pinctrl_run(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>             struct ovsdb_idl_index *sbrec_igmp_groups,
>>>>>>>             struct ovsdb_idl_index *sbrec_ip_multicast_opts,
>>>>>>>             struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
>>>>>>> +            struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>>>>>>             const struct sbrec_dns_table *dns_table,
>>>>>>>             const struct sbrec_controller_event_table *ce_table,
>>>>>>>             const struct sbrec_service_monitor_table *svc_mon_table,
>>>>>>> @@ -4166,7 +4169,8 @@ pinctrl_run(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>                            sbrec_port_binding_by_key, chassis);
>>>>>>>     send_garp_rarp_prepare(ovnsb_idl_txn,
>>>>>>> sbrec_port_binding_by_datapath,
>>>>>>>                            sbrec_port_binding_by_name,
>>>>>>> -                           sbrec_mac_binding_by_lport_ip, br_int,
>>>>>>> chassis,
>>>>>>> +                           sbrec_mac_binding_by_lport_ip,
>>>>>>> +                           sbrec_fdb_by_dp_and_port, br_int,
>>>>>>> chassis,
>>>>>>>                            local_datapaths, active_tunnels,
>>>>>>> ovs_table);
>>>>>>>     prepare_ipv6_ras(local_active_ports_ras,
>>>>>>> sbrec_port_binding_by_name);
>>>>>>>     prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
>>>>>>> @@ -5017,7 +5021,6 @@ wait_put_mac_bindings(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn)
>>>>>>>     }
>>>>>>> }
>>>>>>>
>>>>>>> -
>>>>>>
>>>>>> Unrelated change.  This line feed should not be removed.
>>>>> Sure, will revert in v2.
>>>>>>
>>>>>>> /*
>>>>>>>  * Send gratuitous/reverse ARP for vif on localnet.
>>>>>>>  *
>>>>>>> @@ -5034,6 +5037,7 @@ struct garp_rarp_data {
>>>>>>>                                   * announcement (in msecs). */
>>>>>>>     uint32_t dp_key;             /* Datapath used to output this
>>>>>>> GARP. */
>>>>>>>     uint32_t port_key;           /* Port to inject the GARP into. */
>>>>>>> +    bool enabled;                /* is garp/rarp announcement
>>>>>>> enabled */
>>>>>>> };
>>>>>>>
>>>>>>> /* Contains GARPs/RARPs to be sent. Protected by pinctrl_mutex*/
>>>>>>> @@ -5054,7 +5058,7 @@ destroy_send_garps_rarps(void)
>>>>>>> /* Runs with in the main ovn-controller thread context. */
>>>>>>> static void
>>>>>>> add_garp_rarp(const char *name, const struct eth_addr ea,
>>>>>>> ovs_be32 ip,
>>>>>>> -              uint32_t dp_key, uint32_t port_key)
>>>>>>> +              uint32_t dp_key, uint32_t port_key, bool enabled)
>>>>>>> {
>>>>>>>     struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
>>>>>>>     garp_rarp->ea = ea;
>>>>>>> @@ -5063,6 +5067,7 @@ add_garp_rarp(const char *name, const struct
>>>>>>> eth_addr ea, ovs_be32 ip,
>>>>>>>     garp_rarp->backoff = 1000; /* msec. */
>>>>>>>     garp_rarp->dp_key = dp_key;
>>>>>>>     garp_rarp->port_key = port_key;
>>>>>>> +    garp_rarp->enabled = enabled;
>>>>>>>     shash_add(&send_garp_rarp_data, name, garp_rarp);
>>>>>>>
>>>>>>>     /* Notify pinctrl_handler so that it can wakeup and process
>>>>>>> @@ -5081,6 +5086,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>                       bool garp_continuous)
>>>>>>> {
>>>>>>>     volatile struct garp_rarp_data *garp_rarp = NULL;
>>>>>>> +    bool is_garp_rarp_enabled =
>>>>>>> !smap_get_bool(&binding_rec->options,
>>>>>>> +                                               "disable_garp_rarp",
>>>>>>> false);
>>>>>>>
>>>>>>>     /* Skip localports as they don't need to be announced */
>>>>>>>     if (!strcmp(binding_rec->type, "localport")) {
>>>>>>> @@ -5104,6 +5111,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>                 if (garp_rarp) {
>>>>>>>                     garp_rarp->dp_key =
>>>>>>> binding_rec->datapath->tunnel_key;
>>>>>>>                     garp_rarp->port_key = binding_rec->tunnel_key;
>>>>>>> +                    garp_rarp->enabled = is_garp_rarp_enabled;
>>>>>>>                     if (garp_max_timeout != garp_rarp_max_timeout ||
>>>>>>>                         garp_continuous != garp_rarp_continuous) {
>>>>>>>                         /* reset backoff */
>>>>>>> @@ -5114,7 +5122,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>                     add_garp_rarp(name, laddrs->ea,
>>>>>>>                                   laddrs->ipv4_addrs[i].addr,
>>>>>>>                                   binding_rec->datapath->tunnel_key,
>>>>>>> -                                  binding_rec->tunnel_key);
>>>>>>> +                                  binding_rec->tunnel_key,
>>>>>>> +                                  is_garp_rarp_enabled);
>>>>>>>                     send_garp_locally(ovnsb_idl_txn,
>>>>>>>                                       sbrec_mac_binding_by_lport_ip,
>>>>>>>                                       local_datapaths, binding_rec,
>>>>>>> laddrs->ea,
>>>>>>> @@ -5134,6 +5143,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>                     if (garp_rarp) {
>>>>>>>                         garp_rarp->dp_key =
>>>>>>> binding_rec->datapath->tunnel_key;
>>>>>>>                         garp_rarp->port_key =
>>>>>>> binding_rec->tunnel_key;
>>>>>>> +                        garp_rarp->enabled = is_garp_rarp_enabled;
>>>>>>>                         if (garp_max_timeout !=
>>>>>>> garp_rarp_max_timeout ||
>>>>>>>                             garp_continuous !=
>>>>>>> garp_rarp_continuous) {
>>>>>>>                             /* reset backoff */
>>>>>>> @@ -5143,7 +5153,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>                     } else {
>>>>>>>                         add_garp_rarp(name, laddrs->ea,
>>>>>>>                                       0,
>>>>>>> binding_rec->datapath->tunnel_key,
>>>>>>> -                                      binding_rec->tunnel_key);
>>>>>>> +                                      binding_rec->tunnel_key,
>>>>>>> +                                      is_garp_rarp_enabled);
>>>>>>>                     }
>>>>>>>                     free(name);
>>>>>>>             }
>>>>>>> @@ -5159,6 +5170,7 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>     if (garp_rarp) {
>>>>>>>         garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
>>>>>>>         garp_rarp->port_key = binding_rec->tunnel_key;
>>>>>>> +        garp_rarp->enabled = is_garp_rarp_enabled;
>>>>>>>         if (garp_max_timeout != garp_rarp_max_timeout ||
>>>>>>>             garp_continuous != garp_rarp_continuous) {
>>>>>>>             /* reset backoff */
>>>>>>> @@ -5184,7 +5196,8 @@ send_garp_rarp_update(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>         add_garp_rarp(binding_rec->logical_port,
>>>>>>>                       laddrs.ea, ip,
>>>>>>>                       binding_rec->datapath->tunnel_key,
>>>>>>> -                      binding_rec->tunnel_key);
>>>>>>> +                      binding_rec->tunnel_key,
>>>>>>> +                      is_garp_rarp_enabled);
>>>>>>>         if (ip) {
>>>>>>>             send_garp_locally(ovnsb_idl_txn,
>>>>>>> sbrec_mac_binding_by_lport_ip,
>>>>>>>                               local_datapaths, binding_rec,
>>>>>>> laddrs.ea, ip);
>>>>>>> @@ -6547,6 +6560,10 @@ send_garp_rarp_run(struct rconn *swconn, long
>>>>>>> long int *send_garp_rarp_time)
>>>>>>>     long long int current_time = time_msec();
>>>>>>>     *send_garp_rarp_time = LLONG_MAX;
>>>>>>>     SHASH_FOR_EACH (iter, &send_garp_rarp_data) {
>>>>>>> +        struct garp_rarp_data *garp_rarp = iter->data;
>>>>>>> +        if (!garp_rarp->enabled) {
>>>>>>> +            continue;
>>>>>>> +        }
>>>>>>>         long long int next_announce = send_garp_rarp(swconn,
>>>>>>> iter->data,
>>>>>>>                                                      current_time);
>>>>>>>         if (*send_garp_rarp_time > next_announce) {
>>>>>>> @@ -6562,6 +6579,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>                        struct ovsdb_idl_index
>>>>>>> *sbrec_port_binding_by_datapath,
>>>>>>>                        struct ovsdb_idl_index
>>>>>>> *sbrec_port_binding_by_name,
>>>>>>>                        struct ovsdb_idl_index
>>>>>>> *sbrec_mac_binding_by_lport_ip,
>>>>>>> +                       struct ovsdb_idl_index
>>>>>>> *sbrec_fdb_by_dp_and_port,
>>>>>>>                        const struct ovsrec_bridge *br_int,
>>>>>>>                        const struct sbrec_chassis *chassis,
>>>>>>>                        const struct hmap *local_datapaths,
>>>>>>> @@ -6597,12 +6615,18 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>                                &nat_ip_keys, &local_l3gw_ports,
>>>>>>>                                chassis, active_tunnels,
>>>>>>>                                &nat_addresses);
>>>>>>> +
>>>>>>>     /* For deleted ports and deleted nat ips, remove from
>>>>>>>      * send_garp_rarp_data. */
>>>>>>>     struct shash_node *iter;
>>>>>>>     SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
>>>>>>>         if (!sset_contains(&localnet_vifs, iter->name) &&
>>>>>>>             !sset_contains(&nat_ip_keys, iter->name)) {
>>>>>>> +            struct garp_rarp_data *data = iter->data;
>>>>>>> +            /* Cleanup FDB entries for inactive ports */
>>>>>>> +            delete_fdb_entries(sbrec_fdb_by_dp_and_port,
>>>>>>> +                               data->dp_key,
>>>>>>> +                               data->port_key);
>>>>>>
>>>>>> It feels wrong to use the garp/rarp mechanism to cleanup FDB on port
>>>>>> deletion.  It also forces the addition of the 'is_garp_rarp_enabled'
>>>>>> flag and creating 'garp_rarp_data' structures for cases in which we
>>>>>> don't really need them (we send no garps/rarps).
>>>>>>
>>>>>> What if we do this in a different way?
>>>>>>
>>>>>> In binding.c we set port_binding.up to "false" whenever ports are
>>>>>> removed.  Should we cleanup stale FDB entries there instead?
>>>>>>
>>>>>> There are two places where we do that:
>>>>>> binding_lport_set_down()
>>>>>> port_binding_set_down()
>>>>>>
>>>>>> What do you think?
>>>>>
>>>>> Here, we are also trying to address the scenario where the vif port
>>>>> link
>>>>> state goes down 
>>>>> and not only handle scenarios such as port removal or iface-id
>>>>> detachment. 
>>>>>
>>>>> For example, when a VM migration is completed and the hypervisor is in
>>>>> process of removing 
>>>>> the VM on the source side, the VIF port state goes down. We need to
>>>>> detect the port link state 
>>>>> down event and delete the FDB entries as soon as possible to reduce
>>>>> traffic loss. 
>>>>>
>>>>> We clubbed it with garp_rarp code because this code is already tracking
>>>>> the VLAN bridged ports.
>>>>>
>>>>
>>>> Sorry but I still don't really understand how you're addressing the VIF
>>>> port link going down.  Do you mind detailing that part, please?
>>>>
>>>> Would it make sense to add a system test (system-ovn.at
>>>> <http://system-ovn.at/>) for this case
>>>> too then?
>>>>
>>>> I still think we should try to not overload the semantics of the
>>>> garp/rarp code in pinctrl.c but maybe I'm misunderstanding the
>>>> situation.
>>>>
>>>> Thanks,
>>>> Dumitru
>>>
>>> Sometime back as part of the commit
>>> d34509941ea6c9b5e5847a9f96ea5f235f56cccd we
>>> had registered for interface link-state events in ovn-controller.
>>> Hence, in case of a link state change, send_garp_rarp_prepare() is
>>> triggered which
>>> gets the latest active ports via get_localnet_vifs_l3gwports(), and
>>> compares with the stored 
>>> set of ports in the send_garp_rarp_data and takes action
>>> (send/delete) accordingly.
>>>
>>> We planned to reuse these garp_rarp data structures to avoid a lot of
>>> code duplication.
>>>
>>> Presently we have added a test case which is closely simulating a
>>> link down event, if not exactly.
>>> "test x`ovn-nbctl lsp-get-up vif0` = xdown”
>>> Perhaps, I will check in system-on.at <http://system-on.at/> if there
>>> is a better way.
>>>
>>> Do let me know your views or if you have any further queries.
>>>
>>> Thanks,
>>> Shibir
>>>>
>>>>>>
>>>>>>>             send_garp_rarp_delete(iter->name);
>>>>>>>         }
>>>>>>>     }
>>>>>>> @@ -6612,7 +6636,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>     SSET_FOR_EACH (iface_id, &localnet_vifs) {
>>>>>>>         const struct sbrec_port_binding *pb = lport_lookup_by_name(
>>>>>>>             sbrec_port_binding_by_name, iface_id);
>>>>>>> -        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp",
>>>>>>> false)) {
>>>>>>> +        if (pb) {
>>>>>>>             send_garp_rarp_update(ovnsb_idl_txn,
>>>>>>>                                   sbrec_mac_binding_by_lport_ip,
>>>>>>>                                   local_datapaths, pb,
>>>>>>> &nat_addresses,
>>>>>>> @@ -6625,7 +6649,7 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>     SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
>>>>>>>         const struct sbrec_port_binding *pb
>>>>>>>             = lport_lookup_by_name(sbrec_port_binding_by_name,
>>>>>>> gw_port);
>>>>>>> -        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp",
>>>>>>> false)) {
>>>>>>> +        if (pb) {
>>>>>>>             send_garp_rarp_update(ovnsb_idl_txn,
>>>>>>> sbrec_mac_binding_by_lport_ip,
>>>>>>>                                   local_datapaths, pb,
>>>>>>> &nat_addresses,
>>>>>>>                                   garp_max_timeout, garp_continuous);
>>>>>>> diff --git a/controller/pinctrl.h b/controller/pinctrl.h
>>>>>>> index 3462b670c..6613c87e4 100644
>>>>>>> --- a/controller/pinctrl.h
>>>>>>> +++ b/controller/pinctrl.h
>>>>>>> @@ -49,6 +49,7 @@ void pinctrl_run(struct ovsdb_idl_txn
>>>>>>> *ovnsb_idl_txn,
>>>>>>>                  struct ovsdb_idl_index *sbrec_igmp_groups,
>>>>>>>                  struct ovsdb_idl_index *sbrec_ip_multicast_opts,
>>>>>>>                  struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
>>>>>>> +                 struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>>>>>>                  const struct sbrec_dns_table *,
>>>>>>>                  const struct sbrec_controller_event_table *,
>>>>>>>                  const struct sbrec_service_monitor_table *,
>>>>>>> diff --git a/lib/automake.mk b/lib/automake.mk
>>>>>>> index b69e854b0..3ff098331 100644
>>>>>>> --- a/lib/automake.mk
>>>>>>> +++ b/lib/automake.mk
>>>>>>> @@ -49,7 +49,9 @@ lib_libovn_la_SOURCES = \
>>>>>>> lib/stopwatch-names.h \
>>>>>>> lib/vif-plug-provider.h \
>>>>>>> lib/vif-plug-provider.c \
>>>>>>> -lib/vif-plug-providers/dummy/vif-plug-dummy.c
>>>>>>> +lib/vif-plug-providers/dummy/vif-plug-dummy.c \
>>>>>>> +lib/fdb.c \
>>>>>>> +lib/fdb.h
>>>>>>> nodist_lib_libovn_la_SOURCES = \
>>>>>>> lib/ovn-dirs.c \
>>>>>>> lib/ovn-nb-idl.c \
>>>>>>> diff --git a/lib/fdb.c b/lib/fdb.c
>>>>>>> new file mode 100644
>>>>>>> index 000000000..00e3ac902
>>>>>>> --- /dev/null
>>>>>>> +++ b/lib/fdb.c
>>>>>>> @@ -0,0 +1,33 @@
>>>>>>> +/* Copyright (c) 2024, Red Hat, Inc.
>>>>>>
>>>>>> This is probably a copy/paste error.  Should it say Nutanix?
>>>>> Sure, we will update appropriately in v2.
>>>>>>
>>>>>>> + *
>>>>>>> + * Licensed under the Apache License, Version 2.0 (the "License");
>>>>>>> + * you may not use this file except in compliance with the License.
>>>>>>> + * You may obtain a copy of the License at:
>>>>>>> + *
>>>>>>> + *
>>>>>>>     https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=> <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=>> 
>>>>>>> + *
>>>>>>> + * Unless required by applicable law or agreed to in writing,
>>>>>>> software
>>>>>>> + * distributed under the License is distributed on an "AS IS" BASIS,
>>>>>>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>>>>>>> implied.
>>>>>>> + * See the License for the specific language governing
>>>>>>> permissions and
>>>>>>> + * limitations under the License.
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include <config.h>
>>>>>>> +#include "lib/fdb.h"
>>>>>>> +
>>>>>>> +void
>>>>>>> +delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>>>>>> +                   uint32_t dp_key, uint32_t port_key)
>>>>>>> +{
>>>>>>> +    struct sbrec_fdb *target =
>>>>>>> +        sbrec_fdb_index_init_row(sbrec_fdb_by_dp_and_port);
>>>>>>> +    sbrec_fdb_index_set_dp_key(target, dp_key);
>>>>>>> +    sbrec_fdb_index_set_port_key(target, port_key);
>>>>>>> +
>>>>>>> +    struct sbrec_fdb *fdb_e;
>>>>>>> +    SBREC_FDB_FOR_EACH_EQUAL (fdb_e, target,
>>>>>>> sbrec_fdb_by_dp_and_port) {
>>>>>>> +        sbrec_fdb_delete(fdb_e);
>>>>>>> +    }
>>>>>>> +    sbrec_fdb_index_destroy_row(target);
>>>>>>> +}
>>>>>>> diff --git a/lib/fdb.h b/lib/fdb.h
>>>>>>> new file mode 100644
>>>>>>> index 000000000..7b64bc7f2
>>>>>>> --- /dev/null
>>>>>>> +++ b/lib/fdb.h
>>>>>>> @@ -0,0 +1,26 @@
>>>>>>> +/* Copyright (c) 2024, Red Hat, Inc.
>>>>>>
>>>>>> This is probably a copy/paste error.  Should it say Nutanix?
>>>>> Sure, we will update appropriately in v2.
>>>>>>
>>>>>>> + *
>>>>>>> + * Licensed under the Apache License, Version 2.0 (the "License");
>>>>>>> + * you may not use this file except in compliance with the License.
>>>>>>> + * You may obtain a copy of the License at:
>>>>>>> + *
>>>>>>> + *
>>>>>>>     https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=> <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__www.apache.org_licenses_LICENSE-2D2.0&d=DwICaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=56wz88_J295ONGQivF_7tnPT-sl6OYGFn6SgI0SyDqzhlvk9I43PsRbUI7CzhkEd&s=tQmYz8z4JIoAztE8J_AhDteZBW4vo467WTpXMviFiPs&e=>> 
>>>>>>> + *
>>>>>>> + * Unless required by applicable law or agreed to in writing,
>>>>>>> software
>>>>>>> + * distributed under the License is distributed on an "AS IS" BASIS,
>>>>>>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>>>>>>> implied.
>>>>>>> + * See the License for the specific language governing
>>>>>>> permissions and
>>>>>>> + * limitations under the License.
>>>>>>> + */
>>>>>>> +
>>>>>>> +#ifndef OVN_FDB_H
>>>>>>> +#define OVN_FDB_H 1
>>>>>>> +
>>>>>>> +#include "ovsdb-idl.h"
>>>>>>> +#include "ovn-sb-idl.h"
>>>>>>> +
>>>>>>> +void
>>>>>>> +delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
>>>>>>> +                   uint32_t dp_key, uint32_t port_key);
>>>>>>> +
>>>>>>> +#endif
>>>>>>> diff --git a/tests/ovn.at
>>>>>>> <http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e=> > b/tests/ovn.at <http://ovn.at/>
>>>>>>> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e=> >
>>>>>>> index b31afbfb3..a0df87196 100644
>>>>>>> --- a/tests/ovn.at
>>>>>>> <http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e=> >
>>>>>>> +++ b/tests/ovn.at
>>>>>>> <http://ovn.at/> <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e= <https://urldefense.proofpoint.com/v2/url?u=http-3A__ovn.at_&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=awcqDnvm2rXKiid0_aQK6D2Z4I6GarSRmeHuqVFlgU4&m=AEijf5o6HDlnTtOFAM8OgRGx_e563BXUntvK-hUYmYoFNIn5UOad2n04VyTX1Rke&s=nQqUZU3nSpgtVwdzhMon7Lx94Au4jRVtMa38r78TnlE&e=> >
>>>>>>> @@ -31706,6 +31706,63 @@ OVN_CLEANUP([hv1], [hv2], [hv3])
>>>>>>> AT_CLEANUP
>>>>>>> ])
>>>>>>>
>>>>>>> +OVN_FOR_EACH_NORTHD([
>>>>>>> +AT_SETUP([OVN FDB cleanup when vif goes down])
>>>>>>
>>>>>> No real need for "OVN" in the test case name.  When running
>>>>>> it looks like this:
>>>>>>
>>>>>> OVN end-to-end tests
>>>>>>
>>>>>> 405: OVN FDB cleanup when vif goes down -- parallelization=yes --
>>>>>> ovn_monitor_all=yes ok
>>>>> Sure, we will update appropriately in v2.
>>>>>>
>>>>>>> +ovn_start
>>>>>>> +
>>>>>>> +net_add n1
>>>>>>> +
>>>>>>> +AT_CHECK([ovn-nbctl ls-add ls0])
>>>>>>> +
>>>>>>> +AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
>>>>>>> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
>>>>>>> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
>>>>>>> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
>>>>>>> +AT_CHECK([ovn-nbctl set logical_switch_port ln_port
>>>>>>> options:localnet_learn_fdb=true])
>>>>>>> +
>>>>>>> +AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
>>>>>>> +AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "00:00:00:00:10:10
>>>>>>> 192.168.10.10" unknown])
>>>>>>
>>>>>> All these "AT_CHECK" can be replaced with "check".
>>>>> Sure, we will address test related comments appropriately in v2.
>>>>>>
>>>>>>> +
>>>>>>> +sim_add hv1
>>>>>>> +as hv1
>>>>>>> +ovs-vsctl add-br br-phys
>>>>>>> +ovn_attach n1 br-phys 192.168.0.1
>>>>>>> +ovs-vsctl -- add-port br-int vif0 -- \
>>>>>>> +    set interface vif0 external-ids:iface-id=vif0 \
>>>>>>> +    options:tx_pcap=hv1/vif0-tx.pcap \
>>>>>>> +    options:rxq_pcap=hv1/vif0-rx.pcap \
>>>>>>> +    ofport-request=1
>>>>>>> +ovs-vsctl -- add-port br-phys ext0 -- \
>>>>>>> +    set interface ext0 \
>>>>>>> +    options:tx_pcap=hv1/ext0-tx.pcap \
>>>>>>> +    options:rxq_pcap=hv1/ext0-rx.pcap \
>>>>>>> +    ofport-request=2
>>>>>>> +ovs-vsctl set open .
>>>>>>> external_ids:ovn-bridge-mappings=physnet1:br-phys
>>>>>>> +
>>>>>>> +
>>>>>>> +wait_for_ports_up
>>>>>>
>>>>>> You should probably add a sync here:
>>>>>>
>>>>>> check ovn-nbctl --wait=hv sync
>>>>>>
>>>>>>> +ls0_key=$(fetch_column datapath_binding tunnel_key
>>>>>>> external_ids:name=ls0)
>>>>>>> +vif0_key=$(fetch_column port_binding tunnel_key logical_port=vif0)
>>>>>>> +
>>>>>>> +ovn-sbctl create FDB mac="00\:00\:00\:00\:10\:10" dp_key=$ls0_key
>>>>>>> port_key=$vif0_key
>>>>>>> +ovn-sbctl create FDB mac="00\:00\:00\:00\:20\:20" dp_key=$ls0_key
>>>>>>> port_key=$vif0_key
>>>>>>> +
>>>>>>> +ovn-sbctl list fdb
>>>>>>
>>>>>> This is not really needed for the test, it can probably be removed.
>>>>>>
>>>>>>> +# vif going down should purge all relevant FDB entries
>>>>>>> +ovs-vsctl del-port br-int vif0
>>>>>>> +OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up vif0` = xdown])
>>>>>>> +AT_CHECK([ovn-nbctl --wait=hv sync])
>>>>>>
>>>>>> Please replace AT_CHECK with "check".
>>>>>>
>>>>>>> +ovn-sbctl list fdb
>>>>>>
>>>>>> This one can be removed.
>>>>>>
>>>>>>> +OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c
>>>>>>> "00:00:00:00:10:10") = 0])
>>>>>>> +OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c
>>>>>>> "00:00:00:00:20:20") = 0])> +
>>>>>>> +# vif0 should not be a local datapath in hv1 as vif0 is down
>>>>>>> +# Hence ln_port should not be a related port.
>>>>>>> +OVN_CLEANUP([hv1
>>>>>>> +ln_port])> +AT_CLEANUP
>>>>>>> +])
>>>>>>> +
>>>>>>> OVN_FOR_EACH_NORTHD([
>>>>>>> AT_SETUP([container port changed to normal port and then deleted])
>>>>>>> ovn_start
>>>>>>
>>>>>> Regards,
>>>>>> Dumitru
>>
>
diff mbox series

Patch

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 6a7cca673..2acdddd09 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -4936,6 +4936,10 @@  main(int argc, char *argv[])
         = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
                                   &sbrec_fdb_col_mac,
                                   &sbrec_fdb_col_dp_key);
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port
+        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
+                                  &sbrec_fdb_col_port_key,
+                                  &sbrec_fdb_col_dp_key);
     struct ovsdb_idl_index *sbrec_mac_binding_by_datapath
         = mac_binding_by_datapath_index_create(ovnsb_idl_loop.idl);
     struct ovsdb_idl_index *sbrec_static_mac_binding_by_datapath
@@ -5645,6 +5649,7 @@  main(int argc, char *argv[])
                                     sbrec_igmp_group,
                                     sbrec_ip_multicast,
                                     sbrec_fdb_by_dp_key_mac,
+                                    sbrec_fdb_by_dp_and_port,
                                     sbrec_dns_table_get(ovnsb_idl_loop.idl),
                                     sbrec_controller_event_table_get(
                                         ovnsb_idl_loop.idl),
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 7cbb0cf81..9e37e1693 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -62,6 +62,7 @@ 
 #include "lflow.h"
 #include "ip-mcast.h"
 #include "ovn-sb-idl.h"
+#include "lib/fdb.h"
 
 VLOG_DEFINE_THIS_MODULE(pinctrl);
 
@@ -236,6 +237,7 @@  static void send_garp_rarp_prepare(
     struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
     struct ovsdb_idl_index *sbrec_port_binding_by_name,
     struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+    struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
     const struct ovsrec_bridge *,
     const struct sbrec_chassis *,
     const struct hmap *local_datapaths,
@@ -4145,6 +4147,7 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
             struct ovsdb_idl_index *sbrec_igmp_groups,
             struct ovsdb_idl_index *sbrec_ip_multicast_opts,
             struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
+            struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
             const struct sbrec_dns_table *dns_table,
             const struct sbrec_controller_event_table *ce_table,
             const struct sbrec_service_monitor_table *svc_mon_table,
@@ -4166,7 +4169,8 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                            sbrec_port_binding_by_key, chassis);
     send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath,
                            sbrec_port_binding_by_name,
-                           sbrec_mac_binding_by_lport_ip, br_int, chassis,
+                           sbrec_mac_binding_by_lport_ip,
+                           sbrec_fdb_by_dp_and_port, br_int, chassis,
                            local_datapaths, active_tunnels, ovs_table);
     prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
     prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
@@ -5017,7 +5021,6 @@  wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn)
     }
 }
 
-
 /*
  * Send gratuitous/reverse ARP for vif on localnet.
  *
@@ -5034,6 +5037,7 @@  struct garp_rarp_data {
                                   * announcement (in msecs). */
     uint32_t dp_key;             /* Datapath used to output this GARP. */
     uint32_t port_key;           /* Port to inject the GARP into. */
+    bool enabled;                /* is garp/rarp announcement enabled */
 };
 
 /* Contains GARPs/RARPs to be sent. Protected by pinctrl_mutex*/
@@ -5054,7 +5058,7 @@  destroy_send_garps_rarps(void)
 /* Runs with in the main ovn-controller thread context. */
 static void
 add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
-              uint32_t dp_key, uint32_t port_key)
+              uint32_t dp_key, uint32_t port_key, bool enabled)
 {
     struct garp_rarp_data *garp_rarp = xmalloc(sizeof *garp_rarp);
     garp_rarp->ea = ea;
@@ -5063,6 +5067,7 @@  add_garp_rarp(const char *name, const struct eth_addr ea, ovs_be32 ip,
     garp_rarp->backoff = 1000; /* msec. */
     garp_rarp->dp_key = dp_key;
     garp_rarp->port_key = port_key;
+    garp_rarp->enabled = enabled;
     shash_add(&send_garp_rarp_data, name, garp_rarp);
 
     /* Notify pinctrl_handler so that it can wakeup and process
@@ -5081,6 +5086,8 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                       bool garp_continuous)
 {
     volatile struct garp_rarp_data *garp_rarp = NULL;
+    bool is_garp_rarp_enabled = !smap_get_bool(&binding_rec->options,
+                                               "disable_garp_rarp", false);
 
     /* Skip localports as they don't need to be announced */
     if (!strcmp(binding_rec->type, "localport")) {
@@ -5104,6 +5111,7 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                 if (garp_rarp) {
                     garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
                     garp_rarp->port_key = binding_rec->tunnel_key;
+                    garp_rarp->enabled = is_garp_rarp_enabled;
                     if (garp_max_timeout != garp_rarp_max_timeout ||
                         garp_continuous != garp_rarp_continuous) {
                         /* reset backoff */
@@ -5114,7 +5122,8 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                     add_garp_rarp(name, laddrs->ea,
                                   laddrs->ipv4_addrs[i].addr,
                                   binding_rec->datapath->tunnel_key,
-                                  binding_rec->tunnel_key);
+                                  binding_rec->tunnel_key,
+                                  is_garp_rarp_enabled);
                     send_garp_locally(ovnsb_idl_txn,
                                       sbrec_mac_binding_by_lport_ip,
                                       local_datapaths, binding_rec, laddrs->ea,
@@ -5134,6 +5143,7 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                     if (garp_rarp) {
                         garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
                         garp_rarp->port_key = binding_rec->tunnel_key;
+                        garp_rarp->enabled = is_garp_rarp_enabled;
                         if (garp_max_timeout != garp_rarp_max_timeout ||
                             garp_continuous != garp_rarp_continuous) {
                             /* reset backoff */
@@ -5143,7 +5153,8 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
                     } else {
                         add_garp_rarp(name, laddrs->ea,
                                       0, binding_rec->datapath->tunnel_key,
-                                      binding_rec->tunnel_key);
+                                      binding_rec->tunnel_key,
+                                      is_garp_rarp_enabled);
                     }
                     free(name);
             }
@@ -5159,6 +5170,7 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
     if (garp_rarp) {
         garp_rarp->dp_key = binding_rec->datapath->tunnel_key;
         garp_rarp->port_key = binding_rec->tunnel_key;
+        garp_rarp->enabled = is_garp_rarp_enabled;
         if (garp_max_timeout != garp_rarp_max_timeout ||
             garp_continuous != garp_rarp_continuous) {
             /* reset backoff */
@@ -5184,7 +5196,8 @@  send_garp_rarp_update(struct ovsdb_idl_txn *ovnsb_idl_txn,
         add_garp_rarp(binding_rec->logical_port,
                       laddrs.ea, ip,
                       binding_rec->datapath->tunnel_key,
-                      binding_rec->tunnel_key);
+                      binding_rec->tunnel_key,
+                      is_garp_rarp_enabled);
         if (ip) {
             send_garp_locally(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
                               local_datapaths, binding_rec, laddrs.ea, ip);
@@ -6547,6 +6560,10 @@  send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
     long long int current_time = time_msec();
     *send_garp_rarp_time = LLONG_MAX;
     SHASH_FOR_EACH (iter, &send_garp_rarp_data) {
+        struct garp_rarp_data *garp_rarp = iter->data;
+        if (!garp_rarp->enabled) {
+            continue;
+        }
         long long int next_announce = send_garp_rarp(swconn, iter->data,
                                                      current_time);
         if (*send_garp_rarp_time > next_announce) {
@@ -6562,6 +6579,7 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
                        struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
                        struct ovsdb_idl_index *sbrec_port_binding_by_name,
                        struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+                       struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
                        const struct ovsrec_bridge *br_int,
                        const struct sbrec_chassis *chassis,
                        const struct hmap *local_datapaths,
@@ -6597,12 +6615,18 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
                                &nat_ip_keys, &local_l3gw_ports,
                                chassis, active_tunnels,
                                &nat_addresses);
+
     /* For deleted ports and deleted nat ips, remove from
      * send_garp_rarp_data. */
     struct shash_node *iter;
     SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
         if (!sset_contains(&localnet_vifs, iter->name) &&
             !sset_contains(&nat_ip_keys, iter->name)) {
+            struct garp_rarp_data *data = iter->data;
+            /* Cleanup FDB entries for inactive ports */
+            delete_fdb_entries(sbrec_fdb_by_dp_and_port,
+                               data->dp_key,
+                               data->port_key);
             send_garp_rarp_delete(iter->name);
         }
     }
@@ -6612,7 +6636,7 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
     SSET_FOR_EACH (iface_id, &localnet_vifs) {
         const struct sbrec_port_binding *pb = lport_lookup_by_name(
             sbrec_port_binding_by_name, iface_id);
-        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp", false)) {
+        if (pb) {
             send_garp_rarp_update(ovnsb_idl_txn,
                                   sbrec_mac_binding_by_lport_ip,
                                   local_datapaths, pb, &nat_addresses,
@@ -6625,7 +6649,7 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
     SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
         const struct sbrec_port_binding *pb
             = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
-        if (pb && !smap_get_bool(&pb->options, "disable_garp_rarp", false)) {
+        if (pb) {
             send_garp_rarp_update(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
                                   local_datapaths, pb, &nat_addresses,
                                   garp_max_timeout, garp_continuous);
diff --git a/controller/pinctrl.h b/controller/pinctrl.h
index 3462b670c..6613c87e4 100644
--- a/controller/pinctrl.h
+++ b/controller/pinctrl.h
@@ -49,6 +49,7 @@  void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                  struct ovsdb_idl_index *sbrec_igmp_groups,
                  struct ovsdb_idl_index *sbrec_ip_multicast_opts,
                  struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac,
+                 struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
                  const struct sbrec_dns_table *,
                  const struct sbrec_controller_event_table *,
                  const struct sbrec_service_monitor_table *,
diff --git a/lib/automake.mk b/lib/automake.mk
index b69e854b0..3ff098331 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -49,7 +49,9 @@  lib_libovn_la_SOURCES = \
 	lib/stopwatch-names.h \
 	lib/vif-plug-provider.h \
 	lib/vif-plug-provider.c \
-	lib/vif-plug-providers/dummy/vif-plug-dummy.c
+	lib/vif-plug-providers/dummy/vif-plug-dummy.c \
+	lib/fdb.c \
+	lib/fdb.h
 nodist_lib_libovn_la_SOURCES = \
 	lib/ovn-dirs.c \
 	lib/ovn-nb-idl.c \
diff --git a/lib/fdb.c b/lib/fdb.c
new file mode 100644
index 000000000..00e3ac902
--- /dev/null
+++ b/lib/fdb.c
@@ -0,0 +1,33 @@ 
+/* Copyright (c) 2024, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "lib/fdb.h"
+
+void
+delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
+                   uint32_t dp_key, uint32_t port_key)
+{
+    struct sbrec_fdb *target =
+        sbrec_fdb_index_init_row(sbrec_fdb_by_dp_and_port);
+    sbrec_fdb_index_set_dp_key(target, dp_key);
+    sbrec_fdb_index_set_port_key(target, port_key);
+
+    struct sbrec_fdb *fdb_e;
+    SBREC_FDB_FOR_EACH_EQUAL (fdb_e, target, sbrec_fdb_by_dp_and_port) {
+        sbrec_fdb_delete(fdb_e);
+    }
+    sbrec_fdb_index_destroy_row(target);
+}
diff --git a/lib/fdb.h b/lib/fdb.h
new file mode 100644
index 000000000..7b64bc7f2
--- /dev/null
+++ b/lib/fdb.h
@@ -0,0 +1,26 @@ 
+/* Copyright (c) 2024, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_FDB_H
+#define OVN_FDB_H 1
+
+#include "ovsdb-idl.h"
+#include "ovn-sb-idl.h"
+
+void
+delete_fdb_entries(struct ovsdb_idl_index *sbrec_fdb_by_dp_and_port,
+                   uint32_t dp_key, uint32_t port_key);
+
+#endif
diff --git a/tests/ovn.at b/tests/ovn.at
index b31afbfb3..a0df87196 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -31706,6 +31706,63 @@  OVN_CLEANUP([hv1], [hv2], [hv3])
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([OVN FDB cleanup when vif goes down])
+ovn_start
+
+net_add n1
+
+AT_CHECK([ovn-nbctl ls-add ls0])
+
+AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
+AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
+AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
+AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
+AT_CHECK([ovn-nbctl set logical_switch_port ln_port options:localnet_learn_fdb=true])
+
+AT_CHECK([ovn-nbctl lsp-add ls0 vif0])
+AT_CHECK([ovn-nbctl lsp-set-addresses vif0 "00:00:00:00:10:10 192.168.10.10" unknown])
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int vif0 -- \
+    set interface vif0 external-ids:iface-id=vif0 \
+    options:tx_pcap=hv1/vif0-tx.pcap \
+    options:rxq_pcap=hv1/vif0-rx.pcap \
+    ofport-request=1
+ovs-vsctl -- add-port br-phys ext0 -- \
+    set interface ext0 \
+    options:tx_pcap=hv1/ext0-tx.pcap \
+    options:rxq_pcap=hv1/ext0-rx.pcap \
+    ofport-request=2
+ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
+
+
+wait_for_ports_up
+ls0_key=$(fetch_column datapath_binding tunnel_key external_ids:name=ls0)
+vif0_key=$(fetch_column port_binding tunnel_key logical_port=vif0)
+
+ovn-sbctl create FDB mac="00\:00\:00\:00\:10\:10" dp_key=$ls0_key port_key=$vif0_key
+ovn-sbctl create FDB mac="00\:00\:00\:00\:20\:20" dp_key=$ls0_key port_key=$vif0_key
+
+ovn-sbctl list fdb
+# vif going down should purge all relevant FDB entries
+ovs-vsctl del-port br-int vif0
+OVS_WAIT_UNTIL([test x`ovn-nbctl lsp-get-up vif0` = xdown])
+AT_CHECK([ovn-nbctl --wait=hv sync])
+ovn-sbctl list fdb
+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:10:10") = 0])
+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:20:20") = 0])
+
+# vif0 should not be a local datapath in hv1 as vif0 is down
+# Hence ln_port should not be a related port.
+OVN_CLEANUP([hv1
+ln_port])
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([container port changed to normal port and then deleted])
 ovn_start