
[ovs-dev,3/3] northd: add BFD support for ECMP route policy

Message ID d62c9d3ddddebb50cb5ef4961c205d6a280fa66b.1705487435.git.lorenzo.bianconi@redhat.com
State Changes Requested
Series Introduce BFD support for ECMP route policy

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Lorenzo Bianconi Jan. 17, 2024, 11:11 a.m. UTC
Similar to OVN static routes, introduce the capability to link a BFD
session for OVN reroute policies.

Reported-at: https://issues.redhat.com/browse/FDP-234
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 NEWS                      |  1 +
 northd/northd.c           | 71 +++++++++++++++++++++++++++++----
 ovn-nb.ovsschema          |  9 ++++-
 ovn-nb.xml                |  7 ++++
 tests/ovn-nbctl.at        |  6 +++
 tests/ovn-northd.at       | 20 +++++++++-
 tests/system-ovn.at       | 51 +++++++++++++++++++++---
 utilities/ovn-nbctl.8.xml |  8 ++++
 utilities/ovn-nbctl.c     | 84 ++++++++++++++++++++++++++++++++++++++-
 9 files changed, 239 insertions(+), 18 deletions(-)

Comments

Mark Michelson Feb. 1, 2024, 9:44 p.m. UTC | #1
Hi Lorenzo,

I have a few comments below.

On 1/17/24 06:11, Lorenzo Bianconi wrote:
> Similar to OVN static routes, introduce the capability to link a BFD
> session for OVN reroute policies.
> 
> Reported-at: https://issues.redhat.com/browse/FDP-234
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---
>   NEWS                      |  1 +
>   northd/northd.c           | 71 +++++++++++++++++++++++++++++----
>   ovn-nb.ovsschema          |  9 ++++-
>   ovn-nb.xml                |  7 ++++
>   tests/ovn-nbctl.at        |  6 +++
>   tests/ovn-northd.at       | 20 +++++++++-
>   tests/system-ovn.at       | 51 +++++++++++++++++++++---
>   utilities/ovn-nbctl.8.xml |  8 ++++
>   utilities/ovn-nbctl.c     | 84 ++++++++++++++++++++++++++++++++++++++-
>   9 files changed, 239 insertions(+), 18 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index 5f267b4c6..5c0ea7e80 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -14,6 +14,7 @@ Post v23.09.0
>     - ovn-northd-ddlog has been removed.
>     - A new LSP option "enable_router_port_acl" has been added to enable
>       conntrack for the router port whose peer is l3dgw_port if set it true.
> +  - Introduce next-hop BFD availability check for OVN reroute policies.
>   
>   OVN v23.09.0 - 15 Sep 2023
>   --------------------------
> diff --git a/northd/northd.c b/northd/northd.c
> index 952f8200d..b517e5a7b 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -10887,10 +10887,48 @@ get_outport_for_routing_policy_nexthop(struct ovn_datapath *od,
>       return NULL;
>   }
>   
> +static struct ovs_mutex bfd_lock = OVS_MUTEX_INITIALIZER;
> +
> +static bool check_bfd_state(
> +        const struct nbrec_logical_router_policy *rule,
> +        const struct hmap *bfd_connections,
> +        struct ovn_port *out_port,
> +        const char *nexthop)
> +{
> +    for (size_t i = 0; i < rule->n_bfd_sessions; i++) {
> +        /* Check if there is a BFD session associated to the reroute
> +         * policy. */
> +        const struct nbrec_bfd *nb_bt = rule->bfd_sessions[i];
> +        if (!strcmp(nb_bt->dst_ip, nexthop) &&

If nb_bt->dst_ip and nexthop are IPv6 addresses, then strcmp is not a
good way to compare addresses, since there are many ways to represent 
the same IPv6 address as a string.
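
For illustration only, a value-based comparison could look roughly like the
sketch below. It uses the standard inet_pton() just to keep the example
self-contained, and addr_str_equals() is a made-up helper name; the actual
fix would more likely reuse the address-parsing helpers already available in
the OVS/OVN tree.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <string.h>

/* Compare two textual addresses by value rather than by spelling, so that
 * e.g. "fd00::1" and "fd00:0:0:0:0:0:0:1" compare equal.  Falls back to
 * strcmp() when neither string parses as IPv6 or IPv4. */
static bool
addr_str_equals(const char *a, const char *b)
{
    struct in6_addr a6, b6;
    struct in_addr a4, b4;

    if (inet_pton(AF_INET6, a, &a6) == 1 && inet_pton(AF_INET6, b, &b6) == 1) {
        return !memcmp(&a6, &b6, sizeof a6);
    }
    if (inet_pton(AF_INET, a, &a4) == 1 && inet_pton(AF_INET, b, &b4) == 1) {
        return a4.s_addr == b4.s_addr;
    }
    return !strcmp(a, b);
}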

> +            !strcmp(nb_bt->logical_port, out_port->key)) {
> +            struct bfd_entry *bfd_e = bfd_port_lookup(
> +                    bfd_connections, nb_bt->logical_port,
> +                    nb_bt->dst_ip);
> +
> +            ovs_mutex_lock(&bfd_lock);

Correct me if I'm wrong, but I think that there is a 1-to-1 relationship 
between northbound BFD rows and internal northd bfd_entry structs. If 
that's correct, then could we add a mutex to struct bfd_entry and lock 
that here instead of using a global bfd_lock? That way, contention is 
limited to reroute policies that reroute to the same destination IP address.
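
A rough sketch of that idea; the struct layout, field names, and include
paths below are assumptions for illustration, not the actual northd
definitions.

#include <stdbool.h>
#include "openvswitch/hmap.h"
#include "ovs-thread.h"

/* Hypothetical per-session lock embedded in northd's bfd_entry, so that
 * only policies sharing the same BFD session (logical port + dst_ip)
 * contend on it. */
struct bfd_entry {
    struct hmap_node hmap_node;  /* In the bfd_connections hmap. */
    struct ovs_mutex mutex;      /* Protects 'ref' and NB status updates. */
    bool ref;
    /* ... remaining fields as today ... */
};

/* check_bfd_state() would then take bfd_e->mutex for the matched entry
 * instead of the global bfd_lock. */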

> +            if (bfd_e) {
> +                bfd_e->ref = true;
> +            }
> +
> +            if (!strcmp(nb_bt->status, "admin_down")) {
> +                nbrec_bfd_set_status(nb_bt, "down");

You could unlock and return false here.

> +            }
> +
> +            if (!strcmp(nb_bt->status, "down")) {
> +                ovs_mutex_unlock(&bfd_lock);
> +                return false;
> +            }
> +            ovs_mutex_unlock(&bfd_lock);

Would it be possible to return true here? Or can there be multiple BFD 
sessions associated with the nexthop IP address that we need to check 
the state of? And must all of them be "up" for us to return true?
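
If, as the reply below confirms, the first matching session decides the
outcome, the two points above could be folded into a single early exit.
A sketch against the code above, where check_bfd_state_one() is a
hypothetical helper whose result check_bfd_state() would return directly
for the matched session:

static bool
check_bfd_state_one(const struct nbrec_bfd *nb_bt, struct bfd_entry *bfd_e)
{
    ovs_mutex_lock(&bfd_lock);
    if (bfd_e) {
        bfd_e->ref = true;
    }

    if (!strcmp(nb_bt->status, "admin_down")) {
        nbrec_bfd_set_status(nb_bt, "down");
        ovs_mutex_unlock(&bfd_lock);
        return false;
    }

    bool up = strcmp(nb_bt->status, "down") != 0;
    ovs_mutex_unlock(&bfd_lock);
    return up;
}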

> +        }
> +    }
> +    return true;
> +}
> +
>   static void
>   build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
>                             const struct hmap *lr_ports,
>                             const struct nbrec_logical_router_policy *rule,
> +                          const struct hmap *bfd_connections,
>                             const struct ovsdb_idl_row *stage_hint)
>   {
>       struct ds match = DS_EMPTY_INITIALIZER;
> @@ -10915,6 +10953,11 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
>                            rule->priority, nexthop);
>               return;
>           }
> +
> +        if (!check_bfd_state(rule, bfd_connections, out_port, nexthop)) {
> +            return;
> +        }
> +
>           uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
>           if (pkt_mark) {
>               ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
> @@ -10956,6 +10999,7 @@ static void
>   build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
>                                   const struct hmap *lr_ports,
>                                   const struct nbrec_logical_router_policy *rule,
> +                                const struct hmap *bfd_connections,
>                                   uint16_t ecmp_group_id)
>   {
>       ovs_assert(rule->n_nexthops > 1);
> @@ -10984,6 +11028,9 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
>       struct ds match = DS_EMPTY_INITIALIZER;
>       struct ds actions = DS_EMPTY_INITIALIZER;
>   
> +    size_t *valid_nexthops = xcalloc(rule->n_nexthops, sizeof *valid_nexthops);
> +    size_t n_valid_nexthops = 0;
> +
>       for (size_t i = 0; i < rule->n_nexthops; i++) {
>           struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
>                od, lr_ports, rule->priority, rule->nexthops[i]);
> @@ -11001,6 +11048,13 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
>               goto cleanup;
>           }
>   
> +        if (!check_bfd_state(rule, bfd_connections, out_port,
> +                             rule->nexthops[i])) {
> +            continue;
> +        }
> +
> +        valid_nexthops[n_valid_nexthops++] = i + 1;
> +
>           ds_clear(&actions);
>           uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
>           if (pkt_mark) {
> @@ -11036,12 +11090,12 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
>                     "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id,
>                     REG_ECMP_MEMBER_ID);
>   
> -    for (size_t i = 0; i < rule->n_nexthops; i++) {
> +    for (size_t i = 0; i < n_valid_nexthops; i++) {
>           if (i > 0) {
>               ds_put_cstr(&actions, ", ");
>           }
>   
> -        ds_put_format(&actions, "%"PRIuSIZE, i + 1);
> +        ds_put_format(&actions, "%"PRIuSIZE, valid_nexthops[i]);
>       }
>       ds_put_cstr(&actions, ");");
>       ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY,
> @@ -11049,6 +11103,7 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
>                               ds_cstr(&actions), &rule->header_);
>   
>   cleanup:
> +    free(valid_nexthops);
>       ds_destroy(&match);
>       ds_destroy(&actions);
>   }
> @@ -11132,8 +11187,6 @@ route_hash(struct parsed_route *route)
>                         (uint32_t)route->plen);
>   }
>   
> -static struct ovs_mutex bfd_lock = OVS_MUTEX_INITIALIZER;
> -
>   static bool
>   find_static_route_outport(struct ovn_datapath *od, const struct hmap *lr_ports,
>       const struct nbrec_logical_router_static_route *route, bool is_ipv4,
> @@ -13679,7 +13732,8 @@ build_mcast_lookup_flows_for_lrouter(
>   static void
>   build_ingress_policy_flows_for_lrouter(
>           struct ovn_datapath *od, struct hmap *lflows,
> -        const struct hmap *lr_ports)
> +        const struct hmap *lr_ports,
> +        const struct hmap *bfd_connections)
>   {
>       ovs_assert(od->nbr);
>       /* This is a catch-all rule. It has the lowest priority (0)
> @@ -13700,11 +13754,11 @@ build_ingress_policy_flows_for_lrouter(
>   
>           if (is_ecmp_reroute) {
>               build_ecmp_routing_policy_flows(lflows, od, lr_ports, rule,
> -                                            ecmp_group_id);
> +                                            bfd_connections, ecmp_group_id);
>               ecmp_group_id++;
>           } else {
>               build_routing_policy_flow(lflows, od, lr_ports, rule,
> -                                      &rule->header_);
> +                                      bfd_connections, &rule->header_);
>           }
>       }
>   }
> @@ -16139,7 +16193,8 @@ build_lswitch_and_lrouter_iterate_by_lr(struct ovn_datapath *od,
>                                            lsi->bfd_connections);
>       build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match,
>                                            &lsi->actions);
> -    build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports);
> +    build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
> +                                           lsi->bfd_connections);
>       build_arp_resolve_flows_for_lrouter(od, lsi->lflows);
>       build_check_pkt_len_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
>                                             &lsi->match, &lsi->actions,
> diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> index b2e0993e0..cec02a172 100644
> --- a/ovn-nb.ovsschema
> +++ b/ovn-nb.ovsschema
> @@ -1,7 +1,7 @@
>   {
>       "name": "OVN_Northbound",
> -    "version": "7.2.0",
> -    "cksum": "1069338687 34162",
> +    "version": "7.2.1",
> +    "cksum": "4156161406 34467",
>       "tables": {
>           "NB_Global": {
>               "columns": {
> @@ -477,6 +477,11 @@
>                   "nexthop": {"type": {"key": "string", "min": 0, "max": 1}},
>                   "nexthops": {"type": {
>                       "key": "string", "min": 0, "max": "unlimited"}},
> +                "bfd_sessions": {"type": {"key": {"type": "uuid",
> +                                                  "refTable": "BFD",
> +                                                  "refType": "weak"},
> +                                  "min": 0,
> +                                  "max": "unlimited"}},
>                   "options": {
>                       "type": {"key": "string", "value": "string",
>                                "min": 0, "max": "unlimited"}},
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 765ffcf2e..d35c34517 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -3682,6 +3682,13 @@ or
>         </p>
>       </column>
>   
> +    <column name="bfd_sessions">
> +      <p>
> +        Reference to <ref table="BFD"/> row if the route policy has associated
> +        some BFD sessions.
> +      </p>
> +    </column>
> +
>       <column name="options" key="pkt_mark">
>         <p>
>           Marks the packet with the value specified when the router policy
> diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
> index 2d74e9cc6..fcaee1342 100644
> --- a/tests/ovn-nbctl.at
> +++ b/tests/ovn-nbctl.at
> @@ -2189,6 +2189,12 @@ AT_CHECK([ovn-nbctl lr-policy-add lr0 100 "ip4.src == 1.1.2.0/24" allow pkt_mark
>   AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.1.0/24" allow])
>   AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.2.0/24" drop])
>   AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip6.src == 2002::/64" drop])
> +AT_CHECK([ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" reroute 192.168.1.1], [1], [],
> +  [ovn-nbctl: out lrp not found for 192.168.1.1 nexthop
> +])
> +AT_CHECK([ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" drop], [1], [],
> +  [ovn-nbctl: BFD is valid only with reroute action.
> +])
>   
>   dnl Incomplete option set.
>   AT_CHECK([ovn-nbctl lr-policy-add lr0 200 "ip4.src == 1.1.4.0/24" reroute 192.168.0.10 foo], [1], [],
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 9a0d418e4..d9806d5d0 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -3580,7 +3580,7 @@ AT_KEYWORDS([northd-bfd])
>   ovn_start
>   
>   check ovn-nbctl --wait=sb lr-add r0
> -for i in $(seq 1 7); do
> +for i in $(seq 1 9); do
>       check ovn-nbctl --wait=sb lrp-add r0 r0-sw$i 00:00:00:00:00:0$i 192.168.$i.1/24
>       check ovn-nbctl --wait=sb ls-add sw$i
>       check ovn-nbctl --wait=sb lsp-add sw$i sw$i-r0
> @@ -3644,6 +3644,24 @@ bfd2_uuid=$(fetch_column bfd _uuid logical_port=r0-sw2)
>   check ovn-sbctl set bfd $bfd2_uuid status=up
>   wait_column up nb:bfd status logical_port=r0-sw2
>   
> +# Create reroute policies associated with BFD sessions
> +check ovn-nbctl lr-route-del r0
> +check ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 1.2.3.0/24" reroute 192.168.8.2
> +wait_column down bfd status logical_port=r0-sw8
> +
> +bfd_route_policy_uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw8)
> +AT_CHECK([ovn-nbctl list logical_router_policy | grep -q $bfd_route_policy_uuid])
> +
> +check ovn-nbctl lr-policy-del r0
> +check ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 2.3.4.0/24" reroute 192.168.9.2,192.168.9.3,192.168.9.4
> +
> +wait_column down bfd status dst_ip=192.168.9.2
> +wait_column down bfd status dst_ip=192.168.9.3
> +wait_column down bfd status dst_ip=192.168.9.4
> +
> +bfd_route_policy_uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw9)
> +AT_CHECK([ovn-nbctl list logical_router_policy | sed s/,//g | grep -q "$bfd_route_policy_uuid"])
> +
>   AT_CLEANUP
>   ])
>   
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index 91fe7cac1..62455d696 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -6837,6 +6837,15 @@ OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst ==
>   check ovn-nbctl clear logical_router_static_route $route_uuid bfd
>   wait_column "admin_down" nb:bfd status logical_port=rp-public
>   OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> +
> +check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50
> +wait_column "up" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep -q 172.16.1.50])
> +
> +check ovn-nbctl lr-policy-del R1
> +wait_column "admin_down" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> +
>   NS_CHECK_EXEC([server], [tcpdump -nni s1 udp port 3784 -Q in > bfd.pcap &])
>   sleep 5
>   kill $(pidof tcpdump)
> @@ -6844,25 +6853,57 @@ AT_CHECK([grep -qi bfd bfd.pcap],[1])
>   
>   # restart the connection
>   check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid
> +check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50
>   wait_column "up" nb:bfd status logical_port=rp-public
>   
> +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep -q 172.16.1.50])
> +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep -q 172.16.1.50])
> +
> +# stop bfd endpoint
> +NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl
> +stopping
> +])
> +wait_column "down" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep 172.16.1.50)" = ""])
> +OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep 172.16.1.50)" = ""])
> +
>   # switch to gw router configuration
>   check ovn-nbctl clear logical_router_static_route $route_uuid bfd
> -wait_column "admin_down" nb:bfd status logical_port=rp-public
> -OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> +check ovn-nbctl lr-policy-del R1
>   check ovn-nbctl clear logical_router_port rp-public gateway_chassis
>   check ovn-nbctl set logical_router R1 options:chassis=hv1
>   check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid
> +
> +# restart bfdd
> +NS_CHECK_EXEC([server], [bfdd-beacon --listen=172.16.1.50], [0])
> +NS_CHECK_EXEC([server], [bfdd-control allow 172.16.1.1], [0], [dnl
> +Allowing connections from 172.16.1.1
> +])
> +
>   wait_column "up" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep -q 172.16.1.50])
> +
> +check ovn-nbctl clear logical_router_static_route $route_uuid bfd
> +wait_column "admin_down" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> +ovn-nbctl destroy bfd $uuid
> +check_row_count bfd 0
> +
> +# create reroute route policy
> +check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 210.0.0.0/8" reroute 172.16.1.50
> +wait_column "up" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 210.0.0.0/8' |grep -q 172.16.1.50])
> +
> +check ovn-nbctl lr-policy-del R1
> +wait_column "admin_down" nb:bfd status logical_port=rp-public
> +OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> +uuid=$(fetch_column nb:bfd _uuid logical_port="rp-public")
>   
>   # stop bfd endpoint
>   NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl
>   stopping
>   ])
>   
> -wait_column "down" nb:bfd status logical_port=rp-public
> -OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep 172.16.1.50)" = ""])
> -
>   # remove bfd entry
>   ovn-nbctl destroy bfd $uuid
>   check_row_count bfd 0
> diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
> index 6f74bd557..5aa01f2f8 100644
> --- a/utilities/ovn-nbctl.8.xml
> +++ b/utilities/ovn-nbctl.8.xml
> @@ -1119,6 +1119,14 @@
>             duplicated routing policy results in error.
>           </p>
>   
> +        <p>
> +          <code>--bfd</code> option is used to link a BFD session to the
> +          OVN reroute policy. OVN will look for an already running BFD
> +          session using next-hop as lookup key in the BFD table.
> +          If the lookup fails, a new entry in the BFD table will be created
> +          using the <var>nexthop</var> as <var>dst_ip</var>.
> +        </p>
> +
>             <p>
>             The following example shows a policy to lr1, which will drop packets
>             from<code>192.168.100.0/24</code>.
> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
> index 0586eccdb..3d16555a5 100644
> --- a/utilities/ovn-nbctl.c
> +++ b/utilities/ovn-nbctl.c
> @@ -4014,14 +4014,26 @@ normalize_addr_str(const char *orig_addr)
>       return ret;
>   }
>   
> +static bool
> +ip_in_lrp_networks(const struct nbrec_logical_router_port *lrp,
> +                   const char *ip_s);
> +
>   static void
>   nbctl_pre_lr_policy_add(struct ctl_context *ctx)
>   {
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_name);
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_ports);
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_policies);
>   
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_name);
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_mac);
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_networks);
> +
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_priority);
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_match);
> +
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_bfd_col_dst_ip);
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_bfd_col_logical_port);
>   }
>   
>   static void
> @@ -4158,6 +4170,68 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
>   
>       nbrec_logical_router_update_policies_addvalue(lr, policy);
>   
> +    struct shash_node *bfd = shash_find(&ctx->options, "--bfd");
> +    const struct nbrec_bfd **bfd_sessions = NULL;
> +
> +    if (bfd) {
> +        if (!reroute) {
> +            ctl_error(ctx, "BFD is valid only with reroute action.");
> +            goto free_nexthops;
> +        }
> +
> +        bfd_sessions = xcalloc(n_nexthops, sizeof *bfd_sessions);
> +        size_t j, n_bfd_sessions = 0;
> +
> +        for (i = 0; i < n_nexthops; i++) {
> +            for (j = 0; j < lr->n_ports; j++) {
> +                if (ip_in_lrp_networks(lr->ports[j], nexthops[i])) {
> +                    break;
> +                }
> +            }
> +
> +            if (j == lr->n_ports) {
> +                ctl_error(ctx, "out lrp not found for %s nexthop",
> +                          nexthops[i]);
> +                goto free_nexthops;
> +            }
> +
> +            const struct nbrec_bfd *iter, *nb_bt = NULL;
> +            NBREC_BFD_FOR_EACH (iter, ctx->idl) {
> +                /* match endpoint ip. */
> +                if (strcmp(iter->dst_ip, nexthops[i])) {

Like my other comment, string comparison of IPv6 addresses may yield 
incorrect results.

> +                    continue;
> +                }
> +                /* match outport. */
> +                if (strcmp(iter->logical_port, lr->ports[j]->name)) {
> +                    continue;
> +                }
> +
> +                nb_bt = iter;
> +                break;
> +            }
> +
> +            /* Create the BFD session if it does not already exist. */
> +            if (!nb_bt) {
> +                nb_bt = nbrec_bfd_insert(ctx->txn);
> +                nbrec_bfd_set_dst_ip(nb_bt, nexthops[i]);
> +                nbrec_bfd_set_logical_port(nb_bt, lr->ports[j]->name);
> +            }
> +
> +            for (j = 0; j < n_bfd_sessions; j++) {
> +                if (bfd_sessions[j] == nb_bt) {
> +                    break;
> +                }
> +            }
> +            if (j == n_bfd_sessions) {
> +                bfd_sessions[n_bfd_sessions++] = nb_bt;
> +            }
> +        }
> +        nbrec_logical_router_policy_set_bfd_sessions(
> +                policy, (struct nbrec_bfd **) bfd_sessions, n_bfd_sessions);
> +    }
> +
> +free_nexthops:
> +    free(bfd_sessions);
>       for (i = 0; i < n_nexthops; i++) {
>           free(nexthops[i]);
>       }
> @@ -4282,8 +4356,11 @@ print_routing_policy(const struct nbrec_logical_router_policy *policy,
>                         policy->match, policy->action);
>       }
>   
> -    if (!smap_is_empty(&policy->options)) {
> +    if (!smap_is_empty(&policy->options) || policy->n_bfd_sessions) {
>           ds_put_format(s, "%15s", "");
> +        if (policy->n_bfd_sessions) {
> +            ds_put_cstr(s, "bfd,");
> +        }
>           struct smap_node *node;
>           SMAP_FOR_EACH (node, &policy->options) {
>               ds_put_format(s, "%s=%s,", node->key, node->value);
> @@ -4305,6 +4382,8 @@ nbctl_pre_lr_policy_list(struct ctl_context *ctx)
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_nexthops);
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_action);
>       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_options);
> +    ovsdb_idl_add_column(ctx->idl,
> +                         &nbrec_logical_router_policy_col_bfd_sessions);
>   }
>   
>   static void
> @@ -7900,7 +7979,8 @@ static const struct ctl_command_syntax nbctl_commands[] = {
>       /* Policy commands */
>       { "lr-policy-add", 4, INT_MAX,
>        "ROUTER PRIORITY MATCH ACTION [NEXTHOP] [OPTIONS - KEY=VALUE ...]",
> -     nbctl_pre_lr_policy_add, nbctl_lr_policy_add, NULL, "--may-exist", RW },
> +     nbctl_pre_lr_policy_add, nbctl_lr_policy_add, NULL, "--may-exist,--bfd?",
> +     RW },
>       { "lr-policy-del", 1, 3, "ROUTER [{PRIORITY | UUID} [MATCH]]",
>         nbctl_pre_lr_policy_del, nbctl_lr_policy_del, NULL, "--if-exists", RW },
>       { "lr-policy-list", 1, 1, "ROUTER", nbctl_pre_lr_policy_list,
Lorenzo Bianconi Feb. 2, 2024, 1:28 p.m. UTC | #2
> Hi Lorenzo,
> 
> I have a few comments below.
> 
> On 1/17/24 06:11, Lorenzo Bianconi wrote:
> > Similar to OVN static routes, introduce the capability to link a BFD
> > session for OVN reroute policies.
> > 
> > Reported-at: https://issues.redhat.com/browse/FDP-234
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> > ---
> >   NEWS                      |  1 +
> >   northd/northd.c           | 71 +++++++++++++++++++++++++++++----
> >   ovn-nb.ovsschema          |  9 ++++-
> >   ovn-nb.xml                |  7 ++++
> >   tests/ovn-nbctl.at        |  6 +++
> >   tests/ovn-northd.at       | 20 +++++++++-
> >   tests/system-ovn.at       | 51 +++++++++++++++++++++---
> >   utilities/ovn-nbctl.8.xml |  8 ++++
> >   utilities/ovn-nbctl.c     | 84 ++++++++++++++++++++++++++++++++++++++-
> >   9 files changed, 239 insertions(+), 18 deletions(-)
> > 
> > diff --git a/NEWS b/NEWS
> > index 5f267b4c6..5c0ea7e80 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -14,6 +14,7 @@ Post v23.09.0
> >     - ovn-northd-ddlog has been removed.
> >     - A new LSP option "enable_router_port_acl" has been added to enable
> >       conntrack for the router port whose peer is l3dgw_port if set it true.
> > +  - Introduce next-hop BFD availability check for OVN reroute policies.
> >   OVN v23.09.0 - 15 Sep 2023
> >   --------------------------
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 952f8200d..b517e5a7b 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -10887,10 +10887,48 @@ get_outport_for_routing_policy_nexthop(struct ovn_datapath *od,
> >       return NULL;
> >   }
> > +static struct ovs_mutex bfd_lock = OVS_MUTEX_INITIALIZER;
> > +
> > +static bool check_bfd_state(
> > +        const struct nbrec_logical_router_policy *rule,
> > +        const struct hmap *bfd_connections,
> > +        struct ovn_port *out_port,
> > +        const char *nexthop)
> > +{
> > +    for (size_t i = 0; i < rule->n_bfd_sessions; i++) {
> > +        /* Check if there is a BFD session associated to the reroute
> > +         * policy. */
> > +        const struct nbrec_bfd *nb_bt = rule->bfd_sessions[i];
> > +        if (!strcmp(nb_bt->dst_ip, nexthop) &&
> 
> If nb_bt->dst_ip and nexthop are IPv6 addresses, then strcmp is not a good
> way to compare addresses, since there are many ways to represent the same
> IPv6 address as a string.

ack, I will fix it.

> 
> > +            !strcmp(nb_bt->logical_port, out_port->key)) {
> > +            struct bfd_entry *bfd_e = bfd_port_lookup(
> > +                    bfd_connections, nb_bt->logical_port,
> > +                    nb_bt->dst_ip);
> > +
> > +            ovs_mutex_lock(&bfd_lock);
> 
> Correct me if I'm wrong, but I think that there is a 1-to-1 relationship
> between northbound BFD rows and internal northd bfd_entry structs. If that's
> correct, then could we add a mutex to struct bfd_entry and lock that here
> instead of using a global bfd_lock? That way, contention is limited to
> reroute policies that reroute to the same destination IP address.

yep, I agree, but I do not think this is strictly related to this series since
we already have bfd_lock for static routes as well. What about addressing your
comments in a subsequent patch?

> 
> > +            if (bfd_e) {
> > +                bfd_e->ref = true;
> > +            }
> > +
> > +            if (!strcmp(nb_bt->status, "admin_down")) {
> > +                nbrec_bfd_set_status(nb_bt, "down");
> 
> You could unlock and return false here.

we would need to duplicate the code below in that case, wouldn't we?

> 
> > +            }
> > +
> > +            if (!strcmp(nb_bt->status, "down")) {
> > +                ovs_mutex_unlock(&bfd_lock);
> > +                return false;
> > +            }
> > +            ovs_mutex_unlock(&bfd_lock);
> 
> Would it be possible to return true here? Or can there be multiple BFD
> sessions associated with the nexthop IP address that we need to check the
> state of? And must all of them be "up" for us to return true?

ack, we can just return here, I will fix it.

> 
> > +        }
> > +    }
> > +    return true;
> > +}
> > +
> >   static void
> >   build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
> >                             const struct hmap *lr_ports,
> >                             const struct nbrec_logical_router_policy *rule,
> > +                          const struct hmap *bfd_connections,
> >                             const struct ovsdb_idl_row *stage_hint)
> >   {
> >       struct ds match = DS_EMPTY_INITIALIZER;
> > @@ -10915,6 +10953,11 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
> >                            rule->priority, nexthop);
> >               return;
> >           }
> > +
> > +        if (!check_bfd_state(rule, bfd_connections, out_port, nexthop)) {
> > +            return;
> > +        }
> > +
> >           uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
> >           if (pkt_mark) {
> >               ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
> > @@ -10956,6 +10999,7 @@ static void
> >   build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
> >                                   const struct hmap *lr_ports,
> >                                   const struct nbrec_logical_router_policy *rule,
> > +                                const struct hmap *bfd_connections,
> >                                   uint16_t ecmp_group_id)
> >   {
> >       ovs_assert(rule->n_nexthops > 1);
> > @@ -10984,6 +11028,9 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
> >       struct ds match = DS_EMPTY_INITIALIZER;
> >       struct ds actions = DS_EMPTY_INITIALIZER;
> > +    size_t *valid_nexthops = xcalloc(rule->n_nexthops, sizeof *valid_nexthops);
> > +    size_t n_valid_nexthops = 0;
> > +
> >       for (size_t i = 0; i < rule->n_nexthops; i++) {
> >           struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
> >                od, lr_ports, rule->priority, rule->nexthops[i]);
> > @@ -11001,6 +11048,13 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
> >               goto cleanup;
> >           }
> > +        if (!check_bfd_state(rule, bfd_connections, out_port,
> > +                             rule->nexthops[i])) {
> > +            continue;
> > +        }
> > +
> > +        valid_nexthops[n_valid_nexthops++] = i + 1;
> > +
> >           ds_clear(&actions);
> >           uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
> >           if (pkt_mark) {
> > @@ -11036,12 +11090,12 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
> >                     "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id,
> >                     REG_ECMP_MEMBER_ID);
> > -    for (size_t i = 0; i < rule->n_nexthops; i++) {
> > +    for (size_t i = 0; i < n_valid_nexthops; i++) {
> >           if (i > 0) {
> >               ds_put_cstr(&actions, ", ");
> >           }
> > -        ds_put_format(&actions, "%"PRIuSIZE, i + 1);
> > +        ds_put_format(&actions, "%"PRIuSIZE, valid_nexthops[i]);
> >       }
> >       ds_put_cstr(&actions, ");");
> >       ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY,
> > @@ -11049,6 +11103,7 @@ build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
> >                               ds_cstr(&actions), &rule->header_);
> >   cleanup:
> > +    free(valid_nexthops);
> >       ds_destroy(&match);
> >       ds_destroy(&actions);
> >   }
> > @@ -11132,8 +11187,6 @@ route_hash(struct parsed_route *route)
> >                         (uint32_t)route->plen);
> >   }
> > -static struct ovs_mutex bfd_lock = OVS_MUTEX_INITIALIZER;
> > -
> >   static bool
> >   find_static_route_outport(struct ovn_datapath *od, const struct hmap *lr_ports,
> >       const struct nbrec_logical_router_static_route *route, bool is_ipv4,
> > @@ -13679,7 +13732,8 @@ build_mcast_lookup_flows_for_lrouter(
> >   static void
> >   build_ingress_policy_flows_for_lrouter(
> >           struct ovn_datapath *od, struct hmap *lflows,
> > -        const struct hmap *lr_ports)
> > +        const struct hmap *lr_ports,
> > +        const struct hmap *bfd_connections)
> >   {
> >       ovs_assert(od->nbr);
> >       /* This is a catch-all rule. It has the lowest priority (0)
> > @@ -13700,11 +13754,11 @@ build_ingress_policy_flows_for_lrouter(
> >           if (is_ecmp_reroute) {
> >               build_ecmp_routing_policy_flows(lflows, od, lr_ports, rule,
> > -                                            ecmp_group_id);
> > +                                            bfd_connections, ecmp_group_id);
> >               ecmp_group_id++;
> >           } else {
> >               build_routing_policy_flow(lflows, od, lr_ports, rule,
> > -                                      &rule->header_);
> > +                                      bfd_connections, &rule->header_);
> >           }
> >       }
> >   }
> > @@ -16139,7 +16193,8 @@ build_lswitch_and_lrouter_iterate_by_lr(struct ovn_datapath *od,
> >                                            lsi->bfd_connections);
> >       build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match,
> >                                            &lsi->actions);
> > -    build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports);
> > +    build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
> > +                                           lsi->bfd_connections);
> >       build_arp_resolve_flows_for_lrouter(od, lsi->lflows);
> >       build_check_pkt_len_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
> >                                             &lsi->match, &lsi->actions,
> > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> > index b2e0993e0..cec02a172 100644
> > --- a/ovn-nb.ovsschema
> > +++ b/ovn-nb.ovsschema
> > @@ -1,7 +1,7 @@
> >   {
> >       "name": "OVN_Northbound",
> > -    "version": "7.2.0",
> > -    "cksum": "1069338687 34162",
> > +    "version": "7.2.1",
> > +    "cksum": "4156161406 34467",
> >       "tables": {
> >           "NB_Global": {
> >               "columns": {
> > @@ -477,6 +477,11 @@
> >                   "nexthop": {"type": {"key": "string", "min": 0, "max": 1}},
> >                   "nexthops": {"type": {
> >                       "key": "string", "min": 0, "max": "unlimited"}},
> > +                "bfd_sessions": {"type": {"key": {"type": "uuid",
> > +                                                  "refTable": "BFD",
> > +                                                  "refType": "weak"},
> > +                                  "min": 0,
> > +                                  "max": "unlimited"}},
> >                   "options": {
> >                       "type": {"key": "string", "value": "string",
> >                                "min": 0, "max": "unlimited"}},
> > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > index 765ffcf2e..d35c34517 100644
> > --- a/ovn-nb.xml
> > +++ b/ovn-nb.xml
> > @@ -3682,6 +3682,13 @@ or
> >         </p>
> >       </column>
> > +    <column name="bfd_sessions">
> > +      <p>
> > +        Reference to <ref table="BFD"/> row if the route policy has associated
> > +        some BFD sessions.
> > +      </p>
> > +    </column>
> > +
> >       <column name="options" key="pkt_mark">
> >         <p>
> >           Marks the packet with the value specified when the router policy
> > diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
> > index 2d74e9cc6..fcaee1342 100644
> > --- a/tests/ovn-nbctl.at
> > +++ b/tests/ovn-nbctl.at
> > @@ -2189,6 +2189,12 @@ AT_CHECK([ovn-nbctl lr-policy-add lr0 100 "ip4.src == 1.1.2.0/24" allow pkt_mark
> >   AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.1.0/24" allow])
> >   AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.2.0/24" drop])
> >   AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip6.src == 2002::/64" drop])
> > +AT_CHECK([ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" reroute 192.168.1.1], [1], [],
> > +  [ovn-nbctl: out lrp not found for 192.168.1.1 nexthop
> > +])
> > +AT_CHECK([ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" drop], [1], [],
> > +  [ovn-nbctl: BFD is valid only with reroute action.
> > +])
> >   dnl Incomplete option set.
> >   AT_CHECK([ovn-nbctl lr-policy-add lr0 200 "ip4.src == 1.1.4.0/24" reroute 192.168.0.10 foo], [1], [],
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index 9a0d418e4..d9806d5d0 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -3580,7 +3580,7 @@ AT_KEYWORDS([northd-bfd])
> >   ovn_start
> >   check ovn-nbctl --wait=sb lr-add r0
> > -for i in $(seq 1 7); do
> > +for i in $(seq 1 9); do
> >       check ovn-nbctl --wait=sb lrp-add r0 r0-sw$i 00:00:00:00:00:0$i 192.168.$i.1/24
> >       check ovn-nbctl --wait=sb ls-add sw$i
> >       check ovn-nbctl --wait=sb lsp-add sw$i sw$i-r0
> > @@ -3644,6 +3644,24 @@ bfd2_uuid=$(fetch_column bfd _uuid logical_port=r0-sw2)
> >   check ovn-sbctl set bfd $bfd2_uuid status=up
> >   wait_column up nb:bfd status logical_port=r0-sw2
> > +# Create reroute policies associated with BFD sessions
> > +check ovn-nbctl lr-route-del r0
> > +check ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 1.2.3.0/24" reroute 192.168.8.2
> > +wait_column down bfd status logical_port=r0-sw8
> > +
> > +bfd_route_policy_uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw8)
> > +AT_CHECK([ovn-nbctl list logical_router_policy | grep -q $bfd_route_policy_uuid])
> > +
> > +check ovn-nbctl lr-policy-del r0
> > +check ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 2.3.4.0/24" reroute 192.168.9.2,192.168.9.3,192.168.9.4
> > +
> > +wait_column down bfd status dst_ip=192.168.9.2
> > +wait_column down bfd status dst_ip=192.168.9.3
> > +wait_column down bfd status dst_ip=192.168.9.4
> > +
> > +bfd_route_policy_uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw9)
> > +AT_CHECK([ovn-nbctl list logical_router_policy | sed s/,//g | grep -q "$bfd_route_policy_uuid"])
> > +
> >   AT_CLEANUP
> >   ])
> > diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> > index 91fe7cac1..62455d696 100644
> > --- a/tests/system-ovn.at
> > +++ b/tests/system-ovn.at
> > @@ -6837,6 +6837,15 @@ OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst ==
> >   check ovn-nbctl clear logical_router_static_route $route_uuid bfd
> >   wait_column "admin_down" nb:bfd status logical_port=rp-public
> >   OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> > +
> > +check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50
> > +wait_column "up" nb:bfd status logical_port=rp-public
> > +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep -q 172.16.1.50])
> > +
> > +check ovn-nbctl lr-policy-del R1
> > +wait_column "admin_down" nb:bfd status logical_port=rp-public
> > +OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> > +
> >   NS_CHECK_EXEC([server], [tcpdump -nni s1 udp port 3784 -Q in > bfd.pcap &])
> >   sleep 5
> >   kill $(pidof tcpdump)
> > @@ -6844,25 +6853,57 @@ AT_CHECK([grep -qi bfd bfd.pcap],[1])
> >   # restart the connection
> >   check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid
> > +check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50
> >   wait_column "up" nb:bfd status logical_port=rp-public
> > +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep -q 172.16.1.50])
> > +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep -q 172.16.1.50])
> > +
> > +# stop bfd endpoint
> > +NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl
> > +stopping
> > +])
> > +wait_column "down" nb:bfd status logical_port=rp-public
> > +OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep 172.16.1.50)" = ""])
> > +OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep 172.16.1.50)" = ""])
> > +
> >   # switch to gw router configuration
> >   check ovn-nbctl clear logical_router_static_route $route_uuid bfd
> > -wait_column "admin_down" nb:bfd status logical_port=rp-public
> > -OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> > +check ovn-nbctl lr-policy-del R1
> >   check ovn-nbctl clear logical_router_port rp-public gateway_chassis
> >   check ovn-nbctl set logical_router R1 options:chassis=hv1
> >   check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid
> > +
> > +# restart bfdd
> > +NS_CHECK_EXEC([server], [bfdd-beacon --listen=172.16.1.50], [0])
> > +NS_CHECK_EXEC([server], [bfdd-control allow 172.16.1.1], [0], [dnl
> > +Allowing connections from 172.16.1.1
> > +])
> > +
> >   wait_column "up" nb:bfd status logical_port=rp-public
> > +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep -q 172.16.1.50])
> > +
> > +check ovn-nbctl clear logical_router_static_route $route_uuid bfd
> > +wait_column "admin_down" nb:bfd status logical_port=rp-public
> > +OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> > +ovn-nbctl destroy bfd $uuid
> > +check_row_count bfd 0
> > +
> > +# create reroute route policy
> > +check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 210.0.0.0/8" reroute 172.16.1.50
> > +wait_column "up" nb:bfd status logical_port=rp-public
> > +OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 210.0.0.0/8' |grep -q 172.16.1.50])
> > +
> > +check ovn-nbctl lr-policy-del R1
> > +wait_column "admin_down" nb:bfd status logical_port=rp-public
> > +OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
> > +uuid=$(fetch_column nb:bfd _uuid logical_port="rp-public")
> >   # stop bfd endpoint
> >   NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl
> >   stopping
> >   ])
> > -wait_column "down" nb:bfd status logical_port=rp-public
> > -OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep 172.16.1.50)" = ""])
> > -
> >   # remove bfd entry
> >   ovn-nbctl destroy bfd $uuid
> >   check_row_count bfd 0
> > diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
> > index 6f74bd557..5aa01f2f8 100644
> > --- a/utilities/ovn-nbctl.8.xml
> > +++ b/utilities/ovn-nbctl.8.xml
> > @@ -1119,6 +1119,14 @@
> >             duplicated routing policy results in error.
> >           </p>
> > +        <p>
> > +          <code>--bfd</code> option is used to link a BFD session to the
> > +          OVN reroute policy. OVN will look for an already running BFD
> > +          session using next-hop as lookup key in the BFD table.
> > +          If the lookup fails, a new entry in the BFD table will be created
> > +          using the <var>nexthop</var> as <var>dst_ip</var>.
> > +        </p>
> > +
> >             <p>
> >             The following example shows a policy to lr1, which will drop packets
> >             from<code>192.168.100.0/24</code>.
> > diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
> > index 0586eccdb..3d16555a5 100644
> > --- a/utilities/ovn-nbctl.c
> > +++ b/utilities/ovn-nbctl.c
> > @@ -4014,14 +4014,26 @@ normalize_addr_str(const char *orig_addr)
> >       return ret;
> >   }
> > +static bool
> > +ip_in_lrp_networks(const struct nbrec_logical_router_port *lrp,
> > +                   const char *ip_s);
> > +
> >   static void
> >   nbctl_pre_lr_policy_add(struct ctl_context *ctx)
> >   {
> >       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_name);
> > +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_ports);
> >       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_policies);
> > +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_name);
> > +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_mac);
> > +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_networks);
> > +
> >       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_priority);
> >       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_match);
> > +
> > +    ovsdb_idl_add_column(ctx->idl, &nbrec_bfd_col_dst_ip);
> > +    ovsdb_idl_add_column(ctx->idl, &nbrec_bfd_col_logical_port);
> >   }
> >   static void
> > @@ -4158,6 +4170,68 @@ nbctl_lr_policy_add(struct ctl_context *ctx)
> >       nbrec_logical_router_update_policies_addvalue(lr, policy);
> > +    struct shash_node *bfd = shash_find(&ctx->options, "--bfd");
> > +    const struct nbrec_bfd **bfd_sessions = NULL;
> > +
> > +    if (bfd) {
> > +        if (!reroute) {
> > +            ctl_error(ctx, "BFD is valid only with reroute action.");
> > +            goto free_nexthops;
> > +        }
> > +
> > +        bfd_sessions = xcalloc(n_nexthops, sizeof *bfd_sessions);
> > +        size_t j, n_bfd_sessions = 0;
> > +
> > +        for (i = 0; i < n_nexthops; i++) {
> > +            for (j = 0; j < lr->n_ports; j++) {
> > +                if (ip_in_lrp_networks(lr->ports[j], nexthops[i])) {
> > +                    break;
> > +                }
> > +            }
> > +
> > +            if (j == lr->n_ports) {
> > +                ctl_error(ctx, "out lrp not found for %s nexthop",
> > +                          nexthops[i]);
> > +                goto free_nexthops;
> > +            }
> > +
> > +            const struct nbrec_bfd *iter, *nb_bt = NULL;
> > +            NBREC_BFD_FOR_EACH (iter, ctx->idl) {
> > +                /* match endpoint ip. */
> > +                if (strcmp(iter->dst_ip, nexthops[i])) {
> 
> Like my other comment, string comparison of IPv6 addresses may yield
> incorrect results.

ack, I will fix it.

Regards,
Lorenzo

> 
> > +                    continue;
> > +                }
> > +                /* match outport. */
> > +                if (strcmp(iter->logical_port, lr->ports[j]->name)) {
> > +                    continue;
> > +                }
> > +
> > +                nb_bt = iter;
> > +                break;
> > +            }
> > +
> > +            /* Create the BFD session if it does not already exist. */
> > +            if (!nb_bt) {
> > +                nb_bt = nbrec_bfd_insert(ctx->txn);
> > +                nbrec_bfd_set_dst_ip(nb_bt, nexthops[i]);
> > +                nbrec_bfd_set_logical_port(nb_bt, lr->ports[j]->name);
> > +            }
> > +
> > +            for (j = 0; j < n_bfd_sessions; j++) {
> > +                if (bfd_sessions[j] == nb_bt) {
> > +                    break;
> > +                }
> > +            }
> > +            if (j == n_bfd_sessions) {
> > +                bfd_sessions[n_bfd_sessions++] = nb_bt;
> > +            }
> > +        }
> > +        nbrec_logical_router_policy_set_bfd_sessions(
> > +                policy, (struct nbrec_bfd **) bfd_sessions, n_bfd_sessions);
> > +    }
> > +
> > +free_nexthops:
> > +    free(bfd_sessions);
> >       for (i = 0; i < n_nexthops; i++) {
> >           free(nexthops[i]);
> >       }
> > @@ -4282,8 +4356,11 @@ print_routing_policy(const struct nbrec_logical_router_policy *policy,
> >                         policy->match, policy->action);
> >       }
> > -    if (!smap_is_empty(&policy->options)) {
> > +    if (!smap_is_empty(&policy->options) || policy->n_bfd_sessions) {
> >           ds_put_format(s, "%15s", "");
> > +        if (policy->n_bfd_sessions) {
> > +            ds_put_cstr(s, "bfd,");
> > +        }
> >           struct smap_node *node;
> >           SMAP_FOR_EACH (node, &policy->options) {
> >               ds_put_format(s, "%s=%s,", node->key, node->value);
> > @@ -4305,6 +4382,8 @@ nbctl_pre_lr_policy_list(struct ctl_context *ctx)
> >       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_nexthops);
> >       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_action);
> >       ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_options);
> > +    ovsdb_idl_add_column(ctx->idl,
> > +                         &nbrec_logical_router_policy_col_bfd_sessions);
> >   }
> >   static void
> > @@ -7900,7 +7979,8 @@ static const struct ctl_command_syntax nbctl_commands[] = {
> >       /* Policy commands */
> >       { "lr-policy-add", 4, INT_MAX,
> >        "ROUTER PRIORITY MATCH ACTION [NEXTHOP] [OPTIONS - KEY=VALUE ...]",
> > -     nbctl_pre_lr_policy_add, nbctl_lr_policy_add, NULL, "--may-exist", RW },
> > +     nbctl_pre_lr_policy_add, nbctl_lr_policy_add, NULL, "--may-exist,--bfd?",
> > +     RW },
> >       { "lr-policy-del", 1, 3, "ROUTER [{PRIORITY | UUID} [MATCH]]",
> >         nbctl_pre_lr_policy_del, nbctl_lr_policy_del, NULL, "--if-exists", RW },
> >       { "lr-policy-list", 1, 1, "ROUTER", nbctl_pre_lr_policy_list,
>
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 5f267b4c6..5c0ea7e80 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,7 @@  Post v23.09.0
   - ovn-northd-ddlog has been removed.
   - A new LSP option "enable_router_port_acl" has been added to enable
     conntrack for the router port whose peer is l3dgw_port if set it true.
+  - Introduce next-hop BFD availability check for OVN reroute policies.
 
 OVN v23.09.0 - 15 Sep 2023
 --------------------------
diff --git a/northd/northd.c b/northd/northd.c
index 952f8200d..b517e5a7b 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -10887,10 +10887,48 @@  get_outport_for_routing_policy_nexthop(struct ovn_datapath *od,
     return NULL;
 }
 
+static struct ovs_mutex bfd_lock = OVS_MUTEX_INITIALIZER;
+
+static bool check_bfd_state(
+        const struct nbrec_logical_router_policy *rule,
+        const struct hmap *bfd_connections,
+        struct ovn_port *out_port,
+        const char *nexthop)
+{
+    for (size_t i = 0; i < rule->n_bfd_sessions; i++) {
+        /* Check if there is a BFD session associated to the reroute
+         * policy. */
+        const struct nbrec_bfd *nb_bt = rule->bfd_sessions[i];
+        if (!strcmp(nb_bt->dst_ip, nexthop) &&
+            !strcmp(nb_bt->logical_port, out_port->key)) {
+            struct bfd_entry *bfd_e = bfd_port_lookup(
+                    bfd_connections, nb_bt->logical_port,
+                    nb_bt->dst_ip);
+
+            ovs_mutex_lock(&bfd_lock);
+            if (bfd_e) {
+                bfd_e->ref = true;
+            }
+
+            if (!strcmp(nb_bt->status, "admin_down")) {
+                nbrec_bfd_set_status(nb_bt, "down");
+            }
+
+            if (!strcmp(nb_bt->status, "down")) {
+                ovs_mutex_unlock(&bfd_lock);
+                return false;
+            }
+            ovs_mutex_unlock(&bfd_lock);
+        }
+    }
+    return true;
+}
+
 static void
 build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
                           const struct hmap *lr_ports,
                           const struct nbrec_logical_router_policy *rule,
+                          const struct hmap *bfd_connections,
                           const struct ovsdb_idl_row *stage_hint)
 {
     struct ds match = DS_EMPTY_INITIALIZER;
@@ -10915,6 +10953,11 @@  build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od,
                          rule->priority, nexthop);
             return;
         }
+
+        if (!check_bfd_state(rule, bfd_connections, out_port, nexthop)) {
+            return;
+        }
+
         uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
         if (pkt_mark) {
             ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark);
@@ -10956,6 +10999,7 @@  static void
 build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
                                 const struct hmap *lr_ports,
                                 const struct nbrec_logical_router_policy *rule,
+                                const struct hmap *bfd_connections,
                                 uint16_t ecmp_group_id)
 {
     ovs_assert(rule->n_nexthops > 1);
@@ -10984,6 +11028,9 @@  build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
     struct ds match = DS_EMPTY_INITIALIZER;
     struct ds actions = DS_EMPTY_INITIALIZER;
 
+    size_t *valid_nexthops = xcalloc(rule->n_nexthops, sizeof *valid_nexthops);
+    size_t n_valid_nexthops = 0;
+
     for (size_t i = 0; i < rule->n_nexthops; i++) {
         struct ovn_port *out_port = get_outport_for_routing_policy_nexthop(
              od, lr_ports, rule->priority, rule->nexthops[i]);
@@ -11001,6 +11048,13 @@  build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
             goto cleanup;
         }
 
+        if (!check_bfd_state(rule, bfd_connections, out_port,
+                             rule->nexthops[i])) {
+            continue;
+        }
+
+        valid_nexthops[n_valid_nexthops++] = i + 1;
+
         ds_clear(&actions);
         uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0);
         if (pkt_mark) {
@@ -11036,12 +11090,12 @@  build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
                   "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id,
                   REG_ECMP_MEMBER_ID);
 
-    for (size_t i = 0; i < rule->n_nexthops; i++) {
+    for (size_t i = 0; i < n_valid_nexthops; i++) {
         if (i > 0) {
             ds_put_cstr(&actions, ", ");
         }
 
-        ds_put_format(&actions, "%"PRIuSIZE, i + 1);
+        ds_put_format(&actions, "%"PRIuSIZE, valid_nexthops[i]);
     }
     ds_put_cstr(&actions, ");");
     ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY,
@@ -11049,6 +11103,7 @@  build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od,
                             ds_cstr(&actions), &rule->header_);
 
 cleanup:
+    free(valid_nexthops);
     ds_destroy(&match);
     ds_destroy(&actions);
 }
@@ -11132,8 +11187,6 @@  route_hash(struct parsed_route *route)
                       (uint32_t)route->plen);
 }
 
-static struct ovs_mutex bfd_lock = OVS_MUTEX_INITIALIZER;
-
 static bool
 find_static_route_outport(struct ovn_datapath *od, const struct hmap *lr_ports,
     const struct nbrec_logical_router_static_route *route, bool is_ipv4,
@@ -13679,7 +13732,8 @@  build_mcast_lookup_flows_for_lrouter(
 static void
 build_ingress_policy_flows_for_lrouter(
         struct ovn_datapath *od, struct hmap *lflows,
-        const struct hmap *lr_ports)
+        const struct hmap *lr_ports,
+        const struct hmap *bfd_connections)
 {
     ovs_assert(od->nbr);
     /* This is a catch-all rule. It has the lowest priority (0)
@@ -13700,11 +13754,11 @@  build_ingress_policy_flows_for_lrouter(
 
         if (is_ecmp_reroute) {
             build_ecmp_routing_policy_flows(lflows, od, lr_ports, rule,
-                                            ecmp_group_id);
+                                            bfd_connections, ecmp_group_id);
             ecmp_group_id++;
         } else {
             build_routing_policy_flow(lflows, od, lr_ports, rule,
-                                      &rule->header_);
+                                      bfd_connections, &rule->header_);
         }
     }
 }
@@ -16139,7 +16193,8 @@  build_lswitch_and_lrouter_iterate_by_lr(struct ovn_datapath *od,
                                          lsi->bfd_connections);
     build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match,
                                          &lsi->actions);
-    build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports);
+    build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
+                                           lsi->bfd_connections);
     build_arp_resolve_flows_for_lrouter(od, lsi->lflows);
     build_check_pkt_len_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
                                           &lsi->match, &lsi->actions,
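
Usage sketch (not part of the patch): the behaviour the northd hunks above
implement, based on the system test added further down in this patch
(gateway router R1, BFD peer 172.16.1.50).  Once a reroute policy is linked
to a BFD session, northd only installs lr_in_policy flows for next-hops
whose BFD session is up:

  ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50

  # While the BFD session is up, the policy flow is present:
  ovn-sbctl dump-flows R1 | grep lr_in_policy | grep 172.16.1.50

  # Once the peer goes down, northd stops offering that next-hop and the
  # flow above disappears; for ECMP reroute policies only the next-hops
  # with a live BFD session are kept in the select() group.
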
diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
index b2e0993e0..cec02a172 100644
--- a/ovn-nb.ovsschema
+++ b/ovn-nb.ovsschema
@@ -1,7 +1,7 @@ 
 {
     "name": "OVN_Northbound",
-    "version": "7.2.0",
-    "cksum": "1069338687 34162",
+    "version": "7.2.1",
+    "cksum": "4156161406 34467",
     "tables": {
         "NB_Global": {
             "columns": {
@@ -477,6 +477,11 @@ 
                 "nexthop": {"type": {"key": "string", "min": 0, "max": 1}},
                 "nexthops": {"type": {
                     "key": "string", "min": 0, "max": "unlimited"}},
+                "bfd_sessions": {"type": {"key": {"type": "uuid",
+                                                  "refTable": "BFD",
+                                                  "refType": "weak"},
+                                  "min": 0,
+                                  "max": "unlimited"}},
                 "options": {
                     "type": {"key": "string", "value": "string",
                              "min": 0, "max": "unlimited"}},
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 765ffcf2e..d35c34517 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -3682,6 +3682,13 @@  or
       </p>
     </column>
 
+    <column name="bfd_sessions">
+      <p>
+        References to <ref table="BFD"/> rows for the BFD sessions, if any,
+        associated with this routing policy.
+      </p>
+    </column>
+
     <column name="options" key="pkt_mark">
       <p>
         Marks the packet with the value specified when the router policy
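
Illustration (not part of the patch): how the new bfd_sessions column gets
populated, mirroring the ovn-northd.at additions below (router r0 with port
r0-sw8 on 192.168.8.1/24).  ovn-nbctl fills the column in when --bfd is
passed to lr-policy-add, creating the BFD row on demand:

  ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 1.2.3.0/24" reroute 192.168.8.2
  # A BFD row with dst_ip=192.168.8.2 and logical_port=r0-sw8 now exists:
  ovn-nbctl list bfd
  # and the new policy's bfd_sessions column references it:
  ovn-nbctl list logical_router_policy
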
diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
index 2d74e9cc6..fcaee1342 100644
--- a/tests/ovn-nbctl.at
+++ b/tests/ovn-nbctl.at
@@ -2189,6 +2189,12 @@  AT_CHECK([ovn-nbctl lr-policy-add lr0 100 "ip4.src == 1.1.2.0/24" allow pkt_mark
 AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.1.0/24" allow])
 AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip4.src == 2.1.2.0/24" drop])
 AT_CHECK([ovn-nbctl lr-policy-add lr0 101 "ip6.src == 2002::/64" drop])
+AT_CHECK([ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" reroute 192.168.1.1], [1], [],
+  [ovn-nbctl: out lrp not found for 192.168.1.1 nexthop
+])
+AT_CHECK([ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" drop], [1], [],
+  [ovn-nbctl: BFD is valid only with reroute action.
+])
 
 dnl Incomplete option set.
 AT_CHECK([ovn-nbctl lr-policy-add lr0 200 "ip4.src == 1.1.4.0/24" reroute 192.168.0.10 foo], [1], [],
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 9a0d418e4..d9806d5d0 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -3580,7 +3580,7 @@  AT_KEYWORDS([northd-bfd])
 ovn_start
 
 check ovn-nbctl --wait=sb lr-add r0
-for i in $(seq 1 7); do
+for i in $(seq 1 9); do
     check ovn-nbctl --wait=sb lrp-add r0 r0-sw$i 00:00:00:00:00:0$i 192.168.$i.1/24
     check ovn-nbctl --wait=sb ls-add sw$i
     check ovn-nbctl --wait=sb lsp-add sw$i sw$i-r0
@@ -3644,6 +3644,24 @@  bfd2_uuid=$(fetch_column bfd _uuid logical_port=r0-sw2)
 check ovn-sbctl set bfd $bfd2_uuid status=up
 wait_column up nb:bfd status logical_port=r0-sw2
 
+# Create reroute policies associated with BFD sessions
+check ovn-nbctl lr-route-del r0
+check ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 1.2.3.0/24" reroute 192.168.8.2
+wait_column down bfd status logical_port=r0-sw8
+
+bfd_route_policy_uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw8)
+AT_CHECK([ovn-nbctl list logical_router_policy | grep -q $bfd_route_policy_uuid])
+
+check ovn-nbctl lr-policy-del r0
+check ovn-nbctl --bfd lr-policy-add r0 100 "ip4.src == 2.3.4.0/24" reroute 192.168.9.2,192.168.9.3,192.168.9.4
+
+wait_column down bfd status dst_ip=192.168.9.2
+wait_column down bfd status dst_ip=192.168.9.3
+wait_column down bfd status dst_ip=192.168.9.4
+
+bfd_route_policy_uuid=$(fetch_column nb:bfd _uuid logical_port=r0-sw9)
+AT_CHECK([ovn-nbctl list logical_router_policy | sed s/,//g | grep -q "$bfd_route_policy_uuid"])
+
 AT_CLEANUP
 ])
 
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 91fe7cac1..62455d696 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -6837,6 +6837,15 @@  OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst ==
 check ovn-nbctl clear logical_router_static_route $route_uuid bfd
 wait_column "admin_down" nb:bfd status logical_port=rp-public
 OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
+
+check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50
+wait_column "up" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep -q 172.16.1.50])
+
+check ovn-nbctl lr-policy-del R1
+wait_column "admin_down" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
+
 NS_CHECK_EXEC([server], [tcpdump -nni s1 udp port 3784 -Q in > bfd.pcap &])
 sleep 5
 kill $(pidof tcpdump)
@@ -6844,25 +6853,57 @@  AT_CHECK([grep -qi bfd bfd.pcap],[1])
 
 # restart the connection
 check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid
+check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 200.0.0.0/8" reroute 172.16.1.50
 wait_column "up" nb:bfd status logical_port=rp-public
 
+OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep -q 172.16.1.50])
+OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep -q 172.16.1.50])
+
+# stop bfd endpoint
+NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl
+stopping
+])
+wait_column "down" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep 172.16.1.50)" = ""])
+OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 200.0.0.0/8' |grep 172.16.1.50)" = ""])
+
 # switch to gw router configuration
 check ovn-nbctl clear logical_router_static_route $route_uuid bfd
-wait_column "admin_down" nb:bfd status logical_port=rp-public
-OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
+check ovn-nbctl lr-policy-del R1
 check ovn-nbctl clear logical_router_port rp-public gateway_chassis
 check ovn-nbctl set logical_router R1 options:chassis=hv1
 check ovn-nbctl set logical_router_static_route $route_uuid bfd=$uuid
+
+# restart bfdd
+NS_CHECK_EXEC([server], [bfdd-beacon --listen=172.16.1.50], [0])
+NS_CHECK_EXEC([server], [bfdd-control allow 172.16.1.1], [0], [dnl
+Allowing connections from 172.16.1.1
+])
+
 wait_column "up" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep -q 172.16.1.50])
+
+check ovn-nbctl clear logical_router_static_route $route_uuid bfd
+wait_column "admin_down" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
+ovn-nbctl destroy bfd $uuid
+check_row_count bfd 0
+
+# create a reroute routing policy
+check ovn-nbctl --bfd lr-policy-add R1 100 "ip4.src == 210.0.0.0/8" reroute 172.16.1.50
+wait_column "up" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ovn-sbctl dump-flows R1 |grep lr_in_policy |grep 'ip4.src == 210.0.0.0/8' |grep -q 172.16.1.50])
+
+check ovn-nbctl lr-policy-del R1
+wait_column "admin_down" nb:bfd status logical_port=rp-public
+OVS_WAIT_UNTIL([ip netns exec server bfdd-control status | grep -qi state=Down])
+uuid=$(fetch_column nb:bfd _uuid logical_port="rp-public")
 
 # stop bfd endpoint
 NS_CHECK_EXEC([server], [bfdd-control stop], [0], [dnl
 stopping
 ])
 
-wait_column "down" nb:bfd status logical_port=rp-public
-OVS_WAIT_UNTIL([test "$(ovn-sbctl dump-flows R1 |grep lr_in_ip_routing |grep 'ip4.dst == 100.0.0.0/8' |grep 172.16.1.50)" = ""])
-
 # remove bfd entry
 ovn-nbctl destroy bfd $uuid
 check_row_count bfd 0
diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
index 6f74bd557..5aa01f2f8 100644
--- a/utilities/ovn-nbctl.8.xml
+++ b/utilities/ovn-nbctl.8.xml
@@ -1119,6 +1119,14 @@ 
           duplicated routing policy results in error.
         </p>
 
+        <p>
+          The <code>--bfd</code> option is used to link a BFD session to the
+          OVN reroute policy. OVN will look for an already running BFD
+          session in the BFD table, using the next-hop as the lookup key.
+          If the lookup fails, a new entry in the BFD table will be created
+          using the <var>nexthop</var> as <var>dst_ip</var>.
+        </p>
+
           <p>
           The following example shows a policy to lr1, which will drop packets
           from<code>192.168.100.0/24</code>.
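
For reference (not part of the patch): the error cases exercised by the
ovn-nbctl.at additions above.  --bfd is only accepted together with the
reroute action, and each next-hop must fall inside one of the router's port
networks so the BFD session can be bound to an outgoing lrp:

  # Rejected: BFD can only be combined with reroute.
  ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" drop
  # ovn-nbctl: BFD is valid only with reroute action.

  # Rejected: in the ovn-nbctl.at topology no lr0 port covers 192.168.1.1.
  ovn-nbctl --bfd lr-policy-add lr0 103 "ip4.src == 1.2.3.0/24" reroute 192.168.1.1
  # ovn-nbctl: out lrp not found for 192.168.1.1 nexthop
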
diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
index 0586eccdb..3d16555a5 100644
--- a/utilities/ovn-nbctl.c
+++ b/utilities/ovn-nbctl.c
@@ -4014,14 +4014,26 @@  normalize_addr_str(const char *orig_addr)
     return ret;
 }
 
+static bool
+ip_in_lrp_networks(const struct nbrec_logical_router_port *lrp,
+                   const char *ip_s);
+
 static void
 nbctl_pre_lr_policy_add(struct ctl_context *ctx)
 {
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_name);
+    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_ports);
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_policies);
 
+    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_name);
+    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_mac);
+    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_networks);
+
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_priority);
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_match);
+
+    ovsdb_idl_add_column(ctx->idl, &nbrec_bfd_col_dst_ip);
+    ovsdb_idl_add_column(ctx->idl, &nbrec_bfd_col_logical_port);
 }
 
 static void
@@ -4158,6 +4170,68 @@  nbctl_lr_policy_add(struct ctl_context *ctx)
 
     nbrec_logical_router_update_policies_addvalue(lr, policy);
 
+    struct shash_node *bfd = shash_find(&ctx->options, "--bfd");
+    const struct nbrec_bfd **bfd_sessions = NULL;
+
+    if (bfd) {
+        if (!reroute) {
+            ctl_error(ctx, "BFD is valid only with reroute action.");
+            goto free_nexthops;
+        }
+
+        bfd_sessions = xcalloc(n_nexthops, sizeof *bfd_sessions);
+        size_t j, n_bfd_sessions = 0;
+
+        for (i = 0; i < n_nexthops; i++) {
+            for (j = 0; j < lr->n_ports; j++) {
+                if (ip_in_lrp_networks(lr->ports[j], nexthops[i])) {
+                    break;
+                }
+            }
+
+            if (j == lr->n_ports) {
+                ctl_error(ctx, "out lrp not found for %s nexthop",
+                          nexthops[i]);
+                goto free_nexthops;
+            }
+
+            const struct nbrec_bfd *iter, *nb_bt = NULL;
+            NBREC_BFD_FOR_EACH (iter, ctx->idl) {
+                /* match endpoint ip. */
+                if (strcmp(iter->dst_ip, nexthops[i])) {
+                    continue;
+                }
+                /* match outport. */
+                if (strcmp(iter->logical_port, lr->ports[j]->name)) {
+                    continue;
+                }
+
+                nb_bt = iter;
+                break;
+            }
+
+            /* Create the BFD session if it does not already exist. */
+            if (!nb_bt) {
+                nb_bt = nbrec_bfd_insert(ctx->txn);
+                nbrec_bfd_set_dst_ip(nb_bt, nexthops[i]);
+                nbrec_bfd_set_logical_port(nb_bt, lr->ports[j]->name);
+            }
+
+            for (j = 0; j < n_bfd_sessions; j++) {
+                if (bfd_sessions[j] == nb_bt) {
+                    break;
+                }
+            }
+            if (j == n_bfd_sessions) {
+                bfd_sessions[n_bfd_sessions++] = nb_bt;
+            }
+        }
+        nbrec_logical_router_policy_set_bfd_sessions(
+                policy, (struct nbrec_bfd **) bfd_sessions, n_bfd_sessions);
+    }
+
+free_nexthops:
+    free(bfd_sessions);
     for (i = 0; i < n_nexthops; i++) {
         free(nexthops[i]);
     }
@@ -4282,8 +4356,11 @@  print_routing_policy(const struct nbrec_logical_router_policy *policy,
                       policy->match, policy->action);
     }
 
-    if (!smap_is_empty(&policy->options)) {
+    if (!smap_is_empty(&policy->options) || policy->n_bfd_sessions) {
         ds_put_format(s, "%15s", "");
+        if (policy->n_bfd_sessions) {
+            ds_put_cstr(s, "bfd,");
+        }
         struct smap_node *node;
         SMAP_FOR_EACH (node, &policy->options) {
             ds_put_format(s, "%s=%s,", node->key, node->value);
@@ -4305,6 +4382,8 @@  nbctl_pre_lr_policy_list(struct ctl_context *ctx)
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_nexthops);
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_action);
     ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_policy_col_options);
+    ovsdb_idl_add_column(ctx->idl,
+                         &nbrec_logical_router_policy_col_bfd_sessions);
 }
 
 static void
@@ -7900,7 +7979,8 @@  static const struct ctl_command_syntax nbctl_commands[] = {
     /* Policy commands */
     { "lr-policy-add", 4, INT_MAX,
      "ROUTER PRIORITY MATCH ACTION [NEXTHOP] [OPTIONS - KEY=VALUE ...]",
-     nbctl_pre_lr_policy_add, nbctl_lr_policy_add, NULL, "--may-exist", RW },
+     nbctl_pre_lr_policy_add, nbctl_lr_policy_add, NULL, "--may-exist,--bfd?",
+     RW },
     { "lr-policy-del", 1, 3, "ROUTER [{PRIORITY | UUID} [MATCH]]",
       nbctl_pre_lr_policy_del, nbctl_lr_policy_del, NULL, "--if-exists", RW },
     { "lr-policy-list", 1, 1, "ROUTER", nbctl_pre_lr_policy_list,