diff mbox series

[ovs-dev,v2,23/32] controller: Support learning routes.

Message ID 3e8708272f00074ff43bdad11c3eb1b1ecc620f8.1730713432.git.felix.huettner@stackit.cloud
State Superseded
Headers show
Series OVN Fabric integration | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success

Commit Message

Felix Huettner Nov. 4, 2024, 11:04 a.m. UTC
we now learn all routes inside the vrfs we also advertise routes on.
The routes are then placed in the southbound database for processing by
northd.

Routes are only selected if matching the following rules:
1. must not be a route advertised by us
2. must not be a local connected route (as we want to not learn transfer
   networks)
3. the prefix must not be a link local address

However we can not reliably determine over which link we learned the
route in case we have two LRPs of the same LR on the same chassis.
For now we just assume the routes on both links are identical.
Future commits will refine this.

Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud>
---
 controller/ovn-controller.c         |   8 ++
 controller/route-exchange-netlink.c |  41 +++++++-
 controller/route-exchange-netlink.h |  13 ++-
 controller/route-exchange.c         | 146 +++++++++++++++++++++++++++-
 controller/route-exchange.h         |   2 +
 lib/ovn-util.c                      |  10 ++
 lib/ovn-util.h                      |   1 +
 7 files changed, 216 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 3adc96960..cd14db8de 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -4953,10 +4953,16 @@  route_sb_port_binding_data_handler(struct engine_node *node, void *data)
 static void
 en_route_exchange_run(struct engine_node *node, void *data OVS_UNUSED)
 {
+    struct ovsdb_idl_index *sbrec_route_by_datapath =
+        engine_ovsdb_node_get_index(
+            engine_get_input("SB_route", node), "datapath");
+
     struct ed_type_route *route_data =
         engine_get_input_data("route", node);
 
     struct route_exchange_ctx_in r_ctx_in = {
+        .ovnsb_idl_txn = engine_get_context()->ovnsb_idl_txn,
+        .sbrec_route_by_datapath = sbrec_route_by_datapath,
         .announce_routes = &route_data->announce_routes,
     };
 
@@ -5306,6 +5312,8 @@  main(int argc, char *argv[])
     engine_add_input(&en_route, &en_sb_route,
                      engine_noop_handler);
     engine_add_input(&en_route_exchange, &en_route, NULL);
+    engine_add_input(&en_route_exchange, &en_sb_route,
+                     engine_noop_handler);
 
     engine_add_input(&en_addr_sets, &en_sb_address_set,
                      addr_sets_sb_address_set_handler);
diff --git a/controller/route-exchange-netlink.c b/controller/route-exchange-netlink.c
index 84c6baa15..ee33d08d2 100644
--- a/controller/route-exchange-netlink.c
+++ b/controller/route-exchange-netlink.c
@@ -26,6 +26,7 @@ 
 #include "openvswitch/ofpbuf.h"
 #include "openvswitch/vlog.h"
 #include "packets.h"
+#include "ovn-util.h"
 #include "route-table.h"
 #include "route.h"
 
@@ -171,8 +172,27 @@  re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst,
     return modify_route(RTM_DELROUTE, 0, table_id, dst, plen);
 }
 
+static uint32_t
+route_hash(const struct in6_addr *dst, unsigned int plen)
+{
+    uint32_t hash = hash_bytes(dst->s6_addr, 16, 0);
+    return hash_int(plen, hash);
+}
+
+void
+re_nl_received_routes_destroy(struct hmap *host_routes)
+{
+    struct re_nl_received_route_node *rr;
+    HMAP_FOR_EACH_SAFE (rr, hmap_node, host_routes) {
+        hmap_remove(host_routes, &rr->hmap_node);
+        free(rr);
+    }
+    hmap_destroy(host_routes);
+}
+
 struct route_msg_handle_data {
     const struct hmap *routes;
+    struct hmap *learned_routes;
 };
 
 static void
@@ -184,8 +204,24 @@  handle_route_msg_delete_routes(const struct route_table_msg *msg, void *data)
     struct advertise_route_entry *ar;
     int err;
 
-    /* This route is not from us, we should not touch it. */
+    /* This route is not from us, so we learn it. */
     if (rd->rtm_protocol != RTPROT_OVN) {
+        if (prefix_is_link_local(&rd->rta_dst, rd->plen)) {
+            return;
+        }
+        for (int i = 0; i < rd->n_nexthops; i++) {
+            if (ipv6_is_zero(&rd->nexthops[i].rta_gw)) {
+                /* This is most likely an address on the local link.
+                 * As we just want to learn remote routes we do not need it.*/
+                continue;
+            }
+            struct re_nl_received_route_node *rr = xzalloc(sizeof *rr);
+            hmap_insert(handle_data->learned_routes, &rr->hmap_node,
+                        route_hash(&rd->rta_dst, rd->plen));
+            rr->addr = rd->rta_dst;
+            rr->plen = rd->plen;
+            rr->nexthop = rd->nexthops[i].rta_gw;
+        }
         return;
     }
 
@@ -212,7 +248,7 @@  handle_route_msg_delete_routes(const struct route_table_msg *msg, void *data)
 
 void
 re_nl_sync_routes(uint32_t table_id,
-                  const struct hmap *routes)
+                  const struct hmap *routes, struct hmap *learned_routes)
 {
     struct advertise_route_entry *ar;
     HMAP_FOR_EACH (ar, node, routes) {
@@ -224,6 +260,7 @@  re_nl_sync_routes(uint32_t table_id,
      * in the system. */
     struct route_msg_handle_data data = {
         .routes = routes,
+        .learned_routes = learned_routes,
     };
     route_table_dump_one_table(NULL, table_id, handle_route_msg_delete_routes,
                                &data);
diff --git a/controller/route-exchange-netlink.h b/controller/route-exchange-netlink.h
index f87ebd75d..566b38fde 100644
--- a/controller/route-exchange-netlink.h
+++ b/controller/route-exchange-netlink.h
@@ -16,6 +16,8 @@ 
 #define ROUTE_EXCHANGE_NETLINK_H 1
 
 #include <stdint.h>
+#include "openvswitch/hmap.h"
+#include <netinet/in.h>
 
 /* This value is arbitrary but currently unused.
  * See https://github.com/iproute2/iproute2/blob/main/etc/iproute2/rt_protos */
@@ -24,6 +26,13 @@ 
 struct in6_addr;
 struct hmap;
 
+struct re_nl_received_route_node {
+    struct hmap_node hmap_node;
+    struct in6_addr addr;
+    unsigned int plen;
+    struct in6_addr nexthop;
+};
+
 int re_nl_create_vrf(const char *ifname, uint32_t table_id);
 int re_nl_delete_vrf(const char *ifname);
 
@@ -34,7 +43,9 @@  int re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst,
 
 void re_nl_dump(uint32_t table_id);
 
+void re_nl_received_routes_destroy(struct hmap *);
 void re_nl_sync_routes(uint32_t table_id,
-                       const struct hmap *host_routes);
+                       const struct hmap *host_routes,
+                       struct hmap *learned_routes);
 
 #endif /* route-exchange-netlink.h */
diff --git a/controller/route-exchange.c b/controller/route-exchange.c
index 86ccc92cb..41fea6398 100644
--- a/controller/route-exchange.c
+++ b/controller/route-exchange.c
@@ -34,6 +34,139 @@  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
 
 static struct sset _maintained_vrfs = SSET_INITIALIZER(&_maintained_vrfs);
 
+struct route_entry {
+    struct hmap_node hmap_node;
+
+    const struct sbrec_route *sb_route;
+
+    const struct sbrec_datapath_binding *sb_db;
+    char *logical_port;
+    char *ip_prefix;
+    char *nexthop;
+    bool stale;
+};
+
+static struct route_entry *
+route_alloc_entry(struct hmap *routes,
+                  const struct sbrec_datapath_binding *sb_db,
+                  const char *logical_port,
+                  const char *ip_prefix, const char *nexthop)
+{
+    struct route_entry *route_e = xzalloc(sizeof *route_e);
+
+    route_e->sb_db = sb_db;
+    route_e->logical_port = xstrdup(logical_port);
+    route_e->ip_prefix = xstrdup(ip_prefix);
+    route_e->nexthop = xstrdup(nexthop);
+    route_e->stale = false;
+    uint32_t hash = uuid_hash(&sb_db->header_.uuid);
+    hash = hash_string(logical_port, hash);
+    hash = hash_string(ip_prefix, hash);
+    hmap_insert(routes, &route_e->hmap_node, hash);
+
+    return route_e;
+}
+
+static struct route_entry *
+route_lookup_or_add(struct hmap *route_map,
+                    const struct sbrec_datapath_binding *sb_db,
+                    const char *logical_port, const char *ip_prefix,
+                    const char *nexthop)
+{
+    struct route_entry *route_e;
+    uint32_t hash;
+
+    hash = uuid_hash(&sb_db->header_.uuid);
+    hash = hash_string(logical_port, hash);
+    hash = hash_string(ip_prefix, hash);
+    HMAP_FOR_EACH_WITH_HASH (route_e, hmap_node, hash, route_map) {
+        if (!strcmp(route_e->nexthop, nexthop)) {
+            return route_e;
+        }
+    }
+
+    route_e = route_alloc_entry(route_map, sb_db,
+                                 logical_port, ip_prefix, nexthop);
+    return route_e;
+}
+
+static void
+route_erase_entry(struct route_entry *route_e)
+{
+    free(route_e->logical_port);
+    free(route_e->ip_prefix);
+    free(route_e->nexthop);
+    free(route_e);
+}
+
+static void
+sb_sync_learned_routes(const struct sbrec_datapath_binding *datapath,
+                       const struct hmap *learned_routes,
+                       const struct sset *bound_ports,
+                       struct ovsdb_idl_txn *ovnsb_idl_txn,
+                       struct ovsdb_idl_index *sbrec_route_by_datapath)
+{
+    struct hmap sync_routes = HMAP_INITIALIZER(&sync_routes);
+    struct route_entry *route_e;
+    const struct sbrec_route *sb_route;
+
+    struct sbrec_route *filter =
+            sbrec_route_index_init_row(sbrec_route_by_datapath);
+    sbrec_route_index_set_datapath(filter, datapath);
+    SBREC_ROUTE_FOR_EACH_EQUAL (sb_route, filter, sbrec_route_by_datapath) {
+        if (strcmp(sb_route->type, "receive")) {
+            continue;
+        }
+        /* If the port is not local we don't care about it.
+         * Some other ovn-controller will handle it. */
+        if (!sset_contains(bound_ports, sb_route->logical_port)) {
+            continue;
+        }
+        route_e = route_alloc_entry(&sync_routes,
+                                    sb_route->datapath,
+                                    sb_route->logical_port,
+                                    sb_route->ip_prefix,
+                                    sb_route->nexthop);
+        route_e->stale = true;
+        route_e->sb_route = sb_route;
+    }
+    sbrec_route_index_destroy_row(filter);
+
+    struct re_nl_received_route_node *learned_route;
+    HMAP_FOR_EACH (learned_route, hmap_node, learned_routes) {
+        char *ip_prefix = normalize_v46_prefix(&learned_route->addr,
+                                               learned_route->plen);
+        char *nexthop = normalize_v46(&learned_route->nexthop);
+
+        const char *logical_port;
+        SSET_FOR_EACH (logical_port, bound_ports) {
+            route_e = route_lookup_or_add(&sync_routes,
+                datapath,
+                logical_port, ip_prefix, nexthop);
+            route_e->stale = false;
+            if (!route_e->sb_route) {
+                sb_route = sbrec_route_insert(ovnsb_idl_txn);
+                sbrec_route_set_datapath(sb_route, datapath);
+                sbrec_route_set_logical_port(sb_route, logical_port);
+                sbrec_route_set_ip_prefix(sb_route, ip_prefix);
+                sbrec_route_set_nexthop(sb_route, nexthop);
+                sbrec_route_set_type(sb_route, "receive");
+                route_e->sb_route = sb_route;
+            }
+        }
+        free(ip_prefix);
+        free(nexthop);
+    }
+
+    HMAP_FOR_EACH_POP (route_e, hmap_node, &sync_routes) {
+        if (route_e->stale) {
+            sbrec_route_delete(route_e->sb_route);
+        }
+        route_erase_entry(route_e);
+    }
+    hmap_destroy(&sync_routes);
+}
+
 void
 route_exchange_run(struct route_exchange_ctx_in *r_ctx_in,
                    struct route_exchange_ctx_out *r_ctx_out OVS_UNUSED)
@@ -57,12 +190,21 @@  route_exchange_run(struct route_exchange_ctx_in *r_ctx_in,
                              "%"PRId64": %s.",
                              vrf_name, ad->key,
                              ovs_strerror(error));
-                continue;
+                goto out;
             }
             sset_add(&_maintained_vrfs, vrf_name);
         }
 
-        re_nl_sync_routes(ad->key, &ad->routes);
+        re_nl_sync_routes(ad->key, &ad->routes,
+                          &received_routes);
+
+        sb_sync_learned_routes(ad->db, &received_routes,
+                               &ad->bound_ports,
+                               r_ctx_in->ovnsb_idl_txn,
+                               r_ctx_in->sbrec_route_by_datapath);
+
+out:
+        re_nl_received_routes_destroy(&received_routes);
     }
 
     /* Remove VRFs previously maintained by us not found in the above loop. */
diff --git a/controller/route-exchange.h b/controller/route-exchange.h
index 2c2a9ab84..d19e83403 100644
--- a/controller/route-exchange.h
+++ b/controller/route-exchange.h
@@ -18,6 +18,8 @@ 
 #include <stdbool.h>
 
 struct route_exchange_ctx_in {
+    struct ovsdb_idl_txn *ovnsb_idl_txn;
+    struct ovsdb_idl_index *sbrec_route_by_datapath;
     /* Contains struct advertise_datapath_entry */
     struct hmap *announce_routes;
 };
diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index 55a081ab1..5d0db1a5a 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -802,6 +802,16 @@  normalize_v46_prefix(const struct in6_addr *prefix, unsigned int plen)
     }
 }
 
+char *
+normalize_v46(const struct in6_addr *prefix)
+{
+    if (IN6_IS_ADDR_V4MAPPED(prefix)) {
+        return normalize_ipv4_prefix(in6_addr_get_mapped_ipv4(prefix), 32);
+    } else {
+        return normalize_ipv6_prefix(prefix, 128);
+    }
+}
+
 char *
 str_tolower(const char *orig)
 {
diff --git a/lib/ovn-util.h b/lib/ovn-util.h
index 98cb8e69d..3ccd7d003 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
@@ -205,6 +205,7 @@  bool ip46_parse(const char *ip_str, struct in6_addr *ip);
 char *normalize_ipv4_prefix(ovs_be32 ipv4, unsigned int plen);
 char *normalize_ipv6_prefix(const struct in6_addr *ipv6, unsigned int plen);
 char *normalize_v46_prefix(const struct in6_addr *prefix, unsigned int plen);
+char *normalize_v46(const struct in6_addr *prefix);
 
 /* Returns a lowercase copy of orig.
  * Caller must free the returned string.