Message ID | 78017c6fdb5acfb516405e6486b852f26a7754cd.1731495611.git.lorenzo.bianconi@redhat.com |
---|---|
State | Changes Requested |
Delegated to: | Dumitru Ceara |
Headers | show |
Series | Introduce ECMP_nexthop monitor in ovn-controller | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/github-robot-_ovn-kubernetes | fail | github build: failed |
On 11/13/24 12:04 PM, Lorenzo Bianconi wrote: > Introduce ECMP_Nexthop table in the SB db in order to track active > ecmp-symmetric-reply connections and flush stale ones. ECMP_Nexthop > table contains ip and mac address for each active nexthop. > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > --- Hi Lorenzo, Thanks for the patch! > northd/en-northd.c | 29 +++++++++++ > northd/en-northd.h | 4 ++ > northd/inc-proc-northd.c | 9 +++- > northd/northd.c | 104 ++++++++++++++++++++++++++++++++++++++- > northd/northd.h | 6 +++ > ovn-sb.ovsschema | 17 ++++++- > ovn-sb.xml | 31 ++++++++++++ > tests/ovn-northd.at | 33 ++++++++++--- > 8 files changed, 222 insertions(+), 11 deletions(-) > > diff --git a/northd/en-northd.c b/northd/en-northd.c > index 24ed31517..165af44a0 100644 > --- a/northd/en-northd.c > +++ b/northd/en-northd.c > @@ -404,6 +404,23 @@ en_bfd_sync_run(struct engine_node *node, void *data) > engine_set_node_state(node, EN_UPDATED); > } > > +void > +en_ecmp_nexthop_run(struct engine_node *node, void *data OVS_UNUSED) We started splitting out independent incremental processing engine nodes into their own files. See northd/en-*.[ch]. I think it makes sense to move the en_ecmp_nexthop*() functions and all other related static functions to a new northd/en-ecmp-nexthop.[ch] module. > +{ > + const struct engine_context *eng_ctx = engine_get_context(); > + struct northd_data *northd_data = engine_get_input_data("northd", node); > + struct static_routes_data *static_routes_data = > + engine_get_input_data("static_routes", node); > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table = > + EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node)); > + > + build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn, > + &northd_data->lr_ports, > + &static_routes_data->parsed_routes, > + sbrec_ecmp_nexthop_table); > + engine_set_node_state(node, EN_UPDATED); > +} > + > void > *en_northd_init(struct engine_node *node OVS_UNUSED, > struct engine_arg *arg OVS_UNUSED) > @@ -454,6 +471,13 @@ void > return data; > } > > +void * > +en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED, > + struct engine_arg *arg OVS_UNUSED) > +{ > + return NULL; > +} > + > void > en_northd_cleanup(void *data) > { > @@ -526,3 +550,8 @@ en_bfd_sync_cleanup(void *data) > { > bfd_sync_destroy(data); > } > + > +void > +en_ecmp_nexthop_cleanup(void *data OVS_UNUSED) > +{ > +} > diff --git a/northd/en-northd.h b/northd/en-northd.h > index 631a7c17a..2666cc67e 100644 > --- a/northd/en-northd.h > +++ b/northd/en-northd.h > @@ -42,5 +42,9 @@ bool bfd_sync_northd_change_handler(struct engine_node *node, > void *data OVS_UNUSED); > void en_bfd_sync_run(struct engine_node *node, void *data); > void en_bfd_sync_cleanup(void *data OVS_UNUSED); > +void en_ecmp_nexthop_run(struct engine_node *node, void *data); > +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED, > + struct engine_arg *arg OVS_UNUSED); > +void en_ecmp_nexthop_cleanup(void *data); > > #endif /* EN_NORTHD_H */ > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c > index 8c834facb..8e16fde80 100644 > --- a/northd/inc-proc-northd.c > +++ b/northd/inc-proc-northd.c > @@ -103,7 +103,8 @@ static unixctl_cb_func chassis_features_list; > SB_NODE(fdb, "fdb") \ > SB_NODE(static_mac_binding, "static_mac_binding") \ > SB_NODE(chassis_template_var, "chassis_template_var") \ > - SB_NODE(logical_dp_group, "logical_dp_group") > + SB_NODE(logical_dp_group, "logical_dp_group") \ > + SB_NODE(ecmp_nexthop, "ecmp_nexthop") > > enum sb_engine_node { > #define SB_NODE(NAME, NAME_STR) SB_##NAME, > @@ -162,6 +163,7 @@ static ENGINE_NODE(route_policies, "route_policies"); > static ENGINE_NODE(static_routes, "static_routes"); > static ENGINE_NODE(bfd, "bfd"); > static ENGINE_NODE(bfd_sync, "bfd_sync"); > +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop"); > > void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > struct ovsdb_idl_loop *sb) > @@ -264,6 +266,10 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > engine_add_input(&en_bfd_sync, &en_route_policies, NULL); > engine_add_input(&en_bfd_sync, &en_northd, bfd_sync_northd_change_handler); > > + engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL); To avoid unnecessary processing, we also need to omit notifications for SB.ECMP_Nexthop records in ovn-northd.c. Here's an example for meters: https://github.com/ovn-org/ovn/blob/ebe5d70122ce0f74067858f5cb19276c852a81da/northd/ovn-northd.c#L929-L931 > + engine_add_input(&en_ecmp_nexthop, &en_northd, NULL); > + engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL); > + > engine_add_input(&en_sync_meters, &en_nb_acl, NULL); > engine_add_input(&en_sync_meters, &en_nb_meter, NULL); > engine_add_input(&en_sync_meters, &en_sb_meter, NULL); > @@ -334,6 +340,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > engine_add_input(&en_sync_from_sb, &en_sb_port_binding, NULL); > engine_add_input(&en_sync_from_sb, &en_sb_ha_chassis_group, NULL); > > + engine_add_input(&en_northd_output, &en_ecmp_nexthop, NULL); > engine_add_input(&en_northd_output, &en_sync_from_sb, NULL); > engine_add_input(&en_northd_output, &en_sync_to_sb, > northd_output_sync_to_sb_handler); > diff --git a/northd/northd.c b/northd/northd.c > index 64b2e3859..d54fbf14e 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -10720,6 +10720,106 @@ build_bfd_map(const struct nbrec_bfd_table *nbrec_bfd_table, > } > } > > +struct ecmp_nexthop_data { > + struct hmap_node hmap_node; > + const struct sbrec_ecmp_nexthop *sb_ecmp_nh; > + bool stale; We don't need this 'stale' field, please see the comment related to build_ecmp_nexthop_table() below. > +}; > + > +static struct ecmp_nexthop_data * > +ecmp_nexthop_alloc_entry(const struct sbrec_ecmp_nexthop *sb_ecmp_nh, > + struct hmap *map) The name is misleading. We actually insert the entry. I'd call this ecmp_nexthop_insert_entry(). > +{ > + struct ecmp_nexthop_data *e = xmalloc(sizeof *e); > + e->sb_ecmp_nh = sb_ecmp_nh; > + > + const char *sb_port = sb_ecmp_nh->port->logical_port; > + const char *sb_nexthop = sb_ecmp_nh->nexthop; > + > + uint32_t hash = hash_string(sb_nexthop, 0); > + hash = hash_add(hash, hash_string(sb_port, 0)); Port names can be long. What about hashing the port binding key instead? We could rewrite this as: uint32_t hash = hash_string(sb_ecmp_nh->nexthop, 0); hash = hash_int(sb_ecmp_nh->port->tunnel_key, hash); hmap_insert(map, &e->hmap_node, hash); > + hmap_insert(map, &e->hmap_node, hash); > + > + return e; > +} > + > +static struct ecmp_nexthop_data * > +ecmp_nexthop_find_entry(const char *nexthop, const char *port, > + struct hmap *map) > +{ > + uint32_t hash = hash_string(nexthop, 0); > + hash = hash_add(hash, hash_string(port, 0)); > + If we change the hash function to use the port binding key, this should also change to something like: static struct ecmp_nexthop_data * ecmp_nexthop_find_entry(const char *nexthop, const struct sbrec_port_binding *pb, struct hmap *map) { uint32_t hash = hash_string(nexthop, 0); hash = hash_int(pb->tunnel_key, hash); > + struct ecmp_nexthop_data *e; > + HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) { > + const char *sb_port = e->sb_ecmp_nh->port->logical_port; > + const char *sb_nexthop = e->sb_ecmp_nh->nexthop; > + if (!strcmp(sb_nexthop, nexthop) && !strcmp(sb_port, port)) { > + return e; > + } > + } > + return NULL; > +} > + > +void > +build_ecmp_nexthop_table( > + struct ovsdb_idl_txn *ovnsb_txn, > + const struct hmap *lr_ports, const struct hmap *routes, > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table) > +{ > + if (!ovnsb_txn) { > + return; > + } > + > + struct hmap sb_nexthops_map = HMAP_INITIALIZER(&sb_nexthops_map); > + > + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop; > + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, > + sbrec_ecmp_nexthop_table) { > + struct ecmp_nexthop_data *e = ecmp_nexthop_alloc_entry( > + sb_ecmp_nexthop, &sb_nexthops_map); > + e->stale = true; > + } > + > + struct parsed_route *pr; > + HMAP_FOR_EACH (pr, key_node, routes) { > + if (!pr->ecmp_symmetric_reply) { > + continue; > + } > + > + if (!pr->out_port) { > + continue; > + } > + > + struct ovn_port *out_port = ovn_port_find(lr_ports, pr->out_port->key); Why do we lookup again here? pr->out_port is the ovn_port already. > + if (!out_port || !out_port->sb) { > + continue; > + } > + > + const struct nbrec_logical_router_static_route *r = pr->route; > + const char *pb_name = out_port->sb->logical_port; > + > + struct ecmp_nexthop_data *e = ecmp_nexthop_find_entry( > + r->nexthop, pb_name, &sb_nexthops_map); > + if (!e) { > + sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn); > + sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop); > + sbrec_ecmp_nexthop_set_port(sb_ecmp_nexthop, out_port->sb); > + e = ecmp_nexthop_alloc_entry(sb_ecmp_nexthop, &sb_nexthops_map); Instead of adding the entry to sb_nexthops_map here we can just remove 'e' from sb_nexthops_map and free it if found (else branch of this if) ... > + } > + e->stale = false; > + } > + > + struct ecmp_nexthop_data *e; > + HMAP_FOR_EACH_POP (e, hmap_node, &sb_nexthops_map) { > + if (e->stale) { > + sbrec_ecmp_nexthop_delete(e->sb_ecmp_nh); ... then here all remaining items in sb_nexthops_map are stale. > + } > + free(e); > + } > + hmap_destroy(&sb_nexthops_map); > +} > + > /* Returns a string of the IP address of the router port 'op' that > * overlaps with 'ip_s". If one is not found, returns NULL. > * > @@ -11160,10 +11260,11 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, > } > > /* Verify that ip_prefix and nexthop are on the same network. */ > + struct ovn_port *out_port = NULL; > if (!is_discard_route && > !find_static_route_outport(od, lr_ports, route, > IN6_IS_ADDR_V4MAPPED(&prefix), > - NULL, NULL)) { > + NULL, &out_port)) { > return; > } > > @@ -11206,6 +11307,7 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, > new_pr->hash = route_hash(new_pr); > new_pr->route = route; > new_pr->nbr = od->nbr; > + new_pr->out_port = out_port; > new_pr->ecmp_symmetric_reply = smap_get_bool(&route->options, > "ecmp_symmetric_reply", > false); > diff --git a/northd/northd.h b/northd/northd.h > index c1442ff40..3bd2e29e3 100644 > --- a/northd/northd.h > +++ b/northd/northd.h > @@ -703,6 +703,7 @@ struct parsed_route { > uint32_t route_table_id; > uint32_t hash; > const struct nbrec_logical_router_static_route *route; > + struct ovn_port *out_port; > bool ecmp_symmetric_reply; > bool is_discard_route; > const struct nbrec_logical_router *nbr; > @@ -746,6 +747,11 @@ void bfd_destroy(struct bfd_data *); > void bfd_sync_init(struct bfd_sync_data *); > void bfd_sync_destroy(struct bfd_sync_data *); > > +void build_ecmp_nexthop_table( > + struct ovsdb_idl_txn *ovnsb_txn, > + const struct hmap *lr_ports, const struct hmap *routes, > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table); > + > struct lflow_table; > struct lr_stateful_tracked_data; > struct ls_stateful_tracked_data; > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema > index 73abf2c8d..864cb0ed6 100644 > --- a/ovn-sb.ovsschema > +++ b/ovn-sb.ovsschema > @@ -1,7 +1,7 @@ > { > "name": "OVN_Southbound", > - "version": "20.37.0", > - "cksum": "1950136776 31493", > + "version": "20.38.0", > + "cksum": "466210938 32119", > "tables": { > "SB_Global": { > "columns": { > @@ -610,6 +610,19 @@ > "refTable": "Datapath_Binding"}}}}, > "indexes": [["logical_port", "ip"]], > "isRoot": true}, > + "ECMP_Nexthop": { > + "columns": { > + "nexthop": {"type": "string"}, > + "port": {"type": {"key": {"type": "uuid", > + "refTable": "Port_Binding", > + "refType": "strong"}, > + "min": 0, "max": 1}}, I thought there's never a case when we don't have a port for the next-hop. I think this shouldn't be optional. > + "mac": {"type": "string"}, > + "external_ids": { > + "type": {"key": "string", "value": "string", > + "min": 0, "max": "unlimited"}}}, > + "indexes": [["nexthop", "port"]], > + "isRoot": true}, > "Chassis_Template_Var": { > "columns": { > "chassis": {"type": "string"}, > diff --git a/ovn-sb.xml b/ovn-sb.xml > index ea4adc1c3..ea1d484a7 100644 > --- a/ovn-sb.xml > +++ b/ovn-sb.xml > @@ -5217,4 +5217,35 @@ tcp.flags = RST; > The set of variable values for a given chassis. > </column> > </table> > + > + <table name="ECMP_Nexthop"> > + <p> > + Each record in this table represents an active ECMP route committed by This seems wrong, it's not a route, it's a next hop. And the next hop can be shared by multiple routes. Also, we only create ECMP_Nexthop records for routes that have --ecmp-symmetric-reply set. It's probably good to mention that here in case users expect other ECMP nexthops to be created in the SB. > + <code>ovn-northd</code> to <code>ovs</code> connection-tracking table. Nit: maybe rephrase this to "to the <code>ovs</code> connection tracker". > + <code>ECMP_Nexthop</code> table is used by <code>ovn-controller</code> Nit: The <code>ECMP_Nexthop</code> table. > + to track active ct entries and to flush stale ones. > + </p> > + <column name="nexthop"> > + <p> > + Nexthop IP address for this ECMP route. Nexthop IP address should > + be the IP address of a connected router port or the IP address of > + an external device used as nexthop for the given destination. > + </p> > + </column> > + <column name="port"> > + <p> > + The reference to <ref table="Port_Binding"/> table for the port used > + to connect to the configured next-hop. > + </p> > + </column> > + <column name="mac"> > + <p> > + Nexthop mac address. > + </p> > + </column> > + > + <column name="external_ids"> > + See <em>External IDs</em> at the beginning of this document. > + </column> > + </table> > </database> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 8477e4250..1e01c2614 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -3886,7 +3886,7 @@ wait_row_count bfd 1 logical_port=r0-sw3 detect_mult=5 dst_ip=192.168.3.2 \ > check_engine_stats northd norecompute nocompute > check_engine_stats bfd recompute nocompute > check_engine_stats lflow recompute nocompute > -check_engine_stats northd_output norecompute compute > +check_engine_stats northd_output recompute nocompute > CHECK_NO_CHANGE_AFTER_RECOMPUTE > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > @@ -3901,7 +3901,7 @@ wait_row_count bfd 1 logical_port=r0-sw1 min_rx=1000 min_tx=1000 detect_mult=100 > check_engine_stats northd norecompute nocompute > check_engine_stats bfd recompute nocompute > check_engine_stats lflow recompute nocompute > -check_engine_stats northd_output norecompute compute > +check_engine_stats northd_output recompute nocompute > CHECK_NO_CHANGE_AFTER_RECOMPUTE > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > @@ -3913,7 +3913,7 @@ check_engine_stats northd recompute nocompute > check_engine_stats bfd recompute nocompute > check_engine_stats static_routes recompute nocompute > check_engine_stats lflow recompute nocompute > -check_engine_stats northd_output norecompute compute > +check_engine_stats northd_output recompute nocompute > CHECK_NO_CHANGE_AFTER_RECOMPUTE > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > @@ -3929,7 +3929,7 @@ check_engine_stats northd recompute nocompute > check_engine_stats bfd recompute nocompute > check_engine_stats static_routes recompute nocompute > check_engine_stats lflow recompute nocompute > -check_engine_stats northd_output norecompute compute > +check_engine_stats northd_output recompute nocompute > CHECK_NO_CHANGE_AFTER_RECOMPUTE > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > @@ -3941,7 +3941,7 @@ check_engine_stats northd recompute nocompute > check_engine_stats bfd recompute nocompute > check_engine_stats route_policies recompute nocompute > check_engine_stats lflow recompute nocompute > -check_engine_stats northd_output norecompute compute > +check_engine_stats northd_output recompute nocompute > CHECK_NO_CHANGE_AFTER_RECOMPUTE > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > @@ -3976,7 +3976,7 @@ check_engine_stats northd recompute nocompute > check_engine_stats bfd recompute nocompute > check_engine_stats static_routes recompute nocompute > check_engine_stats lflow recompute nocompute > -check_engine_stats northd_output norecompute compute > +check_engine_stats northd_output recompute nocompute > CHECK_NO_CHANGE_AFTER_RECOMPUTE > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > @@ -3991,7 +3991,7 @@ check_engine_stats northd recompute nocompute > check_engine_stats bfd recompute nocompute > check_engine_stats route_policies recompute nocompute > check_engine_stats lflow recompute nocompute > -check_engine_stats northd_output norecompute compute > +check_engine_stats northd_output recompute nocompute > CHECK_NO_CHANGE_AFTER_RECOMPUTE > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > @@ -6816,6 +6816,10 @@ check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public > > # ECMP flows will be added even if there is only one next-hop. > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10 > +check_row_count ECMP_Nexthop 1 As far as I can tell we only do positive checks, i.e., check that next hop records are created in the SB for routes with --ecmp-symmetric-reply set. We could also add a test to ensure that no ECMP_Nexthop is created for routes that have --ecmp set but no --ecmp-symmetric-reply. > +uuid=$(fetch_column Port_Binding _uuid logical_port=lr0-public) > +check_column 192.168.0.10 ECMP_Nexthop nexthop > +check_column "$uuid" ECMP_Nexthop port > > ovn-sbctl dump-flows lr0 > lr0flows > > @@ -6835,6 +6839,13 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn > ]) > > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20 > +check_row_count ECMP_Nexthop 2 > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl > +192.168.0.10 > +]) > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl > +192.168.0.20 > +]) > > ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > @@ -6864,6 +6875,13 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [ > > # add ecmp route with wrong nexthop > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20 > +check_row_count ECMP_Nexthop 2 > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl > +192.168.0.10 > +]) > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl > +192.168.0.20 > +]) > > ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > @@ -6883,6 +6901,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192. > > check ovn-nbctl lr-route-del lr0 > wait_row_count nb:Logical_Router_Static_Route 0 > +check_row_count ECMP_Nexthop 0 > > check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 > ovn-sbctl dump-flows lr0 > lr0flows Regards, Dumitru
> On 11/13/24 12:04 PM, Lorenzo Bianconi wrote: > > Introduce ECMP_Nexthop table in the SB db in order to track active > > ecmp-symmetric-reply connections and flush stale ones. ECMP_Nexthop > > table contains ip and mac address for each active nexthop. > > > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > > --- > > Hi Lorenzo, > > Thanks for the patch! Hi Dumitru, thx for the review. > > > northd/en-northd.c | 29 +++++++++++ > > northd/en-northd.h | 4 ++ > > northd/inc-proc-northd.c | 9 +++- > > northd/northd.c | 104 ++++++++++++++++++++++++++++++++++++++- > > northd/northd.h | 6 +++ > > ovn-sb.ovsschema | 17 ++++++- > > ovn-sb.xml | 31 ++++++++++++ > > tests/ovn-northd.at | 33 ++++++++++--- > > 8 files changed, 222 insertions(+), 11 deletions(-) > > > > diff --git a/northd/en-northd.c b/northd/en-northd.c > > index 24ed31517..165af44a0 100644 > > --- a/northd/en-northd.c > > +++ b/northd/en-northd.c > > @@ -404,6 +404,23 @@ en_bfd_sync_run(struct engine_node *node, void *data) > > engine_set_node_state(node, EN_UPDATED); > > } > > > > +void > > +en_ecmp_nexthop_run(struct engine_node *node, void *data OVS_UNUSED) > > We started splitting out independent incremental processing engine nodes > into their own files. See northd/en-*.[ch]. I think it makes sense to > move the en_ecmp_nexthop*() functions and all other related static > functions to a new northd/en-ecmp-nexthop.[ch] module. ack, I will fix it. > > > +{ > > + const struct engine_context *eng_ctx = engine_get_context(); > > + struct northd_data *northd_data = engine_get_input_data("northd", node); > > + struct static_routes_data *static_routes_data = > > + engine_get_input_data("static_routes", node); > > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table = > > + EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node)); > > + > > + build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn, > > + &northd_data->lr_ports, > > + &static_routes_data->parsed_routes, > > + sbrec_ecmp_nexthop_table); > > + engine_set_node_state(node, EN_UPDATED); > > +} > > + > > void > > *en_northd_init(struct engine_node *node OVS_UNUSED, > > struct engine_arg *arg OVS_UNUSED) > > @@ -454,6 +471,13 @@ void > > return data; > > } > > > > +void * > > +en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED, > > + struct engine_arg *arg OVS_UNUSED) > > +{ > > + return NULL; > > +} > > + > > void > > en_northd_cleanup(void *data) > > { > > @@ -526,3 +550,8 @@ en_bfd_sync_cleanup(void *data) > > { > > bfd_sync_destroy(data); > > } > > + > > +void > > +en_ecmp_nexthop_cleanup(void *data OVS_UNUSED) > > +{ > > +} > > diff --git a/northd/en-northd.h b/northd/en-northd.h > > index 631a7c17a..2666cc67e 100644 > > --- a/northd/en-northd.h > > +++ b/northd/en-northd.h > > @@ -42,5 +42,9 @@ bool bfd_sync_northd_change_handler(struct engine_node *node, > > void *data OVS_UNUSED); > > void en_bfd_sync_run(struct engine_node *node, void *data); > > void en_bfd_sync_cleanup(void *data OVS_UNUSED); > > +void en_ecmp_nexthop_run(struct engine_node *node, void *data); > > +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED, > > + struct engine_arg *arg OVS_UNUSED); > > +void en_ecmp_nexthop_cleanup(void *data); > > > > #endif /* EN_NORTHD_H */ > > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c > > index 8c834facb..8e16fde80 100644 > > --- a/northd/inc-proc-northd.c > > +++ b/northd/inc-proc-northd.c > > @@ -103,7 +103,8 @@ static unixctl_cb_func chassis_features_list; > > SB_NODE(fdb, "fdb") \ > > SB_NODE(static_mac_binding, "static_mac_binding") \ > > SB_NODE(chassis_template_var, "chassis_template_var") \ > > - SB_NODE(logical_dp_group, "logical_dp_group") > > + SB_NODE(logical_dp_group, "logical_dp_group") \ > > + SB_NODE(ecmp_nexthop, "ecmp_nexthop") > > > > enum sb_engine_node { > > #define SB_NODE(NAME, NAME_STR) SB_##NAME, > > @@ -162,6 +163,7 @@ static ENGINE_NODE(route_policies, "route_policies"); > > static ENGINE_NODE(static_routes, "static_routes"); > > static ENGINE_NODE(bfd, "bfd"); > > static ENGINE_NODE(bfd_sync, "bfd_sync"); > > +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop"); > > > > void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > > struct ovsdb_idl_loop *sb) > > @@ -264,6 +266,10 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > > engine_add_input(&en_bfd_sync, &en_route_policies, NULL); > > engine_add_input(&en_bfd_sync, &en_northd, bfd_sync_northd_change_handler); > > > > + engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL); > > To avoid unnecessary processing, we also need to omit notifications for > SB.ECMP_Nexthop records in ovn-northd.c. > > Here's an example for meters: > https://github.com/ovn-org/ovn/blob/ebe5d70122ce0f74067858f5cb19276c852a81da/northd/ovn-northd.c#L929-L931 ack, I will fix it. > > > + engine_add_input(&en_ecmp_nexthop, &en_northd, NULL); > > + engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL); > > + > > engine_add_input(&en_sync_meters, &en_nb_acl, NULL); > > engine_add_input(&en_sync_meters, &en_nb_meter, NULL); > > engine_add_input(&en_sync_meters, &en_sb_meter, NULL); > > @@ -334,6 +340,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > > engine_add_input(&en_sync_from_sb, &en_sb_port_binding, NULL); > > engine_add_input(&en_sync_from_sb, &en_sb_ha_chassis_group, NULL); > > > > + engine_add_input(&en_northd_output, &en_ecmp_nexthop, NULL); > > engine_add_input(&en_northd_output, &en_sync_from_sb, NULL); > > engine_add_input(&en_northd_output, &en_sync_to_sb, > > northd_output_sync_to_sb_handler); > > diff --git a/northd/northd.c b/northd/northd.c > > index 64b2e3859..d54fbf14e 100644 > > --- a/northd/northd.c > > +++ b/northd/northd.c > > @@ -10720,6 +10720,106 @@ build_bfd_map(const struct nbrec_bfd_table *nbrec_bfd_table, > > } > > } > > > > +struct ecmp_nexthop_data { > > + struct hmap_node hmap_node; > > + const struct sbrec_ecmp_nexthop *sb_ecmp_nh; > > + bool stale; > > We don't need this 'stale' field, please see the comment related to > build_ecmp_nexthop_table() below. ack, I will fix it. > > > +}; > > + > > +static struct ecmp_nexthop_data * > > +ecmp_nexthop_alloc_entry(const struct sbrec_ecmp_nexthop *sb_ecmp_nh, > > + struct hmap *map) > > The name is misleading. We actually insert the entry. I'd call this > ecmp_nexthop_insert_entry(). ack, I will fix it. > > > +{ > > + struct ecmp_nexthop_data *e = xmalloc(sizeof *e); > > + e->sb_ecmp_nh = sb_ecmp_nh; > > + > > + const char *sb_port = sb_ecmp_nh->port->logical_port; > > + const char *sb_nexthop = sb_ecmp_nh->nexthop; > > + > > + uint32_t hash = hash_string(sb_nexthop, 0); > > + hash = hash_add(hash, hash_string(sb_port, 0)); > > Port names can be long. What about hashing the port binding key instead? > > We could rewrite this as: > > uint32_t hash = hash_string(sb_ecmp_nh->nexthop, 0); > hash = hash_int(sb_ecmp_nh->port->tunnel_key, hash); > hmap_insert(map, &e->hmap_node, hash); ack, I will fix it. > > > + hmap_insert(map, &e->hmap_node, hash); > > + > > + return e; > > +} > > + > > +static struct ecmp_nexthop_data * > > +ecmp_nexthop_find_entry(const char *nexthop, const char *port, > > + struct hmap *map) > > +{ > > + uint32_t hash = hash_string(nexthop, 0); > > + hash = hash_add(hash, hash_string(port, 0)); > > + > > If we change the hash function to use the port binding key, this should > also change to something like: > > static struct ecmp_nexthop_data * > ecmp_nexthop_find_entry(const char *nexthop, > const struct sbrec_port_binding *pb, > struct hmap *map) > { > uint32_t hash = hash_string(nexthop, 0); > hash = hash_int(pb->tunnel_key, hash); ack > > > + struct ecmp_nexthop_data *e; > > + HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) { > > + const char *sb_port = e->sb_ecmp_nh->port->logical_port; > > + const char *sb_nexthop = e->sb_ecmp_nh->nexthop; > > + if (!strcmp(sb_nexthop, nexthop) && !strcmp(sb_port, port)) { > > + return e; > > + } > > + } > > + return NULL; > > +} > > + > > +void > > +build_ecmp_nexthop_table( > > + struct ovsdb_idl_txn *ovnsb_txn, > > + const struct hmap *lr_ports, const struct hmap *routes, > > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table) > > +{ > > + if (!ovnsb_txn) { > > + return; > > + } > > + > > + struct hmap sb_nexthops_map = HMAP_INITIALIZER(&sb_nexthops_map); > > + > > + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop; > > + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, > > + sbrec_ecmp_nexthop_table) { > > + struct ecmp_nexthop_data *e = ecmp_nexthop_alloc_entry( > > + sb_ecmp_nexthop, &sb_nexthops_map); > > + e->stale = true; > > + } > > + > > + struct parsed_route *pr; > > + HMAP_FOR_EACH (pr, key_node, routes) { > > + if (!pr->ecmp_symmetric_reply) { > > + continue; > > + } > > + > > + if (!pr->out_port) { > > + continue; > > + } > > + > > + struct ovn_port *out_port = ovn_port_find(lr_ports, pr->out_port->key); > > Why do we lookup again here? pr->out_port is the ovn_port already. I think it is a leftover, I will fix it > > > + if (!out_port || !out_port->sb) { > > + continue; > > + } > > + > > + const struct nbrec_logical_router_static_route *r = pr->route; > > + const char *pb_name = out_port->sb->logical_port; > > + > > + struct ecmp_nexthop_data *e = ecmp_nexthop_find_entry( > > + r->nexthop, pb_name, &sb_nexthops_map); > > + if (!e) { > > + sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn); > > + sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop); > > + sbrec_ecmp_nexthop_set_port(sb_ecmp_nexthop, out_port->sb); > > + e = ecmp_nexthop_alloc_entry(sb_ecmp_nexthop, &sb_nexthops_map); > > Instead of adding the entry to sb_nexthops_map here we can just remove > 'e' from sb_nexthops_map and free it if found (else branch of this if) ... ack, I will fix it. > > > + } > > + e->stale = false; > > + } > > + > > + struct ecmp_nexthop_data *e; > > + HMAP_FOR_EACH_POP (e, hmap_node, &sb_nexthops_map) { > > + if (e->stale) { > > + sbrec_ecmp_nexthop_delete(e->sb_ecmp_nh); > > ... then here all remaining items in sb_nexthops_map are stale. > > > + } > > + free(e); > > + } > > + hmap_destroy(&sb_nexthops_map); > > +} > > + > > /* Returns a string of the IP address of the router port 'op' that > > * overlaps with 'ip_s". If one is not found, returns NULL. > > * > > @@ -11160,10 +11260,11 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, > > } > > > > /* Verify that ip_prefix and nexthop are on the same network. */ > > + struct ovn_port *out_port = NULL; > > if (!is_discard_route && > > !find_static_route_outport(od, lr_ports, route, > > IN6_IS_ADDR_V4MAPPED(&prefix), > > - NULL, NULL)) { > > + NULL, &out_port)) { > > return; > > } > > > > @@ -11206,6 +11307,7 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, > > new_pr->hash = route_hash(new_pr); > > new_pr->route = route; > > new_pr->nbr = od->nbr; > > + new_pr->out_port = out_port; > > new_pr->ecmp_symmetric_reply = smap_get_bool(&route->options, > > "ecmp_symmetric_reply", > > false); > > diff --git a/northd/northd.h b/northd/northd.h > > index c1442ff40..3bd2e29e3 100644 > > --- a/northd/northd.h > > +++ b/northd/northd.h > > @@ -703,6 +703,7 @@ struct parsed_route { > > uint32_t route_table_id; > > uint32_t hash; > > const struct nbrec_logical_router_static_route *route; > > + struct ovn_port *out_port; > > bool ecmp_symmetric_reply; > > bool is_discard_route; > > const struct nbrec_logical_router *nbr; > > @@ -746,6 +747,11 @@ void bfd_destroy(struct bfd_data *); > > void bfd_sync_init(struct bfd_sync_data *); > > void bfd_sync_destroy(struct bfd_sync_data *); > > > > +void build_ecmp_nexthop_table( > > + struct ovsdb_idl_txn *ovnsb_txn, > > + const struct hmap *lr_ports, const struct hmap *routes, > > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table); > > + > > struct lflow_table; > > struct lr_stateful_tracked_data; > > struct ls_stateful_tracked_data; > > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema > > index 73abf2c8d..864cb0ed6 100644 > > --- a/ovn-sb.ovsschema > > +++ b/ovn-sb.ovsschema > > @@ -1,7 +1,7 @@ > > { > > "name": "OVN_Southbound", > > - "version": "20.37.0", > > - "cksum": "1950136776 31493", > > + "version": "20.38.0", > > + "cksum": "466210938 32119", > > "tables": { > > "SB_Global": { > > "columns": { > > @@ -610,6 +610,19 @@ > > "refTable": "Datapath_Binding"}}}}, > > "indexes": [["logical_port", "ip"]], > > "isRoot": true}, > > + "ECMP_Nexthop": { > > + "columns": { > > + "nexthop": {"type": "string"}, > > + "port": {"type": {"key": {"type": "uuid", > > + "refTable": "Port_Binding", > > + "refType": "strong"}, > > + "min": 0, "max": 1}}, > > I thought there's never a case when we don't have a port for the > next-hop. I think this shouldn't be optional. ack, I will fix it. > > > + "mac": {"type": "string"}, > > + "external_ids": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}}, > > + "indexes": [["nexthop", "port"]], > > + "isRoot": true}, > > "Chassis_Template_Var": { > > "columns": { > > "chassis": {"type": "string"}, > > diff --git a/ovn-sb.xml b/ovn-sb.xml > > index ea4adc1c3..ea1d484a7 100644 > > --- a/ovn-sb.xml > > +++ b/ovn-sb.xml > > @@ -5217,4 +5217,35 @@ tcp.flags = RST; > > The set of variable values for a given chassis. > > </column> > > </table> > > + > > + <table name="ECMP_Nexthop"> > > + <p> > > + Each record in this table represents an active ECMP route committed by > > This seems wrong, it's not a route, it's a next hop. And the next hop > can be shared by multiple routes. > > Also, we only create ECMP_Nexthop records for routes that have > --ecmp-symmetric-reply set. It's probably good to mention that here in > case users expect other ECMP nexthops to be created in the SB. ack, I will fix it. > > > + <code>ovn-northd</code> to <code>ovs</code> connection-tracking table. > > Nit: maybe rephrase this to "to the <code>ovs</code> connection tracker". > > > + <code>ECMP_Nexthop</code> table is used by <code>ovn-controller</code> > > Nit: The <code>ECMP_Nexthop</code> table. > > > + to track active ct entries and to flush stale ones. > > + </p> > > + <column name="nexthop"> > > + <p> > > + Nexthop IP address for this ECMP route. Nexthop IP address should > > + be the IP address of a connected router port or the IP address of > > + an external device used as nexthop for the given destination. > > + </p> > > + </column> > > + <column name="port"> > > + <p> > > + The reference to <ref table="Port_Binding"/> table for the port used > > + to connect to the configured next-hop. > > + </p> > > + </column> > > + <column name="mac"> > > + <p> > > + Nexthop mac address. > > + </p> > > + </column> > > + > > + <column name="external_ids"> > > + See <em>External IDs</em> at the beginning of this document. > > + </column> > > + </table> > > </database> > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > > index 8477e4250..1e01c2614 100644 > > --- a/tests/ovn-northd.at > > +++ b/tests/ovn-northd.at > > @@ -3886,7 +3886,7 @@ wait_row_count bfd 1 logical_port=r0-sw3 detect_mult=5 dst_ip=192.168.3.2 \ > > check_engine_stats northd norecompute nocompute > > check_engine_stats bfd recompute nocompute > > check_engine_stats lflow recompute nocompute > > -check_engine_stats northd_output norecompute compute > > +check_engine_stats northd_output recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > > > @@ -3901,7 +3901,7 @@ wait_row_count bfd 1 logical_port=r0-sw1 min_rx=1000 min_tx=1000 detect_mult=100 > > check_engine_stats northd norecompute nocompute > > check_engine_stats bfd recompute nocompute > > check_engine_stats lflow recompute nocompute > > -check_engine_stats northd_output norecompute compute > > +check_engine_stats northd_output recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > > > @@ -3913,7 +3913,7 @@ check_engine_stats northd recompute nocompute > > check_engine_stats bfd recompute nocompute > > check_engine_stats static_routes recompute nocompute > > check_engine_stats lflow recompute nocompute > > -check_engine_stats northd_output norecompute compute > > +check_engine_stats northd_output recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > > > @@ -3929,7 +3929,7 @@ check_engine_stats northd recompute nocompute > > check_engine_stats bfd recompute nocompute > > check_engine_stats static_routes recompute nocompute > > check_engine_stats lflow recompute nocompute > > -check_engine_stats northd_output norecompute compute > > +check_engine_stats northd_output recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > > > @@ -3941,7 +3941,7 @@ check_engine_stats northd recompute nocompute > > check_engine_stats bfd recompute nocompute > > check_engine_stats route_policies recompute nocompute > > check_engine_stats lflow recompute nocompute > > -check_engine_stats northd_output norecompute compute > > +check_engine_stats northd_output recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > > > @@ -3976,7 +3976,7 @@ check_engine_stats northd recompute nocompute > > check_engine_stats bfd recompute nocompute > > check_engine_stats static_routes recompute nocompute > > check_engine_stats lflow recompute nocompute > > -check_engine_stats northd_output norecompute compute > > +check_engine_stats northd_output recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > > > @@ -3991,7 +3991,7 @@ check_engine_stats northd recompute nocompute > > check_engine_stats bfd recompute nocompute > > check_engine_stats route_policies recompute nocompute > > check_engine_stats lflow recompute nocompute > > -check_engine_stats northd_output norecompute compute > > +check_engine_stats northd_output recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > > > @@ -6816,6 +6816,10 @@ check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public > > > > # ECMP flows will be added even if there is only one next-hop. > > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10 > > +check_row_count ECMP_Nexthop 1 > > As far as I can tell we only do positive checks, i.e., check that next > hop records are created in the SB for routes with --ecmp-symmetric-reply > set. > > We could also add a test to ensure that no ECMP_Nexthop is created for > routes that have --ecmp set but no --ecmp-symmetric-reply. ack, I will fix it. Regards, Lorenzo > > > +uuid=$(fetch_column Port_Binding _uuid logical_port=lr0-public) > > +check_column 192.168.0.10 ECMP_Nexthop nexthop > > +check_column "$uuid" ECMP_Nexthop port > > > > ovn-sbctl dump-flows lr0 > lr0flows > > > > @@ -6835,6 +6839,13 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn > > ]) > > > > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20 > > +check_row_count ECMP_Nexthop 2 > > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl > > +192.168.0.10 > > +]) > > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl > > +192.168.0.20 > > +]) > > > > ovn-sbctl dump-flows lr0 > lr0flows > > AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > > @@ -6864,6 +6875,13 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [ > > > > # add ecmp route with wrong nexthop > > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20 > > +check_row_count ECMP_Nexthop 2 > > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl > > +192.168.0.10 > > +]) > > +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl > > +192.168.0.20 > > +]) > > > > ovn-sbctl dump-flows lr0 > lr0flows > > AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > > @@ -6883,6 +6901,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192. > > > > check ovn-nbctl lr-route-del lr0 > > wait_row_count nb:Logical_Router_Static_Route 0 > > +check_row_count ECMP_Nexthop 0 > > > > check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 > > ovn-sbctl dump-flows lr0 > lr0flows > > Regards, > Dumitru >
diff --git a/northd/en-northd.c b/northd/en-northd.c index 24ed31517..165af44a0 100644 --- a/northd/en-northd.c +++ b/northd/en-northd.c @@ -404,6 +404,23 @@ en_bfd_sync_run(struct engine_node *node, void *data) engine_set_node_state(node, EN_UPDATED); } +void +en_ecmp_nexthop_run(struct engine_node *node, void *data OVS_UNUSED) +{ + const struct engine_context *eng_ctx = engine_get_context(); + struct northd_data *northd_data = engine_get_input_data("northd", node); + struct static_routes_data *static_routes_data = + engine_get_input_data("static_routes", node); + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table = + EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node)); + + build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn, + &northd_data->lr_ports, + &static_routes_data->parsed_routes, + sbrec_ecmp_nexthop_table); + engine_set_node_state(node, EN_UPDATED); +} + void *en_northd_init(struct engine_node *node OVS_UNUSED, struct engine_arg *arg OVS_UNUSED) @@ -454,6 +471,13 @@ void return data; } +void * +en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED, + struct engine_arg *arg OVS_UNUSED) +{ + return NULL; +} + void en_northd_cleanup(void *data) { @@ -526,3 +550,8 @@ en_bfd_sync_cleanup(void *data) { bfd_sync_destroy(data); } + +void +en_ecmp_nexthop_cleanup(void *data OVS_UNUSED) +{ +} diff --git a/northd/en-northd.h b/northd/en-northd.h index 631a7c17a..2666cc67e 100644 --- a/northd/en-northd.h +++ b/northd/en-northd.h @@ -42,5 +42,9 @@ bool bfd_sync_northd_change_handler(struct engine_node *node, void *data OVS_UNUSED); void en_bfd_sync_run(struct engine_node *node, void *data); void en_bfd_sync_cleanup(void *data OVS_UNUSED); +void en_ecmp_nexthop_run(struct engine_node *node, void *data); +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED, + struct engine_arg *arg OVS_UNUSED); +void en_ecmp_nexthop_cleanup(void *data); #endif /* EN_NORTHD_H */ diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c index 8c834facb..8e16fde80 100644 --- a/northd/inc-proc-northd.c +++ b/northd/inc-proc-northd.c @@ -103,7 +103,8 @@ static unixctl_cb_func chassis_features_list; SB_NODE(fdb, "fdb") \ SB_NODE(static_mac_binding, "static_mac_binding") \ SB_NODE(chassis_template_var, "chassis_template_var") \ - SB_NODE(logical_dp_group, "logical_dp_group") + SB_NODE(logical_dp_group, "logical_dp_group") \ + SB_NODE(ecmp_nexthop, "ecmp_nexthop") enum sb_engine_node { #define SB_NODE(NAME, NAME_STR) SB_##NAME, @@ -162,6 +163,7 @@ static ENGINE_NODE(route_policies, "route_policies"); static ENGINE_NODE(static_routes, "static_routes"); static ENGINE_NODE(bfd, "bfd"); static ENGINE_NODE(bfd_sync, "bfd_sync"); +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop"); void inc_proc_northd_init(struct ovsdb_idl_loop *nb, struct ovsdb_idl_loop *sb) @@ -264,6 +266,10 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, engine_add_input(&en_bfd_sync, &en_route_policies, NULL); engine_add_input(&en_bfd_sync, &en_northd, bfd_sync_northd_change_handler); + engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL); + engine_add_input(&en_ecmp_nexthop, &en_northd, NULL); + engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL); + engine_add_input(&en_sync_meters, &en_nb_acl, NULL); engine_add_input(&en_sync_meters, &en_nb_meter, NULL); engine_add_input(&en_sync_meters, &en_sb_meter, NULL); @@ -334,6 +340,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, engine_add_input(&en_sync_from_sb, &en_sb_port_binding, NULL); engine_add_input(&en_sync_from_sb, &en_sb_ha_chassis_group, NULL); + engine_add_input(&en_northd_output, &en_ecmp_nexthop, NULL); engine_add_input(&en_northd_output, &en_sync_from_sb, NULL); engine_add_input(&en_northd_output, &en_sync_to_sb, northd_output_sync_to_sb_handler); diff --git a/northd/northd.c b/northd/northd.c index 64b2e3859..d54fbf14e 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -10720,6 +10720,106 @@ build_bfd_map(const struct nbrec_bfd_table *nbrec_bfd_table, } } +struct ecmp_nexthop_data { + struct hmap_node hmap_node; + const struct sbrec_ecmp_nexthop *sb_ecmp_nh; + bool stale; +}; + +static struct ecmp_nexthop_data * +ecmp_nexthop_alloc_entry(const struct sbrec_ecmp_nexthop *sb_ecmp_nh, + struct hmap *map) +{ + struct ecmp_nexthop_data *e = xmalloc(sizeof *e); + e->sb_ecmp_nh = sb_ecmp_nh; + + const char *sb_port = sb_ecmp_nh->port->logical_port; + const char *sb_nexthop = sb_ecmp_nh->nexthop; + + uint32_t hash = hash_string(sb_nexthop, 0); + hash = hash_add(hash, hash_string(sb_port, 0)); + hmap_insert(map, &e->hmap_node, hash); + + return e; +} + +static struct ecmp_nexthop_data * +ecmp_nexthop_find_entry(const char *nexthop, const char *port, + struct hmap *map) +{ + uint32_t hash = hash_string(nexthop, 0); + hash = hash_add(hash, hash_string(port, 0)); + + struct ecmp_nexthop_data *e; + HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) { + const char *sb_port = e->sb_ecmp_nh->port->logical_port; + const char *sb_nexthop = e->sb_ecmp_nh->nexthop; + if (!strcmp(sb_nexthop, nexthop) && !strcmp(sb_port, port)) { + return e; + } + } + return NULL; +} + +void +build_ecmp_nexthop_table( + struct ovsdb_idl_txn *ovnsb_txn, + const struct hmap *lr_ports, const struct hmap *routes, + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table) +{ + if (!ovnsb_txn) { + return; + } + + struct hmap sb_nexthops_map = HMAP_INITIALIZER(&sb_nexthops_map); + + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop; + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, + sbrec_ecmp_nexthop_table) { + struct ecmp_nexthop_data *e = ecmp_nexthop_alloc_entry( + sb_ecmp_nexthop, &sb_nexthops_map); + e->stale = true; + } + + struct parsed_route *pr; + HMAP_FOR_EACH (pr, key_node, routes) { + if (!pr->ecmp_symmetric_reply) { + continue; + } + + if (!pr->out_port) { + continue; + } + + struct ovn_port *out_port = ovn_port_find(lr_ports, pr->out_port->key); + if (!out_port || !out_port->sb) { + continue; + } + + const struct nbrec_logical_router_static_route *r = pr->route; + const char *pb_name = out_port->sb->logical_port; + + struct ecmp_nexthop_data *e = ecmp_nexthop_find_entry( + r->nexthop, pb_name, &sb_nexthops_map); + if (!e) { + sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn); + sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop); + sbrec_ecmp_nexthop_set_port(sb_ecmp_nexthop, out_port->sb); + e = ecmp_nexthop_alloc_entry(sb_ecmp_nexthop, &sb_nexthops_map); + } + e->stale = false; + } + + struct ecmp_nexthop_data *e; + HMAP_FOR_EACH_POP (e, hmap_node, &sb_nexthops_map) { + if (e->stale) { + sbrec_ecmp_nexthop_delete(e->sb_ecmp_nh); + } + free(e); + } + hmap_destroy(&sb_nexthops_map); +} + /* Returns a string of the IP address of the router port 'op' that * overlaps with 'ip_s". If one is not found, returns NULL. * @@ -11160,10 +11260,11 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, } /* Verify that ip_prefix and nexthop are on the same network. */ + struct ovn_port *out_port = NULL; if (!is_discard_route && !find_static_route_outport(od, lr_ports, route, IN6_IS_ADDR_V4MAPPED(&prefix), - NULL, NULL)) { + NULL, &out_port)) { return; } @@ -11206,6 +11307,7 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, new_pr->hash = route_hash(new_pr); new_pr->route = route; new_pr->nbr = od->nbr; + new_pr->out_port = out_port; new_pr->ecmp_symmetric_reply = smap_get_bool(&route->options, "ecmp_symmetric_reply", false); diff --git a/northd/northd.h b/northd/northd.h index c1442ff40..3bd2e29e3 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -703,6 +703,7 @@ struct parsed_route { uint32_t route_table_id; uint32_t hash; const struct nbrec_logical_router_static_route *route; + struct ovn_port *out_port; bool ecmp_symmetric_reply; bool is_discard_route; const struct nbrec_logical_router *nbr; @@ -746,6 +747,11 @@ void bfd_destroy(struct bfd_data *); void bfd_sync_init(struct bfd_sync_data *); void bfd_sync_destroy(struct bfd_sync_data *); +void build_ecmp_nexthop_table( + struct ovsdb_idl_txn *ovnsb_txn, + const struct hmap *lr_ports, const struct hmap *routes, + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table); + struct lflow_table; struct lr_stateful_tracked_data; struct ls_stateful_tracked_data; diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema index 73abf2c8d..864cb0ed6 100644 --- a/ovn-sb.ovsschema +++ b/ovn-sb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Southbound", - "version": "20.37.0", - "cksum": "1950136776 31493", + "version": "20.38.0", + "cksum": "466210938 32119", "tables": { "SB_Global": { "columns": { @@ -610,6 +610,19 @@ "refTable": "Datapath_Binding"}}}}, "indexes": [["logical_port", "ip"]], "isRoot": true}, + "ECMP_Nexthop": { + "columns": { + "nexthop": {"type": "string"}, + "port": {"type": {"key": {"type": "uuid", + "refTable": "Port_Binding", + "refType": "strong"}, + "min": 0, "max": 1}}, + "mac": {"type": "string"}, + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, + "indexes": [["nexthop", "port"]], + "isRoot": true}, "Chassis_Template_Var": { "columns": { "chassis": {"type": "string"}, diff --git a/ovn-sb.xml b/ovn-sb.xml index ea4adc1c3..ea1d484a7 100644 --- a/ovn-sb.xml +++ b/ovn-sb.xml @@ -5217,4 +5217,35 @@ tcp.flags = RST; The set of variable values for a given chassis. </column> </table> + + <table name="ECMP_Nexthop"> + <p> + Each record in this table represents an active ECMP route committed by + <code>ovn-northd</code> to <code>ovs</code> connection-tracking table. + <code>ECMP_Nexthop</code> table is used by <code>ovn-controller</code> + to track active ct entries and to flush stale ones. + </p> + <column name="nexthop"> + <p> + Nexthop IP address for this ECMP route. Nexthop IP address should + be the IP address of a connected router port or the IP address of + an external device used as nexthop for the given destination. + </p> + </column> + <column name="port"> + <p> + The reference to <ref table="Port_Binding"/> table for the port used + to connect to the configured next-hop. + </p> + </column> + <column name="mac"> + <p> + Nexthop mac address. + </p> + </column> + + <column name="external_ids"> + See <em>External IDs</em> at the beginning of this document. + </column> + </table> </database> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 8477e4250..1e01c2614 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -3886,7 +3886,7 @@ wait_row_count bfd 1 logical_port=r0-sw3 detect_mult=5 dst_ip=192.168.3.2 \ check_engine_stats northd norecompute nocompute check_engine_stats bfd recompute nocompute check_engine_stats lflow recompute nocompute -check_engine_stats northd_output norecompute compute +check_engine_stats northd_output recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats @@ -3901,7 +3901,7 @@ wait_row_count bfd 1 logical_port=r0-sw1 min_rx=1000 min_tx=1000 detect_mult=100 check_engine_stats northd norecompute nocompute check_engine_stats bfd recompute nocompute check_engine_stats lflow recompute nocompute -check_engine_stats northd_output norecompute compute +check_engine_stats northd_output recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats @@ -3913,7 +3913,7 @@ check_engine_stats northd recompute nocompute check_engine_stats bfd recompute nocompute check_engine_stats static_routes recompute nocompute check_engine_stats lflow recompute nocompute -check_engine_stats northd_output norecompute compute +check_engine_stats northd_output recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats @@ -3929,7 +3929,7 @@ check_engine_stats northd recompute nocompute check_engine_stats bfd recompute nocompute check_engine_stats static_routes recompute nocompute check_engine_stats lflow recompute nocompute -check_engine_stats northd_output norecompute compute +check_engine_stats northd_output recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats @@ -3941,7 +3941,7 @@ check_engine_stats northd recompute nocompute check_engine_stats bfd recompute nocompute check_engine_stats route_policies recompute nocompute check_engine_stats lflow recompute nocompute -check_engine_stats northd_output norecompute compute +check_engine_stats northd_output recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats @@ -3976,7 +3976,7 @@ check_engine_stats northd recompute nocompute check_engine_stats bfd recompute nocompute check_engine_stats static_routes recompute nocompute check_engine_stats lflow recompute nocompute -check_engine_stats northd_output norecompute compute +check_engine_stats northd_output recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats @@ -3991,7 +3991,7 @@ check_engine_stats northd recompute nocompute check_engine_stats bfd recompute nocompute check_engine_stats route_policies recompute nocompute check_engine_stats lflow recompute nocompute -check_engine_stats northd_output norecompute compute +check_engine_stats northd_output recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats @@ -6816,6 +6816,10 @@ check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public # ECMP flows will be added even if there is only one next-hop. check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10 +check_row_count ECMP_Nexthop 1 +uuid=$(fetch_column Port_Binding _uuid logical_port=lr0-public) +check_column 192.168.0.10 ECMP_Nexthop nexthop +check_column "$uuid" ECMP_Nexthop port ovn-sbctl dump-flows lr0 > lr0flows @@ -6835,6 +6839,13 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn ]) check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20 +check_row_count ECMP_Nexthop 2 +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl +192.168.0.10 +]) +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl +192.168.0.20 +]) ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl @@ -6864,6 +6875,13 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [ # add ecmp route with wrong nexthop check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20 +check_row_count ECMP_Nexthop 2 +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.10], [0], [dnl +192.168.0.10 +]) +AT_CHECK([ovn-sbctl --columns nexthop --bare find ECMP_Nexthop nexthop=192.168.0.20], [0], [dnl +192.168.0.20 +]) ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl @@ -6883,6 +6901,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192. check ovn-nbctl lr-route-del lr0 wait_row_count nb:Logical_Router_Static_Route 0 +check_row_count ECMP_Nexthop 0 check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 ovn-sbctl dump-flows lr0 > lr0flows
Introduce ECMP_Nexthop table in the SB db in order to track active ecmp-symmetric-reply connections and flush stale ones. ECMP_Nexthop table contains ip and mac address for each active nexthop. Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> --- northd/en-northd.c | 29 +++++++++++ northd/en-northd.h | 4 ++ northd/inc-proc-northd.c | 9 +++- northd/northd.c | 104 ++++++++++++++++++++++++++++++++++++++- northd/northd.h | 6 +++ ovn-sb.ovsschema | 17 ++++++- ovn-sb.xml | 31 ++++++++++++ tests/ovn-northd.at | 33 ++++++++++--- 8 files changed, 222 insertions(+), 11 deletions(-)