Message ID | 127b9b4f731f656a380390dffce54711b0ed960b.1709817303.git.lorenzo.bianconi@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series | Introduce ECMP_nexthop monitor in ovn-controller | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/github-robot-_ovn-kubernetes | success | github build: passed |
Hi Lorenzo, I have some comments below. On 3/7/24 08:19, Lorenzo Bianconi wrote: > Introduce ECMP_Nexthop table in the SB db in order to track active > ecmp-symmetric-reply connections and flush stale ones. > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > --- > northd/en-northd.c | 4 ++ > northd/inc-proc-northd.c | 8 +++- > northd/northd.c | 98 ++++++++++++++++++++++++++++++++++++++++ > northd/northd.h | 3 ++ > ovn-sb.ovsschema | 15 +++++- > ovn-sb.xml | 19 ++++++++ > tests/ovn-northd.at | 4 ++ > 7 files changed, 147 insertions(+), 4 deletions(-) > > diff --git a/northd/en-northd.c b/northd/en-northd.c > index 4479b4aff..8d2ab481f 100644 > --- a/northd/en-northd.c > +++ b/northd/en-northd.c > @@ -76,6 +76,8 @@ northd_get_input_data(struct engine_node *node, > EN_OVSDB_GET(engine_get_input("NB_chassis_template_var", node)); > input_data->nbrec_mirror_table = > EN_OVSDB_GET(engine_get_input("NB_mirror", node)); > + input_data->nbrec_static_route_table = > + EN_OVSDB_GET(engine_get_input("NB_logical_router_static_route", node)); > > input_data->sbrec_datapath_binding_table = > EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node)); > @@ -101,6 +103,8 @@ northd_get_input_data(struct engine_node *node, > EN_OVSDB_GET(engine_get_input("SB_chassis_template_var", node)); > input_data->sbrec_mirror_table = > EN_OVSDB_GET(engine_get_input("SB_mirror", node)); > + input_data->sbrec_ecmp_nexthop_table = > + EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node)); > > struct ed_type_lb_data *lb_data = > engine_get_input_data("lb_data", node); > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c > index e1073812c..1c58da0bf 100644 > --- a/northd/inc-proc-northd.c > +++ b/northd/inc-proc-northd.c > @@ -61,7 +61,8 @@ static unixctl_cb_func chassis_features_list; > NB_NODE(meter, "meter") \ > NB_NODE(bfd, "bfd") \ > NB_NODE(static_mac_binding, "static_mac_binding") \ > - NB_NODE(chassis_template_var, "chassis_template_var") > + NB_NODE(chassis_template_var, "chassis_template_var") \ > + NB_NODE(logical_router_static_route, "logical_router_static_route") > > enum nb_engine_node { > #define NB_NODE(NAME, NAME_STR) NB_##NAME, > @@ -101,7 +102,8 @@ static unixctl_cb_func chassis_features_list; > SB_NODE(fdb, "fdb") \ > SB_NODE(static_mac_binding, "static_mac_binding") \ > SB_NODE(chassis_template_var, "chassis_template_var") \ > - SB_NODE(logical_dp_group, "logical_dp_group") > + SB_NODE(logical_dp_group, "logical_dp_group") \ > + SB_NODE(ecmp_nexthop, "ecmp_nexthop") > > enum sb_engine_node { > #define SB_NODE(NAME, NAME_STR) SB_##NAME, > @@ -180,6 +182,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > engine_add_input(&en_northd, &en_nb_mirror, NULL); > engine_add_input(&en_northd, &en_nb_static_mac_binding, NULL); > engine_add_input(&en_northd, &en_nb_chassis_template_var, NULL); > + engine_add_input(&en_northd, &en_nb_logical_router_static_route, NULL); > > engine_add_input(&en_northd, &en_sb_chassis, NULL); > engine_add_input(&en_northd, &en_sb_mirror, NULL); > @@ -192,6 +195,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > engine_add_input(&en_northd, &en_sb_fdb, NULL); > engine_add_input(&en_northd, &en_sb_static_mac_binding, NULL); > engine_add_input(&en_northd, &en_sb_chassis_template_var, NULL); > + engine_add_input(&en_northd, &en_sb_ecmp_nexthop, NULL); > engine_add_input(&en_northd, &en_global_config, > northd_global_config_handler); > > diff --git a/northd/northd.c b/northd/northd.c > index 4b39137e7..3770f9f94 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -16654,6 +16654,101 @@ sync_mirrors(struct ovsdb_idl_txn *ovnsb_txn, > shash_destroy(&sb_mirrors); > } > > +struct sb_ecmp_nexthop_entry { > + struct hmap_node hmap_node; > + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop; > +}; > + > +static struct sb_ecmp_nexthop_entry * > +sb_ecmp_nexthop_lookup(const struct hmap *map, const char *nexthop) > +{ > + uint32_t hash = hash_string(nexthop, 0); > + struct sb_ecmp_nexthop_entry *enh_e; > + > + HMAP_FOR_EACH_WITH_HASH (enh_e, hmap_node, hash, map) { > + if (!strcmp(enh_e->sb_ecmp_nexthop->nexthop, nexthop)) { > + return enh_e; > + } > + } > + return NULL; > +} > + > +#define NEXTHOP_IDS_LEN 65535 > +static void > +sync_ecmp_symmetric_reply_nexthop(struct ovsdb_idl_txn *ovnsb_txn, > + const struct nbrec_logical_router_static_route_table *nbrec_sr_table, > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nextop_table) > +{ > + unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN); > + struct hmap sb_only = HMAP_INITIALIZER(&sb_only); > + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop; > + struct sb_ecmp_nexthop_entry *enh_e; > + int id; > + > + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, > + sbrec_ecmp_nextop_table) { > + uint32_t hash = hash_string(sb_ecmp_nexthop->nexthop, 0); > + enh_e = xmalloc(sizeof *enh_e); > + enh_e->sb_ecmp_nexthop = sb_ecmp_nexthop; > + hmap_insert(&sb_only, &enh_e->hmap_node, hash); > + } > + > + const struct nbrec_logical_router_static_route *r; > + NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table) { > + if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) { > + continue; > + } > + > + id = smap_get_int(&r->options, "nexthop-id", -1); I think having options:nexthop-id in the NB DB can cause problems. Clearly it's only intended for northd to set, but an admin also has access to it. Let's say that a user has two static routes in the NB DB with the same "nexthop" value. These should have the same nexthop-id associated with them. But let's imagine an admin sets the two static routes to have two different options:nexthop-id values. northd will end up creating one SB ECMP_Nexthop entry, but it will be unpredictable which of the two nexthop-ids will be set on this entry. Since patch 2 uses the NB nexthop-id in the ct_label, it means that one of these two static routes will end up setting a nonsense ID in the ct_label. I think that the nexthop-id should only exist on the SB ECMP_Nexthop, and not in the NB Logical_Router_Static_Route table. In patch 2, OVN should use the SB ECMP_Nexthop nexthop-id when setting the ct_label value. This will ensure that all ECMP routes with the same nexthop will have the same nexthop-id set in the ct_label. > + if (id < 0) { > + continue; > + } > + bitmap_set1(nexthop_ids, id); > + } > + NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table) { > + if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) { > + continue; > + } > + > + id = smap_get_int(&r->options, "nexthop-id", -1); > + if (id < 0) { > + id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN); > + if (id == NEXTHOP_IDS_LEN) { > + continue; > + } > + bitmap_set1(nexthop_ids, id); > + > + struct smap options = SMAP_INITIALIZER(&options); > + smap_clone(&options, &r->options); > + smap_add_format(&options, "nexthop-id", "%d", id); > + nbrec_logical_router_static_route_set_options(r, &options); > + smap_destroy(&options); > + } > + > + enh_e = sb_ecmp_nexthop_lookup(&sb_only, r->nexthop); > + if (!enh_e) { > + sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn); > + sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop); > + > + struct smap options = SMAP_INITIALIZER(&options); > + smap_add_format(&options, "nexthop-id", "%d", id); > + sbrec_ecmp_nexthop_set_options(sb_ecmp_nexthop, &options); > + smap_destroy(&options); > + } else { > + hmap_remove(&sb_only, &enh_e->hmap_node); > + free(enh_e); > + } > + } > + > + HMAP_FOR_EACH_POP (enh_e, hmap_node, &sb_only) { > + sbrec_ecmp_nexthop_delete(enh_e->sb_ecmp_nexthop); > + free(enh_e); > + } > + hmap_destroy(&sb_only); > + > + bitmap_free(nexthop_ids); > +} > + > /* > * struct 'dns_info' is used to sync the DNS records between OVN Northbound db > * and Southbound db. > @@ -17334,6 +17429,9 @@ ovnnb_db_run(struct northd_input *input_data, > &data->ls_datapaths.datapaths); > sync_template_vars(ovnsb_txn, input_data->nbrec_chassis_template_var_table, > input_data->sbrec_chassis_template_var_table); > + sync_ecmp_symmetric_reply_nexthop(ovnsb_txn, > + input_data->nbrec_static_route_table, > + input_data->sbrec_ecmp_nexthop_table); > > cleanup_stale_fdb_entries(input_data->sbrec_fdb_table, > &data->ls_datapaths.datapaths); > diff --git a/northd/northd.h b/northd/northd.h > index 3f1cd8341..48b38b542 100644 > --- a/northd/northd.h > +++ b/northd/northd.h > @@ -34,6 +34,8 @@ struct northd_input { > const struct nbrec_chassis_template_var_table > *nbrec_chassis_template_var_table; > const struct nbrec_mirror_table *nbrec_mirror_table; > + const struct nbrec_logical_router_static_route_table > + *nbrec_static_route_table; > > /* Southbound table references */ > const struct sbrec_datapath_binding_table *sbrec_datapath_binding_table; > @@ -50,6 +52,7 @@ struct northd_input { > const struct sbrec_chassis_template_var_table > *sbrec_chassis_template_var_table; > const struct sbrec_mirror_table *sbrec_mirror_table; > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table; > > /* Northd lb data node inputs*/ > const struct hmap *lbs; > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema > index 84ae09515..fe32dd030 100644 > --- a/ovn-sb.ovsschema > +++ b/ovn-sb.ovsschema > @@ -1,7 +1,7 @@ > { > "name": "OVN_Southbound", > - "version": "20.33.0", > - "cksum": "4076371179 31328", > + "version": "20.34.0", > + "cksum": "1685306144 31809", > "tables": { > "SB_Global": { > "columns": { > @@ -607,6 +607,17 @@ > "refTable": "Datapath_Binding"}}}}, > "indexes": [["logical_port", "ip"]], > "isRoot": true}, > + "ECMP_Nexthop": { > + "columns": { > + "nexthop": {"type": "string"}, > + "external_ids": { > + "type": {"key": "string", "value": "string", > + "min": 0, "max": "unlimited"}}, > + "options": { > + "type": {"key": "string", "value": "string", > + "min": 0, "max": "unlimited"}}}, I think that the nexthop-id should not be in the "options' of ECMP_Nexthop. It's not an optional thing and so it should have its own column. > + "indexes": [["nexthop"]], > + "isRoot": true}, > "Chassis_Template_Var": { > "columns": { > "chassis": {"type": "string"}, > diff --git a/ovn-sb.xml b/ovn-sb.xml > index ac4e585f2..5f5e2401b 100644 > --- a/ovn-sb.xml > +++ b/ovn-sb.xml > @@ -5095,4 +5095,23 @@ tcp.flags = RST; > The set of variable values for a given chassis. > </column> > </table> > + > + <table name="ECMP_Nexthop"> > + <column name="nexthop"> > + <p> > + Nexthop IP address for this route. Nexthop IP address should be the IP > + address of a connected router port or the IP address of a logical port > + or can be set to <code>discard</code> for dropping packets which match > + the given route. > + </p> > + </column> > + > + <column name="options"> > + Reserved for future use. > + </column> "options:nexthop-id" is not documented here (or in the NB Logical_Router_Static_Route table, but as I mentioned earlier, I don't think it should be there anyway) > + > + <column name="external_ids"> > + See <em>External IDs</em> at the beginning of this document. > + </column> > + </table> > </database> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 89aed5adc..2160e8de7 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -6542,6 +6542,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router > check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public > > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10 > +check_row_count ECMP_Nexthop 1 > > ovn-sbctl dump-flows lr0 > lr0flows > > @@ -6553,6 +6554,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn > ]) > > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20 > +check_row_count ECMP_Nexthop 2 > > ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl > @@ -6589,6 +6591,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [ > > # add ecmp route with wrong nexthop > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20 > +check_row_count ECMP_Nexthop 3 > > ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl > @@ -6603,6 +6606,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192. > > check ovn-nbctl lr-route-del lr0 > wait_row_count nb:Logical_Router_Static_Route 0 > +check_row_count ECMP_Nexthop 0 > > check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 > ovn-sbctl dump-flows lr0 > lr0flows
> Hi Lorenzo, Hi Mark, > > I have some comments below. thx for the review. > > On 3/7/24 08:19, Lorenzo Bianconi wrote: > > Introduce ECMP_Nexthop table in the SB db in order to track active > > ecmp-symmetric-reply connections and flush stale ones. > > > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > > --- > > northd/en-northd.c | 4 ++ > > northd/inc-proc-northd.c | 8 +++- > > northd/northd.c | 98 ++++++++++++++++++++++++++++++++++++++++ > > northd/northd.h | 3 ++ > > ovn-sb.ovsschema | 15 +++++- > > ovn-sb.xml | 19 ++++++++ > > tests/ovn-northd.at | 4 ++ > > 7 files changed, 147 insertions(+), 4 deletions(-) > > > > diff --git a/northd/en-northd.c b/northd/en-northd.c > > index 4479b4aff..8d2ab481f 100644 > > --- a/northd/en-northd.c > > +++ b/northd/en-northd.c > > @@ -76,6 +76,8 @@ northd_get_input_data(struct engine_node *node, > > EN_OVSDB_GET(engine_get_input("NB_chassis_template_var", node)); > > input_data->nbrec_mirror_table = > > EN_OVSDB_GET(engine_get_input("NB_mirror", node)); > > + input_data->nbrec_static_route_table = > > + EN_OVSDB_GET(engine_get_input("NB_logical_router_static_route", node)); > > input_data->sbrec_datapath_binding_table = > > EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node)); > > @@ -101,6 +103,8 @@ northd_get_input_data(struct engine_node *node, > > EN_OVSDB_GET(engine_get_input("SB_chassis_template_var", node)); > > input_data->sbrec_mirror_table = > > EN_OVSDB_GET(engine_get_input("SB_mirror", node)); > > + input_data->sbrec_ecmp_nexthop_table = > > + EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node)); > > struct ed_type_lb_data *lb_data = > > engine_get_input_data("lb_data", node); > > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c > > index e1073812c..1c58da0bf 100644 > > --- a/northd/inc-proc-northd.c > > +++ b/northd/inc-proc-northd.c > > @@ -61,7 +61,8 @@ static unixctl_cb_func chassis_features_list; > > NB_NODE(meter, "meter") \ > > NB_NODE(bfd, "bfd") \ > > NB_NODE(static_mac_binding, "static_mac_binding") \ > > - NB_NODE(chassis_template_var, "chassis_template_var") > > + NB_NODE(chassis_template_var, "chassis_template_var") \ > > + NB_NODE(logical_router_static_route, "logical_router_static_route") > > enum nb_engine_node { > > #define NB_NODE(NAME, NAME_STR) NB_##NAME, > > @@ -101,7 +102,8 @@ static unixctl_cb_func chassis_features_list; > > SB_NODE(fdb, "fdb") \ > > SB_NODE(static_mac_binding, "static_mac_binding") \ > > SB_NODE(chassis_template_var, "chassis_template_var") \ > > - SB_NODE(logical_dp_group, "logical_dp_group") > > + SB_NODE(logical_dp_group, "logical_dp_group") \ > > + SB_NODE(ecmp_nexthop, "ecmp_nexthop") > > enum sb_engine_node { > > #define SB_NODE(NAME, NAME_STR) SB_##NAME, > > @@ -180,6 +182,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > > engine_add_input(&en_northd, &en_nb_mirror, NULL); > > engine_add_input(&en_northd, &en_nb_static_mac_binding, NULL); > > engine_add_input(&en_northd, &en_nb_chassis_template_var, NULL); > > + engine_add_input(&en_northd, &en_nb_logical_router_static_route, NULL); > > engine_add_input(&en_northd, &en_sb_chassis, NULL); > > engine_add_input(&en_northd, &en_sb_mirror, NULL); > > @@ -192,6 +195,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > > engine_add_input(&en_northd, &en_sb_fdb, NULL); > > engine_add_input(&en_northd, &en_sb_static_mac_binding, NULL); > > engine_add_input(&en_northd, &en_sb_chassis_template_var, NULL); > > + engine_add_input(&en_northd, &en_sb_ecmp_nexthop, NULL); > > engine_add_input(&en_northd, &en_global_config, > > northd_global_config_handler); > > diff --git a/northd/northd.c b/northd/northd.c > > index 4b39137e7..3770f9f94 100644 > > --- a/northd/northd.c > > +++ b/northd/northd.c > > @@ -16654,6 +16654,101 @@ sync_mirrors(struct ovsdb_idl_txn *ovnsb_txn, > > shash_destroy(&sb_mirrors); > > } > > +struct sb_ecmp_nexthop_entry { > > + struct hmap_node hmap_node; > > + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop; > > +}; > > + > > +static struct sb_ecmp_nexthop_entry * > > +sb_ecmp_nexthop_lookup(const struct hmap *map, const char *nexthop) > > +{ > > + uint32_t hash = hash_string(nexthop, 0); > > + struct sb_ecmp_nexthop_entry *enh_e; > > + > > + HMAP_FOR_EACH_WITH_HASH (enh_e, hmap_node, hash, map) { > > + if (!strcmp(enh_e->sb_ecmp_nexthop->nexthop, nexthop)) { > > + return enh_e; > > + } > > + } > > + return NULL; > > +} > > + > > +#define NEXTHOP_IDS_LEN 65535 > > +static void > > +sync_ecmp_symmetric_reply_nexthop(struct ovsdb_idl_txn *ovnsb_txn, > > + const struct nbrec_logical_router_static_route_table *nbrec_sr_table, > > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nextop_table) > > +{ > > + unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN); > > + struct hmap sb_only = HMAP_INITIALIZER(&sb_only); > > + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop; > > + struct sb_ecmp_nexthop_entry *enh_e; > > + int id; > > + > > + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, > > + sbrec_ecmp_nextop_table) { > > + uint32_t hash = hash_string(sb_ecmp_nexthop->nexthop, 0); > > + enh_e = xmalloc(sizeof *enh_e); > > + enh_e->sb_ecmp_nexthop = sb_ecmp_nexthop; > > + hmap_insert(&sb_only, &enh_e->hmap_node, hash); > > + } > > + > > + const struct nbrec_logical_router_static_route *r; > > + NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table) { > > + if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) { > > + continue; > > + } > > + > > + id = smap_get_int(&r->options, "nexthop-id", -1); > > I think having options:nexthop-id in the NB DB can cause problems. Clearly > it's only intended for northd to set, but an admin also has access to it. > Let's say that a user has two static routes in the NB DB with the same > "nexthop" value. These should have the same nexthop-id associated with them. > But let's imagine an admin sets the two static routes to have two different > options:nexthop-id values. northd will end up creating one SB ECMP_Nexthop > entry, but it will be unpredictable which of the two nexthop-ids will be set > on this entry. Since patch 2 uses the NB nexthop-id in the ct_label, it > means that one of these two static routes will end up setting a nonsense ID > in the ct_label. > > I think that the nexthop-id should only exist on the SB ECMP_Nexthop, and > not in the NB Logical_Router_Static_Route table. In patch 2, OVN should use > the SB ECMP_Nexthop nexthop-id when setting the ct_label value. This will > ensure that all ECMP routes with the same nexthop will have the same > nexthop-id set in the ct_label. ack, fine. I will fix it in v2. > > > + if (id < 0) { > > + continue; > > + } > > + bitmap_set1(nexthop_ids, id); > > + } > > + NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table) { > > + if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) { > > + continue; > > + } > > + > > + id = smap_get_int(&r->options, "nexthop-id", -1); > > + if (id < 0) { > > + id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN); > > + if (id == NEXTHOP_IDS_LEN) { > > + continue; > > + } > > + bitmap_set1(nexthop_ids, id); > > + > > + struct smap options = SMAP_INITIALIZER(&options); > > + smap_clone(&options, &r->options); > > + smap_add_format(&options, "nexthop-id", "%d", id); > > + nbrec_logical_router_static_route_set_options(r, &options); > > + smap_destroy(&options); > > + } > > + > > + enh_e = sb_ecmp_nexthop_lookup(&sb_only, r->nexthop); > > + if (!enh_e) { > > + sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn); > > + sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop); > > + > > + struct smap options = SMAP_INITIALIZER(&options); > > + smap_add_format(&options, "nexthop-id", "%d", id); > > + sbrec_ecmp_nexthop_set_options(sb_ecmp_nexthop, &options); > > + smap_destroy(&options); > > + } else { > > + hmap_remove(&sb_only, &enh_e->hmap_node); > > + free(enh_e); > > + } > > + } > > + > > + HMAP_FOR_EACH_POP (enh_e, hmap_node, &sb_only) { > > + sbrec_ecmp_nexthop_delete(enh_e->sb_ecmp_nexthop); > > + free(enh_e); > > + } > > + hmap_destroy(&sb_only); > > + > > + bitmap_free(nexthop_ids); > > +} > > + > > /* > > * struct 'dns_info' is used to sync the DNS records between OVN Northbound db > > * and Southbound db. > > @@ -17334,6 +17429,9 @@ ovnnb_db_run(struct northd_input *input_data, > > &data->ls_datapaths.datapaths); > > sync_template_vars(ovnsb_txn, input_data->nbrec_chassis_template_var_table, > > input_data->sbrec_chassis_template_var_table); > > + sync_ecmp_symmetric_reply_nexthop(ovnsb_txn, > > + input_data->nbrec_static_route_table, > > + input_data->sbrec_ecmp_nexthop_table); > > cleanup_stale_fdb_entries(input_data->sbrec_fdb_table, > > &data->ls_datapaths.datapaths); > > diff --git a/northd/northd.h b/northd/northd.h > > index 3f1cd8341..48b38b542 100644 > > --- a/northd/northd.h > > +++ b/northd/northd.h > > @@ -34,6 +34,8 @@ struct northd_input { > > const struct nbrec_chassis_template_var_table > > *nbrec_chassis_template_var_table; > > const struct nbrec_mirror_table *nbrec_mirror_table; > > + const struct nbrec_logical_router_static_route_table > > + *nbrec_static_route_table; > > /* Southbound table references */ > > const struct sbrec_datapath_binding_table *sbrec_datapath_binding_table; > > @@ -50,6 +52,7 @@ struct northd_input { > > const struct sbrec_chassis_template_var_table > > *sbrec_chassis_template_var_table; > > const struct sbrec_mirror_table *sbrec_mirror_table; > > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table; > > /* Northd lb data node inputs*/ > > const struct hmap *lbs; > > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema > > index 84ae09515..fe32dd030 100644 > > --- a/ovn-sb.ovsschema > > +++ b/ovn-sb.ovsschema > > @@ -1,7 +1,7 @@ > > { > > "name": "OVN_Southbound", > > - "version": "20.33.0", > > - "cksum": "4076371179 31328", > > + "version": "20.34.0", > > + "cksum": "1685306144 31809", > > "tables": { > > "SB_Global": { > > "columns": { > > @@ -607,6 +607,17 @@ > > "refTable": "Datapath_Binding"}}}}, > > "indexes": [["logical_port", "ip"]], > > "isRoot": true}, > > + "ECMP_Nexthop": { > > + "columns": { > > + "nexthop": {"type": "string"}, > > + "external_ids": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}, > > + "options": { > > + "type": {"key": "string", "value": "string", > > + "min": 0, "max": "unlimited"}}}, > > I think that the nexthop-id should not be in the "options' of ECMP_Nexthop. > It's not an optional thing and so it should have its own column. ack, fine. I will fix it in v2. > > > + "indexes": [["nexthop"]], > > + "isRoot": true}, > > "Chassis_Template_Var": { > > "columns": { > > "chassis": {"type": "string"}, > > diff --git a/ovn-sb.xml b/ovn-sb.xml > > index ac4e585f2..5f5e2401b 100644 > > --- a/ovn-sb.xml > > +++ b/ovn-sb.xml > > @@ -5095,4 +5095,23 @@ tcp.flags = RST; > > The set of variable values for a given chassis. > > </column> > > </table> > > + > > + <table name="ECMP_Nexthop"> > > + <column name="nexthop"> > > + <p> > > + Nexthop IP address for this route. Nexthop IP address should be the IP > > + address of a connected router port or the IP address of a logical port > > + or can be set to <code>discard</code> for dropping packets which match > > + the given route. > > + </p> > > + </column> > > + > > + <column name="options"> > > + Reserved for future use. > > + </column> > > "options:nexthop-id" is not documented here (or in the NB > Logical_Router_Static_Route table, but as I mentioned earlier, I don't think > it should be there anyway) ditto. Regards, Lorenzo > > > + > > + <column name="external_ids"> > > + See <em>External IDs</em> at the beginning of this document. > > + </column> > > + </table> > > </database> > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > > index 89aed5adc..2160e8de7 100644 > > --- a/tests/ovn-northd.at > > +++ b/tests/ovn-northd.at > > @@ -6542,6 +6542,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router > > check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public > > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10 > > +check_row_count ECMP_Nexthop 1 > > ovn-sbctl dump-flows lr0 > lr0flows > > @@ -6553,6 +6554,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn > > ]) > > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20 > > +check_row_count ECMP_Nexthop 2 > > ovn-sbctl dump-flows lr0 > lr0flows > > AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl > > @@ -6589,6 +6591,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [ > > # add ecmp route with wrong nexthop > > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20 > > +check_row_count ECMP_Nexthop 3 > > ovn-sbctl dump-flows lr0 > lr0flows > > AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl > > @@ -6603,6 +6606,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192. > > check ovn-nbctl lr-route-del lr0 > > wait_row_count nb:Logical_Router_Static_Route 0 > > +check_row_count ECMP_Nexthop 0 > > check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 > > ovn-sbctl dump-flows lr0 > lr0flows >
diff --git a/northd/en-northd.c b/northd/en-northd.c index 4479b4aff..8d2ab481f 100644 --- a/northd/en-northd.c +++ b/northd/en-northd.c @@ -76,6 +76,8 @@ northd_get_input_data(struct engine_node *node, EN_OVSDB_GET(engine_get_input("NB_chassis_template_var", node)); input_data->nbrec_mirror_table = EN_OVSDB_GET(engine_get_input("NB_mirror", node)); + input_data->nbrec_static_route_table = + EN_OVSDB_GET(engine_get_input("NB_logical_router_static_route", node)); input_data->sbrec_datapath_binding_table = EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node)); @@ -101,6 +103,8 @@ northd_get_input_data(struct engine_node *node, EN_OVSDB_GET(engine_get_input("SB_chassis_template_var", node)); input_data->sbrec_mirror_table = EN_OVSDB_GET(engine_get_input("SB_mirror", node)); + input_data->sbrec_ecmp_nexthop_table = + EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node)); struct ed_type_lb_data *lb_data = engine_get_input_data("lb_data", node); diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c index e1073812c..1c58da0bf 100644 --- a/northd/inc-proc-northd.c +++ b/northd/inc-proc-northd.c @@ -61,7 +61,8 @@ static unixctl_cb_func chassis_features_list; NB_NODE(meter, "meter") \ NB_NODE(bfd, "bfd") \ NB_NODE(static_mac_binding, "static_mac_binding") \ - NB_NODE(chassis_template_var, "chassis_template_var") + NB_NODE(chassis_template_var, "chassis_template_var") \ + NB_NODE(logical_router_static_route, "logical_router_static_route") enum nb_engine_node { #define NB_NODE(NAME, NAME_STR) NB_##NAME, @@ -101,7 +102,8 @@ static unixctl_cb_func chassis_features_list; SB_NODE(fdb, "fdb") \ SB_NODE(static_mac_binding, "static_mac_binding") \ SB_NODE(chassis_template_var, "chassis_template_var") \ - SB_NODE(logical_dp_group, "logical_dp_group") + SB_NODE(logical_dp_group, "logical_dp_group") \ + SB_NODE(ecmp_nexthop, "ecmp_nexthop") enum sb_engine_node { #define SB_NODE(NAME, NAME_STR) SB_##NAME, @@ -180,6 +182,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, engine_add_input(&en_northd, &en_nb_mirror, NULL); engine_add_input(&en_northd, &en_nb_static_mac_binding, NULL); engine_add_input(&en_northd, &en_nb_chassis_template_var, NULL); + engine_add_input(&en_northd, &en_nb_logical_router_static_route, NULL); engine_add_input(&en_northd, &en_sb_chassis, NULL); engine_add_input(&en_northd, &en_sb_mirror, NULL); @@ -192,6 +195,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, engine_add_input(&en_northd, &en_sb_fdb, NULL); engine_add_input(&en_northd, &en_sb_static_mac_binding, NULL); engine_add_input(&en_northd, &en_sb_chassis_template_var, NULL); + engine_add_input(&en_northd, &en_sb_ecmp_nexthop, NULL); engine_add_input(&en_northd, &en_global_config, northd_global_config_handler); diff --git a/northd/northd.c b/northd/northd.c index 4b39137e7..3770f9f94 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -16654,6 +16654,101 @@ sync_mirrors(struct ovsdb_idl_txn *ovnsb_txn, shash_destroy(&sb_mirrors); } +struct sb_ecmp_nexthop_entry { + struct hmap_node hmap_node; + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop; +}; + +static struct sb_ecmp_nexthop_entry * +sb_ecmp_nexthop_lookup(const struct hmap *map, const char *nexthop) +{ + uint32_t hash = hash_string(nexthop, 0); + struct sb_ecmp_nexthop_entry *enh_e; + + HMAP_FOR_EACH_WITH_HASH (enh_e, hmap_node, hash, map) { + if (!strcmp(enh_e->sb_ecmp_nexthop->nexthop, nexthop)) { + return enh_e; + } + } + return NULL; +} + +#define NEXTHOP_IDS_LEN 65535 +static void +sync_ecmp_symmetric_reply_nexthop(struct ovsdb_idl_txn *ovnsb_txn, + const struct nbrec_logical_router_static_route_table *nbrec_sr_table, + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nextop_table) +{ + unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN); + struct hmap sb_only = HMAP_INITIALIZER(&sb_only); + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop; + struct sb_ecmp_nexthop_entry *enh_e; + int id; + + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, + sbrec_ecmp_nextop_table) { + uint32_t hash = hash_string(sb_ecmp_nexthop->nexthop, 0); + enh_e = xmalloc(sizeof *enh_e); + enh_e->sb_ecmp_nexthop = sb_ecmp_nexthop; + hmap_insert(&sb_only, &enh_e->hmap_node, hash); + } + + const struct nbrec_logical_router_static_route *r; + NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table) { + if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) { + continue; + } + + id = smap_get_int(&r->options, "nexthop-id", -1); + if (id < 0) { + continue; + } + bitmap_set1(nexthop_ids, id); + } + NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table) { + if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) { + continue; + } + + id = smap_get_int(&r->options, "nexthop-id", -1); + if (id < 0) { + id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN); + if (id == NEXTHOP_IDS_LEN) { + continue; + } + bitmap_set1(nexthop_ids, id); + + struct smap options = SMAP_INITIALIZER(&options); + smap_clone(&options, &r->options); + smap_add_format(&options, "nexthop-id", "%d", id); + nbrec_logical_router_static_route_set_options(r, &options); + smap_destroy(&options); + } + + enh_e = sb_ecmp_nexthop_lookup(&sb_only, r->nexthop); + if (!enh_e) { + sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn); + sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop); + + struct smap options = SMAP_INITIALIZER(&options); + smap_add_format(&options, "nexthop-id", "%d", id); + sbrec_ecmp_nexthop_set_options(sb_ecmp_nexthop, &options); + smap_destroy(&options); + } else { + hmap_remove(&sb_only, &enh_e->hmap_node); + free(enh_e); + } + } + + HMAP_FOR_EACH_POP (enh_e, hmap_node, &sb_only) { + sbrec_ecmp_nexthop_delete(enh_e->sb_ecmp_nexthop); + free(enh_e); + } + hmap_destroy(&sb_only); + + bitmap_free(nexthop_ids); +} + /* * struct 'dns_info' is used to sync the DNS records between OVN Northbound db * and Southbound db. @@ -17334,6 +17429,9 @@ ovnnb_db_run(struct northd_input *input_data, &data->ls_datapaths.datapaths); sync_template_vars(ovnsb_txn, input_data->nbrec_chassis_template_var_table, input_data->sbrec_chassis_template_var_table); + sync_ecmp_symmetric_reply_nexthop(ovnsb_txn, + input_data->nbrec_static_route_table, + input_data->sbrec_ecmp_nexthop_table); cleanup_stale_fdb_entries(input_data->sbrec_fdb_table, &data->ls_datapaths.datapaths); diff --git a/northd/northd.h b/northd/northd.h index 3f1cd8341..48b38b542 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -34,6 +34,8 @@ struct northd_input { const struct nbrec_chassis_template_var_table *nbrec_chassis_template_var_table; const struct nbrec_mirror_table *nbrec_mirror_table; + const struct nbrec_logical_router_static_route_table + *nbrec_static_route_table; /* Southbound table references */ const struct sbrec_datapath_binding_table *sbrec_datapath_binding_table; @@ -50,6 +52,7 @@ struct northd_input { const struct sbrec_chassis_template_var_table *sbrec_chassis_template_var_table; const struct sbrec_mirror_table *sbrec_mirror_table; + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table; /* Northd lb data node inputs*/ const struct hmap *lbs; diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema index 84ae09515..fe32dd030 100644 --- a/ovn-sb.ovsschema +++ b/ovn-sb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Southbound", - "version": "20.33.0", - "cksum": "4076371179 31328", + "version": "20.34.0", + "cksum": "1685306144 31809", "tables": { "SB_Global": { "columns": { @@ -607,6 +607,17 @@ "refTable": "Datapath_Binding"}}}}, "indexes": [["logical_port", "ip"]], "isRoot": true}, + "ECMP_Nexthop": { + "columns": { + "nexthop": {"type": "string"}, + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}, + "options": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, + "indexes": [["nexthop"]], + "isRoot": true}, "Chassis_Template_Var": { "columns": { "chassis": {"type": "string"}, diff --git a/ovn-sb.xml b/ovn-sb.xml index ac4e585f2..5f5e2401b 100644 --- a/ovn-sb.xml +++ b/ovn-sb.xml @@ -5095,4 +5095,23 @@ tcp.flags = RST; The set of variable values for a given chassis. </column> </table> + + <table name="ECMP_Nexthop"> + <column name="nexthop"> + <p> + Nexthop IP address for this route. Nexthop IP address should be the IP + address of a connected router port or the IP address of a logical port + or can be set to <code>discard</code> for dropping packets which match + the given route. + </p> + </column> + + <column name="options"> + Reserved for future use. + </column> + + <column name="external_ids"> + See <em>External IDs</em> at the beginning of this document. + </column> + </table> </database> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 89aed5adc..2160e8de7 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -6542,6 +6542,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10 +check_row_count ECMP_Nexthop 1 ovn-sbctl dump-flows lr0 > lr0flows @@ -6553,6 +6554,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn ]) check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20 +check_row_count ECMP_Nexthop 2 ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl @@ -6589,6 +6591,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [ # add ecmp route with wrong nexthop check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20 +check_row_count ECMP_Nexthop 3 ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl @@ -6603,6 +6606,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192. check ovn-nbctl lr-route-del lr0 wait_row_count nb:Logical_Router_Static_Route 0 +check_row_count ECMP_Nexthop 0 check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 ovn-sbctl dump-flows lr0 > lr0flows
Introduce ECMP_Nexthop table in the SB db in order to track active ecmp-symmetric-reply connections and flush stale ones. Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> --- northd/en-northd.c | 4 ++ northd/inc-proc-northd.c | 8 +++- northd/northd.c | 98 ++++++++++++++++++++++++++++++++++++++++ northd/northd.h | 3 ++ ovn-sb.ovsschema | 15 +++++- ovn-sb.xml | 19 ++++++++ tests/ovn-northd.at | 4 ++ 7 files changed, 147 insertions(+), 4 deletions(-)