Message ID | a242e61072e6587779a5bc5ca243dcc22808eb23.1731495611.git.lorenzo.bianconi@redhat.com |
---|---|
State | Changes Requested |
Delegated to: | Dumitru Ceara |
Headers | show |
Series | Introduce ECMP_nexthop monitor in ovn-controller | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/github-robot-_ovn-kubernetes | success | github build: passed |
On 11/13/24 12:05 PM, Lorenzo Bianconi wrote: > Introduce ecmp_nexthop_monitor in ovn-controller in order to track and > flush ecmp-symmetric reply ct entires when requested by the CMS (e.g > removing the related static ecmp routes). CT entries are flushed using > the ethernet mac address stored in ct_label. > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > --- Hi Lorenzo, Thanks for the patch! > NEWS | 2 + > controller/automake.mk | 4 +- > controller/ecmp-next-hop-monitor.c | 184 ++++++++++ > controller/ecmp-next-hop-monitor.h | 25 ++ > controller/ofctrl.c | 7 + > controller/ofctrl.h | 3 + > controller/ovn-controller.c | 3 + > include/ovn/logical-fields.h | 3 + > tests/system-ovn.at | 526 +++++++++++++++++++++++++++++ > 9 files changed, 756 insertions(+), 1 deletion(-) > create mode 100644 controller/ecmp-next-hop-monitor.c > create mode 100644 controller/ecmp-next-hop-monitor.h > > diff --git a/NEWS b/NEWS > index 1f8f54d5d..f46285d32 100644 > --- a/NEWS > +++ b/NEWS > @@ -9,6 +9,8 @@ Post v24.09.0 > ECMP-nexthop. > By default ovn-controller continuously sends ARP/ND packets for > ECMP-nexthop. > + - Introduce ovn-controller ECMP_nexthop monitor in order to flush stale ct While we (OVN developers) know what "ct" stands for, the NEWS file targets users. I think we should rephrase this to: "Auto flush ECMP symmetric reply connection states when an ECMP route is removed by the CMS." > + entries when related ecmp routes are removed by the CMS. 
> > OVN v24.09.0 - 13 Sep 2024 > -------------------------- > diff --git a/controller/automake.mk b/controller/automake.mk > index bb0bf2d33..766e36382 100644 > --- a/controller/automake.mk > +++ b/controller/automake.mk > @@ -51,7 +51,9 @@ controller_ovn_controller_SOURCES = \ > controller/ct-zone.h \ > controller/ct-zone.c \ > controller/ovn-dns.c \ > - controller/ovn-dns.h > + controller/ovn-dns.h \ > + controller/ecmp-next-hop-monitor.h \ > + controller/ecmp-next-hop-monitor.c > > controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la > man_MANS += controller/ovn-controller.8 > diff --git a/controller/ecmp-next-hop-monitor.c b/controller/ecmp-next-hop-monitor.c > new file mode 100644 > index 000000000..bafe9750f > --- /dev/null > +++ b/controller/ecmp-next-hop-monitor.c > @@ -0,0 +1,184 @@ > +/* Copyright (c) 2024, Red Hat, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#include <config.h> > +#include "ct-zone.h" > +#include "lib/ovn-util.h" > +#include "lib/simap.h" > +#include "openvswitch/hmap.h" > +#include "openvswitch/ofp-ct.h" > +#include "openvswitch/rconn.h" > +#include "openvswitch/vlog.h" > +#include "ovn/logical-fields.h" > +#include "ovn-sb-idl.h" > +#include "controller/ecmp-next-hop-monitor.h" > + > +VLOG_DEFINE_THIS_MODULE(ecmp_next_hop_monitor); > + We don't log anything in this module. Should we? If not, we can remove the line above. 
> +static struct hmap ecmp_nexthop; > + > +struct ecmp_nexthop_data { > + struct hmap_node hmap_node; > + uint16_t zone_id; > + char *nexthop; > + char *mac; > +}; > + > +void ecmp_nexthop_init(void) > +{ > + hmap_init(&ecmp_nexthop); > +} > + > +static void > +ecmp_nexthop_erase_entry(struct ecmp_nexthop_data *e) Nit: I see you used "erase" on other occasions in the past for other features, but I think in most places in the code we call these kinds of functions *_destroy(). We could call this ecmp_nexthop_destroy_entry(), what do you think? > +{ > + free(e->nexthop); > + free(e->mac); > + free(e); > +} > + > +static void > +ecmp_nexthop_destroy_map(struct hmap *map) > +{ > + struct ecmp_nexthop_data *e; > + HMAP_FOR_EACH_POP (e, hmap_node, map) { > + ecmp_nexthop_erase_entry(e); > + } > + hmap_destroy(map); > +} > + > +void ecmp_nexthop_destroy(void) > +{ > + ecmp_nexthop_destroy_map(&ecmp_nexthop); > +} > + > +static struct ecmp_nexthop_data * > +ecmp_nexthop_alloc_entry(const char *nexthop, const char *mac, > + const uint16_t zone_id, struct hmap *map) > +{ > + struct ecmp_nexthop_data *e = xmalloc(sizeof *e); > + e->nexthop = xstrdup(nexthop); > + e->mac = xstrdup(mac); > + e->zone_id = zone_id; > + > + uint32_t hash = hash_string(nexthop, 0); > + hash = hash_add(hash, hash_string(mac, 0)); > + hash = hash_add(hash, zone_id); > + hmap_insert(map, &e->hmap_node, hash); > + > + return e; > +} > + > +static struct ecmp_nexthop_data * > +ecmp_nexthop_find_entry(const char *nexthop, const char *mac, > + const uint16_t zone_id, struct hmap *map) > +{ > + uint32_t hash = hash_string(nexthop, 0); > + hash = hash_add(hash, hash_string(mac, 0)); > + hash = hash_add(hash, zone_id); > + > + struct ecmp_nexthop_data *e; > + HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) { > + if (!strcmp(e->nexthop, nexthop) && > + !strcmp(e->mac, mac) && e->zone_id == zone_id) { > + return e; > + } > + } > + return NULL; > +} > + > +static void > +ecmp_nexthop_monitor_flush_ct_entry(const struct
rconn *swconn, > + const char *mac, uint16_t zone_id, > + struct ovs_list *msgs) > +{ > + struct eth_addr ea; > + if (!ovs_scan(mac, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) { > + return; > + } > + > + ovs_u128 mask = { > + /* ct_label.ecmp_reply_eth BITS[32-79] */ > + .u64.hi = OVN_CT_ECMP_ETH_HIGH, > + .u64.lo = OVN_CT_ECMP_ETH_LOW, > + }; > + > + ovs_be32 lo = get_unaligned_be32((void *)&ea.be16[1]); > + ovs_u128 nexthop = { > + .u64.hi = ntohs(ea.be16[0]), > + .u64.lo = (uint64_t) ntohl(lo) << 32, > + }; > + > + struct ofp_ct_match match = { > + .labels = nexthop, > + .labels_mask = mask, > + }; > + struct ofpbuf *msg = ofp_ct_match_encode(&match, &zone_id, > + rconn_get_version(swconn)); > + ovs_list_push_back(msgs, &msg->list_node); > +} > + > +void > +ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table, > + const struct shash *current_ct_zones, > + const struct rconn *swconn, struct ovs_list *msgs) > +{ > + struct hmap sb_ecmp_nexthop = HMAP_INITIALIZER(&sb_ecmp_nexthop); > + > + const struct sbrec_ecmp_nexthop *sbrec_ecmp_nexthop; > + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sbrec_ecmp_nexthop, enh_table) { We should filter out non-local datapaths. > + struct sbrec_port_binding *pb = sbrec_ecmp_nexthop->port; > + if (!pb) { Can this ever happen? The port is a strong reference in the SB and it should be mandatory (see comment in patch 1/4). > + continue; > + } > + > + const char *dp_name = smap_get(&pb->datapath->external_ids, "name"); > + if (!dp_name) { > + continue; > + } > + > + char *name = xasprintf("%s_dnat", dp_name); This makes assumptions about how the zone name is created in ct-zone.c. We have the alloc_nat_zone_key(), please use that instead. 
> + struct ct_zone *ct_zone = shash_find_data(current_ct_zones, name); > + free(name); > + > + if (!ct_zone) { > + continue; > + } > + > + if (!ecmp_nexthop_find_entry(sbrec_ecmp_nexthop->nexthop, > + sbrec_ecmp_nexthop->mac, ct_zone->zone, > + &ecmp_nexthop)) { > + ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop, > + sbrec_ecmp_nexthop->mac, > + ct_zone->zone, &ecmp_nexthop); > + } > + ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop, > + sbrec_ecmp_nexthop->mac, ct_zone->zone, > + &sb_ecmp_nexthop); > + } > + > + struct ecmp_nexthop_data *e; > + HMAP_FOR_EACH_SAFE (e, hmap_node, &ecmp_nexthop) { > + if (!ecmp_nexthop_find_entry(e->nexthop, e->mac, e->zone_id, > + &sb_ecmp_nexthop)) { > + ecmp_nexthop_monitor_flush_ct_entry(swconn, e->mac, > + e->zone_id, msgs); > + hmap_remove(&ecmp_nexthop, &e->hmap_node); > + ecmp_nexthop_erase_entry(e); > + } > + } > + > + ecmp_nexthop_destroy_map(&sb_ecmp_nexthop); > +} > diff --git a/controller/ecmp-next-hop-monitor.h b/controller/ecmp-next-hop-monitor.h > new file mode 100644 > index 000000000..ee8278e3b > --- /dev/null > +++ b/controller/ecmp-next-hop-monitor.h > @@ -0,0 +1,25 @@ > +/* Copyright (c) 2024, Red Hat, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#ifndef OVN_CMP_NEXT_HOP_MONITOR_H > +#define OVN_CMP_NEXT_HOP_MONITOR_H OVN_CMP_NEXT_HOP_MONITOR_H? Did you mean OVN_ECMP_NEXT_HOP_MONITOR_H instead? Same thing in the comment at the end of the file. 
> + > +void ecmp_nexthop_init(void); > +void ecmp_nexthop_destroy(void); > +void ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,> + const struct shash *current_ct_zones, > + const struct rconn *swconn, > + struct ovs_list *msgs); > +#endif /* OVN_CMP_NEXT_HOP_MONITOR_H */ > diff --git a/controller/ofctrl.c b/controller/ofctrl.c > index f9387d375..e44da749d 100644 > --- a/controller/ofctrl.c > +++ b/controller/ofctrl.c > @@ -54,6 +54,7 @@ > #include "vswitch-idl.h" > #include "ovn-sb-idl.h" > #include "ct-zone.h" > +#include "ecmp-next-hop-monitor.h" > > VLOG_DEFINE_THIS_MODULE(ofctrl); > > @@ -425,6 +426,7 @@ ofctrl_init(struct ovn_extend_table *group_table, > tx_counter = rconn_packet_counter_create(); > hmap_init(&installed_lflows); > hmap_init(&installed_pflows); > + ecmp_nexthop_init(); > ovs_list_init(&flow_updates); > ovn_init_symtab(&symtab); > groups = group_table; > @@ -877,6 +879,7 @@ ofctrl_destroy(void) > expr_symtab_destroy(&symtab); > shash_destroy(&symtab); > ofctrl_meter_bands_destroy(); > + ecmp_nexthop_destroy(); > } > > uint64_t > @@ -2662,8 +2665,10 @@ void > ofctrl_put(struct ovn_desired_flow_table *lflow_table, > struct ovn_desired_flow_table *pflow_table, > struct shash *pending_ct_zones, > + struct shash *current_ct_zones, > struct hmap *pending_lb_tuples, > struct ovsdb_idl_index *sbrec_meter_by_name, > + const struct sbrec_ecmp_nexthop_table *enh_table, > uint64_t req_cfg, > bool lflows_changed, > bool pflows_changed) > @@ -2704,6 +2709,8 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table, > /* OpenFlow messages to send to the switch to bring it up-to-date. */ > struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs); > > + ecmp_nexthop_monitor_run(enh_table, current_ct_zones, swconn, &msgs); > + > /* Iterate through ct zones that need to be flushed. 
*/ > struct shash_node *iter; > SHASH_FOR_EACH(iter, pending_ct_zones) { > diff --git a/controller/ofctrl.h b/controller/ofctrl.h > index 129e3b6ad..5735cd553 100644 > --- a/controller/ofctrl.h > +++ b/controller/ofctrl.h > @@ -31,6 +31,7 @@ struct ofpbuf; > struct ovsrec_bridge; > struct ovsrec_open_vswitch_table; > struct sbrec_meter_table; > +struct sbrec_ecmp_nexthop_table; > struct shash; > > struct ovn_desired_flow_table { > @@ -57,8 +58,10 @@ enum mf_field_id ofctrl_get_mf_field_id(void); > void ofctrl_put(struct ovn_desired_flow_table *lflow_table, > struct ovn_desired_flow_table *pflow_table, > struct shash *pending_ct_zones, > + struct shash *current_ct_zones, > struct hmap *pending_lb_tuples, > struct ovsdb_idl_index *sbrec_meter_by_name, > + const struct sbrec_ecmp_nexthop_table *enh_table, > uint64_t nb_cfg, > bool lflow_changed, > bool pflow_changed); > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index 6cee6450d..4b05077d3 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -5821,8 +5821,11 @@ main(int argc, char *argv[]) > ofctrl_put(&lflow_output_data->flow_table, > &pflow_output_data->flow_table, > &ct_zones_data->ctx.pending, > + &ct_zones_data->ctx.current, > &lb_data->removed_tuples, > sbrec_meter_by_name, > + sbrec_ecmp_nexthop_table_get( > + ovnsb_idl_loop.idl), > ofctrl_seqno_get_req_cfg(), > engine_node_changed(&en_lflow_output), > engine_node_changed(&en_pflow_output)); > diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h > index d563e044c..a024b0cd3 100644 > --- a/include/ovn/logical-fields.h > +++ b/include/ovn/logical-fields.h > @@ -212,6 +212,9 @@ const struct ovn_field *ovn_field_from_name(const char *name); > #define OVN_CT_ECMP_ETH_1ST_BIT 32 > #define OVN_CT_ECMP_ETH_END_BIT 79 > > +#define OVN_CT_ECMP_ETH_LOW (((1ULL << OVN_CT_ECMP_ETH_1ST_BIT) - 1) << 32) > +#define OVN_CT_ECMP_ETH_HIGH ((1ULL << (OVN_CT_ECMP_ETH_END_BIT - 63)) - 1) > + These two 
defines are only relevant to ecmp_nexthop_monitor_flush_ct_entry() and we only need them because we pass the labels_mask to ovs as a set of two 64bit values. I'd move the defines just above the ecmp_nexthop_monitor_flush_ct_entry() function definition so it's clear what we're doing. > #define OVN_CT_STR(LABEL_VALUE) OVS_STRINGIZE(LABEL_VALUE) > #define OVN_CT_MASKED_STR(LABEL_VALUE) \ > OVS_STRINGIZE(LABEL_VALUE) "/" OVS_STRINGIZE(LABEL_VALUE) > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > index 6dfc3055a..e9d15898f 100644 > --- a/tests/system-ovn.at > +++ b/tests/system-ovn.at > @@ -14002,3 +14002,529 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d > /.*terminating with signal 15.*/d"]) > AT_CLEANUP > ]) > + > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ECMP Flush CT entries - IPv4]) > +AT_KEYWORDS([ecmp]) > +ovn_start > +OVS_TRAFFIC_VSWITCHD_START() > + > +ADD_BR([br-int]) > +ADD_BR([br-ext]) > +ADD_BR([br-ecmp]) > + > +ovs-ofctl add-flow br-ext action=normal > +ovs-ofctl add-flow br-ecmp action=normal Missing check? > +# Set external-ids in br-int needed for ovn-controller > +ovs-vsctl \ Missing check. > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ > + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true > + > +# Start ovn-controller > +start_daemon ovn-controller > +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1 > + 'arp-max-timeout-sec' doesn't exist as an actual option for ovn-controller. 
Also, missing check > +check ovn-nbctl lr-add R1 > +check ovn-nbctl set logical_router R1 options:chassis=hv1 > +check ovn-nbctl lr-add R2 > +check ovn-nbctl set logical_router R2 options:chassis=hv1 > + > +check ovn-nbctl ls-add sw0 > +check ovn-nbctl ls-add sw1 > +check ovn-nbctl ls-add public > + > +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24 > +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 172.16.1.1/24 > + > +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 192.168.2.1/24 > +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 172.16.1.5/24 > + > +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ > + type=router options:router-port=rp-sw0 \ > + -- lsp-set-addresses sw0-rp router > + > +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ > + type=router options:router-port=rp-sw1 \ > + -- lsp-set-addresses sw1-rp router > + > +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \ > + type=router options:router-port=rp-public1 \ > + -- lsp-set-addresses public-rp1 router > + > +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \ > + type=router options:router-port=rp-public2 \ > + -- lsp-set-addresses public-rp2 router > + > +ADD_NAMESPACES(alice) > +ADD_VETH(alice, alice, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ > + "192.168.1.1") > +check ovn-nbctl lsp-add sw0 alice \ > + -- lsp-set-addresses alice "f0:00:00:01:02:03 192.168.1.2" > + > +ADD_NAMESPACES(peter) > +ADD_VETH(peter, peter, br-int, "192.168.2.2/24", "f0:00:02:01:02:03", \ > + "192.168.2.1") > +check ovn-nbctl lsp-add sw1 peter \ > + -- lsp-set-addresses peter "f0:00:02:01:02:03 192.168.2.2" > + > +check ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=phynet:br-ext > +check ovn-nbctl lsp-add public public1 \ > + -- lsp-set-addresses public1 unknown \ > + -- lsp-set-type public1 localnet \ > + -- lsp-set-options public1 network_name=phynet > + > +ADD_NAMESPACES(ecmp-path0) > +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "172.16.1.2/24", "f0:00:00:01:02:04", "172.16.1.1") > +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "172.16.2.2/24", "f0:00:00:01:03:04") > + > +ADD_NAMESPACES(ecmp-path1) > +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "172.16.1.3/24", "f0:00:00:01:02:05", "172.16.1.1") > +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "172.16.2.3/24", "f0:00:00:01:03:05") > + > +ADD_NAMESPACES(bob) > +ADD_VETH(bob, bob, br-ecmp, "172.16.2.10/24", "f0:00:00:01:02:06", "172.16.2.2") > + > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3 > + > +wait_for_ports_up > +check ovn-nbctl --wait=hv sync > +NETNS_DAEMONIZE([alice], [nc -l -k 80], [alice.pid]) > +NETNS_DAEMONIZE([peter], [nc -l -k 80], [peter.pid]) > + > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) > + > +wait_row_count ECMP_Nexthop 2 > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 > 
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) > +]) > + > +# Change bob default IP address > +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.2]) > +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.3]) > + > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) > + > +wait_row_count ECMP_Nexthop 2 > +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > +]) > + > +# Remove first ECMP route > +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2 > +check ovn-nbctl 
--wait=hv sync > +wait_row_count ECMP_Nexthop 1 > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' > + > +ovn-sbctl list ECMP_Nexthop > /tmp/ecmp-nh > + Debugging leftover? > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > +]) > + > +# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 > +wait_row_count ECMP_Nexthop 2 > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' > + > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05]) > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.2' > + > +# Change bob default IP address > +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.3]) > +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.2]) > + > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > 
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > +]) > + > +# Remove first ECMP route > +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2 > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 1 > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +]) > + > +# Remove second ECMP route > +check ovn-nbctl lr-route-del R1 > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 0 > + > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06]) > + > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3 > + > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.2 > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.3 > + > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 4 > + > +NS_CHECK_EXEC([ecmp-path0], [ip route add 192.168.2.2/32 via 172.16.1.5]) > +NS_CHECK_EXEC([ecmp-path1], [ip route add 192.168.2.2/32 via 172.16.1.5]) > + > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) > + > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +NS_CHECK_EXEC([bob], 
[nc -z 192.168.2.2 80], [0]) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > +]) > + > +check ovn-nbctl lr-route-del R1 > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 2 > +wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='172.16.1.2' > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > +]) > + > +check 
ovn-nbctl lr-route-del R2 > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 0 > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +]) > + > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > + > +as ovn-sb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as ovn-nb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as northd > +OVS_APP_EXIT_AND_WAIT([ovn-northd]) > + > +as > +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d > +/.*terminating with signal 15.*/d"]) > +AT_CLEANUP > +]) > + > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ECMP Flush CT entries - IPv6]) > +AT_KEYWORDS([ecmp]) > +ovn_start > +OVS_TRAFFIC_VSWITCHD_START() > + > +ADD_BR([br-int]) > +ADD_BR([br-ext]) > +ADD_BR([br-ecmp]) > + > +ovs-ofctl add-flow br-ext action=normal > +ovs-ofctl add-flow br-ecmp action=normal > +# Set external-ids in br-int needed for ovn-controller > +ovs-vsctl \ > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ > + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true Missing checks for ovs-ofctl and ovs-vsctl. > + > +# Start ovn-controller > +start_daemon ovn-controller > +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1 > + Wrong option name and missing check. 
> +check ovn-nbctl lr-add R1 > +check ovn-nbctl set logical_router R1 options:chassis=hv1 > +check ovn-nbctl lr-add R2 > +check ovn-nbctl set logical_router R2 options:chassis=hv1 > + > +check ovn-nbctl ls-add sw0 > +check ovn-nbctl ls-add sw1 > +check ovn-nbctl ls-add public > + > +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 fd11::1/64 > +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 fd12::1/64 > + > +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 fd14::1/64 > +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 fd12::5/64 > + > +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ > + type=router options:router-port=rp-sw0 \ > + -- lsp-set-addresses sw0-rp router > + > +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ > + type=router options:router-port=rp-sw1 \ > + -- lsp-set-addresses sw1-rp router > + > +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \ > + type=router options:router-port=rp-public1 \ > + -- lsp-set-addresses public-rp1 router > + > +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \ > + type=router options:router-port=rp-public2 \ > + -- lsp-set-addresses public-rp2 router > + > +ADD_NAMESPACES(alice) > +ADD_VETH(alice, alice, br-int, "fd11::2/64", "f0:00:00:01:02:03", "fd11::1", "nodad") > +check ovn-nbctl lsp-add sw0 alice -- lsp-set-addresses alice "f0:00:00:01:02:03 fd11::2" > + > +ADD_NAMESPACES(peter) > +ADD_VETH(peter, peter, br-int, "fd14::2/64", "f0:00:02:01:02:03", "fd14::1", "nodad") > +check ovn-nbctl lsp-add sw1 peter -- lsp-set-addresses peter "f0:00:02:01:02:03 fd14::2" > + > +check ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=phynet:br-ext > +check ovn-nbctl lsp-add public public1 \ > + -- lsp-set-addresses public1 unknown \ > + -- lsp-set-type public1 localnet \ > + -- lsp-set-options public1 network_name=phynet > + > +ADD_NAMESPACES(ecmp-path0) > +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "fd12::2/64", "f0:00:00:01:02:04", "fd12::1", "nodad") > +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "fd13::2/64", "f0:00:00:01:03:04") > +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path0], [ip a show dev ecmp-p02 | grep "fe80::" | grep -v tentative])]) > + > +ADD_NAMESPACES(ecmp-path1) > +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "fd12::3/64", "f0:00:00:01:02:05", "fd12::1", "nodad") > +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "fd13::3/64", "f0:00:00:01:03:05") > +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path1], [ip a show dev ecmp-p12 | grep "fe80::" | grep -v tentative])]) > + > +ADD_NAMESPACES(bob) > +ADD_VETH(bob, bob, br-ecmp, "fd13::a/64", "f0:00:00:01:02:06", "fd13::2", "nodad") > + > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3 > + > +NS_CHECK_EXEC([ecmp-path0], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl > +net.ipv6.conf.all.forwarding = 1 > +]) > +NS_CHECK_EXEC([ecmp-path1], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl > +net.ipv6.conf.all.forwarding = 1 > +]) > + > +ovn-nbctl --wait=hv sync > +NETNS_DAEMONIZE([alice], [nc -6 -l -k 80], [alice.pid]) > +NETNS_DAEMONIZE([peter], [nc -6 -l -k 80], [peter.pid]) > + > +NS_CHECK_EXEC([bob], [ping6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > + > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) > + > +wait_row_count ECMP_Nexthop 2 > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | 
\ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) > +]) > + > +# Change bob default IP address > +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::2]) > +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::3]) > + > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) > + > +wait_row_count ECMP_Nexthop 2 > +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) > 
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > +]) > + > +# Remove first ECMP route > +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2 > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 1 > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > +]) > + > + Add the route back and verify we do not flush if we have multiple next-hops with the same mac address > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 > +wait_row_count ECMP_Nexthop 2 > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' > +# > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05]) > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::2"' > + > +# Change bob default IP address > +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::3]) > +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::2]) > + > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) > + > +AT_CHECK([ovs-appctl 
dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > +]) > + > +# Remove first ECMP route > +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2 > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 1 > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +]) > + > +# Remove second ECMP route > +check ovn-nbctl lr-route-del R1 > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 0 > + > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06]) > + > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3 > + > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::2 > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::3 > + > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 4 > + > +NS_CHECK_EXEC([ecmp-path0], [ip route add fd14::2/128 via fd12::5]) > +NS_CHECK_EXEC([ecmp-path1], [ip route add fd14::2/128 via fd12::5]) > + > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) > + > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd14::2 | FORMAT_PING], \ > +[0], [dnl > +3 packets 
transmitted, 3 received, 0% packet loss, time 0ms > +]) > +NS_CHECK_EXEC([bob], [nc -6 -z fd14::2 80], [0]) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > +tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > +]) > + > +# Remove second ECMP route > +check ovn-nbctl lr-route-del R1 > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 2 > +wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='"fd12::2"' > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > +tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) 
> +]) > + > +check ovn-nbctl lr-route-del R2 > +check ovn-nbctl --wait=hv sync > +wait_row_count ECMP_Nexthop 0 > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > +]) > + > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > + > +as ovn-sb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as ovn-nb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as northd > +OVS_APP_EXIT_AND_WAIT([ovn-northd]) > + > +as > +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d > +/.*terminating with signal 15.*/d"]) > +AT_CLEANUP > +]) Regards, Dumitru
On Dec 18, Dumitru Ceara wrote: > On 11/13/24 12:05 PM, Lorenzo Bianconi wrote: > > Introduce ecmp_nexthop_monitor in ovn-controller in order to track and > > flush ecmp-symmetric reply ct entries when requested by the CMS (e.g. > > removing the related static ecmp routes). CT entries are flushed using > > the ethernet mac address stored in ct_label. > > > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > > --- > > Hi Lorenzo, > > Thanks for the patch! Hi Dumitru, Thanks for the review. > > > NEWS | 2 + > > controller/automake.mk | 4 +- > > controller/ecmp-next-hop-monitor.c | 184 ++++++++++ > > controller/ecmp-next-hop-monitor.h | 25 ++ > > controller/ofctrl.c | 7 + > > controller/ofctrl.h | 3 + > > controller/ovn-controller.c | 3 + > > include/ovn/logical-fields.h | 3 + > > tests/system-ovn.at | 526 +++++++++++++++++++++++++++++ > > 9 files changed, 756 insertions(+), 1 deletion(-) > > create mode 100644 controller/ecmp-next-hop-monitor.c > > create mode 100644 controller/ecmp-next-hop-monitor.h > > > > diff --git a/NEWS b/NEWS > > index 1f8f54d5d..f46285d32 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -9,6 +9,8 @@ Post v24.09.0 > > ECMP-nexthop. > > By default ovn-controller continuously sends ARP/ND packets for > > ECMP-nexthop. > > + - Introduce ovn-controller ECMP_nexthop monitor in order to flush stale ct > > While we (OVN developers) know what "ct" stands for, the NEWS file > targets users. I think we should rephrase this to: > > "Auto flush ECMP symmetric reply connection states when an ECMP route is > removed by the CMS." ack, I will fix it. > > > + entries when related ecmp routes are removed by the CMS. 
> > > > OVN v24.09.0 - 13 Sep 2024 > > -------------------------- > > diff --git a/controller/automake.mk b/controller/automake.mk > > index bb0bf2d33..766e36382 100644 > > --- a/controller/automake.mk > > +++ b/controller/automake.mk > > @@ -51,7 +51,9 @@ controller_ovn_controller_SOURCES = \ > > controller/ct-zone.h \ > > controller/ct-zone.c \ > > controller/ovn-dns.c \ > > - controller/ovn-dns.h > > + controller/ovn-dns.h \ > > + controller/ecmp-next-hop-monitor.h \ > > + controller/ecmp-next-hop-monitor.c > > > > controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la > > man_MANS += controller/ovn-controller.8 > > diff --git a/controller/ecmp-next-hop-monitor.c b/controller/ecmp-next-hop-monitor.c > > new file mode 100644 > > index 000000000..bafe9750f > > --- /dev/null > > +++ b/controller/ecmp-next-hop-monitor.c > > @@ -0,0 +1,184 @@ > > +/* Copyright (c) 2024, Red Hat, Inc. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. 
> > + */ > > + > > +#include <config.h> > > +#include "ct-zone.h" > > +#include "lib/ovn-util.h" > > +#include "lib/simap.h" > > +#include "openvswitch/hmap.h" > > +#include "openvswitch/ofp-ct.h" > > +#include "openvswitch/rconn.h" > > +#include "openvswitch/vlog.h" > > +#include "ovn/logical-fields.h" > > +#include "ovn-sb-idl.h" > > +#include "controller/ecmp-next-hop-monitor.h" > > + > > +VLOG_DEFINE_THIS_MODULE(ecmp_next_hop_monitor); > > + > > We don't log anything in this module. Should we? If not, we can remove > the line above. ack, I will remove it. > > > +static struct hmap ecmp_nexthop; > > + > > +struct ecmp_nexthop_data { > > + struct hmap_node hmap_node; > > + uint16_t zone_id; > > + char *nexthop; > > + char *mac; > > +}; > > + > > +void ecmp_nexthop_init(void) > > +{ > > + hmap_init(&ecmp_nexthop); > > +} > > + > > +static void > > +ecmp_nexthop_erase_entry(struct ecmp_nexthop_data *e) > > Nit: I see you used "erase" with other occasions in the past for other > features but I think in most places in the code we call these kind of > functions *_destroy(). We could call this ecmp_nexthop_destroy_entry(), > wdyt? 
ack > > > +{ > > + free(e->nexthop); > > + free(e->mac); > > + free(e); > > +} > > + > > +static void > > +ecmp_nexthop_destroy_map(struct hmap *map) > > +{ > > + struct ecmp_nexthop_data *e; > > + HMAP_FOR_EACH_POP (e, hmap_node, map) { > > + ecmp_nexthop_erase_entry(e); > > + } > > + hmap_destroy(map); > > +} > > + > > +void ecmp_nexthop_destroy(void) > > +{ > > + ecmp_nexthop_destroy_map(&ecmp_nexthop); > > +} > > + > > +static struct ecmp_nexthop_data * > > +ecmp_nexthop_alloc_entry(const char *nexthop, const char *mac, > > + const uint16_t zone_id, struct hmap *map) > > +{ > > + struct ecmp_nexthop_data *e = xmalloc(sizeof *e); > > + e->nexthop = xstrdup(nexthop); > > + e->mac = xstrdup(mac); > > + e->zone_id = zone_id; > > + > > + uint32_t hash = hash_string(nexthop, 0); > > + hash = hash_add(hash, hash_string(mac, 0)); > > + hash = hash_add(hash, zone_id); > > + hmap_insert(map, &e->hmap_node, hash); > > + > > + return e; > > +} > > + > > +static struct ecmp_nexthop_data * > > +ecmp_nexthop_find_entry(const char *nexthop, const char *mac, > > + const uint16_t zone_id, struct hmap *map) > > +{ > > + uint32_t hash = hash_string(nexthop, 0); > > + hash = hash_add(hash, hash_string(mac, 0)); > > + hash = hash_add(hash, zone_id); > > + > > + struct ecmp_nexthop_data *e; > > + HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) { > > + if (!strcmp(e->nexthop, nexthop) && > > + !strcmp(e->mac, mac) && e->zone_id == zone_id) { > > + return e; > > + } > > + } > > + return NULL; > > +} > > + > > +static void > > +ecmp_nexthop_monitor_flush_ct_entry(const struct rconn *swconn, > > + const char *mac, uint16_t zone_id, > > + struct ovs_list *msgs) > > +{ > > + struct eth_addr ea; > > + if (!ovs_scan(mac, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) { > > + return; > > + } > > + > > + ovs_u128 mask = { > > + /* ct_label.ecmp_reply_eth BITS[32-79] */ > > + .u64.hi = OVN_CT_ECMP_ETH_HIGH, > > + .u64.lo = OVN_CT_ECMP_ETH_LOW, > > + }; > > + > > + ovs_be32 lo = 
get_unaligned_be32((void *)&ea.be16[1]); > > + ovs_u128 nexthop = { > > + .u64.hi = ntohs(ea.be16[0]), > > + .u64.lo = (uint64_t) ntohl(lo) << 32, > > + }; > > + > > + struct ofp_ct_match match = { > > + .labels = nexthop, > > + .labels_mask = mask, > > + }; > > + struct ofpbuf *msg = ofp_ct_match_encode(&match, &zone_id, > > + rconn_get_version(swconn)); > > + ovs_list_push_back(msgs, &msg->list_node); > > +} > > + > > +void > > +ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table, > > + const struct shash *current_ct_zones, > > + const struct rconn *swconn, struct ovs_list *msgs) > > +{ > > + struct hmap sb_ecmp_nexthop = HMAP_INITIALIZER(&sb_ecmp_nexthop); > > + > > + const struct sbrec_ecmp_nexthop *sbrec_ecmp_nexthop; > > + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sbrec_ecmp_nexthop, enh_table) { > > We should filter out non-local datapaths. ack, I will fix it > > > + struct sbrec_port_binding *pb = sbrec_ecmp_nexthop->port; > > + if (!pb) { > > Can this ever happen? The port is a strong reference in the SB and it > should be mandatory (see comment in patch 1/4). ack, I will fix it > > > + continue; > > + } > > + > > + const char *dp_name = smap_get(&pb->datapath->external_ids, "name"); > > + if (!dp_name) { > > + continue; > > + } > > + > > + char *name = xasprintf("%s_dnat", dp_name); > > This makes assumptions about how the zone name is created in ct-zone.c. > > We have the alloc_nat_zone_key(), please use that instead. 
ack > > > + struct ct_zone *ct_zone = shash_find_data(current_ct_zones, name); > > + free(name); > > + > > + if (!ct_zone) { > > + continue; > > + } > > + > > + if (!ecmp_nexthop_find_entry(sbrec_ecmp_nexthop->nexthop, > > + sbrec_ecmp_nexthop->mac, ct_zone->zone, > > + &ecmp_nexthop)) { > > + ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop, > > + sbrec_ecmp_nexthop->mac, > > + ct_zone->zone, &ecmp_nexthop); > > + } > > + ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop, > > + sbrec_ecmp_nexthop->mac, ct_zone->zone, > > + &sb_ecmp_nexthop); > > + } > > + > > + struct ecmp_nexthop_data *e; > > + HMAP_FOR_EACH_SAFE (e, hmap_node, &ecmp_nexthop) { > > + if (!ecmp_nexthop_find_entry(e->nexthop, e->mac, e->zone_id, > > + &sb_ecmp_nexthop)) { > > + ecmp_nexthop_monitor_flush_ct_entry(swconn, e->mac, > > + e->zone_id, msgs); > > + hmap_remove(&ecmp_nexthop, &e->hmap_node); > > + ecmp_nexthop_erase_entry(e); > > + } > > + } > > + > > + ecmp_nexthop_destroy_map(&sb_ecmp_nexthop); > > +} > > diff --git a/controller/ecmp-next-hop-monitor.h b/controller/ecmp-next-hop-monitor.h > > new file mode 100644 > > index 000000000..ee8278e3b > > --- /dev/null > > +++ b/controller/ecmp-next-hop-monitor.h > > @@ -0,0 +1,25 @@ > > +/* Copyright (c) 2024, Red Hat, Inc. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. 
> > + */ > > + > > +#ifndef OVN_CMP_NEXT_HOP_MONITOR_H > > +#define OVN_CMP_NEXT_HOP_MONITOR_H > > OVN_CMP_NEXT_HOP_MONITOR_H? > > Did you mean OVN_ECMP_NEXT_HOP_MONITOR_H instead? Same thing in the > comment at the end of the file. ack, I will fix it > > > + > > +void ecmp_nexthop_init(void); > > +void ecmp_nexthop_destroy(void); > > +void ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table,> + const struct shash *current_ct_zones, > > + const struct rconn *swconn, > > + struct ovs_list *msgs); > > +#endif /* OVN_CMP_NEXT_HOP_MONITOR_H */ > > diff --git a/controller/ofctrl.c b/controller/ofctrl.c > > index f9387d375..e44da749d 100644 > > --- a/controller/ofctrl.c > > +++ b/controller/ofctrl.c > > @@ -54,6 +54,7 @@ > > #include "vswitch-idl.h" > > #include "ovn-sb-idl.h" > > #include "ct-zone.h" > > +#include "ecmp-next-hop-monitor.h" > > > > VLOG_DEFINE_THIS_MODULE(ofctrl); > > > > @@ -425,6 +426,7 @@ ofctrl_init(struct ovn_extend_table *group_table, > > tx_counter = rconn_packet_counter_create(); > > hmap_init(&installed_lflows); > > hmap_init(&installed_pflows); > > + ecmp_nexthop_init(); > > ovs_list_init(&flow_updates); > > ovn_init_symtab(&symtab); > > groups = group_table; > > @@ -877,6 +879,7 @@ ofctrl_destroy(void) > > expr_symtab_destroy(&symtab); > > shash_destroy(&symtab); > > ofctrl_meter_bands_destroy(); > > + ecmp_nexthop_destroy(); > > } > > > > uint64_t > > @@ -2662,8 +2665,10 @@ void > > ofctrl_put(struct ovn_desired_flow_table *lflow_table, > > struct ovn_desired_flow_table *pflow_table, > > struct shash *pending_ct_zones, > > + struct shash *current_ct_zones, > > struct hmap *pending_lb_tuples, > > struct ovsdb_idl_index *sbrec_meter_by_name, > > + const struct sbrec_ecmp_nexthop_table *enh_table, > > uint64_t req_cfg, > > bool lflows_changed, > > bool pflows_changed) > > @@ -2704,6 +2709,8 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table, > > /* OpenFlow messages to send to the switch to bring it up-to-date. 
*/ > > struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs); > > > > + ecmp_nexthop_monitor_run(enh_table, current_ct_zones, swconn, &msgs); > > + > > /* Iterate through ct zones that need to be flushed. */ > > struct shash_node *iter; > > SHASH_FOR_EACH(iter, pending_ct_zones) { > > diff --git a/controller/ofctrl.h b/controller/ofctrl.h > > index 129e3b6ad..5735cd553 100644 > > --- a/controller/ofctrl.h > > +++ b/controller/ofctrl.h > > @@ -31,6 +31,7 @@ struct ofpbuf; > > struct ovsrec_bridge; > > struct ovsrec_open_vswitch_table; > > struct sbrec_meter_table; > > +struct sbrec_ecmp_nexthop_table; > > struct shash; > > > > struct ovn_desired_flow_table { > > @@ -57,8 +58,10 @@ enum mf_field_id ofctrl_get_mf_field_id(void); > > void ofctrl_put(struct ovn_desired_flow_table *lflow_table, > > struct ovn_desired_flow_table *pflow_table, > > struct shash *pending_ct_zones, > > + struct shash *current_ct_zones, > > struct hmap *pending_lb_tuples, > > struct ovsdb_idl_index *sbrec_meter_by_name, > > + const struct sbrec_ecmp_nexthop_table *enh_table, > > uint64_t nb_cfg, > > bool lflow_changed, > > bool pflow_changed); > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > index 6cee6450d..4b05077d3 100644 > > --- a/controller/ovn-controller.c > > +++ b/controller/ovn-controller.c > > @@ -5821,8 +5821,11 @@ main(int argc, char *argv[]) > > ofctrl_put(&lflow_output_data->flow_table, > > &pflow_output_data->flow_table, > > &ct_zones_data->ctx.pending, > > + &ct_zones_data->ctx.current, > > &lb_data->removed_tuples, > > sbrec_meter_by_name, > > + sbrec_ecmp_nexthop_table_get( > > + ovnsb_idl_loop.idl), > > ofctrl_seqno_get_req_cfg(), > > engine_node_changed(&en_lflow_output), > > engine_node_changed(&en_pflow_output)); > > diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h > > index d563e044c..a024b0cd3 100644 > > --- a/include/ovn/logical-fields.h > > +++ b/include/ovn/logical-fields.h > > @@ -212,6 +212,9 @@ const struct 
ovn_field *ovn_field_from_name(const char *name); > > #define OVN_CT_ECMP_ETH_1ST_BIT 32 > > #define OVN_CT_ECMP_ETH_END_BIT 79 > > > > +#define OVN_CT_ECMP_ETH_LOW (((1ULL << OVN_CT_ECMP_ETH_1ST_BIT) - 1) << 32) > > +#define OVN_CT_ECMP_ETH_HIGH ((1ULL << (OVN_CT_ECMP_ETH_END_BIT - 63)) - 1) > > + > > These two defines are only relevant to > ecmp_nexthop_monitor_flush_ct_entry() and we only need them because we > pass the labels_mask to ovs as a set of two 64bit values. I'd move the > defines just above the ecmp_nexthop_monitor_flush_ct_entry() function > definition so it's clear what we're doing. ack, I will move them above ecmp_nexthop_monitor_flush_ct_entry() > > > #define OVN_CT_STR(LABEL_VALUE) OVS_STRINGIZE(LABEL_VALUE) > > #define OVN_CT_MASKED_STR(LABEL_VALUE) \ > > OVS_STRINGIZE(LABEL_VALUE) "/" OVS_STRINGIZE(LABEL_VALUE) > > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > > index 6dfc3055a..e9d15898f 100644 > > --- a/tests/system-ovn.at > > +++ b/tests/system-ovn.at > > @@ -14002,3 +14002,529 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d > > /.*terminating with signal 15.*/d"]) > > AT_CLEANUP > > ]) > > + > > +OVN_FOR_EACH_NORTHD([ > > +AT_SETUP([ECMP Flush CT entries - IPv4]) > > +AT_KEYWORDS([ecmp]) > > +ovn_start > > +OVS_TRAFFIC_VSWITCHD_START() > > + > > +ADD_BR([br-int]) > > +ADD_BR([br-ext]) > > +ADD_BR([br-ecmp]) > > + > > +ovs-ofctl add-flow br-ext action=normal > > +ovs-ofctl add-flow br-ecmp action=normal > > Missing check? > > > +# Set external-ids in br-int needed for ovn-controller > > +ovs-vsctl \ > > Missing check. > > > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > > + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > > + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ > > + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true > > + > > +# Start ovn-controller > > +start_daemon ovn-controller > > +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1 > > + > > 'arp-max-timeout-sec' doesn't exist as an actual option for > ovn-controller. Also, missing check it is arp-nd-max-timeout-sec, I will fix it. > > > +check ovn-nbctl lr-add R1 > > +check ovn-nbctl set logical_router R1 options:chassis=hv1 > > +check ovn-nbctl lr-add R2 > > +check ovn-nbctl set logical_router R2 options:chassis=hv1 > > + > > +check ovn-nbctl ls-add sw0 > > +check ovn-nbctl ls-add sw1 > > +check ovn-nbctl ls-add public > > + > > +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24 > > +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 172.16.1.1/24 > > + > > +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 192.168.2.1/24 > > +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 172.16.1.5/24 > > + > > +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ > > + type=router options:router-port=rp-sw0 \ > > + -- lsp-set-addresses sw0-rp router > > + > > +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ > > + type=router options:router-port=rp-sw1 \ > > + -- lsp-set-addresses sw1-rp router > > + > > +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \ > > + type=router options:router-port=rp-public1 \ > > + -- lsp-set-addresses public-rp1 router > > + > > +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \ > > + type=router options:router-port=rp-public2 \ > > + -- lsp-set-addresses public-rp2 router > > + > > +ADD_NAMESPACES(alice) > > +ADD_VETH(alice, alice, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ > > + "192.168.1.1") > > +check ovn-nbctl lsp-add sw0 alice \ > > + -- lsp-set-addresses alice "f0:00:00:01:02:03 192.168.1.2" > > + > > +ADD_NAMESPACES(peter) > > 
+ADD_VETH(peter, peter, br-int, "192.168.2.2/24", "f0:00:02:01:02:03", \ > > + "192.168.2.1") > > +check ovn-nbctl lsp-add sw1 peter \ > > + -- lsp-set-addresses peter "f0:00:02:01:02:03 192.168.2.2" > > + > > +check ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext > > +check ovn-nbctl lsp-add public public1 \ > > + -- lsp-set-addresses public1 unknown \ > > + -- lsp-set-type public1 localnet \ > > + -- lsp-set-options public1 network_name=phynet > > + > > +ADD_NAMESPACES(ecmp-path0) > > +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "172.16.1.2/24", "f0:00:00:01:02:04", "172.16.1.1") > > +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "172.16.2.2/24", "f0:00:00:01:03:04") > > + > > +ADD_NAMESPACES(ecmp-path1) > > +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "172.16.1.3/24", "f0:00:00:01:02:05", "172.16.1.1") > > +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "172.16.2.3/24", "f0:00:00:01:03:05") > > + > > +ADD_NAMESPACES(bob) > > +ADD_VETH(bob, bob, br-ecmp, "172.16.2.10/24", "f0:00:00:01:02:06", "172.16.2.2") > > + > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3 > > + > > +wait_for_ports_up > > +check ovn-nbctl --wait=hv sync > > +NETNS_DAEMONIZE([alice], [nc -l -k 80], [alice.pid]) > > +NETNS_DAEMONIZE([peter], [nc -l -k 80], [peter.pid]) > > + > > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ > > +[0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) > > + > > +wait_row_count ECMP_Nexthop 2 > > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' > > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], 
[dnl > > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 > > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) > > +]) > > + > > +# Change bob default IP address > > +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.2]) > > +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.3]) > > + > > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ > > +[0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) > > + > > +wait_row_count ECMP_Nexthop 2 > > +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' > > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 > > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) > > 
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > > +]) > > + > > +# Remove first ECMP route > > +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2 > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 1 > > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' > > + > > +ovn-sbctl list ECMP_Nexthop > /tmp/ecmp-nh > > + > > Debugging leftover? yep :) > > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > > +]) > > + > > +# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 > > +wait_row_count ECMP_Nexthop 2 > > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' > > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' > > + > > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05]) > > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.2' > > + > > +# Change bob default IP address > > +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.3]) > > +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.2]) > > + > > +NS_CHECK_EXEC([bob], [ping -q -c 3 
-i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ > > +[0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > > +]) > > + > > +# Remove first ECMP route > > +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2 > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 1 > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +]) > > + > > +# Remove second ECMP route > > +check ovn-nbctl lr-route-del R1 > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 0 > > + > > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06]) > > + > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3 > > + > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.2 > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.3 > > + > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 4 > > + > > +NS_CHECK_EXEC([ecmp-path0], [ip route add 192.168.2.2/32 via 172.16.1.5]) > > +NS_CHECK_EXEC([ecmp-path1], [ip route 
add 192.168.2.2/32 via 172.16.1.5]) > > + > > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ > > +[0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) > > + > > +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \ > > +[0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +NS_CHECK_EXEC([bob], [nc -z 192.168.2.2 80], [0]) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > > +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > > +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > > +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > > +]) > > + > > +check ovn-nbctl lr-route-del R1 > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 2 > > +wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='172.16.1.2' > > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 
's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > > +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > > +]) > > + > > +check ovn-nbctl lr-route-del R2 > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 0 > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +]) > > + > > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > > + > > +as ovn-sb > > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > > + > > +as ovn-nb > > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > > + > > +as northd > > +OVS_APP_EXIT_AND_WAIT([ovn-northd]) > > + > > +as > > +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d > > +/.*terminating with signal 15.*/d"]) > > +AT_CLEANUP > > +]) > > + > > +OVN_FOR_EACH_NORTHD([ > > +AT_SETUP([ECMP Flush CT entries - IPv6]) > > +AT_KEYWORDS([ecmp]) > > +ovn_start > > +OVS_TRAFFIC_VSWITCHD_START() > > + > > +ADD_BR([br-int]) > > +ADD_BR([br-ext]) > > +ADD_BR([br-ecmp]) > > + > > +ovs-ofctl add-flow br-ext action=normal > > +ovs-ofctl add-flow br-ecmp action=normal > > +# Set external-ids in br-int needed for ovn-controller > > +ovs-vsctl \ > > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > > + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > > + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ > > + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true > > Missing checks for ovs-ofctl and ovs-vsctl. 
ack, I will fix it > > > + > > +# Start ovn-controller > > +start_daemon ovn-controller > > +ovs-vsctl set Open_vSwitch . external-ids:arp-max-timeout-sec=1 > > + > > Wrong option name and missing check. ack, I will fix it Regards, Lorenzo > > > +check ovn-nbctl lr-add R1 > > +check ovn-nbctl set logical_router R1 options:chassis=hv1 > > +check ovn-nbctl lr-add R2 > > +check ovn-nbctl set logical_router R2 options:chassis=hv1 > > + > > +check ovn-nbctl ls-add sw0 > > +check ovn-nbctl ls-add sw1 > > +check ovn-nbctl ls-add public > > + > > +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 fd11::1/64 > > +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 fd12::1/64 > > + > > +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 fd14::1/64 > > +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 fd12::5/64 > > + > > +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ > > + type=router options:router-port=rp-sw0 \ > > + -- lsp-set-addresses sw0-rp router > > + > > +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ > > + type=router options:router-port=rp-sw1 \ > > + -- lsp-set-addresses sw1-rp router > > + > > +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \ > > + type=router options:router-port=rp-public1 \ > > + -- lsp-set-addresses public-rp1 router > > + > > +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \ > > + type=router options:router-port=rp-public2 \ > > + -- lsp-set-addresses public-rp2 router > > + > > +ADD_NAMESPACES(alice) > > +ADD_VETH(alice, alice, br-int, "fd11::2/64", "f0:00:00:01:02:03", "fd11::1", "nodad") > > +check ovn-nbctl lsp-add sw0 alice -- lsp-set-addresses alice "f0:00:00:01:02:03 fd11::2" > > + > > +ADD_NAMESPACES(peter) > > +ADD_VETH(peter, peter, br-int, "fd14::2/64", "f0:00:02:01:02:03", "fd14::1", "nodad") > > +check ovn-nbctl lsp-add sw1 peter -- lsp-set-addresses peter "f0:00:02:01:02:03 fd14::2" > > + > > +check 
ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext > > +check ovn-nbctl lsp-add public public1 \ > > + -- lsp-set-addresses public1 unknown \ > > + -- lsp-set-type public1 localnet \ > > + -- lsp-set-options public1 network_name=phynet > > + > > +ADD_NAMESPACES(ecmp-path0) > > +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "fd12::2/64", "f0:00:00:01:02:04", "fd12::1", "nodad") > > +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "fd13::2/64", "f0:00:00:01:03:04") > > +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path0], [ip a show dev ecmp-p02 | grep "fe80::" | grep -v tentative])]) > > + > > +ADD_NAMESPACES(ecmp-path1) > > +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "fd12::3/64", "f0:00:00:01:02:05", "fd12::1", "nodad") > > +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "fd13::3/64", "f0:00:00:01:03:05") > > +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path1], [ip a show dev ecmp-p12 | grep "fe80::" | grep -v tentative])]) > > + > > +ADD_NAMESPACES(bob) > > +ADD_VETH(bob, bob, br-ecmp, "fd13::a/64", "f0:00:00:01:02:06", "fd13::2", "nodad") > > + > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3 > > + > > +NS_CHECK_EXEC([ecmp-path0], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl > > +net.ipv6.conf.all.forwarding = 1 > > +]) > > +NS_CHECK_EXEC([ecmp-path1], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl > > +net.ipv6.conf.all.forwarding = 1 > > +]) > > + > > +ovn-nbctl --wait=hv sync > > +NETNS_DAEMONIZE([alice], [nc -6 -l -k 80], [alice.pid]) > > +NETNS_DAEMONIZE([peter], [nc -6 -l -k 80], [peter.pid]) > > + > > +NS_CHECK_EXEC([bob], [ping6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ > > +[0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > + > > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) > > + > > +wait_row_count ECMP_Nexthop 2 > > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' > > +wait_column 
'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 > > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) > > +]) > > + > > +# Change bob default IP address > > +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::2]) > > +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::3]) > > + > > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ > > +[0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) > > + > > +wait_row_count ECMP_Nexthop 2 > > +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' > > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 > > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > > 
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) > > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > > +]) > > + > > +# Remove first ECMP route > > +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2 > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 1 > > +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > > +]) > > + > > + Add the route back and verify we do not flush if we have multiple next-hops with the same mac address > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 > > +wait_row_count ECMP_Nexthop 2 > > +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' > > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' > > +# > > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05]) > > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::2"' > > + > > +# Change bob default IP address > > +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::3]) > > +NS_CHECK_EXEC([bob], [ip -6 route add 
::/0 via fd13::2]) > > + > > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ > > +[0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 > > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) > > +]) > > + > > +# Remove first ECMP route > > +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2 > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 1 > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +]) > > + > > +# Remove second ECMP route > > +check ovn-nbctl lr-route-del R1 > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 0 > > + > > +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06]) > > + > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3 > > + > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::2 > > +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::3 > > + > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 4 > > + > > +NS_CHECK_EXEC([ecmp-path0], [ip route add fd14::2/128 via fd12::5]) > > +NS_CHECK_EXEC([ecmp-path1], [ip route add 
fd14::2/128 via fd12::5]) > > + > > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ > > +[0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) > > + > > +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd14::2 | FORMAT_PING], \ > > +[0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +NS_CHECK_EXEC([bob], [nc -6 -z fd14::2 80], [0]) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > > +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > > +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > > +tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > > +]) > > + > > +# Remove second ECMP route > > +check ovn-nbctl lr-route-del R1 > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 2 > > +wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='"fd12::2"' > > +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], 
[dnl > > +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 > > +tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) > > +]) > > + > > +check ovn-nbctl lr-route-del R2 > > +check ovn-nbctl --wait=hv sync > > +wait_row_count ECMP_Nexthop 0 > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | > > +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl > > +]) > > + > > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > > + > > +as ovn-sb > > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > > + > > +as ovn-nb > > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > > + > > +as northd > > +OVS_APP_EXIT_AND_WAIT([ovn-northd]) > > + > > +as > > +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d > > +/.*terminating with signal 15.*/d"]) > > +AT_CLEANUP > > +]) > > Regards, > Dumitru >
diff --git a/NEWS b/NEWS index 1f8f54d5d..f46285d32 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,8 @@ Post v24.09.0 ECMP-nexthop. By default ovn-controller continuously sends ARP/ND packets for ECMP-nexthop. + - Introduce ovn-controller ECMP_nexthop monitor in order to flush stale ct + entries when related ecmp routes are removed by the CMS. OVN v24.09.0 - 13 Sep 2024 -------------------------- diff --git a/controller/automake.mk b/controller/automake.mk index bb0bf2d33..766e36382 100644 --- a/controller/automake.mk +++ b/controller/automake.mk @@ -51,7 +51,9 @@ controller_ovn_controller_SOURCES = \ controller/ct-zone.h \ controller/ct-zone.c \ controller/ovn-dns.c \ - controller/ovn-dns.h + controller/ovn-dns.h \ + controller/ecmp-next-hop-monitor.h \ + controller/ecmp-next-hop-monitor.c controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la man_MANS += controller/ovn-controller.8 diff --git a/controller/ecmp-next-hop-monitor.c b/controller/ecmp-next-hop-monitor.c new file mode 100644 index 000000000..bafe9750f --- /dev/null +++ b/controller/ecmp-next-hop-monitor.c @@ -0,0 +1,184 @@ +/* Copyright (c) 2024, Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <config.h> +#include "ct-zone.h" +#include "lib/ovn-util.h" +#include "lib/simap.h" +#include "openvswitch/hmap.h" +#include "openvswitch/ofp-ct.h" +#include "openvswitch/rconn.h" +#include "openvswitch/vlog.h" +#include "ovn/logical-fields.h" +#include "ovn-sb-idl.h" +#include "controller/ecmp-next-hop-monitor.h" + +VLOG_DEFINE_THIS_MODULE(ecmp_next_hop_monitor); + +static struct hmap ecmp_nexthop; + +struct ecmp_nexthop_data { + struct hmap_node hmap_node; + uint16_t zone_id; + char *nexthop; + char *mac; +}; + +void ecmp_nexthop_init(void) +{ + hmap_init(&ecmp_nexthop); +} + +static void +ecmp_nexthop_erase_entry(struct ecmp_nexthop_data *e) +{ + free(e->nexthop); + free(e->mac); + free(e); +} + +static void +ecmp_nexthop_destroy_map(struct hmap *map) +{ + struct ecmp_nexthop_data *e; + HMAP_FOR_EACH_POP (e, hmap_node, map) { + ecmp_nexthop_erase_entry(e); + } + hmap_destroy(map); +} + +void ecmp_nexthop_destroy(void) +{ + ecmp_nexthop_destroy_map(&ecmp_nexthop); +} + +static struct ecmp_nexthop_data * +ecmp_nexthop_alloc_entry(const char *nexthop, const char *mac, + const uint16_t zone_id, struct hmap *map) +{ + struct ecmp_nexthop_data *e = xmalloc(sizeof *e); + e->nexthop = xstrdup(nexthop); + e->mac = xstrdup(mac); + e->zone_id = zone_id; + + uint32_t hash = hash_string(nexthop, 0); + hash = hash_add(hash, hash_string(mac, 0)); + hash = hash_add(hash, zone_id); + hmap_insert(map, &e->hmap_node, hash); + + return e; +} + +static struct ecmp_nexthop_data * +ecmp_nexthop_find_entry(const char *nexthop, const char *mac, + const uint16_t zone_id, struct hmap *map) +{ + uint32_t hash = hash_string(nexthop, 0); + hash = hash_add(hash, hash_string(mac, 0)); + hash = hash_add(hash, zone_id); + + struct ecmp_nexthop_data *e; + HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) { + if (!strcmp(e->nexthop, nexthop) && + !strcmp(e->mac, mac) && e->zone_id == zone_id) { + return e; + } + } + return NULL; +} + +static void 
+ecmp_nexthop_monitor_flush_ct_entry(const struct rconn *swconn, + const char *mac, uint16_t zone_id, + struct ovs_list *msgs) +{ + struct eth_addr ea; + if (!ovs_scan(mac, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) { + return; + } + + ovs_u128 mask = { + /* ct_label.ecmp_reply_eth BITS[32-79] */ + .u64.hi = OVN_CT_ECMP_ETH_HIGH, + .u64.lo = OVN_CT_ECMP_ETH_LOW, + }; + + ovs_be32 lo = get_unaligned_be32((void *)&ea.be16[1]); + ovs_u128 nexthop = { + .u64.hi = ntohs(ea.be16[0]), + .u64.lo = (uint64_t) ntohl(lo) << 32, + }; + + struct ofp_ct_match match = { + .labels = nexthop, + .labels_mask = mask, + }; + struct ofpbuf *msg = ofp_ct_match_encode(&match, &zone_id, + rconn_get_version(swconn)); + ovs_list_push_back(msgs, &msg->list_node); +} + +void +ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table, + const struct shash *current_ct_zones, + const struct rconn *swconn, struct ovs_list *msgs) +{ + struct hmap sb_ecmp_nexthop = HMAP_INITIALIZER(&sb_ecmp_nexthop); + + const struct sbrec_ecmp_nexthop *sbrec_ecmp_nexthop; + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sbrec_ecmp_nexthop, enh_table) { + struct sbrec_port_binding *pb = sbrec_ecmp_nexthop->port; + if (!pb) { + continue; + } + + const char *dp_name = smap_get(&pb->datapath->external_ids, "name"); + if (!dp_name) { + continue; + } + + char *name = xasprintf("%s_dnat", dp_name); + struct ct_zone *ct_zone = shash_find_data(current_ct_zones, name); + free(name); + + if (!ct_zone) { + continue; + } + + if (!ecmp_nexthop_find_entry(sbrec_ecmp_nexthop->nexthop, + sbrec_ecmp_nexthop->mac, ct_zone->zone, + &ecmp_nexthop)) { + ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop, + sbrec_ecmp_nexthop->mac, + ct_zone->zone, &ecmp_nexthop); + } + ecmp_nexthop_alloc_entry(sbrec_ecmp_nexthop->nexthop, + sbrec_ecmp_nexthop->mac, ct_zone->zone, + &sb_ecmp_nexthop); + } + + struct ecmp_nexthop_data *e; + HMAP_FOR_EACH_SAFE (e, hmap_node, &ecmp_nexthop) { + if (!ecmp_nexthop_find_entry(e->nexthop, e->mac, 
e->zone_id, + &sb_ecmp_nexthop)) { + ecmp_nexthop_monitor_flush_ct_entry(swconn, e->mac, + e->zone_id, msgs); + hmap_remove(&ecmp_nexthop, &e->hmap_node); + ecmp_nexthop_erase_entry(e); + } + } + + ecmp_nexthop_destroy_map(&sb_ecmp_nexthop); +} diff --git a/controller/ecmp-next-hop-monitor.h b/controller/ecmp-next-hop-monitor.h new file mode 100644 index 000000000..ee8278e3b --- /dev/null +++ b/controller/ecmp-next-hop-monitor.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2024, Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef OVN_CMP_NEXT_HOP_MONITOR_H +#define OVN_CMP_NEXT_HOP_MONITOR_H + +void ecmp_nexthop_init(void); +void ecmp_nexthop_destroy(void); +void ecmp_nexthop_monitor_run(const struct sbrec_ecmp_nexthop_table *enh_table, + const struct shash *current_ct_zones, + const struct rconn *swconn, + struct ovs_list *msgs); +#endif /* OVN_CMP_NEXT_HOP_MONITOR_H */ diff --git a/controller/ofctrl.c b/controller/ofctrl.c index f9387d375..e44da749d 100644 --- a/controller/ofctrl.c +++ b/controller/ofctrl.c @@ -54,6 +54,7 @@ #include "vswitch-idl.h" #include "ovn-sb-idl.h" #include "ct-zone.h" +#include "ecmp-next-hop-monitor.h" VLOG_DEFINE_THIS_MODULE(ofctrl); @@ -425,6 +426,7 @@ ofctrl_init(struct ovn_extend_table *group_table, tx_counter = rconn_packet_counter_create(); hmap_init(&installed_lflows); hmap_init(&installed_pflows); + ecmp_nexthop_init(); ovs_list_init(&flow_updates); ovn_init_symtab(&symtab); groups = group_table; @@ -877,6 +879,7 @@ ofctrl_destroy(void) expr_symtab_destroy(&symtab); shash_destroy(&symtab); ofctrl_meter_bands_destroy(); + ecmp_nexthop_destroy(); } uint64_t @@ -2662,8 +2665,10 @@ void ofctrl_put(struct ovn_desired_flow_table *lflow_table, struct ovn_desired_flow_table *pflow_table, struct shash *pending_ct_zones, + struct shash *current_ct_zones, struct hmap *pending_lb_tuples, struct ovsdb_idl_index *sbrec_meter_by_name, + const struct sbrec_ecmp_nexthop_table *enh_table, uint64_t req_cfg, bool lflows_changed, bool pflows_changed) @@ -2704,6 +2709,8 @@ ofctrl_put(struct ovn_desired_flow_table *lflow_table, /* OpenFlow messages to send to the switch to bring it up-to-date. */ struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs); + ecmp_nexthop_monitor_run(enh_table, current_ct_zones, swconn, &msgs); + /* Iterate through ct zones that need to be flushed. 
*/ struct shash_node *iter; SHASH_FOR_EACH(iter, pending_ct_zones) { diff --git a/controller/ofctrl.h b/controller/ofctrl.h index 129e3b6ad..5735cd553 100644 --- a/controller/ofctrl.h +++ b/controller/ofctrl.h @@ -31,6 +31,7 @@ struct ofpbuf; struct ovsrec_bridge; struct ovsrec_open_vswitch_table; struct sbrec_meter_table; +struct sbrec_ecmp_nexthop_table; struct shash; struct ovn_desired_flow_table { @@ -57,8 +58,10 @@ enum mf_field_id ofctrl_get_mf_field_id(void); void ofctrl_put(struct ovn_desired_flow_table *lflow_table, struct ovn_desired_flow_table *pflow_table, struct shash *pending_ct_zones, + struct shash *current_ct_zones, struct hmap *pending_lb_tuples, struct ovsdb_idl_index *sbrec_meter_by_name, + const struct sbrec_ecmp_nexthop_table *enh_table, uint64_t nb_cfg, bool lflow_changed, bool pflow_changed); diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 6cee6450d..4b05077d3 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -5821,8 +5821,11 @@ main(int argc, char *argv[]) ofctrl_put(&lflow_output_data->flow_table, &pflow_output_data->flow_table, &ct_zones_data->ctx.pending, + &ct_zones_data->ctx.current, &lb_data->removed_tuples, sbrec_meter_by_name, + sbrec_ecmp_nexthop_table_get( + ovnsb_idl_loop.idl), ofctrl_seqno_get_req_cfg(), engine_node_changed(&en_lflow_output), engine_node_changed(&en_pflow_output)); diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h index d563e044c..a024b0cd3 100644 --- a/include/ovn/logical-fields.h +++ b/include/ovn/logical-fields.h @@ -212,6 +212,9 @@ const struct ovn_field *ovn_field_from_name(const char *name); #define OVN_CT_ECMP_ETH_1ST_BIT 32 #define OVN_CT_ECMP_ETH_END_BIT 79 +#define OVN_CT_ECMP_ETH_LOW (((1ULL << OVN_CT_ECMP_ETH_1ST_BIT) - 1) << 32) +#define OVN_CT_ECMP_ETH_HIGH ((1ULL << (OVN_CT_ECMP_ETH_END_BIT - 63)) - 1) + #define OVN_CT_STR(LABEL_VALUE) OVS_STRINGIZE(LABEL_VALUE) #define OVN_CT_MASKED_STR(LABEL_VALUE) \ 
OVS_STRINGIZE(LABEL_VALUE) "/" OVS_STRINGIZE(LABEL_VALUE) diff --git a/tests/system-ovn.at b/tests/system-ovn.at index 6dfc3055a..e9d15898f 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -14002,3 +14002,529 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d /.*terminating with signal 15.*/d"]) AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ECMP Flush CT entries - IPv4]) +AT_KEYWORDS([ecmp]) +ovn_start +OVS_TRAFFIC_VSWITCHD_START() + +ADD_BR([br-int]) +ADD_BR([br-ext]) +ADD_BR([br-ecmp]) + +ovs-ofctl add-flow br-ext action=normal +ovs-ofctl add-flow br-ecmp action=normal +# Set external-ids in br-int needed for ovn-controller +ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller +start_daemon ovn-controller +ovs-vsctl set Open_vSwitch . 
external-ids:arp-max-timeout-sec=1 + +check ovn-nbctl lr-add R1 +check ovn-nbctl set logical_router R1 options:chassis=hv1 +check ovn-nbctl lr-add R2 +check ovn-nbctl set logical_router R2 options:chassis=hv1 + +check ovn-nbctl ls-add sw0 +check ovn-nbctl ls-add sw1 +check ovn-nbctl ls-add public + +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24 +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 172.16.1.1/24 + +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 192.168.2.1/24 +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 172.16.1.5/24 + +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ + type=router options:router-port=rp-sw0 \ + -- lsp-set-addresses sw0-rp router + +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ + type=router options:router-port=rp-sw1 \ + -- lsp-set-addresses sw1-rp router + +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \ + type=router options:router-port=rp-public1 \ + -- lsp-set-addresses public-rp1 router + +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \ + type=router options:router-port=rp-public2 \ + -- lsp-set-addresses public-rp2 router + +ADD_NAMESPACES(alice) +ADD_VETH(alice, alice, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ + "192.168.1.1") +check ovn-nbctl lsp-add sw0 alice \ + -- lsp-set-addresses alice "f0:00:00:01:02:03 192.168.1.2" + +ADD_NAMESPACES(peter) +ADD_VETH(peter, peter, br-int, "192.168.2.2/24", "f0:00:02:01:02:03", \ + "192.168.2.1") +check ovn-nbctl lsp-add sw1 peter \ + -- lsp-set-addresses peter "f0:00:02:01:02:03 192.168.2.2" + +check ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=phynet:br-ext +check ovn-nbctl lsp-add public public1 \ + -- lsp-set-addresses public1 unknown \ + -- lsp-set-type public1 localnet \ + -- lsp-set-options public1 network_name=phynet + +ADD_NAMESPACES(ecmp-path0) +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "172.16.1.2/24", "f0:00:00:01:02:04", "172.16.1.1") +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "172.16.2.2/24", "f0:00:00:01:03:04") + +ADD_NAMESPACES(ecmp-path1) +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "172.16.1.3/24", "f0:00:00:01:02:05", "172.16.1.1") +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "172.16.2.3/24", "f0:00:00:01:03:05") + +ADD_NAMESPACES(bob) +ADD_VETH(bob, bob, br-ecmp, "172.16.2.10/24", "f0:00:00:01:02:06", "172.16.2.2") + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3 + +wait_for_ports_up +check ovn-nbctl --wait=hv sync +NETNS_DAEMONIZE([alice], [nc -l -k 80], [alice.pid]) +NETNS_DAEMONIZE([peter], [nc -l -k 80], [peter.pid]) + +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) + +wait_row_count ECMP_Nexthop 2 +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) +]) + +# Change bob default IP address +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.2]) +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.3]) + +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) + +wait_row_count ECMP_Nexthop 2 +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) +]) + +# Remove first ECMP route +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 1 
+check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' + +ovn-sbctl list ECMP_Nexthop > /tmp/ecmp-nh + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) +]) + +# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 +wait_row_count ECMP_Nexthop 2 +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='172.16.1.2' +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' + +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05]) +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.2' + +# Change bob default IP address +NS_CHECK_EXEC([bob], [ip route del 0.0.0.0/0 via 172.16.2.3]) +NS_CHECK_EXEC([bob], [ip route add 0.0.0.0/0 via 172.16.2.2]) + +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 
+tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) +]) + +# Remove first ECMP route +check ovn-nbctl lr-route-del R1 172.16.2.0/24 172.16.1.2 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 1 + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +]) + +# Remove second ECMP route +check ovn-nbctl lr-route-del R1 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 0 + +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06]) + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 172.16.2.0/24 172.16.1.3 + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 172.16.2.0/24 172.16.1.3 + +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 4 + +NS_CHECK_EXEC([ecmp-path0], [ip route add 192.168.2.2/32 via 172.16.1.5]) +NS_CHECK_EXEC([ecmp-path1], [ip route add 192.168.2.2/32 via 172.16.1.5]) + +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.1.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -z 192.168.1.2 80], [0]) + +NS_CHECK_EXEC([bob], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -z 192.168.2.2 80], [0]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl 
+icmp,orig=(src=172.16.2.10,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 +tcp,orig=(src=172.16.2.10,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) +]) + +check ovn-nbctl lr-route-del R1 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 2 +wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='172.16.1.2' +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='172.16.1.3' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +icmp,orig=(src=172.16.2.10,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.2.10,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 +tcp,orig=(src=172.16.2.10,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.2.10,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) +]) + +check ovn-nbctl lr-route-del R2 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 0 +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], 
[0], [dnl +]) + +OVS_APP_EXIT_AND_WAIT([ovn-controller]) + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d +/.*terminating with signal 15.*/d"]) +AT_CLEANUP +]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ECMP Flush CT entries - IPv6]) +AT_KEYWORDS([ecmp]) +ovn_start +OVS_TRAFFIC_VSWITCHD_START() + +ADD_BR([br-int]) +ADD_BR([br-ext]) +ADD_BR([br-ecmp]) + +ovs-ofctl add-flow br-ext action=normal +ovs-ofctl add-flow br-ecmp action=normal +# Set external-ids in br-int needed for ovn-controller +ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller +start_daemon ovn-controller +ovs-vsctl set Open_vSwitch . 
external-ids:arp-max-timeout-sec=1 + +check ovn-nbctl lr-add R1 +check ovn-nbctl set logical_router R1 options:chassis=hv1 +check ovn-nbctl lr-add R2 +check ovn-nbctl set logical_router R2 options:chassis=hv1 + +check ovn-nbctl ls-add sw0 +check ovn-nbctl ls-add sw1 +check ovn-nbctl ls-add public + +check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 fd11::1/64 +check ovn-nbctl lrp-add R1 rp-public1 00:00:02:01:02:03 fd12::1/64 + +check ovn-nbctl lrp-add R2 rp-sw1 00:00:03:01:02:03 fd14::1/64 +check ovn-nbctl lrp-add R2 rp-public2 00:00:04:01:02:03 fd12::5/64 + +check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ + type=router options:router-port=rp-sw0 \ + -- lsp-set-addresses sw0-rp router + +check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ + type=router options:router-port=rp-sw1 \ + -- lsp-set-addresses sw1-rp router + +check ovn-nbctl lsp-add public public-rp1 -- set Logical_Switch_Port public-rp1 \ + type=router options:router-port=rp-public1 \ + -- lsp-set-addresses public-rp1 router + +check ovn-nbctl lsp-add public public-rp2 -- set Logical_Switch_Port public-rp2 \ + type=router options:router-port=rp-public2 \ + -- lsp-set-addresses public-rp2 router + +ADD_NAMESPACES(alice) +ADD_VETH(alice, alice, br-int, "fd11::2/64", "f0:00:00:01:02:03", "fd11::1", "nodad") +check ovn-nbctl lsp-add sw0 alice -- lsp-set-addresses alice "f0:00:00:01:02:03 fd11::2" + +ADD_NAMESPACES(peter) +ADD_VETH(peter, peter, br-int, "fd14::2/64", "f0:00:02:01:02:03", "fd14::1", "nodad") +check ovn-nbctl lsp-add sw1 peter -- lsp-set-addresses peter "f0:00:02:01:02:03 fd14::2" + +check ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=phynet:br-ext +check ovn-nbctl lsp-add public public1 \ + -- lsp-set-addresses public1 unknown \ + -- lsp-set-type public1 localnet \ + -- lsp-set-options public1 network_name=phynet + +ADD_NAMESPACES(ecmp-path0) +ADD_VETH(ecmp-p01, ecmp-path0, br-ext, "fd12::2/64", "f0:00:00:01:02:04", "fd12::1", "nodad") +ADD_VETH(ecmp-p02, ecmp-path0, br-ecmp, "fd13::2/64", "f0:00:00:01:03:04") +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path0], [ip a show dev ecmp-p02 | grep "fe80::" | grep -v tentative])]) + +ADD_NAMESPACES(ecmp-path1) +ADD_VETH(ecmp-p11, ecmp-path1, br-ext, "fd12::3/64", "f0:00:00:01:02:05", "fd12::1", "nodad") +ADD_VETH(ecmp-p12, ecmp-path1, br-ecmp, "fd13::3/64", "f0:00:00:01:03:05") +OVS_WAIT_UNTIL([NS_EXEC([ecmp-path1], [ip a show dev ecmp-p12 | grep "fe80::" | grep -v tentative])]) + +ADD_NAMESPACES(bob) +ADD_VETH(bob, bob, br-ecmp, "fd13::a/64", "f0:00:00:01:02:06", "fd13::2", "nodad") + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3 + +NS_CHECK_EXEC([ecmp-path0], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl +net.ipv6.conf.all.forwarding = 1 +]) +NS_CHECK_EXEC([ecmp-path1], [sysctl -w net.ipv6.conf.all.forwarding=1],[0], [dnl +net.ipv6.conf.all.forwarding = 1 +]) + +ovn-nbctl --wait=hv sync +NETNS_DAEMONIZE([alice], [nc -6 -l -k 80], [alice.pid]) +NETNS_DAEMONIZE([peter], [nc -6 -l -k 80], [peter.pid]) + +NS_CHECK_EXEC([bob], [ping6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) + +wait_row_count ECMP_Nexthop 2 +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' 
| sort], [0], [dnl +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) +]) + +# Change bob default IP address +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::2]) +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::3]) + +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) + +wait_row_count ECMP_Nexthop 2 +check_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000 +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020400000000,protoinfo=(state=<cleared>) +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) +]) + +# Remove first ECMP route +check 
ovn-nbctl lr-route-del R1 fd13::/64 fd12::2 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 1 +check_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000 +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) +]) + +# Add the route back and verify we do not flush if we have multiple next-hops with the same mac address +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 +wait_row_count ECMP_Nexthop 2 +wait_column 'f0:00:00:01:02:04' ECMP_Nexthop mac nexthop='"fd12::2"' +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' +# +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:05]) +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::2"' + +# Change bob default IP address +NS_CHECK_EXEC([bob], [ip -6 route del ::/0 via fd13::3]) +NS_CHECK_EXEC([bob], [ip -6 route add ::/0 via fd13::2]) + +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000
+tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020500000000,protoinfo=(state=<cleared>) +]) + +# Remove first ECMP route +check ovn-nbctl lr-route-del R1 fd13::/64 fd12::2 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 1 + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +]) + +# Remove second ECMP route +check ovn-nbctl lr-route-del R1 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 0 + +NS_CHECK_EXEC([ecmp-path0], [ip link set dev ecmp-p01 address f0:00:00:01:02:06]) + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R1 fd13::/64 fd12::3 + +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::2 +check ovn-nbctl --ecmp-symmetric-reply lr-route-add R2 fd13::/64 fd12::3 + +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 4 + +NS_CHECK_EXEC([ecmp-path0], [ip route add fd14::2/128 via fd12::5]) +NS_CHECK_EXEC([ecmp-path1], [ip route add fd14::2/128 via fd12::5]) + +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -6 -z fd11::2 80], [0]) + +NS_CHECK_EXEC([bob], [ping -6 -q -c 3 -i 0.3 -w 2 fd14::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([bob], [nc -6 -z fd14::2 80], [0]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl 
+icmpv6,orig=(src=fd13::a,dst=fd11::2,id=<cleared>,type=128,code=0),reply=(src=fd11::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 +tcp,orig=(src=fd13::a,dst=fd11::2,sport=<cleared>,dport=<cleared>),reply=(src=fd11::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) +tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) +]) + +# Remove second ECMP route +check ovn-nbctl lr-route-del R1 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 2 +wait_column 'f0:00:00:01:02:06' ECMP_Nexthop mac nexthop='"fd12::2"' +wait_column 'f0:00:00:01:02:05' ECMP_Nexthop mac nexthop='"fd12::3"' + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd13::a) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +icmpv6,orig=(src=fd13::a,dst=fd14::2,id=<cleared>,type=128,code=0),reply=(src=fd14::2,dst=fd13::a,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000 +tcp,orig=(src=fd13::a,dst=fd14::2,sport=<cleared>,dport=<cleared>),reply=(src=fd14::2,dst=fd13::a,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0xf0000001020600000000,protoinfo=(state=<cleared>) +]) + +check ovn-nbctl lr-route-del R2 +check ovn-nbctl --wait=hv sync +wait_row_count ECMP_Nexthop 0 +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.2.10) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl +]) + +OVS_APP_EXIT_AND_WAIT([ovn-controller]) + 
+as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d +/.*terminating with signal 15.*/d"]) +AT_CLEANUP +])
Introduce ecmp_nexthop_monitor in ovn-controller in order to track and flush ecmp-symmetric reply ct entries when requested by the CMS (e.g. removing the related static ecmp routes). CT entries are flushed using the ethernet mac address stored in ct_label. Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> --- NEWS | 2 + controller/automake.mk | 4 +- controller/ecmp-next-hop-monitor.c | 184 ++++++++++ controller/ecmp-next-hop-monitor.h | 25 ++ controller/ofctrl.c | 7 + controller/ofctrl.h | 3 + controller/ovn-controller.c | 3 + include/ovn/logical-fields.h | 3 + tests/system-ovn.at | 526 +++++++++++++++++++++++++++++ 9 files changed, 756 insertions(+), 1 deletion(-) create mode 100644 controller/ecmp-next-hop-monitor.c create mode 100644 controller/ecmp-next-hop-monitor.h