Message ID | ae8615cb43df962b949990aee981817b1fd4faf0.1707831032.git.lorenzo.bianconi@redhat.com |
---|---|
State | Changes Requested |
Headers | show |
Series | [ovs-dev] northd: Fix pmtud for non routed traffic. | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/github-robot-_ovn-kubernetes | success | github build: passed |
Hi Lorenzo, Thanks for the fix. Acked-by: Mark Michelson <mmichels@redhat.com> When this is merged, the following should also be folded in: --- diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 17b414144..0cf1c2bb5 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -338,7 +338,7 @@ </p> <p> - This table adds a priority-110 flow that matches 'recirculated' icmp{4,6} + This table adds a priority-105 flow that matches 'recirculated' icmp{4,6} error 'packet too big' to drop the packet. </p> --- This accounts for the change of priority introduced in this patch. I also noticed a small spelling mistake that should be corrected. I marked it below. On 2/13/24 08:32, Lorenzo Bianconi wrote: > Similar to what is already implemented for routed e/w traffic, > introduce pmtud support for e/w traffic between two logical switch ports > connected to the same logical switch, but running on two different > hypervisors. > > Reported-at: https://issues.redhat.com/browse/FDP-362 > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > --- > controller/lflow.h | 1 + > controller/physical.c | 31 ++++++++- > northd/northd.c | 35 +++++++--- > northd/ovn-northd.8.xml | 14 +++- > tests/multinode.at | 151 ++++++++++++++++++++++++++++++++++++++++ > tests/ovn-northd.at | 22 ++++-- > 6 files changed, 236 insertions(+), 18 deletions(-) > > diff --git a/controller/lflow.h b/controller/lflow.h > index 9b7ffa19c..906a26280 100644 > --- a/controller/lflow.h > +++ b/controller/lflow.h > @@ -94,6 +94,7 @@ struct uuid; > #define OFTABLE_ECMP_NH 77 > #define OFTABLE_CHK_LB_AFFINITY 78 > #define OFTABLE_MAC_CACHE_USE 79 > +#define OFTABLE_CT_ZONE_LOOKUP 80 > > struct lflow_ctx_in { > struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath; > diff --git a/controller/physical.c b/controller/physical.c > index c32642d2c..6a9327b8d 100644 > --- a/controller/physical.c > +++ b/controller/physical.c > @@ -2451,8 +2451,37 @@ physical_run(struct physical_ctx 
*p_ctx, > p_ctx->n_encap_ips, > p_ctx->encap_ips, > flow_table, &ofpacts); > + > + if (!local_binding_get_primary_pb(p_ctx->local_bindings, > + binding->logical_port)) { > + continue; > + } > + > + /* Table 80, priority 100. > + * ======================= > + * > + * Process ICMP{4,6} error packets too big locally generalted from the s/generalted/generated/ > + * kernel in order to lookup proper ct_zone. */ > + struct match match = MATCH_CATCHALL_INITIALIZER; > + match_set_metadata(&match, htonll(binding->datapath->tunnel_key)); > + match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, binding->tunnel_key); > + > + ofpbuf_clear(&ofpacts); > + struct zone_ids zone_ids = get_zone_ids(binding, p_ctx->ct_zones); > + put_zones_ofpacts(&zone_ids, &ofpacts); > + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts); > + ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 100, 0, > + &match, &ofpacts, hc_uuid); > } > > + /* Default flow for CT_ZONE_LOOKUP Table. */ > + struct match ct_look_def_match; > + match_init_catchall(&ct_look_def_match); > + ofpbuf_clear(&ofpacts); > + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts); > + ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 0, 0, > + &ct_look_def_match, &ofpacts, hc_uuid); > + > /* Handle output to multicast groups, in tables 40 and 41. */ > const struct sbrec_multicast_group *mc; > SBREC_MULTICAST_GROUP_TABLE_FOR_EACH (mc, p_ctx->mc_group_table) { > @@ -2511,7 +2540,7 @@ physical_run(struct physical_ctx *p_ctx, > /* Add specif flows for E/W ICMPv{4,6} packets if tunnelled packets > * do not fit path MTU. 
> */ > - put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts); > + put_resubmit(OFTABLE_CT_ZONE_LOOKUP, &ofpacts); > > /* IPv4 */ > match_init_catchall(&match); > diff --git a/northd/northd.c b/northd/northd.c > index a174a4dcd..34c56f95e 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -8634,7 +8634,7 @@ build_lswitch_lflows_admission_control(struct ovn_datapath *od, > ovs_assert(od->nbs); > > /* Default action for recirculated ICMP error 'packet too big'. */ > - ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 110, > + ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 105, > "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" > " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&" > " flags.tunnel_rx == 1", debug_drop_action(), lflow_ref); > @@ -11822,7 +11822,24 @@ build_lswitch_icmp_packet_toobig_admin_flows( > { > ovs_assert(op->nbsp); > > + ds_clear(match); > if (!lsp_is_router(op->nbsp)) { > + struct eth_addr mac; > + if (!op->nbsp->n_addresses || > + !ovs_scan(op->nbsp->addresses[0], ETH_ADDR_SCAN_FMT, > + ETH_ADDR_SCAN_ARGS(mac))) { > + return; > + } > + > + ds_put_format(match, > + "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" > + " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&" > + " eth.src == "ETH_ADDR_FMT" && outport == %s &&" > + " !is_chassis_resident(%s) && flags.tunnel_rx == 1", > + ETH_ADDR_ARGS(mac), op->json_key, op->json_key); > + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110, > + ds_cstr(match), "outport <-> inport; next;", > + op->lflow_ref); > return; > } > > @@ -11831,26 +11848,28 @@ build_lswitch_icmp_packet_toobig_admin_flows( > return; > } > > - ds_clear(match); > if (peer->od->is_gw_router) { > ds_put_format(match, > "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" > " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && " > "eth.src == %s && outport == %s && flags.tunnel_rx == 1", > peer->nbrp->mac, op->json_key); > + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120, > + ds_cstr(match), 
"outport <-> inport; next;", > + op->lflow_ref); > } else { > ds_put_format(match, > "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" > " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && " > "eth.dst == %s && flags.tunnel_rx == 1", > peer->nbrp->mac); > + ds_clear(actions); > + ds_put_format(actions, > + "outport <-> inport; next(pipeline=ingress,table=%d);", > + ovn_stage_get_table(S_SWITCH_IN_L2_LKUP)); > + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120, > + ds_cstr(match), ds_cstr(actions), op->lflow_ref); > } > - ds_clear(actions); > - ds_put_format(actions, > - "outport <-> inport; next(pipeline=ingress,table=%d);", > - ovn_stage_get_table(S_SWITCH_IN_L2_LKUP)); > - ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120, > - ds_cstr(match), ds_cstr(actions), op->lflow_ref); > } > > static void > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > index 9583abeff..840a4f6a3 100644 > --- a/northd/ovn-northd.8.xml > +++ b/northd/ovn-northd.8.xml > @@ -324,8 +324,7 @@ > 'packet too big' and <code>eth.src == <var>D</var> && > outport == <var>P</var> && flags.tunnel_rx == 1</code> where > <var>D</var> is the peer logical router port <var>RP</var> mac address, > - swaps inport and outport and applies the action <code> > - next(pipeline=S_SWITCH_IN_L2_LKUP)</code>. > + swaps inport and outport and applies the action <code>next</code>. > </p> > > <p> > @@ -338,7 +337,16 @@ > </p> > > <p> > - This table adds a priority-110 flow that matches 'recirculated' icmp{4,6} > + For each logical switch port <var>P</var> a priority-110 flow that > + matches 'recirculated' icmp{4,6} error 'packet too big' and <code> > + eth.src == <var>D</var> && outport == <var>P</var> && > + !is_chassis_resident("<var>P</var>") && flags.tunnel_rx == 1 > + </code> where <var>D</var> is the logical switch port mac address, > + swaps inport and outport and applies the action <code>next</code>. 
> + </p> > + > + <p> > + This table adds a priority-105 flow that matches 'recirculated' icmp{4,6} > error 'packet too big' to drop the packet. > </p> > > diff --git a/tests/multinode.at b/tests/multinode.at > index 0187382be..ef40db9b7 100644 > --- a/tests/multinode.at > +++ b/tests/multinode.at > @@ -154,6 +154,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24 > check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' allow-related > check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' allow-related > > +# create LB > +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp > +check multinode_nbctl ls-lb-add sw0 lb0 > +M_NS_CHECK_EXEC([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1 &]) > + > m_as ovn-gw-1 ip netns add ovn-ext0 > m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal > m_as ovn-gw-1 ip link set ext0 netns ovn-ext0 > @@ -207,6 +212,14 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | > 3 packets transmitted, 3 received, 0% packet loss, time 0ms > ]) > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 > +for i in $(seq 30); do > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) > +done > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942']) > + > +killall nc > + > AT_CLEANUP > > AT_SETUP([ovn multinode pmtu - distributed router - vxlan]) > @@ -696,6 +709,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24 > check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' allow-related > check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' allow-related > > +# create LB > +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp > +check multinode_nbctl lr-lb-add lr0 lb0 > +M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1 &]) 
> + > m_as ovn-gw-1 ip netns add ovn-ext0 > m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal > m_as ovn-gw-1 ip link set ext0 netns ovn-ext0 > @@ -751,6 +769,18 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | > M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100]) > M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"]) > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1]) > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1]) > + > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 > +for i in $(seq 30); do > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) > +done > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942']) > + > +killall nc > + > AT_CLEANUP > > AT_SETUP([ovn multinode pmtu - gw router - vxlan]) > @@ -834,6 +864,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24 > check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' allow-related > check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' allow-related > > +# create LB > +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp > +check multinode_nbctl lr-lb-add lr0 lb0 > +M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1 &]) > + > m_as ovn-gw-1 ip netns add ovn-ext0 > m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal > m_as ovn-gw-1 ip link set ext0 netns ovn-ext0 > @@ -882,4 +917,120 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | > > M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu 
= 1150"]) > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1]) > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1]) > + > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 > +for i in $(seq 30); do > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) > +done > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 950']) > + > +killall nc > + > +AT_CLEANUP > + > +AT_SETUP([ovn multinode pmtu - logical switch - geneve]) > + > +# Check that ovn-fake-multinode setup is up and running > +check_fake_multinode_setup > + > +# Delete the multinode NB and OVS resources before starting the test. > +cleanup_multinode_resources > + > +m_as ovn-chassis-1 ip link del sw0p1-p > +m_as ovn-chassis-2 ip link del sw0p2-p > + > +# Reset geneve tunnels > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1 > +do > + m_as $c ovs-vsctl set open . 
external-ids:ovn-encap-type=geneve > +done > + > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys]) > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys]) > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys]) > + > +# Test East-West switching > +check multinode_nbctl ls-add sw0 > +check multinode_nbctl lsp-add sw0 sw0-port1 > +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3" > +check multinode_nbctl lsp-add sw0 sw0-port2 > +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4" > + > +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a > + > +# Create the second logical switch with one port > +check multinode_nbctl ls-add sw1 > +check multinode_nbctl lsp-add sw1 sw1-port1 > +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3" > + > +# Create a logical router and attach both logical switches > +check multinode_nbctl lr-add lr0 > +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64 > +check multinode_nbctl lsp-add sw0 sw0-lr0 > +check multinode_nbctl lsp-set-type sw0-lr0 router > +check multinode_nbctl lsp-set-addresses sw0-lr0 router > +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 > + > +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64 > +check multinode_nbctl lsp-add sw1 sw1-lr0 > +check multinode_nbctl lsp-set-type sw1-lr0 router > +check multinode_nbctl lsp-set-addresses sw1-lr0 router > +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 > + > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a > + > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 
10.0.0.0/24 > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24 > + > +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10 > +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10 > + > +# create some ACLs > +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' allow-related > +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' allow-related > + > +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp > +check multinode_nbctl ls-lb-add sw0 lb0 > +M_NS_CHECK_EXEC([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1 &]) > + > +m_wait_for_ports_up > + > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > + > +# Change ptmu for the geneve tunnel > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1 > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 10.0.0.4 2>&1 |grep -q "message too long, mtu=1142"]) > + > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1]) > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1]) > + > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \ > +[0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > + > +# Change ptmu for the geneve tunnel > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1100 dev eth1 > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1042"]) > + > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1]) > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1]) > + > 
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 > +for i in $(seq 30); do > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) > +done > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942']) > + > +killall nc > + > AT_CLEANUP > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 591ad5aad..b04cc4893 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -8507,7 +8507,7 @@ ovn_strip_lflows ], [0], [dnl > table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] == 1), action=(drop;) > table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) > table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) > - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) > table=??(ls_in_l2_lkup ), priority=0 , match=(1), action=(outport = get_fdb(eth.dst); next;) > table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);) > @@ -8533,7 +8533,9 @@ ovn_strip_lflows ], [0], [dnl > table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] == 1), action=(drop;) > table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) > table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) > - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && 
icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) > + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) > table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) > table=??(ls_in_l2_lkup ), priority=0 , match=(1), action=(outport = get_fdb(eth.dst); next;) > table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);) > @@ -8560,7 +8562,9 @@ ovn_strip_lflows ], [0], [dnl > table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] == 1), action=(drop;) > table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) > table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) > - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && 
icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) > + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) > table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) > table=??(ls_in_l2_lkup ), priority=0 , match=(1), action=(outport = get_fdb(eth.dst); next;) > table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);) > @@ -8588,7 +8592,9 @@ ovn_strip_lflows ], [0], [dnl > table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) > table=??(ls_in_check_port_sec), priority=100 , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;) > table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) > - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) > + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type 
== 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) > table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) > table=??(ls_in_l2_lkup ), priority=0 , match=(1), action=(outport = get_fdb(eth.dst); next;) > table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);) > @@ -8615,7 +8621,9 @@ ovn_strip_lflows ], [0], [dnl > table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) > table=??(ls_in_check_port_sec), priority=100 , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;) > table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) > - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) > + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) > table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) > table=??(ls_in_check_port_sec), priority=70 , 
match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;) > table=??(ls_in_l2_lkup ), priority=0 , match=(1), action=(outport = get_fdb(eth.dst); next;) > @@ -8645,7 +8653,9 @@ ovn_strip_lflows ], [0], [dnl > table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] == 1), action=(drop;) > table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) > table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) > - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) > + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) > + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) > table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) > table=??(ls_in_check_port_sec), priority=70 , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;) > table=??(ls_in_check_port_sec), priority=70 , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=??);)
On 3/15/24 20:34, Mark Michelson wrote: > Hi Lorenzo, > > Thanks for the fix. > > Acked-by: Mark Michelson <mmichels@redhat.com> > Hi Lorenzo, Mark, I'm afraid there's a bug in this patch; please see below. > When this is merged, the following should also be folded in: > > --- > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > index 17b414144..0cf1c2bb5 100644 > --- a/northd/ovn-northd.8.xml > +++ b/northd/ovn-northd.8.xml > @@ -338,7 +338,7 @@ > </p> > > <p> > - This table adds a priority-110 flow that matches 'recirculated' > icmp{4,6} > + This table adds a priority-105 flow that matches 'recirculated' > icmp{4,6} > error 'packet too big' to drop the packet. > </p> > > --- > > This accounts for the change of priority introduced in this patch. > > I also noticed a small spelling mistake that should be corrected. I > marked it below. > > On 2/13/24 08:32, Lorenzo Bianconi wrote: >> Similar to what is already implemented for routed e/w traffic, >> introduce pmtud support for e/w traffic between two logical switch ports >> connected to the same logical switch, but running on two different >> hypervisors. 
>> >> Reported-at: https://issues.redhat.com/browse/FDP-362 >> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> >> --- >> controller/lflow.h | 1 + >> controller/physical.c | 31 ++++++++- >> northd/northd.c | 35 +++++++--- >> northd/ovn-northd.8.xml | 14 +++- >> tests/multinode.at | 151 ++++++++++++++++++++++++++++++++++++++++ >> tests/ovn-northd.at | 22 ++++-- >> 6 files changed, 236 insertions(+), 18 deletions(-) >> >> diff --git a/controller/lflow.h b/controller/lflow.h >> index 9b7ffa19c..906a26280 100644 >> --- a/controller/lflow.h >> +++ b/controller/lflow.h >> @@ -94,6 +94,7 @@ struct uuid; >> #define OFTABLE_ECMP_NH 77 >> #define OFTABLE_CHK_LB_AFFINITY 78 >> #define OFTABLE_MAC_CACHE_USE 79 >> +#define OFTABLE_CT_ZONE_LOOKUP 80 >> struct lflow_ctx_in { >> struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath; >> diff --git a/controller/physical.c b/controller/physical.c >> index c32642d2c..6a9327b8d 100644 >> --- a/controller/physical.c >> +++ b/controller/physical.c >> @@ -2451,8 +2451,37 @@ physical_run(struct physical_ctx *p_ctx, >> p_ctx->n_encap_ips, >> p_ctx->encap_ips, >> flow_table, &ofpacts); This whole chunk below should be part of consider_port_binding(). Otherwise, we fail to incrementally add flows for newly bound interfaces. physical_run() is only called on recompute while consider_port_binding() is called in both paths (recompute and incremental processing of updates). We should add a test case for this. One way to hit the bug is to start an OVN sandbox. 
Then: $ ./ovn-setup.sh $ ovn-sbctl show Chassis chassis-1 hostname: sandbox Encap geneve ip: "127.0.0.1" options: {csum="true"} Port_Binding sw1-port1 Port_Binding sw0-port1 # Two ports are bound locally, we expect 2 non-default flows in table 80: $ ovs-ofctl dump-flows br-int table=80 cookie=0x0, duration=51.758s, table=80, n_packets=0, n_bytes=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x3->NXM_NX_REG13[0..15],load:0x2->NXM_NX_REG11[],load:0x7->NXM_NX_REG12[],resubmit(,8) cookie=0x0, duration=1.116s, table=80, n_packets=0, n_bytes=0, priority=100,reg14=0x1,metadata=0x2 actions=load:0x8->NXM_NX_REG13[0..15],load:0x6->NXM_NX_REG11[],load:0x5->NXM_NX_REG12[],resubmit(,8) cookie=0x0, duration=53.640s, table=80, n_packets=0, n_bytes=0, priority=0 actions=resubmit(,8) # Add a new locally bound port: $ ovn-nbctl lsp-add sw0 sw0-bar $ ovs-vsctl add-port br-int sw0-bar -- set interface sw0-bar external_ids:iface-id=sw0-bar $ ovn-sbctl show Chassis chassis-1 hostname: sandbox Encap geneve ip: "127.0.0.1" options: {csum="true"} Port_Binding sw1-port1 Port_Binding sw0-port1 Port_Binding sw0-bar # Three ports are bound locally, we expect 3 non-default flows in table 80: $ ovs-ofctl dump-flows br-int table=80 cookie=0x0, duration=123.946s, table=80, n_packets=0, n_bytes=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x3->NXM_NX_REG13[0..15],load:0x2->NXM_NX_REG11[],load:0x7->NXM_NX_REG12[],resubmit(,8) cookie=0x0, duration=73.304s, table=80, n_packets=0, n_bytes=0, priority=100,reg14=0x1,metadata=0x2 actions=load:0x8->NXM_NX_REG13[0..15],load:0x6->NXM_NX_REG11[],load:0x5->NXM_NX_REG12[],resubmit(,8) cookie=0x0, duration=125.828s, table=80, n_packets=0, n_bytes=0, priority=0 actions=resubmit(,8) # Only 2 are there.. 
trigger a recompute: $ ovn-appctl recompute $ ovs-ofctl dump-flows br-int table=80 cookie=0x0, duration=167.205s, table=80, n_packets=0, n_bytes=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x3->NXM_NX_REG13[0..15],load:0x2->NXM_NX_REG11[],load:0x7->NXM_NX_REG12[],resubmit(,8) cookie=0x0, duration=116.563s, table=80, n_packets=0, n_bytes=0, priority=100,reg14=0x1,metadata=0x2 actions=load:0x8->NXM_NX_REG13[0..15],load:0x6->NXM_NX_REG11[],load:0x5->NXM_NX_REG12[],resubmit(,8) cookie=0x0, duration=2.673s, table=80, n_packets=0, n_bytes=0, priority=100,reg14=0x3,metadata=0x1 actions=load:0x9->NXM_NX_REG13[0..15],load:0x2->NXM_NX_REG11[],load:0x7->NXM_NX_REG12[],resubmit(,8) cookie=0x0, duration=169.087s, table=80, n_packets=0, n_bytes=0, priority=0 actions=resubmit(,8) Regards, Dumitru >> + >> + if (!local_binding_get_primary_pb(p_ctx->local_bindings, >> + binding->logical_port)) { >> + continue; >> + } >> + >> + /* Table 80, priority 100. >> + * ======================= >> + * >> + * Process ICMP{4,6} error packets too big locally generalted >> from the > > s/generalted/generated/ > >> + * kernel in order to lookup proper ct_zone. */ >> + struct match match = MATCH_CATCHALL_INITIALIZER; >> + match_set_metadata(&match, >> htonll(binding->datapath->tunnel_key)); >> + match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, >> binding->tunnel_key); >> + >> + ofpbuf_clear(&ofpacts); >> + struct zone_ids zone_ids = get_zone_ids(binding, >> p_ctx->ct_zones); >> + put_zones_ofpacts(&zone_ids, &ofpacts); >> + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts); >> + ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 100, 0, >> + &match, &ofpacts, hc_uuid); >> } >> + /* Default flow for CT_ZONE_LOOKUP Table. 
*/ >> + struct match ct_look_def_match; >> + match_init_catchall(&ct_look_def_match); >> + ofpbuf_clear(&ofpacts); >> + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts); >> + ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 0, 0, >> + &ct_look_def_match, &ofpacts, hc_uuid); >> + >> /* Handle output to multicast groups, in tables 40 and 41. */ >> const struct sbrec_multicast_group *mc; >> SBREC_MULTICAST_GROUP_TABLE_FOR_EACH (mc, p_ctx->mc_group_table) { >> @@ -2511,7 +2540,7 @@ physical_run(struct physical_ctx *p_ctx, >> /* Add specif flows for E/W ICMPv{4,6} packets if tunnelled >> packets >> * do not fit path MTU. >> */ >> - put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts); >> + put_resubmit(OFTABLE_CT_ZONE_LOOKUP, &ofpacts); >> /* IPv4 */ >> match_init_catchall(&match); >> diff --git a/northd/northd.c b/northd/northd.c >> index a174a4dcd..34c56f95e 100644 >> --- a/northd/northd.c >> +++ b/northd/northd.c >> @@ -8634,7 +8634,7 @@ build_lswitch_lflows_admission_control(struct >> ovn_datapath *od, >> ovs_assert(od->nbs); >> /* Default action for recirculated ICMP error 'packet too >> big'. 
*/ >> - ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 110, >> + ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 105, >> "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" >> " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&" >> " flags.tunnel_rx == 1", debug_drop_action(), >> lflow_ref); >> @@ -11822,7 +11822,24 @@ build_lswitch_icmp_packet_toobig_admin_flows( >> { >> ovs_assert(op->nbsp); >> + ds_clear(match); >> if (!lsp_is_router(op->nbsp)) { >> + struct eth_addr mac; >> + if (!op->nbsp->n_addresses || >> + !ovs_scan(op->nbsp->addresses[0], ETH_ADDR_SCAN_FMT, >> + ETH_ADDR_SCAN_ARGS(mac))) { >> + return; >> + } >> + >> + ds_put_format(match, >> + "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" >> + " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&" >> + " eth.src == "ETH_ADDR_FMT" && outport == %s &&" >> + " !is_chassis_resident(%s) && flags.tunnel_rx >> == 1", >> + ETH_ADDR_ARGS(mac), op->json_key, op->json_key); >> + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110, >> + ds_cstr(match), "outport <-> inport; next;", >> + op->lflow_ref); >> return; >> } >> @@ -11831,26 +11848,28 @@ build_lswitch_icmp_packet_toobig_admin_flows( >> return; >> } >> - ds_clear(match); >> if (peer->od->is_gw_router) { >> ds_put_format(match, >> "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" >> " (ip6 && icmp6.type == 2 && icmp6.code == 0)) >> && " >> "eth.src == %s && outport == %s && >> flags.tunnel_rx == 1", >> peer->nbrp->mac, op->json_key); >> + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120, >> + ds_cstr(match), "outport <-> inport; next;", >> + op->lflow_ref); >> } else { >> ds_put_format(match, >> "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" >> " (ip6 && icmp6.type == 2 && icmp6.code == 0)) >> && " >> "eth.dst == %s && flags.tunnel_rx == 1", >> peer->nbrp->mac); >> + ds_clear(actions); >> + ds_put_format(actions, >> + "outport <-> inport; >> next(pipeline=ingress,table=%d);", >> + ovn_stage_get_table(S_SWITCH_IN_L2_LKUP)); >> 
+ ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120, >> + ds_cstr(match), ds_cstr(actions), op->lflow_ref); >> } >> - ds_clear(actions); >> - ds_put_format(actions, >> - "outport <-> inport; >> next(pipeline=ingress,table=%d);", >> - ovn_stage_get_table(S_SWITCH_IN_L2_LKUP)); >> - ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120, >> - ds_cstr(match), ds_cstr(actions), op->lflow_ref); >> } >> static void >> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml >> index 9583abeff..840a4f6a3 100644 >> --- a/northd/ovn-northd.8.xml >> +++ b/northd/ovn-northd.8.xml >> @@ -324,8 +324,7 @@ >> 'packet too big' and <code>eth.src == <var>D</var> && >> outport == <var>P</var> && flags.tunnel_rx == 1</code> >> where >> <var>D</var> is the peer logical router port <var>RP</var> mac >> address, >> - swaps inport and outport and applies the action <code> >> - next(pipeline=S_SWITCH_IN_L2_LKUP)</code>. >> + swaps inport and outport and applies the action <code>next</code>. >> </p> >> <p> >> @@ -338,7 +337,16 @@ >> </p> >> <p> >> - This table adds a priority-110 flow that matches 'recirculated' >> icmp{4,6} >> + For each logical switch port <var>P</var> a priority-110 flow that >> + matches 'recirculated' icmp{4,6} error 'packet too big' and <code> >> + eth.src == <var>D</var> && outport == <var>P</var> >> && >> + !is_chassis_resident("<var>P</var>") && flags.tunnel_rx >> == 1 >> + </code> where <var>D</var> is the logical switch port mac address, >> + swaps inport and outport and applies the action <code>next</code>. >> + </p> >> + >> + <p> >> + This table adds a priority-105 flow that matches 'recirculated' >> icmp{4,6} >> error 'packet too big' to drop the packet. 
>> </p> >> diff --git a/tests/multinode.at b/tests/multinode.at >> index 0187382be..ef40db9b7 100644 >> --- a/tests/multinode.at >> +++ b/tests/multinode.at >> @@ -154,6 +154,11 @@ check multinode_nbctl lr-nat-add lr0 snat >> 172.20.0.100 20.0.0.0/24 >> check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' >> allow-related >> check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' >> allow-related >> +# create LB >> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp >> +check multinode_nbctl ls-lb-add sw0 lb0 >> +M_NS_CHECK_EXEC([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null >> 2>&1 &]) >> + >> m_as ovn-gw-1 ip netns add ovn-ext0 >> m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 >> type=internal >> m_as ovn-gw-1 ip link set ext0 netns ovn-ext0 >> @@ -207,6 +212,14 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping >> -q -c 3 -i 0.3 -w 2 172.20.1.2 | >> 3 packets transmitted, 3 received, 0% packet loss, time 0ms >> ]) >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 >> +for i in $(seq 30); do >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 >> if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) >> +done >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev >> sw0p1 | grep -q 'mtu 942']) >> + >> +killall nc >> + >> AT_CLEANUP >> AT_SETUP([ovn multinode pmtu - distributed router - vxlan]) >> @@ -696,6 +709,11 @@ check multinode_nbctl lr-nat-add lr0 snat >> 172.20.0.100 20.0.0.0/24 >> check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' >> allow-related >> check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' >> allow-related >> +# create LB >> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp >> +check multinode_nbctl lr-lb-add lr0 lb0 >> +M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null >> 2>&1 &]) >> + >> m_as ovn-gw-1 ip netns add ovn-ext0 >> m_as ovn-gw-1 ovs-vsctl add-port 
br-ex ext0 -- set interface ext0 >> type=internal >> m_as ovn-gw-1 ip link set ext0 netns ovn-ext0 >> @@ -751,6 +769,18 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping >> -q -c 3 -i 0.3 -w 2 172.20.1.2 | >> M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu >> 1100]) >> M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 >> -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"]) >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 >> dev sw0p1]) >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via >> 10.0.0.1 dev sw0p1]) >> + >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 >> +for i in $(seq 30); do >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 >> if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) >> +done >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev >> sw0p1 | grep -q 'mtu 942']) >> + >> +killall nc >> + >> AT_CLEANUP >> AT_SETUP([ovn multinode pmtu - gw router - vxlan]) >> @@ -834,6 +864,11 @@ check multinode_nbctl lr-nat-add lr0 snat >> 172.20.0.100 20.0.0.0/24 >> check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' >> allow-related >> check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' >> allow-related >> +# create LB >> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp >> +check multinode_nbctl lr-lb-add lr0 lb0 >> +M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null >> 2>&1 &]) >> + >> m_as ovn-gw-1 ip netns add ovn-ext0 >> m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 >> type=internal >> m_as ovn-gw-1 ip link set ext0 netns ovn-ext0 >> @@ -882,4 +917,120 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping >> -q -c 3 -i 0.3 -w 2 172.20.1.2 | >> M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s >> 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 
1150"]) >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 >> dev sw0p1]) >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via >> 10.0.0.1 dev sw0p1]) >> + >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 >> +for i in $(seq 30); do >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 >> if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) >> +done >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev >> sw0p1 | grep -q 'mtu 950']) >> + >> +killall nc >> + >> +AT_CLEANUP >> + >> +AT_SETUP([ovn multinode pmtu - logical switch - geneve]) >> + >> +# Check that ovn-fake-multinode setup is up and running >> +check_fake_multinode_setup >> + >> +# Delete the multinode NB and OVS resources before starting the test. >> +cleanup_multinode_resources >> + >> +m_as ovn-chassis-1 ip link del sw0p1-p >> +m_as ovn-chassis-2 ip link del sw0p2-p >> + >> +# Reset geneve tunnels >> +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1 >> +do >> + m_as $c ovs-vsctl set open . 
external-ids:ovn-encap-type=geneve >> +done >> + >> +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys]) >> +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys]) >> +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys]) >> + >> +# Test East-West switching >> +check multinode_nbctl ls-add sw0 >> +check multinode_nbctl lsp-add sw0 sw0-port1 >> +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 >> 10.0.0.3 1000::3" >> +check multinode_nbctl lsp-add sw0 sw0-port2 >> +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 >> 10.0.0.4 1000::4" >> + >> +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 >> 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a >> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 >> 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a >> + >> +# Create the second logical switch with one port >> +check multinode_nbctl ls-add sw1 >> +check multinode_nbctl lsp-add sw1 sw1-port1 >> +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 >> 20.0.0.3 2000::3" >> + >> +# Create a logical router and attach both logical switches >> +check multinode_nbctl lr-add lr0 >> +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 >> 10.0.0.1/24 1000::a/64 >> +check multinode_nbctl lsp-add sw0 sw0-lr0 >> +check multinode_nbctl lsp-set-type sw0-lr0 router >> +check multinode_nbctl lsp-set-addresses sw0-lr0 router >> +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 >> + >> +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 >> 20.0.0.1/24 2000::a/64 >> +check multinode_nbctl lsp-add sw1 sw1-lr0 >> +check multinode_nbctl lsp-set-type sw1-lr0 router >> +check multinode_nbctl lsp-set-addresses sw1-lr0 router >> +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 >> + >> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 >> 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a >> + >> 
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24 >> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24 >> + >> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10 >> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10 >> + >> +# create some ACLs >> +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' >> allow-related >> +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' >> allow-related >> + >> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp >> +check multinode_nbctl ls-lb-add sw0 lb0 >> +M_NS_CHECK_EXEC([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null >> 2>&1 &]) >> + >> +m_wait_for_ports_up >> + >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 >> 10.0.0.4 | FORMAT_PING], \ >> +[0], [dnl >> +3 packets transmitted, 3 received, 0% packet loss, time 0ms >> +]) >> + >> +# Change pmtu for the geneve tunnel >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1 >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do >> 10.0.0.4 2>&1 |grep -q "message too long, mtu=1142"]) >> + >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 >> dev sw0p1]) >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via >> 10.0.0.1 dev sw0p1]) >> + >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 >> 20.0.0.3 | FORMAT_PING], \ >> +[0], [dnl >> +3 packets transmitted, 3 received, 0% packet loss, time 0ms >> +]) >> + >> +# Change pmtu for the geneve tunnel >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1100 dev eth1 >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do >> 20.0.0.3 2>&1 |grep -q "message too long, mtu=1042"]) >> + >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 
10.0.0.0/24 >> dev sw0p1]) >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via >> 10.0.0.1 dev sw0p1]) >> + >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 >> +for i in $(seq 30); do >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 >> if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) >> +done >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev >> sw0p1 | grep -q 'mtu 942']) >> + >> +killall nc >> + >> AT_CLEANUP >> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at >> index 591ad5aad..b04cc4893 100644 >> --- a/tests/ovn-northd.at >> +++ b/tests/ovn-northd.at >> @@ -8507,7 +8507,7 @@ ovn_strip_lflows ], [0], [dnl >> table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] >> == 1), action=(drop;) >> table=??(ls_in_check_port_sec), priority=100 , >> match=(eth.src[[40]]), action=(drop;) >> table=??(ls_in_check_port_sec), priority=100 , >> match=(vlan.present), action=(drop;) >> - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> table=??(ls_in_check_port_sec), priority=50 , match=(1), >> action=(reg0[[15]] = check_in_port_sec(); next;) >> table=??(ls_in_l2_lkup ), priority=0 , match=(1), >> action=(outport = get_fdb(eth.dst); next;) >> table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == >> $svc_monitor_mac && (tcp || icmp || icmp6)), >> action=(handle_svc_check(inport);) >> @@ -8533,7 +8533,9 @@ ovn_strip_lflows ], [0], [dnl >> table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] >> == 1), action=(drop;) >> table=??(ls_in_check_port_sec), priority=100 , >> match=(eth.src[[40]]), action=(drop;) 
>> table=??(ls_in_check_port_sec), priority=100 , >> match=(vlan.present), action=(drop;) >> - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == >> "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), >> action=(outport <-> inport; next;) >> + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == >> "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), >> action=(outport <-> inport; next;) >> table=??(ls_in_check_port_sec), priority=50 , match=(1), >> action=(reg0[[15]] = check_in_port_sec(); next;) >> table=??(ls_in_l2_lkup ), priority=0 , match=(1), >> action=(outport = get_fdb(eth.dst); next;) >> table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == >> $svc_monitor_mac && (tcp || icmp || icmp6)), >> action=(handle_svc_check(inport);) >> @@ -8560,7 +8562,9 @@ ovn_strip_lflows ], [0], [dnl >> table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] >> == 1), action=(drop;) >> table=??(ls_in_check_port_sec), priority=100 , >> match=(eth.src[[40]]), action=(drop;) >> table=??(ls_in_check_port_sec), priority=100 , >> match=(vlan.present), action=(drop;) >> - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) 
>> + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == >> "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), >> action=(outport <-> inport; next;) >> + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == >> "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), >> action=(outport <-> inport; next;) >> table=??(ls_in_check_port_sec), priority=50 , match=(1), >> action=(reg0[[15]] = check_in_port_sec(); next;) >> table=??(ls_in_l2_lkup ), priority=0 , match=(1), >> action=(outport = get_fdb(eth.dst); next;) >> table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == >> $svc_monitor_mac && (tcp || icmp || icmp6)), >> action=(handle_svc_check(inport);) >> @@ -8588,7 +8592,9 @@ ovn_strip_lflows ], [0], [dnl >> table=??(ls_in_check_port_sec), priority=100 , >> match=(eth.src[[40]]), action=(drop;) >> table=??(ls_in_check_port_sec), priority=100 , match=(inport == >> "sw0p1"), action=(reg0[[15]] = 1; next;) >> table=??(ls_in_check_port_sec), priority=100 , >> match=(vlan.present), action=(drop;) >> - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> 
icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == >> "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), >> action=(outport <-> inport; next;) >> + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == >> "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), >> action=(outport <-> inport; next;) >> table=??(ls_in_check_port_sec), priority=50 , match=(1), >> action=(reg0[[15]] = check_in_port_sec(); next;) >> table=??(ls_in_l2_lkup ), priority=0 , match=(1), >> action=(outport = get_fdb(eth.dst); next;) >> table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == >> $svc_monitor_mac && (tcp || icmp || icmp6)), >> action=(handle_svc_check(inport);) >> @@ -8615,7 +8621,9 @@ ovn_strip_lflows ], [0], [dnl >> table=??(ls_in_check_port_sec), priority=100 , >> match=(eth.src[[40]]), action=(drop;) >> table=??(ls_in_check_port_sec), priority=100 , match=(inport == >> "sw0p1"), action=(reg0[[15]] = 1; next;) >> table=??(ls_in_check_port_sec), priority=100 , >> match=(vlan.present), action=(drop;) >> - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == >> "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), >> action=(outport <-> inport; next;) >> + 
table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == >> "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), >> action=(outport <-> inport; next;) >> table=??(ls_in_check_port_sec), priority=50 , match=(1), >> action=(reg0[[15]] = check_in_port_sec(); next;) >> table=??(ls_in_check_port_sec), priority=70 , match=(inport == >> "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;) >> table=??(ls_in_l2_lkup ), priority=0 , match=(1), >> action=(outport = get_fdb(eth.dst); next;) >> @@ -8645,7 +8653,9 @@ ovn_strip_lflows ], [0], [dnl >> table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] >> == 1), action=(drop;) >> table=??(ls_in_check_port_sec), priority=100 , >> match=(eth.src[[40]]), action=(drop;) >> table=??(ls_in_check_port_sec), priority=100 , >> match=(vlan.present), action=(drop;) >> - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) >> + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == >> "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), >> action=(outport <-> inport; next;) >> + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && >> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && >> icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == >> "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), >> action=(outport <-> 
inport; next;) >> table=??(ls_in_check_port_sec), priority=50 , match=(1), >> action=(reg0[[15]] = check_in_port_sec(); next;) >> table=??(ls_in_check_port_sec), priority=70 , match=(inport == >> "localnetport"), action=(set_queue(10); reg0[[15]] = >> check_in_port_sec(); next;) >> table=??(ls_in_check_port_sec), priority=70 , match=(inport == >> "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=??);) >
diff --git a/controller/lflow.h b/controller/lflow.h index 9b7ffa19c..906a26280 100644 --- a/controller/lflow.h +++ b/controller/lflow.h @@ -94,6 +94,7 @@ struct uuid; #define OFTABLE_ECMP_NH 77 #define OFTABLE_CHK_LB_AFFINITY 78 #define OFTABLE_MAC_CACHE_USE 79 +#define OFTABLE_CT_ZONE_LOOKUP 80 struct lflow_ctx_in { struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath; diff --git a/controller/physical.c b/controller/physical.c index c32642d2c..6a9327b8d 100644 --- a/controller/physical.c +++ b/controller/physical.c @@ -2451,8 +2451,37 @@ physical_run(struct physical_ctx *p_ctx, p_ctx->n_encap_ips, p_ctx->encap_ips, flow_table, &ofpacts); + + if (!local_binding_get_primary_pb(p_ctx->local_bindings, + binding->logical_port)) { + continue; + } + + /* Table 80, priority 100. + * ======================= + * + * Process ICMP{4,6} error packets too big locally generated from the + * kernel in order to lookup proper ct_zone. */ + struct match match = MATCH_CATCHALL_INITIALIZER; + match_set_metadata(&match, htonll(binding->datapath->tunnel_key)); + match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, binding->tunnel_key); + + ofpbuf_clear(&ofpacts); + struct zone_ids zone_ids = get_zone_ids(binding, p_ctx->ct_zones); + put_zones_ofpacts(&zone_ids, &ofpacts); + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts); + ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 100, 0, + &match, &ofpacts, hc_uuid); } + /* Default flow for CT_ZONE_LOOKUP Table. */ + struct match ct_look_def_match; + match_init_catchall(&ct_look_def_match); + ofpbuf_clear(&ofpacts); + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts); + ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 0, 0, + &ct_look_def_match, &ofpacts, hc_uuid); + /* Handle output to multicast groups, in tables 40 and 41. 
*/ const struct sbrec_multicast_group *mc; SBREC_MULTICAST_GROUP_TABLE_FOR_EACH (mc, p_ctx->mc_group_table) { @@ -2511,7 +2540,7 @@ physical_run(struct physical_ctx *p_ctx, /* Add specif flows for E/W ICMPv{4,6} packets if tunnelled packets * do not fit path MTU. */ - put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts); + put_resubmit(OFTABLE_CT_ZONE_LOOKUP, &ofpacts); /* IPv4 */ match_init_catchall(&match); diff --git a/northd/northd.c b/northd/northd.c index a174a4dcd..34c56f95e 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -8634,7 +8634,7 @@ build_lswitch_lflows_admission_control(struct ovn_datapath *od, ovs_assert(od->nbs); /* Default action for recirculated ICMP error 'packet too big'. */ - ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 110, + ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 105, "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&" " flags.tunnel_rx == 1", debug_drop_action(), lflow_ref); @@ -11822,7 +11822,24 @@ build_lswitch_icmp_packet_toobig_admin_flows( { ovs_assert(op->nbsp); + ds_clear(match); if (!lsp_is_router(op->nbsp)) { + struct eth_addr mac; + if (!op->nbsp->n_addresses || + !ovs_scan(op->nbsp->addresses[0], ETH_ADDR_SCAN_FMT, + ETH_ADDR_SCAN_ARGS(mac))) { + return; + } + + ds_put_format(match, + "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" + " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&" + " eth.src == "ETH_ADDR_FMT" && outport == %s &&" + " !is_chassis_resident(%s) && flags.tunnel_rx == 1", + ETH_ADDR_ARGS(mac), op->json_key, op->json_key); + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110, + ds_cstr(match), "outport <-> inport; next;", + op->lflow_ref); return; } @@ -11831,26 +11848,28 @@ build_lswitch_icmp_packet_toobig_admin_flows( return; } - ds_clear(match); if (peer->od->is_gw_router) { ds_put_format(match, "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && " "eth.src == %s && outport 
== %s && flags.tunnel_rx == 1", peer->nbrp->mac, op->json_key); + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120, + ds_cstr(match), "outport <-> inport; next;", + op->lflow_ref); } else { ds_put_format(match, "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||" " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && " "eth.dst == %s && flags.tunnel_rx == 1", peer->nbrp->mac); + ds_clear(actions); + ds_put_format(actions, + "outport <-> inport; next(pipeline=ingress,table=%d);", + ovn_stage_get_table(S_SWITCH_IN_L2_LKUP)); + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120, + ds_cstr(match), ds_cstr(actions), op->lflow_ref); } - ds_clear(actions); - ds_put_format(actions, - "outport <-> inport; next(pipeline=ingress,table=%d);", - ovn_stage_get_table(S_SWITCH_IN_L2_LKUP)); - ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120, - ds_cstr(match), ds_cstr(actions), op->lflow_ref); } static void diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 9583abeff..840a4f6a3 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -324,8 +324,7 @@ 'packet too big' and <code>eth.src == <var>D</var> && outport == <var>P</var> && flags.tunnel_rx == 1</code> where <var>D</var> is the peer logical router port <var>RP</var> mac address, - swaps inport and outport and applies the action <code> - next(pipeline=S_SWITCH_IN_L2_LKUP)</code>. + swaps inport and outport and applies the action <code>next</code>. </p> <p> @@ -338,7 +337,16 @@ </p> <p> - This table adds a priority-110 flow that matches 'recirculated' icmp{4,6} + For each logical switch port <var>P</var> a priority-110 flow that + matches 'recirculated' icmp{4,6} error 'packet too big' and <code> + eth.src == <var>D</var> && outport == <var>P</var> && + !is_chassis_resident("<var>P</var>") && flags.tunnel_rx == 1 + </code> where <var>D</var> is the logical switch port mac address, + swaps inport and outport and applies the action <code>next</code>. 
+ </p> + + <p> + This table adds a priority-105 flow that matches 'recirculated' icmp{4,6} error 'packet too big' to drop the packet. </p> diff --git a/tests/multinode.at b/tests/multinode.at index 0187382be..ef40db9b7 100644 --- a/tests/multinode.at +++ b/tests/multinode.at @@ -154,6 +154,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24 check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' allow-related check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' allow-related +# create LB +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp +check multinode_nbctl ls-lb-add sw0 lb0 +M_NS_CHECK_EXEC([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1 &]) + m_as ovn-gw-1 ip netns add ovn-ext0 m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal m_as ovn-gw-1 ip link set ext0 netns ovn-ext0 @@ -207,6 +212,14 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 +for i in $(seq 30); do +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) +done +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942']) + +killall nc + AT_CLEANUP AT_SETUP([ovn multinode pmtu - distributed router - vxlan]) @@ -696,6 +709,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24 check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' allow-related check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' allow-related +# create LB +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp +check multinode_nbctl lr-lb-add lr0 lb0 +M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1 &]) + m_as ovn-gw-1 ip netns add ovn-ext0 m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set 
interface ext0 type=internal m_as ovn-gw-1 ip link set ext0 netns ovn-ext0 @@ -751,6 +769,18 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100]) M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"]) +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1]) +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1]) + +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 +for i in $(seq 30); do +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) +done +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942']) + +killall nc + AT_CLEANUP AT_SETUP([ovn multinode pmtu - gw router - vxlan]) @@ -834,6 +864,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24 check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' allow-related check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' allow-related +# create LB +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp +check multinode_nbctl lr-lb-add lr0 lb0 +M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1 &]) + m_as ovn-gw-1 ip netns add ovn-ext0 m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal m_as ovn-gw-1 ip link set ext0 netns ovn-ext0 @@ -882,4 +917,120 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"]) +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1]) 
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1]) + +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 +for i in $(seq 30); do +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) +done +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 950']) + +killall nc + +AT_CLEANUP + +AT_SETUP([ovn multinode pmtu - logical switch - geneve]) + +# Check that ovn-fake-multinode setup is up and running +check_fake_multinode_setup + +# Delete the multinode NB and OVS resources before starting the test. +cleanup_multinode_resources + +m_as ovn-chassis-1 ip link del sw0p1-p +m_as ovn-chassis-2 ip link del sw0p2-p + +# Reset geneve tunnels +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1 +do + m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve +done + +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys]) +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys]) +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys]) + +# Test East-West switching +check multinode_nbctl ls-add sw0 +check multinode_nbctl lsp-add sw0 sw0-port1 +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3" +check multinode_nbctl lsp-add sw0 sw0-port2 +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4" + +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a + +# Create the second logical switch with one port +check multinode_nbctl ls-add sw1 +check multinode_nbctl lsp-add sw1 sw1-port1 +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3" + +# Create a logical router and attach both logical switches 
+check multinode_nbctl lr-add lr0 +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64 +check multinode_nbctl lsp-add sw0 sw0-lr0 +check multinode_nbctl lsp-set-type sw0-lr0 router +check multinode_nbctl lsp-set-addresses sw0-lr0 router +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 + +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64 +check multinode_nbctl lsp-add sw1 sw1-lr0 +check multinode_nbctl lsp-set-type sw1-lr0 router +check multinode_nbctl lsp-set-addresses sw1-lr0 router +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 + +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a + +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24 +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24 + +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10 +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10 + +# create some ACLs +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6' allow-related +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6' allow-related + +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp +check multinode_nbctl ls-lb-add sw0 lb0 +M_NS_CHECK_EXEC([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1 &]) + +m_wait_for_ports_up + +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# Change pmtu for the geneve tunnel +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1 +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 10.0.0.4 2>&1 |grep -q "message too long, mtu=1142"]) + +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1]) + +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# Change pmtu for the geneve tunnel +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1100 dev eth1 +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1042"]) + +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1]) +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1]) +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1]) + +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1 +for i in $(seq 30); do +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore]) +done +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942']) + +killall nc + AT_CLEANUP diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 591ad5aad..b04cc4893 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -8507,7 +8507,7 @@ ovn_strip_lflows ], [0], [dnl table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] == 1), action=(drop;) table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) table=??(ls_in_check_port_sec), priority=50 , match=(1),
action=(reg0[[15]] = check_in_port_sec(); next;) table=??(ls_in_l2_lkup ), priority=0 , match=(1), action=(outport = get_fdb(eth.dst); next;) table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);) @@ -8533,7 +8533,9 @@ ovn_strip_lflows ], [0], [dnl table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] == 1), action=(drop;) table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) table=??(ls_in_l2_lkup ), priority=0 , match=(1), action=(outport = get_fdb(eth.dst); next;) table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);) @@ -8560,7 +8562,9 @@ ovn_strip_lflows ], [0], [dnl table=??(ls_in_apply_port_sec), priority=50 , 
match=(reg0[[15]] == 1), action=(drop;) table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) table=??(ls_in_l2_lkup ), priority=0 , match=(1), action=(outport = get_fdb(eth.dst); next;) table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);) @@ -8588,7 +8592,9 @@ ovn_strip_lflows ], [0], [dnl table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) table=??(ls_in_check_port_sec), priority=100 , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;) table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && 
flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) table=??(ls_in_l2_lkup ), priority=0 , match=(1), action=(outport = get_fdb(eth.dst); next;) table=??(ls_in_l2_lkup ), priority=110 , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);) @@ -8615,7 +8621,9 @@ ovn_strip_lflows ], [0], [dnl table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) table=??(ls_in_check_port_sec), priority=100 , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;) table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && 
icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) table=??(ls_in_check_port_sec), priority=70 , match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;) table=??(ls_in_l2_lkup ), priority=0 , match=(1), action=(outport = get_fdb(eth.dst); next;) @@ -8645,7 +8653,9 @@ ovn_strip_lflows ], [0], [dnl table=??(ls_in_apply_port_sec), priority=50 , match=(reg0[[15]] == 1), action=(drop;) table=??(ls_in_check_port_sec), priority=100 , match=(eth.src[[40]]), action=(drop;) table=??(ls_in_check_port_sec), priority=100 , match=(vlan.present), action=(drop;) - table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=105 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;) + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) + table=??(ls_in_check_port_sec), priority=110 , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && 
!is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;) table=??(ls_in_check_port_sec), priority=50 , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;) table=??(ls_in_check_port_sec), priority=70 , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;) table=??(ls_in_check_port_sec), priority=70 , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=??);)
Similar to what is already implemented for routed e/w traffic, introduce pmtud support for e/w traffic between two logical switch ports connected to the same logical switch, but running on two different hypervisors. Reported-at: https://issues.redhat.com/browse/FDP-362 Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> --- controller/lflow.h | 1 + controller/physical.c | 31 ++++++++- northd/northd.c | 35 +++++++--- northd/ovn-northd.8.xml | 14 +++- tests/multinode.at | 151 ++++++++++++++++++++++++++++++++++++++++ tests/ovn-northd.at | 22 ++++-- 6 files changed, 236 insertions(+), 18 deletions(-)