diff mbox series

[ovs-dev,RFC,v3] ovn: add geneve PMTUD support

Message ID bcceb9a3c0ea9f43ddb16ce53c318afe303d60dd.1703262253.git.lorenzo.bianconi@redhat.com
State RFC
Headers show
Series [ovs-dev,RFC,v3] ovn: add geneve PMTUD support | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Lorenzo Bianconi Dec. 22, 2023, 4:27 p.m. UTC
Introduce specific flows for E/W ICMPv{4,6} packets if tunnelled packets
do not fit path MTU. This patch enables PMTUD for East/West Geneve traffic.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2241711
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
Changes since v2:
- the icmp error forwarding for n/s traffic
- add vxlan tests
- merge IPv6 test cases
Changes since v1:
- add fix for vxlan and stt tunnels
---
 NEWS                    |   1 +
 controller/physical.c   |  31 +++-
 northd/northd.c         |  72 +++++++++
 northd/ovn-northd.8.xml |  29 ++++
 tests/multinode.at      | 348 +++++++++++++++++++++++++++++++++++++++-
 tests/ovn-northd.at     |  21 +++
 6 files changed, 499 insertions(+), 3 deletions(-)

Comments

Numan Siddique Jan. 6, 2024, 12:10 a.m. UTC | #1
On Fri, Dec 22, 2023 at 11:27 AM Lorenzo Bianconi
<lorenzo.bianconi@redhat.com> wrote:
>
> Introduce specific flows for E/W ICMPv{4,6} packets if tunnelled packets
> do not fit path MTU. This patch enables PMTUD for East/West Geneve traffic.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2241711
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>

Hi Lorenzo,

Thanks for the patch.  Please see below for a few comments.


> ---
> Changes since v2:
> - the icmp error forwarding for n/s traffic
> - add vxlan tests
> - merge IPv6 test cases
> Changes since v1:
> - add fix for vxlan and stt tunnels
> ---
>  NEWS                    |   1 +
>  controller/physical.c   |  31 +++-
>  northd/northd.c         |  72 +++++++++
>  northd/ovn-northd.8.xml |  29 ++++
>  tests/multinode.at      | 348 +++++++++++++++++++++++++++++++++++++++-
>  tests/ovn-northd.at     |  21 +++
>  6 files changed, 499 insertions(+), 3 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index e10fb79dd..acb3b854f 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -9,6 +9,7 @@ Post v23.09.0
>      connection method and doesn't require additional probing.
>      external_ids:ovn-openflow-probe-interval configuration option for
>      ovn-controller no longer matters and is ignored.
> +  - Enable PMTU discovery on geneve tunnels for E/W traffic.
>
>  OVN v23.09.0 - 15 Sep 2023
>  --------------------------
> diff --git a/controller/physical.c b/controller/physical.c
> index ba88e1d8b..78cde3e2a 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -2440,9 +2440,36 @@ physical_run(struct physical_ctx *p_ctx,
>              OVS_NOT_REACHED();
>          }
>
> -        put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
> -
> +        struct ofpbuf *tunnel_ofpacts = ofpbuf_clone(&ofpacts);
> +        put_resubmit(OFTABLE_LOCAL_OUTPUT, tunnel_ofpacts);
>          ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match,
> +                        tunnel_ofpacts, hc_uuid);
> +        ofpbuf_delete(tunnel_ofpacts);
> +
> +        /* Add specific flows for E/W ICMPv{4,6} packets if tunnelled
> +         * packets do not fit path MTU.
> +         */
> +        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
> +
> +        /* IPv4 */
> +        match_init_catchall(&match);
> +        match_set_in_port(&match, tun->ofport);
> +        match_set_dl_type(&match, htons(ETH_TYPE_IP));
> +        match_set_nw_proto(&match, IPPROTO_ICMP);
> +        match_set_icmp_type(&match, 3);
> +        match_set_icmp_code(&match, 4);
> +
> +        ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
> +                        &ofpacts, hc_uuid);
> +        /* IPv6 */
> +        match_init_catchall(&match);
> +        match_set_in_port(&match, tun->ofport);
> +        match_set_dl_type(&match, htons(ETH_TYPE_IPV6));
> +        match_set_nw_proto(&match, IPPROTO_ICMPV6);
> +        match_set_icmp_type(&match, 2);
> +        match_set_icmp_code(&match, 0);
> +
> +        ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
>                          &ofpacts, hc_uuid);
>      }
>
> diff --git a/northd/northd.c b/northd/northd.c
> index 617f292fe..a020f2097 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -12794,6 +12794,75 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od,
>      ds_destroy(&actions);
>  }
>
> +/* The following flows are used to manage traffic redirected by the kernel
> + * (e.g. ICMP error packets) that enters the cluster from the geneve ports.
> + */
> +static void
> +build_lrouter_icmp_packet_toobig_admin_flows(
> +        struct ovn_port *op, struct hmap *lflows,
> +        struct ds *match, struct ds *actions)
> +{
> +    ovs_assert(op->nbrp);
> +
> +    if (is_l3dgw_port(op)) {
> +        ds_clear(match);
> +        ds_put_format(match,
> +                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
> +                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
> +                      "eth.dst == %s && !is_chassis_resident(%s)",
> +                      op->nbrp->mac, op->cr_port->json_key);
> +        ds_clear(actions);
> +        ds_put_format(actions, "outport = inport; inport = %s; next;",
> +                      op->json_key);
> +        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 120,
> +                      ds_cstr(match), ds_cstr(actions));
> +    }
> +
> +    /* default flow */
> +    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 110,
> +                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
> +                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
> +}
> +

I don't think there is a need for the default flow.  If I understand
correctly, we are trying to handle the scenario where the kernel
generates the ICMP "fragmentation needed" error packet.  In the normal
case, i.e. an ICMP "fragmentation needed" error packet that was not
generated by the kernel, the packet should simply continue through the
normal flow.


> +static void
> +build_lswitch_icmp_packet_toobig_admin_flows(
> +        struct ovn_port *op, struct hmap *lflows,
> +        struct ds *match, struct ds *actions)
> +{
> +    ovs_assert(op->nbsp);
> +
> +    if (lsp_is_router(op->nbsp)) {
> +        return;
> +    }
> +
> +    struct ovn_datapath *od = op->od;
> +    for (int i = 0; i < od->n_router_ports; i++) {
> +        struct ovn_port *peer = od->router_ports[i]->peer;
> +        if (!peer) {
> +            continue;
> +        }
> +
> +        ds_clear(match);
> +        char *rp_port =
> +            is_l3dgw_port(peer) ? peer->cr_port->json_key : peer->json_key;
> +        ds_put_format(match,
> +                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
> +                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
> +                      "eth.dst == %s && !is_chassis_resident(%s)",
> +                      peer->nbrp->mac, rp_port);
> +        ds_clear(actions);
> +        ds_put_format(actions, "outport = %s; inport = %s; output;",
> +                      od->router_ports[i]->json_key, op->json_key);
> +        ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
> +                      ds_cstr(match), ds_cstr(actions));
> +    }

I think with this we will end up adding two logical flows for every
logical port in the logical switch.
I don't think that's necessary.

I think we should add the logical flow only for logical switch ports
of type router.
The function should return immediately if !lsp_is_router(op->nbsp).

I think you can also match on "inport == <lrp>" in the first
logical flow of this function.

Also, I don't think there is a need for the default flow below.
That logical flow bypasses the port security check, which could
be exploited by a rogue pod/VM.

Let me know if my suggestions don't work.

Thanks
Numan


> +
> +    /* default flow */
> +    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
> +                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
> +                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
> +}
> +
>  static void
>  build_lrouter_force_snat_flows_op(struct ovn_port *op,
>                                    struct hmap *lflows,
> @@ -16161,6 +16230,7 @@ build_lswitch_and_lrouter_iterate_by_lsp(struct ovn_port *op,
>      build_lswitch_dhcp_options_and_response(op, lflows, meter_groups);
>      build_lswitch_external_port(op, lflows);
>      build_lswitch_ip_unicast_lookup(op, lflows, actions, match);
> +    build_lswitch_icmp_packet_toobig_admin_flows(op, lflows, match, actions);
>
>      /* Build Logical Router Flows. */
>      build_ip_routing_flows_for_router_type_lsp(op, lr_ports, lflows);
> @@ -16197,6 +16267,8 @@ build_lswitch_and_lrouter_iterate_by_lrp(struct ovn_port *op,
>                                  &lsi->match, &lsi->actions, lsi->meter_groups);
>      build_lrouter_force_snat_flows_op(op, lsi->lflows, &lsi->match,
>                                        &lsi->actions);
> +    build_lrouter_icmp_packet_toobig_admin_flows(op, lsi->lflows, &lsi->match,
> +                                                 &lsi->actions);
>  }
>
>  static void *
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index 97718821f..85576a845 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -372,6 +372,20 @@
>
>      <h3>Ingress Table 1: Ingress Port Security - Apply</h3>
>
> +    <p>
> +      For each logical switch port <var>P</var> a priority-120 flow that
> +      matches icmp{4,6} error 'packet too big' and <code>eth.dst ==
> +      <var>D</var> &amp;&amp; !is_chassis_resident(<var>RP</var>)</code> where
> +      <var>D</var> is the peer logical router port <var>RP</var> mac address,
> +      stores <var>RP</var> peer port as outport, stores <var>P</var> as inport
> +      and forwards the packet to the egress pipeline.
> +    </p>
> +
> +    <p>
> +      This table adds a priority-110 flow that matches icmp{4,6} error 'packet
> +      too big' to forward the packet to the next stage in the pipeline.
> +    </p>
> +
>      <p>
>        This table drops the packets if the port security check failed
>        in the previous stage i.e the register bit
> @@ -2463,6 +2477,21 @@ output;
>            (LBs, NAT).
>          </p>
>
> +        <p>
> +          For each gateway port <var>GW</var> on a distributed logical router
> +          a priority-120 flow that matches icmp{4,6} error 'packet too big' and
> +          <code>eth.dst == <var>D</var> &amp;&amp; !is_chassis_resident(<var>
> +          cr-GW</var>)</code> where <var>D</var> is the gateway port mac
> +          address and <var>cr-GW</var> is the chassis resident port of
> +          <var>GW</var>, swaps inport and outport and stores <var>GW</var>
> +          as inport.
> +        </p>
> +
> +        <p>
> +          This table adds a priority-110 flow that matches icmp{4,6} error 'packet
> +          too big' to forward the packet to the next stage in the pipeline.
> +        </p>
> +
>          <p>
>            For a distributed logical router or for gateway router where
>            the port is configured with <code>options:gateway_mtu</code>
> diff --git a/tests/multinode.at b/tests/multinode.at
> index 2b199b4bc..772134b7d 100644
> --- a/tests/multinode.at
> +++ b/tests/multinode.at
> @@ -42,7 +42,6 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | F
>  3 packets transmitted, 3 received, 0% packet loss, time 0ms
>  ])
>
> -
>  # Create the second logical switch with one port
>  check multinode_nbctl ls-add sw1
>  check multinode_nbctl lsp-add sw1 sw1-port1
> @@ -72,3 +71,350 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | F
>  ])
>
>  AT_CLEANUP
> +
> +AT_SETUP([ovn multinode pmtu - distributed router])
> +
> +# Check that ovn-fake-multinode setup is up and running
> +check_fake_multinode_setup
> +
> +# Delete the multinode NB and OVS resources before starting the test.
> +cleanup_multinode_resources
> +
> +m_as ovn-chassis-1 ip link del sw0p1-p
> +m_as ovn-chassis-2 ip link del sw0p2-p
> +m_as ovn-chassis-2 ip link del sw1p1-p
> +
> +# Reset geneve tunnels
> +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> +do
> +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> +done
> +
> +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> +
> +# Test East-West switching
> +check multinode_nbctl ls-add sw0
> +check multinode_nbctl lsp-add sw0 sw0-port1
> +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> +check multinode_nbctl lsp-add sw0 sw0-port2
> +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> +
> +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> +
> +m_wait_for_ports_up
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +# Create the second logical switch with one port
> +check multinode_nbctl ls-add sw1
> +check multinode_nbctl lsp-add sw1 sw1-port1
> +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> +
> +# Create a logical router and attach both logical switches
> +check multinode_nbctl lr-add lr0
> +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> +check multinode_nbctl lsp-add sw0 sw0-lr0
> +check multinode_nbctl lsp-set-type sw0-lr0 router
> +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> +
> +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> +check multinode_nbctl lsp-add sw1 sw1-lr0
> +check multinode_nbctl lsp-set-type sw1-lr0 router
> +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> +
> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> +
> +# create external connection for N/S traffic
> +check multinode_nbctl ls-add public
> +check multinode_nbctl lsp-add public ln-lublic
> +check multinode_nbctl lsp-set-type ln-lublic localnet
> +check multinode_nbctl lsp-set-addresses ln-lublic unknown
> +check multinode_nbctl lsp-set-options ln-lublic network_name=public
> +
> +check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
> +check multinode_nbctl lsp-add public public-lr0
> +check multinode_nbctl lsp-set-type public-lr0 router
> +check multinode_nbctl lsp-set-addresses public-lr0 router
> +check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
> +check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
> +check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
> +
> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> +
> +# create some ACLs
> +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
> +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
> +
> +m_as ovn-gw-1 ip netns add ovn-ext0
> +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
> +m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
> +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
> +
> +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
> +m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
> +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
> +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
> +
> +m_as ovn-gw-1 ip netns add ovn-ext2
> +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
> +m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
> +m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
> +m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
> +m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev ext2
> +
> +m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> +m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> +m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> +
> +m_wait_for_ports_up sw1-port1
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +# Change pmtu for the geneve tunnel
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 2>&1 |grep -q "message too long, mtu: 1342"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1000])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 10 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1000"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +# Create vxlan tunnels
> +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> +do
> +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
> +done
> +
> +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
> +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
> +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1150"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
> +
> +AT_CLEANUP
> +
> +AT_SETUP([ovn multinode pmtu - gw_router_port])
> +
> +# Check that ovn-fake-multinode setup is up and running
> +check_fake_multinode_setup
> +
> +# Delete the multinode NB and OVS resources before starting the test.
> +cleanup_multinode_resources
> +
> +m_as ovn-chassis-1 ip link del sw0p1-p
> +m_as ovn-chassis-2 ip link del sw0p2-p
> +m_as ovn-chassis-2 ip link del sw1p1-p
> +
> +# Reset geneve tunnels
> +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> +do
> +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> +done
> +
> +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> +
> +# Test East-West switching
> +check multinode_nbctl ls-add sw0
> +check multinode_nbctl lsp-add sw0 sw0-port1
> +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> +check multinode_nbctl lsp-add sw0 sw0-port2
> +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> +
> +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> +
> +m_wait_for_ports_up
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +# Create the second logical switch with one port
> +check multinode_nbctl ls-add sw1
> +check multinode_nbctl lsp-add sw1 sw1-port1
> +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> +
> +# Create a logical router and attach both logical switches
> +check multinode_nbctl lr-add lr0
> +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> +check multinode_nbctl lsp-add sw0 sw0-lr0
> +check multinode_nbctl lsp-set-type sw0-lr0 router
> +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> +
> +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> +check multinode_nbctl lsp-add sw1 sw1-lr0
> +check multinode_nbctl lsp-set-type sw1-lr0 router
> +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> +
> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> +
> +# create external connection for N/S traffic
> +check multinode_nbctl ls-add public
> +check multinode_nbctl lsp-add public ln-lublic
> +check multinode_nbctl lsp-set-type ln-lublic localnet
> +check multinode_nbctl lsp-set-addresses ln-lublic unknown
> +check multinode_nbctl lsp-set-options ln-lublic network_name=public
> +
> +check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
> +check multinode_nbctl lsp-add public public-lr0
> +check multinode_nbctl lsp-set-type public-lr0 router
> +check multinode_nbctl lsp-set-addresses public-lr0 router
> +check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
> +check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
> +check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
> +
> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> +
> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
> +
> +# create some ACLs
> +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
> +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
> +
> +m_as ovn-gw-1 ip netns add ovn-ext0
> +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
> +m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
> +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
> +
> +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
> +m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
> +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
> +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
> +
> +m_as ovn-gw-1 ip netns add ovn-ext2
> +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
> +m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
> +m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
> +m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
> +m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev ext2
> +
> +m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> +m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> +m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> +
> +m_wait_for_ports_up sw1-port1
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 2>&1 |grep -q "message too long, mtu: 1342"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])
> +
> +# Create vxlan tunnels
> +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> +do
> +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
> +done
> +
> +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
> +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
> +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1150"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
> +
> +AT_CLEANUP
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index a267daca2..223e53991 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -6492,6 +6492,9 @@ AT_CAPTURE_FILE([lrflows])
>
>  # Check the flows in lr_in_admission stage
>  AT_CHECK([grep lr_in_admission lrflows | grep cr-DR | sort], [0], [dnl
> +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 02:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S1")), action=(outport = inport; inport = "DR-S1"; next;)
> +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 03:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S2")), action=(outport = inport; inport = "DR-S2"; next;)
> +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 04:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S3")), action=(outport = inport; inport = "DR-S3"; next;)
>    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 02:ac:10:01:00:01 && inport == "DR-S1" && is_chassis_resident("cr-DR-S1")), action=(xreg0[[0..47]] = 02:ac:10:01:00:01; next;)
>    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 03:ac:10:01:00:01 && inport == "DR-S2" && is_chassis_resident("cr-DR-S2")), action=(xreg0[[0..47]] = 03:ac:10:01:00:01; next;)
>    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 04:ac:10:01:00:01 && inport == "DR-S3" && is_chassis_resident("cr-DR-S3")), action=(xreg0[[0..47]] = 04:ac:10:01:00:01; next;)
> @@ -6551,6 +6554,7 @@ AT_CAPTURE_FILE([lrflows])
>
>  # Check the flows in lr_in_admission stage
>  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
>    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
>    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
>  ])
> @@ -6572,6 +6576,7 @@ AT_CAPTURE_FILE([lrflows])
>
>  # Check the flows in lr_in_admission stage
>  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
>    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
>    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
>  ])
> @@ -6590,6 +6595,7 @@ AT_CAPTURE_FILE([lrflows])
>
>  # Check the flows in lr_in_admission stage
>  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
>    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
>    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
>  ])
> @@ -8343,6 +8349,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
>  sort | sed 's/table=../table=??/' ], [0], [dnl
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
>    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> @@ -8369,6 +8378,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
>  sort | sed 's/table=../table=??/' ], [0], [dnl
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
>    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> @@ -8396,6 +8408,9 @@ sort | sed 's/table=../table=??/' ], [0], [dnl
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
>    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> @@ -8422,6 +8437,9 @@ sort | sed 's/table=../table=??/' ], [0], [dnl
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> @@ -8451,6 +8469,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
>  sort | sed 's/table=../table=??/' ], [0], [dnl
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=17);)
> --
> 2.43.0
>
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Lorenzo Bianconi Jan. 9, 2024, 1:59 p.m. UTC | #2
> On Fri, Dec 22, 2023 at 11:27 AM Lorenzo Bianconi
> <lorenzo.bianconi@redhat.com> wrote:
> >
> > > Introduce specific flows for E/W ICMPv{4,6} packets if tunnelled packets
> > > do not fit path MTU. This patch enables PMTUD for East/West Geneve traffic.
> >
> > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2241711
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> 
> Hi Lorenzo,
> 
> Thanks for the patch.  Please see below for a few comments.

Hi Numan,

Thanks for the review. A few comments inline.

Regards,
Lorenzo

> 
> 
[...]
> > +/* Following flows are used to manage traffic redirected by the kernel
> > + * (e.g. ICMP errors packets) that enter the cluster from the geneve ports
> > + */
> > +static void
> > +build_lrouter_icmp_packet_toobig_admin_flows(
> > +        struct ovn_port *op, struct hmap *lflows,
> > +        struct ds *match, struct ds *actions)
> > +{
> > +    ovs_assert(op->nbrp);
> > +
> > +    if (is_l3dgw_port(op)) {
> > +        ds_clear(match);
> > +        ds_put_format(match,
> > +                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
> > +                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
> > +                      "eth.dst == %s && !is_chassis_resident(%s)",
> > +                      op->nbrp->mac, op->cr_port->json_key);
> > +        ds_clear(actions);
> > +        ds_put_format(actions, "outport = inport; inport = %s; next;",
> > +                      op->json_key);
> > +        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 120,
> > +                      ds_cstr(match), ds_cstr(actions));
> > +    }
> > +
> > +    /* default flow */
> > +    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 110,
> > +                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
> > +                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
> > +}
> > +
> 
> I don't think there is a need for default flow.  If I understand
> correctly,  we are trying to handle
> the scenario when the kernel generates the icmp needs frag error
> packet.  For the normal case i.e  icmp
> needs a frag error packet not generated by the kernel,  it should
> continue the normal flow.

Reviewing the code, I think it is wrong as written. However, for ICMP "packet too
big" errors hitting a gw router port, I think we do need a 'default' flow: if the
port is "local" to the hv, we need to switch the inport from the l3dgw_port
port to the regular router one. Do you agree? (We need this flow only if
is_l3dgw_port() is true.)

> 
> 
> > +static void
> > +build_lswitch_icmp_packet_toobig_admin_flows(
> > +        struct ovn_port *op, struct hmap *lflows,
> > +        struct ds *match, struct ds *actions)
> > +{
> > +    ovs_assert(op->nbsp);
> > +
> > +    if (lsp_is_router(op->nbsp)) {
> > +        return;
> > +    }
> > +
> > +    struct ovn_datapath *od = op->od;
> > +    for (int i = 0; i < od->n_router_ports; i++) {
> > +        struct ovn_port *peer = od->router_ports[i]->peer;
> > +        if (!peer) {
> > +            continue;
> > +        }
> > +
> > +        ds_clear(match);
> > +        char *rp_port =
> > +            is_l3dgw_port(peer) ? peer->cr_port->json_key : peer->json_key;
> > +        ds_put_format(match,
> > +                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
> > +                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
> > +                      "eth.dst == %s && !is_chassis_resident(%s)",
> > +                      peer->nbrp->mac, rp_port);
> > +        ds_clear(actions);
> > +        ds_put_format(actions, "outport = %s; inport = %s; output;",
> > +                      od->router_ports[i]->json_key, op->json_key);
> > +        ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
> > +                      ds_cstr(match), ds_cstr(actions));
> > +    }
> 
> I think with this we will end up adding two logical flows for every
> logical port in the logical switch.
> I don't think that's necessary.
> 
> I think we should add the logical flow only for logical switch ports
> of type router.
> The function should return immediately if !lsp_is_router(op->nbsp).

ack, agree. I will fix it.

> 
> I think you can also match on the "inport == <lrp" in the first
> logical flow of this function.

If we match on the inport, I think it becomes hard to distinguish between
locally generated ICMP 'packet too big' traffic (generated by the kernel) and
ICMP 'packet too big' traffic sent by a remote node. Am I wrong, or am I missing
something?

> 
> Also I don't think there is a need for the default flow below.
> The below logical flow by-passes the port security check which could
> be exploited by a rogue pod/VM.

ack, I will fix it.

> 
> Let me know if my suggestions don't work.
> 
> Thanks
> Numan
> 
> 
> > +
> > +    /* default flow */
> > +    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
> > +                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
> > +                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
> > +}
> > +
> >  static void
> >  build_lrouter_force_snat_flows_op(struct ovn_port *op,
> >                                    struct hmap *lflows,
> > @@ -16161,6 +16230,7 @@ build_lswitch_and_lrouter_iterate_by_lsp(struct ovn_port *op,
> >      build_lswitch_dhcp_options_and_response(op, lflows, meter_groups);
> >      build_lswitch_external_port(op, lflows);
> >      build_lswitch_ip_unicast_lookup(op, lflows, actions, match);
> > +    build_lswitch_icmp_packet_toobig_admin_flows(op, lflows, match, actions);
> >
> >      /* Build Logical Router Flows. */
> >      build_ip_routing_flows_for_router_type_lsp(op, lr_ports, lflows);
> > @@ -16197,6 +16267,8 @@ build_lswitch_and_lrouter_iterate_by_lrp(struct ovn_port *op,
> >                                  &lsi->match, &lsi->actions, lsi->meter_groups);
> >      build_lrouter_force_snat_flows_op(op, lsi->lflows, &lsi->match,
> >                                        &lsi->actions);
> > +    build_lrouter_icmp_packet_toobig_admin_flows(op, lsi->lflows, &lsi->match,
> > +                                                 &lsi->actions);
> >  }
> >
> >  static void *
> > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> > index 97718821f..85576a845 100644
> > --- a/northd/ovn-northd.8.xml
> > +++ b/northd/ovn-northd.8.xml
> > @@ -372,6 +372,20 @@
> >
> >      <h3>Ingress Table 1: Ingress Port Security - Apply</h3>
> >
> > +    <p>
> > +      For each logical switch port <var>P</var> a priority-120 flow that
> > +      matches icmp{4,6} error 'packet too big' and <code>eth.dst ==
> > +      <var>D</var> &amp;&amp; !is_chassis_resident(<var>RP</var>)</code> where
> > +      <var>D</var> is the peer logical router port <var>RP</var> mac address,
> > +      stores <var>RP</var> peer port as outport, stores <var>P</var> as inport
> > +      and forward the packet to the egress pipeline.
> > +    </p>
> > +
> > +    <p>
> > +      This table adds a priority-110 flow that matches icmp{4,6} error 'packet
> > +      too big' to forward the packet to the next stage in the pipeline.
> > +    </p>
> > +
> >      <p>
> >        This table drops the packets if the port security check failed
> >        in the previous stage i.e the register bit
> > @@ -2463,6 +2477,21 @@ output;
> >            (LBs, NAT).
> >          </p>
> >
> > +        <p>
> > +          For each gateway port <var>GW</var> on a distributed logical router
> > +          a priority-120 flow that matches icmp{4,6} error 'packet too big' and
> > +          <code>eth.dst == <var>D</var> &amp;&amp; !is_chassis_resident(<var>
> > +          cr-GW</var>)</code> where <var>D</var> is the gateway port mac
> > +          address and <var>cr-GW</var> is the chassis resident port of
> > +          <var>GW</var>, swap inport and outport and stores <var>GW</var>
> > +          as inport.
> > +        </p>
> > +
> > +        <p>
> > +          This table adds a priority-110 flow that matches icmp{4,6} error 'packet
> > +          too big' to forward the packet to the next stage in the pipeline.
> > +        </p>
> > +
> >          <p>
> >            For a distributed logical router or for gateway router where
> >            the port is configured with <code>options:gateway_mtu</code>
> > diff --git a/tests/multinode.at b/tests/multinode.at
> > index 2b199b4bc..772134b7d 100644
> > --- a/tests/multinode.at
> > +++ b/tests/multinode.at
> > @@ -42,7 +42,6 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | F
> >  3 packets transmitted, 3 received, 0% packet loss, time 0ms
> >  ])
> >
> > -
> >  # Create the second logical switch with one port
> >  check multinode_nbctl ls-add sw1
> >  check multinode_nbctl lsp-add sw1 sw1-port1
> > @@ -72,3 +71,350 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | F
> >  ])
> >
> >  AT_CLEANUP
> > +
> > +AT_SETUP([ovn multinode pmtu - distributed router])
> > +
> > +# Check that ovn-fake-multinode setup is up and running
> > +check_fake_multinode_setup
> > +
> > +# Delete the multinode NB and OVS resources before starting the test.
> > +cleanup_multinode_resources
> > +
> > +m_as ovn-chassis-1 ip link del sw0p1-p
> > +m_as ovn-chassis-2 ip link del sw0p2-p
> > +m_as ovn-chassis-2 ip link del sw1p1-p
> > +
> > +# Reset geneve tunnels
> > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > +do
> > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> > +done
> > +
> > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> > +
> > +# Test East-West switching
> > +check multinode_nbctl ls-add sw0
> > +check multinode_nbctl lsp-add sw0 sw0-port1
> > +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> > +check multinode_nbctl lsp-add sw0 sw0-port2
> > +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> > +
> > +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> > +
> > +m_wait_for_ports_up
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +# Create the second logical switch with one port
> > +check multinode_nbctl ls-add sw1
> > +check multinode_nbctl lsp-add sw1 sw1-port1
> > +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> > +
> > +# Create a logical router and attach both logical switches
> > +check multinode_nbctl lr-add lr0
> > +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> > +check multinode_nbctl lsp-add sw0 sw0-lr0
> > +check multinode_nbctl lsp-set-type sw0-lr0 router
> > +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> > +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> > +
> > +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> > +check multinode_nbctl lsp-add sw1 sw1-lr0
> > +check multinode_nbctl lsp-set-type sw1-lr0 router
> > +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> > +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> > +
> > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> > +
> > > +# create external connection for N/S traffic
> > +check multinode_nbctl ls-add public
> > +check multinode_nbctl lsp-add public ln-lublic
> > +check multinode_nbctl lsp-set-type ln-lublic localnet
> > +check multinode_nbctl lsp-set-addresses ln-lublic unknown
> > +check multinode_nbctl lsp-set-options ln-lublic network_name=public
> > +
> > +check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
> > +check multinode_nbctl lsp-add public public-lr0
> > +check multinode_nbctl lsp-set-type public-lr0 router
> > +check multinode_nbctl lsp-set-addresses public-lr0 router
> > +check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
> > +check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
> > +check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
> > +
> > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> > +
> > +# create some ACLs
> > +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
> > +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
> > +
> > +m_as ovn-gw-1 ip netns add ovn-ext0
> > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
> > +m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
> > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
> > +
> > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
> > +m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
> > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
> > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
> > +
> > +m_as ovn-gw-1 ip netns add ovn-ext2
> > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
> > +m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
> > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
> > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
> > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev ext2
> > +
> > +m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > +m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > +m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > +
> > +m_wait_for_ports_up sw1-port1
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > > +# Change pmtu for the geneve tunnel
> > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > +
> > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 2>&1 |grep -q "message too long, mtu: 1342"])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1000])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 10 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1000"])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +# Create vxlan tunnels
> > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > +do
> > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
> > +done
> > +
> > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
> > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
> > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > +
> > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1150"])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > +
> > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
> > +
> > +AT_CLEANUP
> > +
> > +AT_SETUP([ovn multinode pmtu - gw_router_port])
> > +
> > +# Check that ovn-fake-multinode setup is up and running
> > +check_fake_multinode_setup
> > +
> > +# Delete the multinode NB and OVS resources before starting the test.
> > +cleanup_multinode_resources
> > +
> > +m_as ovn-chassis-1 ip link del sw0p1-p
> > +m_as ovn-chassis-2 ip link del sw0p2-p
> > +m_as ovn-chassis-2 ip link del sw1p1-p
> > +
> > +# Reset geneve tunnels
> > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > +do
> > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> > +done
> > +
> > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> > +
> > +# Test East-West switching
> > +check multinode_nbctl ls-add sw0
> > +check multinode_nbctl lsp-add sw0 sw0-port1
> > +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> > +check multinode_nbctl lsp-add sw0 sw0-port2
> > +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> > +
> > +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> > +
> > +m_wait_for_ports_up
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +# Create the second logical switch with one port
> > +check multinode_nbctl ls-add sw1
> > +check multinode_nbctl lsp-add sw1 sw1-port1
> > +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> > +
> > +# Create a logical router and attach both logical switches
> > +check multinode_nbctl lr-add lr0
> > +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> > +check multinode_nbctl lsp-add sw0 sw0-lr0
> > +check multinode_nbctl lsp-set-type sw0-lr0 router
> > +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> > +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> > +
> > +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> > +check multinode_nbctl lsp-add sw1 sw1-lr0
> > +check multinode_nbctl lsp-set-type sw1-lr0 router
> > +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> > +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> > +
> > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> > +
> > > +# create external connection for N/S traffic
> > +check multinode_nbctl ls-add public
> > +check multinode_nbctl lsp-add public ln-lublic
> > +check multinode_nbctl lsp-set-type ln-lublic localnet
> > +check multinode_nbctl lsp-set-addresses ln-lublic unknown
> > +check multinode_nbctl lsp-set-options ln-lublic network_name=public
> > +
> > +check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
> > +check multinode_nbctl lsp-add public public-lr0
> > +check multinode_nbctl lsp-set-type public-lr0 router
> > +check multinode_nbctl lsp-set-addresses public-lr0 router
> > +check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
> > +check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
> > +check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
> > +
> > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> > +
> > +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
> > +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
> > +
> > +# create some ACLs
> > +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
> > +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
> > +
> > +m_as ovn-gw-1 ip netns add ovn-ext0
> > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
> > +m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
> > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
> > +
> > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
> > +m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
> > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
> > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
> > +
> > +m_as ovn-gw-1 ip netns add ovn-ext2
> > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
> > +m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
> > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
> > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
> > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev ext2
> > +
> > +m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > +m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > +m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > +
> > +m_wait_for_ports_up sw1-port1
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > +
> > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > +
> > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 2>&1 |grep -q "message too long, mtu: 1342"])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])
> > +
> > +# Create vxlan tunnels
> > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > +do
> > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
> > +done
> > +
> > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
> > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
> > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > +
> > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1150"])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > +
> > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
> > +
> > +AT_CLEANUP
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index a267daca2..223e53991 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -6492,6 +6492,9 @@ AT_CAPTURE_FILE([lrflows])
> >
> >  # Check the flows in lr_in_admission stage
> >  AT_CHECK([grep lr_in_admission lrflows | grep cr-DR | sort], [0], [dnl
> > +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 02:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S1")), action=(outport = inport; inport = "DR-S1"; next;)
> > +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 03:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S2")), action=(outport = inport; inport = "DR-S2"; next;)
> > +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 04:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S3")), action=(outport = inport; inport = "DR-S3"; next;)
> >    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 02:ac:10:01:00:01 && inport == "DR-S1" && is_chassis_resident("cr-DR-S1")), action=(xreg0[[0..47]] = 02:ac:10:01:00:01; next;)
> >    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 03:ac:10:01:00:01 && inport == "DR-S2" && is_chassis_resident("cr-DR-S2")), action=(xreg0[[0..47]] = 03:ac:10:01:00:01; next;)
> >    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 04:ac:10:01:00:01 && inport == "DR-S3" && is_chassis_resident("cr-DR-S3")), action=(xreg0[[0..47]] = 04:ac:10:01:00:01; next;)
> > @@ -6551,6 +6554,7 @@ AT_CAPTURE_FILE([lrflows])
> >
> >  # Check the flows in lr_in_admission stage
> >  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> > +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
> >    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> >    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> >  ])
> > @@ -6572,6 +6576,7 @@ AT_CAPTURE_FILE([lrflows])
> >
> >  # Check the flows in lr_in_admission stage
> >  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> > +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
> >    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> >    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> >  ])
> > @@ -6590,6 +6595,7 @@ AT_CAPTURE_FILE([lrflows])
> >
> >  # Check the flows in lr_in_admission stage
> >  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> > +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
> >    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> >    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> >  ])
> > @@ -8343,6 +8349,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
> >  sort | sed 's/table=../table=??/' ], [0], [dnl
> >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> >    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> > @@ -8369,6 +8378,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
> >  sort | sed 's/table=../table=??/' ], [0], [dnl
> >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> >    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> > @@ -8396,6 +8408,9 @@ sort | sed 's/table=../table=??/' ], [0], [dnl
> >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> >    table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
> >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> >    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> > @@ -8422,6 +8437,9 @@ sort | sed 's/table=../table=??/' ], [0], [dnl
> >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> >    table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
> >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> >    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
> >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> > @@ -8451,6 +8469,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
> >  sort | sed 's/table=../table=??/' ], [0], [dnl
> >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> >    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
> >    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=17);)
> > --
> > 2.43.0
> >
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
>
Numan Siddique Jan. 9, 2024, 9:54 p.m. UTC | #3
On Tue, Jan 9, 2024 at 8:59 AM Lorenzo Bianconi
<lorenzo.bianconi@redhat.com> wrote:
>
> > On Fri, Dec 22, 2023 at 11:27 AM Lorenzo Bianconi
> > <lorenzo.bianconi@redhat.com> wrote:
> > >
> > > Introduce specif flows for E/W ICMPv{4,6} packets if tunnelled packets
> > > do not fit path MTU. This patch enable PMTUD for East/West Geneve traffic.
> > >
> > > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2241711
> > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> >
> > Hi Lorenzo,
> >
> > Thanks for the patch.  Please see below for a few comments.
>
> Hi Numan,
>
> thx for the review. Few comments inline.
>
> Regards,
> Lorenzo
>
> >
> >
> [...]
> > > +/* Following flows are used to manage traffic redirected by the kernel
> > > + * (e.g. ICMP errors packets) that enter the cluster from the geneve ports
> > > + */
> > > +static void
> > > +build_lrouter_icmp_packet_toobig_admin_flows(
> > > +        struct ovn_port *op, struct hmap *lflows,
> > > +        struct ds *match, struct ds *actions)
> > > +{
> > > +    ovs_assert(op->nbrp);
> > > +
> > > +    if (is_l3dgw_port(op)) {
> > > +        ds_clear(match);
> > > +        ds_put_format(match,
> > > +                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
> > > +                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
> > > +                      "eth.dst == %s && !is_chassis_resident(%s)",
> > > +                      op->nbrp->mac, op->cr_port->json_key);
> > > +        ds_clear(actions);
> > > +        ds_put_format(actions, "outport = inport; inport = %s; next;",
> > > +                      op->json_key);
> > > +        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 120,
> > > +                      ds_cstr(match), ds_cstr(actions));
> > > +    }
> > > +
> > > +    /* default flow */
> > > +    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 110,
> > > +                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
> > > +                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
> > > +}
> > > +
> >
> > I don't think there is a need for default flow.  If I understand
> > correctly,  we are trying to handle
> > the scenario when the kernel generates the icmp needs frag error
> > packet.  For the normal case i.e  icmp
> > needs a frag error packet not generated by the kernel,  it should
> > continue the normal flow.
>
> Reviewing the code I think it is wrong, but for icmp error "packet too big"
> traffic hitting a gw router port I think we need a 'default' flow since if the
> port is "local" to the hv we need to set the inport from the l3dgw_port port to
> the regular router one. Do you agree? (We need this flow just if
> is_l3dgw_port() is true).

I don't understand your point.  In the scenario you mentioned, who
generates the ICMP "packet too big" error packet?
Is it generated by the local kernel due to a route MTU exception?

If the port is local to the hypervisor, then the original packet will
never go out of the tunnel.

In my testing, I have one router port with a gateway chassis set, and
I see the logical flows below added by this patch:

---
table=0 (lr_in_admission    ), priority=120  , match=(((ip4 &&
icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 &&
icmp6.code == 0)) && eth.dst == 00:11:22:00:ff:01 &&
!is_chassis_resident("cr-lr0-public")), action=(outport = inport;
inport = "lr0-public"; next;)
table=0 (lr_in_admission    ), priority=110  , match=((ip4 &&
icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 &&
icmp6.code == 0)), action=(next; )
---

If the packet matches the first flow with priority 120, then the
outport/inport is set properly by the actions and it goes to the next
table.  But if the packet doesn't hit the first flow, or if that flow
is not installed because is_chassis_resident("cr-lr0-public") is true,
then the packet will continue with the remaining matches in the
"lr_in_admission" table and will advance to the next stage.
So we don't need the priority-110 default flow.


>
> >
> >
> > > +static void
> > > +build_lswitch_icmp_packet_toobig_admin_flows(
> > > +        struct ovn_port *op, struct hmap *lflows,
> > > +        struct ds *match, struct ds *actions)
> > > +{
> > > +    ovs_assert(op->nbsp);
> > > +
> > > +    if (lsp_is_router(op->nbsp)) {
> > > +        return;
> > > +    }
> > > +
> > > +    struct ovn_datapath *od = op->od;
> > > +    for (int i = 0; i < od->n_router_ports; i++) {
> > > +        struct ovn_port *peer = od->router_ports[i]->peer;
> > > +        if (!peer) {
> > > +            continue;
> > > +        }
> > > +
> > > +        ds_clear(match);
> > > +        char *rp_port =
> > > +            is_l3dgw_port(peer) ? peer->cr_port->json_key : peer->json_key;
> > > +        ds_put_format(match,
> > > +                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
> > > +                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
> > > +                      "eth.dst == %s && !is_chassis_resident(%s)",
> > > +                      peer->nbrp->mac, rp_port);
> > > +        ds_clear(actions);
> > > +        ds_put_format(actions, "outport = %s; inport = %s; output;",
> > > +                      od->router_ports[i]->json_key, op->json_key);
> > > +        ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
> > > +                      ds_cstr(match), ds_cstr(actions));
> > > +    }
> >
> > I think with this we will end up adding two logical flows for every
> > logical port in the logical switch.
> > I don't think that's necessary.
> >
> > I think we should add the logical flow only for logical switch ports
> > of type router.
> > The function should return immediately if !lsp_is_router(op->nbsp).
>
> ack, agree. I will fix it.
>
> >
> > I think you can also match on the "inport == <lrp" in the first
> > logical flow of this function.
>
> If we use the inport as match I think it is hard to distinguish between the
> locally generated ICMP 'packet too big' traffic (generated by the kernel) and
> ICMP 'packet too big' sent by a remote node. Am I wrong or am I missing
> something?

You don't need to distinguish between the two.  You just need to figure out
whether the ICMP 'packet too big' error is generated locally by geneve or not.

If the ICMP error packet was actually received from the tunnel,
then the packet will continue with the pipeline.  Only in the case where
the kernel generates the ICMP error packet due to a route MTU exception
will the inport be "lrp".


>
> >
> > Also I don't think there is a need for the default flow below.
> > The below logical flow by-passes the port security check which could
> > be exploited by a rogue pod/VM.
>
> ack, I will fix it.
>

>
> > Let me know if my suggestions don't work.
> >
> > Thanks
> > Numan
> >
> >
> > > +
> > > +    /* default flow */
> > > +    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
> > > +                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
> > > +                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
> > > +}
> > > +
> > >  static void
> > >  build_lrouter_force_snat_flows_op(struct ovn_port *op,
> > >                                    struct hmap *lflows,
> > > @@ -16161,6 +16230,7 @@ build_lswitch_and_lrouter_iterate_by_lsp(struct ovn_port *op,
> > >      build_lswitch_dhcp_options_and_response(op, lflows, meter_groups);
> > >      build_lswitch_external_port(op, lflows);
> > >      build_lswitch_ip_unicast_lookup(op, lflows, actions, match);
> > > +    build_lswitch_icmp_packet_toobig_admin_flows(op, lflows, match, actions);
> > >
> > >      /* Build Logical Router Flows. */
> > >      build_ip_routing_flows_for_router_type_lsp(op, lr_ports, lflows);
> > > @@ -16197,6 +16267,8 @@ build_lswitch_and_lrouter_iterate_by_lrp(struct ovn_port *op,
> > >                                  &lsi->match, &lsi->actions, lsi->meter_groups);
> > >      build_lrouter_force_snat_flows_op(op, lsi->lflows, &lsi->match,
> > >                                        &lsi->actions);
> > > +    build_lrouter_icmp_packet_toobig_admin_flows(op, lsi->lflows, &lsi->match,
> > > +                                                 &lsi->actions);
> > >  }
> > >
> > >  static void *
> > > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> > > index 97718821f..85576a845 100644
> > > --- a/northd/ovn-northd.8.xml
> > > +++ b/northd/ovn-northd.8.xml
> > > @@ -372,6 +372,20 @@
> > >
> > >      <h3>Ingress Table 1: Ingress Port Security - Apply</h3>
> > >
> > > +    <p>
> > > +      For each logical switch port <var>P</var> a priority-120 flow that
> > > +      matches icmp{4,6} error 'packet too big' and <code>eth.dst ==
> > > +      <var>D</var> &amp;&amp; !is_chassis_resident(<var>RP</var>)</code> where
> > > +      <var>D</var> is the peer logical router port <var>RP</var> mac address,
> > > +      stores <var>RP</var> peer port as outport, stores <var>P</var> as inport
> > > +      and forward the packet to the egress pipeline.
> > > +    </p>
> > > +
> > > +    <p>
> > > +      This table adds a priority-110 flow that matches icmp{4,6} error 'packet
> > > +      too big' to forward the packet to the next stage in the pipeline.
> > > +    </p>
> > > +
> > >      <p>
> > >        This table drops the packets if the port security check failed
> > >        in the previous stage i.e the register bit
> > > @@ -2463,6 +2477,21 @@ output;
> > >            (LBs, NAT).
> > >          </p>
> > >
> > > +        <p>
> > > +          For each gateway port <var>GW</var> on a distributed logical router
> > > +          a priority-120 flow that matches icmp{4,6} error 'packet too big' and
> > > +          <code>eth.dst == <var>D</var> &amp;&amp; !is_chassis_resident(<var>
> > > +          cr-GW</var>)</code> where <var>D</var> is the gateway port mac
> > > +          address and <var>cr-GW</var> is the chassis resident port of
> > > +          <var>GW</var>, swap inport and outport and stores <var>GW</var>
> > > +          as inport.
> > > +        </p>
> > > +
> > > +        <p>
> > > +          This table adds a priority-110 flow that matches icmp{4,6} error 'packet
> > > +          too big' to forward the packet to the next stage in the pipeline.
> > > +        </p>
> > > +
> > >          <p>
> > >            For a distributed logical router or for gateway router where
> > >            the port is configured with <code>options:gateway_mtu</code>
> > > diff --git a/tests/multinode.at b/tests/multinode.at
> > > index 2b199b4bc..772134b7d 100644
> > > --- a/tests/multinode.at
> > > +++ b/tests/multinode.at
> > > @@ -42,7 +42,6 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | F
> > >  3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > >  ])
> > >
> > > -
> > >  # Create the second logical switch with one port
> > >  check multinode_nbctl ls-add sw1
> > >  check multinode_nbctl lsp-add sw1 sw1-port1
> > > @@ -72,3 +71,350 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | F
> > >  ])
> > >
> > >  AT_CLEANUP
> > > +
> > > +AT_SETUP([ovn multinode pmtu - distributed router])
> > > +

The test case added here will also run for branch-22.03 and it will fail.

You need to skip this test for "multinode tests branch-22.03".

Check this commit (which was reverted) to skip this test for
branch-22.03 - https://github.com/ovn-org/ovn/commit/450e41e783bfa69e4f9d6c80f6bcb01147d5cfe1

Please add the changes in "ovn-fake-multinode-tests.yml" of the above
commit to this patch.


Thanks
Numan


> > > +# Check that ovn-fake-multinode setup is up and running
> > > +check_fake_multinode_setup
> > > +
> > > +# Delete the multinode NB and OVS resources before starting the test.
> > > +cleanup_multinode_resources
> > > +
> > > +m_as ovn-chassis-1 ip link del sw0p1-p
> > > +m_as ovn-chassis-2 ip link del sw0p2-p
> > > +m_as ovn-chassis-2 ip link del sw1p1-p
> > > +
> > > +# Reset geneve tunnels
> > > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > > +do
> > > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> > > +done
> > > +
> > > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> > > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> > > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> > > +
> > > +# Test East-West switching
> > > +check multinode_nbctl ls-add sw0
> > > +check multinode_nbctl lsp-add sw0 sw0-port1
> > > +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> > > +check multinode_nbctl lsp-add sw0 sw0-port2
> > > +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> > > +
> > > +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> > > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> > > +
> > > +m_wait_for_ports_up
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +# Create the second logical switch with one port
> > > +check multinode_nbctl ls-add sw1
> > > +check multinode_nbctl lsp-add sw1 sw1-port1
> > > +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> > > +
> > > +# Create a logical router and attach both logical switches
> > > +check multinode_nbctl lr-add lr0
> > > +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> > > +check multinode_nbctl lsp-add sw0 sw0-lr0
> > > +check multinode_nbctl lsp-set-type sw0-lr0 router
> > > +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> > > +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> > > +
> > > +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> > > +check multinode_nbctl lsp-add sw1 sw1-lr0
> > > +check multinode_nbctl lsp-set-type sw1-lr0 router
> > > +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> > > +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> > > +
> > > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> > > +
> > > +# create exteranl connection for N/S traffic
> > > +check multinode_nbctl ls-add public
> > > +check multinode_nbctl lsp-add public ln-lublic
> > > +check multinode_nbctl lsp-set-type ln-lublic localnet
> > > +check multinode_nbctl lsp-set-addresses ln-lublic unknown
> > > +check multinode_nbctl lsp-set-options ln-lublic network_name=public
> > > +
> > > +check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
> > > +check multinode_nbctl lsp-add public public-lr0
> > > +check multinode_nbctl lsp-set-type public-lr0 router
> > > +check multinode_nbctl lsp-set-addresses public-lr0 router
> > > +check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
> > > +check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
> > > +check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
> > > +
> > > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> > > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> > > +
> > > +# create some ACLs
> > > +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
> > > +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
> > > +
> > > +m_as ovn-gw-1 ip netns add ovn-ext0
> > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
> > > +m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
> > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
> > > +
> > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
> > > +m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
> > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
> > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
> > > +
> > > +m_as ovn-gw-1 ip netns add ovn-ext2
> > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
> > > +m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
> > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
> > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
> > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev ext2
> > > +
> > > +m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > +m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > +m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > +
> > > +m_wait_for_ports_up sw1-port1
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +# Change ptmu for the geneve tunnel
> > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > +
> > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 2>&1 |grep -q "message too long, mtu: 1342"])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1000])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 10 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1000"])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +# Create vxlan tunnels
> > > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > > +do
> > > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
> > > +done
> > > +
> > > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
> > > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
> > > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > +
> > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1150"])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
> > > +
> > > +AT_CLEANUP
> > > +
> > > +AT_SETUP([ovn multinode pmtu - gw_router_port])
> > > +
> > > +# Check that ovn-fake-multinode setup is up and running
> > > +check_fake_multinode_setup
> > > +
> > > +# Delete the multinode NB and OVS resources before starting the test.
> > > +cleanup_multinode_resources
> > > +
> > > +m_as ovn-chassis-1 ip link del sw0p1-p
> > > +m_as ovn-chassis-2 ip link del sw0p2-p
> > > +m_as ovn-chassis-2 ip link del sw1p1-p
> > > +
> > > +# Reset geneve tunnels
> > > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > > +do
> > > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> > > +done
> > > +
> > > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> > > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> > > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> > > +
> > > +# Test East-West switching
> > > +check multinode_nbctl ls-add sw0
> > > +check multinode_nbctl lsp-add sw0 sw0-port1
> > > +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> > > +check multinode_nbctl lsp-add sw0 sw0-port2
> > > +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> > > +
> > > +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> > > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> > > +
> > > +m_wait_for_ports_up
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +# Create the second logical switch with one port
> > > +check multinode_nbctl ls-add sw1
> > > +check multinode_nbctl lsp-add sw1 sw1-port1
> > > +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> > > +
> > > +# Create a logical router and attach both logical switches
> > > +check multinode_nbctl lr-add lr0
> > > +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> > > +check multinode_nbctl lsp-add sw0 sw0-lr0
> > > +check multinode_nbctl lsp-set-type sw0-lr0 router
> > > +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> > > +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> > > +
> > > +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> > > +check multinode_nbctl lsp-add sw1 sw1-lr0
> > > +check multinode_nbctl lsp-set-type sw1-lr0 router
> > > +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> > > +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> > > +
> > > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> > > +
> > > +# create external connection for N/S traffic
> > > +check multinode_nbctl ls-add public
> > > +check multinode_nbctl lsp-add public ln-lublic
> > > +check multinode_nbctl lsp-set-type ln-lublic localnet
> > > +check multinode_nbctl lsp-set-addresses ln-lublic unknown
> > > +check multinode_nbctl lsp-set-options ln-lublic network_name=public
> > > +
> > > +check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
> > > +check multinode_nbctl lsp-add public public-lr0
> > > +check multinode_nbctl lsp-set-type public-lr0 router
> > > +check multinode_nbctl lsp-set-addresses public-lr0 router
> > > +check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
> > > +check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
> > > +check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
> > > +
> > > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> > > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> > > +
> > > +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
> > > +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
> > > +
> > > +# create some ACLs
> > > +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
> > > +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
> > > +
> > > +m_as ovn-gw-1 ip netns add ovn-ext0
> > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
> > > +m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
> > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
> > > +
> > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
> > > +m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
> > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
> > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
> > > +
> > > +m_as ovn-gw-1 ip netns add ovn-ext2
> > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
> > > +m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
> > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
> > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
> > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev ext2
> > > +
> > > +m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > +m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > +m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > +
> > > +m_wait_for_ports_up sw1-port1
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > +
> > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > +
> > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 2>&1 |grep -q "message too long, mtu: 1342"])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])
> > > +
> > > +# Create vxlan tunnels
> > > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > > +do
> > > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
> > > +done
> > > +
> > > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
> > > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
> > > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > +
> > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1150"])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > > +[0], [dnl
> > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > +])
> > > +
> > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
> > > +
> > > +AT_CLEANUP
> > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > > index a267daca2..223e53991 100644
> > > --- a/tests/ovn-northd.at
> > > +++ b/tests/ovn-northd.at
> > > @@ -6492,6 +6492,9 @@ AT_CAPTURE_FILE([lrflows])
> > >
> > >  # Check the flows in lr_in_admission stage
> > >  AT_CHECK([grep lr_in_admission lrflows | grep cr-DR | sort], [0], [dnl
> > > +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 02:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S1")), action=(outport = inport; inport = "DR-S1"; next;)
> > > +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 03:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S2")), action=(outport = inport; inport = "DR-S2"; next;)
> > > +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 04:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S3")), action=(outport = inport; inport = "DR-S3"; next;)
> > >    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 02:ac:10:01:00:01 && inport == "DR-S1" && is_chassis_resident("cr-DR-S1")), action=(xreg0[[0..47]] = 02:ac:10:01:00:01; next;)
> > >    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 03:ac:10:01:00:01 && inport == "DR-S2" && is_chassis_resident("cr-DR-S2")), action=(xreg0[[0..47]] = 03:ac:10:01:00:01; next;)
> > >    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 04:ac:10:01:00:01 && inport == "DR-S3" && is_chassis_resident("cr-DR-S3")), action=(xreg0[[0..47]] = 04:ac:10:01:00:01; next;)
> > > @@ -6551,6 +6554,7 @@ AT_CAPTURE_FILE([lrflows])
> > >
> > >  # Check the flows in lr_in_admission stage
> > >  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> > > +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
> > >    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > >    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > >  ])
> > > @@ -6572,6 +6576,7 @@ AT_CAPTURE_FILE([lrflows])
> > >
> > >  # Check the flows in lr_in_admission stage
> > >  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> > > +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
> > >    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > >    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > >  ])
> > > @@ -6590,6 +6595,7 @@ AT_CAPTURE_FILE([lrflows])
> > >
> > >  # Check the flows in lr_in_admission stage
> > >  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> > > +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
> > >    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > >    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > >  ])
> > > @@ -8343,6 +8349,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
> > >  sort | sed 's/table=../table=??/' ], [0], [dnl
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> > >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> > >    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> > > @@ -8369,6 +8378,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
> > >  sort | sed 's/table=../table=??/' ], [0], [dnl
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> > >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> > >    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> > > @@ -8396,6 +8408,9 @@ sort | sed 's/table=../table=??/' ], [0], [dnl
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> > >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> > >    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> > > @@ -8422,6 +8437,9 @@ sort | sed 's/table=../table=??/' ], [0], [dnl
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> > >    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
> > >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> > > @@ -8451,6 +8469,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
> > >  sort | sed 's/table=../table=??/' ], [0], [dnl
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> > >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> > >    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
> > >    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=17);)
> > > --
> > > 2.43.0
> > >
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >
> >
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Lorenzo Bianconi Jan. 9, 2024, 11:37 p.m. UTC | #4
> On Tue, Jan 9, 2024 at 8:59 AM Lorenzo Bianconi
> <lorenzo.bianconi@redhat.com> wrote:
> >
> > > On Fri, Dec 22, 2023 at 11:27 AM Lorenzo Bianconi
> > > <lorenzo.bianconi@redhat.com> wrote:
> > > >
> > > > Introduce specific flows for E/W ICMPv{4,6} packets if tunnelled packets
> > > > do not fit path MTU. This patch enables PMTUD for East/West Geneve traffic.
> > > >
> > > > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2241711
> > > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> > >
> > > Hi Lorenzo,
> > >
> > > Thanks for the patch.  Please see below for a few comments.
> >
> > Hi Numan,
> >
> > thx for the review. Few comments inline.
> >
> > Regards,
> > Lorenzo
> >
> > >
> > >
> > [...]
> > > > +/* Following flows are used to manage traffic redirected by the kernel
> > > > + * (e.g. ICMP errors packets) that enter the cluster from the geneve ports
> > > > + */
> > > > +static void
> > > > +build_lrouter_icmp_packet_toobig_admin_flows(
> > > > +        struct ovn_port *op, struct hmap *lflows,
> > > > +        struct ds *match, struct ds *actions)
> > > > +{
> > > > +    ovs_assert(op->nbrp);
> > > > +
> > > > +    if (is_l3dgw_port(op)) {
> > > > +        ds_clear(match);
> > > > +        ds_put_format(match,
> > > > +                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
> > > > +                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
> > > > +                      "eth.dst == %s && !is_chassis_resident(%s)",
> > > > +                      op->nbrp->mac, op->cr_port->json_key);
> > > > +        ds_clear(actions);
> > > > +        ds_put_format(actions, "outport = inport; inport = %s; next;",
> > > > +                      op->json_key);
> > > > +        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 120,
> > > > +                      ds_cstr(match), ds_cstr(actions));
> > > > +    }
> > > > +
> > > > +    /* default flow */
> > > > +    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 110,
> > > > +                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
> > > > +                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
> > > > +}
> > > > +
> > >
> > > I don't think there is a need for default flow.  If I understand
> > > correctly,  we are trying to handle
> > > the scenario when the kernel generates the icmp needs frag error
> > > packet.  For the normal case i.e  icmp
> > > needs a frag error packet not generated by the kernel,  it should
> > > continue the normal flow.
> >
> > Reviewing the code I think it is wrong, but for icmp error "packet too big"
> > traffic hitting a gw router port I think we need a 'default' flow since if the
> > port is "local" to the hv we need to set the inport from the l3dgw_port port to
> > the regular router one. Do you agree? (We need this flow just if
> > is_l3dgw_port() is true).
> 
> I don't understand your point.  For the scenario you mentioned about
> icmp error "packet too big" packet,  who generates it ?
> Is it generated by the local kernel due to route mtu exception ?
> 
> If the port is local to the hypervisor, then the original packet will
> never go out of the tunnel.
> 
> In my testing,  I've one router port which has gateway chassis set and
> I see the below logical flows added by this patch
> 
> ---
> table=0 (lr_in_admission    ), priority=120  , match=(((ip4 &&
> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 &&
> icmp6.code == 0)) && eth.dst == 00:11:22:00:ff:01 &&
> !is_chassis_resident("cr-lr0-public")), action=(outport = inport;
> inport = "lr0-public"; next;)
> table=0 (lr_in_admission    ), priority=110  , match=((ip4 &&
> icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 &&
> icmp6.code == 0)), action=(next; )
> ---
> 
> If the packet matches the first flow with priority 120,  then the
> outport/inport is set properly by the actions and it goes to the next
> table.  But if the packet doesn't hit the first flow or if that flow
> is not installed
> because is_chassis_resident("cr-lr0-public") is true,  then the packet
> will continue with the remaining matches in the "lr_in_admission"
> table and will advance to the next stage.
> So we don't need that flow.

Hi Numan,

reading my previous email I think I have not been very clear, sorry for that :(
Let's assume N/S traffic and an ICMP error message 'packet too big'
not generated locally (let's say from a device outside the cluster).
In table OFTABLE_PHY_TO_LOG we resubmit this packet to table OFTABLE_LOG_INGRESS_PIPELINE.
What I mean is for this packet we need a flow to set inport from 'cr-lr0-public' to
'lr0-public', since in this case we are not hitting the flows you reported above, right?
Am I missing something?

> 
> 
> >
> > >
> > >
> > > > +static void
> > > > +build_lswitch_icmp_packet_toobig_admin_flows(
> > > > +        struct ovn_port *op, struct hmap *lflows,
> > > > +        struct ds *match, struct ds *actions)
> > > > +{
> > > > +    ovs_assert(op->nbsp);
> > > > +
> > > > +    if (lsp_is_router(op->nbsp)) {
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    struct ovn_datapath *od = op->od;
> > > > +    for (int i = 0; i < od->n_router_ports; i++) {
> > > > +        struct ovn_port *peer = od->router_ports[i]->peer;
> > > > +        if (!peer) {
> > > > +            continue;
> > > > +        }
> > > > +
> > > > +        ds_clear(match);
> > > > +        char *rp_port =
> > > > +            is_l3dgw_port(peer) ? peer->cr_port->json_key : peer->json_key;
> > > > +        ds_put_format(match,
> > > > +                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
> > > > +                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
> > > > +                      "eth.dst == %s && !is_chassis_resident(%s)",
> > > > +                      peer->nbrp->mac, rp_port);
> > > > +        ds_clear(actions);
> > > > +        ds_put_format(actions, "outport = %s; inport = %s; output;",
> > > > +                      od->router_ports[i]->json_key, op->json_key);
> > > > +        ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
> > > > +                      ds_cstr(match), ds_cstr(actions));
> > > > +    }
> > >
> > > I think with this we will end up adding two logical flows for every
> > > logical port in the logical switch.
> > > I don't think that's necessary.
> > >
> > > I think we should add the logical flow only for logical switch ports
> > > of type router.
> > > The function should return immediately if !lsp_is_router(op->nbsp).
> >
> > ack, agree. I will fix it.
> >
> > >
> > > I think you can also match on the "inport == <lrp" in the first
> > > logical flow of this function.
> >
> > If we use the inport as match I think it is hard to distinguish between the
> > locally generated ICMP 'packet too big' traffic (generated by the kernel) and
> > ICMP 'packet too big' sent by a remote node. Am I wrong or am I missing
> > something?
> 
> You don't need to distinguish between the two.  You just need to figure out
> if the icmp error 'packet too big' is generated locally by geneve or NOT.

yes

> 
> If the icmp error packet was  actually received from the tunnel,
> then the packet will continue with the pipeline.  Only in the case where
> kernel generates the icmp error packet due to route mtu exception,  the inport
> will be "lrp".

I do not think we can use inport for vxlan tunnels, right? In physical_run() we
just set the outport from tunnel_id for vxlan. Can we just use eth.dst in this
case too?

> 
> 
> >
> > >
> > > Also I don't think there is a need for the default flow below.
> > > The below logical flow by-passes the port security check which could
> > > be exploited by a rogue pod/VM.
> >
> > ack, I will fix it.
> >
> 
>  >
> > > Let me know if my suggestions don't work.
> > >
> > > Thanks
> > > Numan
> > >
> > >
> > > > +
> > > > +    /* default flow */
> > > > +    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
> > > > +                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
> > > > +                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
> > > > +}
> > > > +
> > > >  static void
> > > >  build_lrouter_force_snat_flows_op(struct ovn_port *op,
> > > >                                    struct hmap *lflows,
> > > > @@ -16161,6 +16230,7 @@ build_lswitch_and_lrouter_iterate_by_lsp(struct ovn_port *op,
> > > >      build_lswitch_dhcp_options_and_response(op, lflows, meter_groups);
> > > >      build_lswitch_external_port(op, lflows);
> > > >      build_lswitch_ip_unicast_lookup(op, lflows, actions, match);
> > > > +    build_lswitch_icmp_packet_toobig_admin_flows(op, lflows, match, actions);
> > > >
> > > >      /* Build Logical Router Flows. */
> > > >      build_ip_routing_flows_for_router_type_lsp(op, lr_ports, lflows);
> > > > @@ -16197,6 +16267,8 @@ build_lswitch_and_lrouter_iterate_by_lrp(struct ovn_port *op,
> > > >                                  &lsi->match, &lsi->actions, lsi->meter_groups);
> > > >      build_lrouter_force_snat_flows_op(op, lsi->lflows, &lsi->match,
> > > >                                        &lsi->actions);
> > > > +    build_lrouter_icmp_packet_toobig_admin_flows(op, lsi->lflows, &lsi->match,
> > > > +                                                 &lsi->actions);
> > > >  }
> > > >
> > > >  static void *
> > > > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> > > > index 97718821f..85576a845 100644
> > > > --- a/northd/ovn-northd.8.xml
> > > > +++ b/northd/ovn-northd.8.xml
> > > > @@ -372,6 +372,20 @@
> > > >
> > > >      <h3>Ingress Table 1: Ingress Port Security - Apply</h3>
> > > >
> > > > +    <p>
> > > > +      For each logical switch port <var>P</var> a priority-120 flow that
> > > > +      matches icmp{4,6} error 'packet too big' and <code>eth.dst ==
> > > > +      <var>D</var> &amp;&amp; !is_chassis_resident(<var>RP</var>)</code> where
> > > > +      <var>D</var> is the peer logical router port <var>RP</var> mac address,
> > > > +      stores <var>RP</var> peer port as outport, stores <var>P</var> as inport
> > > > +      and forward the packet to the egress pipeline.
> > > > +    </p>
> > > > +
> > > > +    <p>
> > > > +      This table adds a priority-110 flow that matches icmp{4,6} error 'packet
> > > > +      too big' to forward the packet to the next stage in the pipeline.
> > > > +    </p>
> > > > +
> > > >      <p>
> > > >        This table drops the packets if the port security check failed
> > > >        in the previous stage i.e the register bit
> > > > @@ -2463,6 +2477,21 @@ output;
> > > >            (LBs, NAT).
> > > >          </p>
> > > >
> > > > +        <p>
> > > > +          For each gateway port <var>GW</var> on a distributed logical router
> > > > +          a priority-120 flow that matches icmp{4,6} error 'packet too big' and
> > > > +          <code>eth.dst == <var>D</var> &amp;&amp; !is_chassis_resident(<var>
> > > > +          cr-GW</var>)</code> where <var>D</var> is the gateway port mac
> > > > +          address and <var>cr-GW</var> is the chassis resident port of
> > > > +          <var>GW</var>, swap inport and outport and stores <var>GW</var>
> > > > +          as inport.
> > > > +        </p>
> > > > +
> > > > +        <p>
> > > > +          This table adds a priority-110 flow that matches icmp{4,6} error 'packet
> > > > +          too big' to forward the packet to the next stage in the pipeline.
> > > > +        </p>
> > > > +
> > > >          <p>
> > > >            For a distributed logical router or for gateway router where
> > > >            the port is configured with <code>options:gateway_mtu</code>
> > > > diff --git a/tests/multinode.at b/tests/multinode.at
> > > > index 2b199b4bc..772134b7d 100644
> > > > --- a/tests/multinode.at
> > > > +++ b/tests/multinode.at
> > > > @@ -42,7 +42,6 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | F
> > > >  3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > >  ])
> > > >
> > > > -
> > > >  # Create the second logical switch with one port
> > > >  check multinode_nbctl ls-add sw1
> > > >  check multinode_nbctl lsp-add sw1 sw1-port1
> > > > @@ -72,3 +71,350 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | F
> > > >  ])
> > > >
> > > >  AT_CLEANUP
> > > > +
> > > > +AT_SETUP([ovn multinode pmtu - distributed router])
> > > > +
> 
> The test case added here will also run for the branch-22.03 and it will fail.
> 
> You need to skip this test for "multinode tests branch-22.03".
> 
> Check this commit (which was reverted) to skip this test for
> branch-22.03 - https://github.com/ovn-org/ovn/commit/450e41e783bfa69e4f9d6c80f6bcb01147d5cfe1

ack, I will fix it.

Regards,
Lorenzo

> 
> Please add the changes in "ovn-fake-multinode-tests.yml" of the above
> commit to this patch.
> 
> 
> Thanks
> Numan
> 
> 
> > > > +# Check that ovn-fake-multinode setup is up and running
> > > > +check_fake_multinode_setup
> > > > +
> > > > +# Delete the multinode NB and OVS resources before starting the test.
> > > > +cleanup_multinode_resources
> > > > +
> > > > +m_as ovn-chassis-1 ip link del sw0p1-p
> > > > +m_as ovn-chassis-2 ip link del sw0p2-p
> > > > +m_as ovn-chassis-2 ip link del sw1p1-p
> > > > +
> > > > +# Reset geneve tunnels
> > > > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > > > +do
> > > > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> > > > +done
> > > > +
> > > > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> > > > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> > > > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> > > > +
> > > > +# Test East-West switching
> > > > +check multinode_nbctl ls-add sw0
> > > > +check multinode_nbctl lsp-add sw0 sw0-port1
> > > > +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> > > > +check multinode_nbctl lsp-add sw0 sw0-port2
> > > > +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> > > > +
> > > > +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> > > > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> > > > +
> > > > +m_wait_for_ports_up
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +# Create the second logical switch with one port
> > > > +check multinode_nbctl ls-add sw1
> > > > +check multinode_nbctl lsp-add sw1 sw1-port1
> > > > +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> > > > +
> > > > +# Create a logical router and attach both logical switches
> > > > +check multinode_nbctl lr-add lr0
> > > > +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> > > > +check multinode_nbctl lsp-add sw0 sw0-lr0
> > > > +check multinode_nbctl lsp-set-type sw0-lr0 router
> > > > +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> > > > +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> > > > +
> > > > +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> > > > +check multinode_nbctl lsp-add sw1 sw1-lr0
> > > > +check multinode_nbctl lsp-set-type sw1-lr0 router
> > > > +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> > > > +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> > > > +
> > > > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> > > > +
> > > > +# create external connection for N/S traffic
> > > > +check multinode_nbctl ls-add public
> > > > +check multinode_nbctl lsp-add public ln-lublic
> > > > +check multinode_nbctl lsp-set-type ln-lublic localnet
> > > > +check multinode_nbctl lsp-set-addresses ln-lublic unknown
> > > > +check multinode_nbctl lsp-set-options ln-lublic network_name=public
> > > > +
> > > > +check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
> > > > +check multinode_nbctl lsp-add public public-lr0
> > > > +check multinode_nbctl lsp-set-type public-lr0 router
> > > > +check multinode_nbctl lsp-set-addresses public-lr0 router
> > > > +check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
> > > > +check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
> > > > +check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
> > > > +
> > > > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> > > > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> > > > +
> > > > +# create some ACLs
> > > > +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
> > > > +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
> > > > +
> > > > +m_as ovn-gw-1 ip netns add ovn-ext0
> > > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
> > > > +m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
> > > > +
> > > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
> > > > +m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
> > > > +
> > > > +m_as ovn-gw-1 ip netns add ovn-ext2
> > > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
> > > > +m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev ext2
> > > > +
> > > > +m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > > +m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > > +m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > > +
> > > > +m_wait_for_ports_up sw1-port1
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +# Change pmtu for the geneve tunnel
> > > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > > +
> > > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 2>&1 |grep -q "message too long, mtu: 1342"])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1000])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 10 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1000"])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +# Create vxlan tunnels
> > > > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > > > +do
> > > > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
> > > > +done
> > > > +
> > > > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
> > > > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
> > > > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > > +
> > > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1150"])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
> > > > +
> > > > +AT_CLEANUP
> > > > +
> > > > +AT_SETUP([ovn multinode pmtu - gw_router_port])
> > > > +
> > > > +# Check that ovn-fake-multinode setup is up and running
> > > > +check_fake_multinode_setup
> > > > +
> > > > +# Delete the multinode NB and OVS resources before starting the test.
> > > > +cleanup_multinode_resources
> > > > +
> > > > +m_as ovn-chassis-1 ip link del sw0p1-p
> > > > +m_as ovn-chassis-2 ip link del sw0p2-p
> > > > +m_as ovn-chassis-2 ip link del sw1p1-p
> > > > +
> > > > +# Reset geneve tunnels
> > > > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > > > +do
> > > > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> > > > +done
> > > > +
> > > > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> > > > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> > > > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> > > > +
> > > > +# Test East-West switching
> > > > +check multinode_nbctl ls-add sw0
> > > > +check multinode_nbctl lsp-add sw0 sw0-port1
> > > > +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> > > > +check multinode_nbctl lsp-add sw0 sw0-port2
> > > > +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> > > > +
> > > > +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> > > > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> > > > +
> > > > +m_wait_for_ports_up
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +# Create the second logical switch with one port
> > > > +check multinode_nbctl ls-add sw1
> > > > +check multinode_nbctl lsp-add sw1 sw1-port1
> > > > +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> > > > +
> > > > +# Create a logical router and attach both logical switches
> > > > +check multinode_nbctl lr-add lr0
> > > > +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> > > > +check multinode_nbctl lsp-add sw0 sw0-lr0
> > > > +check multinode_nbctl lsp-set-type sw0-lr0 router
> > > > +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> > > > +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> > > > +
> > > > +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> > > > +check multinode_nbctl lsp-add sw1 sw1-lr0
> > > > +check multinode_nbctl lsp-set-type sw1-lr0 router
> > > > +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> > > > +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> > > > +
> > > > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> > > > +
> > > > +# create external connection for N/S traffic
> > > > +check multinode_nbctl ls-add public
> > > > +check multinode_nbctl lsp-add public ln-lublic
> > > > +check multinode_nbctl lsp-set-type ln-lublic localnet
> > > > +check multinode_nbctl lsp-set-addresses ln-lublic unknown
> > > > +check multinode_nbctl lsp-set-options ln-lublic network_name=public
> > > > +
> > > > +check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
> > > > +check multinode_nbctl lsp-add public public-lr0
> > > > +check multinode_nbctl lsp-set-type public-lr0 router
> > > > +check multinode_nbctl lsp-set-addresses public-lr0 router
> > > > +check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
> > > > +check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
> > > > +check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
> > > > +
> > > > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> > > > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> > > > +
> > > > +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
> > > > +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
> > > > +
> > > > +# create some ACLs
> > > > +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
> > > > +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
> > > > +
> > > > +m_as ovn-gw-1 ip netns add ovn-ext0
> > > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
> > > > +m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
> > > > +
> > > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
> > > > +m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
> > > > +
> > > > +m_as ovn-gw-1 ip netns add ovn-ext2
> > > > +m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
> > > > +m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
> > > > +m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev ext2
> > > > +
> > > > +m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > > +m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > > +m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
> > > > +
> > > > +m_wait_for_ports_up sw1-port1
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > > +
> > > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > > +
> > > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 2>&1 |grep -q "message too long, mtu: 1342"])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])
> > > > +
> > > > +# Create vxlan tunnels
> > > > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > > > +do
> > > > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
> > > > +done
> > > > +
> > > > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
> > > > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
> > > > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > > +
> > > > +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1150"])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
> > > > +[0], [dnl
> > > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
> > > > +
> > > > +AT_CLEANUP
> > > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > > > index a267daca2..223e53991 100644
> > > > --- a/tests/ovn-northd.at
> > > > +++ b/tests/ovn-northd.at
> > > > @@ -6492,6 +6492,9 @@ AT_CAPTURE_FILE([lrflows])
> > > >
> > > >  # Check the flows in lr_in_admission stage
> > > >  AT_CHECK([grep lr_in_admission lrflows | grep cr-DR | sort], [0], [dnl
> > > > +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 02:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S1")), action=(outport = inport; inport = "DR-S1"; next;)
> > > > +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 03:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S2")), action=(outport = inport; inport = "DR-S2"; next;)
> > > > +  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 04:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S3")), action=(outport = inport; inport = "DR-S3"; next;)
> > > >    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 02:ac:10:01:00:01 && inport == "DR-S1" && is_chassis_resident("cr-DR-S1")), action=(xreg0[[0..47]] = 02:ac:10:01:00:01; next;)
> > > >    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 03:ac:10:01:00:01 && inport == "DR-S2" && is_chassis_resident("cr-DR-S2")), action=(xreg0[[0..47]] = 03:ac:10:01:00:01; next;)
> > > >    table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 04:ac:10:01:00:01 && inport == "DR-S3" && is_chassis_resident("cr-DR-S3")), action=(xreg0[[0..47]] = 04:ac:10:01:00:01; next;)
> > > > @@ -6551,6 +6554,7 @@ AT_CAPTURE_FILE([lrflows])
> > > >
> > > >  # Check the flows in lr_in_admission stage
> > > >  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> > > > +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
> > > >    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > > >    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > > >  ])
> > > > @@ -6572,6 +6576,7 @@ AT_CAPTURE_FILE([lrflows])
> > > >
> > > >  # Check the flows in lr_in_admission stage
> > > >  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> > > > +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
> > > >    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > > >    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > > >  ])
> > > > @@ -6590,6 +6595,7 @@ AT_CAPTURE_FILE([lrflows])
> > > >
> > > >  # Check the flows in lr_in_admission stage
> > > >  AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
> > > > +  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
> > > >    table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > > >    table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
> > > >  ])
> > > > @@ -8343,6 +8349,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
> > > >  sort | sed 's/table=../table=??/' ], [0], [dnl
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> > > >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> > > >    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> > > > @@ -8369,6 +8378,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
> > > >  sort | sed 's/table=../table=??/' ], [0], [dnl
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> > > >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> > > >    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> > > > @@ -8396,6 +8408,9 @@ sort | sed 's/table=../table=??/' ], [0], [dnl
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> > > >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> > > >    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
> > > > @@ -8422,6 +8437,9 @@ sort | sed 's/table=../table=??/' ], [0], [dnl
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> > > >    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
> > > >    table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
> > > > @@ -8451,6 +8469,9 @@ AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
> > > >  sort | sed 's/table=../table=??/' ], [0], [dnl
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
> > > >    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > > +  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
> > > >    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
> > > >    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
> > > >    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=17);)
> > > > --
> > > > 2.43.0
> > > >
> > > >
> > > > _______________________________________________
> > > > dev mailing list
> > > > dev@openvswitch.org
> > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > >
> > >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index e10fb79dd..acb3b854f 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,7 @@  Post v23.09.0
     connection method and doesn't require additional probing.
     external_ids:ovn-openflow-probe-interval configuration option for
     ovn-controller no longer matters and is ignored.
+  - Enable PMTU discovery on geneve tunnels for E/W traffic.
 
 OVN v23.09.0 - 15 Sep 2023
 --------------------------
diff --git a/controller/physical.c b/controller/physical.c
index ba88e1d8b..78cde3e2a 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -2440,9 +2440,36 @@  physical_run(struct physical_ctx *p_ctx,
             OVS_NOT_REACHED();
         }
 
-        put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
-
+        struct ofpbuf *tunnel_ofpacts = ofpbuf_clone(&ofpacts);
+        put_resubmit(OFTABLE_LOCAL_OUTPUT, tunnel_ofpacts);
         ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match,
+                        tunnel_ofpacts, hc_uuid);
+        ofpbuf_delete(tunnel_ofpacts);
+
+        /* Add specific flows for E/W ICMPv{4,6} packets if tunnelled packets
+         * do not fit the path MTU.
+         */
+        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
+
+        /* IPv4 */
+        match_init_catchall(&match);
+        match_set_in_port(&match, tun->ofport);
+        match_set_dl_type(&match, htons(ETH_TYPE_IP));
+        match_set_nw_proto(&match, IPPROTO_ICMP);
+        match_set_icmp_type(&match, 3);
+        match_set_icmp_code(&match, 4);
+
+        ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
+                        &ofpacts, hc_uuid);
+        /* IPv6 */
+        match_init_catchall(&match);
+        match_set_in_port(&match, tun->ofport);
+        match_set_dl_type(&match, htons(ETH_TYPE_IPV6));
+        match_set_nw_proto(&match, IPPROTO_ICMPV6);
+        match_set_icmp_type(&match, 2);
+        match_set_icmp_code(&match, 0);
+
+        ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
                         &ofpacts, hc_uuid);
     }
 
diff --git a/northd/northd.c b/northd/northd.c
index 617f292fe..a020f2097 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -12794,6 +12794,75 @@  build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od,
     ds_destroy(&actions);
 }
 
+/* Logical router admission flows for ICMPv{4,6} "packet too big" errors
+ * redirected by the kernel that enter the cluster from the geneve ports.
+ */
+static void
+build_lrouter_icmp_packet_toobig_admin_flows(
+        struct ovn_port *op, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
+{
+    ovs_assert(op->nbrp);
+
+    if (is_l3dgw_port(op)) {
+        ds_clear(match);
+        ds_put_format(match,
+                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
+                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
+                      "eth.dst == %s && !is_chassis_resident(%s)",
+                      op->nbrp->mac, op->cr_port->json_key);
+        ds_clear(actions);
+        ds_put_format(actions, "outport = inport; inport = %s; next;",
+                      op->json_key);
+        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 120,
+                      ds_cstr(match), ds_cstr(actions));
+    }
+
+    /* Default flow: 'packet too big' errors advance to the next stage. */
+    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 110,
+                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
+                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
+}
+
+static void
+build_lswitch_icmp_packet_toobig_admin_flows(
+        struct ovn_port *op, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
+{
+    ovs_assert(op->nbsp);
+
+    if (lsp_is_router(op->nbsp)) {
+        return;
+    }
+
+    struct ovn_datapath *od = op->od;
+    for (int i = 0; i < od->n_router_ports; i++) {
+        struct ovn_port *peer = od->router_ports[i]->peer;
+        if (!peer) {
+            continue;
+        }
+
+        ds_clear(match);
+        char *rp_port =
+            is_l3dgw_port(peer) ? peer->cr_port->json_key : peer->json_key;
+        ds_put_format(match,
+                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
+                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
+                      "eth.dst == %s && !is_chassis_resident(%s)",
+                      peer->nbrp->mac, rp_port);
+        ds_clear(actions);
+        ds_put_format(actions, "outport = %s; inport = %s; output;",
+                      od->router_ports[i]->json_key, op->json_key);
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
+                      ds_cstr(match), ds_cstr(actions));
+    }
+
+    /* Default flow: 'packet too big' errors advance to the next stage. */
+    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
+                  "(ip4 && icmp4.type == 3 && icmp4.code == 4) || "
+                  "(ip6 && icmp6.type == 2 && icmp6.code == 0)", "next; ");
+}
+
 static void
 build_lrouter_force_snat_flows_op(struct ovn_port *op,
                                   struct hmap *lflows,
@@ -16161,6 +16230,7 @@  build_lswitch_and_lrouter_iterate_by_lsp(struct ovn_port *op,
     build_lswitch_dhcp_options_and_response(op, lflows, meter_groups);
     build_lswitch_external_port(op, lflows);
     build_lswitch_ip_unicast_lookup(op, lflows, actions, match);
+    build_lswitch_icmp_packet_toobig_admin_flows(op, lflows, match, actions);
 
     /* Build Logical Router Flows. */
     build_ip_routing_flows_for_router_type_lsp(op, lr_ports, lflows);
@@ -16197,6 +16267,8 @@  build_lswitch_and_lrouter_iterate_by_lrp(struct ovn_port *op,
                                 &lsi->match, &lsi->actions, lsi->meter_groups);
     build_lrouter_force_snat_flows_op(op, lsi->lflows, &lsi->match,
                                       &lsi->actions);
+    build_lrouter_icmp_packet_toobig_admin_flows(op, lsi->lflows, &lsi->match,
+                                                 &lsi->actions);
 }
 
 static void *
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 97718821f..85576a845 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -372,6 +372,20 @@ 
 
     <h3>Ingress Table 1: Ingress Port Security - Apply</h3>
 
+    <p>
+      For each logical switch port <var>P</var> a priority-120 flow that
+      matches icmp{4,6} error 'packet too big' and <code>eth.dst ==
+      <var>D</var> &amp;&amp; !is_chassis_resident(<var>RP</var>)</code> where
+      <var>D</var> is the peer logical router port <var>RP</var> mac address,
+      stores <var>RP</var> peer port as outport, stores <var>P</var> as inport
+      and forwards the packet to the egress pipeline.
+    </p>
+
+    <p>
+      This table adds a priority-110 flow that matches icmp{4,6} error 'packet
+      too big' to forward the packet to the next stage in the pipeline.
+    </p>
+
     <p>
       This table drops the packets if the port security check failed
       in the previous stage i.e the register bit
@@ -2463,6 +2477,21 @@  output;
           (LBs, NAT).
         </p>
 
+        <p>
+          For each gateway port <var>GW</var> on a distributed logical router
+          a priority-120 flow that matches icmp{4,6} error 'packet too big' and
+          <code>eth.dst == <var>D</var> &amp;&amp; !is_chassis_resident(<var>
+          cr-GW</var>)</code> where <var>D</var> is the gateway port mac
+          address and <var>cr-GW</var> is the chassis resident port of
+          <var>GW</var>, copies inport to outport and stores <var>GW</var>
+          as inport.
+        </p>
+
+        <p>
+          This table adds a priority-110 flow that matches icmp{4,6} error 'packet
+          too big' to forward the packet to the next stage in the pipeline.
+        </p>
+
         <p>
           For a distributed logical router or for gateway router where
           the port is configured with <code>options:gateway_mtu</code>
diff --git a/tests/multinode.at b/tests/multinode.at
index 2b199b4bc..772134b7d 100644
--- a/tests/multinode.at
+++ b/tests/multinode.at
@@ -42,7 +42,6 @@  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | F
 3 packets transmitted, 3 received, 0% packet loss, time 0ms
 ])
 
-
 # Create the second logical switch with one port
 check multinode_nbctl ls-add sw1
 check multinode_nbctl lsp-add sw1 sw1-port1
@@ -72,3 +71,350 @@  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | F
 ])
 
 AT_CLEANUP
+
+AT_SETUP([ovn multinode pmtu - distributed router])
+
+# Check that ovn-fake-multinode setup is up and running
+check_fake_multinode_setup
+
+# Delete the multinode NB and OVS resources before starting the test.
+cleanup_multinode_resources
+
+m_as ovn-chassis-1 ip link del sw0p1-p
+m_as ovn-chassis-2 ip link del sw0p2-p
+m_as ovn-chassis-2 ip link del sw1p1-p
+
+# Reset geneve tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
+
+# Test East-West switching
+check multinode_nbctl ls-add sw0
+check multinode_nbctl lsp-add sw0 sw0-port1
+check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
+check multinode_nbctl lsp-add sw0 sw0-port2
+check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
+
+m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
+
+m_wait_for_ports_up
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Create the second logical switch with one port
+check multinode_nbctl ls-add sw1
+check multinode_nbctl lsp-add sw1 sw1-port1
+check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
+
+# Create a logical router and attach both logical switches
+check multinode_nbctl lr-add lr0
+check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
+check multinode_nbctl lsp-add sw0 sw0-lr0
+check multinode_nbctl lsp-set-type sw0-lr0 router
+check multinode_nbctl lsp-set-addresses sw0-lr0 router
+check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
+check multinode_nbctl lsp-add sw1 sw1-lr0
+check multinode_nbctl lsp-set-type sw1-lr0 router
+check multinode_nbctl lsp-set-addresses sw1-lr0 router
+check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
+
+# Create external connection for N/S traffic
+check multinode_nbctl ls-add public
+check multinode_nbctl lsp-add public ln-lublic
+check multinode_nbctl lsp-set-type ln-lublic localnet
+check multinode_nbctl lsp-set-addresses ln-lublic unknown
+check multinode_nbctl lsp-set-options ln-lublic network_name=public
+
+check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
+check multinode_nbctl lsp-add public public-lr0
+check multinode_nbctl lsp-set-type public-lr0 router
+check multinode_nbctl lsp-set-addresses public-lr0 router
+check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
+check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
+check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
+
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
+
+# create some ACLs
+check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
+check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
+
+m_as ovn-gw-1 ip netns add ovn-ext0
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
+m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
+
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
+m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
+
+m_as ovn-gw-1 ip netns add ovn-ext2
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
+m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev ext2
+
+m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
+m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
+m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
+
+m_wait_for_ports_up sw1-port1
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Change the path MTU for the geneve tunnel
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 2>&1 |grep -q "message too long, mtu: 1342"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1000])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 10 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1000"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Create vxlan tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1150"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
+
+AT_CLEANUP
+
+AT_SETUP([ovn multinode pmtu - gw_router_port])
+
+# Check that ovn-fake-multinode setup is up and running
+check_fake_multinode_setup
+
+# Delete the multinode NB and OVS resources before starting the test.
+cleanup_multinode_resources
+
+m_as ovn-chassis-1 ip link del sw0p1-p
+m_as ovn-chassis-2 ip link del sw0p2-p
+m_as ovn-chassis-2 ip link del sw1p1-p
+
+# Reset geneve tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
+
+# Test East-West switching
+check multinode_nbctl ls-add sw0
+check multinode_nbctl lsp-add sw0 sw0-port1
+check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
+check multinode_nbctl lsp-add sw0 sw0-port2
+check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
+
+m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
+
+m_wait_for_ports_up
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Create the second logical switch with one port
+check multinode_nbctl ls-add sw1
+check multinode_nbctl lsp-add sw1 sw1-port1
+check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
+
+# Create a logical router and attach both logical switches
+check multinode_nbctl lr-add lr0
+check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
+check multinode_nbctl lsp-add sw0 sw0-lr0
+check multinode_nbctl lsp-set-type sw0-lr0 router
+check multinode_nbctl lsp-set-addresses sw0-lr0 router
+check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
+check multinode_nbctl lsp-add sw1 sw1-lr0
+check multinode_nbctl lsp-set-type sw1-lr0 router
+check multinode_nbctl lsp-set-addresses sw1-lr0 router
+check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
+
+# Create external connection for N/S traffic
+check multinode_nbctl ls-add public
+check multinode_nbctl lsp-add public ln-lublic
+check multinode_nbctl lsp-set-type ln-lublic localnet
+check multinode_nbctl lsp-set-addresses ln-lublic unknown
+check multinode_nbctl lsp-set-options ln-lublic network_name=public
+
+check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
+check multinode_nbctl lsp-add public public-lr0
+check multinode_nbctl lsp-set-type public-lr0 router
+check multinode_nbctl lsp-set-addresses public-lr0 router
+check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
+check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
+check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
+
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
+
+check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
+check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
+
+# create some ACLs
+check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
+check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
+
+m_as ovn-gw-1 ip netns add ovn-ext0
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
+m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext0 up
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.0.1/24 dev ext0
+
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext1 -- set interface ext1 type=internal
+m_as ovn-gw-1 ip link set ext1 netns ovn-ext0
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip link set ext1 up
+m_as ovn-gw-1 ip netns exec ovn-ext0 ip addr add 172.20.1.1/24 dev ext1
+
+m_as ovn-gw-1 ip netns add ovn-ext2
+m_as ovn-gw-1 ovs-vsctl add-port br-ex ext2 -- set interface ext2 type=internal
+m_as ovn-gw-1 ip link set ext2 netns ovn-ext2
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip link set ext2 up
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip addr add 172.20.1.2/24 dev ext2
+m_as ovn-gw-1 ip netns exec ovn-ext2 ip route add default via 172.20.1.1 dev ext2
+
+m_as ovn-gw-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
+m_as ovn-chassis-1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
+m_as ovn-chassis-2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=public:br-ex
+
+m_wait_for_ports_up sw1-port1
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1400 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping6 -c 5 -s 1450 -M do 2000::3 2>&1 |grep -q "message too long, mtu: 1342"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])
+
+# Create vxlan tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=vxlan
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q vxlan_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q vxlan_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q vxlan_sys])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1150"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
+
+AT_CLEANUP
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index a267daca2..223e53991 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -6492,6 +6492,9 @@  AT_CAPTURE_FILE([lrflows])
 
 # Check the flows in lr_in_admission stage
 AT_CHECK([grep lr_in_admission lrflows | grep cr-DR | sort], [0], [dnl
+  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 02:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S1")), action=(outport = inport; inport = "DR-S1"; next;)
+  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 03:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S2")), action=(outport = inport; inport = "DR-S2"; next;)
+  table=0 (lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 04:ac:10:01:00:01 && !is_chassis_resident("cr-DR-S3")), action=(outport = inport; inport = "DR-S3"; next;)
   table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 02:ac:10:01:00:01 && inport == "DR-S1" && is_chassis_resident("cr-DR-S1")), action=(xreg0[[0..47]] = 02:ac:10:01:00:01; next;)
   table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 03:ac:10:01:00:01 && inport == "DR-S2" && is_chassis_resident("cr-DR-S2")), action=(xreg0[[0..47]] = 03:ac:10:01:00:01; next;)
   table=0 (lr_in_admission    ), priority=50   , match=(eth.dst == 04:ac:10:01:00:01 && inport == "DR-S3" && is_chassis_resident("cr-DR-S3")), action=(xreg0[[0..47]] = 04:ac:10:01:00:01; next;)
@@ -6551,6 +6554,7 @@  AT_CAPTURE_FILE([lrflows])
 
 # Check the flows in lr_in_admission stage
 AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
+  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
 ])
@@ -6572,6 +6576,7 @@  AT_CAPTURE_FILE([lrflows])
 
 # Check the flows in lr_in_admission stage
 AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
+  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
 ])
@@ -6590,6 +6595,7 @@  AT_CAPTURE_FILE([lrflows])
 
 # Check the flows in lr_in_admission stage
 AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
+  table=??(lr_in_admission    ), priority=120  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.dst == 00:00:00:00:00:01 && !is_chassis_resident("cr-lrp1")), action=(outport = inport; inport = "lrp1"; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
   table=??(lr_in_admission    ), priority=50   , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
 ])
@@ -8343,6 +8349,9 @@  AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
 sort | sed 's/table=../table=??/' ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
@@ -8369,6 +8378,9 @@  AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
 sort | sed 's/table=../table=??/' ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
@@ -8396,6 +8408,9 @@  sort | sed 's/table=../table=??/' ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
@@ -8422,6 +8437,9 @@  sort | sed 's/table=../table=??/' ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_apply_port_sec), priority=0    , match=(1), action=(next;)
@@ -8451,6 +8469,9 @@  AT_CHECK([cat sw0flows | grep -e port_sec -e ls_in_l2_lkup -e ls_in_l2_unknown |
 sort | sed 's/table=../table=??/' ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
+  table=??(ls_in_check_port_sec), priority=110  , match=((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)), action=(next; )
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=17);)