diff mbox series

[ovs-dev,v2] provider networks: Provide the option to tunnel traffic.

Message ID 20240724160130.551695-1-numans@ovn.org
State Accepted
Headers show
Series [ovs-dev,v2] provider networks: Provide the option to tunnel traffic. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Numan Siddique July 24, 2024, 4:01 p.m. UTC
From: Numan Siddique <numans@ovn.org>

This patch adds a global config option - 'always_tunnel' and
when set to true, any traffic destined to a VIF logical port of a
provider logical switch (having localnet port(s)), is tunnelled to
the destination chassis, instead of sending it out via the localnet
port.  This feature is useful for the following reasons:

1.  CMS can add both provider logical switches and overlay logical
    switches to a logical router.  With this option set, E-W routing between
    these logical switches will be tunnelled all the time.  The router port
    mac addresses are not leaked from multiple chassis to the upstream
    switches anymore.

2.  NATting will work as expected either in the gateway chassis or on
    the source VIF chassis (if external_mac and logical_port set).

3.  With this option set, there is no need to centralize routing
    for provider logical switches ('reside-on-redirect-chassis').

4.  With the commits [1] now merged, MTU issues arising due to tunnel
    overhead will be handled gracefully.

[1] - 3faadc76ad71 ("northd: Fix pmtud for non routed traffic.")
      221476a01f26 ("ovn: Add tunnel PMTUD support.")

Reported-at: https://issues.redhat.com/browse/FDP-209
Signed-off-by: Numan Siddique <numans@ovn.org>
---

v1 -> v2
-------
   * Changed the config option from 'provider_network_overlay' to
     'always_tunnel' as suggested by Mark.
   * Rebased.


 controller/ovn-controller.c |  27 +++
 controller/physical.c       |  10 +-
 controller/physical.h       |   1 +
 northd/en-global-config.c   |   5 +
 ovn-nb.xml                  |  16 ++
 tests/multinode-macros.at   |  19 ++
 tests/multinode.at          | 358 ++++++++++++++++++++++++++++++++++++
 tests/ovn.at                | 157 ++++++++++++++++
 8 files changed, 592 insertions(+), 1 deletion(-)

Comments

Mark Michelson July 24, 2024, 4:34 p.m. UTC | #1
Thanks for the update Numan.

Acked-by: Mark Michelson <mmichels@redhat.com>

On 7/24/24 12:01, numans@ovn.org wrote:
> From: Numan Siddique <numans@ovn.org>
> 
> This patch adds a global config option - 'always_tunnel' and
> when set to true, any traffic destined to a VIF logical port of a
> provider logical switch (having localnet port(s)), is tunnelled to
> the destination chassis, instead of sending it out via the localnet
> port.  This feature is useful for the following reasons:
> 
> 1.  CMS can add both provider logical switches and overlay logical
>      swithes to a logical router.  With this option set, E-W routing between
>      these logical switches will be tunnelled all the time.  The router port
>      mac addresses are not leaked from multiple chassis to the upstream
>      switches anymore.
> 
> 2.  NATting will work as expected either in the gateway chassis or on
>      the source VIF chassis (if external_mac and logical_port set).
> 
> 3.  With this option set, there is no need to centralize routing
>      for provider logical switches ('reside-on-redirect-chassis').
> 
> 4.  With the commits [1] now merged, MTU issues arising due to tunnel
>      overhead will be handled gracefully.
> 
> [1] - 3faadc76ad71 ("northd: Fix pmtud for non routed traffic.")
>        221476a01f26 ("ovn: Add tunnel PMTUD support.")
> 
> Reported-at: https://issues.redhat.com/browse/FDP-209
> Signed-off-by: Numan Siddique <numans@ovn.org>
> ---
> 
> v1 -> v2
> -------
>     * Changed the config option from 'provider_network_overlay' to
>       'always_tunnel' as suggested by Mark.
>     * Rebased.
> 
> 
>   controller/ovn-controller.c |  27 +++
>   controller/physical.c       |  10 +-
>   controller/physical.h       |   1 +
>   northd/en-global-config.c   |   5 +
>   ovn-nb.xml                  |  16 ++
>   tests/multinode-macros.at   |  19 ++
>   tests/multinode.at          | 358 ++++++++++++++++++++++++++++++++++++
>   tests/ovn.at                | 157 ++++++++++++++++
>   8 files changed, 592 insertions(+), 1 deletion(-)
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index b3a265230c..4e30302ea6 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -3349,6 +3349,11 @@ non_vif_data_ovs_iface_handler(struct engine_node *node, void *data OVS_UNUSED)
>   
>   struct ed_type_northd_options {
>       bool explicit_arp_ns_output;
> +    bool always_tunnel; /* Indicates if the traffic to the
> +                         * logical port of a bridged logical
> +                         * switch (i.e with localnet port) should
> +                         * be tunnelled or sent via the localnet
> +                         * port.  Default value is 'false'. */
>   };
>   
>   
> @@ -3380,6 +3385,12 @@ en_northd_options_run(struct engine_node *node, void *data)
>                               false)
>               : false;
>   
> +    n_opts->always_tunnel =
> +            sb_global
> +            ? smap_get_bool(&sb_global->options, "always_tunnel",
> +                            false)
> +            : false;
> +
>       engine_set_node_state(node, EN_UPDATED);
>   }
>   
> @@ -3403,6 +3414,17 @@ en_northd_options_sb_sb_global_handler(struct engine_node *node, void *data)
>           engine_set_node_state(node, EN_UPDATED);
>       }
>   
> +    bool always_tunnel =
> +            sb_global
> +            ? smap_get_bool(&sb_global->options, "always_tunnel",
> +                            false)
> +            : false;
> +
> +    if (always_tunnel != n_opts->always_tunnel) {
> +        n_opts->always_tunnel = always_tunnel;
> +        engine_set_node_state(node, EN_UPDATED);
> +    }
> +
>       return true;
>   }
>   
> @@ -4315,6 +4337,9 @@ static void init_physical_ctx(struct engine_node *node,
>           engine_get_input_data("ct_zones", node);
>       struct simap *ct_zones = &ct_zones_data->ctx.current;
>   
> +    struct ed_type_northd_options *n_opts =
> +        engine_get_input_data("northd_options", node);
> +
>       parse_encap_ips(ovs_table, &p_ctx->n_encap_ips, &p_ctx->encap_ips);
>       p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
>       p_ctx->sbrec_port_binding_by_datapath = sbrec_port_binding_by_datapath;
> @@ -4332,6 +4357,7 @@ static void init_physical_ctx(struct engine_node *node,
>       p_ctx->local_bindings = &rt_data->lbinding_data.bindings;
>       p_ctx->patch_ofports = &non_vif_data->patch_ofports;
>       p_ctx->chassis_tunnels = &non_vif_data->chassis_tunnels;
> +    p_ctx->always_tunnel = n_opts->always_tunnel;
>   
>       struct controller_engine_ctx *ctrl_ctx = engine_get_context()->client_ctx;
>       p_ctx->if_mgr = ctrl_ctx->if_mgr;
> @@ -5032,6 +5058,7 @@ main(int argc, char *argv[])
>        */
>       engine_add_input(&en_pflow_output, &en_non_vif_data,
>                        NULL);
> +    engine_add_input(&en_pflow_output, &en_northd_options, NULL);
>       engine_add_input(&en_pflow_output, &en_ct_zones,
>                        pflow_output_ct_zones_handler);
>       engine_add_input(&en_pflow_output, &en_sb_chassis,
> diff --git a/controller/physical.c b/controller/physical.c
> index 22756810fd..876ceccf17 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -1489,6 +1489,7 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>                         const struct if_status_mgr *if_mgr,
>                         size_t n_encap_ips,
>                         const char **encap_ips,
> +                      bool always_tunnel,
>                         struct ovn_desired_flow_table *flow_table,
>                         struct ofpbuf *ofpacts_p)
>   {
> @@ -1922,7 +1923,7 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>                               binding->header_.uuid.parts[0], &match,
>                               ofpacts_p, &binding->header_.uuid);
>           }
> -    } else if (access_type == PORT_LOCALNET) {
> +    } else if (access_type == PORT_LOCALNET && !always_tunnel) {
>           /* Remote port connected by localnet port */
>           /* Table 40, priority 100.
>            * =======================
> @@ -1930,6 +1931,11 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>            * Implements switching to localnet port. Each flow matches a
>            * logical output port on remote hypervisor, switch the output port
>            * to connected localnet port and resubmits to same table.
> +         *
> +         * Note: If 'always_tunnel' is true, then
> +         * put_remote_port_redirect_overlay() called from below takes care
> +         * of adding the flow in OFTABLE_REMOTE_OUTPUT table to tunnel to
> +         * the destination chassis.
>            */
>   
>           ofpbuf_clear(ofpacts_p);
> @@ -2355,6 +2361,7 @@ physical_eval_port_binding(struct physical_ctx *p_ctx,
>                             p_ctx->if_mgr,
>                             p_ctx->n_encap_ips,
>                             p_ctx->encap_ips,
> +                          p_ctx->always_tunnel,
>                             flow_table, &ofpacts);
>       ofpbuf_uninit(&ofpacts);
>   }
> @@ -2482,6 +2489,7 @@ physical_run(struct physical_ctx *p_ctx,
>                                 p_ctx->if_mgr,
>                                 p_ctx->n_encap_ips,
>                                 p_ctx->encap_ips,
> +                              p_ctx->always_tunnel,
>                                 flow_table, &ofpacts);
>       }
>   
> diff --git a/controller/physical.h b/controller/physical.h
> index 7fe8ee3c18..4dd228cf8f 100644
> --- a/controller/physical.h
> +++ b/controller/physical.h
> @@ -69,6 +69,7 @@ struct physical_ctx {
>       size_t n_encap_ips;
>       const char **encap_ips;
>       struct physical_debug debug;
> +    bool always_tunnel;
>   };
>   
>   void physical_register_ovs_idl(struct ovsdb_idl *);
> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
> index 5b71ede1f2..c5e65966b8 100644
> --- a/northd/en-global-config.c
> +++ b/northd/en-global-config.c
> @@ -521,6 +521,11 @@ check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
>           return true;
>       }
>   
> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
> +                           "always_tunnel", false)) {
> +        return true;
> +    }
> +
>       return false;
>   }
>   
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 9552534f6d..0f9a1005a8 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -391,6 +391,22 @@
>           non-<code>VXLAN mode</code> tunnel IDs allocation logic.
>         </column>
>   
> +      <column name="options" key="always_tunnel"
> +           type='{"type": "boolean"}'>
> +        <p>
> +          If set to true, then the traffic destined to a VIF of a provider
> +          logical switch (having a localnet port) will be tunnelled instead
> +          of sending it via the localnet port.  This option will be useful
> +          if CMS wants to connect overlay logical switches (without
> +          localnet port) and provider logical switches to a router.  Without
> +          this option set, the traffic path will be a mix of tunnelling and
> +          localnet ports (since routing is distributed) resulting in the
> +          leakage of the router port mac address to the upstream switches
> +          and undefined behavior if NATting is involved.  This option is
> +          disabled by default.
> +        </p>
> +      </column>
> +
>         <group title="Options for configuring interconnection route advertisement">
>           <p>
>             These options control how routes are advertised between OVN
> diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at
> index ef41087ae3..786e564860 100644
> --- a/tests/multinode-macros.at
> +++ b/tests/multinode-macros.at
> @@ -22,6 +22,25 @@ m4_define([M_NS_CHECK_EXEC],
>       [ AT_CHECK([M_NS_EXEC([$1], [$2], [$3])], m4_shift(m4_shift(m4_shift($@)))) ]
>   )
>   
> +# M_DAEMONIZE([fake_node],[command],[pidfile])
> +m4_define([M_DAEMONIZE],
> +    [podman exec $1 $2 & echo $! > $3
> +     echo "kill \`cat $3\`" >> cleanup
> +    ]
> +)
> +
> +# M_START_TCPDUMP([fake_node], [params], [name])
> +#
> +# Helper to properly start tcpdump and wait for the startup.
> +# The tcpdump output is available in <name>.tcpdump file.
> +m4_define([M_START_TCPDUMP],
> +    [
> +     podman exec $1 tcpdump -l $2 >$3.tcpdump 2>$3.stderr &
> +     OVS_WAIT_UNTIL([grep -q "listening" $3.stderr])
> +    ]
> +)
> +
> +
>   OVS_START_SHELL_HELPERS
>   
>   m_as() {
> diff --git a/tests/multinode.at b/tests/multinode.at
> index 1e6eeb6610..a7231130ac 100644
> --- a/tests/multinode.at
> +++ b/tests/multinode.at
> @@ -1034,3 +1034,361 @@ done
>   M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
>   
>   AT_CLEANUP
> +
> +AT_SETUP([ovn provider network - always_tunnel])
> +
> +# Check that ovn-fake-multinode setup is up and running
> +check_fake_multinode_setup
> +
> +# Delete the multinode NB and OVS resources before starting the test.
> +cleanup_multinode_resources
> +
> +m_as ovn-chassis-1 ip link del sw0p1-p
> +m_as ovn-chassis-2 ip link del sw0p2-p
> +
> +# Reset geneve tunnels
> +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> +do
> +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> +done
> +
> +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> +
> +# The goal of this test case is to see the traffic works for
> +# E-W switching and routing when the logical switches has localnet ports
> +# and the option - always_tunnel=true is set.  When this option
> +# is set, traffic is tunneled to the destination chassis instead of using
> +# localnet ports.
> +
> +check multinode_nbctl ls-add sw0
> +check multinode_nbctl lsp-add sw0 sw0-port1
> +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> +check multinode_nbctl lsp-add sw0 sw0-port2
> +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> +
> +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> +
> +m_wait_for_ports_up
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +# Create the second logical switch with one port
> +check multinode_nbctl ls-add sw1
> +check multinode_nbctl lsp-add sw1 sw1-port1
> +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> +
> +# Create a logical router and attach both logical switches
> +check multinode_nbctl lr-add lr0
> +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> +check multinode_nbctl lsp-add sw0 sw0-lr0
> +check multinode_nbctl lsp-set-type sw0-lr0 router
> +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> +
> +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> +check multinode_nbctl lsp-add sw1 sw1-lr0
> +check multinode_nbctl lsp-set-type sw1-lr0 router
> +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> +
> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> +
> +# create external connection for N/S traffic
> +check multinode_nbctl ls-add public
> +check multinode_nbctl lsp-add public ln-lublic
> +check multinode_nbctl lsp-set-type ln-lublic localnet
> +check multinode_nbctl lsp-set-addresses ln-lublic unknown
> +check multinode_nbctl lsp-set-options ln-lublic network_name=public
> +
> +check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
> +check multinode_nbctl lsp-add public public-lr0
> +check multinode_nbctl lsp-set-type public-lr0 router
> +check multinode_nbctl lsp-set-addresses public-lr0 router
> +check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
> +check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
> +check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
> +
> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> +
> +# create localnet ports for sw0 and sw1
> +check multinode_nbctl lsp-add sw0 ln-sw0
> +check multinode_nbctl lsp-set-type ln-sw0 localnet
> +check multinode_nbctl lsp-set-addresses ln-sw0 unknown
> +check multinode_nbctl lsp-set-options ln-sw0 network_name=public
> +check multinode_nbctl set logical_switch_port ln-sw0 tag_request=100
> +
> +check multinode_nbctl lsp-add sw1 ln-sw1
> +check multinode_nbctl lsp-set-type ln-sw1 localnet
> +check multinode_nbctl lsp-set-addresses ln-sw1 unknown
> +check multinode_nbctl lsp-set-options ln-sw1 network_name=public
> +check multinode_nbctl set logical_switch_port ln-sw1 tag_request=101
> +
> +check multinode_nbctl --wait=hv sync
> +
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
> +50:54:00:00:00:03 > 50:54:00:00:00:04, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 10.0.0.4: ICMP echo request,
> +50:54:00:00:00:04 > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.4 > 10.0.0.3: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
> +])
> +
> +m_as ovn-chassis-1 killall tcpdump
> +rm -f *.tcpdump
> +rm -f *.stderr
> +
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
> +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 101, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 20.0.0.3: ICMP echo request,
> +00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 20.0.0.3 > 10.0.0.3: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
> +])
> +
> +# Set the option always_tunnel=true.
> +# Traffic from sw0p1 to sw0p2 should be tunneled.
> +check multinode_nbctl set NB_Global . options:always_tunnel=true
> +check multinode_nbctl --wait=hv sync
> +
> +m_as ovn-chassis-1 killall tcpdump
> +rm -f *.tcpdump
> +rm -f *.stderr
> +
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> +50:54:00:00:00:03 > 50:54:00:00:00:04, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 10.0.0.4: ICMP echo request,
> +50:54:00:00:00:04 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 10.0.0.4 > 10.0.0.3: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
> +])
> +
> +m_as ovn-chassis-1 killall tcpdump
> +rm -f *.tcpdump
> +rm -f *.stderr
> +
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 20.0.0.3: ICMP echo request,
> +00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 10.0.0.3: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
> +])
> +
> +m_as ovn-chassis-1 killall tcpdump
> +rm -f *.tcpdump
> +rm -f *.stderr
> +
> +# Delete ln-sw1.
> +check multinode_nbctl --wait=hv lsp-del ln-sw1
> +# Traffic from sw0p1 to sw1p1 should be tunneled.
> +
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 20.0.0.3: ICMP echo request,
> +00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 10.0.0.3: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
> +])
> +
> +m_as ovn-chassis-1 killall tcpdump
> +rm -f *.tcpdump
> +rm -f *.stderr
> +
> +# Make sure that traffic from sw0 still goes out of localnet port
> +# for IPs not managed by OVN.
> +# Create a fake vm in br-ex on ovn-gw-1 with IP - 10.0.0.10
> +m_as ovn-gw-1 ip netns add sw0-p10
> +m_as ovn-gw-1 ovs-vsctl add-port br-ex sw0-p10 -- set interface sw0-p10 type=internal
> +m_as ovn-gw-1 ovs-vsctl set port sw0-p10 tag=100
> +m_as ovn-gw-1 ip link set sw0-p10 netns sw0-p10
> +m_as ovn-gw-1 ip netns exec sw0-p10 ip link set sw0-p10 up
> +m_as ovn-gw-1 ip netns exec sw0-p10 ip link set sw0-p10 address 32:31:8c:da:64:4f
> +m_as ovn-gw-1 ip netns exec sw0-p10 ip addr add 10.0.0.10/24 dev sw0-p10
> +
> +# Ping from sw0p1 (on ovn-chassis-1) to sw0-p10 which is in ovn-gw-1 on
> +# external bridge.  The traffic path is
> +# sw0p1 -> br-int -> localnet port (vlan tagged 100) -> br-ex -> eth2 of ovn-chassis-1 to
> +# eth2 of ovn-gw-1  -> br-ex -> sw0-p10
> +
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> +M_START_TCPDUMP([ovn-gw-1], [-c 2 -neei eth2 icmp], [gw1_eth2])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.10 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +m_as ovn-chassis-1 killall tcpdump
> +m_as ovn-gw-1 killall tcpdump
> +
> +AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
> +50:54:00:00:00:03 > 32:31:8c:da:64:4f, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 10.0.0.10: ICMP echo request,
> +32:31:8c:da:64:4f > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.10 > 10.0.0.3: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
> +
> +])
> +
> +AT_CHECK([cat gw1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
> +50:54:00:00:00:03 > 32:31:8c:da:64:4f, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 10.0.0.10: ICMP echo request,
> +32:31:8c:da:64:4f > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.10 > 10.0.0.3: ICMP echo reply,
> +])
> +
> +rm -f *.tcpdump
> +rm -f *.stderr
> +
> +# Add dnat_and_snat entry for 10.0.0.3 <-> 172.20.0.110
> +check multinode_nbctl --wait=hv lr-nat-add lr0 dnat_and_snat 172.20.0.110 10.0.0.3 sw0-port1 30:54:00:00:00:03
> +
> +# Ping from sw1-p1 to 172.20.0.110
> +# Traffic path is
> +# sw1-p1 in ovn-chassis-2 -> tunnel -> ovn-gw-1 -> In ovn-gw-1 SNAT 20.0.0.3 to 172.20.0.100 ->
> +#  -> ln-public -> br-ex -> eth2 -> ovn-chassis-1 -> br-ex -> ln-public -> br-int ->
> +#  -> DNAT 172.20.0.110 to 10.0.0.3 -> sw0-p1 with src ip 172.20.0.100 and dst ip 10.0.0.3.
> +
> +M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei genev_sys_6081 icmp], [ch2_genev])
> +M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei eth2 icmp], [ch2_eth2])
> +M_START_TCPDUMP([ovn-gw-1], [-c 2 -neei genev_sys_6081 icmp], [gw1_geneve])
> +M_START_TCPDUMP([ovn-gw-1], [-c 2 -neei eth2 icmp], [gw1_eth2])
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [ping -q -c 3 -i 0.3 -w 2 172.20.0.110 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +m_as ovn-chassis-1 killall tcpdump
> +m_as ovn-chassis-2 killall tcpdump
> +m_as ovn-gw-1 killall tcpdump
> +
> +AT_CHECK([cat ch2_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> +00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
> +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat gw1_geneve.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> +00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
> +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat gw1_eth2.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> +00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 172.20.0.110: ICMP echo request,
> +30:54:00:00:00:03 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 172.20.0.100: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> +00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 172.20.0.110: ICMP echo request,
> +30:54:00:00:00:03 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 172.20.0.100: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
> +
> +])
> +
> +rm -f *.tcpdump
> +rm -f *.stderr
> +
> +# Now clear the logical_port of dnat_and_snat entry.  ovn-gw-1 should handle the DNAT.
> +check multinode_nbctl lr-nat-del lr0 dnat_and_snat 172.20.0.110
> +check multinode_nbctl --wait=hv lr-nat-add lr0 dnat_and_snat 172.20.0.110 10.0.0.3
> +# Ping from sw1-p1 to 172.20.0.110
> +# Traffic path is
> +# sw1-p1 in ovn-chassis-2 -> tunnel -> ovn-gw-1 -> In ovn-gw-1 SNAT 20.0.0.3 to 172.20.0.100 ->
> +#  DNAT 172.20.0.110 -> 10.0.0.3 -> tunnel -> ovn-chassis-1 -> br-int -> sw0p1
> +
> +M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei genev_sys_6081 icmp], [ch2_genev])
> +M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei eth2 icmp], [ch2_eth2])
> +M_START_TCPDUMP([ovn-gw-1], [-c 4 -neei genev_sys_6081 icmp], [gw1_geneve])
> +M_START_TCPDUMP([ovn-gw-1], [-c 4 -neei eth2 icmp], [gw1_eth2])
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [ping -q -c 3 -i 0.3 -w 2 172.20.0.110 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +m_as ovn-chassis-1 killall tcpdump
> +m_as ovn-chassis-2 killall tcpdump
> +m_as ovn-gw-1 killall tcpdump
> +
> +AT_CHECK([cat ch2_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> +00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
> +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
> +
> +])
> +
> +AT_CHECK([cat gw1_geneve.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> +00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
> +00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 10.0.0.3: ICMP echo request,
> +00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 172.20.0.100: ICMP echo reply,
> +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat gw1_eth2.tcpdump], [0], [dnl
> +
> +])
> +
> +AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> +00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 10.0.0.3: ICMP echo request,
> +00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 172.20.0.100: ICMP echo reply,
> +])
> +
> +AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
> +
> +])
> +
> +AT_CLEANUP
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 13b393932e..2ced7c0b2f 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -38339,6 +38339,163 @@ OVN_CLEANUP([hv1
>   AT_CLEANUP
>   ])
>   
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([Provider network - always tunnel])
> +ovn_start
> +net_add n1
> +
> +for hv in 1 2; do
> +    sim_add hv${hv}
> +    as hv${hv}
> +    ovs-vsctl add-br br-phys
> +    ovn_attach n1 br-phys 192.168.0.${hv}
> +    ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
> +done
> +
> +check ovn-nbctl ls-add sw0
> +check ovn-nbctl lsp-add sw0 sw0-p1 -- lsp-set-addresses sw0-p1 "00:00:10:01:02:03 10.0.0.3"
> +check ovn-nbctl lsp-add sw0 sw0-p2 -- lsp-set-addresses sw0-p2 "00:00:04:01:02:04 10.0.0.4"
> +
> +check ovn-nbctl ls-add sw1
> +check ovn-nbctl lsp-add sw1 sw1-p1 -- lsp-set-addresses sw1-p1 "00:00:20:01:02:03 20.0.0.3"
> +check ovn-nbctl lsp-add sw1 sw1-p2 -- lsp-set-addresses sw1-p2 "00:00:20:01:02:04 20.0.0.4"
> +
> +check ovn-nbctl lr-add lr0
> +check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24
> +check ovn-nbctl lsp-add sw0 sw0-lr0
> +check ovn-nbctl lsp-set-type sw0-lr0 router
> +check ovn-nbctl lsp-set-addresses sw0-lr0 router
> +check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> +
> +check ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24
> +check ovn-nbctl lsp-add sw1 sw1-lr0
> +check ovn-nbctl lsp-set-type sw1-lr0 router
> +check ovn-nbctl lsp-set-addresses sw1-lr0 router
> +check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> +
> +as hv1
> +ovs-vsctl add-port br-int vif11 -- \
> +    set Interface vif11 external-ids:iface-id=sw0-p1 \
> +                              options:tx_pcap=hv1/vif11-tx.pcap \
> +                              options:rxq_pcap=hv1/vif11-rx.pcap \
> +                              ofport-request=11
> +ovs-vsctl add-port br-int vif12 -- \
> +    set Interface vif12 external-ids:iface-id=sw1-p1 \
> +                              options:tx_pcap=hv1/vif12-tx.pcap \
> +                              options:rxq_pcap=hv1/vif12-rx.pcap \
> +                              ofport-request=12
> +
> +as hv2
> +ovs-vsctl add-port br-int vif21 -- \
> +    set Interface vif21 external-ids:iface-id=sw0-p2 \
> +                              options:tx_pcap=hv1/vif21-tx.pcap \
> +                              options:rxq_pcap=hv1/vif21-rx.pcap \
> +                              ofport-request=21
> +ovs-vsctl add-port br-int vif22 -- \
> +    set Interface vif22 external-ids:iface-id=sw1-p2 \
> +                              options:tx_pcap=hv1/vif22-tx.pcap \
> +                              options:rxq_pcap=hv1/vif22-rx.pcap \
> +                              ofport-request=22
> +
> +check ovn-nbctl --wait=hv sync
> +wait_for_ports_up
> +
> +sw0_dp_key=$(printf "%x" $(fetch_column Datapath_Binding tunnel_key external_ids:name=sw0))
> +sw0p1_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw0-p1))
> +sw0p2_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw0-p2))
> +
> +sw1_dp_key=$(printf "%x" $(fetch_column Datapath_Binding tunnel_key external_ids:name=sw1))
> +sw1p1_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw1-p1))
> +sw1p2_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw1-p2))
> +
> +check_output_flows_tunnelled() {
> +  hv=$1
> +  dp_key=$2
> +  dp_rport=$3
> +  AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int table=OFTABLE_REMOTE_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} | ofctl_strip_all | grep -v NXST_FLOW], [0], [dnl
> + table=OFTABLE_REMOTE_OUTPUT, priority=100,reg13=0/0xffff0000,reg15=0x${dp_rport},metadata=0x${dp_key} actions=load:0x${dp_key}->NXM_NX_TUN_ID[[0..23]],set_field:0x${dp_rport}->tun_metadata0,move:NXM_NX_REG14[[0..14]]->NXM_NX_TUN_METADATA0[[16..30]],output:1,resubmit(,OFTABLE_LOCAL_OUTPUT)
> +])
> +}
> +
> +check_output_flows_via_localnet() {
> +  hv=$1
> +  dp_key=$2
> +  dp_rport=$3
> +  lnport_key=$4
> +  AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int table=OFTABLE_REMOTE_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} | ofctl_strip_all | grep -v NXST_FLOW], [1], [dnl
> +])
> +
> +  AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int table=OFTABLE_LOCAL_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} | ofctl_strip_all | grep -v NXST_FLOW], [0], [dnl
> + table=OFTABLE_LOCAL_OUTPUT, priority=100,reg15=0x${dp_rport},metadata=0x${dp_key} actions=load:0x${lnport_key}->NXM_NX_REG15[[]],resubmit(,OFTABLE_LOCAL_OUTPUT)
> +])
> +}
> +
> +# There are no localnet ports in sw0 and sw1.  So the pkts are tunnelled.
> +check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
> +check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
> +check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
> +check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
> +
> +# Add localnet port to sw0
> +check ovn-nbctl lsp-add sw0 ln-sw0 -- lsp-set-addresses ln-sw0 unknown -- lsp-set-type ln-sw0 localnet
> +check ovn-nbctl --wait=hv lsp-set-options ln-sw0 network_name=physnet1 -- set logical_switch_port ln-sw0 tag_request=100
> +lnsw0_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=ln-sw0))
> +
> +# Flows should be installed to use localnet port for sw0.
> +check_output_flows_via_localnet hv1 ${sw0_dp_key} ${sw0p2_key} ${lnsw0_key}
> +check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
> +check_output_flows_via_localnet hv2 ${sw0_dp_key} ${sw0p1_key} ${lnsw0_key}
> +check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
> +
> +# Add localnet port to sw1
> +check ovn-nbctl lsp-add sw1 ln-sw1 -- lsp-set-addresses ln-sw1 unknown -- lsp-set-type ln-sw1 localnet
> +check ovn-nbctl --wait=hv lsp-set-options ln-sw1 network_name=physnet1 -- set logical_switch_port ln-sw1 tag_request=101
> +lnsw1_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=ln-sw1))
> +
> +# Flows should be installed to use localnet port.
> +check_output_flows_via_localnet hv1 ${sw0_dp_key} ${sw0p2_key} ${lnsw0_key}
> +check_output_flows_via_localnet hv1 ${sw1_dp_key} ${sw1p2_key} ${lnsw1_key}
> +check_output_flows_via_localnet hv2 ${sw0_dp_key} ${sw0p1_key} ${lnsw0_key}
> +check_output_flows_via_localnet hv2 ${sw1_dp_key} ${sw1p1_key} ${lnsw1_key}
> +
> +# Set the always_tunnel option to true.
> +check ovn-nbctl set NB_Global . options:always_tunnel=true
> +check ovn-nbctl --wait=hv sync
> +
> +# Flows should be installed to tunnel.
> +check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
> +check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
> +check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
> +check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
> +
> +# Set the always_tunnel option to false.
> +check ovn-nbctl set NB_Global . options:always_tunnel=false
> +check ovn-nbctl --wait=hv sync
> +
> +# Flows should be installed to use localnet port.
> +check_output_flows_via_localnet hv1 ${sw0_dp_key} ${sw0p2_key} ${lnsw0_key}
> +check_output_flows_via_localnet hv1 ${sw1_dp_key} ${sw1p2_key} ${lnsw1_key}
> +check_output_flows_via_localnet hv2 ${sw0_dp_key} ${sw0p1_key} ${lnsw0_key}
> +check_output_flows_via_localnet hv2 ${sw1_dp_key} ${sw1p1_key} ${lnsw1_key}
> +
> +check ovn-nbctl --wait=hv lsp-del ln-sw0
> +
> +# Flows should be installed to tunnel for sw0
> +check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
> +check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
> +
> +check ovn-nbctl --wait=hv lsp-del ln-sw1
> +# Flows should be installed to tunnel.
> +check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
> +check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
> +check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
> +check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
> +
> +OVN_CLEANUP([hv1],[hv2])
> +
> +AT_CLEANUP
> +])
> +
>   OVN_FOR_EACH_NORTHD([
>   AT_SETUP([Delete parent of container port])
>   ovn_start
Numan Siddique July 24, 2024, 7:13 p.m. UTC | #2
On Wed, Jul 24, 2024 at 12:35 PM Mark Michelson <mmichels@redhat.com> wrote:
>
> Thanks for the update Numan.
>
> Acked-by: Mark Michelson <mmichels@redhat.com>

Thank you.  I had missed adding a NEWS entry.  I added that and
applied this patch to
the main branch.

Numan

>
> On 7/24/24 12:01, numans@ovn.org wrote:
> > From: Numan Siddique <numans@ovn.org>
> >
> > This patch adds a global config option - 'always_tunnel' and
> > when set to true, any traffic destined to a VIF logical port of a
> > provider logical switch (having localnet port(s)), is tunnelled to
> > the destination chassis, instead of sending it out via the localnet
> > port.  This feature is useful for the following reasons:
> >
> > 1.  CMS can add both provider logical switches and overlay logical
> >      switches to a logical router.  With this option set, E-W routing between
> >      these logical switches will be tunnelled all the time.  The router port
> >      mac addresses are not leaked from multiple chassis to the upstream
> >      switches anymore.
> >
> > 2.  NATting will work as expected either in the gateway chassis or on
> >      the source VIF chassis (if external_mac and logical_port set).
> >
> > 3.  With this option set, there is no need to centralize routing
> >      for provider logical switches ('reside-on-redirect-chassis').
> >
> > 4.  With the commits [1] now merged, MTU issues arising due to tunnel
> >      overhead will be handled gracefully.
> >
> > [1] - 3faadc76ad71 ("northd: Fix pmtud for non routed traffic.")
> >        221476a01f26 ("ovn: Add tunnel PMTUD support.")
> >
> > Reported-at: https://issues.redhat.com/browse/FDP-209
> > Signed-off-by: Numan Siddique <numans@ovn.org>
> > ---
> >
> > v1 -> v2
> > -------
> >     * Changed the config option from 'provider_network_overlay' to
> >       'always_tunnel' as suggested by Mark.
> >     * Rebased.
> >
> >
> >   controller/ovn-controller.c |  27 +++
> >   controller/physical.c       |  10 +-
> >   controller/physical.h       |   1 +
> >   northd/en-global-config.c   |   5 +
> >   ovn-nb.xml                  |  16 ++
> >   tests/multinode-macros.at   |  19 ++
> >   tests/multinode.at          | 358 ++++++++++++++++++++++++++++++++++++
> >   tests/ovn.at                | 157 ++++++++++++++++
> >   8 files changed, 592 insertions(+), 1 deletion(-)
> >
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index b3a265230c..4e30302ea6 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -3349,6 +3349,11 @@ non_vif_data_ovs_iface_handler(struct engine_node *node, void *data OVS_UNUSED)
> >
> >   struct ed_type_northd_options {
> >       bool explicit_arp_ns_output;
> > +    bool always_tunnel; /* Indicates if the traffic to the
> > +                         * logical port of a bridged logical
> > +                         * switch (i.e with localnet port) should
> > +                         * be tunnelled or sent via the localnet
> > +                         * port.  Default value is 'false'. */
> >   };
> >
> >
> > @@ -3380,6 +3385,12 @@ en_northd_options_run(struct engine_node *node, void *data)
> >                               false)
> >               : false;
> >
> > +    n_opts->always_tunnel =
> > +            sb_global
> > +            ? smap_get_bool(&sb_global->options, "always_tunnel",
> > +                            false)
> > +            : false;
> > +
> >       engine_set_node_state(node, EN_UPDATED);
> >   }
> >
> > @@ -3403,6 +3414,17 @@ en_northd_options_sb_sb_global_handler(struct engine_node *node, void *data)
> >           engine_set_node_state(node, EN_UPDATED);
> >       }
> >
> > +    bool always_tunnel =
> > +            sb_global
> > +            ? smap_get_bool(&sb_global->options, "always_tunnel",
> > +                            false)
> > +            : false;
> > +
> > +    if (always_tunnel != n_opts->always_tunnel) {
> > +        n_opts->always_tunnel = always_tunnel;
> > +        engine_set_node_state(node, EN_UPDATED);
> > +    }
> > +
> >       return true;
> >   }
> >
> > @@ -4315,6 +4337,9 @@ static void init_physical_ctx(struct engine_node *node,
> >           engine_get_input_data("ct_zones", node);
> >       struct simap *ct_zones = &ct_zones_data->ctx.current;
> >
> > +    struct ed_type_northd_options *n_opts =
> > +        engine_get_input_data("northd_options", node);
> > +
> >       parse_encap_ips(ovs_table, &p_ctx->n_encap_ips, &p_ctx->encap_ips);
> >       p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
> >       p_ctx->sbrec_port_binding_by_datapath = sbrec_port_binding_by_datapath;
> > @@ -4332,6 +4357,7 @@ static void init_physical_ctx(struct engine_node *node,
> >       p_ctx->local_bindings = &rt_data->lbinding_data.bindings;
> >       p_ctx->patch_ofports = &non_vif_data->patch_ofports;
> >       p_ctx->chassis_tunnels = &non_vif_data->chassis_tunnels;
> > +    p_ctx->always_tunnel = n_opts->always_tunnel;
> >
> >       struct controller_engine_ctx *ctrl_ctx = engine_get_context()->client_ctx;
> >       p_ctx->if_mgr = ctrl_ctx->if_mgr;
> > @@ -5032,6 +5058,7 @@ main(int argc, char *argv[])
> >        */
> >       engine_add_input(&en_pflow_output, &en_non_vif_data,
> >                        NULL);
> > +    engine_add_input(&en_pflow_output, &en_northd_options, NULL);
> >       engine_add_input(&en_pflow_output, &en_ct_zones,
> >                        pflow_output_ct_zones_handler);
> >       engine_add_input(&en_pflow_output, &en_sb_chassis,
> > diff --git a/controller/physical.c b/controller/physical.c
> > index 22756810fd..876ceccf17 100644
> > --- a/controller/physical.c
> > +++ b/controller/physical.c
> > @@ -1489,6 +1489,7 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> >                         const struct if_status_mgr *if_mgr,
> >                         size_t n_encap_ips,
> >                         const char **encap_ips,
> > +                      bool always_tunnel,
> >                         struct ovn_desired_flow_table *flow_table,
> >                         struct ofpbuf *ofpacts_p)
> >   {
> > @@ -1922,7 +1923,7 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> >                               binding->header_.uuid.parts[0], &match,
> >                               ofpacts_p, &binding->header_.uuid);
> >           }
> > -    } else if (access_type == PORT_LOCALNET) {
> > +    } else if (access_type == PORT_LOCALNET && !always_tunnel) {
> >           /* Remote port connected by localnet port */
> >           /* Table 40, priority 100.
> >            * =======================
> > @@ -1930,6 +1931,11 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> >            * Implements switching to localnet port. Each flow matches a
> >            * logical output port on remote hypervisor, switch the output port
> >            * to connected localnet port and resubmits to same table.
> > +         *
> > +         * Note: If 'always_tunnel' is true, then
> > +         * put_remote_port_redirect_overlay() called from below takes care
> > +         * of adding the flow in OFTABLE_REMOTE_OUTPUT table to tunnel to
> > +         * the destination chassis.
> >            */
> >
> >           ofpbuf_clear(ofpacts_p);
> > @@ -2355,6 +2361,7 @@ physical_eval_port_binding(struct physical_ctx *p_ctx,
> >                             p_ctx->if_mgr,
> >                             p_ctx->n_encap_ips,
> >                             p_ctx->encap_ips,
> > +                          p_ctx->always_tunnel,
> >                             flow_table, &ofpacts);
> >       ofpbuf_uninit(&ofpacts);
> >   }
> > @@ -2482,6 +2489,7 @@ physical_run(struct physical_ctx *p_ctx,
> >                                 p_ctx->if_mgr,
> >                                 p_ctx->n_encap_ips,
> >                                 p_ctx->encap_ips,
> > +                              p_ctx->always_tunnel,
> >                                 flow_table, &ofpacts);
> >       }
> >
> > diff --git a/controller/physical.h b/controller/physical.h
> > index 7fe8ee3c18..4dd228cf8f 100644
> > --- a/controller/physical.h
> > +++ b/controller/physical.h
> > @@ -69,6 +69,7 @@ struct physical_ctx {
> >       size_t n_encap_ips;
> >       const char **encap_ips;
> >       struct physical_debug debug;
> > +    bool always_tunnel;
> >   };
> >
> >   void physical_register_ovs_idl(struct ovsdb_idl *);
> > diff --git a/northd/en-global-config.c b/northd/en-global-config.c
> > index 5b71ede1f2..c5e65966b8 100644
> > --- a/northd/en-global-config.c
> > +++ b/northd/en-global-config.c
> > @@ -521,6 +521,11 @@ check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
> >           return true;
> >       }
> >
> > +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
> > +                           "always_tunnel", false)) {
> > +        return true;
> > +    }
> > +
> >       return false;
> >   }
> >
> > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > index 9552534f6d..0f9a1005a8 100644
> > --- a/ovn-nb.xml
> > +++ b/ovn-nb.xml
> > @@ -391,6 +391,22 @@
> >           non-<code>VXLAN mode</code> tunnel IDs allocation logic.
> >         </column>
> >
> > +      <column name="options" key="always_tunnel"
> > +           type='{"type": "boolean"}'>
> > +        <p>
> > +          If set to true, then the traffic destined to a VIF of a provider
> > +          logical switch (having a localnet port) will be tunnelled instead
> > +          of sending it via the localnet port.  This option will be useful
> > +          if CMS wants to connect overlay logical switches (without
> > +          localnet port) and provider logical switches to a router.  Without
> > +          this option set, the traffic path will be a mix of tunnelling and
> > +          localnet ports (since routing is distributed) resulting in the
> > +          leakage of the router port mac address to the upstream switches
> > +          and undefined behavior if NATting is involved.  This option is
> > +          disabled by default.
> > +        </p>
> > +      </column>
> > +
> >         <group title="Options for configuring interconnection route advertisement">
> >           <p>
> >             These options control how routes are advertised between OVN
> > diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at
> > index ef41087ae3..786e564860 100644
> > --- a/tests/multinode-macros.at
> > +++ b/tests/multinode-macros.at
> > @@ -22,6 +22,25 @@ m4_define([M_NS_CHECK_EXEC],
> >       [ AT_CHECK([M_NS_EXEC([$1], [$2], [$3])], m4_shift(m4_shift(m4_shift($@)))) ]
> >   )
> >
> > +# M_DAEMONIZE([fake_node],[command],[pidfile])
> > +m4_define([M_DAEMONIZE],
> > +    [podman exec $1 $2 & echo $! > $3
> > +     echo "kill \`cat $3\`" >> cleanup
> > +    ]
> > +)
> > +
> > +# M_START_TCPDUMP([fake_node], [params], [name])
> > +#
> > +# Helper to properly start tcpdump and wait for the startup.
> > +# The tcpdump output is available in <name>.tcpdump file.
> > +m4_define([M_START_TCPDUMP],
> > +    [
> > +     podman exec $1 tcpdump -l $2 >$3.tcpdump 2>$3.stderr &
> > +     OVS_WAIT_UNTIL([grep -q "listening" $3.stderr])
> > +    ]
> > +)
> > +
> > +
> >   OVS_START_SHELL_HELPERS
> >
> >   m_as() {
> > diff --git a/tests/multinode.at b/tests/multinode.at
> > index 1e6eeb6610..a7231130ac 100644
> > --- a/tests/multinode.at
> > +++ b/tests/multinode.at
> > @@ -1034,3 +1034,361 @@ done
> >   M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
> >
> >   AT_CLEANUP
> > +
> > +AT_SETUP([ovn provider network - always_tunnel])
> > +
> > +# Check that ovn-fake-multinode setup is up and running
> > +check_fake_multinode_setup
> > +
> > +# Delete the multinode NB and OVS resources before starting the test.
> > +cleanup_multinode_resources
> > +
> > +m_as ovn-chassis-1 ip link del sw0p1-p
> > +m_as ovn-chassis-2 ip link del sw0p2-p
> > +
> > +# Reset geneve tunnels
> > +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> > +do
> > +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> > +done
> > +
> > +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> > +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> > +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> > +
> > +# The goal of this test case is to see the traffic works for
> > +# E-W switching and routing when the logical switches have localnet ports
> > +# and the option - always_tunnel=true is set.  When this option
> > +# is set, traffic is tunneled to the destination chassis instead of using
> > +# localnet ports.
> > +
> > +check multinode_nbctl ls-add sw0
> > +check multinode_nbctl lsp-add sw0 sw0-port1
> > +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> > +check multinode_nbctl lsp-add sw0 sw0-port2
> > +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> > +
> > +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> > +
> > +m_wait_for_ports_up
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +# Create the second logical switch with one port
> > +check multinode_nbctl ls-add sw1
> > +check multinode_nbctl lsp-add sw1 sw1-port1
> > +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> > +
> > +# Create a logical router and attach both logical switches
> > +check multinode_nbctl lr-add lr0
> > +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> > +check multinode_nbctl lsp-add sw0 sw0-lr0
> > +check multinode_nbctl lsp-set-type sw0-lr0 router
> > +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> > +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> > +
> > +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> > +check multinode_nbctl lsp-add sw1 sw1-lr0
> > +check multinode_nbctl lsp-set-type sw1-lr0 router
> > +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> > +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> > +
> > +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> > +
> > +# create external connection for N/S traffic
> > +check multinode_nbctl ls-add public
> > +check multinode_nbctl lsp-add public ln-lublic
> > +check multinode_nbctl lsp-set-type ln-lublic localnet
> > +check multinode_nbctl lsp-set-addresses ln-lublic unknown
> > +check multinode_nbctl lsp-set-options ln-lublic network_name=public
> > +
> > +check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
> > +check multinode_nbctl lsp-add public public-lr0
> > +check multinode_nbctl lsp-set-type public-lr0 router
> > +check multinode_nbctl lsp-set-addresses public-lr0 router
> > +check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
> > +check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
> > +check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
> > +
> > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> > +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> > +
> > +# create localnet ports for sw0 and sw1
> > +check multinode_nbctl lsp-add sw0 ln-sw0
> > +check multinode_nbctl lsp-set-type ln-sw0 localnet
> > +check multinode_nbctl lsp-set-addresses ln-sw0 unknown
> > +check multinode_nbctl lsp-set-options ln-sw0 network_name=public
> > +check multinode_nbctl set logical_switch_port ln-sw0 tag_request=100
> > +
> > +check multinode_nbctl lsp-add sw1 ln-sw1
> > +check multinode_nbctl lsp-set-type ln-sw1 localnet
> > +check multinode_nbctl lsp-set-addresses ln-sw1 unknown
> > +check multinode_nbctl lsp-set-options ln-sw1 network_name=public
> > +check multinode_nbctl set logical_switch_port ln-sw1 tag_request=101
> > +
> > +check multinode_nbctl --wait=hv sync
> > +
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
> > +50:54:00:00:00:03 > 50:54:00:00:00:04, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 10.0.0.4: ICMP echo request,
> > +50:54:00:00:00:04 > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.4 > 10.0.0.3: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
> > +])
> > +
> > +m_as ovn-chassis-1 killall tcpdump
> > +rm -f *.tcpdump
> > +rm -f *.stderr
> > +
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
> > +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 101, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 20.0.0.3: ICMP echo request,
> > +00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 20.0.0.3 > 10.0.0.3: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
> > +])
> > +
> > +# Set the option always_tunnel=true.
> > +# Traffic from sw0p1 to sw0p2 should be tunneled.
> > +check multinode_nbctl set NB_Global . options:always_tunnel=true
> > +check multinode_nbctl --wait=hv sync
> > +
> > +m_as ovn-chassis-1 killall tcpdump
> > +rm -f *.tcpdump
> > +rm -f *.stderr
> > +
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> > +50:54:00:00:00:03 > 50:54:00:00:00:04, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 10.0.0.4: ICMP echo request,
> > +50:54:00:00:00:04 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 10.0.0.4 > 10.0.0.3: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
> > +])
> > +
> > +m_as ovn-chassis-1 killall tcpdump
> > +rm -f *.tcpdump
> > +rm -f *.stderr
> > +
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> > +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 20.0.0.3: ICMP echo request,
> > +00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 10.0.0.3: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
> > +])
> > +
> > +m_as ovn-chassis-1 killall tcpdump
> > +rm -f *.tcpdump
> > +rm -f *.stderr
> > +
> > +# Delete ln-sw1.
> > +check multinode_nbctl --wait=hv lsp-del ln-sw1
> > +# Traffic from sw0p1 to sw1p1 should be tunneled.
> > +
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> > +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 20.0.0.3: ICMP echo request,
> > +00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 10.0.0.3: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
> > +])
> > +
> > +m_as ovn-chassis-1 killall tcpdump
> > +rm -f *.tcpdump
> > +rm -f *.stderr
> > +
> > +# Make sure that traffic from sw0 still goes out of localnet port
> > +# for IPs not managed by OVN.
> > +# Create a fake vm in br-ex on ovn-gw-1 with IP - 10.0.0.10
> > +m_as ovn-gw-1 ip netns add sw0-p10
> > +m_as ovn-gw-1 ovs-vsctl add-port br-ex sw0-p10 -- set interface sw0-p10 type=internal
> > +m_as ovn-gw-1 ovs-vsctl set port sw0-p10 tag=100
> > +m_as ovn-gw-1 ip link set sw0-p10 netns sw0-p10
> > +m_as ovn-gw-1 ip netns exec sw0-p10 ip link set sw0-p10 up
> > +m_as ovn-gw-1 ip netns exec sw0-p10 ip link set sw0-p10 address 32:31:8c:da:64:4f
> > +m_as ovn-gw-1 ip netns exec sw0-p10 ip addr add 10.0.0.10/24 dev sw0-p10
> > +
> > +# Ping from sw0p1 (on ovn-chassis-1) tp sw0-p10 which is in ovn-gw-1 on
> > +# external bridge.  The traffic path is
> > +# sw0p1 -> br-int -> localnet port (vlan tagged 100) -> br-ex -> eth2 of ovn-chassis-1 to
> > +# eth2 of ovn-gw-1  -> br-ex -> sw0-p10
> > +
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> > +M_START_TCPDUMP([ovn-gw-1], [-c 2 -neei eth2 icmp], [gw1_eth2])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.10 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +m_as ovn-chassis-1 killall tcpdump
> > +m_as ovn-gw-1 killall tcpdump
> > +
> > +AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
> > +50:54:00:00:00:03 > 32:31:8c:da:64:4f, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 10.0.0.10: ICMP echo request,
> > +32:31:8c:da:64:4f > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.10 > 10.0.0.3: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
> > +
> > +])
> > +
> > +AT_CHECK([cat gw1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
> > +50:54:00:00:00:03 > 32:31:8c:da:64:4f, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 10.0.0.10: ICMP echo request,
> > +32:31:8c:da:64:4f > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.10 > 10.0.0.3: ICMP echo reply,
> > +])
> > +
> > +rm -f *.tcpdump
> > +rm -f *.stderr
> > +
> > +# Add dnat_and_snat entry for 10.0.0.3 <-> 172.20.0.110
> > +check multinode_nbctl --wait=hv lr-nat-add lr0 dnat_and_snat 172.20.0.110 10.0.0.3 sw0-port1 30:54:00:00:00:03
> > +
> > +# Ping from sw1-p1 to 172.20.0.110
> > +# Traffic path is
> > +# sw1-p1 in ovn-chassis-2 -> tunnel -> ovn-gw-1 -> In ovn-gw-1 SNAT 20.0.0.3 to 172.20.0.100 ->
> > +#  -> ln-public -> br-ex -> eth2 -> ovn-chassis-1 -> br-ex -> ln-public -> br-int ->
> > +#  -> DNAT 172.20.0.110 to 10.0.0.3 -> sw0-p1 with src ip 172.20.0.100 and dst ip 10.0.0.3.
> > +
> > +M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei genev_sys_6081 icmp], [ch2_genev])
> > +M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei eth2 icmp], [ch2_eth2])
> > +M_START_TCPDUMP([ovn-gw-1], [-c 2 -neei genev_sys_6081 icmp], [gw1_geneve])
> > +M_START_TCPDUMP([ovn-gw-1], [-c 2 -neei eth2 icmp], [gw1_eth2])
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [ping -q -c 3 -i 0.3 -w 2 172.20.0.110 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +m_as ovn-chassis-1 killall tcpdump
> > +m_as ovn-chassis-2 killall tcpdump
> > +m_as ovn-gw-1 killall tcpdump
> > +
> > +AT_CHECK([cat ch2_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> > +00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
> > +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat gw1_geneve.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> > +00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
> > +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat gw1_eth2.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> > +00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 172.20.0.110: ICMP echo request,
> > +30:54:00:00:00:03 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 172.20.0.100: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> > +00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 172.20.0.110: ICMP echo request,
> > +30:54:00:00:00:03 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 172.20.0.100: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
> > +
> > +])
> > +
> > +rm -f *.tcpdump
> > +rm -f *.stderr
> > +
> > +# Now clear the logical_port of dnat_and_snat entry.  ovn-gw-1 should handle the DNAT.
> > +check multinode_nbctl lr-nat-del lr0 dnat_and_snat 172.20.0.110
> > +check multinode_nbctl --wait=hv lr-nat-add lr0 dnat_and_snat 172.20.0.110 10.0.0.3
> > +# Ping from sw1-p1 to 172.20.0.110
> > +# Traffic path is
> > +# sw1-p1 in ovn-chassis-2 -> tunnel -> ovn-gw-1 -> In ovn-gw-1 SNAT 20.0.0.3 to 172.20.0.100 ->
> > +#  DNAT 172.20.0.110 -> 10.0.0.3 -> tunnel -> ovn-chassis-1 -> br-int -> sw0p1
> > +
> > +M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei genev_sys_6081 icmp], [ch2_genev])
> > +M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei eth2 icmp], [ch2_eth2])
> > +M_START_TCPDUMP([ovn-gw-1], [-c 4 -neei genev_sys_6081 icmp], [gw1_geneve])
> > +M_START_TCPDUMP([ovn-gw-1], [-c 4 -neei eth2 icmp], [gw1_eth2])
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
> > +M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
> > +
> > +M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [ping -q -c 3 -i 0.3 -w 2 172.20.0.110 | FORMAT_PING], \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +m_as ovn-chassis-1 killall tcpdump
> > +m_as ovn-chassis-2 killall tcpdump
> > +m_as ovn-gw-1 killall tcpdump
> > +
> > +AT_CHECK([cat ch2_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> > +00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
> > +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
> > +
> > +])
> > +
> > +AT_CHECK([cat gw1_geneve.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> > +00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
> > +00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 10.0.0.3: ICMP echo request,
> > +00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 172.20.0.100: ICMP echo reply,
> > +00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat gw1_eth2.tcpdump], [0], [dnl
> > +
> > +])
> > +
> > +AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
> > +00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 10.0.0.3: ICMP echo request,
> > +00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 172.20.0.100: ICMP echo reply,
> > +])
> > +
> > +AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
> > +
> > +])
> > +
> > +AT_CLEANUP
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 13b393932e..2ced7c0b2f 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -38339,6 +38339,163 @@ OVN_CLEANUP([hv1
> >   AT_CLEANUP
> >   ])
> >
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([Provider network - always tunnel])
> > +ovn_start
> > +net_add n1
> > +
> > +for hv in 1 2; do
> > +    sim_add hv${hv}
> > +    as hv${hv}
> > +    ovs-vsctl add-br br-phys
> > +    ovn_attach n1 br-phys 192.168.0.${hv}
> > +    ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
> > +done
> > +
> > +check ovn-nbctl ls-add sw0
> > +check ovn-nbctl lsp-add sw0 sw0-p1 -- lsp-set-addresses sw0-p1 "00:00:10:01:02:03 10.0.0.3"
> > +check ovn-nbctl lsp-add sw0 sw0-p2 -- lsp-set-addresses sw0-p2 "00:00:04:01:02:04 10.0.0.4"
> > +
> > +check ovn-nbctl ls-add sw1
> > +check ovn-nbctl lsp-add sw1 sw1-p1 -- lsp-set-addresses sw1-p1 "00:00:20:01:02:03 20.0.0.3"
> > +check ovn-nbctl lsp-add sw1 sw1-p2 -- lsp-set-addresses sw1-p2 "00:00:20:01:02:04 20.0.0.4"
> > +
> > +check ovn-nbctl lr-add lr0
> > +check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24
> > +check ovn-nbctl lsp-add sw0 sw0-lr0
> > +check ovn-nbctl lsp-set-type sw0-lr0 router
> > +check ovn-nbctl lsp-set-addresses sw0-lr0 router
> > +check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> > +
> > +check ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24
> > +check ovn-nbctl lsp-add sw1 sw1-lr0
> > +check ovn-nbctl lsp-set-type sw1-lr0 router
> > +check ovn-nbctl lsp-set-addresses sw1-lr0 router
> > +check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> > +
> > +as hv1
> > +ovs-vsctl add-port br-int vif11 -- \
> > +    set Interface vif11 external-ids:iface-id=sw0-p1 \
> > +                              options:tx_pcap=hv1/vif11-tx.pcap \
> > +                              options:rxq_pcap=hv1/vif11-rx.pcap \
> > +                              ofport-request=11
> > +ovs-vsctl add-port br-int vif12 -- \
> > +    set Interface vif12 external-ids:iface-id=sw1-p1 \
> > +                              options:tx_pcap=hv1/vif12-tx.pcap \
> > +                              options:rxq_pcap=hv1/vif12-rx.pcap \
> > +                              ofport-request=12
> > +
> > +as hv2
> > +ovs-vsctl add-port br-int vif21 -- \
> > +    set Interface vif21 external-ids:iface-id=sw0-p2 \
> > +                              options:tx_pcap=hv1/vif21-tx.pcap \
> > +                              options:rxq_pcap=hv1/vif21-rx.pcap \
> > +                              ofport-request=21
> > +ovs-vsctl add-port br-int vif22 -- \
> > +    set Interface vif22 external-ids:iface-id=sw1-p2 \
> > +                              options:tx_pcap=hv1/vif22-tx.pcap \
> > +                              options:rxq_pcap=hv1/vif22-rx.pcap \
> > +                              ofport-request=22
> > +
> > +check ovn-nbctl --wait=hv sync
> > +wait_for_ports_up
> > +
> > +sw0_dp_key=$(printf "%x" $(fetch_column Datapath_Binding tunnel_key external_ids:name=sw0))
> > +sw0p1_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw0-p1))
> > +sw0p2_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw0-p2))
> > +
> > +sw1_dp_key=$(printf "%x" $(fetch_column Datapath_Binding tunnel_key external_ids:name=sw1))
> > +sw1p1_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw1-p1))
> > +sw1p2_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw1-p2))
> > +
> > +check_output_flows_tunnelled() {
> > +  hv=$1
> > +  dp_key=$2
> > +  dp_rport=$3
> > +  AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int table=OFTABLE_REMOTE_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} | ofctl_strip_all | grep -v NXST_FLOW], [0], [dnl
> > + table=OFTABLE_REMOTE_OUTPUT, priority=100,reg13=0/0xffff0000,reg15=0x${dp_rport},metadata=0x${dp_key} actions=load:0x${dp_key}->NXM_NX_TUN_ID[[0..23]],set_field:0x${dp_rport}->tun_metadata0,move:NXM_NX_REG14[[0..14]]->NXM_NX_TUN_METADATA0[[16..30]],output:1,resubmit(,OFTABLE_LOCAL_OUTPUT)
> > +])
> > +}
> > +
> > +check_output_flows_via_localnet() {
> > +  hv=$1
> > +  dp_key=$2
> > +  dp_rport=$3
> > +  lnport_key=$4
> > +  AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int table=OFTABLE_REMOTE_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} | ofctl_strip_all | grep -v NXST_FLOW], [1], [dnl
> > +])
> > +
> > +  AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int table=OFTABLE_LOCAL_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} | ofctl_strip_all | grep -v NXST_FLOW], [0], [dnl
> > + table=OFTABLE_LOCAL_OUTPUT, priority=100,reg15=0x${dp_rport},metadata=0x${dp_key} actions=load:0x${lnport_key}->NXM_NX_REG15[[]],resubmit(,OFTABLE_LOCAL_OUTPUT)
> > +])
> > +}
> > +
> > +# There are no localnet ports in sw0 and sw1.  So the pkts are tunnelled.
> > +check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
> > +check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
> > +check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
> > +check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
> > +
> > +# Add localnet port to sw0
> > +check ovn-nbctl lsp-add sw0 ln-sw0 -- lsp-set-addresses ln-sw0 unknown -- lsp-set-type ln-sw0 localnet
> > +check ovn-nbctl --wait=hv lsp-set-options ln-sw0 network_name=physnet1 -- set logical_switch_port ln-sw0 tag_request=100
> > +lnsw0_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=ln-sw0))
> > +
> > +# Flows should be installed to use localnet port for sw0.
> > +check_output_flows_via_localnet hv1 ${sw0_dp_key} ${sw0p2_key} ${lnsw0_key}
> > +check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
> > +check_output_flows_via_localnet hv2 ${sw0_dp_key} ${sw0p1_key} ${lnsw0_key}
> > +check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
> > +
> > +# Add localnet port to sw1
> > +check ovn-nbctl lsp-add sw1 ln-sw1 -- lsp-set-addresses ln-sw1 unknown -- lsp-set-type ln-sw1 localnet
> > +check ovn-nbctl --wait=hv lsp-set-options ln-sw1 network_name=physnet1 -- set logical_switch_port ln-sw1 tag_request=101
> > +lnsw1_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=ln-sw1))
> > +
> > +# Flows should be installed to use localnet port.
> > +check_output_flows_via_localnet hv1 ${sw0_dp_key} ${sw0p2_key} ${lnsw0_key}
> > +check_output_flows_via_localnet hv1 ${sw1_dp_key} ${sw1p2_key} ${lnsw1_key}
> > +check_output_flows_via_localnet hv2 ${sw0_dp_key} ${sw0p1_key} ${lnsw0_key}
> > +check_output_flows_via_localnet hv2 ${sw1_dp_key} ${sw1p1_key} ${lnsw1_key}
> > +
> > +# Set the always_tunnel option to true.
> > +check ovn-nbctl set NB_Global . options:always_tunnel=true
> > +check ovn-nbctl --wait=hv sync
> > +
> > +# Flows should be installed to tunnel.
> > +check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
> > +check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
> > +check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
> > +check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
> > +
> > +# Set the always_tunnel option to false.
> > +check ovn-nbctl set NB_Global . options:always_tunnel=false
> > +check ovn-nbctl --wait=hv sync
> > +
> > +# Flows should be installed to use localnet port.
> > +check_output_flows_via_localnet hv1 ${sw0_dp_key} ${sw0p2_key} ${lnsw0_key}
> > +check_output_flows_via_localnet hv1 ${sw1_dp_key} ${sw1p2_key} ${lnsw1_key}
> > +check_output_flows_via_localnet hv2 ${sw0_dp_key} ${sw0p1_key} ${lnsw0_key}
> > +check_output_flows_via_localnet hv2 ${sw1_dp_key} ${sw1p1_key} ${lnsw1_key}
> > +
> > +check ovn-nbctl --wait=hv lsp-del ln-sw0
> > +
> > +# Flows should be installed to tunnel for sw0
> > +check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
> > +check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
> > +
> > +check ovn-nbctl --wait=hv lsp-del ln-sw1
> > +# Flows should be installed to tunnel.
> > +check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
> > +check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
> > +check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
> > +check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
> > +
> > +OVN_CLEANUP([hv1],[hv2])
> > +
> > +AT_CLEANUP
> > +])
> > +
> >   OVN_FOR_EACH_NORTHD([
> >   AT_SETUP([Delete parent of container port])
> >   ovn_start
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index b3a265230c..4e30302ea6 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -3349,6 +3349,11 @@  non_vif_data_ovs_iface_handler(struct engine_node *node, void *data OVS_UNUSED)
 
 struct ed_type_northd_options {
     bool explicit_arp_ns_output;
+    bool always_tunnel; /* Indicates if the traffic to the
+                         * logical port of a bridged logical
+                         * switch (i.e. with localnet port) should
+                         * be tunnelled or sent via the localnet
+                         * port.  Default value is 'false'. */
 };
 
 
@@ -3380,6 +3385,12 @@  en_northd_options_run(struct engine_node *node, void *data)
                             false)
             : false;
 
+    n_opts->always_tunnel =
+            sb_global
+            ? smap_get_bool(&sb_global->options, "always_tunnel",
+                            false)
+            : false;
+
     engine_set_node_state(node, EN_UPDATED);
 }
 
@@ -3403,6 +3414,17 @@  en_northd_options_sb_sb_global_handler(struct engine_node *node, void *data)
         engine_set_node_state(node, EN_UPDATED);
     }
 
+    bool always_tunnel =
+            sb_global
+            ? smap_get_bool(&sb_global->options, "always_tunnel",
+                            false)
+            : false;
+
+    if (always_tunnel != n_opts->always_tunnel) {
+        n_opts->always_tunnel = always_tunnel;
+        engine_set_node_state(node, EN_UPDATED);
+    }
+
     return true;
 }
 
@@ -4315,6 +4337,9 @@  static void init_physical_ctx(struct engine_node *node,
         engine_get_input_data("ct_zones", node);
     struct simap *ct_zones = &ct_zones_data->ctx.current;
 
+    struct ed_type_northd_options *n_opts =
+        engine_get_input_data("northd_options", node);
+
     parse_encap_ips(ovs_table, &p_ctx->n_encap_ips, &p_ctx->encap_ips);
     p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
     p_ctx->sbrec_port_binding_by_datapath = sbrec_port_binding_by_datapath;
@@ -4332,6 +4357,7 @@  static void init_physical_ctx(struct engine_node *node,
     p_ctx->local_bindings = &rt_data->lbinding_data.bindings;
     p_ctx->patch_ofports = &non_vif_data->patch_ofports;
     p_ctx->chassis_tunnels = &non_vif_data->chassis_tunnels;
+    p_ctx->always_tunnel = n_opts->always_tunnel;
 
     struct controller_engine_ctx *ctrl_ctx = engine_get_context()->client_ctx;
     p_ctx->if_mgr = ctrl_ctx->if_mgr;
@@ -5032,6 +5058,7 @@  main(int argc, char *argv[])
      */
     engine_add_input(&en_pflow_output, &en_non_vif_data,
                      NULL);
+    engine_add_input(&en_pflow_output, &en_northd_options, NULL);
     engine_add_input(&en_pflow_output, &en_ct_zones,
                      pflow_output_ct_zones_handler);
     engine_add_input(&en_pflow_output, &en_sb_chassis,
diff --git a/controller/physical.c b/controller/physical.c
index 22756810fd..876ceccf17 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1489,6 +1489,7 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
                       const struct if_status_mgr *if_mgr,
                       size_t n_encap_ips,
                       const char **encap_ips,
+                      bool always_tunnel,
                       struct ovn_desired_flow_table *flow_table,
                       struct ofpbuf *ofpacts_p)
 {
@@ -1922,7 +1923,7 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
                             binding->header_.uuid.parts[0], &match,
                             ofpacts_p, &binding->header_.uuid);
         }
-    } else if (access_type == PORT_LOCALNET) {
+    } else if (access_type == PORT_LOCALNET && !always_tunnel) {
         /* Remote port connected by localnet port */
         /* Table 40, priority 100.
          * =======================
@@ -1930,6 +1931,11 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
          * Implements switching to localnet port. Each flow matches a
          * logical output port on remote hypervisor, switch the output port
          * to connected localnet port and resubmits to same table.
+         *
+         * Note: If 'always_tunnel' is true, then
+         * put_remote_port_redirect_overlay() called from below takes care
+         * of adding the flow in OFTABLE_REMOTE_OUTPUT table to tunnel to
+         * the destination chassis.
          */
 
         ofpbuf_clear(ofpacts_p);
@@ -2355,6 +2361,7 @@  physical_eval_port_binding(struct physical_ctx *p_ctx,
                           p_ctx->if_mgr,
                           p_ctx->n_encap_ips,
                           p_ctx->encap_ips,
+                          p_ctx->always_tunnel,
                           flow_table, &ofpacts);
     ofpbuf_uninit(&ofpacts);
 }
@@ -2482,6 +2489,7 @@  physical_run(struct physical_ctx *p_ctx,
                               p_ctx->if_mgr,
                               p_ctx->n_encap_ips,
                               p_ctx->encap_ips,
+                              p_ctx->always_tunnel,
                               flow_table, &ofpacts);
     }
 
diff --git a/controller/physical.h b/controller/physical.h
index 7fe8ee3c18..4dd228cf8f 100644
--- a/controller/physical.h
+++ b/controller/physical.h
@@ -69,6 +69,7 @@  struct physical_ctx {
     size_t n_encap_ips;
     const char **encap_ips;
     struct physical_debug debug;
+    bool always_tunnel;
 };
 
 void physical_register_ovs_idl(struct ovsdb_idl *);
diff --git a/northd/en-global-config.c b/northd/en-global-config.c
index 5b71ede1f2..c5e65966b8 100644
--- a/northd/en-global-config.c
+++ b/northd/en-global-config.c
@@ -521,6 +521,11 @@  check_nb_options_out_of_sync(const struct nbrec_nb_global *nb,
         return true;
     }
 
+    if (config_out_of_sync(&nb->options, &config_data->nb_options,
+                           "always_tunnel", false)) {
+        return true;
+    }
+
     return false;
 }
 
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 9552534f6d..0f9a1005a8 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -391,6 +391,22 @@ 
         non-<code>VXLAN mode</code> tunnel IDs allocation logic.
       </column>
 
+      <column name="options" key="always_tunnel"
+           type='{"type": "boolean"}'>
+        <p>
+          If set to true, then the traffic destined to a VIF of a provider
+          logical switch (having a localnet port) will be tunnelled instead
+          of sending it via the localnet port.  This option will be useful
+          if CMS wants to connect overlay logical switches (without
+          localnet port) and provider logical switches to a router.  Without
+          this option set, the traffic path will be a mix of tunnelling and
+          localnet ports (since routing is distributed) resulting in the
+          leakage of the router port mac address to the upstream switches
+          and undefined behavior if NATting is involved.  This option is
+          disabled by default.
+        </p>
+      </column>
+
       <group title="Options for configuring interconnection route advertisement">
         <p>
           These options control how routes are advertised between OVN
diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at
index ef41087ae3..786e564860 100644
--- a/tests/multinode-macros.at
+++ b/tests/multinode-macros.at
@@ -22,6 +22,25 @@  m4_define([M_NS_CHECK_EXEC],
     [ AT_CHECK([M_NS_EXEC([$1], [$2], [$3])], m4_shift(m4_shift(m4_shift($@)))) ]
 )
 
+# M_DAEMONIZE([fake_node],[command],[pidfile])
+m4_define([M_DAEMONIZE],
+    [podman exec $1 $2 & echo $! > $3
+     echo "kill \`cat $3\`" >> cleanup
+    ]
+)
+
+# M_START_TCPDUMP([fake_node], [params], [name])
+#
+# Helper to properly start tcpdump and wait for the startup.
+# The tcpdump output is available in <name>.tcpdump file.
+m4_define([M_START_TCPDUMP],
+    [
+     podman exec $1 tcpdump -l $2 >$3.tcpdump 2>$3.stderr &
+     OVS_WAIT_UNTIL([grep -q "listening" $3.stderr])
+    ]
+)
+
+
 OVS_START_SHELL_HELPERS
 
 m_as() {
diff --git a/tests/multinode.at b/tests/multinode.at
index 1e6eeb6610..a7231130ac 100644
--- a/tests/multinode.at
+++ b/tests/multinode.at
@@ -1034,3 +1034,361 @@  done
 M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
 
 AT_CLEANUP
+
+AT_SETUP([ovn provider network - always_tunnel])
+
+# Check that ovn-fake-multinode setup is up and running
+check_fake_multinode_setup
+
+# Delete the multinode NB and OVS resources before starting the test.
+cleanup_multinode_resources
+
+m_as ovn-chassis-1 ip link del sw0p1-p
+m_as ovn-chassis-2 ip link del sw0p2-p
+
+# Reset geneve tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
+
+# The goal of this test case is to verify that the traffic works for
+# E-W switching and routing when the logical switches have localnet ports
+# and the option - always_tunnel=true is set.  When this option
+# is set, traffic is tunneled to the destination chassis instead of using
+# localnet ports.
+
+check multinode_nbctl ls-add sw0
+check multinode_nbctl lsp-add sw0 sw0-port1
+check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
+check multinode_nbctl lsp-add sw0 sw0-port2
+check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
+
+m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
+
+m_wait_for_ports_up
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Create the second logical switch with one port
+check multinode_nbctl ls-add sw1
+check multinode_nbctl lsp-add sw1 sw1-port1
+check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
+
+# Create a logical router and attach both logical switches
+check multinode_nbctl lr-add lr0
+check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
+check multinode_nbctl lsp-add sw0 sw0-lr0
+check multinode_nbctl lsp-set-type sw0-lr0 router
+check multinode_nbctl lsp-set-addresses sw0-lr0 router
+check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
+check multinode_nbctl lsp-add sw1 sw1-lr0
+check multinode_nbctl lsp-set-type sw1-lr0 router
+check multinode_nbctl lsp-set-addresses sw1-lr0 router
+check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
+
+# create external connection for N/S traffic
+check multinode_nbctl ls-add public
+check multinode_nbctl lsp-add public ln-lublic
+check multinode_nbctl lsp-set-type ln-lublic localnet
+check multinode_nbctl lsp-set-addresses ln-lublic unknown
+check multinode_nbctl lsp-set-options ln-lublic network_name=public
+
+check multinode_nbctl lrp-add lr0 lr0-public 00:11:22:00:ff:01 172.20.0.100/24
+check multinode_nbctl lsp-add public public-lr0
+check multinode_nbctl lsp-set-type public-lr0 router
+check multinode_nbctl lsp-set-addresses public-lr0 router
+check multinode_nbctl lsp-set-options public-lr0 router-port=lr0-public
+check multinode_nbctl lrp-set-gateway-chassis lr0-public ovn-gw-1 10
+check multinode_nbctl lr-route-add lr0 0.0.0.0/0 172.20.0.1
+
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
+
+# create localnet ports for sw0 and sw1
+check multinode_nbctl lsp-add sw0 ln-sw0
+check multinode_nbctl lsp-set-type ln-sw0 localnet
+check multinode_nbctl lsp-set-addresses ln-sw0 unknown
+check multinode_nbctl lsp-set-options ln-sw0 network_name=public
+check multinode_nbctl set logical_switch_port ln-sw0 tag_request=100
+
+check multinode_nbctl lsp-add sw1 ln-sw1
+check multinode_nbctl lsp-set-type ln-sw1 localnet
+check multinode_nbctl lsp-set-addresses ln-sw1 unknown
+check multinode_nbctl lsp-set-options ln-sw1 network_name=public
+check multinode_nbctl set logical_switch_port ln-sw1 tag_request=101
+
+check multinode_nbctl --wait=hv sync
+
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
+50:54:00:00:00:03 > 50:54:00:00:00:04, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 10.0.0.4: ICMP echo request,
+50:54:00:00:00:04 > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.4 > 10.0.0.3: ICMP echo reply,
+])
+
+AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
+])
+
+m_as ovn-chassis-1 killall tcpdump
+rm -f *.tcpdump
+rm -f *.stderr
+
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
+00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 101, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 20.0.0.3: ICMP echo request,
+00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 20.0.0.3 > 10.0.0.3: ICMP echo reply,
+])
+
+AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
+])
+
+# Set the option always_tunnel=true.
+# Traffic from sw0p1 to sw0p2 should be tunneled.
+check multinode_nbctl set NB_Global . options:always_tunnel=true
+check multinode_nbctl --wait=hv sync
+
+m_as ovn-chassis-1 killall tcpdump
+rm -f *.tcpdump
+rm -f *.stderr
+
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
+50:54:00:00:00:03 > 50:54:00:00:00:04, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 10.0.0.4: ICMP echo request,
+50:54:00:00:00:04 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 10.0.0.4 > 10.0.0.3: ICMP echo reply,
+])
+
+AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
+])
+
+m_as ovn-chassis-1 killall tcpdump
+rm -f *.tcpdump
+rm -f *.stderr
+
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
+00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 20.0.0.3: ICMP echo request,
+00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 10.0.0.3: ICMP echo reply,
+])
+
+AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
+])
+
+m_as ovn-chassis-1 killall tcpdump
+rm -f *.tcpdump
+rm -f *.stderr
+
+# Delete ln-sw1.
+check multinode_nbctl --wait=hv lsp-del ln-sw1
+# Traffic from sw0p1 to sw1p1 should be tunneled.
+
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
+00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 20.0.0.3: ICMP echo request,
+00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 10.0.0.3: ICMP echo reply,
+])
+
+AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
+])
+
+m_as ovn-chassis-1 killall tcpdump
+rm -f *.tcpdump
+rm -f *.stderr
+
+# Make sure that traffic from sw0 still goes out of localnet port
+# for IPs not managed by OVN.
+# Create a fake vm in br-ex on ovn-gw-1 with IP - 10.0.0.10
+m_as ovn-gw-1 ip netns add sw0-p10
+m_as ovn-gw-1 ovs-vsctl add-port br-ex sw0-p10 -- set interface sw0-p10 type=internal
+m_as ovn-gw-1 ovs-vsctl set port sw0-p10 tag=100
+m_as ovn-gw-1 ip link set sw0-p10 netns sw0-p10
+m_as ovn-gw-1 ip netns exec sw0-p10 ip link set sw0-p10 up
+m_as ovn-gw-1 ip netns exec sw0-p10 ip link set sw0-p10 address 32:31:8c:da:64:4f
+m_as ovn-gw-1 ip netns exec sw0-p10 ip addr add 10.0.0.10/24 dev sw0-p10
+
+# Ping from sw0p1 (on ovn-chassis-1) to sw0-p10 which is in ovn-gw-1 on
+# external bridge.  The traffic path is
+# sw0p1 -> br-int -> localnet port (vlan tagged 100) -> br-ex -> eth2 of ovn-chassis-1 to
+# eth2 of ovn-gw-1  -> br-ex -> sw0-p10
+
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
+M_START_TCPDUMP([ovn-gw-1], [-c 2 -neei eth2 icmp], [gw1_eth2])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.10 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+m_as ovn-chassis-1 killall tcpdump
+m_as ovn-gw-1 killall tcpdump
+
+AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
+50:54:00:00:00:03 > 32:31:8c:da:64:4f, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 10.0.0.10: ICMP echo request,
+32:31:8c:da:64:4f > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.10 > 10.0.0.3: ICMP echo reply,
+])
+
+AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
+
+])
+
+AT_CHECK([cat gw1_eth2.tcpdump | cut -d  ' ' -f2-22], [0], [dnl
+50:54:00:00:00:03 > 32:31:8c:da:64:4f, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 10.0.0.10: ICMP echo request,
+32:31:8c:da:64:4f > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 100, p 0, ethertype IPv4 (0x0800), 10.0.0.10 > 10.0.0.3: ICMP echo reply,
+])
+
+rm -f *.tcpdump
+rm -f *.stderr
+
+# Add dnat_and_snat entry for 10.0.0.3 <-> 172.20.0.110
+check multinode_nbctl --wait=hv lr-nat-add lr0 dnat_and_snat 172.20.0.110 10.0.0.3 sw0-port1 30:54:00:00:00:03
+
+# Ping from sw1-p1 to 172.20.0.110
+# Traffic path is
+# sw1-p1 in ovn-chassis-2 -> tunnel -> ovn-gw-1 -> In ovn-gw-1 SNAT 20.0.0.3 to 172.20.0.100 ->
+#  -> ln-public -> br-ex -> eth2 -> ovn-chassis-1 -> br-ex -> ln-public -> br-int ->
+#  -> DNAT 172.20.0.110 to 10.0.0.3 -> sw0-p1 with src ip 172.20.0.100 and dst ip 10.0.0.3.
+
+M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei genev_sys_6081 icmp], [ch2_genev])
+M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei eth2 icmp], [ch2_eth2])
+M_START_TCPDUMP([ovn-gw-1], [-c 2 -neei genev_sys_6081 icmp], [gw1_geneve])
+M_START_TCPDUMP([ovn-gw-1], [-c 2 -neei eth2 icmp], [gw1_eth2])
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
+
+M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [ping -q -c 3 -i 0.3 -w 2 172.20.0.110 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+m_as ovn-chassis-1 killall tcpdump
+m_as ovn-chassis-2 killall tcpdump
+m_as ovn-gw-1 killall tcpdump
+
+AT_CHECK([cat ch2_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
+00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
+00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
+])
+
+AT_CHECK([cat gw1_geneve.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
+00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
+00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
+])
+
+AT_CHECK([cat gw1_eth2.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
+00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 172.20.0.110: ICMP echo request,
+30:54:00:00:00:03 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 172.20.0.100: ICMP echo reply,
+])
+
+AT_CHECK([cat ch1_eth2.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
+00:11:22:00:ff:01 > 30:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 172.20.0.110: ICMP echo request,
+30:54:00:00:00:03 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 172.20.0.100: ICMP echo reply,
+])
+
+AT_CHECK([cat ch1_genev.tcpdump], [0], [dnl
+
+])
+
+rm -f *.tcpdump
+rm -f *.stderr
+
+# Now clear the logical_port of dnat_and_snat entry.  ovn-gw-1 should handle the DNAT.
+check multinode_nbctl lr-nat-del lr0 dnat_and_snat 172.20.0.110
+check multinode_nbctl --wait=hv lr-nat-add lr0 dnat_and_snat 172.20.0.110 10.0.0.3
+# Ping from sw1-p1 to 172.20.0.110
+# Traffic path is
+# sw1-p1 in ovn-chassis-2 -> tunnel -> ovn-gw-1 -> In ovn-gw-1 SNAT 20.0.0.3 to 172.20.0.100 ->
+#  DNAT 172.20.0.110 -> 10.0.0.3 -> tunnel -> ovn-chassis-1 -> br-int -> sw0p1
+
+M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei genev_sys_6081 icmp], [ch2_genev])
+M_START_TCPDUMP([ovn-chassis-2], [-c 2 -neei eth2 icmp], [ch2_eth2])
+M_START_TCPDUMP([ovn-gw-1], [-c 4 -neei genev_sys_6081 icmp], [gw1_geneve])
+M_START_TCPDUMP([ovn-gw-1], [-c 4 -neei eth2 icmp], [gw1_eth2])
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei genev_sys_6081 icmp], [ch1_genev])
+M_START_TCPDUMP([ovn-chassis-1], [-c 2 -neei eth2 icmp], [ch1_eth2])
+
+M_NS_CHECK_EXEC([ovn-chassis-2], [sw1p1], [ping -q -c 3 -i 0.3 -w 2 172.20.0.110 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+m_as ovn-chassis-1 killall tcpdump
+m_as ovn-chassis-2 killall tcpdump
+m_as ovn-gw-1 killall tcpdump
+
+AT_CHECK([cat ch2_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
+00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
+00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
+])
+
+AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
+
+])
+
+AT_CHECK([cat gw1_geneve.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
+00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 20.0.0.3 > 172.20.0.110: ICMP echo request,
+00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 10.0.0.3: ICMP echo request,
+00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 172.20.0.100: ICMP echo reply,
+00:00:00:00:ff:02 > 40:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.110 > 20.0.0.3: ICMP echo reply,
+])
+
+AT_CHECK([cat gw1_eth2.tcpdump], [0], [dnl
+
+])
+
+AT_CHECK([cat ch1_genev.tcpdump | cut -d  ' ' -f2-15], [0], [dnl
+00:00:00:00:ff:01 > 50:54:00:00:00:03, ethertype IPv4 (0x0800), length 98: 172.20.0.100 > 10.0.0.3: ICMP echo request,
+00:11:22:00:ff:01 > 00:11:22:00:ff:01, ethertype IPv4 (0x0800), length 98: 10.0.0.3 > 172.20.0.100: ICMP echo reply,
+])
+
+AT_CHECK([cat ch1_eth2.tcpdump], [0], [dnl
+
+])
+
+AT_CLEANUP
diff --git a/tests/ovn.at b/tests/ovn.at
index 13b393932e..2ced7c0b2f 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -38339,6 +38339,163 @@  OVN_CLEANUP([hv1
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([Provider network - always tunnel])
+ovn_start
+net_add n1
+
+# Two hypervisors, both mapping physnet1 so localnet ports can be
+# realized on either chassis.
+for hv in 1 2; do
+    sim_add hv${hv}
+    as hv${hv}
+    ovs-vsctl add-br br-phys
+    ovn_attach n1 br-phys 192.168.0.${hv}
+    ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys
+done
+
+# Two logical switches joined by a router; p1 ports bind on hv1,
+# p2 ports on hv2, so all E-W traffic crosses chassis.
+check ovn-nbctl ls-add sw0
+check ovn-nbctl lsp-add sw0 sw0-p1 -- lsp-set-addresses sw0-p1 "00:00:10:01:02:03 10.0.0.3"
+check ovn-nbctl lsp-add sw0 sw0-p2 -- lsp-set-addresses sw0-p2 "00:00:04:01:02:04 10.0.0.4"
+
+check ovn-nbctl ls-add sw1
+check ovn-nbctl lsp-add sw1 sw1-p1 -- lsp-set-addresses sw1-p1 "00:00:20:01:02:03 20.0.0.3"
+check ovn-nbctl lsp-add sw1 sw1-p2 -- lsp-set-addresses sw1-p2 "00:00:20:01:02:04 20.0.0.4"
+
+check ovn-nbctl lr-add lr0
+check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24
+check ovn-nbctl lsp-add sw0 sw0-lr0
+check ovn-nbctl lsp-set-type sw0-lr0 router
+check ovn-nbctl lsp-set-addresses sw0-lr0 router
+check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+check ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24
+check ovn-nbctl lsp-add sw1 sw1-lr0
+check ovn-nbctl lsp-set-type sw1-lr0 router
+check ovn-nbctl lsp-set-addresses sw1-lr0 router
+check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+
+as hv1
+ovs-vsctl add-port br-int vif11 -- \
+    set Interface vif11 external-ids:iface-id=sw0-p1 \
+                              options:tx_pcap=hv1/vif11-tx.pcap \
+                              options:rxq_pcap=hv1/vif11-rx.pcap \
+                              ofport-request=11
+ovs-vsctl add-port br-int vif12 -- \
+    set Interface vif12 external-ids:iface-id=sw1-p1 \
+                              options:tx_pcap=hv1/vif12-tx.pcap \
+                              options:rxq_pcap=hv1/vif12-rx.pcap \
+                              ofport-request=12
+
+# hv2's pcaps belong under the hv2 sandbox directory (the hv1/ paths
+# here were copy-pasted from the hv1 stanza above).
+as hv2
+ovs-vsctl add-port br-int vif21 -- \
+    set Interface vif21 external-ids:iface-id=sw0-p2 \
+                              options:tx_pcap=hv2/vif21-tx.pcap \
+                              options:rxq_pcap=hv2/vif21-rx.pcap \
+                              ofport-request=21
+ovs-vsctl add-port br-int vif22 -- \
+    set Interface vif22 external-ids:iface-id=sw1-p2 \
+                              options:tx_pcap=hv2/vif22-tx.pcap \
+                              options:rxq_pcap=hv2/vif22-rx.pcap \
+                              ofport-request=22
+
+check ovn-nbctl --wait=hv sync
+wait_for_ports_up
+
+# Collect the (hex) datapath and port tunnel keys used to match the
+# physical flows below.
+sw0_dp_key=$(printf "%x" $(fetch_column Datapath_Binding tunnel_key external_ids:name=sw0))
+sw0p1_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw0-p1))
+sw0p2_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw0-p2))
+
+sw1_dp_key=$(printf "%x" $(fetch_column Datapath_Binding tunnel_key external_ids:name=sw1))
+sw1p1_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw1-p1))
+sw1p2_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=sw1-p2))
+
+# check_output_flows_tunnelled HV DP_KEY PORT_KEY
+#
+# Assert that on hypervisor HV, output to the remote port with tunnel
+# key PORT_KEY on datapath DP_KEY goes through the tunnel: exactly one
+# flow in the remote-output table that loads the datapath key into the
+# tunnel ID, the destination port key (and source port, reg14) into
+# tun_metadata0, and outputs to ofport 1 (the tunnel port in this
+# single-tunnel setup -- TODO confirm if more tunnels are ever added).
+check_output_flows_tunnelled() {
+  hv=$1
+  dp_key=$2
+  dp_rport=$3
+  AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int table=OFTABLE_REMOTE_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} | ofctl_strip_all | grep -v NXST_FLOW], [0], [dnl
+ table=OFTABLE_REMOTE_OUTPUT, priority=100,reg13=0/0xffff0000,reg15=0x${dp_rport},metadata=0x${dp_key} actions=load:0x${dp_key}->NXM_NX_TUN_ID[[0..23]],set_field:0x${dp_rport}->tun_metadata0,move:NXM_NX_REG14[[0..14]]->NXM_NX_TUN_METADATA0[[16..30]],output:1,resubmit(,OFTABLE_LOCAL_OUTPUT)
+])
+}
+
+# check_output_flows_via_localnet HV DP_KEY PORT_KEY LNPORT_KEY
+#
+# Assert that on hypervisor HV, output to the remote port PORT_KEY on
+# datapath DP_KEY is NOT tunnelled (no matching flow in the
+# remote-output table -- grep exits 1 on empty output) and instead a
+# local-output flow redirects it to the localnet port LNPORT_KEY by
+# rewriting the outport register (reg15) and resubmitting.
+check_output_flows_via_localnet() {
+  hv=$1
+  dp_key=$2
+  dp_rport=$3
+  lnport_key=$4
+  AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int table=OFTABLE_REMOTE_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} | ofctl_strip_all | grep -v NXST_FLOW], [1], [dnl
+])
+
+  AT_CHECK_UNQUOTED([as $hv ovs-ofctl dump-flows br-int table=OFTABLE_LOCAL_OUTPUT,metadata=0x${dp_key},reg15=0x${dp_rport} | ofctl_strip_all | grep -v NXST_FLOW], [0], [dnl
+ table=OFTABLE_LOCAL_OUTPUT, priority=100,reg15=0x${dp_rport},metadata=0x${dp_key} actions=load:0x${lnport_key}->NXM_NX_REG15[[]],resubmit(,OFTABLE_LOCAL_OUTPUT)
+])
+}
+
+# There are no localnet ports in sw0 and sw1.  So the pkts are tunnelled.
+check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
+check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
+check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
+check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
+
+# Add localnet port to sw0
+check ovn-nbctl lsp-add sw0 ln-sw0 -- lsp-set-addresses ln-sw0 unknown -- lsp-set-type ln-sw0 localnet
+check ovn-nbctl --wait=hv lsp-set-options ln-sw0 network_name=physnet1 -- set logical_switch_port ln-sw0 tag_request=100
+lnsw0_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=ln-sw0))
+
+# Flows should be installed to use localnet port for sw0.
+# sw1 still has no localnet port, so its traffic stays tunnelled.
+check_output_flows_via_localnet hv1 ${sw0_dp_key} ${sw0p2_key} ${lnsw0_key}
+check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
+check_output_flows_via_localnet hv2 ${sw0_dp_key} ${sw0p1_key} ${lnsw0_key}
+check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
+
+# Add localnet port to sw1
+check ovn-nbctl lsp-add sw1 ln-sw1 -- lsp-set-addresses ln-sw1 unknown -- lsp-set-type ln-sw1 localnet
+check ovn-nbctl --wait=hv lsp-set-options ln-sw1 network_name=physnet1 -- set logical_switch_port ln-sw1 tag_request=101
+lnsw1_key=$(printf "%x" $(fetch_column Port_Binding tunnel_key logical_port=ln-sw1))
+
+# Flows should be installed to use localnet port.
+check_output_flows_via_localnet hv1 ${sw0_dp_key} ${sw0p2_key} ${lnsw0_key}
+check_output_flows_via_localnet hv1 ${sw1_dp_key} ${sw1p2_key} ${lnsw1_key}
+check_output_flows_via_localnet hv2 ${sw0_dp_key} ${sw0p1_key} ${lnsw0_key}
+check_output_flows_via_localnet hv2 ${sw1_dp_key} ${sw1p1_key} ${lnsw1_key}
+
+# Set the always_tunnel option to true.
+check ovn-nbctl set NB_Global . options:always_tunnel=true
+check ovn-nbctl --wait=hv sync
+
+# Flows should be installed to tunnel.
+# always_tunnel must win even though both switches have localnet ports.
+check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
+check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
+check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
+check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
+
+# Set the always_tunnel option to false.
+check ovn-nbctl set NB_Global . options:always_tunnel=false
+check ovn-nbctl --wait=hv sync
+
+# Flows should be installed to use localnet port.
+check_output_flows_via_localnet hv1 ${sw0_dp_key} ${sw0p2_key} ${lnsw0_key}
+check_output_flows_via_localnet hv1 ${sw1_dp_key} ${sw1p2_key} ${lnsw1_key}
+check_output_flows_via_localnet hv2 ${sw0_dp_key} ${sw0p1_key} ${lnsw0_key}
+check_output_flows_via_localnet hv2 ${sw1_dp_key} ${sw1p1_key} ${lnsw1_key}
+
+# Delete sw0's localnet port: sw0 traffic should revert to tunnelling.
+check ovn-nbctl --wait=hv lsp-del ln-sw0
+
+# Flows should be installed to tunnel for sw0
+check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
+check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
+
+check ovn-nbctl --wait=hv lsp-del ln-sw1
+# Flows should be installed to tunnel.
+check_output_flows_tunnelled hv1 ${sw0_dp_key} ${sw0p2_key}
+check_output_flows_tunnelled hv1 ${sw1_dp_key} ${sw1p2_key}
+check_output_flows_tunnelled hv2 ${sw0_dp_key} ${sw0p1_key}
+check_output_flows_tunnelled hv2 ${sw1_dp_key} ${sw1p1_key}
+
+OVN_CLEANUP([hv1],[hv2])
+
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([Delete parent of container port])
 ovn_start