@@ -323,10 +323,8 @@ update_ct_zones(struct sset *lports, const struct hmap *local_datapaths,
/* Local patched datapath (gateway routers) need zones assigned. */
const struct local_datapath *ld;
HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
- if (!ld->has_local_l3gateway) {
- continue;
- }
-
+ /* XXX Add method to limit zone assignment to logical router
+ * datapaths with NAT */
char *dnat = alloc_nat_zone_key(&ld->datapath->header_.uuid, "dnat");
char *snat = alloc_nat_zone_key(&ld->datapath->header_.uuid, "snat");
sset_add(&all_users, dnat);
@@ -752,9 +752,25 @@ output;
<ref column="addresses" table="Logical_Switch_Port"
db="OVN_Northbound"/> column is set to <code>router</code> and
the connected logical router port specifies a
- <code>redirect-chassis</code>, the flow is only programmed on the
- <code>redirect-chassis</code>.
+ <code>redirect-chassis</code>:
</p>
+
+ <ul>
+ <li>
+ The flow for the connected logical router port's Ethernet
+ address is only programmed on the <code>redirect-chassis</code>.
+ </li>
+
+ <li>
+ If the logical router has rules specified in
+ <ref column="nat" table="Logical_Router" db="OVN_Northbound"/> with
+ <ref column="external_mac" table="NAT" db="OVN_Northbound"/>, then
+ those addresses are also used to populate the switch's destination
+ lookup on the chassis where
+ <ref column="logical_port" table="NAT" db="OVN_Northbound"/> is
+ resident.
+ </li>
+ </ul>
</li>
<li>
@@ -890,6 +906,23 @@ output;
<code>redirect-chassis</code>.
</p>
</li>
+
+ <li>
+ <p>
+ For each <code>dnat_and_snat</code> NAT rule on a distributed
+ router that specifies an external Ethernet address <var>E</var>,
+ a priority-50 flow that matches <code>inport == <var>GW</var>
+ && eth.dst == <var>E</var></code>, where <var>GW</var>
+ is the logical router gateway port, with action
+ <code>next;</code>.
+ </p>
+
+ <p>
+ This flow is only programmed on the gateway port instance on
+ the chassis where the <code>logical_port</code> specified in
+ the NAT rule resides.
+ </p>
+ </li>
</ul>
<p>
@@ -928,7 +961,9 @@ output;
</li>
<li>
<code>ip4.src</code> or <code>ip6.src</code> is any IP
- address owned by the router.
+ address owned by the router, unless the packet was recirculated
+ due to egress loopback as indicated by
+ <code>REGBIT_EGRESS_LOOPBACK</code>.
</li>
<li>
<code>ip4.src</code> is the broadcast address of any IP network
@@ -1040,6 +1075,50 @@ outport = <var>P</var>;
flags.loopback = 1;
output;
</pre>
+
+ <p>
+ For the gateway port on a distributed logical router with NAT
+ (where one of the logical router ports specifies a
+ <code>redirect-chassis</code>):
+ </p>
+
+ <ul>
+ <li>
+ If the corresponding NAT rule cannot be handled in a
+ distributed manner, then this flow is only programmed on
+ the gateway port instance on the
+ <code>redirect-chassis</code>. This behavior avoids
+ generation of multiple ARP responses from different chassis,
+ and allows upstream MAC learning to point to the
+ <code>redirect-chassis</code>.
+ </li>
+
+ <li>
+ <p>
+ If the corresponding NAT rule can be handled in a distributed
+ manner, then this flow is only programmed on the gateway port
+ instance where the <code>logical_port</code> specified in the
+ NAT rule resides.
+ </p>
+
+ <p>
+ Some of the actions are different for this case, using the
+ <code>external_mac</code> specified in the NAT rule rather
+ than the gateway port's Ethernet address <var>E</var>:
+ </p>
+
+ <pre>
+eth.src = <var>external_mac</var>;
+arp.sha = <var>external_mac</var>;
+ </pre>
+
+ <p>
+ This behavior avoids generation of multiple ARP responses
+ from different chassis, and allows upstream MAC learning to
+ point to the correct chassis.
+ </p>
+ </li>
+ </ul>
</li>
<li>
@@ -1242,6 +1321,8 @@ icmp4 {
unSNATted here.
</p>
+ <p>Ingress Table 3: UNSNAT on Gateway Routers</p>
+
<ul>
<li>
<p>
@@ -1273,6 +1354,45 @@ icmp4 {
</li>
</ul>
+ <p>Ingress Table 3: UNSNAT on Distributed Routers</p>
+
+ <ul>
+ <li>
+ <p>
+ For each configuration in the OVN Northbound database, that asks
+ to change the source IP address of a packet from <var>A</var> to
+ <var>B</var>, a priority-100 flow matches <code>ip &&
+ ip4.dst == <var>B</var> && inport == <var>GW</var></code>,
+ where <var>GW</var> is the logical router gateway port, with an
+ action <code>ct_snat;</code>.
+ </p>
+
+ <p>
+ If the NAT rule cannot be handled in a distributed manner, then
+ the priority-100 flow above is only programmed on the
+ <code>redirect-chassis</code>.
+ </p>
+
+ <p>
+ For each configuration in the OVN Northbound database, that asks
+ to change the source IP address of a packet from <var>A</var> to
+ <var>B</var>, a priority-50 flow matches <code>ip &&
+ ip4.dst == <var>B</var></code> with an action
+ <code>REGBIT_NAT_REDIRECT = 1; next;</code>. This flow is for
+ east/west traffic to a NAT destination IPv4 address. By
+ setting the <code>REGBIT_NAT_REDIRECT</code> flag, in the
+ ingress table <code>Gateway Redirect</code> this will trigger a
+ redirect to the instance of the gateway port on the
+ <code>redirect-chassis</code>.
+ </p>
+
+ <p>
+ A priority-0 logical flow with match <code>1</code> has actions
+ <code>next;</code>.
+ </p>
+ </li>
+ </ul>
+
<h3>Ingress Table 4: DNAT</h3>
<p>
@@ -1280,6 +1400,9 @@ icmp4 {
be DNATted from a virtual IP address to a real IP address. Packets
in the reverse direction needs to be unDNATed.
</p>
+
+ <p>Ingress Table 4: DNAT on Gateway Routers</p>
+
<ul>
<li>
For all the configured load balancing rules for Gateway router in
@@ -1356,6 +1479,53 @@ icmp4 {
</li>
</ul>
+ <p>Ingress Table 4: DNAT on Distributed Routers</p>
+
+ <p>
+ On distributed routers, the DNAT table only handles packets
+ with destination IP address that needs to be DNATted from a
+ virtual IP address to a real IP address. The unDNAT processing
+ in the reverse direction is handled in a separate table in the
+ egress pipeline.
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ For each configuration in the OVN Northbound database, that asks
+ to change the destination IP address of a packet from <var>A</var> to
+ <var>B</var>, a priority-100 flow matches <code>ip &&
+ ip4.dst == <var>A</var> && inport == <var>GW</var></code>,
+ where <var>GW</var> is the logical router gateway port, with an
+ action <code>ct_dnat(<var>B</var>);</code>.
+ </p>
+
+ <p>
+ If the NAT rule cannot be handled in a distributed manner, then
+ the priority-100 flow above is only programmed on the
+ <code>redirect-chassis</code>.
+ </p>
+
+ <p>
+ For each configuration in the OVN Northbound database, that asks
+ to change the destination IP address of a packet from <var>A</var> to
+ <var>B</var>, a priority-50 flow matches <code>ip &&
+ ip4.dst == <var>A</var></code> with an action
+ <code>REGBIT_NAT_REDIRECT = 1; next;</code>. This flow is for
+ east/west traffic to a NAT destination IPv4 address. By
+ setting the <code>REGBIT_NAT_REDIRECT</code> flag, in the
+ ingress table <code>Gateway Redirect</code> this will trigger a
+ redirect to the instance of the gateway port on the
+ <code>redirect-chassis</code>.
+ </p>
+
+ <p>
+ A priority-0 logical flow with match <code>1</code> has actions
+ <code>next;</code>.
+ </p>
+ </li>
+ </ul>
+
<h3>Ingress Table 5: IP Routing</h3>
<p>
@@ -1367,9 +1537,9 @@ icmp4 {
packet's final destination, unchanged) and advances to the next
table for ARP resolution. It also sets <code>reg1</code> (or
<code>xxreg1</code>) to the IP address owned by the selected router
- port (Table 7 will generate ARP request, if needed, with
- <code>reg0</code> as the target protocol address and <code>reg1</code>
- as the source protocol address).
+ port (ingress table <code>ARP Request</code> will generate an ARP
+ request, if needed, with <code>reg0</code> as the target protocol
+ address and <code>reg1</code> as the source protocol address).
</p>
<p>
@@ -1379,6 +1549,16 @@ icmp4 {
<ul>
<li>
<p>
+ For distributed logical routers where one of the logical router
+ ports specifies a <code>redirect-chassis</code>, a priority-300
+ logical flow with match <code>REGBIT_NAT_REDIRECT == 1</code> has
+ actions <code>ip.ttl--; next;</code>. The <code>outport</code>
+ will be set later in the Gateway Redirect table.
+ </p>
+ </li>
+
+ <li>
+ <p>
IPv4 routing table. For each route to IPv4 network <var>N</var> with
netmask <var>M</var>, on router port <var>P</var> with IP address
<var>A</var> and Ethernet
@@ -1464,6 +1644,17 @@ next;
<ul>
<li>
<p>
+ For distributed logical routers where one of the logical router
+ ports specifies a <code>redirect-chassis</code>, a priority-200
+ logical flow with match <code>REGBIT_NAT_REDIRECT == 1</code> has
+ actions <code>eth.dst = <var>E</var>; next;</code>, where
+ <var>E</var> is the ethernet address of the router's distributed
+ gateway port.
+ </p>
+ </li>
+
+ <li>
+ <p>
Static MAC bindings. MAC bindings can be known statically based on
data in the <code>OVN_Northbound</code> database. For router ports
connected to logical switches, MAC bindings can be known statically
@@ -1513,9 +1704,9 @@ next;
<p>
Dynamic MAC bindings. These flows resolve MAC-to-IP bindings
that have become known dynamically through ARP or neighbor
- discovery. (The next table will issue an ARP or neighbor
- solicitation request for cases where the binding is not yet
- known.)
+ discovery. (The ingress table <code>ARP Request</code> will
+ issue an ARP or neighbor solicitation request for cases where
+ the binding is not yet known.)
</p>
<p>
@@ -1541,6 +1732,15 @@ next;
<ul>
<li>
+ A priority-200 logical flow with match
+ <code>REGBIT_NAT_REDIRECT == 1</code> has actions
+ <code>outport = <var>CR</var>; next;</code>, where <var>CR</var>
+ is the <code>chassisredirect</code> port representing the instance
+ of the logical router distributed gateway port on the
+ <code>redirect-chassis</code>.
+ </li>
+
+ <li>
A priority-150 logical flow with match
<code>outport == <var>GW</var> &&
eth.dst == 00:00:00:00:00:00</code> has actions
@@ -1553,6 +1753,15 @@ next;
</li>
<li>
+ For each NAT rule in the OVN Northbound database that can
+ be handled in a distributed manner, a priority-100 logical
+ flow with match <code>ip4.src == <var>B</var> &&
+ outport == <var>GW</var></code>, where <var>B</var> is the rule's
+ logical IP address and <var>GW</var> is the logical router
+ distributed gateway port, with actions <code>next;</code>.
+ </li>
+
+ <li>
A priority-50 logical flow with match
<code>outport == <var>GW</var></code> has actions
<code>outport = <var>CR</var>; next;</code>, where
@@ -1595,9 +1804,9 @@ arp {
</pre>
<p>
- (Ingress table 4 initialized <code>reg1</code> with the IP address
- owned by <code>outport</code> and <code>reg0</code> with the next-hop
- IP address)
+ (Ingress table <code>IP Routing</code> initialized <code>reg1</code>
+ with the IP address owned by <code>outport</code> and
+ <code>reg0</code> with the next-hop IP address)
</p>
<p>
@@ -1611,12 +1820,60 @@ arp {
</li>
</ul>
- <h3>Egress Table 0: SNAT</h3>
+ <h3>Egress Table 0: UNDNAT</h3>
+
+ <p>
+ This is for already established connections' reverse traffic.
+ i.e., DNAT has already been done in ingress pipeline and now the
+ packet has entered the egress pipeline as part of a reply. For
+ NAT on a distributed router, it is unDNATted here. For Gateway
+ routers, the unDNAT processing is carried out in the ingress DNAT
+ table.
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ For each configuration in the OVN Northbound database that asks
+ to change the destination IP address of a packet from an IP
+ address of <var>A</var> to <var>B</var>, a priority-100 flow
+ matches <code>ip && ip4.src == <var>B</var>
+ && outport == <var>GW</var></code>, where <var>GW</var>
+ is the logical router gateway port, with an action
+ <code>ct_dnat;</code>.
+ </p>
+
+ <p>
+ If the NAT rule cannot be handled in a distributed manner, then
+ the priority-100 flow above is only programmed on the
+ <code>redirect-chassis</code>.
+ </p>
+
+ <p>
+ If the NAT rule can be handled in a distributed manner, then
+ there is an additional action
+ <code>eth.src = <var>EA</var>;</code>, where <var>EA</var>
+ is the ethernet address associated with the IP address
+ <var>A</var> in the NAT rule. This allows upstream MAC
+ learning to point to the correct chassis.
+ </p>
+ </li>
+
+ <li>
+ A priority-0 logical flow with match <code>1</code> has actions
+ <code>next;</code>.
+ </li>
+ </ul>
+
+ <h3>Egress Table 1: SNAT</h3>
<p>
Packets that are configured to be SNATed get their source IP address
changed based on the configuration in the OVN Northbound database.
</p>
+
+ <p>Egress Table 1: SNAT on Gateway Routers</p>
+
<ul>
<li>
<p>
@@ -1650,7 +1907,122 @@ arp {
</li>
</ul>
- <h3>Egress Table 1: Delivery</h3>
+ <p>Egress Table 1: SNAT on Distributed Routers</p>
+
+ <ul>
+ <li>
+ <p>
+ For each configuration in the OVN Northbound database, that asks
+ to change the source IP address of a packet from an IP address of
+ <var>A</var> or to change the source IP address of a packet that
+ belongs to network <var>A</var> to <var>B</var>, a flow matches
+ <code>ip && ip4.src == <var>A</var> &&
+ outport == <var>GW</var></code>, where <var>GW</var> is the
+ logical router gateway port, with an action
+ <code>ct_snat(<var>B</var>);</code>. The priority of the flow
+ is calculated based on the mask of <var>A</var>, with matches
+ having larger masks getting higher priorities.
+ </p>
+
+ <p>
+ If the NAT rule cannot be handled in a distributed manner, then
+ the flow above is only programmed on the
+ <code>redirect-chassis</code>.
+ </p>
+
+ <p>
+ If the NAT rule can be handled in a distributed manner, then
+ there is an additional action
+ <code>eth.src = <var>EA</var>;</code>, where <var>EA</var>
+ is the ethernet address associated with the IP address
+ <var>A</var> in the NAT rule. This allows upstream MAC
+ learning to point to the correct chassis.
+ </p>
+ </li>
+
+ <li>
+ A priority-0 logical flow with match <code>1</code> has actions
+ <code>next;</code>.
+ </li>
+ </ul>
+
+ <h3>Egress Table 2: Egress Loopback</h3>
+
+ <p>
+ This table is used for distributed logical routers where one of
+ the logical router ports specifies a <code>redirect-chassis</code>.
+ </p>
+
+ <p>
+ Earlier in the ingress pipeline, some east-west traffic was
+ redirected to the <code>chassisredirect</code> port, based on
+ flows in the <code>UNSNAT</code> and <code>DNAT</code> ingress
+ tables setting the <code>REGBIT_NAT_REDIRECT</code> flag, which
+ then triggered a match to a flow in the
+ <code>Gateway Redirect</code> ingress table. The intention was
+ not to actually send traffic out the distributed gateway port
+ instance on the <code>redirect-chassis</code>. This traffic was
+ sent to the distributed gateway port instance in order for DNAT
+ and/or SNAT processing to be applied.
+ </p>
+
+ <p>
+ While UNDNAT and SNAT processing have already occurred by this
+ point, this traffic needs to be forced through egress loopback on
+ this distributed gateway port instance, in order for UNSNAT and
+ DNAT processing to be applied, and also for IP routing and ARP
+ resolution after all of the NAT processing, so that the packet can
+ be forwarded to the destination.
+ </p>
+
+ <p>
+ This table has the following flows:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ For each NAT rule in the OVN Northbound database on a
+ distributed router, a priority-100 logical flow with match
+ <code>ip4.dst == <var>E</var> &&
+ outport == <var>GW</var></code>, where <var>E</var> is the
+ external IP address specified in the NAT rule, and <var>GW</var>
+ is the logical router distributed gateway port, with the
+ following actions:
+ </p>
+
+ <pre>
+clone {
+ ct_clear;
+ inport = outport;
+ outport = "";
+ flags = 0;
+ flags.loopback = 1;
+ reg0 = 0;
+ reg1 = 0;
+ ...
+ reg9 = 0;
+ REGBIT_EGRESS_LOOPBACK = 1;
+ next(pipeline=ingress, table=0);
+};
+ </pre>
+
+ <p>
+ <code>flags.loopback</code> is set since in_port is unchanged
+ and the packet may return back to that port after NAT processing.
+ <code>REGBIT_EGRESS_LOOPBACK</code> is set to indicate that
+ egress loopback has occurred, in order to skip the source IP
+ address check against the router address.
+ </p>
+ </li>
+
+ <li>
+ A priority-0 logical flow with match <code>1</code> has actions
+ <code>next;</code>.
+ </li>
+ </ul>
+
+ <h3>Egress Table 3: Delivery</h3>
<p>
Packets that reach this table are ready for delivery. It contains
@@ -28,6 +28,7 @@
#include "openvswitch/hmap.h"
#include "openvswitch/json.h"
#include "ovn/lex.h"
+#include "ovn/lib/logical-fields.h"
#include "ovn/lib/ovn-dhcp.h"
#include "ovn/lib/ovn-nb-idl.h"
#include "ovn/lib/ovn-sb-idl.h"
@@ -136,8 +137,10 @@ enum ovn_stage {
PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 8, "lr_in_arp_request") \
\
/* Logical router egress stages. */ \
- PIPELINE_STAGE(ROUTER, OUT, SNAT, 0, "lr_out_snat") \
- PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery")
+ PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \
+ PIPELINE_STAGE(ROUTER, OUT, SNAT, 1, "lr_out_snat") \
+ PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP, 2, "lr_out_egr_loop") \
+ PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 3, "lr_out_delivery")
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
S_##DP_TYPE##_##PIPELINE##_##STAGE \
@@ -152,11 +155,20 @@ enum ovn_stage {
* priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000
+/* Register definitions specific to switches. */
#define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT "reg0[1]"
#define REGBIT_CONNTRACK_NAT "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT "reg0[3]"
+/* Register definitions for switches and routers. */
+#define REGBIT_NAT_REDIRECT "reg9[0]"
+/* Indicate that this packet has been recirculated using egress
+ * loopback. This allows certain checks to be bypassed, such as a
+ * logical router dropping packets whose source IP address equals
+ * one of the logical router's own IP addresses. */
+#define REGBIT_EGRESS_LOOPBACK "reg9[1]"
+
/* Returns an "enum ovn_stage" built from the arguments. */
static enum ovn_stage
ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
@@ -3236,6 +3248,33 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
ds_put_format(&actions, "outport = %s; output;", op->json_key);
ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
ds_cstr(&match), ds_cstr(&actions));
+
+ /* Add ethernet addresses specified in NAT rules on
+ * distributed logical routers. */
+ if (op->peer->od->l3dgw_port
+ && op->peer == op->peer->od->l3dgw_port) {
+ for (int i = 0; i < op->peer->od->nbr->n_nat; i++) {
+ const struct nbrec_nat *nat
+ = op->peer->od->nbr->nat[i];
+ if (!strcmp(nat->type, "dnat_and_snat")
+ && nat->logical_port && nat->external_mac
+ && eth_addr_from_string(nat->external_mac, &mac)) {
+
+ ds_clear(&match);
+ ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
+ " && is_chassis_resident(\"%s\")",
+ ETH_ADDR_ARGS(mac),
+ nat->logical_port);
+
+ ds_clear(&actions);
+ ds_put_format(&actions, "outport = %s; output;",
+ op->json_key);
+ ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
+ 50, ds_cstr(&match),
+ ds_cstr(&actions));
+ }
+ }
+ }
} else {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
@@ -3765,6 +3804,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ds_clear(&match);
ds_put_cstr(&match, "ip4.src == ");
op_put_v4_networks(&match, op, true);
+ ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
ds_cstr(&match), "drop;");
@@ -3937,17 +3977,56 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ds_clear(&actions);
ds_put_format(&actions,
"eth.dst = eth.src; "
- "eth.src = %s; "
"arp.op = 2; /* ARP reply */ "
- "arp.tha = arp.sha; "
- "arp.sha = %s; "
+ "arp.tha = arp.sha; ");
+
+ if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
+ struct eth_addr mac;
+ if (nat->external_mac &&
+ eth_addr_from_string(nat->external_mac, &mac)
+ && nat->logical_port) {
+ /* distributed NAT case, use nat->external_mac */
+ ds_put_format(&actions,
+ "eth.src = "ETH_ADDR_FMT"; "
+ "arp.sha = "ETH_ADDR_FMT"; ",
+ ETH_ADDR_ARGS(mac),
+ ETH_ADDR_ARGS(mac));
+ /* Traffic with eth.src = nat->external_mac should only be
+ * sent from the chassis where nat->logical_port is
+ * resident, so that upstream MAC learning points to the
+ * correct chassis. Also need to avoid generation of
+ * multiple ARP responses from different chassis. */
+ ds_put_format(&match, " && is_chassis_resident(\"%s\")",
+ nat->logical_port);
+ } else {
+ ds_put_format(&actions,
+ "eth.src = %s; "
+ "arp.sha = %s; ",
+ op->lrp_networks.ea_s,
+ op->lrp_networks.ea_s);
+ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
+ * should only be sent from the "redirect-chassis", so that
+ * upstream MAC learning points to the "redirect-chassis".
+ * Also need to avoid generation of multiple ARP responses
+ * from different chassis. */
+ if (op->od->l3redirect_port) {
+ ds_put_format(&match, " && is_chassis_resident(%s)",
+ op->od->l3redirect_port->json_key);
+ }
+ }
+ } else {
+ ds_put_format(&actions,
+ "eth.src = %s; "
+ "arp.sha = %s; ",
+ op->lrp_networks.ea_s,
+ op->lrp_networks.ea_s);
+ }
+ ds_put_format(&actions,
"arp.tpa = arp.spa; "
"arp.spa = "IP_FMT"; "
"outport = %s; "
"flags.loopback = 1; "
"output;",
- op->lrp_networks.ea_s,
- op->lrp_networks.ea_s,
IP_ARGS(ip),
op->json_key);
ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
@@ -4075,7 +4154,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
}
}
- /* NAT, Defrag and load balancing in Gateway routers. */
+ /* NAT, Defrag and load balancing. */
HMAP_FOR_EACH (od, key_node, datapaths) {
if (!od->nbr) {
continue;
@@ -4086,10 +4165,13 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
- /* NAT rules, packet defrag and load balancing are only valid on
- * Gateway routers. */
- if (!smap_get(&od->nbr->options, "chassis")) {
+ /* NAT rules are only valid on Gateway routers and routers with
+ * l3dgw_port (router has a port with "redirect-chassis"
+ * specified). */
+ if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
continue;
}
@@ -4139,6 +4221,23 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
}
}
+ /* For distributed router NAT, determine whether this NAT rule
+ * satisfies the conditions for distributed NAT processing. */
+ bool distributed = false;
+ struct eth_addr mac;
+ if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
+ nat->logical_port && nat->external_mac) {
+ if (eth_addr_from_string(nat->external_mac, &mac)) {
+ distributed = true;
+ } else {
+ static struct vlog_rate_limit rl =
+ VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
+ ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
+ continue;
+ }
+ }
+
/* Ingress UNSNAT table: It is for already established connections'
* reverse traffic. i.e., SNAT has already been done in egress
* pipeline and now the packet has entered the ingress pipeline as
@@ -4150,10 +4249,41 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
* egress pipeline. */
if (!strcmp(nat->type, "snat")
|| !strcmp(nat->type, "dnat_and_snat")) {
- ds_clear(&match);
- ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
- ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
- ds_cstr(&match), "ct_snat; next;");
+ if (!od->l3dgw_port) {
+ /* Gateway router. */
+ ds_clear(&match);
+ ds_put_format(&match, "ip && ip4.dst == %s",
+ nat->external_ip);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
+ ds_cstr(&match), "ct_snat; next;");
+ } else {
+ /* Distributed router. */
+
+ /* Traffic received on l3dgw_port is subject to NAT. */
+ ds_clear(&match);
+ ds_put_format(&match, "ip && ip4.dst == %s"
+ " && inport == %s",
+ nat->external_ip,
+ od->l3dgw_port->json_key);
+ if (!distributed && od->l3redirect_port) {
+ /* Flows for NAT rules that are centralized are only
+ * programmed on the "redirect-chassis". */
+ ds_put_format(&match, " && is_chassis_resident(%s)",
+ od->l3redirect_port->json_key);
+ }
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
+ ds_cstr(&match), "ct_snat;");
+
+ /* Traffic received on other router ports must be
+ * redirected to the central instance of the l3dgw_port
+ * for NAT processing. */
+ ds_clear(&match);
+ ds_put_format(&match, "ip && ip4.dst == %s",
+ nat->external_ip);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50,
+ ds_cstr(&match),
+ REGBIT_NAT_REDIRECT" = 1; next;");
+ }
}
/* Ingress DNAT table: Packets enter the pipeline with destination
@@ -4161,21 +4291,87 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
* to a logical IP address. */
if (!strcmp(nat->type, "dnat")
|| !strcmp(nat->type, "dnat_and_snat")) {
- /* Packet when it goes from the initiator to destination.
- * We need to zero the inport because the router can
- * send the packet back through the same interface. */
+ if (!od->l3dgw_port) {
+ /* Gateway router. */
+ /* Packet when it goes from the initiator to destination.
+ * We need to set flags.loopback because the router can
+ * send the packet back through the same interface. */
+ ds_clear(&match);
+ ds_put_format(&match, "ip && ip4.dst == %s",
+ nat->external_ip);
+ ds_clear(&actions);
+ if (dnat_force_snat_ip) {
+ /* Indicate to the future tables that a DNAT has taken
+ * place and a force SNAT needs to be done in the
+ * Egress SNAT table. */
+ ds_put_format(&actions,
+ "flags.force_snat_for_dnat = 1; ");
+ }
+ ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
+ nat->logical_ip);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
+ ds_cstr(&match), ds_cstr(&actions));
+ } else {
+ /* Distributed router. */
+
+ /* Traffic received on l3dgw_port is subject to NAT. */
+ ds_clear(&match);
+ ds_put_format(&match, "ip && ip4.dst == %s"
+ " && inport == %s",
+ nat->external_ip,
+ od->l3dgw_port->json_key);
+ if (!distributed && od->l3redirect_port) {
+ /* Flows for NAT rules that are centralized are only
+ * programmed on the "redirect-chassis". */
+ ds_put_format(&match, " && is_chassis_resident(%s)",
+ od->l3redirect_port->json_key);
+ }
+ ds_clear(&actions);
+ ds_put_format(&actions, "ct_dnat(%s);",
+ nat->logical_ip);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
+ ds_cstr(&match), ds_cstr(&actions));
+
+ /* Traffic received on other router ports must be
+ * redirected to the central instance of the l3dgw_port
+ * for NAT processing. */
+ ds_clear(&match);
+ ds_put_format(&match, "ip && ip4.dst == %s",
+ nat->external_ip);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
+ ds_cstr(&match),
+ REGBIT_NAT_REDIRECT" = 1; next;");
+ }
+ }
+
+ /* Egress UNDNAT table: It is for already established connections'
+ * reverse traffic. i.e., DNAT has already been done in ingress
+ * pipeline and now the packet has entered the egress pipeline as
+ * part of a reply. We undo the DNAT here.
+ *
+ * Note that this only applies for NAT on a distributed router.
+ * Undo DNAT on a gateway router is done in the ingress DNAT
+ * pipeline stage. */
+ if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
+ || !strcmp(nat->type, "dnat_and_snat"))) {
ds_clear(&match);
- ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
+ ds_put_format(&match, "ip && ip4.src == %s"
+ " && outport == %s",
+ nat->logical_ip,
+ od->l3dgw_port->json_key);
+ if (!distributed && od->l3redirect_port) {
+ /* Flows for NAT rules that are centralized are only
+ * programmed on the "redirect-chassis". */
+ ds_put_format(&match, " && is_chassis_resident(%s)",
+ od->l3redirect_port->json_key);
+ }
ds_clear(&actions);
- if (dnat_force_snat_ip) {
- /* Indicate to the future tables that a DNAT has taken
- * place and a force SNAT needs to be done in the Egress
- * SNAT table. */
- ds_put_format(&actions, "flags.force_snat_for_dnat = 1; ");
+ if (distributed) {
+ ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
+ ETH_ADDR_ARGS(mac));
}
- ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
- nat->logical_ip);
- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
+ ds_put_format(&actions, "ct_dnat;");
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
ds_cstr(&match), ds_cstr(&actions));
}
@@ -4184,22 +4380,107 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
* address. */
if (!strcmp(nat->type, "snat")
|| !strcmp(nat->type, "dnat_and_snat")) {
+ if (!od->l3dgw_port) {
+ /* Gateway router. */
+ ds_clear(&match);
+ ds_put_format(&match, "ip && ip4.src == %s",
+ nat->logical_ip);
+ ds_clear(&actions);
+ ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
+
+ /* The priority here is calculated such that the
+ * nat->logical_ip with the longest mask gets a higher
+ * priority. */
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
+ count_1bits(ntohl(mask)) + 1,
+ ds_cstr(&match), ds_cstr(&actions));
+ } else {
+ /* Distributed router. */
+ ds_clear(&match);
+ ds_put_format(&match, "ip && ip4.src == %s"
+ " && outport == %s",
+ nat->logical_ip,
+ od->l3dgw_port->json_key);
+ if (!distributed && od->l3redirect_port) {
+ /* Flows for NAT rules that are centralized are only
+ * programmed on the "redirect-chassis". */
+ ds_put_format(&match, " && is_chassis_resident(%s)",
+ od->l3redirect_port->json_key);
+ }
+ ds_clear(&actions);
+ if (distributed) {
+ ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
+ ETH_ADDR_ARGS(mac));
+ }
+ ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
+
+ /* The priority here is calculated such that the
+ * nat->logical_ip with the longest mask gets a higher
+ * priority. */
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
+ count_1bits(ntohl(mask)) + 1,
+ ds_cstr(&match), ds_cstr(&actions));
+ }
+ }
+
+ /* Logical router ingress table 0:
+ * For NAT on a distributed router, add rules allowing
+ * ingress traffic with eth.dst matching nat->external_mac
+ * on the l3dgw_port instance where nat->logical_port is
+ * resident. */
+ if (distributed) {
ds_clear(&match);
- ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
- ds_clear(&actions);
- ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
+ ds_put_format(&match,
+ "eth.dst == "ETH_ADDR_FMT" && inport == %s"
+ " && is_chassis_resident(\"%s\")",
+ ETH_ADDR_ARGS(mac),
+ od->l3dgw_port->json_key,
+ nat->logical_port);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50,
+ ds_cstr(&match), "next;");
+ }
+
+ /* Ingress Gateway Redirect Table: For NAT on a distributed
+ * router, add flows that are specific to a NAT rule. These
+ * flows indicate the presence of an applicable NAT rule that
+ * can be applied in a distributed manner. */
+ if (distributed) {
+ ds_clear(&match);
+ ds_put_format(&match, "ip4.src == %s && outport == %s",
+ nat->logical_ip,
+ od->l3dgw_port->json_key);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100,
+ ds_cstr(&match), "next;");
+ }
- /* The priority here is calculated such that the
- * nat->logical_ip with the longest mask gets a higher
- * priority. */
- ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
- count_1bits(ntohl(mask)) + 1,
+ /* Egress Loopback table: For NAT on a distributed router.
+ * If packets in the egress pipeline on the distributed
+ * gateway port have ip.dst matching a NAT external IP, then
+ * loop a clone of the packet back to the beginning of the
+ * ingress pipeline with inport = outport. */
+ if (od->l3dgw_port) {
+ /* Distributed router. */
+ ds_clear(&match);
+ ds_put_format(&match, "ip4.dst == %s && outport == %s",
+ nat->external_ip,
+ od->l3dgw_port->json_key);
+ ds_clear(&actions);
+ ds_put_format(&actions,
+ "clone { ct_clear; "
+ "inport = outport; outport = \"\"; "
+ "flags = 0; flags.loopback = 1; ");
+ for (int i = 0; i < MFF_N_LOG_REGS; i++) {
+ ds_put_format(&actions, "reg%d = 0; ", i);
+ }
+ ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
+ "next(pipeline=ingress, table=0); };");
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
ds_cstr(&match), ds_cstr(&actions));
}
}
/* Handle force SNAT options set in the gateway router. */
- if (dnat_force_snat_ip) {
+ if (dnat_force_snat_ip && !od->l3dgw_port) {
/* If a packet with destination IP address as that of the
* gateway router (as set in options:dnat_force_snat_ip) is seen,
* UNSNAT it. */
@@ -4218,7 +4499,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
ds_cstr(&match), ds_cstr(&actions));
}
- if (lb_force_snat_ip) {
+ if (lb_force_snat_ip && !od->l3dgw_port) {
/* If a packet with destination IP address as that of the
* gateway router (as set in options:lb_force_snat_ip) is seen,
* UNSNAT it. */
@@ -4237,22 +4518,61 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ds_cstr(&match), ds_cstr(&actions));
}
- /* Re-circulate every packet through the DNAT zone.
- * This helps with two things.
- *
- * 1. Any packet that needs to be unDNATed in the reverse
- * direction gets unDNATed. Ideally this could be done in
- * the egress pipeline. But since the gateway router
- * does not have any feature that depends on the source
- * ip address being external IP address for IP routing,
- * we can do it here, saving a future re-circulation.
- *
- * 2. Any packet that was sent through SNAT zone in the
- * previous table automatically gets re-circulated to get
- * back the new destination IP address that is needed for
- * routing in the openflow pipeline. */
- ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
- "ip", "flags.loopback = 1; ct_dnat;");
+ if (!od->l3dgw_port) {
+ /* For gateway router, re-circulate every packet through
+ * the DNAT zone. This helps with two things.
+ *
+ * 1. Any packet that needs to be unDNATed in the reverse
+ * direction gets unDNATed. Ideally this could be done in
+ * the egress pipeline. But since the gateway router
+ * does not have any feature that depends on the source
+ * ip address being external IP address for IP routing,
+ * we can do it here, saving a future re-circulation.
+ *
+ * 2. Any packet that was sent through SNAT zone in the
+ * previous table automatically gets re-circulated to get
+ * back the new destination IP address that is needed for
+ * routing in the openflow pipeline. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
+ "ip", "flags.loopback = 1; ct_dnat;");
+ } else {
+ /* For NAT on a distributed router, add flows to Ingress
+ * IP Routing table, Ingress ARP Resolution table, and
+ * Ingress Gateway Redirect Table that are not specific to a
+ * NAT rule. */
+
+ /* The highest priority IN_IP_ROUTING rule matches packets
+ * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
+ * with action "ip.ttl--; next;". The IN_GW_REDIRECT table
+ * will take care of setting the outport. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
+ REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;");
+
+ /* The highest priority IN_ARP_RESOLVE rule matches packets
+ * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
+ * then sets eth.dst to the distributed gateway port's
+ * ethernet address. */
+ ds_clear(&actions);
+ ds_put_format(&actions, "eth.dst = %s; next;",
+ od->l3dgw_port->lrp_networks.ea_s);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200,
+ REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
+
+ /* The highest priority IN_GW_REDIRECT rule redirects packets
+ * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
+ * the central instance of the l3dgw_port for NAT processing. */
+ ds_clear(&actions);
+ ds_put_format(&actions, "outport = %s; next;",
+ od->l3redirect_port->json_key);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200,
+ REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
+ }
+
+ /* Load balancing and packet defrag are only valid on
+ * Gateway routers. */
+ if (!smap_get(&od->nbr->options, "chassis")) {
+ continue;
+ }
/* A set to hold all ips that need defragmentation and tracking. */
struct sset all_ips = SSET_INITIALIZER(&all_ips);
@@ -793,11 +793,10 @@
number 13.
</dd>
- <dt>conntrack zone fields for Gateway router</dt>
+ <dt>conntrack zone fields for routers</dt>
<dd>
- Fields that denote the connection tracking zones for Gateway routers.
- These values only have local significance (only on chassis that have
- Gateway routers instantiated) and is not meaningful between
+ Fields that denote the connection tracking zones for routers. These
+ values only have local significance and are not meaningful between
chassis. OVN stores the zone information for DNATting in Open vSwitch
<!-- Keep the following in sync with MFF_LOG_DNAT_ZONE and
MFF_LOG_SNAT_ZONE in ovn/lib/logical-fields.h. -->
@@ -1,7 +1,7 @@
{
"name": "OVN_Northbound",
"version": "5.5.0",
- "cksum": "379266191 13990",
+ "cksum": "2099428463 14236",
"tables": {
"NB_Global": {
"columns": {
@@ -220,7 +220,11 @@
"NAT": {
"columns": {
"external_ip": {"type": "string"},
+ "external_mac": {"type": {"key": "string",
+ "min": 0, "max": 1}},
"logical_ip": {"type": "string"},
+ "logical_port": {"type": {"key": "string",
+ "min": 0, "max": 1}},
"type": {"type": {"key": {"type": "string",
"enum": ["set", ["dnat",
"snat",
@@ -536,6 +536,15 @@
</p>
<p>
+ If the connected logical router port has a
+ <code>redirect-chassis</code> specified and the logical router
+ has rules specified in <ref column="nat" table="Logical_Router"/>
+ with <ref column="external_mac" table="NAT"/>, then those
+ addresses are also used to populate the switch's destination
+ lookup.
+ </p>
+
+ <p>
Supported only in OVN 2.7 and later. Earlier versions required
router addresses to be manually synchronized.
</p>
@@ -927,8 +936,9 @@
</column>
<column name="nat">
- One or more NAT rules for the router. NAT rules only work on the
- Gateway routers.
+ One or more NAT rules for the router. NAT rules only work on
+ Gateway routers, and on distributed routers with one logical router
+ port with a <code>redirect-chassis</code> specified.
</column>
<column name="load_balancer">
@@ -1231,9 +1241,9 @@
</column>
</table>
- <table name="NAT" title="NAT rules for a Gateway router.">
+ <table name="NAT" title="NAT rules">
<p>
- Each record represents a NAT rule in a Gateway router.
+ Each record represents a NAT rule.
</p>
<column name="type">
@@ -1266,9 +1276,47 @@
An IPv4 address.
</column>
+ <column name="external_mac">
+ <p>
+ A MAC address.
+ </p>
+
+ <p>
+ This is only used on the gateway port on distributed routers.
+ This and <ref column="logical_port"/> must both be specified for
+ the NAT rule to be processed in a distributed manner on all
+ chassis. Otherwise, a NAT rule on a distributed router is
+ processed in a centralized manner on the gateway port instance
+ on the <code>redirect-chassis</code>.
+ </p>
+
+ <p>
+ This MAC address must be unique on the logical switch that the
+ gateway port is attached to. If the MAC address used on the
+ <ref column="logical_port"/> is globally unique, then that MAC
+ address can be specified as this <ref column="external_mac"/>.
+ </p>
+ </column>
+
<column name="logical_ip">
An IPv4 network (e.g 192.168.1.0/24) or an IPv4 address.
</column>
+
+ <column name="logical_port">
+ <p>
+ The name of the logical port where the <ref column="logical_ip"/>
+ resides.
+ </p>
+
+ <p>
+ This is only used on distributed routers. This and
+ <ref column="external_mac"/> must both be specified for the NAT
+ rule to be processed in a distributed manner on all chassis.
+ Otherwise, a NAT rule on a distributed router is processed in a
+ centralized manner on the gateway port instance on the
+ <code>redirect-chassis</code>.
+ </p>
+ </column>
</table>
<table name="DHCP_Options" title="DHCP options">
@@ -1060,3 +1060,341 @@ as
OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
/connection dropped.*/d"])
AT_CLEANUP
+
+AT_SETUP([ovn -- DNAT and SNAT on distributed router - N/S])
+AT_KEYWORDS([ovnnat])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+ -- set Open_vSwitch . external-ids:system-id=hv1 \
+ -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+# Logical network:
+# One LR R1 with switches foo (192.168.1.0/24), bar (192.168.2.0/24),
+# and alice (172.16.1.0/24) connected to it. The port between R1 and
+# alice is the router gateway port where the R1 NAT rules are applied.
+#
+# foo -- R1 -- alice
+# |
+# bar ----
+
+ovn-nbctl lr-add R1
+
+ovn-nbctl ls-add foo
+ovn-nbctl ls-add bar
+ovn-nbctl ls-add alice
+
+ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24
+ovn-nbctl lrp-add R1 alice 00:00:02:01:02:03 172.16.1.1/24 \
+ -- set Logical_Router_Port alice options:redirect-chassis=hv1
+
+# Connect foo to R1
+ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
+ type=router options:router-port=foo \
+ -- lsp-set-addresses rp-foo router
+
+# Connect bar to R1
+ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \
+ type=router options:router-port=bar \
+ -- lsp-set-addresses rp-bar router
+
+# Connect alice to R1
+ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \
+ type=router options:router-port=alice \
+ -- lsp-set-addresses rp-alice router
+
+# Logical port 'foo1' in switch 'foo'.
+ADD_NAMESPACES(foo1)
+ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
+ "192.168.1.1")
+ovn-nbctl lsp-add foo foo1 \
+-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2"
+
+# Logical port 'foo2' in switch 'foo'.
+ADD_NAMESPACES(foo2)
+ADD_VETH(foo2, foo2, br-int, "192.168.1.3/24", "f0:00:00:01:02:06", \
+ "192.168.1.1")
+ovn-nbctl lsp-add foo foo2 \
+-- lsp-set-addresses foo2 "f0:00:00:01:02:06 192.168.1.3"
+
+# Logical port 'bar1' in switch 'bar'.
+ADD_NAMESPACES(bar1)
+ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:04", \
+ "192.168.2.1")
+ovn-nbctl lsp-add bar bar1 \
+-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2"
+
+# Logical port 'alice1' in switch 'alice'.
+ADD_NAMESPACES(alice1)
+ADD_VETH(alice1, alice1, br-int, "172.16.1.2/24", "f0:00:00:01:02:05", \
+ "172.16.1.1")
+ovn-nbctl lsp-add alice alice1 \
+-- lsp-set-addresses alice1 "f0:00:00:01:02:05 172.16.1.2"
+
+# Add dnat_and_snat rules (with external_mac and logical_port) for foo1, foo2.
+ovn-nbctl -- --id=@nat create nat type="dnat_and_snat" \
+ logical_ip=192.168.1.2 external_ip=172.16.1.3 \
+ external_mac=\"00:00:02:02:03:04\" logical_port=foo1 \
+ -- add logical_router R1 nat @nat
+
+ovn-nbctl -- --id=@nat create nat type="dnat_and_snat" \
+ logical_ip=192.168.1.3 external_ip=172.16.1.4 \
+ external_mac=\"00:00:02:02:03:05\" logical_port=foo2 \
+ -- add logical_router R1 nat @nat
+
+# Add a SNAT rule for 192.168.0.0/16 using the gateway port IP 172.16.1.1.
+ovn-nbctl -- --id=@nat create nat type="snat" \
+ logical_ip=192.168.0.0/16 external_ip=172.16.1.1 \
+ -- add logical_router R1 nat @nat
+
+OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep ct\( | grep nat])
+
+# North-South DNAT: 'alice1' pings 'foo1' using 172.16.1.3.
+NS_CHECK_EXEC([alice1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that DNAT indeed happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.3) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.2,dst=172.16.1.3,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.1.2,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# South-North SNAT: 'foo2' pings 'alice1'. But 'alice1' receives traffic
+# from 172.16.1.4
+NS_CHECK_EXEC([foo2], [ping -q -c 3 -i 0.3 -w 2 172.16.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that SNAT indeed happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.4) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=192.168.1.3,dst=172.16.1.2,id=<cleared>,type=8,code=0),reply=(src=172.16.1.2,dst=172.16.1.4,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# South-North SNAT: 'bar1' pings 'alice1'. But 'alice1' receives traffic
+# from 172.16.1.1
+NS_CHECK_EXEC([bar1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that SNAT indeed happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.1) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=192.168.2.2,dst=172.16.1.2,id=<cleared>,type=8,code=0),reply=(src=172.16.1.2,dst=172.16.1.1,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+
+AT_SETUP([ovn -- DNAT and SNAT on distributed router - E/W])
+AT_KEYWORDS([ovnnat])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+ -- set Open_vSwitch . external-ids:system-id=hv1 \
+ -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+# Logical network:
+# One LR R1 with switches foo (192.168.1.0/24), bar (192.168.2.0/24),
+# and alice (172.16.1.0/24) connected to it. The port between R1 and
+# alice is the router gateway port where the R1 NAT rules are applied.
+#
+# foo -- R1 -- alice
+# |
+# bar ----
+
+ovn-nbctl lr-add R1
+
+ovn-nbctl ls-add foo
+ovn-nbctl ls-add bar
+ovn-nbctl ls-add alice
+
+ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24
+ovn-nbctl lrp-add R1 alice 00:00:02:01:02:03 172.16.1.1/24 \
+ -- set Logical_Router_Port alice options:redirect-chassis=hv1
+
+# Connect foo to R1
+ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
+ type=router options:router-port=foo \
+ -- lsp-set-addresses rp-foo router
+
+# Connect bar to R1
+ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \
+ type=router options:router-port=bar \
+ -- lsp-set-addresses rp-bar router
+
+# Connect alice to R1
+ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \
+ type=router options:router-port=alice \
+ -- lsp-set-addresses rp-alice router
+
+# Logical port 'foo1' in switch 'foo'.
+ADD_NAMESPACES(foo1)
+ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
+ "192.168.1.1")
+ovn-nbctl lsp-add foo foo1 \
+-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2"
+
+# Logical port 'foo2' in switch 'foo'.
+ADD_NAMESPACES(foo2)
+ADD_VETH(foo2, foo2, br-int, "192.168.1.3/24", "f0:00:00:01:02:06", \
+ "192.168.1.1")
+ovn-nbctl lsp-add foo foo2 \
+-- lsp-set-addresses foo2 "f0:00:00:01:02:06 192.168.1.3"
+
+# Logical port 'bar1' in switch 'bar'.
+ADD_NAMESPACES(bar1)
+ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:04", \
+ "192.168.2.1")
+ovn-nbctl lsp-add bar bar1 \
+-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2"
+
+# Logical port 'alice1' in switch 'alice'.
+ADD_NAMESPACES(alice1)
+ADD_VETH(alice1, alice1, br-int, "172.16.1.2/24", "f0:00:00:01:02:05", \
+ "172.16.1.1")
+ovn-nbctl lsp-add alice alice1 \
+-- lsp-set-addresses alice1 "f0:00:00:01:02:05 172.16.1.2"
+
+# Add dnat_and_snat rules (with external_mac and logical_port) for foo1, bar1.
+ovn-nbctl -- --id=@nat create nat type="dnat_and_snat" \
+ logical_ip=192.168.1.2 external_ip=172.16.1.3 \
+ external_mac=\"00:00:02:02:03:04\" logical_port=foo1 \
+ -- add logical_router R1 nat @nat
+
+ovn-nbctl -- --id=@nat create nat type="dnat_and_snat" \
+ logical_ip=192.168.2.2 external_ip=172.16.1.4 \
+ external_mac=\"00:00:02:02:03:05\" logical_port=bar1 \
+ -- add logical_router R1 nat @nat
+
+# Add a SNAT rule for 192.168.0.0/16 using the gateway port IP 172.16.1.1.
+ovn-nbctl -- --id=@nat create nat type="snat" \
+ logical_ip=192.168.0.0/16 external_ip=172.16.1.1 \
+ -- add logical_router R1 nat @nat
+
+OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep ct\( | grep nat])
+
+echo "------ hv dump ------"
+ovs-ofctl show br-int
+ovs-ofctl dump-flows br-int
+echo "---------------------"
+
+# East-West No NAT: 'foo1' pings 'bar1' using 192.168.2.2.
+NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that no NAT happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(192.168.2.2) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | wc -l], [0], [0
+])
+
+# East-West No NAT: 'foo2' pings 'bar1' using 192.168.2.2.
+NS_CHECK_EXEC([foo2], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that no NAT happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(192.168.2.2) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | wc -l], [0], [0
+])
+
+# East-West No NAT: 'bar1' pings 'foo2' using 192.168.1.3.
+NS_CHECK_EXEC([bar1], [ping -q -c 3 -i 0.3 -w 2 192.168.1.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that no NAT happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(192.168.2.2) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | wc -l], [0], [0
+])
+
+# East-West NAT: 'foo1' pings 'bar1' using 172.16.1.4.
+NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Check conntrack entries. First SNAT of 'foo1' address happens.
+# Then DNAT of 'bar1' address happens (listed first below).
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.3) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.3,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.1.3,id=<cleared>,type=0,code=0),zone=<cleared>
+icmp,orig=(src=192.168.1.2,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=172.16.1.4,dst=172.16.1.3,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# East-West NAT: 'foo2' pings 'bar1' using 172.16.1.4.
+NS_CHECK_EXEC([foo2], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Check conntrack entries. 'foo2' has no dnat_and_snat rule, so it is first
+# SNATed to 172.16.1.1. Then DNAT of 'bar1' happens (listed first below).
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.1) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.1,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.1.1,id=<cleared>,type=0,code=0),zone=<cleared>
+icmp,orig=(src=192.168.1.3,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=172.16.1.4,dst=172.16.1.1,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP