@@ -88,6 +88,10 @@ ovn_init_symtab(struct shash *symtab)
char flags_str[16];
snprintf(flags_str, sizeof flags_str, "flags[%d]", MLF_ALLOW_LOOPBACK_BIT);
expr_symtab_add_subfield(symtab, "flags.loopback", NULL, flags_str);
+ snprintf(flags_str, sizeof flags_str, "flags[%d]",
+ MLF_FORCE_SNAT_FOR_DNAT_BIT);
+ expr_symtab_add_subfield(symtab, "flags.force_snat_for_dnat", NULL,
+ flags_str);
/* Connection tracking state. */
expr_symtab_add_field(symtab, "ct_mark", MFF_CT_MARK, NULL, false);
@@ -47,6 +47,7 @@ void ovn_init_symtab(struct shash *symtab);
enum mff_log_flags_bits {
MLF_ALLOW_LOOPBACK_BIT = 0,
MLF_RCV_FROM_VXLAN_BIT = 1,
+ MLF_FORCE_SNAT_FOR_DNAT_BIT = 2,
};
/* MFF_LOG_FLAGS_REG flag assignments */
@@ -59,6 +60,10 @@ enum mff_log_flags {
* VXLAN encapsulation. Egress port information is available for
* Geneve and STT tunnel types. */
MLF_RCV_FROM_VXLAN = (1 << MLF_RCV_FROM_VXLAN_BIT),
+
+ /* Indicate that a packet needs a force SNAT in the gateway router when
+ * DNAT has taken place. */
+ MLF_FORCE_SNAT_FOR_DNAT = (1 << MLF_FORCE_SNAT_FOR_DNAT_BIT),
};
#endif /* ovn/lib/logical-fields.h */
@@ -1153,6 +1153,14 @@ icmp4 {
</p>
<p>
+ If the Gateway router has been configured to force SNAT (any
+ previously DNATted or Load-balanced packets) to <var>B</var>,
+ a priority-100 flow matches <code>ip &&
+ ip4.dst == <var>B</var></code> with an action <code>ct_snat;
+ next;</code>.
+ </p>
+
+ <p>
A priority-0 logical flow with match <code>1</code> has actions
<code>next;</code>.
</p>
@@ -1194,7 +1202,11 @@ icmp4 {
to change the destination IP address of a packet from <var>A</var> to
<var>B</var>, a priority-100 flow matches <code>ip &&
ip4.dst == <var>A</var></code> with an action
- <code>flags.loopback = 1; ct_dnat(<var>B</var>);</code>.
+ <code>flags.loopback = 1; ct_dnat(<var>B</var>);</code>. If the
+ Gateway router is configured to force SNAT any DNATed packet,
+ the above action will be replaced by
+ <code>flags.force_snat_for_dnat = 1; flags.loopback = 1;
+ ct_dnat(<var>B</var>);</code>.
</li>
<li>
@@ -1433,6 +1445,20 @@ arp {
<ul>
<li>
<p>
+ If the Gateway router in the OVN Northbound database has been
+ configured to force SNAT a packet (that has been previously DNATted)
+ to <var>B</var>, a priority-110 flow matches
+ <code>flags.force_snat_for_dnat == 1 && ip</code> with an
+ action <code>ct_snat(<var>B</var>);</code>.
+ </p>
+ <p>
+ If the Gateway router in the OVN Northbound database has been
+ configured to force SNAT a packet (that has been previously
+ load-balanced) to <var>B</var>, a priority-100 flow matches
+ <code>ct.dnat && ip</code> with an action
+ <code>ct_snat(<var>B</var>);</code>.
+ </p>
+ <p>
For each configuration in the OVN Northbound database, that asks
to change the source IP address of a packet from an IP address of
<var>A</var> or to change the source IP address of a packet that
@@ -3452,6 +3452,31 @@ op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
ds_put_cstr(ds, "}");
}
+/* Returns option "<key_type>_force_snat_ip" of 'od', parsed into '*ip'.  If
+ * unset or not a fully masked address, zeroes '*ip' and returns NULL. */
+static const char *
+get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
+{
+    char *opt_name = xasprintf("%s_force_snat_ip", key_type);
+    const char *value = smap_get(&od->nbr->options, opt_name);
+    free(opt_name);
+    if (!value) {
+        *ip = 0;
+        return NULL;
+    }
+    ovs_be32 netmask;
+    char *parse_err = ip_parse_masked(value, ip, &netmask);
+    if (!parse_err && netmask == OVS_BE32_MAX) {
+        return value;
+    }
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+    VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
+                 value, UUID_ARGS(&od->key));
+    free(parse_err);
+    *ip = 0;
+    return NULL;
+}
+
static void
build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
struct hmap *lflows)
@@ -3673,8 +3698,26 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
sset_destroy(&all_ips);
- ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips * op->od->nbr->n_nat);
+ /* A gateway router can have 2 SNAT IP addresses to force DNATed and
+ * LBed traffic respectively to be SNATed. In addition, there can be
+ * a number of SNAT rules in the NAT table. */
+ ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
+ (op->od->nbr->n_nat + 2));
size_t n_snat_ips = 0;
+
+ ovs_be32 snat_ip;
+ const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
+ &snat_ip);
+ if (dnat_force_snat_ip) {
+ snat_ips[n_snat_ips++] = snat_ip;
+ }
+
+ const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
+ &snat_ip);
+ if (lb_force_snat_ip) {
+ snat_ips[n_snat_ips++] = snat_ip;
+ }
+
for (int i = 0; i < op->od->nbr->n_nat; i++) {
const struct nbrec_nat *nat;
@@ -3845,6 +3888,12 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
continue;
}
+ ovs_be32 snat_ip;
+ const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
+ &snat_ip);
+ const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
+ &snat_ip);
+
/* A set to hold all ips that need defragmentation and tracking. */
struct sset all_ips = SSET_INITIALIZER(&all_ips);
@@ -3981,7 +4030,13 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ds_clear(&match);
ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
ds_clear(&actions);
- ds_put_format(&actions,"flags.loopback = 1; ct_dnat(%s);",
+ if (dnat_force_snat_ip) {
+ /* Indicate to the future tables that a DNAT has taken
+ * place and a force SNAT needs to be done in the Egress
+ * SNAT table. */
+ ds_put_format(&actions, "flags.force_snat_for_dnat = 1; ");
+ }
+ ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
nat->logical_ip);
ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
ds_cstr(&match), ds_cstr(&actions));
@@ -4006,6 +4061,45 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
}
}
+ /* Handle force SNAT options set in the gateway router. */
+ if (dnat_force_snat_ip) {
+ /* If a packet with destination IP address as that of the
+ * gateway router (as set in options:dnat_force_snat_ip) is seen,
+ * UNSNAT it. */
+ ds_clear(&match);
+ ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
+ ds_cstr(&match), "ct_snat; next;");
+
+ /* Higher priority rules to force SNAT with the IP addresses
+ * configured in the Gateway router. This only takes effect
+ * when the packet has already been DNATed once. */
+ ds_clear(&match);
+ ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
+ ds_clear(&actions);
+ ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 110,
+ ds_cstr(&match), ds_cstr(&actions));
+ }
+ if (lb_force_snat_ip) {
+     /* If a packet with destination IP address as that of the
+      * gateway router (as set in options:lb_force_snat_ip) is seen,
+      * UNSNAT it. */
+     ds_clear(&match);
+     ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
+     ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
+                   ds_cstr(&match), "ct_snat; next;");
+
+     /* Load balanced traffic (a subset of DNATed traffic) will have
+      * ct.dnat set.  Force SNAT it to the lb_force_snat_ip address. */
+     ds_clear(&match);
+     ds_put_format(&match, "ct.dnat && ip");
+     ds_clear(&actions);
+     ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
+     ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
+                   ds_cstr(&match), ds_cstr(&actions));
+ }
+
/* Re-circulate every packet through the DNAT zone.
* This helps with three things.
*
@@ -923,6 +923,31 @@
router.
</p>
</column>
+ <column name="options" key="dnat_force_snat_ip">
+ <p>
+ If set, indicates the IP address to use to force SNAT a packet
+ that has already been DNATed in the gateway router. When multiple
+ gateway routers are configured, a packet can potentially enter any
+ of the gateway routers, get DNATted and eventually reach the logical
+ switch port. For the return traffic to go back to the same gateway
+ router (for unDNATing), the packet needs a SNAT in the first place.
+ This can be achieved by setting the above option with a gateway
+ specific IP address.
+ </p>
+ </column>
+ <column name="options" key="lb_force_snat_ip">
+ <p>
+ If set, indicates the IP address to use to force SNAT a packet
+ that has already been load-balanced in the gateway router. When
+ multiple gateway routers are configured, a packet can potentially
+ enter any of the gateway routers, get DNATted as part of the load-
+ balancing and eventually reach the logical switch port.
+ For the return traffic to go back to the same gateway router (for
+ unDNATing), the packet needs a SNAT in the first place. This can be
+ achieved by setting the above option with a gateway specific IP
+ address.
+ </p>
+ </column>
</group>
<group title="Common Columns">
@@ -279,6 +279,225 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
/connection dropped.*/d"])
AT_CLEANUP
+AT_SETUP([ovn -- multiple gateway routers, SNAT and DNAT])
+AT_KEYWORDS([ovnnat])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+ -- set Open_vSwitch . external-ids:system-id=hv1 \
+ -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+# Logical network:
+# Three LRs - R1, R2 and R3 that are connected to each other via LS "join"
+# in 20.0.0.0/24 network. R1 has switches foo (192.168.1.0/24) and
+# bar (192.168.2.0/24) connected to it. R2 has alice (172.16.1.0/24) connected
+# to it. R3 has bob (172.16.1.0/24) connected to it. Note how both alice and
+# bob have the same subnet behind it. We are trying to simulate external
+# network via those 2 switches. In real world the switch ports of these
+# switches will have addresses set as "unknown" to make them learning switches.
+# Or those switches will be "localnet" ones.
+#
+# foo -- R1 -- join - R2 -- alice
+# | |
+# bar ---- - R3 --- bob
+
+ovn-nbctl create Logical_Router name=R1
+ovn-nbctl create Logical_Router name=R2 options:chassis=hv1
+ovn-nbctl create Logical_Router name=R3 options:chassis=hv1
+
+ovn-nbctl ls-add foo
+ovn-nbctl ls-add bar
+ovn-nbctl ls-add alice
+ovn-nbctl ls-add bob
+ovn-nbctl ls-add join
+
+# Connect foo to R1
+ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
+ type=router options:router-port=foo addresses=\"00:00:01:01:02:03\"
+
+# Connect bar to R1
+ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24
+ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \
+ type=router options:router-port=bar addresses=\"00:00:01:01:02:04\"
+
+# Connect alice to R2
+ovn-nbctl lrp-add R2 alice 00:00:02:01:02:03 172.16.1.1/24
+ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \
+ type=router options:router-port=alice addresses=\"00:00:02:01:02:03\"
+
+# Connect bob to R3
+ovn-nbctl lrp-add R3 bob 00:00:03:01:02:03 172.16.1.2/24
+ovn-nbctl lsp-add bob rp-bob -- set Logical_Switch_Port rp-bob \
+ type=router options:router-port=bob addresses=\"00:00:03:01:02:03\"
+
+# Connect R1 to join
+ovn-nbctl lrp-add R1 R1_join 00:00:04:01:02:03 20.0.0.1/24
+ovn-nbctl lsp-add join r1-join -- set Logical_Switch_Port r1-join \
+ type=router options:router-port=R1_join addresses='"00:00:04:01:02:03"'
+
+# Connect R2 to join
+ovn-nbctl lrp-add R2 R2_join 00:00:04:01:02:04 20.0.0.2/24
+ovn-nbctl lsp-add join r2-join -- set Logical_Switch_Port r2-join \
+ type=router options:router-port=R2_join addresses='"00:00:04:01:02:04"'
+
+# Connect R3 to join
+ovn-nbctl lrp-add R3 R3_join 00:00:04:01:02:05 20.0.0.3/24
+ovn-nbctl lsp-add join r3-join -- set Logical_Switch_Port r3-join \
+ type=router options:router-port=R3_join addresses='"00:00:04:01:02:05"'
+
+# Install static routes with source ip address as the policy for routing.
+# We want traffic from 'foo' to go via R2 and traffic of 'bar' to go via R3.
+ovn-nbctl --policy="src-ip" lr-route-add R1 192.168.1.0/24 20.0.0.2
+ovn-nbctl --policy="src-ip" lr-route-add R1 192.168.2.0/24 20.0.0.3
+
+# Static routes.
+ovn-nbctl lr-route-add R2 192.168.0.0/16 20.0.0.1
+ovn-nbctl lr-route-add R3 192.168.0.0/16 20.0.0.1
+
+# For gateway routers R2 and R3, set a force SNAT rule.
+ovn-nbctl set logical_router R2 options:dnat_force_snat_ip=20.0.0.2
+ovn-nbctl set logical_router R3 options:dnat_force_snat_ip=20.0.0.3
+
+# Logical port 'foo1' in switch 'foo'.
+ADD_NAMESPACES(foo1)
+ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
+ "192.168.1.1")
+ovn-nbctl lsp-add foo foo1 \
+-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2"
+
+# Logical port 'alice1' in switch 'alice'.
+ADD_NAMESPACES(alice1)
+ADD_VETH(alice1, alice1, br-int, "172.16.1.3/24", "f0:00:00:01:02:04", \
+ "172.16.1.1")
+ovn-nbctl lsp-add alice alice1 \
+-- lsp-set-addresses alice1 "f0:00:00:01:02:04 172.16.1.3"
+
+# Logical port 'bar1' in switch 'bar'.
+ADD_NAMESPACES(bar1)
+ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:05", \
+"192.168.2.1")
+ovn-nbctl lsp-add bar bar1 \
+-- lsp-set-addresses bar1 "f0:00:00:01:02:05 192.168.2.2"
+
+# Logical port 'bob1' in switch 'bob'.
+ADD_NAMESPACES(bob1)
+ADD_VETH(bob1, bob1, br-int, "172.16.1.4/24", "f0:00:00:01:02:06", \
+ "172.16.1.2")
+ovn-nbctl lsp-add bob bob1 \
+-- lsp-set-addresses bob1 "f0:00:00:01:02:06 172.16.1.4"
+
+# Router R2
+# Add a DNAT rule.
+ovn-nbctl -- --id=@nat create nat type="dnat" logical_ip=192.168.1.2 \
+ external_ip=30.0.0.2 -- add logical_router R2 nat @nat
+
+# Add a SNAT rule
+ovn-nbctl -- --id=@nat create nat type="snat" logical_ip=192.168.1.2 \
+ external_ip=30.0.0.1 -- add logical_router R2 nat @nat
+
+# Router R3
+# Add a DNAT rule.
+ovn-nbctl -- --id=@nat create nat type="dnat" logical_ip=192.168.1.2 \
+ external_ip=30.0.0.3 -- add logical_router R3 nat @nat
+
+# Add a SNAT rule
+ovn-nbctl -- --id=@nat create nat type="snat" logical_ip=192.168.2.2 \
+ external_ip=30.0.0.4 -- add logical_router R3 nat @nat
+
+# wait for ovn-controller to catch up.
+OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep ct\( | grep nat])
+
+# North-South DNAT: 'alice1' should be able to ping 'foo1' via 30.0.0.2
+NS_CHECK_EXEC([alice1], [ping -q -c 3 -i 0.3 -w 2 30.0.0.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Check conntrack entries.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.3) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.3,dst=30.0.0.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.1.3,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# But foo1 should receive traffic from 20.0.0.2
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(20.0.0.2) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.3,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=20.0.0.2,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# North-South DNAT: 'bob1' should be able to ping 'foo1' via 30.0.0.3
+NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 30.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Check conntrack entries.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.4) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.4,dst=30.0.0.3,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.1.4,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# But foo1 should receive traffic from 20.0.0.3
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(20.0.0.3) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.4,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=20.0.0.3,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# South-North SNAT: 'bar1' pings 'bob1'. But 'bob1' receives traffic
+# from 30.0.0.4
+NS_CHECK_EXEC([bar1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that SNAT indeed happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.4) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=192.168.2.2,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=172.16.1.4,dst=30.0.0.4,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# South-North SNAT: 'foo1' pings 'alice1'. But 'alice1' receives traffic
+# from 30.0.0.1
+NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that SNAT indeed happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=192.168.1.2,dst=172.16.1.3,id=<cleared>,type=8,code=0),reply=(src=172.16.1.3,dst=30.0.0.1,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+
AT_SETUP([ovn -- load-balancing])
AT_KEYWORDS([ovnlb])
When multiple gateway routers exist, a packet can enter any gateway router. Once the packet reaches its destination, its reverse direction should be via the same gateway router. This is achieved by doing a SNAT of the packet in the inward direction (towards logical space) with a IP address of the gateway router such that packet travels back to the same gateway router. To do the above, we introduce two new options in the logical router. options:dnat_force_snat_ip=$IP will force SNAT any packet to $IP if it has been previously DNATted. options:lb_force_snat_ip=$IP will force SNAT any packet to $IP if it has been previously load-balanced. Signed-off-by: Gurucharan Shetty <guru@ovn.org> --- ovn/lib/logical-fields.c | 4 + ovn/lib/logical-fields.h | 5 + ovn/northd/ovn-northd.8.xml | 28 +++++- ovn/northd/ovn-northd.c | 98 +++++++++++++++++++- ovn/ovn-nb.xml | 25 +++++ tests/system-ovn.at | 219 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 376 insertions(+), 3 deletions(-)