Message ID | 1490899342-29582-2-git-send-email-mickeys.dev@gmail.com |
---|---|
State | Accepted |
Headers | show |
On 30 March 2017 at 11:42, Mickey Spiegel <mickeys.dev@gmail.com> wrote: > This patch extends gratuitous ARP support for NAT addresses so that it > applies to distributed NAT rules on a distributed logical router. > Distributed NAT rules have type "dnat_and_snat" and specify > 'external_mac' and 'logical_port'. > > Gratuitous ARP packets for distributed NAT rules are only generated on > the chassis where the 'logical_port' specified in the NAT rule resides. > Gratuitous ARPs are issued for the 'external_ip' address, resolving to > the 'external_mac'. > > Since the MAC address varies for each distributed NAT rule, a separate > 'nat_addresses' string must be generated for each distributed NAT rule. > For this reason, in the southbound 'Port_Binding', > 'options:nat-addresses' is replaced by a 'nat_addresses' column that > can have an unlimited number of instances. In order to allow for > upgrades, pinctrl in the ovn-controller can work off either the > 'nat_addresses' column (if present), or 'options:nat-addresses' > otherwise. > > Signed-off-by: Mickey Spiegel <mickeys.dev@gmail.com> > Thanks. Applied both to master. > --- > NEWS | 1 + > ovn/controller/pinctrl.c | 108 +++++++++++++++++++++++++++++- > ----------------- > ovn/northd/ovn-northd.c | 92 ++++++++++++++++++++++++++++------------ > ovn/ovn-sb.ovsschema | 9 ++-- > ovn/ovn-sb.xml | 17 ++++++-- > tests/ovn.at | 64 +++++++++++++++++++++++++--- > 6 files changed, 211 insertions(+), 80 deletions(-) > > diff --git a/NEWS b/NEWS > index 00c9106..ec8572a 100644 > --- a/NEWS > +++ b/NEWS > @@ -15,6 +15,7 @@ Post-v2.7.0 > "dot1q-tunnel" port VLAN mode. > - OVN: > * Make the DHCPv4 router setting optional. > + * Gratuitous ARP for NAT addresses on a distributed logical router. > - Add the command 'ovs-appctl stp/show' (see ovs-vswitchd(8)). > > v2.7.0 - 21 Feb 2017 > diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c > index e564a30..01259e2 100644 > --- a/ovn/controller/pinctrl.c > +++ b/ovn/controller/pinctrl.c > @@ -1056,21 +1056,23 @@ send_garp_update(const struct sbrec_port_binding > *binding_rec, > if (!strcmp(binding_rec->type, "l3gateway") > || !strcmp(binding_rec->type, "patch")) { > struct lport_addresses *laddrs = NULL; > - laddrs = shash_find_data(nat_addresses, > binding_rec->logical_port); > - if (!laddrs) { > - return; > - } > - int i; > - for (i = 0; i < laddrs->n_ipv4_addrs; i++) { > - char *name = xasprintf("%s-%s", binding_rec->logical_port, > - laddrs->ipv4_addrs[i].addr_s); > - garp = shash_find_data(&send_garp_data, name); > - if (garp) { > - garp->ofport = ofport; > - } else { > - add_garp(name, ofport, laddrs->ea, > laddrs->ipv4_addrs[i].addr); > + while ((laddrs = shash_find_and_delete(nat_addresses, > + > binding_rec->logical_port))) { > + int i; > + for (i = 0; i < laddrs->n_ipv4_addrs; i++) { > + char *name = xasprintf("%s-%s", binding_rec->logical_port, > + > laddrs->ipv4_addrs[i].addr_s); > + garp = shash_find_data(&send_garp_data, name); > + if (garp) { > + garp->ofport = ofport; > + } else { > + add_garp(name, ofport, laddrs->ea, > + laddrs->ipv4_addrs[i].addr); > + } > + free(name); > } > - free(name); > + destroy_lport_addresses(laddrs); > + free(laddrs); > } > return; > } > @@ -1304,6 +1306,42 @@ extract_addresses_with_port(const char *addresses, > } > > static void > +consider_nat_address(const char *nat_address, > + const struct sbrec_port_binding *pb, > + struct sset *nat_address_keys, > + const struct lport_index *lports, > + const struct sbrec_chassis *chassis, > + struct shash *nat_addresses) > +{ > + struct lport_addresses *laddrs = xmalloc(sizeof *laddrs); > + char *lport = NULL; > + if (!extract_addresses_with_port(nat_address, laddrs, &lport) > + || (!lport && !strcmp(pb->type, "patch"))) { > + free(laddrs); > + if (lport) { > + free(lport); > + } > + return; > + } else if (lport) { > + if (!pinctrl_is_chassis_resident(lports, chassis, lport)) { > + free(laddrs); > + free(lport); > + return; > + } > + free(lport); > + } > + > + int i; > + for (i = 0; i < laddrs->n_ipv4_addrs; i++) { > + char *name = xasprintf("%s-%s", pb->logical_port, > + laddrs->ipv4_addrs[i].addr_s); > + sset_add(nat_address_keys, name); > + free(name); > + } > + shash_add(nat_addresses, pb->logical_port, laddrs); > +} > + > +static void > get_nat_addresses_and_keys(struct sset *nat_address_keys, > struct sset *local_l3gw_ports, > const struct lport_index *lports, > @@ -1317,38 +1355,24 @@ get_nat_addresses_and_keys(struct sset > *nat_address_keys, > if (!pb) { > continue; > } > - const char *nat_addresses_options = smap_get(&pb->options, > - "nat-addresses"); > - if (!nat_addresses_options) { > - continue; > - } > > - struct lport_addresses *laddrs = xmalloc(sizeof *laddrs); > - char *lport = NULL; > - if (!extract_addresses_with_port(nat_addresses_options, laddrs, > &lport) > - || (!lport && !strcmp(pb->type, "patch"))) { > - free(laddrs); > - if (lport) { > - free(lport); > + if (pb->n_nat_addresses) { > + for (int i = 0; i < pb->n_nat_addresses; i++) { > + consider_nat_address(pb->nat_addresses[i], pb, > + nat_address_keys, lports, chassis, > + nat_addresses); > } > - continue; > - } else if (lport) { > - if (!pinctrl_is_chassis_resident(lports, chassis, lport)) { > - free(laddrs); > - free(lport); > - continue; > + } else { > + /* Continue to support options:nat-addresses for version > + * upgrade. */ > + const char *nat_addresses_options = smap_get(&pb->options, > + "nat-addresses"); > + if (nat_addresses_options) { > + consider_nat_address(nat_addresses_options, pb, > + nat_address_keys, lports, chassis, > + nat_addresses); > } > - free(lport); > - } > - > - int i; > - for (i = 0; i < laddrs->n_ipv4_addrs; i++) { > - char *name = xasprintf("%s-%s", pb->logical_port, > - laddrs->ipv4_addrs[i].addr_s); > - sset_add(nat_address_keys, name); > - free(name); > } > - shash_add(nat_addresses, pb->logical_port, laddrs); > } > } > > diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c > index 6090e24..5a2e5ab 100644 > --- a/ovn/northd/ovn-northd.c > +++ b/ovn/northd/ovn-northd.c > @@ -1508,26 +1508,37 @@ get_router_load_balancer_ips(const struct > ovn_datapath *od, > } > } > > -/* Returns a string consisting of the port's MAC address followed by the > - * external IP addresses of all NAT rules defined on that router and the > - * VIPs of all load balancers defined on that router. > +/* Returns an array of strings, each consisting of a MAC address followed > + * by one or more IP addresses, and if the port is a distributed gateway > + * port, followed by 'is_chassis_resident("LPORT_NAME")', where the > + * LPORT_NAME is the name of the L3 redirect port or the name of the > + * logical_port specified in a NAT rule. These strings include the > + * external IP addresses of all NAT rules defined on that router, and all > + * of the IP addresses used in load balancer VIPs defined on that router. > * > - * The caller must free the returned string with free(). */ > -static char * > -get_nat_addresses(const struct ovn_port *op) > + * The caller must free each of the n returned strings with free(), > + * and must free the returned array when it is no longer needed. */ > +static char ** > +get_nat_addresses(const struct ovn_port *op, size_t *n) > { > + size_t n_nats = 0; > struct eth_addr mac; > if (!op->nbrp || !op->od || !op->od->nbr > || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer) > || !eth_addr_from_string(op->nbrp->mac, &mac)) { > + *n = n_nats; > return NULL; > } > > - struct ds addresses = DS_EMPTY_INITIALIZER; > - ds_put_format(&addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); > + struct ds c_addresses = DS_EMPTY_INITIALIZER; > + ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); > + bool central_ip_address = false; > + > + char **addresses; > + addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1)); > > /* Get NAT IP addresses. */ > - for (int i = 0; i < op->od->nbr->n_nat; i++) { > + for (size_t i = 0; i < op->od->nbr->n_nat; i++) { > const struct nbrec_nat *nat = op->od->nbr->nat[i]; > ovs_be32 ip, mask; > > @@ -1542,14 +1553,19 @@ get_nat_addresses(const struct ovn_port *op) > if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat") > && nat->logical_port && nat->external_mac) { > /* Distributed NAT rule. */ > - /* XXX This uses a different MAC address, so it cannot go > - * into the same string as centralized NAT external IP > - * addresses. Need to change this function to return an > - * array of strings. */ > + if (eth_addr_from_string(nat->external_mac, &mac)) { > + struct ds address = DS_EMPTY_INITIALIZER; > + ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); > + ds_put_format(&address, " %s", nat->external_ip); > + ds_put_format(&address, " is_chassis_resident(\"%s\")", > + nat->logical_port); > + addresses[n_nats++] = ds_steal_cstr(&address); > + } > } else { > /* Centralized NAT rule, either on gateway router or > distributed > * router. */ > - ds_put_format(&addresses, " %s", nat->external_ip); > + ds_put_format(&c_addresses, " %s", nat->external_ip); > + central_ip_address = true; > } > } > > @@ -1559,18 +1575,25 @@ get_nat_addresses(const struct ovn_port *op) > > const char *ip_address; > SSET_FOR_EACH (ip_address, &all_ips) { > - ds_put_format(&addresses, " %s", ip_address); > + ds_put_format(&c_addresses, " %s", ip_address); > + central_ip_address = true; > } > sset_destroy(&all_ips); > > - /* Gratuitous ARP for centralized NAT rules on distributed gateway > - * ports should be restricted to the "redirect-chassis". */ > - if (op->od->l3redirect_port) { > - ds_put_format(&addresses, " is_chassis_resident(%s)", > - op->od->l3redirect_port->json_key); > + if (central_ip_address) { > + /* Gratuitous ARP for centralized NAT rules on distributed gateway > + * ports should be restricted to the "redirect-chassis". */ > + if (op->od->l3redirect_port) { > + ds_put_format(&c_addresses, " is_chassis_resident(%s)", > + op->od->l3redirect_port->json_key); > + } > + > + addresses[n_nats++] = ds_steal_cstr(&c_addresses); > } > > - return ds_steal_cstr(&addresses); > + *n = n_nats; > + > + return addresses; > } > > static void > @@ -1659,17 +1682,28 @@ ovn_port_update_sbrec(const struct ovn_port *op, > if (chassis) { > smap_add(&new, "l3gateway-chassis", chassis); > } > + sbrec_port_binding_set_options(op->sb, &new); > + smap_destroy(&new); > > const char *nat_addresses = smap_get(&op->nbsp->options, > "nat-addresses"); > if (nat_addresses && !strcmp(nat_addresses, "router")) { > if (op->peer && op->peer->od > && (chassis || op->peer->od->l3redirect_port)) { > - char *nats = get_nat_addresses(op->peer); > - if (nats) { > - smap_add(&new, "nat-addresses", nats); > + size_t n_nats; > + char **nats = get_nat_addresses(op->peer, &n_nats); > + if (n_nats) { > + sbrec_port_binding_set_nat_addresses(op->sb, > + (const char **) nats, n_nats); > + for (size_t i = 0; i < n_nats; i++) { > + free(nats[i]); > + } > free(nats); > + } else { > + sbrec_port_binding_set_nat_addresses(op->sb, > NULL, 0); > } > + } else { > + sbrec_port_binding_set_nat_addresses(op->sb, NULL, > 0); > } > /* Only accept manual specification of ethernet address > * followed by IPv4 addresses on type "l3gateway" ports. */ > @@ -1679,13 +1713,15 @@ ovn_port_update_sbrec(const struct ovn_port *op, > static struct vlog_rate_limit rl = > VLOG_RATE_LIMIT_INIT(1, 1); > VLOG_WARN_RL(&rl, "Error extracting nat-addresses."); > + sbrec_port_binding_set_nat_addresses(op->sb, NULL, > 0); > } else { > - smap_add(&new, "nat-addresses", nat_addresses); > + sbrec_port_binding_set_nat_addresses(op->sb, > + &nat_addresses, > 1); > destroy_lport_addresses(&laddrs); > } > + } else { > + sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0); > } > - sbrec_port_binding_set_options(op->sb, &new); > - smap_destroy(&new); > } > sbrec_port_binding_set_parent_port(op->sb, > op->nbsp->parent_name); > sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, > op->nbsp->n_tag); > @@ -5658,6 +5694,8 @@ main(int argc, char *argv[]) > add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type); > add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_ > options); > add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac); > + add_column_noalert(ovnsb_idl_loop.idl, > + &sbrec_port_binding_col_nat_addresses); > ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_ > chassis); > ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding); > add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ > datapath); > diff --git a/ovn/ovn-sb.ovsschema b/ovn/ovn-sb.ovsschema > index 0212a5e..a576dc4 100644 > --- a/ovn/ovn-sb.ovsschema > +++ b/ovn/ovn-sb.ovsschema > @@ -1,7 +1,7 @@ > { > "name": "OVN_Southbound", > - "version": "1.9.0", > - "cksum": "2240045372 9719", > + "version": "1.10.0", > + "cksum": "860871483 9898", > "tables": { > "SB_Global": { > "columns": { > @@ -127,7 +127,10 @@ > "min": 0, "max": 1}}, > "mac": {"type": {"key": "string", > "min": 0, > - "max": "unlimited"}}}, > + "max": "unlimited"}}, > + "nat_addresses": {"type": {"key": "string", > + "min": 0, > + "max": "unlimited"}}}, > "indexes": [["datapath", "tunnel_key"], ["logical_port"]], > "isRoot": true}, > "MAC_Binding": { > diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml > index a82d705..8605c98 100644 > --- a/ovn/ovn-sb.xml > +++ b/ovn/ovn-sb.xml > @@ -1863,9 +1863,9 @@ tcp.flags = RST; > <code>peer</code> values. > </column> > > - <column name="options" key="nat-addresses"> > - MAC address of the <code>patch</code> port followed by a list of > - SNAT and DNAT external IP addresses, followed by > + <column name="nat_addresses"> > + MAC address followed by a list of SNAT and DNAT external IP > + addresses, followed by > <code>is_chassis_resident("<var>lport</var>")</code>, where > <var>lport</var> is the name of a logical port on the same chassis > where the corresponding NAT rules are applied. This is used to > @@ -1905,6 +1905,17 @@ tcp.flags = RST; > Example: <code>80:fa:5b:06:72:b7 158.36.44.22 158.36.44.24</code>. > This would result in generation of gratuitous ARPs for IP > addresses > 158.36.44.22 and 158.36.44.24 with a MAC address of > 80:fa:5b:06:72:b7. > + This is used in OVS versions prior to 2.8. > + </column> > + > + <column name="nat_addresses"> > + MAC address of the <code>l3gateway</code> port followed by a list > of > + SNAT and DNAT external IP addresses. This is used to send > gratuitous > + ARPs for SNAT and DNAT external IP addresses via > <code>localnet</code>. > + Example: <code>80:fa:5b:06:72:b7 158.36.44.22 158.36.44.24</code>. > + This would result in generation of gratuitous ARPs for IP > addresses > + 158.36.44.22 and 158.36.44.24 with a MAC address of > 80:fa:5b:06:72:b7. > + This is used in OVS version 2.8 and later versions. > </column> > </group> > > diff --git a/tests/ovn.at b/tests/ovn.at > index 6b1a728..97e8f70 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -6678,14 +6678,18 @@ ovn-nbctl lsp-add ls0 lrp0-rp -- set > Logical_Switch_Port lrp0-rp \ > ovn-nbctl lrp-add lr0 lrp1 f0:00:00:00:00:02 10.0.0.1/24 > ovn-nbctl lsp-add ls1 lrp1-rp -- set Logical_Switch_Port lrp1-rp \ > type=router options:router-port=lrp1 addresses="router" > -# Add logical port for NAT rule > -ovn-nbctl lsp-add ls1 foo1 > +# Add logical ports for NAT rules > +ovn-nbctl lsp-add ls1 foo1 \ > +-- lsp-set-addresses foo1 "00:00:00:00:00:03 10.0.0.3" > +ovn-nbctl lsp-add ls1 foo2 \ > +-- lsp-set-addresses foo2 "00:00:00:00:00:04 10.0.0.4" > # Add nat-addresses option > ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="router" > # Add NAT rules > AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 192.168.0.1 10.0.0.0/24]) > AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 192.168.0.2 10.0.0.2]) > AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 192.168.0.3 10.0.0.3 > foo1 f0:00:00:00:00:03]) > +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 192.168.0.4 10.0.0.4 > foo2 f0:00:00:00:00:04]) > > net_add n1 > sim_add hv1 > @@ -6702,6 +6706,12 @@ ovs-vsctl add-br br-phys > ovn_attach n1 br-phys 192.168.0.2 > # Initially test with no bridge-mapping on hv2, expect to receive no > packets > > +sim_add hv3 > +as hv3 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.3 > +# Initially test with no bridge-mapping on hv3 > + > # Create a localnet port. > AT_CHECK([ovn-nbctl lsp-add ls0 ln_port]) > AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown]) > @@ -6717,7 +6727,7 @@ sleep 2 > OVN_CHECK_PACKETS([hv1/snoopvif-tx.pcap], [packets]) > > # Add bridge-mapping on hv2 > -AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge- > mappings=physnet1:br-phys]) > +AT_CHECK([as hv2 ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge- > mappings=physnet1:br-phys]) > > # Wait for packets to be received. > OVS_WAIT_UNTIL([test `wc -c < "hv1/snoopvif-tx.pcap"` -ge 100]) > @@ -6730,8 +6740,52 @@ echo $expected > expout > expected="fffffffffffff0000000000108060001080006040001f00000000001c0a8 > 0002000000000000c0a80002" > echo $expected >> expout > AT_CHECK([sort packets], [0], [expout]) > -cat packets > +sort packets | cat > > -OVN_CLEANUP([hv1],[hv2]) > +# Temporarily remove nat-addresses option to avoid race conditions > +# due to GARP backoff > +ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="" > + > +reset_pcap_file() { > + local iface=$1 > + local pcap_file=$2 > + ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \ > +options:rxq_pcap=dummy-rx.pcap > + rm -f ${pcap_file}*.pcap > + ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap > \ > +options:rxq_pcap=${pcap_file}-rx.pcap > +} > + > +as hv1 reset_pcap_file snoopvif hv1/snoopvif > + > +# Add OVS ports for foo1 and foo2 on hv3 > +ovs-vsctl -- add-port br-int hv3-vif1 -- \ > + set interface hv3-vif1 external-ids:iface-id=foo1 \ > + ofport-request=1 > +ovs-vsctl -- add-port br-int hv3-vif2 -- \ > + set interface hv3-vif2 external-ids:iface-id=foo2 \ > + ofport-request=2 > + > +# Add bridge-mapping on hv3 > +AT_CHECK([as hv3 ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge- > mappings=physnet1:br-phys]) > + > +# Re-add nat-addresses option > +ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="router" > + > +# Wait for packets to be received. > +OVS_WAIT_UNTIL([test `wc -c < "hv1/snoopvif-tx.pcap"` -ge 250]) > +trim_zeros() { > + sed 's/\(00\)\{1,\}$//' > +} > + > +$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap | > trim_zeros > packets > +expected="fffffffffffff0000000000308060001080006040001f00000000003c0a8 > 0003000000000000c0a80003" > +echo $expected >> expout > +expected="fffffffffffff0000000000408060001080006040001f00000000004c0a8 > 0004000000000000c0a80004" > +echo $expected >> expout > +AT_CHECK([sort packets], [0], [expout]) > +sort packets | cat > + > +OVN_CLEANUP([hv1],[hv2],[hv3]) > > AT_CLEANUP > -- > 1.9.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
diff --git a/NEWS b/NEWS index 00c9106..ec8572a 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,7 @@ Post-v2.7.0 "dot1q-tunnel" port VLAN mode. - OVN: * Make the DHCPv4 router setting optional. + * Gratuitous ARP for NAT addresses on a distributed logical router. - Add the command 'ovs-appctl stp/show' (see ovs-vswitchd(8)). v2.7.0 - 21 Feb 2017 diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c index e564a30..01259e2 100644 --- a/ovn/controller/pinctrl.c +++ b/ovn/controller/pinctrl.c @@ -1056,21 +1056,23 @@ send_garp_update(const struct sbrec_port_binding *binding_rec, if (!strcmp(binding_rec->type, "l3gateway") || !strcmp(binding_rec->type, "patch")) { struct lport_addresses *laddrs = NULL; - laddrs = shash_find_data(nat_addresses, binding_rec->logical_port); - if (!laddrs) { - return; - } - int i; - for (i = 0; i < laddrs->n_ipv4_addrs; i++) { - char *name = xasprintf("%s-%s", binding_rec->logical_port, - laddrs->ipv4_addrs[i].addr_s); - garp = shash_find_data(&send_garp_data, name); - if (garp) { - garp->ofport = ofport; - } else { - add_garp(name, ofport, laddrs->ea, laddrs->ipv4_addrs[i].addr); + while ((laddrs = shash_find_and_delete(nat_addresses, + binding_rec->logical_port))) { + int i; + for (i = 0; i < laddrs->n_ipv4_addrs; i++) { + char *name = xasprintf("%s-%s", binding_rec->logical_port, + laddrs->ipv4_addrs[i].addr_s); + garp = shash_find_data(&send_garp_data, name); + if (garp) { + garp->ofport = ofport; + } else { + add_garp(name, ofport, laddrs->ea, + laddrs->ipv4_addrs[i].addr); + } + free(name); } - free(name); + destroy_lport_addresses(laddrs); + free(laddrs); } return; } @@ -1304,6 +1306,42 @@ extract_addresses_with_port(const char *addresses, } static void +consider_nat_address(const char *nat_address, + const struct sbrec_port_binding *pb, + struct sset *nat_address_keys, + const struct lport_index *lports, + const struct sbrec_chassis *chassis, + struct shash *nat_addresses) +{ + struct lport_addresses *laddrs = xmalloc(sizeof *laddrs); + char *lport = NULL; + if (!extract_addresses_with_port(nat_address, laddrs, &lport) + || (!lport && !strcmp(pb->type, "patch"))) { + free(laddrs); + if (lport) { + free(lport); + } + return; + } else if (lport) { + if (!pinctrl_is_chassis_resident(lports, chassis, lport)) { + free(laddrs); + free(lport); + return; + } + free(lport); + } + + int i; + for (i = 0; i < laddrs->n_ipv4_addrs; i++) { + char *name = xasprintf("%s-%s", pb->logical_port, + laddrs->ipv4_addrs[i].addr_s); + sset_add(nat_address_keys, name); + free(name); + } + shash_add(nat_addresses, pb->logical_port, laddrs); +} + +static void get_nat_addresses_and_keys(struct sset *nat_address_keys, struct sset *local_l3gw_ports, const struct lport_index *lports, @@ -1317,38 +1355,24 @@ get_nat_addresses_and_keys(struct sset *nat_address_keys, if (!pb) { continue; } - const char *nat_addresses_options = smap_get(&pb->options, - "nat-addresses"); - if (!nat_addresses_options) { - continue; - } - struct lport_addresses *laddrs = xmalloc(sizeof *laddrs); - char *lport = NULL; - if (!extract_addresses_with_port(nat_addresses_options, laddrs, &lport) - || (!lport && !strcmp(pb->type, "patch"))) { - free(laddrs); - if (lport) { - free(lport); + if (pb->n_nat_addresses) { + for (int i = 0; i < pb->n_nat_addresses; i++) { + consider_nat_address(pb->nat_addresses[i], pb, + nat_address_keys, lports, chassis, + nat_addresses); } - continue; - } else if (lport) { - if (!pinctrl_is_chassis_resident(lports, chassis, lport)) { - free(laddrs); - free(lport); - continue; + } else { + /* Continue to support options:nat-addresses for version + * upgrade. */ + const char *nat_addresses_options = smap_get(&pb->options, + "nat-addresses"); + if (nat_addresses_options) { + consider_nat_address(nat_addresses_options, pb, + nat_address_keys, lports, chassis, + nat_addresses); } - free(lport); - } - - int i; - for (i = 0; i < laddrs->n_ipv4_addrs; i++) { - char *name = xasprintf("%s-%s", pb->logical_port, - laddrs->ipv4_addrs[i].addr_s); - sset_add(nat_address_keys, name); - free(name); } - shash_add(nat_addresses, pb->logical_port, laddrs); } } diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c index 6090e24..5a2e5ab 100644 --- a/ovn/northd/ovn-northd.c +++ b/ovn/northd/ovn-northd.c @@ -1508,26 +1508,37 @@ get_router_load_balancer_ips(const struct ovn_datapath *od, } } -/* Returns a string consisting of the port's MAC address followed by the - * external IP addresses of all NAT rules defined on that router and the - * VIPs of all load balancers defined on that router. +/* Returns an array of strings, each consisting of a MAC address followed + * by one or more IP addresses, and if the port is a distributed gateway + * port, followed by 'is_chassis_resident("LPORT_NAME")', where the + * LPORT_NAME is the name of the L3 redirect port or the name of the + * logical_port specified in a NAT rule. These strings include the + * external IP addresses of all NAT rules defined on that router, and all + * of the IP addresses used in load balancer VIPs defined on that router. * - * The caller must free the returned string with free(). */ -static char * -get_nat_addresses(const struct ovn_port *op) + * The caller must free each of the n returned strings with free(), + * and must free the returned array when it is no longer needed. */ +static char ** +get_nat_addresses(const struct ovn_port *op, size_t *n) { + size_t n_nats = 0; struct eth_addr mac; if (!op->nbrp || !op->od || !op->od->nbr || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer) || !eth_addr_from_string(op->nbrp->mac, &mac)) { + *n = n_nats; return NULL; } - struct ds addresses = DS_EMPTY_INITIALIZER; - ds_put_format(&addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); + struct ds c_addresses = DS_EMPTY_INITIALIZER; + ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); + bool central_ip_address = false; + + char **addresses; + addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1)); /* Get NAT IP addresses. */ - for (int i = 0; i < op->od->nbr->n_nat; i++) { + for (size_t i = 0; i < op->od->nbr->n_nat; i++) { const struct nbrec_nat *nat = op->od->nbr->nat[i]; ovs_be32 ip, mask; @@ -1542,14 +1553,19 @@ get_nat_addresses(const struct ovn_port *op) if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat") && nat->logical_port && nat->external_mac) { /* Distributed NAT rule. */ - /* XXX This uses a different MAC address, so it cannot go - * into the same string as centralized NAT external IP - * addresses. Need to change this function to return an - * array of strings. */ + if (eth_addr_from_string(nat->external_mac, &mac)) { + struct ds address = DS_EMPTY_INITIALIZER; + ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); + ds_put_format(&address, " %s", nat->external_ip); + ds_put_format(&address, " is_chassis_resident(\"%s\")", + nat->logical_port); + addresses[n_nats++] = ds_steal_cstr(&address); + } } else { /* Centralized NAT rule, either on gateway router or distributed * router. */ - ds_put_format(&addresses, " %s", nat->external_ip); + ds_put_format(&c_addresses, " %s", nat->external_ip); + central_ip_address = true; } } @@ -1559,18 +1575,25 @@ get_nat_addresses(const struct ovn_port *op) const char *ip_address; SSET_FOR_EACH (ip_address, &all_ips) { - ds_put_format(&addresses, " %s", ip_address); + ds_put_format(&c_addresses, " %s", ip_address); + central_ip_address = true; } sset_destroy(&all_ips); - /* Gratuitous ARP for centralized NAT rules on distributed gateway - * ports should be restricted to the "redirect-chassis". */ - if (op->od->l3redirect_port) { - ds_put_format(&addresses, " is_chassis_resident(%s)", - op->od->l3redirect_port->json_key); + if (central_ip_address) { + /* Gratuitous ARP for centralized NAT rules on distributed gateway + * ports should be restricted to the "redirect-chassis". */ + if (op->od->l3redirect_port) { + ds_put_format(&c_addresses, " is_chassis_resident(%s)", + op->od->l3redirect_port->json_key); + } + + addresses[n_nats++] = ds_steal_cstr(&c_addresses); } - return ds_steal_cstr(&addresses); + *n = n_nats; + + return addresses; } static void @@ -1659,17 +1682,28 @@ ovn_port_update_sbrec(const struct ovn_port *op, if (chassis) { smap_add(&new, "l3gateway-chassis", chassis); } + sbrec_port_binding_set_options(op->sb, &new); + smap_destroy(&new); const char *nat_addresses = smap_get(&op->nbsp->options, "nat-addresses"); if (nat_addresses && !strcmp(nat_addresses, "router")) { if (op->peer && op->peer->od && (chassis || op->peer->od->l3redirect_port)) { - char *nats = get_nat_addresses(op->peer); - if (nats) { - smap_add(&new, "nat-addresses", nats); + size_t n_nats; + char **nats = get_nat_addresses(op->peer, &n_nats); + if (n_nats) { + sbrec_port_binding_set_nat_addresses(op->sb, + (const char **) nats, n_nats); + for (size_t i = 0; i < n_nats; i++) { + free(nats[i]); + } free(nats); + } else { + sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0); } + } else { + sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0); } /* Only accept manual specification of ethernet address * followed by IPv4 addresses on type "l3gateway" ports. */ @@ -1679,13 +1713,15 @@ ovn_port_update_sbrec(const struct ovn_port *op, static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_WARN_RL(&rl, "Error extracting nat-addresses."); + sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0); } else { - smap_add(&new, "nat-addresses", nat_addresses); + sbrec_port_binding_set_nat_addresses(op->sb, + &nat_addresses, 1); destroy_lport_addresses(&laddrs); } + } else { + sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0); } - sbrec_port_binding_set_options(op->sb, &new); - smap_destroy(&new); } sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name); sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag); @@ -5658,6 +5694,8 @@ main(int argc, char *argv[]) add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type); add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options); add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac); + add_column_noalert(ovnsb_idl_loop.idl, + &sbrec_port_binding_col_nat_addresses); ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis); ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding); add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath); diff --git a/ovn/ovn-sb.ovsschema b/ovn/ovn-sb.ovsschema index 0212a5e..a576dc4 100644 --- a/ovn/ovn-sb.ovsschema +++ b/ovn/ovn-sb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Southbound", - "version": "1.9.0", - "cksum": "2240045372 9719", + "version": "1.10.0", + "cksum": "860871483 9898", "tables": { "SB_Global": { "columns": { @@ -127,7 +127,10 @@ "min": 0, "max": 1}}, "mac": {"type": {"key": "string", "min": 0, - "max": "unlimited"}}}, + "max": "unlimited"}}, + "nat_addresses": {"type": {"key": "string", + "min": 0, + "max": "unlimited"}}}, "indexes": [["datapath", "tunnel_key"], ["logical_port"]], "isRoot": true}, "MAC_Binding": { diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml index a82d705..8605c98 100644 --- a/ovn/ovn-sb.xml +++ b/ovn/ovn-sb.xml @@ -1863,9 +1863,9 @@ tcp.flags = RST; <code>peer</code> values. </column> - <column name="options" key="nat-addresses"> - MAC address of the <code>patch</code> port followed by a list of - SNAT and DNAT external IP addresses, followed by + <column name="nat_addresses"> + MAC address followed by a list of SNAT and DNAT external IP + addresses, followed by <code>is_chassis_resident("<var>lport</var>")</code>, where <var>lport</var> is the name of a logical port on the same chassis where the corresponding NAT rules are applied. This is used to @@ -1905,6 +1905,17 @@ tcp.flags = RST; Example: <code>80:fa:5b:06:72:b7 158.36.44.22 158.36.44.24</code>. This would result in generation of gratuitous ARPs for IP addresses 158.36.44.22 and 158.36.44.24 with a MAC address of 80:fa:5b:06:72:b7. + This is used in OVS versions prior to 2.8. + </column> + + <column name="nat_addresses"> + MAC address of the <code>l3gateway</code> port followed by a list of + SNAT and DNAT external IP addresses. This is used to send gratuitous + ARPs for SNAT and DNAT external IP addresses via <code>localnet</code>. + Example: <code>80:fa:5b:06:72:b7 158.36.44.22 158.36.44.24</code>. + This would result in generation of gratuitous ARPs for IP addresses + 158.36.44.22 and 158.36.44.24 with a MAC address of 80:fa:5b:06:72:b7. + This is used in OVS version 2.8 and later versions. </column> </group> diff --git a/tests/ovn.at b/tests/ovn.at index 6b1a728..97e8f70 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -6678,14 +6678,18 @@ ovn-nbctl lsp-add ls0 lrp0-rp -- set Logical_Switch_Port lrp0-rp \ ovn-nbctl lrp-add lr0 lrp1 f0:00:00:00:00:02 10.0.0.1/24 ovn-nbctl lsp-add ls1 lrp1-rp -- set Logical_Switch_Port lrp1-rp \ type=router options:router-port=lrp1 addresses="router" -# Add logical port for NAT rule -ovn-nbctl lsp-add ls1 foo1 +# Add logical ports for NAT rules +ovn-nbctl lsp-add ls1 foo1 \ +-- lsp-set-addresses foo1 "00:00:00:00:00:03 10.0.0.3" +ovn-nbctl lsp-add ls1 foo2 \ +-- lsp-set-addresses foo2 "00:00:00:00:00:04 10.0.0.4" # Add nat-addresses option ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="router" # Add NAT rules AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 192.168.0.1 10.0.0.0/24]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 192.168.0.2 10.0.0.2]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 192.168.0.3 10.0.0.3 foo1 f0:00:00:00:00:03]) +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 192.168.0.4 10.0.0.4 foo2 f0:00:00:00:00:04]) net_add n1 sim_add hv1 @@ -6702,6 +6706,12 @@ ovs-vsctl add-br br-phys ovn_attach n1 br-phys 192.168.0.2 # Initially test with no bridge-mapping on hv2, expect to receive no packets +sim_add hv3 +as hv3 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.3 +# Initially test with no bridge-mapping on hv3 + # Create a localnet port. AT_CHECK([ovn-nbctl lsp-add ls0 ln_port]) AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown]) @@ -6717,7 +6727,7 @@ sleep 2 OVN_CHECK_PACKETS([hv1/snoopvif-tx.pcap], [packets]) # Add bridge-mapping on hv2 -AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys]) +AT_CHECK([as hv2 ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys]) # Wait for packets to be received. OVS_WAIT_UNTIL([test `wc -c < "hv1/snoopvif-tx.pcap"` -ge 100]) @@ -6730,8 +6740,52 @@ echo $expected > expout expected="fffffffffffff0000000000108060001080006040001f00000000001c0a80002000000000000c0a80002" echo $expected >> expout AT_CHECK([sort packets], [0], [expout]) -cat packets +sort packets | cat -OVN_CLEANUP([hv1],[hv2]) +# Temporarily remove nat-addresses option to avoid race conditions +# due to GARP backoff +ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="" + +reset_pcap_file() { + local iface=$1 + local pcap_file=$2 + ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \ +options:rxq_pcap=dummy-rx.pcap + rm -f ${pcap_file}*.pcap + ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \ +options:rxq_pcap=${pcap_file}-rx.pcap +} + +as hv1 reset_pcap_file snoopvif hv1/snoopvif + +# Add OVS ports for foo1 and foo2 on hv3 +ovs-vsctl -- add-port br-int hv3-vif1 -- \ + set interface hv3-vif1 external-ids:iface-id=foo1 \ + ofport-request=1 +ovs-vsctl -- add-port br-int hv3-vif2 -- \ + set interface hv3-vif2 external-ids:iface-id=foo2 \ + ofport-request=2 + +# Add bridge-mapping on hv3 +AT_CHECK([as hv3 ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys]) + +# Re-add nat-addresses option +ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="router" + +# Wait for packets to be received. +OVS_WAIT_UNTIL([test `wc -c < "hv1/snoopvif-tx.pcap"` -ge 250]) +trim_zeros() { + sed 's/\(00\)\{1,\}$//' +} + +$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap | trim_zeros > packets +expected="fffffffffffff0000000000308060001080006040001f00000000003c0a80003000000000000c0a80003" +echo $expected >> expout +expected="fffffffffffff0000000000408060001080006040001f00000000004c0a80004000000000000c0a80004" +echo $expected >> expout +AT_CHECK([sort packets], [0], [expout]) +sort packets | cat + +OVN_CLEANUP([hv1],[hv2],[hv3]) AT_CLEANUP
This patch extends gratuitous ARP support for NAT addresses so that it applies to distributed NAT rules on a distributed logical router. Distributed NAT rules have type "dnat_and_snat" and specify 'external_mac' and 'logical_port'. Gratuitous ARP packets for distributed NAT rules are only generated on the chassis where the 'logical_port' specified in the NAT rule resides. Gratuitous ARPs are issued for the 'external_ip' address, resolving to the 'external_mac'. Since the MAC address varies for each distributed NAT rule, a separate 'nat_addresses' string must be generated for each distributed NAT rule. For this reason, in the southbound 'Port_Binding', 'options:nat-addresses' is replaced by a 'nat_addresses' column that can have an unlimited number of instances. In order to allow for upgrades, pinctrl in the ovn-controller can work off either the 'nat_addresses' column (if present), or 'options:nat-addresses' otherwise. Signed-off-by: Mickey Spiegel <mickeys.dev@gmail.com> --- NEWS | 1 + ovn/controller/pinctrl.c | 108 +++++++++++++++++++++++++++++------------------ ovn/northd/ovn-northd.c | 92 ++++++++++++++++++++++++++++------------ ovn/ovn-sb.ovsschema | 9 ++-- ovn/ovn-sb.xml | 17 ++++++-- tests/ovn.at | 64 +++++++++++++++++++++++++--- 6 files changed, 211 insertions(+), 80 deletions(-)