@@ -37,6 +37,8 @@ OVN v23.06.0 - xx xxx xxxx
- Support for tiered ACLs has been added. This allows for ACLs to be layered
into separate tiers of priority. For more information, please see the
ovn-nb and ovn-northd manpages.
+ - Support to create/update MAC_Binding when GARP received from VTEP (RAMP)
+ switch on l3dgw port.
OVN v23.03.0 - 03 Mar 2023
--------------------------
@@ -509,6 +509,30 @@ update_ld_multichassis_ports(const struct sbrec_port_binding *binding_rec,
}
}
+static void
+update_ld_vtep_port(const struct sbrec_port_binding *binding_rec,
+ struct hmap *local_datapaths)
+{
+ struct local_datapath *ld
+ = get_local_datapath(local_datapaths,
+ binding_rec->datapath->tunnel_key);
+ if (!ld) {
+ return;
+ }
+
+ if (ld->vtep_port && strcmp(ld->vtep_port->logical_port,
+ binding_rec->logical_port)) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "vtep port '%s' already set for datapath "
+ "'%"PRId64"', skipping the new port '%s'.",
+ ld->vtep_port->logical_port,
+ binding_rec->datapath->tunnel_key,
+ binding_rec->logical_port);
+ return;
+ }
+ ld->vtep_port = binding_rec;
+}
+
static void
update_ld_localnet_port(const struct sbrec_port_binding *binding_rec,
struct shash *bridge_mappings,
@@ -1987,6 +2011,7 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
struct ovs_list external_lports = OVS_LIST_INITIALIZER(&external_lports);
struct ovs_list multichassis_ports = OVS_LIST_INITIALIZER(
&multichassis_ports);
+ struct ovs_list vtep_lports = OVS_LIST_INITIALIZER(&vtep_lports);
struct lport {
struct ovs_list list_node;
@@ -2010,8 +2035,13 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
switch (lport_type) {
case LP_PATCH:
+ update_related_lport(pb, b_ctx_out);
+ break;
case LP_VTEP:
update_related_lport(pb, b_ctx_out);
+ struct lport *vtep_lport = xmalloc(sizeof *vtep_lport);
+ vtep_lport->pb = pb;
+ ovs_list_push_back(&vtep_lports, &vtep_lport->list_node);
break;
case LP_LOCALPORT:
@@ -2111,6 +2141,15 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
free(multichassis_lport);
}
+ /* Run through vtep lport list to see if there are vtep ports
+ * on local datapaths discovered from above loop, and update the
+ * corresponding local datapath accordingly. */
+ struct lport *vtep_lport;
+ LIST_FOR_EACH_POP (vtep_lport, list_node, &vtep_lports) {
+ update_ld_vtep_port(vtep_lport->pb, b_ctx_out->local_datapaths);
+ free(vtep_lport);
+ }
+
shash_destroy(&bridge_mappings);
/* Remove stale QoS entries. */
ovs_qos_entries_gc(b_ctx_in->ovs_idl_txn, b_ctx_in->ovsrec_port_by_qos,
@@ -2175,6 +2214,11 @@ remove_pb_from_local_datapath(const struct sbrec_port_binding *pb,
}
} else if (!strcmp(pb->type, "external")) {
remove_local_datapath_external_port(ld, pb->logical_port);
+ } else if (!strcmp(pb->type, "vtep")) {
+ if (ld->vtep_port && !strcmp(ld->vtep_port->logical_port,
+ pb->logical_port)) {
+ ld->vtep_port = NULL;
+ }
}
remove_local_datapath_multichassis_port(ld, pb->logical_port);
}
@@ -2836,6 +2880,7 @@ handle_updated_port(struct binding_ctx_in *b_ctx_in,
case LP_VTEP:
update_related_lport(pb, b_ctx_out);
+ update_ld_vtep_port(pb, b_ctx_out->local_datapaths);
/* VTEP lports are claimed/released by ovn-controller-vteps.
* We are not sure what changed. */
b_ctx_out->non_vif_ports_changed = true;
@@ -3088,6 +3133,8 @@ delete_done:
b_ctx_out->local_datapaths);
} else if (lport_type == LP_EXTERNAL) {
update_ld_external_ports(pb, b_ctx_out->local_datapaths);
+ } else if (lport_type == LP_VTEP) {
+ update_ld_vtep_port(pb, b_ctx_out->local_datapaths);
} else if (pb->n_additional_chassis) {
update_ld_multichassis_ports(pb,
b_ctx_out->local_datapaths);
@@ -50,6 +50,9 @@ struct local_datapath {
/* The localnet port in this datapath, if any (at most one is allowed). */
const struct sbrec_port_binding *localnet_port;
+ /* The vtep port in this datapath, if any (at most one is allowed). */
+ const struct sbrec_port_binding *vtep_port;
+
struct {
const struct sbrec_port_binding *local;
const struct sbrec_port_binding *remote;
@@ -194,6 +194,15 @@ get_localnet_port(const struct hmap *local_datapaths, int64_t tunnel_key)
}
+static const struct sbrec_port_binding *
+get_vtep_port(const struct hmap *local_datapaths, int64_t tunnel_key)
+{
+ const struct local_datapath *ld = get_local_datapath(local_datapaths,
+ tunnel_key);
+ return ld ? ld->vtep_port : NULL;
+}
+
+
static struct zone_ids
get_zone_ids(const struct sbrec_port_binding *binding,
const struct simap *ct_zones)
@@ -1755,10 +1764,10 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
* set the output port to be the router patch port for which
* the redirect port was added.
*/
- struct ofpbuf ofpacts;
+ struct ofpbuf ofpacts, remote_ofpacts, remote_ofpacts_ramp;
ofpbuf_init(&ofpacts, 0);
- struct ofpbuf remote_ofpacts;
ofpbuf_init(&remote_ofpacts, 0);
+ ofpbuf_init(&remote_ofpacts_ramp, 0);
for (size_t i = 0; i < mc->n_ports; i++) {
struct sbrec_port_binding *port = mc->ports[i];
@@ -1783,6 +1792,7 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
local_output_pb(port->tunnel_key, &ofpacts);
} else {
local_output_pb(port->tunnel_key, &remote_ofpacts);
+ local_output_pb(port->tunnel_key, &remote_ofpacts_ramp);
}
} if (!strcmp(port->type, "remote")) {
if (port->chassis) {
@@ -1866,6 +1876,36 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
* multicast group as the logical output port. */
put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
&remote_ofpacts);
+
+ if (get_vtep_port(local_datapaths, mc->datapath->tunnel_key)) {
+ struct match match_ramp;
+ match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0,
+ MLF_RCV_FROM_RAMP);
+
+ put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
+ &remote_ofpacts_ramp);
+
+ /* MCAST traffic which was originally received from RAMP_SWITCH
+ * is not allowed to be re-sent to remote_chassis.
+ * Process "patch" port binding for routing in
+ * OFTABLE_REMOTE_OUTPUT and resubmit packets to
+ * OFTABLE_LOCAL_OUTPUT for local delivery. */
+
+ match_outport_dp_and_port_keys(&match_ramp, dp_key,
+ mc->tunnel_key);
+
+ /* Match packets coming from RAMP_SWITCH and allowed for
+ * loopback processing (including routing). */
+ match_set_reg_masked(&match_ramp, MFF_LOG_FLAGS - MFF_REG0,
+ MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK,
+ MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK);
+
+ put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts_ramp);
+
+ ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 120,
+ mc->header_.uuid.parts[0], &match_ramp,
+ &remote_ofpacts_ramp, &mc->header_.uuid);
+ }
}
fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels,
@@ -1886,6 +1926,7 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
}
ofpbuf_uninit(&ofpacts);
ofpbuf_uninit(&remote_ofpacts);
+ ofpbuf_uninit(&remote_ofpacts_ramp);
sset_destroy(&remote_chassis);
sset_destroy(&vtep_chassis);
}
@@ -7857,6 +7857,16 @@ build_vtep_hairpin(struct ovn_datapath *od, struct hmap *lflows)
ds_cstr(&match), "next;");
}
}
+
+ /* ARP/Neighbor Solicitation requests must skip ls_in_arp_rsp table for
+ * packets arrived from HW VTEP (ramp) switch.
+ * Neighbor resolution for router ports is done in logical router ingress
+ * pipeline. ARP resolution for vif lports is done directly by vif ports.
+ */
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 65535,
+ REGBIT_FROM_RAMP" == 1 && (arp || nd_ns)",
+ "flags.loopback = 1; next;");
+
ds_destroy(&match);
}
@@ -1378,6 +1378,12 @@
</p>
<ul>
+ <li>
+ If packet was received from HW VTEP (ramp switch), and this packet is
+ ARP or Neighbor Solicitation, such packet is passed to next table with
+ max proirity. ARP/ND requests from HW VTEP must be handled in logical
+ router ingress pipeline.
+ </li>
<li>
If the logical switch has no router ports with options:arp_proxy
configured add a priority-100 flows to skip the ARP responder if inport
@@ -4267,7 +4267,7 @@ ovn_start
ovn-nbctl ls-add lsw0
ovn-nbctl lsp-add lsw0 lp1
-ovn-nbctl lsp-set-addresses lp1 f0:00:00:00:00:01
+ovn-nbctl lsp-set-addresses lp1 'f0:00:00:00:00:01 192.168.1.24'
ovn-nbctl lsp-add lsw0 lp2
ovn-nbctl lsp-set-addresses lp2 f0:00:00:00:00:02
@@ -4288,7 +4288,6 @@ ovn-nbctl set Logical_Switch_Port lpr \
ovn-nbctl lrp-set-gateway-chassis lrp1 hv1
ovn-nbctl lsp-add lsw0 lpr2
-ovn-nbctl lr-add lr2
ovn-nbctl lrp-add lr lrp2 f0:00:00:00:00:f2 192.168.1.254/24
ovn-nbctl set Logical_Switch_Port lpr2 \
type=router \
@@ -4421,6 +4420,7 @@ spa=`ip_to_hex 192 168 1 2`
tpa=`ip_to_hex 192 168 1 1`
request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
as hv3 ovs-appctl netdev-dummy/receive vif3 $request
+echo $request >> 1.expected
echo $request >> 2.expected
lrpmac=f000000000f1
@@ -4431,12 +4431,107 @@ response=${sha}${lrpmac}08060001080006040002${lrpmac}${tpa}${sha}${spa}
# we expect arp reply packet on hv3
echo $response >> 3.expected
+# Ensure there is no MAC_Binding, send GARP from vtep side,
+# check that GARP was received by vif1, vif2 and MAC_Binding was created.
+wait_row_count MAC_Binding 0 ip="192.168.1.200" mac='"f0:00:00:00:00:12"'
+
+sha=0200000000ff
+tha=ffffffffffff
+spa=`ip_to_hex 192 168 1 200`
+tpa=$spa
+garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
+as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
+echo $garp >> 1.expected
+echo $garp >> 2.expected
+
+wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ff"'
+
+# Send same GARP with changed SHA and ensure MAC_Binding was updated.
+sha=0200000000ee
+garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
+as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
+echo $garp >> 1.expected
+echo $garp >> 2.expected
+
+wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ee"'
+
+# Remove local ports on hv1 and check that ARP logic is stil working. -->
+check as hv1 ovs-vsctl remove interface vif1 external_ids iface-id
+hv_uuid=$(fetch_column Chassis _uuid name=hv1)
+wait_row_count Port_Binding 0 chassis="$hv_uuid" type="''"
+
+# Cleanup MAC_Bindings after previous checks.
+check ovn-sbctl --all destroy MAC_Binding
+
+# ARP request from VTEP to LRP should be responded by ARP responder.
+sha=f00000000003
+spa=`ip_to_hex 192 168 1 2`
+tpa=`ip_to_hex 192 168 1 1`
+request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
+as hv3 ovs-appctl netdev-dummy/receive vif3 $request
+echo $request >> 2.expected
+
+lrpmac=f000000000f1
+response=${sha}${lrpmac}08060001080006040002${lrpmac}${tpa}${sha}${spa}
+# since lrp1 has gateway chassis set on hv1, hv1 will suppress arp request and
+# answer with arp reply by OVS directly to vtep lport. all other lports,
+# except lport from which this request was initiated, will receive arp request.
+# we expect arp reply packet on hv3
+echo $response >> 3.expected
+
+# Ensure there is no MAC_Binding, send GARP from vtep side,
+# check that GARP was received by vif1, vif2 and MAC_Binding was created.
+wait_row_count MAC_Binding 0 ip="192.168.1.200" mac='"f0:00:00:00:00:12"'
+
+sha=0200000000ff
+tha=ffffffffffff
+spa=`ip_to_hex 192 168 1 200`
+tpa=$spa
+garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
+as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
+echo $garp >> 2.expected
+
+wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ff"'
+
+# Send same GARP with changed SHA and ensure MAC_Binding was updated.
+sha=0200000000ee
+garp=ffffffffffff${sha}08060001080006040002${sha}${spa}${tha}${tpa}
+as hv3 ovs-appctl netdev-dummy/receive vif3 $garp
+echo $garp >> 2.expected
+
+wait_row_count MAC_Binding 2 ip="192.168.1.200" mac='"02:00:00:00:00:ee"'
+
+# Restore.
+check as hv1 ovs-vsctl set interface vif1 external_ids:iface-id=lp1
+wait_row_count Port_Binding 1 chassis="$hv_uuid" type='""'
+# <--
+
+# Check ARP response to request for IP on VIF port sent from vtep.
+# ARP request from VTEP to LSP's IP must be answered only by LSP vif1 (lp1).
+sha=f00000000003
+spa=`ip_to_hex 192 168 1 2`
+tpa=`ip_to_hex 192 168 1 24`
+request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
+as hv3 ovs-appctl netdev-dummy/receive vif3 $request
+echo $request >> 1.expected
+echo $request >> 2.expected
+
+# Simulate response from vif.
+sha=f00000000001
+tha=f00000000003
+spa=`ip_to_hex 192 168 1 200`
+tpa=`ip_to_hex 192 168 1 2`
+response=${tha}${sha}08060001080006040002${sha}${spa}${tha}${tpa}
+as hv1 ovs-appctl netdev-dummy/receive vif1 $response
+echo $response >> 3.expected
+
# First ensure basic flow contents are as we expect.
AT_CHECK([ovn-sbctl lflow-list lsw0 | grep 'reg0[\[14\]]' | sort | sed 's/table=../table=??/g'], [0], [dnl
table=??(ls_in_check_port_sec), priority=70 , match=(inport == "lp-vtep"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=??);)
table=??(ls_in_hairpin ), priority=1000 , match=(reg0[[14]] == 1), action=(next(pipeline=ingress, table=??);)
table=??(ls_in_hairpin ), priority=2000 , match=(reg0[[14]] == 1 && is_chassis_resident("cr-lrp1")), action=(next;)
table=??(ls_in_hairpin ), priority=2000 , match=(reg0[[14]] == 1 && is_chassis_resident("cr-lrp2")), action=(next;)
+ table=??(ls_in_arp_rsp ), priority=65535, match=(reg0[[14]] == 1 && (arp || nd_ns)), action=(flags.loopback = 1; next;)
])
# dump information with counters
This patch is intended to make next two changes: 1. Support create/update of MAC_Binding for GARP/ND from HW VTEP. Prior to this patch MAC_Binding records were created only when LRP issued ARP request/Neighbor solicitation. If IP to MAC in network attached via vtep lport was changed the old MAC_Binding prevented normal communication. Now router port (chassisredirect) in logical switch with vtep lport catches GARP or Neighbor Solicitation and updates MAC_Binding. New flow with max priority was added in ls_in_arp_nd_resp and additional OF rule in table 37 (OFTABLE_REMOTE_OUTPUT) to process multicast packets received from RAMP_SWITCH. 2. Answer to ARP/ND on requests coming from HW VTEP in lrouter pipeline. This is needed to reduce duplicated arp-responses, which were generated by OVN arp-responder flows in node, where chassisredirect port located with responses coming directly from VM interfaces. As a result of this change, now vifs located on same chassis, where chassisredirect ports are located receive ARP/ND mcast packets, which previously were suppressed by arp-responder on the node. Tests and documentation were updated to conform to new behaviour. Signed-off-by: Vladislav Odintsov <odivlad@gmail.com> --- v1 -> v2: - Addressed Dumitru's review comments. --- NEWS | 2 + controller/binding.c | 47 +++++++++++++++++++ controller/local_data.h | 3 ++ controller/physical.c | 45 ++++++++++++++++++- northd/northd.c | 10 +++++ northd/ovn-northd.8.xml | 6 +++ tests/ovn.at | 99 ++++++++++++++++++++++++++++++++++++++++- 7 files changed, 208 insertions(+), 4 deletions(-)