@@ -183,7 +183,7 @@ get_zone_ids(const struct sbrec_port_binding *binding,
}
static void
-put_local_common_flows(uint32_t dp_key, uint32_t port_key,
+put_local_common_flows(uint32_t dp_key, uint32_t port_key, ofp_port_t ofport,
bool nested_container, const struct zone_ids *zone_ids,
struct ofpbuf *ofpacts_p, struct hmap *flow_table)
{
@@ -259,6 +259,36 @@ put_local_common_flows(uint32_t dp_key, uint32_t port_key,
put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p));
ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, 0,
&match, ofpacts_p);
+
+ /* Table 65, Priority 150.
+ * =======================
+ *
+ * Send packets with MLF_FORCE_EGRESS_LOOPBACK flag back to the
+ * ingress pipeline with inport = outport. */
+
+ match_init_catchall(&match);
+ ofpbuf_clear(ofpacts_p);
+ match_set_metadata(&match, htonll(dp_key));
+ match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
+ match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
+ MLF_FORCE_EGRESS_LOOPBACK, MLF_FORCE_EGRESS_LOOPBACK);
+
+ size_t clone_ofs = ofpacts_p->size;
+ struct ofpact_nest *clone = ofpact_put_CLONE(ofpacts_p);
+ put_load(ofport, MFF_IN_PORT, 0, 16, ofpacts_p);
+ put_load(port_key, MFF_LOG_INPORT, 0, 32, ofpacts_p);
+ put_load(0, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
+ put_load(MLF_EGRESS_LOOPBACK_OCCURRED, MFF_LOG_FLAGS, 0, 32, ofpacts_p);
+ for (int i = 0; i < MFF_N_LOG_REGS; i++) {
+ put_load(0, MFF_LOG_REG0 + i, 0, 32, ofpacts_p);
+ }
+ put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
+ clone = ofpbuf_at_assert(ofpacts_p, clone_ofs, sizeof *clone);
+ ofpacts_p->header = clone;
+ ofpact_finish_CLONE(ofpacts_p, &clone);
+
+ ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 150, 0,
+ &match, ofpacts_p);
}
static void
@@ -321,7 +351,7 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
}
struct zone_ids binding_zones = get_zone_ids(binding, ct_zones);
- put_local_common_flows(dp_key, port_key, false, &binding_zones,
+ put_local_common_flows(dp_key, port_key, 0, false, &binding_zones,
ofpacts_p, flow_table);
match_init_catchall(&match);
@@ -490,8 +520,8 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
*/
struct zone_ids zone_ids = get_zone_ids(binding, ct_zones);
- put_local_common_flows(dp_key, port_key, nested_container, &zone_ids,
- ofpacts_p, flow_table);
+ put_local_common_flows(dp_key, port_key, ofport, nested_container,
+ &zone_ids, ofpacts_p, flow_table);
/* Table 0, Priority 150 and 100.
* ==============================
@@ -96,6 +96,14 @@ ovn_init_symtab(struct shash *symtab)
MLF_FORCE_SNAT_FOR_LB_BIT);
expr_symtab_add_subfield(symtab, "flags.force_snat_for_lb", NULL,
flags_str);
+ snprintf(flags_str, sizeof flags_str, "flags[%d]",
+ MLF_FORCE_EGRESS_LOOPBACK_BIT);
+ expr_symtab_add_subfield(symtab, "flags.force_egress_loopback", NULL,
+ flags_str);
+ snprintf(flags_str, sizeof flags_str, "flags[%d]",
+ MLF_EGRESS_LOOPBACK_OCCURRED_BIT);
+ expr_symtab_add_subfield(symtab, "flags.egress_loopback_occurred", NULL,
+ flags_str);
/* Connection tracking state. */
expr_symtab_add_field(symtab, "ct_mark", MFF_CT_MARK, NULL, false);
@@ -49,6 +49,8 @@ enum mff_log_flags_bits {
MLF_RCV_FROM_VXLAN_BIT = 1,
MLF_FORCE_SNAT_FOR_DNAT_BIT = 2,
MLF_FORCE_SNAT_FOR_LB_BIT = 3,
+ MLF_FORCE_EGRESS_LOOPBACK_BIT = 4,
+ MLF_EGRESS_LOOPBACK_OCCURRED_BIT = 5,
};
/* MFF_LOG_FLAGS_REG flag assignments */
@@ -69,6 +71,18 @@ enum mff_log_flags {
/* Indicate that a packet needs a force SNAT in the gateway router when
* load-balancing has taken place. */
MLF_FORCE_SNAT_FOR_LB = (1 << MLF_FORCE_SNAT_FOR_LB_BIT),
+
+ /* Indicate that at the end of the egress pipeline in table
+ * OFTABLE_LOG_TO_PHY, instead of being sent to the peer patch port or
+ * out the outport, the packet should be forced back to the beginning
+ * of the ingress pipeline with inport = outport. */
+ MLF_FORCE_EGRESS_LOOPBACK = (1 << MLF_FORCE_EGRESS_LOOPBACK_BIT),
+
+ /* Indicate that this packet has been recirculated using egress
+ * loopback. This allows certain checks to be bypassed, such as a
+ * logical router dropping packets whose source IP address equals
+ * one of the logical router's own IP addresses. */
+ MLF_EGRESS_LOOPBACK_OCCURRED = (1 << MLF_EGRESS_LOOPBACK_OCCURRED_BIT),
};
#endif /* ovn/lib/logical-fields.h */
@@ -928,7 +928,9 @@ output;
</li>
<li>
<code>ip4.src</code> or <code>ip6.src</code> is any IP
- address owned by the router.
+ address owned by the router, unless the packet was recirculated
+ due to egress loopback as indicated by
+ <code>flags.egress_loopback_occurred</code>.
</li>
<li>
<code>ip4.src</code> is the broadcast address of any IP network
@@ -3765,6 +3765,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ds_clear(&match);
ds_put_cstr(&match, "ip4.src == ");
op_put_v4_networks(&match, op, true);
+ ds_put_cstr(&match, " && flags.egress_loopback_occurred == 0");
ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
ds_cstr(&match), "drop;");
@@ -4004,6 +4005,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ds_clear(&match);
ds_put_cstr(&match, "ip6.src == ");
op_put_v6_networks(&match, op);
+ ds_put_cstr(&match, " && flags.egress_loopback_occurred == 0");
ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
ds_cstr(&match), "drop;");
@@ -832,7 +832,7 @@
<li><code>reg0</code>...<code>reg9</code></li>
<li><code>xxreg0</code> <code>xxreg1</code></li>
<li><code>inport</code> <code>outport</code></li>
- <li><code>flags.loopback</code></li>
+ <li><code>flags.loopback</code> <code>flags.force_egress_loopback</code> <code>flags.egress_loopback_occurred</code></li>
<li><code>eth.src</code> <code>eth.dst</code> <code>eth.type</code></li>
<li><code>vlan.tci</code> <code>vlan.vid</code> <code>vlan.pcp</code> <code>vlan.present</code></li>
<li><code>ip.proto</code> <code>ip.dscp</code> <code>ip.ecn</code> <code>ip.ttl</code> <code>ip.frag</code></li>
@@ -1131,23 +1131,42 @@ execute_output(const struct ovntrace_datapath *dp, struct flow *uflow,
key);
}
+ uint32_t flags = uflow->regs[MFF_LOG_FLAGS - MFF_REG0];
+
if (pipeline == P_EGRESS) {
- ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT,
- "/* output to \"%s\", type \"%s\" */",
- out_name, port ? port->type : "");
- if (port && port->peer) {
- const struct ovntrace_port *peer = port->peer;
+ bool force_egress_loopback = (flags & MLF_FORCE_EGRESS_LOOPBACK) != 0;
+ if (port && (port->peer || force_egress_loopback)) {
+ const struct ovntrace_port *new_inport = force_egress_loopback ?
+ port : port->peer;
+ if (force_egress_loopback) {
+ ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT,
+ "/* force egress loopback at output to \"%s\" */",
+ out_name);
+ } else {
+ ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT,
+ "/* output to \"%s\", type \"%s\" */",
+ out_name, port ? port->type : "");
+ }
struct ovntrace_node *node = ovntrace_node_append(
super, OVNTRACE_NODE_PIPELINE,
"ingress(dp=\"%s\", inport=\"%s\")",
- peer->dp->name, peer->name);
+ new_inport->dp->name, new_inport->name);
struct flow new_uflow = *uflow;
- new_uflow.regs[MFF_LOG_INPORT - MFF_REG0] = peer->tunnel_key;
- new_uflow.regs[MFF_LOG_OUTPORT - MFF_REG0] = 0;
- trace__(peer->dp, &new_uflow, 0, P_INGRESS, &node->subs);
+ for (int i = 0; i < FLOW_N_REGS; i++) {
+ new_uflow.regs[i] = 0;
+ }
+ new_uflow.regs[MFF_LOG_INPORT - MFF_REG0] = new_inport->tunnel_key;
+ if (force_egress_loopback) {
+ new_uflow.regs[MFF_LOG_FLAGS - MFF_REG0]
+ = MLF_EGRESS_LOOPBACK_OCCURRED;
+ }
+ trace__(new_inport->dp, &new_uflow, 0, P_INGRESS, &node->subs);
} else {
+ ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT,
+ "/* output to \"%s\", type \"%s\" */",
+ out_name, port ? port->type : "");
ovntrace_node_append(super, OVNTRACE_NODE_MODIFY,
"output(\"%s\")", out_name);
@@ -1158,7 +1177,8 @@ execute_output(const struct ovntrace_datapath *dp, struct flow *uflow,
struct flow egress_uflow = *uflow;
for (int i = 0; i < FLOW_N_REGS; i++) {
if (i != MFF_LOG_INPORT - MFF_REG0 &&
- i != MFF_LOG_OUTPORT - MFF_REG0) {
+ i != MFF_LOG_OUTPORT - MFF_REG0 &&
+ i != MFF_LOG_FLAGS - MFF_REG0) {
egress_uflow.regs[i] = 0;
}
}
@@ -1166,7 +1186,6 @@ execute_output(const struct ovntrace_datapath *dp, struct flow *uflow,
uint16_t in_key = uflow->regs[MFF_LOG_INPORT - MFF_REG0];
const struct ovntrace_port *inport = ovntrace_port_find_by_key(dp, in_key);
const char *inport_name = !in_key ? "" : inport ? inport->name : "(unnamed)";
- uint32_t flags = uflow->regs[MFF_LOG_FLAGS - MFF_REG0];
bool allow_loopback = (flags & MLF_ALLOW_LOOPBACK) != 0;
if (mcgroup) {
This patch adds the capability to force loopback at the end of the egress pipeline. A new flags.force_egress_loopback symbol is defined, along with corresponding flags bits. When flags.force_egress_loopback is set, at OFTABLE_LOG_TO_PHY, instead of the packet being sent out to the peer patch port or out the outport, the packet is forced back to the beginning of the ingress pipeline with inport = outport. All other registers are cleared, as if the packet just arrived on that inport. This capability is needed in order to implement some of the east/west distributed NAT flows. Note: The existing flags.loopback allows a packet to go from the end of the ingress pipeline to the beginning of the egress pipeline with outport = inport, which is different. Initially, there are no tests incorporated in this patch. This functionality is tested in a subsequent distributed NAT flows patch. Tests specific to egress loopback may be added once the capability to inject a packet with one of the flags bits set is added. Signed-off-by: Mickey Spiegel <mickeys.dev@gmail.com> --- ovn/controller/physical.c | 38 ++++++++++++++++++++++++++++++++++---- ovn/lib/logical-fields.c | 8 ++++++++ ovn/lib/logical-fields.h | 14 ++++++++++++++ ovn/northd/ovn-northd.8.xml | 4 +++- ovn/northd/ovn-northd.c | 2 ++ ovn/ovn-sb.xml | 2 +- ovn/utilities/ovn-trace.c | 41 ++++++++++++++++++++++++++++++----------- 7 files changed, 92 insertions(+), 17 deletions(-)