@@ -39,6 +39,10 @@ Post-v3.3.0
datapath-specific manner. The Linux kernel datapath is the first to
support this feature by using the new datapath "psample" action. See
'local-group-id' column in the Flow_Sample_Collector_Set table.
+ - A new configuration knob "other_config:explicit-sampled-drops" in the
+ Open_vSwitch table controls whether an explicit drop action shall be
+ added at the end of datapath flows whose last action is an
+ observability-driven sample action.
v3.3.0 - 16 Feb 2024
@@ -3415,6 +3415,7 @@ compose_sample_action(struct xlate_ctx *ctx,
struct ofproto *ofproto = &ctx->xin->ofproto->up;
uint32_t meter_id = ofproto->slowpath_meter_id;
size_t cookie_offset = 0;
+ size_t observe_offset;
/* The meter action is only used to throttle userspace actions.
* If they are not needed and the sampling rate is 100%, avoid generating
@@ -3432,6 +3433,7 @@ compose_sample_action(struct xlate_ctx *ctx,
}
if (args->psample) {
+ observe_offset = ctx->odp_actions->size;
odp_put_psample_action(ctx->odp_actions,
args->psample->group_id,
(void *) &args->psample->cookie,
@@ -3443,6 +3445,7 @@ compose_sample_action(struct xlate_ctx *ctx,
nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_METER, meter_id);
}
+ observe_offset = ctx->odp_actions->size;
odp_port_t odp_port = ofp_port_to_odp_port(
ctx->xbridge, ctx->xin->flow.in_port.ofp_port);
uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port);
@@ -3457,6 +3460,9 @@ compose_sample_action(struct xlate_ctx *ctx,
if (is_sample) {
nl_msg_end_nested(ctx->odp_actions, actions_offset);
nl_msg_end_nested(ctx->odp_actions, sample_offset);
+ ctx->xout->last_observe_offset = sample_offset;
+ } else {
+ ctx->xout->last_observe_offset = observe_offset;
}
return cookie_offset;
@@ -8053,12 +8059,16 @@ xlate_wc_finish(struct xlate_ctx *ctx)
}
}
-/* This will optimize the odp actions generated. For now, it will remove
- * trailing clone actions that are unnecessary. */
+/* This will tweak the odp actions generated. For now, it will:
+ * - Remove trailing clone actions that are unnecessary.
+ * - Add an explicit drop action if the action list is empty.
+ * - Add an explicit drop action if the last action is an observability
+ * sample. This tweak is controlled by a configurable knob. */
static void
-xlate_optimize_odp_actions(struct xlate_in *xin)
+xlate_tweak_odp_actions(struct xlate_ctx *ctx)
{
- struct ofpbuf *actions = xin->odp_actions;
+ uint32_t last_observe_offset = ctx->xout->last_observe_offset;
+ struct ofpbuf *actions = ctx->xin->odp_actions;
struct nlattr *last_action = NULL;
struct nlattr *a;
int left;
@@ -8072,11 +8082,28 @@ xlate_optimize_odp_actions(struct xlate_in *xin)
last_action = a;
}
+ if (!last_action) {
+ if (ovs_explicit_drop_action_supported(ctx->xbridge->ofproto)) {
+ put_drop_action(actions, XLATE_OK);
+ }
+ return;
+ }
+
/* Remove the trailing clone() action, by directly embedding the nested
* actions. */
- if (last_action && nl_attr_type(last_action) == OVS_ACTION_ATTR_CLONE) {
+ if (nl_attr_type(last_action) == OVS_ACTION_ATTR_CLONE) {
void *dest;
+ if (last_observe_offset != UINT32_MAX &&
+ (unsigned char *) actions->data + last_observe_offset >
+ (unsigned char *) last_action) {
+ /* The last sample is inside the trailing clone.
+ * Adjust its offset. */
+ last_observe_offset -= (unsigned char *) nl_attr_get(last_action) -
+ (unsigned char *) last_action;
+ ctx->xout->last_observe_offset = last_observe_offset;
+ }
+
nl_msg_reset_size(actions,
(unsigned char *) last_action -
(unsigned char *) actions->data);
@@ -8084,6 +8111,16 @@ xlate_optimize_odp_actions(struct xlate_in *xin)
dest = nl_msg_put_uninit(actions, nl_attr_get_size(last_action));
memmove(dest, nl_attr_get(last_action), nl_attr_get_size(last_action));
}
+
+ /* If the last action of the list is an observability action, add an
+ * explicit drop action so that drop statistics remain reliable. */
+ if (ctx->xbridge->ofproto->explicit_sampled_drops &&
+ ovs_explicit_drop_action_supported(ctx->xbridge->ofproto) &&
+ last_observe_offset != UINT32_MAX &&
+ (unsigned char *) last_action == (unsigned char *) actions->data +
+ last_observe_offset) {
+ put_drop_action(actions, XLATE_OK);
+ }
}
/* Translates the flow, actions, or rule in 'xin' into datapath actions in
@@ -8100,6 +8137,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
*xout = (struct xlate_out) {
.slow = 0,
.recircs = RECIRC_REFS_EMPTY_INITIALIZER,
+ .last_observe_offset = UINT32_MAX,
};
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
@@ -8528,17 +8566,15 @@ exit:
xout->slow = 0;
if (xin->odp_actions) {
ofpbuf_clear(xin->odp_actions);
+ /* Make the drop explicit if the datapath supports it. */
+ if (ovs_explicit_drop_action_supported(ctx.xbridge->ofproto)) {
+ put_drop_action(xin->odp_actions, ctx.error);
+ }
}
} else {
- /* In the non-error case, see if we can further optimize the datapath
- * rules by removing redundant (clone) actions. */
- xlate_optimize_odp_actions(xin);
- }
-
- /* Install drop action if datapath supports explicit drop action. */
- if (xin->odp_actions && !xin->odp_actions->size &&
- ovs_explicit_drop_action_supported(ctx.xbridge->ofproto)) {
- put_drop_action(xin->odp_actions, ctx.error);
+ /* In the non-error case, see if we can further optimize or tweak
+ * datapath actions. */
+ xlate_tweak_odp_actions(&ctx);
}
/* Since congestion drop and forwarding drop are not exactly
@@ -61,6 +61,10 @@ struct xlate_out {
/* Recirc action IDs on which references are held. */
struct recirc_refs recircs;
+
+ /* Keep track of the last action whose purpose is purely observational.
+ * e.g: IPFIX, sFlow, local sampling. */
+ uint32_t last_observe_offset;
};
struct xlate_in {
@@ -1819,6 +1819,7 @@ construct(struct ofproto *ofproto_)
ofproto->change_seq = 0;
ofproto->ams_seq = seq_create();
ofproto->ams_seqno = seq_read(ofproto->ams_seq);
+ ofproto->explicit_sampled_drops = false;
SHASH_FOR_EACH_SAFE (node, &init_ofp_ports) {
@@ -2091,6 +2092,11 @@ run(struct ofproto *ofproto_)
}
}
}
+
+ if (ofproto->explicit_sampled_drops != ofproto_explicit_sampled_drops) {
+ ofproto->explicit_sampled_drops = ofproto_explicit_sampled_drops;
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
return 0;
}
@@ -365,6 +365,8 @@ struct ofproto_dpif {
bool is_controller_connected; /* True if any controller admitted this
* switch connection. */
+ bool explicit_sampled_drops; /* If explicit drop actions must added after
+ * trailing sample actions. */
};
struct ofproto_dpif *ofproto_dpif_lookup_by_name(const char *name);
@@ -550,6 +550,10 @@ extern unsigned ofproto_offloaded_stats_delay;
* ofproto-dpif implementation. */
extern uint32_t n_handlers, n_revalidators;
+/* If an explicit datapath drop action shall be added after trailing sample
+ * actions coming from IPFIX / sFlow / local sampling. */
+extern bool ofproto_explicit_sampled_drops;
+
static inline struct rule *rule_from_cls_rule(const struct cls_rule *);
void ofproto_rule_expire(struct rule *rule, uint8_t reason)
@@ -312,6 +312,7 @@ unsigned ofproto_max_idle = OFPROTO_MAX_IDLE_DEFAULT;
unsigned ofproto_max_revalidator = OFPROTO_MAX_REVALIDATOR_DEFAULT;
unsigned ofproto_min_revalidate_pps = OFPROTO_MIN_REVALIDATE_PPS_DEFAULT;
unsigned ofproto_offloaded_stats_delay = OFPROTO_OFFLOADED_STATS_DELAY;
+bool ofproto_explicit_sampled_drops = OFPROTO_EXPLICIT_SAMPLED_DROPS_DEFAULT;
uint32_t n_handlers, n_revalidators;
@@ -737,6 +738,14 @@ ofproto_set_offloaded_stats_delay(unsigned offloaded_stats_delay)
ofproto_offloaded_stats_delay = offloaded_stats_delay;
}
+/* Set if an explicit datapath drop action shall be added after trailing sample
+ * actions coming from IPFIX / sFlow / local sampling. */
+void
+ofproto_set_explicit_sampled_drops(bool explicit_sampled_drops)
+{
+ ofproto_explicit_sampled_drops = explicit_sampled_drops;
+}
+
/* If forward_bpdu is true, the NORMAL action will forward frames with
* reserved (e.g. STP) destination Ethernet addresses. if forward_bpdu is false,
* the NORMAL action will drop these frames. */
@@ -326,6 +326,7 @@ int ofproto_port_dump_done(struct ofproto_port_dump *);
#define OFPROTO_MAX_REVALIDATOR_DEFAULT 500 /* ms */
#define OFPROTO_MIN_REVALIDATE_PPS_DEFAULT 5
#define OFPROTO_OFFLOADED_STATS_DELAY 2000 /* ms */
+#define OFPROTO_EXPLICIT_SAMPLED_DROPS_DEFAULT false
const char *ofproto_port_open_type(const struct ofproto *,
const char *port_type);
@@ -398,6 +399,7 @@ void ofproto_ct_zone_limit_protection_update(const char *datapath_type,
bool protected);
void ofproto_get_datapath_cap(const char *datapath_type,
struct smap *dp_cap);
+void ofproto_set_explicit_sampled_drops(bool explicit_sampled_drops);
/* Configuration of ports. */
void ofproto_port_unregister(struct ofproto *, ofp_port_t ofp_port);
@@ -191,3 +191,171 @@ ovs-appctl coverage/read-counter drop_action_too_many_mpls_labels
OVS_VSWITCHD_STOP(["/|WARN|/d"])
AT_CLEANUP
+
+m4_define([ICMP_PKT], [m4_join([,],
+ [in_port(1),packet_type(ns=0,id=0)],
+ [eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e)],
+ [ipv4(src=192.168.10.10,dst=192.168.10.30,proto=1,tos=0,ttl=64,frag=no)],
+ [icmp(type=8,code=0)])])
+
+AT_SETUP([drop-stats - bridge sampling])
+
+OVS_VSWITCHD_START([dnl
+ set bridge br0 datapath_type=dummy \
+ protocols=OpenFlow10,OpenFlow13,OpenFlow14,OpenFlow15 -- \
+ add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1])
+
+AT_DATA([flows.txt], [dnl
+table=0,in_port=1,actions=drop
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+AT_CHECK([ovs-vsctl -- set bridge br0 ipfix=@fix -- \
+ --id=@fix create ipfix targets=\"127.0.0.1:4739\" \
+ sampling=1],
+ [0], [ignore])
+
+for i in $(seq 1 3); do
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ICMP_PKT'], [0], [ignore])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | strip_used | sort], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), dnl
+packets:2, bytes:212, used:0.0s, dnl
+actions:userspace(pid=0,ipfix(output_port=4294967295))
+])
+
+AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
+
+AT_CHECK([ovs-appctl coverage/read-counter drop_action_of_pipeline], [0], [dnl
+0
+])
+
+dnl Now activate explicit sampled drops.
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:explicit-sampled-drops=true])
+AT_CHECK([ovs-appctl revalidator/wait])
+
+for i in $(seq 1 3); do
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ICMP_PKT'], [0], [ignore])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | strip_used | sort], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), dnl
+packets:5, bytes:530, used:0.0s, dnl
+actions:userspace(pid=0,ipfix(output_port=4294967295)),drop
+])
+
+AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
+
+AT_CHECK([ovs-appctl coverage/read-counter drop_action_of_pipeline], [0], [dnl
+3
+])
+
+OVS_VSWITCHD_STOP(["/sending to collector failed/d"])
+AT_CLEANUP
+
+AT_SETUP([drop-stats - sampling action])
+
+OVS_VSWITCHD_START
+add_of_ports br0 1 2 3
+
+AT_DATA([flows.txt], [dnl
+table=0,in_port=1,actions=sample(probability=65535,collector_set_id=1)
+table=0,in_port=2,actions=sample(probability=32767,collector_set_id=1),load:0->reg0
+table=0,in_port=3,actions=clone(sample(probability=65535,collector_set_id=1))
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+AT_CHECK([ovs-vsctl --id=@br0 get Bridge br0 \
+ -- --id=@ipfix create IPFIX targets=\"127.0.0.1:4739\" \
+ -- create Flow_Sample_Collector_Set id=1 bridge=@br0 \
+ ipfix=@ipfix],
+ [0], [ignore])
+
+m4_define([USERSPACE_SAMPLE_ACTION], [m4_join([,],
+ [userspace(pid=0],
+ [flow_sample(probability=$1,collector_set_id=1,obs_domain_id=0],
+ [obs_point_id=0,output_port=4294967295))])])
+
+for i in $(seq 1 3); do
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ICMP_PKT'], [0], [ignore])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | strip_used | sort], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), dnl
+packets:2, bytes:212, used:0.0s, dnl
+actions:USERSPACE_SAMPLE_ACTION(65535)
+])
+
+AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
+
+AT_CHECK([ovs-appctl coverage/read-counter drop_action_of_pipeline], [0], [dnl
+0
+])
+
+dnl Now activate explicit sampled drops.
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:explicit-sampled-drops=true])
+AT_CHECK([ovs-appctl revalidator/wait])
+
+for i in $(seq 1 3); do
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ICMP_PKT'], [0], [ignore])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | strip_used | sort], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), dnl
+packets:5, bytes:530, used:0.0s, dnl
+actions:USERSPACE_SAMPLE_ACTION(65535),drop
+])
+
+AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
+
+AT_CHECK([ovs-appctl coverage/read-counter drop_action_of_pipeline], [0], [dnl
+3
+])
+
+AT_CHECK([ovs-appctl dpctl/del-flows])
+
+for i in $(seq 1 3); do
+AT_CHECK([ovs-appctl netdev-dummy/receive p2 'ICMP_PKT'], [0], [ignore])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | strip_used | sort], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(2),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), dnl
+packets:2, bytes:212, used:0.0s, dnl
+actions:sample(sample=50.0%,actions(USERSPACE_SAMPLE_ACTION(32767))),drop
+])
+
+AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
+
+AT_CHECK([ovs-appctl coverage/read-counter drop_action_of_pipeline], [0], [dnl
+6
+])
+
+AT_CHECK([ovs-appctl dpctl/del-flows])
+
+for i in $(seq 1 3); do
+AT_CHECK([ovs-appctl netdev-dummy/receive p3 'ICMP_PKT'], [0], [ignore])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | strip_used | sort], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), dnl
+packets:2, bytes:212, used:0.0s, dnl
+actions:USERSPACE_SAMPLE_ACTION(65535),drop
+])
+
+AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
+
+AT_CHECK([ovs-appctl coverage/read-counter drop_action_of_pipeline], [0], [dnl
+9
+])
+
+OVS_VSWITCHD_STOP(["/sending to collector failed/d"])
+AT_CLEANUP
@@ -12335,3 +12335,52 @@ Datapath actions: EXPECTED_ACT
OVS_VSWITCHD_STOP("/Enabling an unsupported feature is very dangerous/d")
AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - Local sampling - drop])
+OVS_VSWITCHD_START
+add_of_ports br0 1 2
+
+AT_CHECK([ovs-appctl dpif/set-dp-features --force br0 psample true], [0], [ignore])
+
+AT_CHECK([ovs-vsctl -- --id=@br0 get Bridge br0 \
+ -- create Flow_Sample_Collector_Set id=1 bridge=@br0 local_group_id=42],
+ [0], [ignore])
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=slowpath pktps stats bands=type=drop rate=2'])
+
+AT_DATA([flows.txt], [dnl
+in_port=1, actions=sample(probability=32767,collector_set_id=1,obs_domain_id=100,obs_point_id=200)
+in_port=2, actions=sample(probability=65535,collector_set_id=1,obs_domain_id=100,obs_point_id=200)
+])
+
+AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt])
+
+m4_define([TRACE_PKT], [m4_join([,],
+ [eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800)],
+ [ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no)],
+ [icmp(type=8,code=0)])])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1) TRACE_PKT'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0], [dnl
+Datapath actions: sample(sample=50.0%,actions(psample(group=42,cookie=0x64000000c8)))
+])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2) TRACE_PKT'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0], [dnl
+Datapath actions: psample(group=42,cookie=0x64000000c8)
+])
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:explicit-sampled-drops=true])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1) TRACE_PKT'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0], [dnl
+Datapath actions: sample(sample=50.0%,actions(psample(group=42,cookie=0x64000000c8))),drop
+])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2) TRACE_PKT'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0], [dnl
+Datapath actions: psample(group=42,cookie=0x64000000c8),drop
+])
+
+OVS_VSWITCHD_STOP("/Enabling an unsupported feature is very dangerous/d")
+AT_CLEANUP
@@ -889,7 +889,9 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
ofproto_set_threads(
smap_get_int(&ovs_cfg->other_config, "n-handler-threads", 0),
smap_get_int(&ovs_cfg->other_config, "n-revalidator-threads", 0));
-
+ ofproto_set_explicit_sampled_drops(
+ smap_get_bool(&ovs_cfg->other_config, "explicit-sampled-drops",
+ OFPROTO_EXPLICIT_SAMPLED_DROPS_DEFAULT));
/* Destroy "struct bridge"s, "struct port"s, and "struct iface"s according
* to 'ovs_cfg', with only very minimal configuration otherwise.
*
@@ -869,6 +869,30 @@
The feature is considered experimental.
</p>
</column>
+
+ <column name="other_config" key="explicit-sampled-drops"
+ type='{"type": "boolean"}'>
+ <p>
+ When a flow is installed in the datapath with an empty action list,
+ it indicates an implicit "drop" action. Most datapaths report this
+ for event for statistics and monitoring (in datapath-specific ways).
+ </p>
+ <p>
+ However, if any of the per-bridge or per-flow sampling functionalities
+ are enabled (e.g: sFlow, IPFIX, local sampling), the action list might
+ not be empty, but contain an action to implement such functionality.
+ This makes the datapaths not report the packet drop.
+ </p>
+ <p>
+ This knob makes Open vSwitch detect when the last datapath action
+ comes from these sampling features and add an explicit drop action at
+ the end to keep drop statistics accurate.
+ </p>
+ <p>
+ The default value is <code>false</code>.
+ </p>
+ </column>
+
</group>
<group title="Status">
<column name="next_cfg">
When the flow translation results in a datapath action list whose last action is an "observational" action, i.e: one generated for IPFIX, sFlow or local sampling applications, the packet is actually going to be dropped (and observed). In that case, add an explicit drop action so that drop statistics remain accurate. This behavior is controlled by a configurable boolean knob called "explicit_sampled_drops" Combine the "optimizations" and other odp_actions "tweaks" into a single function. Signed-off-by: Adrian Moreno <amorenoz@redhat.com> --- NEWS | 4 + ofproto/ofproto-dpif-xlate.c | 64 ++++++++++--- ofproto/ofproto-dpif-xlate.h | 4 + ofproto/ofproto-dpif.c | 6 ++ ofproto/ofproto-dpif.h | 2 + ofproto/ofproto-provider.h | 4 + ofproto/ofproto.c | 9 ++ ofproto/ofproto.h | 2 + tests/drop-stats.at | 168 +++++++++++++++++++++++++++++++++++ tests/ofproto-dpif.at | 49 ++++++++++ vswitchd/bridge.c | 4 +- vswitchd/vswitch.xml | 24 +++++ 12 files changed, 325 insertions(+), 15 deletions(-)