@@ -21,6 +21,9 @@ Post-v2.4.0
targets to run a new system testsuite. These tests can be run inside
a Vagrant box. See INSTALL.md for details
- Dropped support for GRE64 tunnel.
+ - Add support for connection tracking through the new "ct" action
+ and "ct_state"/"ct_zone" match fields. Only available on Linux kernels
+ with the connection tracking module loaded.
v2.4.0 - 20 Aug 2015
@@ -24,6 +24,7 @@ TYPES = {"u8": (1, False),
FORMATTING = {"decimal": ("MFS_DECIMAL", 1, 8),
"hexadecimal": ("MFS_HEXADECIMAL", 1, 127),
+ "conn state": ("MFS_CT_STATE", 2, 2),
"Ethernet": ("MFS_ETHERNET", 6, 6),
"IPv4": ("MFS_IPV4", 4, 4),
"IPv6": ("MFS_IPV6", 16, 16),
@@ -281,7 +281,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 24);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -343,6 +343,8 @@ enum ovs_key_attr {
OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls.
* The implementation may restrict
* the accepted length of the array. */
+ OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */
+ OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */
#ifdef __KERNEL__
/* Only used within kernel data path. */
@@ -456,6 +458,15 @@ struct ovs_key_nd {
__u8 nd_tll[ETH_ALEN];
};
+/* OVS_KEY_ATTR_CT_STATE flags */
+#define OVS_CS_F_NEW 0x01 /* Beginning of a new connection. */
+#define OVS_CS_F_ESTABLISHED 0x02 /* Part of an existing connection. */
+#define OVS_CS_F_RELATED 0x04 /* Related to an established
+ * connection. */
+#define OVS_CS_F_INVALID 0x20 /* Could not track connection. */
+#define OVS_CS_F_REPLY_DIR 0x40 /* Flow is in the reply direction. */
+#define OVS_CS_F_TRACKED 0x80 /* Conntrack has occurred. */
+
/**
* enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
* @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
@@ -642,6 +653,28 @@ struct ovs_action_push_tnl {
#endif
/**
+ * enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action.
+ * @OVS_CT_ATTR_FLAGS: u32 connection tracking flags.
+ * @OVS_CT_ATTR_ZONE: u16 connection tracking zone.
+ */
+enum ovs_ct_attr {
+ OVS_CT_ATTR_UNSPEC,
+ OVS_CT_ATTR_FLAGS, /* u32 bitmask of OVS_CT_F_*. */
+ OVS_CT_ATTR_ZONE, /* u16 zone id. */
+ __OVS_CT_ATTR_MAX
+};
+
+#define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1)
+
+/*
+ * OVS_CT_ATTR_FLAGS flags - bitmask of %OVS_CT_F_*
+ * @OVS_CT_F_COMMIT: Commits the flow to the conntrack table. This allows
+ * future packets for the same connection to be identified as 'established'
+ * or 'related'.
+ */
+#define OVS_CT_F_COMMIT 0x01
+
+/**
* enum ovs_action_attr - Action types.
*
* @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
@@ -672,6 +705,8 @@ struct ovs_action_push_tnl {
* indicate the new packet contents. This could potentially still be
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
* is no MPLS label stack, as determined by ethertype, no action is taken.
+ * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related
+ * entries in the flow key.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -702,6 +737,7 @@ enum ovs_action_attr {
* data immediately followed by a mask.
* The data must be zero for the unmasked
* bits. */
+ OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */
#ifndef __KERNEL__
OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/
@@ -1920,6 +1920,11 @@ dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
return EINVAL;
}
+ /* Userspace datapath doesn't support conntrack. */
+ if (flow->ct_state || flow->ct_zone) {
+ return EINVAL;
+ }
+
return 0;
}
@@ -3596,6 +3601,13 @@ dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt,
VLOG_WARN("Packet dropped. Max recirculation depth exceeded.");
break;
+ case OVS_ACTION_ATTR_CT:
+ /* If a flow with this action is slow-pathed, datapath assistance is
+ * required to implement it. However, we don't support this action
+ * in the userspace datapath. */
+ VLOG_WARN("Cannot execute conntrack action in userspace.");
+ break;
+
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_POP_VLAN:
case OVS_ACTION_ATTR_PUSH_MPLS:
@@ -1097,6 +1097,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet **packets, int cnt,
ovs_assert(cnt == 1);
switch ((enum ovs_action_attr)type) {
+ case OVS_ACTION_ATTR_CT:
case OVS_ACTION_ATTR_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
@@ -123,7 +123,7 @@ struct mf_ctx {
* away. Some GCC versions gave warnings on ALWAYS_INLINE, so these are
* defined as macros. */
-#if (FLOW_WC_SEQ != 33)
+#if (FLOW_WC_SEQ != 34)
#define MINIFLOW_ASSERT(X) ovs_assert(X)
BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
"assertions enabled. Consider updating FLOW_WC_SEQ after "
@@ -477,9 +477,10 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
}
miniflow_push_uint32(mf, dp_hash, md->dp_hash);
miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port));
- if (md->recirc_id) {
+ if (md->recirc_id || md->ct_state) {
miniflow_push_uint32(mf, recirc_id, md->recirc_id);
- miniflow_pad_to_64(mf, conj_id);
+ miniflow_push_uint16(mf, ct_state, md->ct_state);
+ miniflow_push_uint16(mf, ct_zone, md->ct_zone);
}
/* Initialize packet's layer pointer and offsets. */
@@ -794,7 +795,7 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
{
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 34);
match_init_catchall(flow_metadata);
if (flow->tunnel.tun_id != htonll(0)) {
@@ -832,6 +833,12 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
}
match_set_in_port(flow_metadata, flow->in_port.ofp_port);
+ if (flow->ct_state != 0) {
+ match_set_ct_state(flow_metadata, flow->ct_state);
+ }
+ if (flow->ct_zone != 0) {
+ match_set_ct_zone(flow_metadata, flow->ct_zone);
+ }
}
char *
@@ -1107,6 +1114,12 @@ flow_format(struct ds *ds, const struct flow *flow)
if (!flow->dp_hash) {
WC_UNMASK_FIELD(wc, dp_hash);
}
+ if (!flow->ct_state) {
+ WC_UNMASK_FIELD(wc, ct_state);
+ }
+ if (!flow->ct_zone) {
+ WC_UNMASK_FIELD(wc, ct_zone);
+ }
for (int i = 0; i < FLOW_N_REGS; i++) {
if (!flow->regs[i]) {
WC_UNMASK_FIELD(wc, regs[i]);
@@ -1146,7 +1159,7 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc,
memset(&wc->masks, 0x0, sizeof wc->masks);
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 34);
if (flow->tunnel.ip_dst) {
if (flow->tunnel.flags & FLOW_TNL_F_KEY) {
@@ -1181,6 +1194,8 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc,
WC_MASK_FIELD(wc, skb_priority);
WC_MASK_FIELD(wc, pkt_mark);
+ WC_MASK_FIELD(wc, ct_state);
+ WC_MASK_FIELD(wc, ct_zone);
WC_MASK_FIELD(wc, recirc_id);
WC_MASK_FIELD(wc, dp_hash);
WC_MASK_FIELD(wc, in_port);
@@ -1257,7 +1272,7 @@ void
flow_wc_map(const struct flow *flow, struct flowmap *map)
{
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 34);
flowmap_init(map);
@@ -1284,6 +1299,8 @@ flow_wc_map(const struct flow *flow, struct flowmap *map)
FLOWMAP_SET(map, dl_src);
FLOWMAP_SET(map, dl_type);
FLOWMAP_SET(map, vlan_tci);
+ FLOWMAP_SET(map, ct_state);
+ FLOWMAP_SET(map, ct_zone);
/* Ethertype-dependent fields. */
if (OVS_LIKELY(flow->dl_type == htons(ETH_TYPE_IP))) {
@@ -1337,7 +1354,7 @@ void
flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc)
{
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 34);
memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata);
memset(&wc->masks.regs, 0, sizeof wc->masks.regs);
@@ -1964,7 +1981,7 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type,
flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label));
/* Clear all L3 and L4 fields and dp_hash. */
- BUILD_ASSERT(FLOW_WC_SEQ == 33);
+ BUILD_ASSERT(FLOW_WC_SEQ == 34);
memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0,
sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT);
flow->dp_hash = 0;
@@ -40,7 +40,7 @@ struct match;
/* This sequence number should be incremented whenever anything involving flows
* or the wildcarding of flows changes. This will cause build assertion
* failures in places which likely need to be updated. */
-#define FLOW_WC_SEQ 33
+#define FLOW_WC_SEQ 34
/* Number of Open vSwitch extension 32-bit registers. */
#define FLOW_N_REGS 8
@@ -102,9 +102,11 @@ struct flow {
* computation is opaque to the user space. */
union flow_in_port in_port; /* Input port.*/
uint32_t recirc_id; /* Must be exact match. */
+ uint16_t ct_state; /* Connection tracking state. */
+ uint16_t ct_zone; /* Connection tracking zone. */
uint32_t conj_id; /* Conjunction ID. */
ofp_port_t actset_output; /* Output port in action set. */
- uint8_t pad1[6]; /* Pad to 64 bits. */
+ uint8_t pad1[2]; /* Pad to 64 bits. */
/* L2, Order the same as in the Ethernet header! (64-bit aligned) */
struct eth_addr dl_dst; /* Ethernet destination address. */
@@ -154,7 +156,7 @@ BUILD_ASSERT_DECL(sizeof(struct flow_tnl) % sizeof(uint64_t) == 0);
/* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t)
== sizeof(struct flow_tnl) + 192
- && FLOW_WC_SEQ == 33);
+ && FLOW_WC_SEQ == 34);
/* Incremental points at which flow classification may be performed in
* segments.
@@ -980,6 +982,8 @@ pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow)
md->skb_priority = flow->skb_priority;
md->pkt_mark = flow->pkt_mark;
md->in_port = flow->in_port;
+ md->ct_state = flow->ct_state;
+ md->ct_zone = flow->ct_zone;
}
static inline bool is_ip_any(const struct flow *flow)
@@ -285,6 +285,26 @@ match_set_pkt_mark_masked(struct match *match, uint32_t pkt_mark, uint32_t mask)
}
+/* Makes 'match' match exactly on connection tracking state 'ct_state': every
+ * bit of the ct_state field becomes significant. */
void
+match_set_ct_state(struct match *match, uint16_t ct_state)
+{
+    /* With an all-ones mask the value is stored unmodified. */
+    match->wc.masks.ct_state = UINT16_MAX;
+    match->flow.ct_state = ct_state;
+}
+
+/* Makes 'match' match on the bits of 'ct_state' that are set in 'mask'.  Bits
+ * of 'ct_state' outside 'mask' are cleared before being stored in the match's
+ * flow. */
+void
+match_set_ct_state_masked(struct match *match, uint16_t ct_state, uint16_t mask)
+{
+    match->wc.masks.ct_state = mask;
+    match->flow.ct_state = mask & ct_state;
+}
+
+/* Makes 'match' match exactly on connection tracking zone 'ct_zone'.  The
+ * zone mask is always set to all-ones here; no masked variant is provided. */
+void
+match_set_ct_zone(struct match *match, uint16_t ct_zone)
+{
+    match->wc.masks.ct_zone = UINT16_MAX;
+    match->flow.ct_zone = ct_zone;
+}
+
+void
match_set_dl_type(struct match *match, ovs_be16 dl_type)
{
match->wc.masks.dl_type = OVS_BE16_MAX;
@@ -816,6 +836,21 @@ format_ipv6_netmask(struct ds *s, const char *name,
}
+/* Appends "NAME=VALUE," to 's' for the 16-bit field 'name'.
+ *
+ * Nothing is appended when 'mask' is zero (field fully wildcarded).  An
+ * all-ones 'mask' prints 'value' in decimal; any other mask prints
+ * "0xVALUE/0xMASK" in hexadecimal. */
static void
+format_uint16_masked(struct ds *s, const char *name,
+                     uint16_t value, uint16_t mask)
+{
+    if (!mask) {
+        return;
+    }
+
+    ds_put_format(s, "%s=", name);
+    if (mask != UINT16_MAX) {
+        ds_put_format(s, "0x%"PRIx16"/0x%"PRIx16, value, mask);
+    } else {
+        ds_put_format(s, "%"PRIu16, value);
+    }
+    ds_put_char(s, ',');
+}
+
+static void
format_be16_masked(struct ds *s, const char *name,
ovs_be16 value, ovs_be16 mask)
{
@@ -921,7 +956,7 @@ match_format(const struct match *match, struct ds *s, int priority)
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 34);
if (priority != OFP_DEFAULT_PRIORITY) {
ds_put_format(s, "priority=%d,", priority);
@@ -953,6 +988,26 @@ match_format(const struct match *match, struct ds *s, int priority)
ds_put_char(s, ',');
}
+ if (wc->masks.ct_state) {
+ if (wc->masks.ct_state == UINT16_MAX) {
+ ds_put_cstr(s, "ct_state=");
+ if (f->ct_state) {
+ format_flags(s, packet_ct_state_to_string, f->ct_state,
+ '|');
+ } else {
+ ds_put_cstr(s, "0"); /* No state. */
+ }
+ } else {
+ format_flags_masked(s, "ct_state", packet_ct_state_to_string,
+ f->ct_state, wc->masks.ct_state, UINT16_MAX);
+ }
+ ds_put_char(s, ',');
+ }
+
+ if (wc->masks.ct_zone) {
+ format_uint16_masked(s, "ct_zone", f->ct_zone, wc->masks.ct_zone);
+ }
+
if (wc->masks.dl_type) {
skip_type = true;
if (f->dl_type == htons(ETH_TYPE_IP)) {
@@ -83,6 +83,9 @@ void match_set_tun_gbp_flags(struct match *match, uint8_t flags);
void match_set_in_port(struct match *, ofp_port_t ofp_port);
void match_set_pkt_mark(struct match *, uint32_t pkt_mark);
void match_set_pkt_mark_masked(struct match *, uint32_t pkt_mark, uint32_t mask);
+void match_set_ct_state(struct match *, uint16_t ct_state);
+void match_set_ct_state_masked(struct match *, uint16_t ct_state, uint16_t mask);
+void match_set_ct_zone(struct match *, uint16_t ct_zone);
void match_set_skb_priority(struct match *, uint32_t skb_priority);
void match_set_dl_type(struct match *, ovs_be16);
void match_set_dl_src(struct match *, const struct eth_addr );
@@ -214,6 +214,10 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc)
return !wc->masks.skb_priority;
case MFF_PKT_MARK:
return !wc->masks.pkt_mark;
+ case MFF_CT_STATE:
+ return !wc->masks.ct_state;
+ case MFF_CT_ZONE:
+ return !wc->masks.ct_zone;
CASE_MFF_REGS:
return !wc->masks.regs[mf->id - MFF_REG0];
CASE_MFF_XREGS:
@@ -497,6 +501,8 @@ mf_is_value_valid(const struct mf_field *mf, const union mf_value *value)
case MFF_IN_PORT:
case MFF_SKB_PRIORITY:
case MFF_PKT_MARK:
+ case MFF_CT_STATE:
+ case MFF_CT_ZONE:
CASE_MFF_REGS:
CASE_MFF_XREGS:
case MFF_ETH_SRC:
@@ -644,6 +650,14 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow,
value->be32 = htonl(flow->pkt_mark);
break;
+ case MFF_CT_STATE:
+ value->be16 = htons(flow->ct_state);
+ break;
+
+ case MFF_CT_ZONE:
+ value->be16 = htons(flow->ct_zone);
+ break;
+
CASE_MFF_REGS:
value->be32 = htonl(flow->regs[mf->id - MFF_REG0]);
break;
@@ -876,6 +890,14 @@ mf_set_value(const struct mf_field *mf,
match_set_pkt_mark(match, ntohl(value->be32));
break;
+ case MFF_CT_STATE:
+ match_set_ct_state(match, ntohs(value->be16));
+ break;
+
+ case MFF_CT_ZONE:
+ match_set_ct_zone(match, ntohs(value->be16));
+ break;
+
CASE_MFF_REGS:
match_set_reg(match, mf->id - MFF_REG0, ntohl(value->be32));
break;
@@ -1160,6 +1182,14 @@ mf_set_flow_value(const struct mf_field *mf,
flow->pkt_mark = ntohl(value->be32);
break;
+ case MFF_CT_STATE:
+ flow->ct_state = ntohs(value->be16);
+ break;
+
+ case MFF_CT_ZONE:
+ flow->ct_zone = ntohs(value->be16);
+ break;
+
CASE_MFF_REGS:
flow->regs[mf->id - MFF_REG0] = ntohl(value->be32);
break;
@@ -1449,6 +1479,16 @@ mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str)
match->wc.masks.pkt_mark = 0;
break;
+ case MFF_CT_STATE:
+ match->flow.ct_state = 0;
+ match->wc.masks.ct_state = 0;
+ break;
+
+ case MFF_CT_ZONE:
+ match->flow.ct_zone = 0;
+ match->wc.masks.ct_zone = 0;
+ break;
+
CASE_MFF_REGS:
match_set_reg_masked(match, mf->id - MFF_REG0, 0, 0);
break;
@@ -1636,6 +1676,7 @@ mf_set(const struct mf_field *mf,
}
switch (mf->id) {
+ case MFF_CT_ZONE:
case MFF_RECIRC_ID:
case MFF_CONJ_ID:
case MFF_IN_PORT:
@@ -1711,6 +1752,10 @@ mf_set(const struct mf_field *mf,
ntohl(mask->be32));
break;
+ case MFF_CT_STATE:
+ match_set_ct_state_masked(match, ntohs(value->be16), ntohs(mask->be16));
+ break;
+
case MFF_ETH_DST:
match_set_dl_dst_masked(match, value->mac, mask->mac);
break;
@@ -2103,6 +2148,22 @@ mf_from_tun_flags_string(const char *s, ovs_be16 *flagsp, ovs_be16 *maskp)
htons(FLOW_TNL_PUB_F_MASK), maskp);
}
+/* Parses 's' as a set of connection tracking state flags, restricted to
+ * CS_SUPPORTED_MASK, using parse_mf_flags().
+ *
+ * On success, stores the parsed flags in '*flagsp' and the parsed mask in
+ * '*maskp' and returns NULL.  On failure, returns the error string produced
+ * by parse_mf_flags() and leaves '*flagsp' and '*maskp' untouched. */
+static char *
+mf_from_ct_state_string(const char *s, ovs_be16 *flagsp, ovs_be16 *maskp)
+{
+    ovs_be16 parsed_flags, parsed_mask;
+    char *err;
+
+    err = parse_mf_flags(s, packet_ct_state_to_string, "ct_state",
+                         &parsed_flags, htons(CS_SUPPORTED_MASK),
+                         &parsed_mask);
+    if (err) {
+        return err;
+    }
+
+    *flagsp = parsed_flags;
+    *maskp = parsed_mask;
+    return NULL;
+}
+
/* Parses 's', a string value for field 'mf', into 'value' and 'mask'. Returns
* NULL if successful, otherwise a malloc()'d string describing the error. */
char *
@@ -2124,6 +2185,11 @@ mf_parse(const struct mf_field *mf, const char *s,
(uint8_t *) value, (uint8_t *) mask);
break;
+ case MFS_CT_STATE:
+ ovs_assert(mf->n_bytes == sizeof(ovs_be16));
+ error = mf_from_ct_state_string(s, &value->be16, &mask->be16);
+ break;
+
case MFS_ETHERNET:
error = mf_from_ethernet_string(mf, s, &value->mac, &mask->mac);
break;
@@ -2244,6 +2310,13 @@ mf_format_tcp_flags_string(ovs_be16 value, ovs_be16 mask, struct ds *s)
TCP_FLAGS(mask), TCP_FLAGS(OVS_BE16_MAX));
}
+/* Appends to 's' a textual form of the connection tracking state flags in
+ * 'value' under 'mask'.  Both arguments are in network byte order and are
+ * converted to host order before formatting. */
+static void
+mf_format_ct_state_string(ovs_be16 value, ovs_be16 mask, struct ds *s)
+{
+    uint16_t host_value = ntohs(value);
+    uint16_t host_mask = ntohs(mask);
+
+    format_flags_masked(s, NULL, packet_ct_state_to_string,
+                        host_value, host_mask, UINT16_MAX);
+}
+
/* Appends to 's' a string representation of field 'mf' whose value is in
* 'value' and 'mask'. 'mask' may be NULL to indicate an exact match. */
void
@@ -2280,6 +2353,11 @@ mf_format(const struct mf_field *mf,
mf_format_integer_string(mf, (uint8_t *) value, (uint8_t *) mask, s);
break;
+ case MFS_CT_STATE:
+ mf_format_ct_state_string(value->be16,
+ mask ? mask->be16 : OVS_BE16_MAX, s);
+ break;
+
case MFS_ETHERNET:
eth_format_masked(value->mac, mask ? &mask->mac : NULL, s);
break;
@@ -703,6 +703,71 @@ enum OVS_PACKED_ENUM mf_field_id {
*/
MFF_PKT_MARK,
+ /* "ct_state".
+ *
+ * Connection tracking state. The field is populated by the NXAST_CT
+ * action. The following bit values describe the state of the connection:
+ *
+ * - New (0x01): This is the beginning of a new connection.
+ * - Established (0x02): This is part of an already existing connection.
+ * - Related (0x04): This is a separate connection that is related to an
+ * existing connection.
+ * - Invalid (0x20): This flow could not be associated with a connection.
+ * This could be set for a variety of reasons,
+ * including (but not limited to):
+ * - L3/L4 protocol handler is not loaded/unavailable.
+ * - L3/L4 protocol handler determines that the packet
+ * is malformed or invalid for the current FSM stage.
+ * - Packets are unexpected length for protocol.
+ * - Reply (0x40): This flow is in the reply direction, i.e. it did not
+ * initiate the connection.
+ * - Tracked (0x80): Connection tracking has occurred.
+ *
+ * The "Tracked" bit corresponds to the packet_state as described in the
+ * description of NXAST_CT action. The remaining bits correspond to
+ * connection state. The "New" bit implies that the connection state
+ * is uncommitted, while "Established" implies that it has previously been
+ * committed.
+ *
+ * There are additional constraints on the ct_state bits, listed in order
+ * of precedence below:
+ *
+ * - If "Tracked" is unset, no other bits may be set.
+ * - If "Tracked" is set, one or more other bits may be set.
+ * - The "Invalid" bit is only ever set with the "Tracked" bit.
+ * - The "New" and "Established" bits are mutually exclusive.
+ * - The "New" and "Reply" bits are mutually exclusive.
+ * - The "Related" bit may be set in conjunction with any other bits.
+ * Connections that are identified as "Related" are separate
+ * connections from the originating connection, so must be committed
+ * separately. All packets for a related connection will have the
+ * "Related" bit set (not just the initial packet).
+ *
+ * Type: be16.
+ * Maskable: bitwise.
+ * Formatting: conn state.
+ * Prerequisites: none.
+ * Access: read-only.
+ * NXM: NXM_NX_CT_STATE(105) since v2.5.
+ * OXM: none.
+ */
+ MFF_CT_STATE,
+
+ /* "ct_zone".
+ *
+ * Connection tracking zone. The field is populated by the
+ * NXAST_CT action.
+ *
+ * Type: be16.
+ * Maskable: no.
+ * Formatting: hexadecimal.
+ * Prerequisites: none.
+ * Access: read-only.
+ * NXM: NXM_NX_CT_ZONE(106) since v2.5.
+ * OXM: none.
+ */
+ MFF_CT_ZONE,
+
#if FLOW_N_REGS == 8
/* "reg<N>".
*
@@ -1679,6 +1744,7 @@ enum OVS_PACKED_ENUM mf_string {
MFS_HEXADECIMAL,
/* Other formats. */
+ MFS_CT_STATE, /* Connection tracking state */
MFS_ETHERNET,
MFS_IPV4,
MFS_IPV6,
@@ -901,7 +901,7 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match,
int match_len;
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 34);
/* Metadata. */
if (match->wc.masks.dp_hash) {
@@ -1036,6 +1036,16 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match,
nxm_put_32m(b, MFF_PKT_MARK, oxm, htonl(flow->pkt_mark),
htonl(match->wc.masks.pkt_mark));
+ /* Connection tracking. */
+ if (match->wc.masks.ct_state) {
+ nxm_put_16m(b, MFF_CT_STATE, oxm, htons(flow->ct_state),
+ htons(match->wc.masks.ct_state));
+ }
+ if (match->wc.masks.ct_zone) {
+ nxm_put_16m(b, MFF_CT_ZONE, oxm, htons(flow->ct_zone),
+ htons(match->wc.masks.ct_zone));
+ }
+
/* OpenFlow 1.1+ Metadata. */
nxm_put_64m(b, MFF_METADATA, oxm,
flow->metadata, match->wc.masks.metadata);
@@ -326,6 +326,8 @@ odp_execute_set_action(struct dp_packet *packet, const struct nlattr *a)
case OVS_KEY_ATTR_ICMP:
case OVS_KEY_ATTR_ICMPV6:
case OVS_KEY_ATTR_TCP_FLAGS:
+ case OVS_KEY_ATTR_CT_STATE:
+ case OVS_KEY_ATTR_CT_ZONE:
case __OVS_KEY_ATTR_MAX:
default:
OVS_NOT_REACHED();
@@ -414,6 +416,8 @@ odp_execute_masked_set_action(struct dp_packet *packet,
case OVS_KEY_ATTR_TUNNEL: /* Masked data not supported for tunnel. */
case OVS_KEY_ATTR_UNSPEC:
+ case OVS_KEY_ATTR_CT_STATE:
+ case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_ENCAP:
case OVS_KEY_ATTR_ETHERTYPE:
case OVS_KEY_ATTR_IN_PORT:
@@ -476,6 +480,7 @@ requires_datapath_assistance(const struct nlattr *a)
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_RECIRC:
+ case OVS_ACTION_ATTR_CT:
return true;
case OVS_ACTION_ATTR_SET:
@@ -611,6 +616,7 @@ odp_execute_actions(void *dp, struct dp_packet **packets, int cnt, bool steal,
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_RECIRC:
+ case OVS_ACTION_ATTR_CT:
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
@@ -113,6 +113,7 @@ odp_action_len(uint16_t type)
case OVS_ACTION_ATTR_SET: return ATTR_LEN_VARIABLE;
case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE;
case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE;
+ case OVS_ACTION_ATTR_CT: return ATTR_LEN_VARIABLE;
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
@@ -134,6 +135,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize)
case OVS_KEY_ATTR_ENCAP: return "encap";
case OVS_KEY_ATTR_PRIORITY: return "skb_priority";
case OVS_KEY_ATTR_SKB_MARK: return "skb_mark";
+ case OVS_KEY_ATTR_CT_STATE: return "ct_state";
+ case OVS_KEY_ATTR_CT_ZONE: return "ct_zone";
case OVS_KEY_ATTR_TUNNEL: return "tunnel";
case OVS_KEY_ATTR_IN_PORT: return "in_port";
case OVS_KEY_ATTR_ETHERNET: return "eth";
@@ -532,6 +535,44 @@ format_odp_tnl_push_action(struct ds *ds, const struct nlattr *attr)
ds_put_format(ds, ",out_port(%"PRIu32"))", data->out_port);
}
+/* Netlink policy for the attributes nested inside an OVS_ACTION_ATTR_CT
+ * action, indexed by OVS_CT_ATTR_*.  Both attributes are optional:
+ * format_odp_conntrack_action() treats a missing one as zero. */
+static const struct nl_policy ovs_conntrack_policy[] = {
+ [OVS_CT_ATTR_FLAGS] = { .type = NL_A_U32, .optional = true,
+ .min_len = sizeof(uint32_t) },
+ [OVS_CT_ATTR_ZONE] = { .type = NL_A_U16, .optional = true,
+ .min_len = sizeof(uint16_t)},
+};
+
+/* Appends to 'ds' a textual form of the OVS_ACTION_ATTR_CT action 'attr'.
+ *
+ * The output is "ct", followed by a parenthesized, comma-separated list of
+ * "commit" (if OVS_CT_F_COMMIT is set) and "zone=N" (if the zone is nonzero)
+ * whenever any flag or a nonzero zone is present.  If the nested attributes
+ * do not satisfy ovs_conntrack_policy, "ct(error)" is appended instead. */
+static void
+format_odp_conntrack_action(struct ds *ds, const struct nlattr *attr)
+{
+    struct nlattr *attrs[ARRAY_SIZE(ovs_conntrack_policy)];
+    uint32_t flags = 0;
+    uint16_t zone = 0;
+    bool commit;
+
+    if (!nl_parse_nested(attr, ovs_conntrack_policy, attrs,
+                         ARRAY_SIZE(attrs))) {
+        ds_put_cstr(ds, "ct(error)");
+        return;
+    }
+
+    /* Both attributes are optional; an absent one reads as zero. */
+    if (attrs[OVS_CT_ATTR_FLAGS]) {
+        flags = nl_attr_get_u32(attrs[OVS_CT_ATTR_FLAGS]);
+    }
+    if (attrs[OVS_CT_ATTR_ZONE]) {
+        zone = nl_attr_get_u16(attrs[OVS_CT_ATTR_ZONE]);
+    }
+
+    ds_put_cstr(ds, "ct");
+    if (!flags && !zone) {
+        return;
+    }
+
+    commit = (flags & OVS_CT_F_COMMIT) != 0;
+    ds_put_char(ds, '(');
+    if (commit) {
+        ds_put_cstr(ds, "commit");
+    }
+    if (zone) {
+        /* A separator is needed only if "commit" was printed first. */
+        if (commit) {
+            ds_put_char(ds, ',');
+        }
+        ds_put_format(ds, "zone=%"PRIu16, zone);
+    }
+    ds_put_char(ds, ')');
+}
+
static void
format_odp_action(struct ds *ds, const struct nlattr *a)
{
@@ -622,6 +663,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
case OVS_ACTION_ATTR_SAMPLE:
format_odp_sample_action(ds, a);
break;
+ case OVS_ACTION_ATTR_CT:
+ format_odp_conntrack_action(ds, a);
+ break;
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
default:
@@ -960,6 +1004,59 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
}
+/* Tries to parse a datapath "ct" action from the beginning of 's_'.
+ *
+ * Accepted forms are "ct" and "ct(ARGS)", where ARGS is a list of "commit"
+ * and "zone=N" items separated by characters from 'delimiters'.  On success,
+ * appends a nested OVS_ACTION_ATTR_CT attribute to 'actions'; the flags and
+ * zone sub-attributes are emitted only when nonzero.
+ *
+ * Returns the number of characters consumed, 0 if 's_' does not start with
+ * "ct" (in which case nothing is appended), or -EINVAL if the argument list
+ * has no closing ')' or contains an unrecognized item. */
static int
+parse_conntrack_action(const char *s_, struct ofpbuf *actions)
+{
+    const char *pos = s_;
+    uint32_t flags = 0;
+    uint16_t zone = 0;
+    size_t nest_ofs;
+
+    if (!ovs_scan(pos, "ct")) {
+        return 0;
+    }
+    pos += 2;
+
+    if (ovs_scan(pos, "(")) {
+        const char *close;
+
+        pos++;
+        close = strchr(pos, ')');
+        if (!close) {
+            return -EINVAL;
+        }
+
+        while (pos != close) {
+            int n = -1;
+
+            pos += strspn(pos, delimiters);
+            if (ovs_scan(pos, "commit%n", &n)) {
+                flags |= OVS_CT_F_COMMIT;
+                pos += n;
+            } else if (ovs_scan(pos, "zone=%"SCNu16"%n", &zone, &n)) {
+                pos += n;
+            } else if (n < 0) {
+                /* Neither keyword matched at this position. */
+                return -EINVAL;
+            }
+        }
+        pos++;              /* Skip the closing ')'. */
+    }
+
+    nest_ofs = nl_msg_start_nested(actions, OVS_ACTION_ATTR_CT);
+    if (flags) {
+        nl_msg_put_u32(actions, OVS_CT_ATTR_FLAGS, flags);
+    }
+    if (zone) {
+        nl_msg_put_u16(actions, OVS_CT_ATTR_ZONE, zone);
+    }
+    nl_msg_end_nested(actions, nest_ofs);
+
+    return pos - s_;
+}
+
+static int
parse_odp_action(const char *s, const struct simap *port_names,
struct ofpbuf *actions)
{
@@ -1117,6 +1214,15 @@ parse_odp_action(const char *s, const struct simap *port_names,
}
{
+ int retval;
+
+ retval = parse_conntrack_action(s, actions);
+ if (retval) {
+ return retval;
+ }
+ }
+
+ {
struct ovs_action_push_tnl data;
int n;
@@ -1211,6 +1317,8 @@ static const struct attr_len_tbl ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] =
[OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) },
[OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) },
[OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) },
+ [OVS_KEY_ATTR_CT_STATE] = { .len = 1 },
+ [OVS_KEY_ATTR_CT_ZONE] = { .len = 2 },
};
/* Returns the correct length of the payload for a flow key attribute of the
@@ -2016,6 +2124,21 @@ format_frag(struct ds *ds, const char *name, uint8_t key,
}
}
+/* Returns true if mask attribute 'ma' is absent (NULL) or if every byte of
+ * its payload is zero, false otherwise. */
+static bool
+mask_empty(const struct nlattr *ma)
+{
+    return !ma || is_all_zeros(nl_attr_get(ma), nl_attr_get_size(ma));
+}
+
static void
format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
const struct hmap *portno_names, struct ds *ds,
@@ -2057,6 +2180,24 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
}
break;
+    case OVS_KEY_ATTR_CT_STATE:
+        /* Connection tracking state is a u8 bitmask of OVS_CS_F_* flags. */
+        if (!is_exact) {
+            format_flags_masked(ds, NULL, packet_ct_state_to_string,
+                                nl_attr_get_u8(a), nl_attr_get_u8(ma),
+                                UINT8_MAX);
+        } else {
+            /* Join flag names with '|', the same delimiter match_format()
+             * uses when printing exact-match ct_state flags. */
+            format_flags(ds, packet_ct_state_to_string,
+                         nl_attr_get_u8(a), '|');
+        }
+        break;
+
+    case OVS_KEY_ATTR_CT_ZONE:
+        if (verbose || !mask_empty(ma)) {
+            /* Print with a "0x" prefix: scan_u16() reads the value back
+             * with "%i", which treats unprefixed digits as decimal. */
+            ds_put_format(ds, "%#"PRIx16, nl_attr_get_u16(a));
+            if (!is_exact) {
+                /* Emit the mask in the "/mask" form scan_u16() accepts.
+                 * NOTE(review): like the ct_state case, this relies on
+                 * !is_exact implying that 'ma' is non-NULL. */
+                ds_put_format(ds, "/%#"PRIx16, nl_attr_get_u16(ma));
+            }
+        }
+        break;
+
+
case OVS_KEY_ATTR_TUNNEL:
format_odp_tun_attr(a, ma, ds, verbose);
break;
@@ -2503,6 +2644,26 @@ scan_u8(const char *s, uint8_t *key, uint8_t *mask)
}
+/* Scans a 16-bit integer from the start of 's' into '*key'.  If 'mask' is
+ * nonnull, also scans an optional "/MASK" suffix into '*mask'; when the
+ * suffix is absent, '*mask' is set to UINT16_MAX (exact match).
+ *
+ * Returns the number of characters consumed, or 0 if no integer could be
+ * scanned. */
static int
+scan_u16(const char *s, uint16_t *key, uint16_t *mask)
+{
+    int consumed;
+    int n;
+
+    if (!ovs_scan(s, "%"SCNi16"%n", key, &n)) {
+        return 0;
+    }
+    consumed = n;
+
+    if (!mask) {
+        return consumed;
+    }
+
+    if (ovs_scan(s + consumed, "/%"SCNi16"%n", mask, &n)) {
+        consumed += n;
+    } else {
+        *mask = UINT16_MAX;
+    }
+    return consumed;
+}
+
+static int
scan_u32(const char *s, uint32_t *key, uint32_t *mask)
{
int n;
@@ -2605,6 +2766,25 @@ scan_tcp_flags(const char *s, ovs_be16 *key, ovs_be16 *mask)
}
+/* Scans connection tracking state flags from 's', up to a terminating ')',
+ * using parse_flags() restricted to CS_SUPPORTED_MASK.  The parsed flags are
+ * stored in '*key' and, if 'mask' is nonnull, the parsed mask in '*mask'.
+ *
+ * Returns the nonnegative value reported by parse_flags() on success, or 0
+ * if parse_flags() reported an error (a negative value). */
static int
+scan_ct_state(const char *s, uint8_t *key, uint8_t *mask)
+{
+    uint32_t flags, fmask;
+    int n = parse_flags(s, packet_ct_state_to_string, ')', NULL, NULL,
+                        &flags, CS_SUPPORTED_MASK, mask ? &fmask : NULL);
+
+    if (n < 0) {
+        return 0;
+    }
+
+    *key = flags;
+    if (mask) {
+        *mask = fmask;
+    }
+    return n;
+}
+
+static int
scan_frag(const char *s, uint8_t *key, uint8_t *mask)
{
int n;
@@ -3127,6 +3307,9 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names,
OVS_KEY_ATTR_RECIRC_ID);
SCAN_SINGLE("dp_hash(", uint32_t, u32, OVS_KEY_ATTR_DP_HASH);
+ SCAN_SINGLE("ct_state(", uint8_t, ct_state, OVS_KEY_ATTR_CT_STATE);
+ SCAN_SINGLE("ct_zone(", uint16_t, u16, OVS_KEY_ATTR_CT_ZONE);
+
SCAN_BEGIN_NESTED("tunnel(", OVS_KEY_ATTR_TUNNEL) {
SCAN_FIELD_NESTED("tun_id=", ovs_be64, be64, OVS_TUNNEL_KEY_ATTR_ID);
SCAN_FIELD_NESTED("src=", ovs_be32, ipv4, OVS_TUNNEL_KEY_ATTR_IPV4_SRC);
@@ -3361,6 +3544,12 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms,
nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark);
+ if (parms->support.ct_state) {
+ nl_msg_put_u8(buf, OVS_KEY_ATTR_CT_STATE, data->ct_state);
+ }
+ if (parms->support.ct_zone) {
+ nl_msg_put_u16(buf, OVS_KEY_ATTR_CT_ZONE, data->ct_zone);
+ }
if (parms->support.recirc) {
nl_msg_put_u32(buf, OVS_KEY_ATTR_RECIRC_ID, data->recirc_id);
nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, data->dp_hash);
@@ -3543,6 +3732,13 @@ odp_key_from_pkt_metadata(struct ofpbuf *buf, const struct pkt_metadata *md)
nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, md->pkt_mark);
+ if (md->ct_state) {
+ nl_msg_put_u8(buf, OVS_KEY_ATTR_CT_STATE, md->ct_state);
+ if (md->ct_zone) {
+ nl_msg_put_u16(buf, OVS_KEY_ATTR_CT_ZONE, md->ct_zone);
+ }
+ }
+
/* Add an ingress port attribute if 'odp_in_port' is not the magical
* value "ODPP_NONE". */
if (md->in_port.odp_port != ODPP_NONE) {
@@ -3590,6 +3786,14 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
md->pkt_mark = nl_attr_get_u32(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_SKB_MARK);
break;
+ case OVS_KEY_ATTR_CT_STATE:
+ md->ct_state = nl_attr_get_u8(nla);
+ wanted_attrs &= ~(1u << OVS_KEY_ATTR_CT_STATE);
+ break;
+ case OVS_KEY_ATTR_CT_ZONE:
+ md->ct_zone = nl_attr_get_u16(nla);
+ wanted_attrs &= ~(1u << OVS_KEY_ATTR_CT_ZONE);
+ break;
case OVS_KEY_ATTR_TUNNEL: {
enum odp_key_fitness res;
@@ -4144,6 +4348,15 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_SKB_MARK;
}
+ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_CT_STATE)) {
+ flow->ct_state = nl_attr_get_u8(attrs[OVS_KEY_ATTR_CT_STATE]);
+ expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_CT_STATE;
+ }
+ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_CT_ZONE)) {
+ flow->ct_zone = nl_attr_get_u16(attrs[OVS_KEY_ATTR_CT_ZONE]);
+ expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_CT_ZONE;
+ }
+
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) {
enum odp_key_fitness res;
@@ -120,6 +120,8 @@ void odp_portno_names_destroy(struct hmap *portno_names);
* OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
* OVS_KEY_ATTR_DP_HASH 4 -- 4 8
* OVS_KEY_ATTR_RECIRC_ID 4 -- 4 8
+ * OVS_KEY_ATTR_CT_STATE 1 3 4 8
+ * OVS_KEY_ATTR_CT_ZONE 2 2 4 8
* OVS_KEY_ATTR_ETHERNET 12 -- 4 16
* OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
* OVS_KEY_ATTR_VLAN 2 2 4 8
@@ -129,13 +131,13 @@ void odp_portno_names_destroy(struct hmap *portno_names);
* OVS_KEY_ATTR_ICMPV6 2 2 4 8
* OVS_KEY_ATTR_ND 28 -- 4 32
* ----------------------------------------------------------
- * total 488
+ * total 504
*
* We include some slack space in case the calculation isn't quite right or we
* add another field and forget to adjust this value.
*/
#define ODPUTIL_FLOW_KEY_BYTES 512
-BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
+BUILD_ASSERT_DECL(FLOW_WC_SEQ == 34);
/* A buffer with sufficient size and alignment to hold an nlattr-formatted flow
* key. An array of "struct nlattr" might not, in theory, be sufficiently
@@ -166,6 +168,10 @@ struct odp_support {
/* If this is true, then recirculation fields will always be serialised. */
bool recirc;
+
+ /* If true, serialise the corresponding OVS_KEY_ATTR_CT_* field. */
+ bool ct_state;
+ bool ct_zone;
};
struct odp_flow_key_parms {
@@ -286,6 +286,9 @@ enum ofp_raw_action_type {
/* NX1.0+(34): struct nx_action_conjunction. */
NXAST_RAW_CONJUNCTION,
+ /* NX1.0+(35): struct nx_action_conntrack, ... */
+ NXAST_RAW_CT,
+
/* ## ------------------ ## */
/* ## Debugging actions. ## */
/* ## ------------------ ## */
@@ -346,6 +349,10 @@ static void *ofpact_put_raw(struct ofpbuf *, enum ofp_version,
static char *OVS_WARN_UNUSED_RESULT ofpacts_parse(
char *str, struct ofpbuf *ofpacts, enum ofputil_protocol *usable_protocols,
bool allow_instructions, enum ofpact_type outer_action);
+static enum ofperr ofpacts_pull_openflow_actions__(
+ struct ofpbuf *openflow, unsigned int actions_len,
+ enum ofp_version version, uint32_t allowed_ovsinsts,
+ struct ofpbuf *ofpacts, enum ofpact_type outer_action);
/* Pull off existing actions or instructions. Used by nesting actions to keep
* ofpacts_parse() oblivious of actions nesting.
@@ -4455,6 +4462,245 @@ format_DEBUG_RECIRC(const struct ofpact_null *a OVS_UNUSED, struct ds *s)
{
ds_put_cstr(s, "debug_recirc");
}
+
+/* Action structure for NXAST_CT.
+ *
+ * Pass traffic to the connection tracker.
+ *
+ * There are two important concepts to understanding the connection tracking
+ * interface: Packet state and Connection state. Packets may be "Untracked" or
+ * "Tracked". Connections may be "Uncommitted" or "Committed".
+ *
+ * - Packet State:
+ *
+ * Untracked packets have not yet passed through the connection tracker,
+ * and the connection state for such packets is unknown. In most cases,
+ * packets entering the OpenFlow pipeline will initially be in the
+ * untracked state. Untracked packets may become tracked by executing
+ * NXAST_CT with a "recirc_table" specified. This makes various aspects
+ * about the connection available, in particular the connection state.
+ *
+ * Tracked packets have previously passed through the connection tracker.
+ * These packets will remain tracked through until the end of the OpenFlow
+ * pipeline. Tracked packets which have NXAST_CT executed with a
+ * "recirc_table" specified will return to the tracked state.
+ *
+ * The packet state is only significant for the duration of packet
+ * processing within the OpenFlow pipeline.
+ *
+ * - Connection State:
+ *
+ * Multiple packets may be associated with a single connection. Initially,
+ * all connections are uncommitted. The connection state corresponding to
+ * a packet is available in the NXM_NX_CT_STATE field for tracked packets.
+ *
+ * Uncommitted connections have no state stored about them. Uncommitted
+ * connections may transition into the committed state by executing
+ * NXAST_CT with the NX_CT_F_COMMIT flag.
+ *
+ * Once a connection becomes committed, information may be gathered about
+ * the connection by passing subsequent packets through the connection
+ * tracker, and the state of the connection will be stored beyond the
+ * lifetime of packet processing.
+ *
+ * Connections may transition back into the uncommitted state due to
+ * external timers, or due to the contents of packets that are sent to the
+ * connection tracker. This behaviour is outside of the scope of the
+ * OpenFlow interface.
+ *
+ * The "zone" specifies a context within which the tracking is done:
+ *
+ * The connection tracking zone is a 16-bit number. Each zone is an
+ * independent connection tracking context. The connection state for each
+ * connection is completely separate for each zone, so if a connection
+ * is committed to zone A, then it will remain uncommitted in zone B.
+ * If NXAST_CT is executed with the same zone multiple times, later
+ * executions have no effect.
+ *
+ * If 'zone_src' is nonzero, this specifies that the zone should be
+ * sourced from a field zone_src[ofs:ofs+nbits]. The format and semantics
+ * of 'zone_src' and 'zone_ofs_nbits' are similar to those for the
+ * NXAST_REG_LOAD action. The acceptable nxm_header values for 'zone_src'
+ * are the same as the acceptable nxm_header values for the 'src' field of
+ * NXAST_REG_MOVE.
+ *
+ * If 'zone_src' is zero, then the value of 'zone_imm' will be used as the
+ * connection tracking zone.
+ *
+ * The "recirc_table" allows NXM_NX_CT_* fields to become available:
+ *
+ * If "recirc_table" has a value other than NX_CT_RECIRC_NONE, then the
+ * packet will be logically cloned prior to executing this action. One
+ * copy will be sent to the connection tracker, then will be re-injected
+ * into the OpenFlow pipeline beginning at the OpenFlow table specified in
+ * this field. When the packet re-enters the pipeline, the NXM_NX_CT_*
+ * fields will be populated. The original instance of the packet will
+ * continue the current actions list. This can be thought of as similar to
+ * the effect of the "output" action: One copy is sent out (in this case,
+ * to the connection tracker), but the current copy continues processing.
+ *
+ * It is strongly recommended that this table is later than the current
+ * table, to prevent loops.
+ */
+struct nx_action_conntrack {
+ ovs_be16 type; /* OFPAT_VENDOR. */
+ ovs_be16 len; /* At least 24. */
+ ovs_be32 vendor; /* NX_VENDOR_ID. */
+ ovs_be16 subtype; /* NXAST_CT. */
+ ovs_be16 flags; /* Zero or more NX_CT_F_* flags.
+ * Unspecified flag bits must be zero. */
+ ovs_be32 zone_src; /* NXM header of the field that supplies the
+ * zone, or zero to use 'zone_imm'. */
+ union {
+ ovs_be16 zone_ofs_nbits;/* If 'zone_src' is nonzero: range to use
+ * from the source field. */
+ ovs_be16 zone_imm; /* If 'zone_src' is zero: immediate value
+ * for the zone. */
+ };
+ uint8_t recirc_table; /* Recirculate to a specific table, or
+ NX_CT_RECIRC_NONE for no recirculation. */
+ uint8_t pad[5]; /* Zeroes */
+};
+OFP_ASSERT(sizeof(struct nx_action_conntrack) == 24);
+
+/* Decodes the zone portion of 'nac' into 'out'.
+ *
+ * A nonzero 'zone_src' selects a field-sourced zone: the subfield is decoded
+ * into 'out->zone_src', must pass mf_check_src() and must be exactly 16 bits
+ * wide.  Otherwise 'out->zone_src.field' is set to NULL and the immediate
+ * 'zone_imm' is used.
+ *
+ * Returns 0 if successful, otherwise an OpenFlow error. */
+static enum ofperr
+decode_ct_zone(const struct nx_action_conntrack *nac,
+               struct ofpact_conntrack *out)
+{
+    enum ofperr err;
+
+    if (!nac->zone_src) {
+        /* Immediate zone. */
+        out->zone_src.field = NULL;
+        out->zone_imm = ntohs(nac->zone_imm);
+        return 0;
+    }
+
+    /* Zone sourced from a field. */
+    out->zone_src.field = mf_from_nxm_header(ntohl(nac->zone_src));
+    out->zone_src.ofs = nxm_decode_ofs(nac->zone_ofs_nbits);
+    out->zone_src.n_bits = nxm_decode_n_bits(nac->zone_ofs_nbits);
+
+    err = mf_check_src(&out->zone_src, NULL);
+    if (err) {
+        return err;
+    }
+    if (out->zone_src.n_bits == 16) {
+        return 0;
+    }
+
+    VLOG_WARN_RL(&rl, "zone n_bits %d not within valid range [16..16]",
+                 out->zone_src.n_bits);
+    return OFPERR_OFPBAC_BAD_SET_LEN;
+}
+
+/* Decodes the NXAST_CT action 'nac' and appends the resulting OFPACT_CT to
+ * 'out'.
+ *
+ * Returns 0 if successful, otherwise the error reported while decoding the
+ * zone.  'recirc_table' is only copied when the zone decoded successfully. */
+static enum ofperr
+decode_NXAST_RAW_CT(const struct nx_action_conntrack *nac, struct ofpbuf *out)
+{
+    struct ofpact_conntrack *ct = ofpact_put_CT(out);
+    int err;
+
+    ct->flags = ntohs(nac->flags);
+    err = decode_ct_zone(nac, ct);
+    if (!err) {
+        ct->recirc_table = nac->recirc_table;
+    }
+    return err;
+}
+
+/* Appends to 'out' the NXAST_CT wire encoding of 'conntrack'.
+ *
+ * The zone is encoded either as a field reference ('zone_src' plus
+ * 'zone_ofs_nbits') or, when no source field is set, as a zero 'zone_src'
+ * plus the immediate 'zone_imm'. */
+static void
+encode_CT(const struct ofpact_conntrack *conntrack,
+          enum ofp_version ofp_version OVS_UNUSED, struct ofpbuf *out)
+{
+    struct nx_action_conntrack *nac = put_NXAST_CT(out);
+
+    nac->flags = htons(conntrack->flags);
+    nac->recirc_table = conntrack->recirc_table;
+
+    if (!conntrack->zone_src.field) {
+        nac->zone_src = htonl(0);
+        nac->zone_imm = htons(conntrack->zone_imm);
+        return;
+    }
+
+    nac->zone_src = htonl(mf_nxm_header(conntrack->zone_src.field->id));
+    nac->zone_ofs_nbits = nxm_encode_ofs_nbits(conntrack->zone_src.ofs,
+                                               conntrack->zone_src.n_bits);
+}
+
+/* Parses 'arg' as the argument to a "ct" action, and appends such an
+ * action to 'ofpacts'.
+ *
+ * Recognized keys are "commit", "table=N" and "zone=Z", where Z is either a
+ * 16-bit immediate or a subfield.  A table equal to NX_CT_RECIRC_NONE is
+ * rejected, because that value is the marker for "do not recirculate" and
+ * would otherwise be silently accepted as a request for no recirculation.
+ *
+ * Returns NULL if successful, otherwise a malloc()'d string describing the
+ * error.  The caller is responsible for freeing the returned string. */
+static char * OVS_WARN_UNUSED_RESULT
+parse_CT(char *arg, struct ofpbuf *ofpacts,
+         enum ofputil_protocol *usable_protocols OVS_UNUSED)
+{
+    struct ofpact_conntrack *oc;
+    char *error = NULL;
+    char *key, *value;
+
+    oc = ofpact_put_CT(ofpacts);
+    oc->flags = 0;
+    oc->recirc_table = NX_CT_RECIRC_NONE;
+    while (ofputil_parse_key_value(&arg, &key, &value)) {
+        if (!strcmp(key, "commit")) {
+            oc->flags |= NX_CT_F_COMMIT;
+        } else if (!strcmp(key, "table")) {
+            error = str_to_u8(value, "recirc_table", &oc->recirc_table);
+            if (!error && oc->recirc_table == NX_CT_RECIRC_NONE) {
+                error = xasprintf("invalid table %"PRIu8, oc->recirc_table);
+            }
+        } else if (!strcmp(key, "zone")) {
+            error = str_to_u16(value, "zone", &oc->zone_imm);
+
+            if (error) {
+                /* Not an immediate value; try to parse it as a subfield. */
+                free(error);
+                error = mf_parse_subfield(&oc->zone_src, value);
+            }
+        } else {
+            error = xasprintf("invalid argument to \"ct\" action: `%s'", key);
+        }
+        if (error) {
+            break;
+        }
+    }
+
+    return error;
+}
+
+/* Appends a ',' separator to 's' unless this is the first item being
+ * formatted.  '*first' is always false on return. */
+static void
+append_comma(struct ds *s, bool *first)
+{
+    if (!*first) {
+        ds_put_char(s, ',');
+    }
+    *first = false;
+}
+
+/* Appends to 's' the textual form of 'a': "ct(", then a comma-separated list
+ * of the arguments that are set ("commit", "table=N", "zone=..."), then ")".
+ * An immediate zone of 0 is not printed. */
+static void
+format_CT(const struct ofpact_conntrack *a, struct ds *s)
+{
+    bool is_first = true;
+
+    ds_put_cstr(s, "ct(");
+
+    if (a->flags & NX_CT_F_COMMIT) {
+        append_comma(s, &is_first);
+        ds_put_cstr(s, "commit");
+    }
+
+    if (a->recirc_table != NX_CT_RECIRC_NONE) {
+        append_comma(s, &is_first);
+        ds_put_format(s, "table=%"PRIu8, a->recirc_table);
+    }
+
+    if (!a->zone_src.field) {
+        if (a->zone_imm) {
+            append_comma(s, &is_first);
+            ds_put_format(s, "zone=%"PRIu16, a->zone_imm);
+        }
+    } else {
+        append_comma(s, &is_first);
+        ds_put_cstr(s, "zone=");
+        mf_format_subfield(&a->zone_src, s);
+    }
+
+    ds_put_char(s, ')');
+}
/* Meter instruction. */
@@ -4834,6 +5080,7 @@ ofpact_is_set_or_move_action(const struct ofpact *a)
return true;
case OFPACT_BUNDLE:
case OFPACT_CLEAR_ACTIONS:
+ case OFPACT_CT:
case OFPACT_CONTROLLER:
case OFPACT_DEC_MPLS_TTL:
case OFPACT_DEC_TTL:
@@ -4908,6 +5155,7 @@ ofpact_is_allowed_in_actions_set(const struct ofpact *a)
* in the action set is undefined. */
case OFPACT_BUNDLE:
case OFPACT_CONTROLLER:
+ case OFPACT_CT:
case OFPACT_ENQUEUE:
case OFPACT_EXIT:
case OFPACT_UNROLL_XLATE:
@@ -5137,6 +5385,7 @@ ovs_instruction_type_from_ofpact_type(enum ofpact_type type)
case OFPACT_UNROLL_XLATE:
case OFPACT_SAMPLE:
case OFPACT_DEBUG_RECIRC:
+ case OFPACT_CT:
default:
return OVSINST_OFPIT11_APPLY_ACTIONS;
}
@@ -5693,6 +5942,16 @@ ofpact_check__(enum ofputil_protocol *usable_protocols, struct ofpact *a,
case OFPACT_SAMPLE:
return 0;
+ case OFPACT_CT: {
+ struct ofpact_conntrack *oc = ofpact_get_CT(a);
+
+ if (!dl_type_is_ip_any(flow->dl_type)
+ || (flow->ct_state & CS_INVALID && oc->flags & NX_CT_F_COMMIT)) {
+ inconsistent_match(usable_protocols);
+ }
+ return 0;
+ }
+
case OFPACT_CLEAR_ACTIONS:
return 0;
@@ -6162,6 +6421,7 @@ ofpact_outputs_to_port(const struct ofpact *ofpact, ofp_port_t port)
case OFPACT_METER:
case OFPACT_GROUP:
case OFPACT_DEBUG_RECIRC:
+ case OFPACT_CT:
default:
return false;
}
@@ -106,6 +106,7 @@
OFPACT(EXIT, ofpact_null, ofpact, "exit") \
OFPACT(SAMPLE, ofpact_sample, ofpact, "sample") \
OFPACT(UNROLL_XLATE, ofpact_unroll_xlate, ofpact, "unroll_xlate") \
+ OFPACT(CT, ofpact_conntrack, ofpact, "ct") \
\
/* Debugging actions. \
* \
@@ -471,6 +472,30 @@ BUILD_ASSERT_DECL(offsetof(struct ofpact_nest, actions) % OFPACT_ALIGNTO == 0);
BUILD_ASSERT_DECL(offsetof(struct ofpact_nest, actions)
== sizeof(struct ofpact_nest));
+/* Bits for 'flags' in struct nx_action_conntrack.
+ *
+ * If NX_CT_F_COMMIT is set, then the connection entry is moved from the
+ * unconfirmed to confirmed list in the tracker. */
+enum nx_conntrack_flags {
+ NX_CT_F_COMMIT = 1 << 0,
+};
+
+/* Magic value for struct nx_action_conntrack 'recirc_table' field, to specify
+ * that the packet should not be recirculated. This value should commonly be
+ * used in conjunction with the NX_CT_F_COMMIT flag above. */
+#define NX_CT_RECIRC_NONE OFPTT_ALL
+
+/* OFPACT_CT.
+ *
+ * Used for NXAST_CT. */
+struct ofpact_conntrack {
+ struct ofpact ofpact;
+ uint16_t flags; /* Zero or more NX_CT_F_* flags. */
+ uint16_t zone_imm; /* Immediate zone, used when 'zone_src.field' is NULL. */
+ struct mf_subfield zone_src; /* Subfield supplying the zone, if 'field' is nonnull. */
+ uint8_t recirc_table; /* Table to recirculate to, or NX_CT_RECIRC_NONE. */
+};
+
static inline size_t
ofpact_nest_get_action_len(const struct ofpact_nest *on)
{
@@ -199,7 +199,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask)
void
ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc)
{
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 34);
/* Initialize most of wc. */
flow_wildcards_init_catchall(wc);
@@ -1062,3 +1062,24 @@ packet_csum_pseudoheader(const struct ip_header *ip)
return partial;
}
+
+/* Returns the short name of the single connection state bit 'flag' (one of
+ * the CS_* values), or NULL if 'flag' is not exactly one known bit. */
+const char *
+packet_ct_state_to_string(uint32_t flag)
+{
+    if (flag == CS_NEW) {
+        return "new";
+    }
+    if (flag == CS_ESTABLISHED) {
+        return "est";
+    }
+    if (flag == CS_RELATED) {
+        return "rel";
+    }
+    if (flag == CS_INVALID) {
+        return "inv";
+    }
+    if (flag == CS_REPLY_DIR) {
+        return "rpl";
+    }
+    if (flag == CS_TRACKED) {
+        return "trk";
+    }
+    return NULL;
+}
@@ -25,6 +25,7 @@
#include "compiler.h"
#include "geneve.h"
#include "openvswitch/types.h"
+#include "odp-netlink.h"
#include "random.h"
#include "hash.h"
#include "tun-metadata.h"
@@ -126,6 +127,8 @@ struct pkt_metadata {
uint32_t skb_priority; /* Packet priority for QoS. */
uint32_t pkt_mark; /* Packet mark. */
union flow_in_port in_port; /* Input port. */
+ uint16_t ct_state; /* Connection state. */
+ uint16_t ct_zone; /* Connection zone. */
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. Note that
* if 'ip_dst' == 0, the rest of the fields may
* be uninitialized. */
@@ -134,13 +137,18 @@ struct pkt_metadata {
static inline void
pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
{
- /* It can be expensive to zero out all of the tunnel metadata. However,
- * we can just zero out ip_dst and the rest of the data will never be
- * looked at. */
- memset(md, 0, offsetof(struct pkt_metadata, tunnel));
- md->tunnel.ip_dst = 0;
+ /* It can be expensive to zero out all metadata. Therefore:
+ *
+ * - We can just zero out 'tunnel.ip_dst' and the rest of the 'tunnel'
+ * field will never be looked at.
+ * - We can just zero out 'ct_state' and the rest of the 'ct_*' members
+ * will never be looked at. */
+ memset(md, 0, offsetof(struct pkt_metadata, in_port));
md->in_port.odp_port = port;
+
+ md->ct_state = 0;
+ md->tunnel.ip_dst = 0;
}
bool dpid_from_string(const char *s, uint64_t *dpidp);
@@ -710,6 +718,24 @@ struct tcp_header {
};
BUILD_ASSERT_DECL(TCP_HEADER_LEN == sizeof(struct tcp_header));
+/* Connection states */
+#define CS_NEW 0x01
+BUILD_ASSERT_DECL(CS_NEW == OVS_CS_F_NEW);
+#define CS_ESTABLISHED 0x02
+BUILD_ASSERT_DECL(CS_ESTABLISHED == OVS_CS_F_ESTABLISHED);
+#define CS_RELATED 0x04
+BUILD_ASSERT_DECL(CS_RELATED == OVS_CS_F_RELATED);
+#define CS_INVALID 0x20
+BUILD_ASSERT_DECL(CS_INVALID == OVS_CS_F_INVALID);
+#define CS_REPLY_DIR 0x40
+BUILD_ASSERT_DECL(CS_REPLY_DIR == OVS_CS_F_REPLY_DIR);
+#define CS_TRACKED 0x80
+BUILD_ASSERT_DECL(CS_TRACKED == OVS_CS_F_TRACKED);
+
+/* Undefined connection state bits. */
+#define CS_UNSUPPORTED_MASK 0x18
+#define CS_SUPPORTED_MASK (~CS_UNSUPPORTED_MASK & 0xFFFF)
+
#define ARP_HRD_ETHERNET 1
#define ARP_PRO_IP 0x0800
#define ARP_OP_REQUEST 1
@@ -937,4 +963,6 @@ void compose_arp(struct dp_packet *, uint16_t arp_op,
ovs_be32 arp_spa, ovs_be32 arp_tpa);
uint32_t packet_csum_pseudoheader(const struct ip_header *);
+const char *packet_ct_state_to_string(uint32_t flag);
+
#endif /* packets.h */
@@ -137,6 +137,7 @@ recirc_metadata_hash(const struct recirc_state *state)
flow_tnl_size(state->metadata.tunnel)
/ sizeof(uint64_t), hash);
}
+ hash = hash_boolean(state->conntracked, hash);
hash = hash_words64((const uint64_t *) &state->metadata.metadata,
(sizeof state->metadata - sizeof state->metadata.tunnel)
/ sizeof(uint64_t),
@@ -168,6 +169,7 @@ recirc_metadata_equal(const struct recirc_state *a,
(!b->stack || !b->stack->size))
|| (a->stack && b->stack && ofpbuf_equal(a->stack, b->stack)))
&& a->mirrors == b->mirrors
+ && a->conntracked == b->conntracked
&& a->action_set_len == b->action_set_len
&& ofpacts_equal(a->ofpacts, a->ofpacts_len,
b->ofpacts, b->ofpacts_len));
@@ -93,7 +93,7 @@ struct rule;
/* Metadata for restoring pipeline context after recirculation. Helpers
* are inlined below to keep them together with the definition for easier
* updates. */
-BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
+BUILD_ASSERT_DECL(FLOW_WC_SEQ == 34);
struct recirc_metadata {
/* Metadata in struct flow. */
@@ -142,6 +142,7 @@ struct recirc_state {
struct recirc_metadata metadata; /* Flow metadata. */
struct ofpbuf *stack; /* Stack if any. */
mirror_mask_t mirrors; /* Mirrors already output. */
+ bool conntracked; /* Conntrack occurred prior to recirc. */
/* Actions to be translated on recirculation. */
uint32_t action_set_len; /* How much of 'ofpacts' consists of an
@@ -1029,6 +1029,8 @@ sflow_read_set_action(const struct nlattr *attr,
case OVS_KEY_ATTR_ICMPV6:
case OVS_KEY_ATTR_ARP:
case OVS_KEY_ATTR_ND:
+ case OVS_KEY_ATTR_CT_STATE:
+ case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_UNSPEC:
case __OVS_KEY_ATTR_MAX:
default:
@@ -1137,6 +1139,7 @@ dpif_sflow_read_actions(const struct flow *flow,
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_HASH:
+ case OVS_ACTION_ATTR_CT:
break;
case OVS_ACTION_ATTR_SET_MASKED:
@@ -300,6 +300,11 @@ struct xlate_ctx {
* the MPLS label stack that was originally present. */
bool was_mpls;
+ /* True if conntrack has been performed on this packet during processing
+ * on the current bridge. This is used to determine whether conntrack
+ * state from the datapath should be honored after recirculation. */
+ bool conntracked;
+
/* OpenFlow 1.1+ action set.
*
* 'action_set' accumulates "struct ofpact"s added by OFPACT_WRITE_ACTIONS.
@@ -2798,6 +2803,13 @@ xlate_commit_actions(struct xlate_ctx *ctx)
}
+/* Resets the connection tracking fields of 'flow' ('ct_state' and 'ct_zone')
+ * to zero. */
 static void
+clear_conntrack(struct flow *flow)
+{
+ flow->ct_state = 0;
+ flow->ct_zone = 0;
+}
+
+static void
compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
const struct xlate_bond_recirc *xr, bool check_stp)
{
@@ -2814,7 +2826,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
/* If 'struct flow' gets additional metadata, we'll need to zero it out
* before traversing a patch port. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 34);
memset(&flow_tnl, 0, sizeof flow_tnl);
if (!xport) {
@@ -2852,6 +2864,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
if (xport->peer) {
const struct xport *peer = xport->peer;
struct flow old_flow = ctx->xin->flow;
+ bool old_conntrack = ctx->conntracked;
bool old_was_mpls = ctx->was_mpls;
cls_version_t old_version = ctx->tables_version;
struct ofpbuf old_stack = ctx->stack;
@@ -2867,6 +2880,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
memset(&flow->tunnel, 0, sizeof flow->tunnel);
memset(flow->regs, 0, sizeof flow->regs);
flow->actset_output = OFPP_UNSET;
+ ctx->conntracked = false;
+ clear_conntrack(flow);
/* The bridge is now known so obtain its table version. */
ctx->tables_version
@@ -2921,6 +2936,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
* bridge. */
ctx->was_mpls = old_was_mpls;
+ /* The peer bridge's conntrack execution should have no effect on the
+ * original bridge. */
+ ctx->conntracked = old_conntrack;
+
/* The fact that the peer bridge exits (for any reason) does not mean
* that the original bridge should exit. Specifically, if the peer
* bridge recirculates (which typically modifies the packet), the
@@ -3509,24 +3528,23 @@ execute_controller_action(struct xlate_ctx *ctx, int len,
dp_packet_delete(packet);
}
-/* Called only when ctx->recirc_action_offset is set. */
static void
-compose_recirculate_action(struct xlate_ctx *ctx)
+compose_recirculate_action__(struct xlate_ctx *ctx, uint8_t table)
{
struct recirc_metadata md;
uint32_t id;
- xlate_commit_actions(ctx);
recirc_metadata_from_flow(&md, &ctx->xin->flow);
ovs_assert(ctx->recirc_action_offset >= 0);
struct recirc_state state = {
- .table_id = 0,
+ .table_id = table,
.ofproto = ctx->xbridge->ofproto,
.metadata = md,
.stack = &ctx->stack,
.mirrors = ctx->mirrors,
+ .conntracked = ctx->conntracked,
.action_set_len = ctx->recirc_action_offset,
.ofpacts_len = ctx->action_set.size,
.ofpacts = ctx->action_set.data,
@@ -3563,6 +3581,14 @@ compose_recirculate_action(struct xlate_ctx *ctx)
ctx->last_unroll_offset = -1;
}
+/* Commits any pending actions, then composes a recirculation by passing
+ * table 0 to compose_recirculate_action__().
+ *
+ * Called only when ctx->recirc_action_offset is set. */
+static void
+compose_recirculate_action(struct xlate_ctx *ctx)
+{
+ xlate_commit_actions(ctx);
+ compose_recirculate_action__(ctx, 0);
+}
+
static void
compose_mpls_push_action(struct xlate_ctx *ctx, struct ofpact_push_mpls *mpls)
{
@@ -4096,6 +4122,7 @@ recirc_unroll_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
case OFPACT_METER:
case OFPACT_SAMPLE:
case OFPACT_DEBUG_RECIRC:
+ case OFPACT_CT:
break;
/* These need not be copied for restoration. */
@@ -4119,6 +4146,43 @@ recirc_unroll_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
}
+/* Translates the OpenFlow "ct" action 'ofc' into an OVS_ACTION_ATTR_CT
+ * datapath action appended to 'ctx->odp_actions'.
+ *
+ * The zone is read from the subfield 'ofc->zone_src' of the flow being
+ * translated if a source field is set, and otherwise taken from the
+ * immediate 'ofc->zone_imm'.  Unless 'ofc->recirc_table' is
+ * NX_CT_RECIRC_NONE, a recirculation into that table is composed right after
+ * the conntrack action. */
 static void
+compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc)
+{
+    uint32_t flags = 0;
+    size_t ct_offset;
+    uint16_t zone;
+
+    /* Ensure that any prior actions are applied before composing the new
+     * conntrack action. */
+    xlate_commit_actions(ctx);
+
+    if (ofc->flags & NX_CT_F_COMMIT) {
+        flags |= OVS_CT_F_COMMIT;
+    }
+    if (ofc->zone_src.field) {
+        /* The zone is a full 16-bit value and is emitted as a u16 below, so
+         * the subfield value must not be masked down to 8 bits. */
+        zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow);
+    } else {
+        zone = ofc->zone_imm;
+    }
+
+    ct_offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CT);
+    nl_msg_put_u32(ctx->odp_actions, OVS_CT_ATTR_FLAGS, flags);
+    nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone);
+    nl_msg_end_nested(ctx->odp_actions, ct_offset);
+
+    if (ofc->recirc_table == NX_CT_RECIRC_NONE) {
+        /* If we do not recirculate as part of this action, hide the results of
+         * connection tracking from subsequent recirculations. */
+        ctx->conntracked = false;
+    } else {
+        /* Use ct_* fields from datapath during recirculation upcall. */
+        ctx->conntracked = true;
+        ctx_trigger_recirculation(ctx);
+        compose_recirculate_action__(ctx, ofc->recirc_table);
+    }
+}
+
+static void
do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
struct xlate_ctx *ctx)
{
@@ -4482,6 +4546,11 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
xlate_sample_action(ctx, ofpact_get_SAMPLE(a));
break;
+ case OFPACT_CT:
+ CHECK_MPLS_RECIRCULATION();
+ compose_conntrack_action(ctx, ofpact_get_CT(a));
+ break;
+
case OFPACT_DEBUG_RECIRC:
ctx_trigger_recirculation(ctx);
a = ofpact_next(a);
@@ -4786,6 +4855,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
.last_unroll_offset = -1,
.was_mpls = false,
+ .conntracked = false,
.action_set_has_group = false,
.action_set = OFPBUF_STUB_INITIALIZER(action_set_stub),
@@ -4855,6 +4925,10 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
ctx.table_id = state->table_id;
xlate_report(&ctx, "- Resuming from table %"PRIu8, ctx.table_id);
+ if (!state->conntracked) {
+ clear_conntrack(flow);
+ }
+
/* Restore pipeline metadata. May change flow's in_port and other
* metadata to the values that existed when recirculation was
* triggered. */
@@ -1228,6 +1228,45 @@ check_masked_set_action(struct dpif_backer *backer)
return !error;
}
+/* CHECK_FEATURE__(NAME, FIELD) defines check_NAME(), a datapath feature probe.
+ *
+ * The generated function builds a flow key from an otherwise all-zeros flow
+ * whose FIELD member is set to 1, serialised with only the 'NAME' support
+ * flag enabled, and hands that key to dpif_probe_feature().  It logs whether
+ * the datapath supports NAME and returns the probe result.
+ *
+ * CHECK_FEATURE(FIELD) is shorthand for the case where the support flag and
+ * the flow member share a name.  Both macros are undefined again after the
+ * ct_state and ct_zone probes have been instantiated. */
+#define CHECK_FEATURE__(NAME, FIELD) \
+static bool \
+check_##NAME(struct dpif_backer *backer) \
+{ \
+ struct flow flow; \
+ struct odputil_keybuf keybuf; \
+ struct ofpbuf key; \
+ bool enable; \
+ struct odp_flow_key_parms odp_parms = { \
+ .flow = &flow, \
+ .support = { \
+ .NAME = true, \
+ }, \
+ }; \
+ \
+ memset(&flow, 0, sizeof flow); \
+ flow.FIELD = 1; \
+ \
+ ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); \
+ odp_flow_key_from_flow(&odp_parms, &key); \
+ enable = dpif_probe_feature(backer->dpif, #NAME, &key, NULL); \
+ \
+ if (enable) { \
+ VLOG_INFO("%s: Datapath supports "#NAME, dpif_name(backer->dpif)); \
+ } else { \
+ VLOG_INFO("%s: Datapath does not support "#NAME, \
+ dpif_name(backer->dpif)); \
+ } \
+ \
+ return enable; \
+}
+#define CHECK_FEATURE(FIELD) CHECK_FEATURE__(FIELD, FIELD)
+
+CHECK_FEATURE(ct_state)
+CHECK_FEATURE(ct_zone)
+
+#undef CHECK_FEATURE
+#undef CHECK_FEATURE__
+
static void
check_support(struct dpif_backer *backer)
{
@@ -1239,6 +1278,9 @@ check_support(struct dpif_backer *backer)
backer->support.masked_set_action = check_masked_set_action(backer);
backer->support.ufid = check_ufid(backer);
backer->support.tnl_push_pop = dpif_supports_tnl_push_pop(backer->dpif);
+
+ backer->support.odp.ct_state = check_ct_state(backer);
+ backer->support.odp.ct_zone = check_ct_zone(backer);
}
static int
@@ -3935,10 +3977,40 @@ rule_dealloc(struct rule *rule_)
}
+/* Checks the connection tracking fields matched by 'rule'.
+ *
+ * Returns OFPERR_OFPBMC_BAD_FIELD if 'rule' matches on ct_state or ct_zone
+ * while the corresponding 'odp' support flag is not set,
+ * OFPERR_OFPBMC_BAD_MASK if its ct_state mask includes any bit of
+ * CS_UNSUPPORTED_MASK, and 0 otherwise. */
 static enum ofperr
+rule_check(struct rule *rule)
+{
+    const struct odp_support *support;
+    struct ofproto_dpif *ofproto;
+    uint16_t state_mask, zone_mask;
+
+    state_mask = MINIFLOW_GET_U16(&rule->cr.match.mask->masks, ct_state);
+    zone_mask = MINIFLOW_GET_U16(&rule->cr.match.mask->masks, ct_zone);
+    if (!state_mask && !zone_mask) {
+        /* No conntrack fields are matched; nothing to verify. */
+        return 0;
+    }
+
+    ofproto = ofproto_dpif_cast(rule->ofproto);
+    support = &ofproto_dpif_get_support(ofproto)->odp;
+    if (state_mask && !support->ct_state) {
+        return OFPERR_OFPBMC_BAD_FIELD;
+    }
+    if (zone_mask && !support->ct_zone) {
+        return OFPERR_OFPBMC_BAD_FIELD;
+    }
+    if (state_mask & CS_UNSUPPORTED_MASK) {
+        return OFPERR_OFPBMC_BAD_MASK;
+    }
+    return 0;
+}
+
+static enum ofperr
rule_construct(struct rule *rule_)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
struct rule_dpif *rule = rule_dpif_cast(rule_);
+ int error;
+
+ error = rule_check(rule_);
+ if (error) {
+ return error;
+ }
+
ovs_mutex_init_adaptive(&rule->stats_mutex);
rule->stats.n_packets = 0;
rule->stats.n_bytes = 0;
@@ -103,6 +103,10 @@ only metadata. The metadata can be:
Packet QoS priority.
.IP \fIpkt_mark\fR
Mark of the packet.
+.IP \fIct_state\fR
+Connection state of the packet.
+.IP \fIct_zone\fR
+Connection tracking zone for packet.
.IP \fItun_id\fR
The tunnel ID on which the packet arrived.
.IP \fIin_port\fR
@@ -110,3 +110,10 @@ fi
if test "$IS_WIN32" = "yes"; then
HAVE_PYTHON="no"
fi
+
+# Conntrack test requirements
+if test x`which conntrack` != x; then
+ HAVE_CONNTRACK="yes"
+else
+ HAVE_CONNTRACK="no"
+fi
@@ -337,6 +337,7 @@ CHECK_PYFILES = \
tests/test-daemon.py \
tests/test-json.py \
tests/test-jsonrpc.py \
+ tests/test-l7.py \
tests/test-ovsdb.py \
tests/test-reconnect.py \
tests/MockXenAPI.py \
@@ -82,7 +82,7 @@ AT_CHECK([cat ovs-vswitchd.log | grep -A 1 'miss upcall' | tail -n 1], [0], [dnl
skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)
])
AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
-pkt_mark=0,recirc_id=0,dp_hash=0,skb_priority=0,icmp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.0.0.2,nw_dst=10.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,icmp_type=8,icmp_code=0, actions: <del>
+pkt_mark=0,recirc_id=0,dp_hash=0,skb_priority=0,ct_state=0,ct_zone=0,icmp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.0.0.2,nw_dst=10.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,icmp_type=8,icmp_code=0, actions: <del>
recirc_id=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_frag=no, actions: <del>
])
@@ -86,6 +86,11 @@ sed '/bos=0/{
s/^/ODP_FIT_TOO_LITTLE: /
}' < odp-in.txt > odp-out.txt
+dnl Some fields are always printed for this test, because wildcards aren't
+dnl specified. We can skip these.
+sed -i 's/\(skb_mark(0)\),\(ct\)/\1,ct_state(0),ct_zone(0),\2/' odp-out.txt
+sed -i 's/\(skb_mark([[^)]]*)\),\(recirc\)/\1,ct_state(0),ct_zone(0),\2/' odp-out.txt
+
AT_CHECK_UNQUOTED([ovstest test-odp parse-keys < odp-in.txt], [0], [`cat odp-out.txt`
])
AT_CLEANUP
@@ -153,6 +158,10 @@ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/
s/$/)/' odp-base.txt
echo
+ echo '# Valid forms with conntrack fields.'
+ sed 's/\(eth([[^)]]*),?\)/\1,ct_state(+trk),/' odp-base.txt
+
+ echo
echo '# Valid forms with IP first fragment.'
sed -n 's/,frag=no),/,frag=first),/p' odp-base.txt
@@ -293,6 +302,9 @@ tnl_push(tnl_port(6),header(size=50,type=4,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:1
tnl_push(tnl_port(6),header(size=50,type=5,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x40),udp(src=0,dst=6081,csum=0x0),geneve(oam,vni=0x1c7)),out_port(1))
tnl_push(tnl_port(6),header(size=58,type=5,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x40),udp(src=0,dst=6081,csum=0x0),geneve(crit,vni=0x1c7,options({class=0xffff,type=0x80,len=4,0xa}))),out_port(1))
tnl_push(tnl_port(6),header(size=50,type=5,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x40),udp(src=0,dst=6081,csum=0xffff),geneve(vni=0x1c7)),out_port(1))
+ct
+ct(commit)
+ct(commit,zone=5)
])
AT_CHECK_UNQUOTED([ovstest test-odp parse-actions < actions.txt], [0],
[`cat actions.txt`
@@ -6523,8 +6523,8 @@ for i in 1 2 3 4; do
done
sleep 1
AT_CHECK([cat ovs-vswitchd.log | STRIP_UFID | FILTER_FLOW_INSTALL | STRIP_USED], [0], [dnl
-pkt_mark=0,recirc_id=0,dp_hash=0,skb_priority=0,icmp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.0.0.2,nw_dst=10.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,icmp_type=8,icmp_code=0, actions:2
-pkt_mark=0,recirc_id=0,dp_hash=0,skb_priority=0,icmp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_src=10.0.0.4,nw_dst=10.0.0.3,nw_tos=0,nw_ecn=0,nw_ttl=64,icmp_type=8,icmp_code=0, actions:drop
+pkt_mark=0,recirc_id=0,dp_hash=0,skb_priority=0,ct_state=0,ct_zone=0,icmp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.0.0.2,nw_dst=10.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,icmp_type=8,icmp_code=0, actions:2
+pkt_mark=0,recirc_id=0,dp_hash=0,skb_priority=0,ct_state=0,ct_zone=0,icmp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_src=10.0.0.4,nw_dst=10.0.0.3,nw_tos=0,nw_ecn=0,nw_ttl=64,icmp_type=8,icmp_code=0, actions:drop
])
AT_CHECK([cat ovs-vswitchd.log | STRIP_UFID | FILTER_FLOW_DUMP | grep 'packets:3'], [0], [dnl
skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:3, bytes:180, used:0.0s, actions:2
@@ -1618,6 +1618,8 @@ metadata in_port in_port_oxm pkt_mark reg0 reg1 reg2 reg3 reg4 reg5 reg6 reg7 xr
in_port_oxm: exact match or wildcard
actset_output: exact match or wildcard
pkt_mark: arbitrary mask
+ ct_state: arbitrary mask
+ ct_zone: exact match or wildcard
reg0: arbitrary mask
reg1: arbitrary mask
reg2: arbitrary mask
@@ -118,3 +118,21 @@ m4_define([ADD_NATIVE_TUNNEL],
# Strip variant pieces from ping output so the output can be reliably compared.
#
m4_define([FORMAT_PING], [grep "transmitted" | sed 's/time.*ms$/time 0ms/'])
+
+# FORMAT_CT([ip-addr])
+#
+# Keep only the lines of the piped input that contain "dst=ip-addr", and strip
+# content which would differ from test to test.
+#
+m4_define([FORMAT_CT],
+ [[grep "dst=$1" | sed -e 's/port=[0-9]*/port=<cleared>/g' -e 's/ */ /g' -e 's/secctx[^ ]* //' | cut -d' ' -f4- | sort | uniq]])
+
+# NETNS_DAEMONIZE([namespace], [command], [pidfile])
+#
+# Run 'command' as a background process within 'namespace' and record its pid
+# to 'pidfile' to allow cleanup on exit.
+#
+m4_define([NETNS_DAEMONIZE],
+ [ip netns exec $1 $2 & echo $! > $3
+ echo "kill \`cat $3\`" >> cleanup
+ ]
+)
@@ -43,3 +43,19 @@ m4_define([OVS_TRAFFIC_VSWITCHD_STOP],
[OVS_VSWITCHD_STOP([$1])
AT_CHECK([:; $2])
])
+
+# CHECK_CONNTRACK()
+#
+# Perform requirements checks for running conntrack tests, and flush the
+# kernel conntrack tables when the test is finished.
+#
+m4_define([CHECK_CONNTRACK],
+ [AT_SKIP_IF([test $HAVE_CONNTRACK = no])
+ AT_SKIP_IF([test $HAVE_PYTHON = no])
+ m4_foreach([mod], [[nf_conntrack_ipv4], [nf_conntrack_ipv6]],
+ [modprobe mod || echo "Module mod not loaded."
+ on_exit 'modprobe -r mod'
+ ])
+ on_exit 'conntrack -F'
+ ]
+)
@@ -139,3 +139,474 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([conntrack - controller])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=100,in_port=1,udp,action=ct(commit),controller
+priority=100,in_port=2,ct_state=-trk,udp,action=ct(table=0)
+priority=100,in_port=2,ct_state=+trk+est,udp,action=controller
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+AT_CAPTURE_FILE([ofctl_monitor.log])
+AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl --detach --no-chdir --pidfile 2> ofctl_monitor.log])
+
+dnl Send an unsolicited reply from port 2. This should be dropped.
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 2 ct\(table=0\) '50540000000a50540000000908004500001c00000000001100000a0101020a0101010002000100080000'])
+
+dnl OK, now start a new connection from port 1.
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 1 ct\(commit\),controller '50540000000a50540000000908004500001c00000000001100000a0101010a0101020001000200080000'])
+
+dnl Now try a reply from port 2.
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 2 ct\(table=0\) '50540000000a50540000000908004500001c00000000001100000a0101020a0101010002000100080000'])
+
+dnl Check this output. We only see the latter two packets, not the first.
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): total_len=42 in_port=1 (via action) data_len=42 (unbuffered)
+udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.1.1.1,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1,tp_dst=2 udp_csum:0
+NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=42 ct_state=est|rpl|trk,in_port=2 (via action) data_len=42 (unbuffered)
+udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=2,tp_dst=1 udp_csum:0
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - IPv4 HTTP])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,action=ct(commit),2
+priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0)
+priority=100,in_port=2,ct_state=+trk+est,tcp,action=1
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Basic connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 >/dev/null])
+
+dnl HTTP requests from ns0->ns1 should work fine.
+NETNS_DAEMONIZE([at_ns1], [[$PYTHON $srcdir/test-l7.py]], [http0.pid])
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([conntrack -L 2>&1 | FORMAT_CT(10.1.1.2)], [0], [dnl
+TIME_WAIT src=10.1.1.1 dst=10.1.1.2 sport=<cleared> dport=<cleared> src=10.1.1.2 dst=10.1.1.1 sport=<cleared> dport=<cleared> [[ASSURED]] mark=0 use=1
+])
+
+dnl HTTP requests from ns1->ns0 should fail due to network failure.
+dnl Try 3 times, in 1 second intervals.
+NETNS_DAEMONIZE([at_ns0], [[$PYTHON $srcdir/test-l7.py]], [http1.pid])
+NS_CHECK_EXEC([at_ns1], [wget 10.1.1.1 -t 3 -T 1 -v -o wget1.log], [4])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - IPv6 HTTP])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "fc00::1/96")
+ADD_VETH(p1, at_ns1, br0, "fc00::2/96")
+
+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,icmp6,action=normal
+priority=100,in_port=1,tcp6,action=ct(commit),2
+priority=100,in_port=2,ct_state=-trk,tcp6,action=ct(table=0)
+priority=100,in_port=2,ct_state=+trk+est,tcp6,action=1
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Without this sleep, we get occasional failures due to the following error:
+dnl "connect: Cannot assign requested address"
+sleep 2;
+
+dnl HTTP requests from ns0->ns1 should work fine.
+NETNS_DAEMONIZE([at_ns1], [[$PYTHON $srcdir/test-l7.py http6]], [http0.pid])
+
+NS_CHECK_EXEC([at_ns0], [wget http://[[fc00::2]] -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+dnl HTTP requests from ns1->ns0 should fail due to network failure.
+dnl Try 3 times, in 1 second intervals.
+NETNS_DAEMONIZE([at_ns0], [[$PYTHON $srcdir/test-l7.py http6]], [http1.pid])
+NS_CHECK_EXEC([at_ns1], [wget http://[[fc00::1]] -t 3 -T 1 -v -o wget1.log], [4])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - commit, recirc])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1, at_ns2, at_ns3)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+ADD_VETH(p2, at_ns2, br0, "10.1.1.3/24")
+ADD_VETH(p3, at_ns3, br0, "10.1.1.4/24")
+
+dnl Allow any traffic from ns0->ns1, ns2->ns3.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,ct_state=-trk,action=ct(commit,table=0)
+priority=100,in_port=1,tcp,ct_state=+trk,action=2
+priority=100,in_port=2,tcp,ct_state=-trk,action=ct(table=0)
+priority=100,in_port=2,tcp,ct_state=+trk,action=1
+priority=100,in_port=3,tcp,ct_state=-trk,action=set_field:0->metadata,ct(table=0)
+priority=100,in_port=3,tcp,ct_state=+trk,metadata=0,action=set_field:1->metadata,ct(commit,table=0)
+priority=100,in_port=3,tcp,ct_state=+trk,metadata=1,action=4
+priority=100,in_port=4,tcp,ct_state=-trk,action=ct(commit,table=0)
+priority=100,in_port=4,tcp,ct_state=+trk,action=3
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl HTTP requests from p0->p1 should work fine.
+NETNS_DAEMONIZE([at_ns1], [[$PYTHON $srcdir/test-l7.py]], [http0.pid])
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+dnl HTTP requests from p2->p3 should work fine.
+NETNS_DAEMONIZE([at_ns3], [[$PYTHON $srcdir/test-l7.py]], [http1.pid])
+NS_CHECK_EXEC([at_ns2], [wget 10.1.1.4 -t 3 -T 1 --retry-connrefused -v -o wget1.log])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - preserve registers])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1, at_ns2, at_ns3)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+ADD_VETH(p2, at_ns2, br0, "10.1.1.3/24")
+ADD_VETH(p3, at_ns3, br0, "10.1.1.4/24")
+
+dnl Allow any traffic from ns0->ns1, ns2->ns3.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,ct_state=-trk,action=ct(commit,table=0)
+priority=100,in_port=1,tcp,ct_state=+trk,action=2
+priority=100,in_port=2,tcp,ct_state=-trk,action=ct(table=0)
+priority=100,in_port=2,tcp,ct_state=+trk,action=1
+priority=100,in_port=3,tcp,ct_state=-trk,action=load:0->NXM_NX_REG0[[]],ct(table=0)
+priority=100,in_port=3,tcp,ct_state=+trk,reg0=0,action=load:1->NXM_NX_REG0[[]],ct(commit,table=0)
+priority=100,in_port=3,tcp,ct_state=+trk,reg0=1,action=4
+priority=100,in_port=4,tcp,ct_state=-trk,action=ct(commit,table=0)
+priority=100,in_port=4,tcp,ct_state=+trk,action=3
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl HTTP requests from p0->p1 should work fine.
+NETNS_DAEMONIZE([at_ns1], [[$PYTHON $srcdir/test-l7.py]], [http0.pid])
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+dnl HTTP requests from p2->p3 should work fine.
+NETNS_DAEMONIZE([at_ns3], [[$PYTHON $srcdir/test-l7.py]], [http1.pid])
+NS_CHECK_EXEC([at_ns2], [wget 10.1.1.4 -t 3 -T 1 --retry-connrefused -v -o wget1.log])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - invalid])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1, at_ns2, at_ns3)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+ADD_VETH(p2, at_ns2, br0, "10.1.1.3/24")
+ADD_VETH(p3, at_ns3, br0, "10.1.1.4/24")
+
+dnl Pass traffic from ns0->ns1 without committing, but attempt to track in
+dnl the opposite direction. This should fail.
+dnl Pass traffic from ns2->ns3 without committing, and this time match
+dnl invalid traffic and allow it through.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,action=ct(),2
+priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0)
+priority=100,in_port=2,ct_state=+trk+new,tcp,action=1
+priority=100,in_port=3,tcp,action=ct(),4
+priority=100,in_port=4,ct_state=-trk,tcp,action=ct(table=0)
+priority=100,in_port=4,ct_state=+trk+inv,tcp,action=3
+priority=100,in_port=4,ct_state=+trk+new,tcp,action=3
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl We set up our rules to allow the request without committing. The return
+dnl traffic can't be identified, because the initial request wasn't committed.
+dnl For the first pair of ports, this means that the connection fails.
+NETNS_DAEMONIZE([at_ns1], [[$PYTHON $srcdir/test-l7.py]], [http0.pid])
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log], [4])
+
+dnl For the second pair, we allow packets from invalid connections, so it works.
+NETNS_DAEMONIZE([at_ns3], [[$PYTHON $srcdir/test-l7.py]], [http1.pid])
+NS_CHECK_EXEC([at_ns2], [wget 10.1.1.4 -t 3 -T 1 --retry-connrefused -v -o wget1.log])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - zones])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1, at_ns2, at_ns3)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+ADD_VETH(p2, at_ns2, br0, "10.1.1.3/24")
+ADD_VETH(p3, at_ns3, br0, "10.1.1.4/24")
+
+dnl Allow any traffic from ns0->ns1. Allow return traffic, matching on zone.
+dnl For ns2->ns3, use a different zone and see that the match fails.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,action=ct(commit,zone=1),2
+priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0,zone=1)
+priority=100,in_port=2,ct_state=+trk,ct_zone=1,tcp,action=1
+priority=100,in_port=3,tcp,action=ct(commit,zone=2),4
+priority=100,in_port=4,ct_state=-trk,tcp,action=ct(table=0,zone=2)
+priority=100,in_port=4,ct_state=+trk,ct_zone=1,tcp,action=3
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl HTTP requests from p0->p1 should work fine.
+NETNS_DAEMONIZE([at_ns1], [[$PYTHON $srcdir/test-l7.py]], [http0.pid])
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([conntrack -L 2>&1 | FORMAT_CT(10.1.1.2)], [0], [dnl
+TIME_WAIT src=10.1.1.1 dst=10.1.1.2 sport=<cleared> dport=<cleared> src=10.1.1.2 dst=10.1.1.1 sport=<cleared> dport=<cleared> [[ASSURED]] mark=0 zone=1 use=1
+])
+
+dnl HTTP requests from p2->p3 should fail due to network failure.
+dnl Try 3 times, in 1 second intervals.
+NETNS_DAEMONIZE([at_ns3], [[$PYTHON $srcdir/test-l7.py]], [http1.pid])
+NS_CHECK_EXEC([at_ns2], [wget 10.1.1.4 -t 3 -T 1 -v -o wget1.log], [4])
+
+AT_CHECK([conntrack -L 2>&1 | FORMAT_CT(10.1.1.4)], [0], [dnl
+SYN_RECV src=10.1.1.3 dst=10.1.1.4 sport=<cleared> dport=<cleared> src=10.1.1.4 dst=10.1.1.3 sport=<cleared> dport=<cleared> mark=0 zone=2 use=1
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - zones from field])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1, at_ns2, at_ns3)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+ADD_VETH(p2, at_ns2, br0, "10.1.1.3/24")
+ADD_VETH(p3, at_ns3, br0, "10.1.1.4/24")
+
+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,action=load:1->NXM_NX_REG0[[0..15]],ct(commit,zone=NXM_NX_REG0[[0..15]]),2
+priority=100,in_port=2,ct_state=-trk,tcp,action=load:1->NXM_NX_REG0[[0..15]],ct(table=0,zone=NXM_NX_REG0[[0..15]])
+priority=100,in_port=2,ct_state=+trk,ct_zone=1,tcp,action=1
+priority=100,in_port=3,tcp,action=load:2->NXM_NX_REG0[[0..15]],ct(commit,zone=NXM_NX_REG0[[0..15]]),4
+priority=100,in_port=4,ct_state=-trk,tcp,action=load:2->NXM_NX_REG0[[0..15]],ct(table=0,zone=NXM_NX_REG0[[0..15]])
+priority=100,in_port=4,ct_state=+trk,ct_zone=1,tcp,action=3
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl HTTP requests from p0->p1 should work fine.
+NETNS_DAEMONIZE([at_ns1], [[$PYTHON $srcdir/test-l7.py]], [http0.pid])
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([conntrack -L 2>&1 | FORMAT_CT(10.1.1.2)], [0], [dnl
+TIME_WAIT src=10.1.1.1 dst=10.1.1.2 sport=<cleared> dport=<cleared> src=10.1.1.2 dst=10.1.1.1 sport=<cleared> dport=<cleared> [[ASSURED]] mark=0 zone=1 use=1
+])
+
+dnl HTTP requests from p2->p3 should fail due to network failure.
+dnl Try 3 times, in 1 second intervals.
+NETNS_DAEMONIZE([at_ns3], [[$PYTHON $srcdir/test-l7.py]], [http1.pid])
+NS_CHECK_EXEC([at_ns2], [wget 10.1.1.4 -t 3 -T 1 -v -o wget1.log], [4])
+
+AT_CHECK([conntrack -L 2>&1 | FORMAT_CT(10.1.1.4)], [0], [dnl
+SYN_RECV src=10.1.1.3 dst=10.1.1.4 sport=<cleared> dport=<cleared> src=10.1.1.4 dst=10.1.1.3 sport=<cleared> dport=<cleared> mark=0 zone=2 use=1
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - multiple bridges])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone --\
+ add-br br1 --\
+ add-port br0 patch+ -- set int patch+ type=patch options:peer=patch- --\
+ add-port br1 patch- -- set int patch- type=patch options:peer=patch+ --])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br1, "10.1.1.2/24")
+
+dnl Allow any traffic from ns0->br1, allow established in reverse.
+AT_DATA([flows-br0.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=2,tcp,ct_state=-trk,action=ct(commit,zone=1),1
+priority=100,in_port=1,tcp,ct_state=-trk,action=ct(table=0,zone=1)
+priority=100,in_port=1,tcp,ct_state=+trk+est,ct_zone=1,action=2
+])
+
+dnl Allow any traffic from br0->ns1, allow established in reverse.
+AT_DATA([flows-br1.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,ct_state=-trk,action=ct(table=0,zone=2)
+priority=100,in_port=1,tcp,ct_state=+trk+new,ct_zone=2,action=ct(commit,zone=2),2
+priority=100,in_port=1,tcp,ct_state=+trk+est,ct_zone=2,action=2
+priority=100,in_port=2,tcp,ct_state=-trk,action=ct(table=0,zone=2)
+priority=100,in_port=2,tcp,ct_state=+trk+est,ct_zone=2,action=ct(commit,zone=2),1
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows-br0.txt])
+AT_CHECK([ovs-ofctl add-flows br1 flows-br1.txt])
+
+dnl HTTP requests from p0->p1 should work fine.
+NETNS_DAEMONIZE([at_ns1], [[$PYTHON $srcdir/test-l7.py]], [http0.pid])
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - multiple zones])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,action=ct(commit,zone=1),ct(commit,zone=2),2
+priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0,zone=2)
+priority=100,in_port=2,ct_state=+trk,ct_zone=2,tcp,action=1
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl HTTP requests from p0->p1 should work fine.
+NETNS_DAEMONIZE([at_ns1], [[$PYTHON $srcdir/test-l7.py]], [http0.pid])
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+dnl (again) HTTP requests from p0->p1 should work fine.
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([conntrack -L 2>&1 | FORMAT_CT(10.1.1.2)], [0], [dnl
+SYN_SENT src=10.1.1.1 dst=10.1.1.2 sport=<cleared> dport=<cleared> [[UNREPLIED]] src=10.1.1.2 dst=10.1.1.1 sport=<cleared> dport=<cleared> mark=0 zone=1 use=1
+TIME_WAIT src=10.1.1.1 dst=10.1.1.2 sport=<cleared> dport=<cleared> src=10.1.1.2 dst=10.1.1.1 sport=<cleared> dport=<cleared> [[ASSURED]] mark=0 zone=2 use=1
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - ICMP related 2])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "172.16.0.1/24")
+ADD_VETH(p1, at_ns1, br0, "172.16.0.2/24")
+
+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=100,in_port=1,ct_state=-trk,udp,action=ct(commit,table=0)
+priority=100,in_port=1,ct_state=+trk,actions=controller
+priority=100,in_port=2,ct_state=-trk,action=ct(table=0)
+priority=100,in_port=2,ct_state=+trk+rel+rpl,action=controller
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+AT_CAPTURE_FILE([ofctl_monitor.log])
+AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl --detach --no-chdir --pidfile 2> ofctl_monitor.log])
+
+dnl 1. Send an ICMP port unreach reply for port 8738, without any previous request
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 2 ct\(table=0\) 'f64c473528c9c6f54ecb72db080045c0003d2e8700004001f355ac100004ac1000030303553f0000000045000021317040004011b138ac100003ac10000411112222000d20966369616f0a'])
+
+dnl 2. Send a UDP packet to port 5555
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 1 ct\(commit,table=0\) 'c6f94ecb72dbe64c473528c9080045000021317040004011b138ac100001ac100002a28e15b3000d20966369616f0a'])
+
+dnl 3. Send an ICMP port unreach reply for port 5555, related to the UDP packet from step 2
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 2 ct\(table=0\) 'e64c473528c9c6f94ecb72db080045c0003d2e8700004001f355ac100002ac1000010303553f0000000045000021317040004011b138ac100001ac100002a28e15b3000d20966369616f0a'])
+
+dnl Check this output. We only see the latter two packets, not the first.
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=47 ct_state=new|trk,in_port=1 (via action) data_len=47 (unbuffered)
+udp,vlan_tci=0x0000,dl_src=e6:4c:47:35:28:c9,dl_dst=c6:f9:4e:cb:72:db,nw_src=172.16.0.1,nw_dst=172.16.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=41614,tp_dst=5555 udp_csum:2096
+NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=75 ct_state=rel|rpl|trk,in_port=2 (via action) data_len=75 (unbuffered)
+icmp,vlan_tci=0x0000,dl_src=c6:f9:4e:cb:72:db,dl_dst=e6:4c:47:35:28:c9,nw_src=172.16.0.2,nw_dst=172.16.0.1,nw_tos=192,nw_ecn=0,nw_ttl=64,icmp_type=3,icmp_code=3 icmp_csum:553f
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
@@ -37,3 +37,12 @@ m4_define([OVS_TRAFFIC_VSWITCHD_STOP],
/dpif_netlink.*Generic Netlink family 'ovs_datapath' does not exist. The Open vSwitch kernel module is probably not loaded./d"])
AT_CHECK([:; $2])
])
+
+# CHECK_CONNTRACK()
+#
+# Perform requirements checks for running conntrack tests, and flush the
+# kernel conntrack tables when the test is finished.
+#
+m4_define([CHECK_CONNTRACK],
+ [AT_SKIP_IF(true)]
+)
new file mode 100755
@@ -0,0 +1,72 @@
+# Copyright (c) 2015 Nicira, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import socket
+
+from BaseHTTPServer import HTTPServer
+from SimpleHTTPServer import SimpleHTTPRequestHandler
+from SocketServer import TCPServer
+
+
+class TCPServerV6(HTTPServer):
+ address_family = socket.AF_INET6
+
+
+def get_ftpd():
+ try:
+ from pyftpdlib.authorizers import DummyAuthorizer
+ from pyftpdlib.handlers import FTPHandler
+ from pyftpdlib.servers import FTPServer
+
+ class OVSFTPHandler(FTPHandler):
+ authorizer = DummyAuthorizer()
+ authorizer.add_anonymous("/tmp")
+ server = [FTPServer, OVSFTPHandler, 21]
+ except ImportError:
+ server = None
+ pass
+ return server
+
+
+def main():
+ SERVERS = {
+ 'http': [TCPServer, SimpleHTTPRequestHandler, 80],
+ 'http6': [TCPServerV6, SimpleHTTPRequestHandler, 80],
+ }
+
+ ftpd = get_ftpd()
+ if ftpd is not None:
+ SERVERS['ftp'] = ftpd
+
+ protocols = [srv for srv in SERVERS]
+ parser = argparse.ArgumentParser(
+ description='Run basic application servers.')
+ parser.add_argument('proto', default='http', nargs='?',
+ help='protocol to serve (%s)' % protocols)
+ args = parser.parse_args()
+
+ if args.proto not in SERVERS:
+ parser.print_help()
+ exit(1)
+
+ constructor = SERVERS[args.proto][0]
+ handler = SERVERS[args.proto][1]
+ port = SERVERS[args.proto][2]
+ srv = constructor(('', port), handler)
+ srv.serve_forever()
+
+
+if __name__ == '__main__':
+ main()
@@ -58,6 +58,8 @@ parse_keys(bool wc_keys)
.flow = &flow,
.support = {
.recirc = true,
+ .ct_state = true,
+ .ct_zone = true,
},
};
@@ -1305,6 +1305,65 @@ is used only with the \fBconjunction\fR action (see below).
.IP
This field was introduced in Open vSwitch 2.4.
.
+.IP \fBct_state=\fIflags\fB/\fImask\fR
+.IQ \fBct_state=\fR[\fB+\fIflag\fR...][\fB-\fIflag\fR...]
+Bitwise match on connection state flags. The flags are only available
+after a call to the \fBct\fR action with the \fBtable\fR specified.
+
+.IP
+The \fIflags\fR and \fImask\fR are 8-bit numbers written in decimal or
+in hexadecimal prefixed by \fB0x\fR. Each 1-bit in \fImask\fR requires
+that the corresponding bit in \fIflags\fR must match. Each 0-bit in
+\fImask\fR causes the corresponding bit to be ignored.
+.IP
+Alternatively, the flags can be specified by their symbolic names
+(listed below), each preceded by either \fB+\fR for a flag that must
+be set, or \fB\-\fR for a flag that must be unset, without any other
+delimiters between the flags. Flags not mentioned are wildcarded. For
+example, \fBtcp,ct_state=+trk\-new\fR matches TCP packets that
+have been run through the connection tracker and do not establish a new
+flow.
+.IP
+The following flags describe the state of the tracking:
+.RS
+.IP "\fB0x80: trk\fR"
+Connection tracking has occurred.
+.IP "\fB0x40: rpl\fR"
+The flow is in the reply direction, meaning it did not initiate the
+connection.
+.IP "\fB0x20: inv\fR"
+The flow is invalid, meaning that the connection tracker couldn't identify the
+connection.
+.RS
+.PP
+This flag may be set for the following reasons:
+.RS
+L3/L4 protocol handler is not loaded/unavailable. With the Linux kernel
+datapath, this may mean that the "nf_conntrack_ipv4" or "nf_conntrack_ipv6"
+modules are not loaded.
+.PP
+L3/L4 protocol handler determines that the packet is malformed or invalid.
+.PP
+Packets are of unexpected length for the protocol.
+.RE
+.RE
+.IP "\fB0x01: new\fR"
+This is the beginning of a new connection.
+.IP "\fB0x02: est\fR"
+This is part of an already existing connection.
+.IP "\fB0x04: rel\fR"
+This is a connection that is related to an existing connection, for
+instance ICMP "destination unreachable" messages or FTP data connections.
+.RE
+.
+.PP
+The following fields are data associated with the connection tracker and
+can only be matched or set after running through the connection tracker
+by using the \fBct\fR action.
+.
+.IP \fBct_zone=\fIvalue
+Matches connection zone \fIvalue\fR exactly.
+.
.PP
Defining IPv6 flows (those with \fBdl_type\fR equal to 0x86dd) requires
support for NXM. The following shorthand notations are available for
@@ -1542,6 +1601,37 @@ OpenFlow implementations do not support queuing at all.
Restores the queue to the value it was before any \fBset_queue\fR
actions were applied.
.
+.IP \fBct\fR
+.IQ \fBct\fB(\fR[\fIargument\fR][\fB,\fIargument\fR...]\fB)
+Send the packet through the connection tracker. The following arguments
+are supported:
+
+.RS
+.IP \fBcommit\fR
+Commit the flow to the connection tracking module.
+.IP \fBtable=\fInumber\fR
+Fork pipeline processing in two. The original instance of the packet will
+continue processing the current actions list. An additional instance of the
+packet will be sent to the connection tracker, which will be re-injected into
+the OpenFlow pipeline to resume processing in table \fInumber\fR, with the
+\fBct_state\fR and other ct match fields set. If the \fBtable\fR is not
+specified, then the packet is submitted to the connection tracker, but the
+pipeline does not fork and the ct match fields are not populated. It is
+strongly recommended to specify a table later than the current table to prevent
+loops.
+.IP \fBzone=\fIvalue\fR
+.IQ \fBzone=\fIsrc\fB[\fIstart\fB..\fIend\fB]\fR
+A 16-bit context id that can be used to isolate connections into separate
+domains, allowing overlapping network addresses in different zones. If a zone
+is not provided, then the default is to use zone zero. The \fBzone\fR may be
+specified either as an immediate 16-bit \fIvalue\fR, or may be provided from an
+NXM field \fIsrc\fR. The \fIstart\fR and \fIend\fR pair are inclusive, and must
+specify a 16-bit range within the field.
+.RE
+.IP
+Currently, connection tracking is only available on Linux kernels with the
+conntrack module loaded.
+.
.IP \fBdec_ttl\fR
.IQ \fBdec_ttl\fB[\fR(\fIid1,id2\fI)\fR]\fR
Decrement TTL of IPv4 packet or hop limit of IPv6 packet. If the