diff mbox

[ovs-dev,v5] xlate: Use OVS_CT_ATTR_EVENTMASK.

Message ID 1493338913-35978-1-git-send-email-jarno@ovn.org
State Accepted
Delegated to: Joe Stringer
Headers show

Commit Message

Jarno Rajahalme April 28, 2017, 12:21 a.m. UTC
Specify the event mask with CT commit including bits for CT features
exposed at the OVS interface (mark and label changes in addition to
basic creation and destruction of conntrack entries).

Without this any listener of conntrack update events will typically
(depending on system configuration) receive events for each L4 (e.g.,
TCP) state machine change, which can multiply the number of events
received per connection.

By including the new, related, and destroy events any listener of new
conntrack events gets notified of new related and non-related
connections, and any listener of destroy events will get notified of
deleted (typically timed out) conntrack entries.

By including the flags for mark and labels, any listener of conntrack
update events gets notified whenever the connmark or conntrack labels
are changed from the values reported within the new events.

VMware-BZ: #1837218
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
---
v5: Use addresses in the loopback range and zero port numbers in the probe
    to avoid hitting any real conntrack entries.

build-aux/extract-odp-netlink-h |  2 ++
 ofproto/ofproto-dpif-xlate.c    |  6 ++++
 ofproto/ofproto-dpif.c          | 63 +++++++++++++++++++++++++++++++++++++++++
 ofproto/ofproto-dpif.h          |  5 +++-
 4 files changed, 75 insertions(+), 1 deletion(-)

Comments

Joe Stringer April 28, 2017, 7:31 p.m. UTC | #1
On 27 April 2017 at 17:21, Jarno Rajahalme <jarno@ovn.org> wrote:
> Specify the event mask with CT commit including bits for CT features
> exposed at the OVS interface (mark and label changes in addition to
> basic creation and destruction of conntrack entries).
>
> Without this any listener of conntrack update events will typically
> (depending on system configuration) receive events for each L4 (e.g.,
> TCP) state machine change, which can multiply the number of events
> received per connection.
>
> By including the new, related, and destroy events any listener of new
> conntrack events gets notified of new related and non-related
> connections, and any listener of destroy events will get notified of
> deleted (typically timed out) conntrack entries.
>
> By including the flags for mark and labels, any listener of conntrack
> update events gets notified whenever the connmark or conntrack labels
> are changed from the values reported within the new events.
>
> VMware-BZ: #1837218
> Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
> Acked-by: Joe Stringer <joe@ovn.org>
> ---
> v5: Use addresses in the loopback range and zero port numbers in the probe
>     to avoid hitting any real conntrack entries.

Thanks for spinning this new version out, it seems unlikely to clash
with existing connections now. LGTM.
Jarno Rajahalme April 28, 2017, 7:54 p.m. UTC | #2
> On Apr 28, 2017, at 12:31 PM, Joe Stringer <joe@ovn.org> wrote:
> 
> On 27 April 2017 at 17:21, Jarno Rajahalme <jarno@ovn.org <mailto:jarno@ovn.org>> wrote:
>> Specify the event mask with CT commit including bits for CT features
>> exposed at the OVS interface (mark and label changes in addition to
>> basic creation and destruction of conntrack entries).
>> 
>> Without this any listener of conntrack update events will typically
>> (depending on system configuration) receive events for each L4 (e.g.,
>> TCP) state machine change, which can multiply the number of events
>> received per connection.
>> 
>> By including the new, related, and destroy events any listener of new
>> conntrack events gets notified of new related and non-related
>> connections, and any listener of destroy events will get notified of
>> deleted (typically timed out) conntrack entries.
>> 
>> By including the flags for mark and labels, any listener of conntrack
>> update events gets notified whenever the connmark or conntrack labels
>> are changed from the values reported within the new events.
>> 
>> VMware-BZ: #1837218
>> Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
>> Acked-by: Joe Stringer <joe@ovn.org>
>> ---
>> v5: Use addresses in the loopback range and zero port numbers in the probe
>>    to avoid hitting any real conntrack entries.
> 
> Thanks for spinning this new version out, it seems unlikely to clash
> with existing connections now. LGTM.

Thanks, pushed to master.

  Jarno
diff mbox

Patch

diff --git a/build-aux/extract-odp-netlink-h b/build-aux/extract-odp-netlink-h
index 907a70a..7fb6ce8 100755
--- a/build-aux/extract-odp-netlink-h
+++ b/build-aux/extract-odp-netlink-h
@@ -19,6 +19,8 @@  $i\
 #ifdef _WIN32\
 #include "OvsDpInterfaceExt.h"\
 #include "OvsDpInterfaceCtExt.h"\
+#else\
+#include "linux/netfilter/nf_conntrack_common.h"\
 #endif\
 
 # Use OVS's own struct eth_addr instead of a 6-byte char array.
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index d8c6a7c..ab5eef8 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -5351,6 +5351,12 @@  compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc)
     if (ofc->flags & NX_CT_F_COMMIT) {
         nl_msg_put_flag(ctx->odp_actions, ofc->flags & NX_CT_F_FORCE ?
                         OVS_CT_ATTR_FORCE_COMMIT : OVS_CT_ATTR_COMMIT);
+        if (ctx->xbridge->support.ct_eventmask) {
+            nl_msg_put_u32(ctx->odp_actions, OVS_CT_ATTR_EVENTMASK,
+                           1 << IPCT_NEW | 1 << IPCT_RELATED |
+                           1 << IPCT_DESTROY | 1 << IPCT_MARK |
+                           1 << IPCT_LABEL);
+        }
     }
     nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone);
     put_ct_mark(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index c73c273..23896f0 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -1241,6 +1241,68 @@  check_clone(struct dpif_backer *backer)
     return !error;
 }
 
+/* Tests whether 'backer''s datapath supports the OVS_CT_ATTR_EVENTMASK
+ * attribute in OVS_ACTION_ATTR_CT. */
+static bool
+check_ct_eventmask(struct dpif_backer *backer)
+{
+    struct dpif_execute execute;
+    struct dp_packet packet;
+    struct ofpbuf actions;
+    struct flow flow = {
+        .dl_type = CONSTANT_HTONS(ETH_TYPE_IP),
+        .nw_proto = IPPROTO_UDP,
+        .nw_ttl = 64,
+        /* Use the broadcast address on the loopback address range 127/8 to
+         * avoid hitting any real conntrack entries.  We leave the UDP ports to
+         * zeroes for the same purpose. */
+        .nw_src = CONSTANT_HTONL(0x7fffffff),
+        .nw_dst = CONSTANT_HTONL(0x7fffffff),
+    };
+    size_t ct_start;
+    int error;
+
+    /* Compose CT action with eventmask attribute and check if datapath can
+     * decode the message.  */
+    ofpbuf_init(&actions, 64);
+    ct_start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_CT);
+    /* Eventmask has no effect without the commit flag, but currently the
+     * datapath will accept an eventmask even without commit.  This is useful
+     * as we do not want to persist the probe connection in the conntrack
+     * table. */
+    nl_msg_put_u32(&actions, OVS_CT_ATTR_EVENTMASK, ~0);
+    nl_msg_end_nested(&actions, ct_start);
+
+    /* Compose a dummy UDP packet. */
+    dp_packet_init(&packet, 0);
+    flow_compose(&packet, &flow);
+
+    /* Execute the actions.  On older datapaths this fails with EINVAL, on
+     * newer datapaths it succeeds. */
+    execute.actions = actions.data;
+    execute.actions_len = actions.size;
+    execute.packet = &packet;
+    execute.flow = &flow;
+    execute.needs_help = false;
+    execute.probe = true;
+    execute.mtu = 0;
+
+    error = dpif_execute(backer->dpif, &execute);
+
+    dp_packet_uninit(&packet);
+    ofpbuf_uninit(&actions);
+
+    if (error) {
+        VLOG_INFO("%s: Datapath does not support eventmask in conntrack action",
+                  dpif_name(backer->dpif));
+    } else {
+        VLOG_INFO("%s: Datapath supports eventmask in conntrack action",
+                  dpif_name(backer->dpif));
+    }
+
+    return !error;
+}
+
 #define CHECK_FEATURE__(NAME, SUPPORT, FIELD, VALUE)                        \
 static bool                                                                 \
 check_##NAME(struct dpif_backer *backer)                                    \
@@ -1300,6 +1362,7 @@  check_support(struct dpif_backer *backer)
     backer->support.tnl_push_pop = dpif_supports_tnl_push_pop(backer->dpif);
     backer->support.clone = check_clone(backer);
     backer->support.sample_nesting = check_max_sample_nesting(backer);
+    backer->support.ct_eventmask = check_ct_eventmask(backer);
 
     /* Flow fields. */
     backer->support.odp.ct_state = check_ct_state(backer);
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index 81a0bdf..1fe2e25 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -174,7 +174,10 @@  struct group_dpif *group_dpif_lookup(struct ofproto_dpif *,
     DPIF_SUPPORT_FIELD(bool, clone, "Clone action")                         \
                                                                             \
     /* Maximum level of nesting allowed by OVS_ACTION_ATTR_SAMPLE action. */\
-    DPIF_SUPPORT_FIELD(size_t, sample_nesting, "Sample nesting")
+    DPIF_SUPPORT_FIELD(size_t, sample_nesting, "Sample nesting")            \
+                                                                            \
+    /* OVS_CT_ATTR_EVENTMASK supported by OVS_ACTION_ATTR_CT action. */     \
+    DPIF_SUPPORT_FIELD(bool, ct_eventmask, "Conntrack eventmask")
 
 /* Stores the various features which the corresponding backer supports. */
 struct dpif_backer_support {