diff mbox series

[ovs-dev,PATCHv2] userspace: Add conntrack timeout policy support.

Message ID 1586293629-30094-1-git-send-email-u9012063@gmail.com
State Superseded
Headers show
Series [ovs-dev,PATCHv2] userspace: Add conntrack timeout policy support. | expand

Commit Message

William Tu April 7, 2020, 9:07 p.m. UTC
Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout
policy support") adds conntrack timeout policy for kernel datapath.
This patch enables support for the userspace datapath.  I tested
using the 'make check-system-userspace' which checks the timeout
policies for ICMP and UDP cases.

Signed-off-by: William Tu <u9012063@gmail.com>
---
v2:
  - enable icmp_reply timeout for userspace datapath
  - travis: https://travis-ci.org/github/williamtu/ovs-travis/builds/672248069
---
 Documentation/faq/releases.rst   |   2 +-
 NEWS                             |   2 +
 lib/automake.mk                  |   2 +
 lib/conntrack-icmp.c             |   7 +-
 lib/conntrack-other.c            |   9 +-
 lib/conntrack-private.h          |  27 +++--
 lib/conntrack-tcp.c              |  22 ++--
 lib/conntrack-tp.c               | 247 +++++++++++++++++++++++++++++++++++++++
 lib/conntrack-tp.h               |  24 ++++
 lib/conntrack.c                  | 159 ++++++++++++++++++++++++-
 lib/conntrack.h                  |  14 ++-
 lib/dpif-netdev.c                |  74 ++++++++++--
 tests/system-traffic.at          |   4 +-
 tests/system-userspace-macros.at |   6 +-
 tests/test-conntrack.c           |   6 +-
 15 files changed, 563 insertions(+), 42 deletions(-)
 create mode 100644 lib/conntrack-tp.c
 create mode 100644 lib/conntrack-tp.h

Comments

Yi-Hung Wei April 13, 2020, 11:31 p.m. UTC | #1
On Tue, Apr 7, 2020 at 2:08 PM William Tu <u9012063@gmail.com> wrote:
>
> Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout
> policy support") adds conntrack timeout policy for kernel datapath.
> This patch enables support for the userspace datapath.  I tested
> using the 'make check-system-userspace' which checks the timeout
> policies for ICMP and UDP cases.
>
> Signed-off-by: William Tu <u9012063@gmail.com>

Thanks William for implementing this feature in the userspace. I have
some comments as below.


> diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
> index b3507bd1c7fa..ffcda37f9985 100644
> --- a/Documentation/faq/releases.rst
> +++ b/Documentation/faq/releases.rst
> @@ -118,7 +118,7 @@ Q: Are all features available with all datapaths?
>      ========================== ============== ============== ========= =======
>      Connection tracking             4.3            2.5          2.6      YES
>      Conntrack Fragment Reass.       4.3            2.6          2.12     YES
> -    Conntrack Timeout Policies      5.2            2.12         NO       NO
> +    Conntrack Timeout Policies      5.2            2.12         2.13     NO

For the support of userspace timeout policies in userspace datapath,
should it be 2.14?



> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> index 9a8ca3910157..852482303684 100644
> --- a/lib/conntrack-private.h
> +++ b/lib/conntrack-private.h
> @@ -118,6 +118,8 @@ struct conn {
>      /* Immutable data. */
>      bool alg_related; /* True if alg data connection. */
>      enum ct_conn_type conn_type;
> +
> +    uint32_t tpid; /* Timeout policy ID. */

I have a minor suggestion here.  I would recommend to replace tpid
with tp_id so that when we grep tp_it it would not be confused with
vlan's tpid (tunnel protocol id) field.


> @@ -143,9 +145,9 @@ enum ct_update_res {
>      CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
>      CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
>
> -/* The smallest of the above values: it is used as an upper bound for the
> - * interval between two rounds of cleanup of expired entries */
> -#define CT_TM_MIN (30 * 1000)
> +/* This is used as an upper bound for the interval between two
> + * rounds of cleanup of expired entries */
> +#define CT_TM_MIN (1 * 1000)

I understand that while this patch enables users to customize the
timeout policy, users can set the minimum timeout configuration to 1
second. Thus, updates CT_TM_MIN to 1 second would make the conntrack
clean thread to check timer expiration every single second to meet
that requirement.  However, this may increase the system load even
without any customized timeout policy.  I would recommend to update
CT_TM_MIN dynamically to the minimum customized timeout value on the
fly.


> diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
> new file mode 100644
> index 000000000000..6be4b8276340
> --- /dev/null
> +++ b/lib/conntrack-tp.c
> @@ -0,0 +1,247 @@
> +/*
> + * Copyright (c) 2020 VMware, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "conntrack-private.h"
> +#include "conntrack-tp.h"
> +#include "dp-packet.h"
> +
> +#include "openvswitch/vlog.h"
> +VLOG_DEFINE_THIS_MODULE(conntrack_tp);
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +
> +static const char *ct_timeout_str[] = {
> +#define CT_TIMEOUT(NAME, VALUE) #NAME
> +    CT_TIMEOUTS
> +#undef CT_TIMEOUT
> +};
> +
> +static inline bool
> +check_present_and_set(struct timeout_policy *tp, uint32_t *v,
> +                      enum ct_dpif_tp_attr attr)
> +{
> +    if (tp && (tp->p.present & (1 << attr))) {
> +        *v = tp->p.attrs[attr];
> +        return true;
> +    }
> +    return false;
> +}
> +
> +#define TP_HAS_FUNC(ATTR)  \
> +static bool \
> +tp_has_##ATTR(struct timeout_policy *tp, uint32_t *v)              \
> +{                                                                  \
> +    return check_present_and_set(tp, v, CT_DPIF_TP_ATTR_##ATTR);   \
> +}
> +
> +/* These functions check whether the timeout value is set from the
> + * present bit.  If true, then set the value to '*v'.  For meaning
> + * of each value, see CT_Timeout_Policy table at ovs-vswitchd.conf.db(5).
> + */
> +TP_HAS_FUNC(TCP_SYN_SENT)
> +TP_HAS_FUNC(TCP_SYN_RECV)
> +TP_HAS_FUNC(TCP_ESTABLISHED)
> +TP_HAS_FUNC(TCP_FIN_WAIT)
> +TP_HAS_FUNC(TCP_TIME_WAIT)
> +TP_HAS_FUNC(TCP_CLOSE)
> +TP_HAS_FUNC(UDP_FIRST)
> +TP_HAS_FUNC(UDP_SINGLE)
> +TP_HAS_FUNC(UDP_MULTIPLE)
> +TP_HAS_FUNC(ICMP_FIRST)
> +TP_HAS_FUNC(ICMP_REPLY)
> +
> +static bool
> +conn_update_expiration_with_policy(struct conntrack *ct, struct conn *conn,
> +                                   enum ct_timeout tm, long long now,
> +                                   struct timeout_policy *tp)
> +{
> +    uint32_t val;
> +
> +    switch (tm) {
> +    case CT_TM_TCP_FIRST_PACKET:
> +        if (tp_has_TCP_SYN_SENT(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case CT_TM_TCP_OPENING:
> +        if (tp_has_TCP_SYN_RECV(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case CT_TM_TCP_ESTABLISHED:
> +        if (tp_has_TCP_ESTABLISHED(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case CT_TM_TCP_CLOSING:
> +        if (tp_has_TCP_FIN_WAIT(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case CT_TM_TCP_FIN_WAIT:
> +        if (tp_has_TCP_TIME_WAIT(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case CT_TM_TCP_CLOSED:
> +        if (tp_has_TCP_CLOSE(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case CT_TM_OTHER_FIRST:
> +        if (tp_has_UDP_FIRST(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case CT_TM_OTHER_BIDIR:
> +        if (tp_has_UDP_SINGLE(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case CT_TM_OTHER_MULTIPLE:
> +        if (tp_has_UDP_MULTIPLE(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case CT_TM_ICMP_FIRST:
> +        if (tp_has_ICMP_FIRST(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case CT_TM_ICMP_REPLY:
> +        if (tp_has_ICMP_REPLY(tp, &val)) {
> +            goto update_with_val;
> +        }
> +        break;
> +    case N_CT_TM:
> +    default:
> +        OVS_NOT_REACHED();
> +        break;
> +    }
> +    return false;
> +
> +update_with_val:
> +    conn_update_expiration(ct, conn, tm, now, val, false);
> +    return true;
> +}
> +
> +static bool
> +conn_init_expiration_with_policy(struct conntrack *ct, struct conn *conn,
> +                                 enum ct_timeout tm, long long now,
> +                                 struct timeout_policy *tp)
> +{
> +    uint32_t val;
> +
> +    switch (tm) {
> +    case CT_TM_TCP_FIRST_PACKET:
> +        if (tp_has_TCP_SYN_SENT(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case CT_TM_TCP_OPENING:
> +        if (tp_has_TCP_SYN_RECV(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case CT_TM_TCP_ESTABLISHED:
> +        if (tp_has_TCP_ESTABLISHED(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case CT_TM_TCP_CLOSING:
> +        if (tp_has_TCP_FIN_WAIT(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case CT_TM_TCP_FIN_WAIT:
> +        if (tp_has_TCP_TIME_WAIT(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case CT_TM_TCP_CLOSED:
> +        if (tp_has_TCP_CLOSE(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case CT_TM_OTHER_FIRST:
> +        if (tp_has_UDP_FIRST(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case CT_TM_OTHER_BIDIR:
> +        if (tp_has_UDP_SINGLE(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case CT_TM_OTHER_MULTIPLE:
> +        if (tp_has_UDP_MULTIPLE(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case CT_TM_ICMP_FIRST:
> +        if (tp_has_ICMP_FIRST(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case CT_TM_ICMP_REPLY:
> +        if (tp_has_ICMP_REPLY(tp, &val)) {
> +            goto init_with_val;
> +        }
> +        break;
> +    case N_CT_TM:
> +    default:
> +        OVS_NOT_REACHED();
> +        break;
> +    }
> +    return false;
> +
> +init_with_val:
> +    conn_init_expiration(ct, conn, tm, now, val, false);
> +    return true;
> +}
> +
> +void
> +conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> +                             enum ct_timeout tm, long long now)
> +{
> +    struct timeout_policy *tp;
> +
> +    tp = timeout_policy_lookup(ct, conn->tpid);
> +    if (tp && conn_init_expiration_with_policy(ct, conn, tm, now, tp)) {
> +        VLOG_DBG_RL(&rl, "Init timeout %s with policy.",
> +                    ct_timeout_str[tm]);
> +    } else {
> +        /* Init with default. */
> +        conn_init_expiration(ct, conn, tm, now, 0, true);
> +    }
> +}
> +
> +void
> +conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> +                               enum ct_timeout tm, long long now)
> +{
> +    struct timeout_policy *tp;
> +
> +    tp = timeout_policy_lookup(ct, conn->tpid);
> +    if (tp && conn_update_expiration_with_policy(ct, conn, tm, now, tp)) {
> +        VLOG_DBG_RL(&rl, "Update timeout %s with policy.",
> +                    ct_timeout_str[tm]);
> +    } else {
> +        /* Update with default. */
> +        conn_update_expiration(ct, conn, tm, now, 0, true);
> +    }
> +}


> diff --git a/lib/conntrack-tp.h b/lib/conntrack-tp.h
> new file mode 100644
> index 000000000000..34048ac2aeea
> --- /dev/null
> +++ b/lib/conntrack-tp.h
> @@ -0,0 +1,24 @@
> +/*
> + * Copyright (c) 2020 VMware, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef CONNTRACK_TP_H
> +#define CONNTRACK_TP_H 1
> +
> +void conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> +                                  enum ct_timeout tm, long long now);
> +void conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> +                                    enum ct_timeout tm, long long now);

I think introduce the _with_tp() version may cause confusion to choose
the *with_tp() API or the existing API. Since our goal is to use
customized timeout if it is available. I would suggest the following
two change options.

1) Since both conn_init_expiration_with_tp() and
conn_update_expiration_with_tp() did a lookup() on the timeout policy
before init or udpate the expiration time. Maybe we can abstrct the
lookup logic as a seperate function, and call the abstract funcion in
existing conn_init_expiration() and conn_update_expiration()?

2) The other options is to maybe to rename existing
conn_update_expiration() to conn_update_expiration__() and rename
conn_update_expiration_with_tp() to conn_update_expiration()? Both
option will avoid changes in the L4 specific conntrack code and have
less confustion about whether to use the _with_tp() API or the
existing API.


> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 0cbc8f6d2b25..de934bdf7169 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -430,6 +437,139 @@ zone_limit_delete(struct conntrack *ct, uint16_t zone)
>      return 0;
>  }
>
> +struct timeout_policy *
> +timeout_policy_get(struct conntrack *ct, int32_t tpid)
> +{
> +    struct timeout_policy *tp;
> +
> +    ovs_mutex_lock(&ct->ct_lock);
> +    tp = timeout_policy_lookup(ct, tpid);
> +    if (!tp) {
> +        ovs_mutex_unlock(&ct->ct_lock);
> +        return NULL;
> +    }
> +
> +    ovs_mutex_unlock(&ct->ct_lock);
> +    return tp;
> +}
> +
> +struct timeout_policy *
> +timeout_policy_lookup(struct conntrack *ct, int32_t tpid)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    struct timeout_policy *tp;
> +    uint32_t hash;
> +
> +    hash = zone_key_hash(tpid, ct->hash_basis);
> +    HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
> +        if (tp->p.id == tpid) {
> +            return tp;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static bool
> +is_valid_tpid(uint32_t tpid OVS_UNUSED)
> +{
> +    return true;
> +}

We can get rid of this function.


> +
> +static int
> +timeout_policy_create(struct conntrack *ct,
> +                      struct timeout_policy *new_tp)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    uint32_t tpid = new_tp->p.id;
> +    uint32_t hash;
> +
> +    if (is_valid_tpid(tpid)) {
> +        struct timeout_policy *tp;
> +
> +        tp = xzalloc(sizeof *tp);
> +        memcpy(&tp->p, &new_tp->p, sizeof tp->p);

Instead of directly copy 'new_tp' into 'tp', if we can fill in the
default values for the unpresent fields in the configuration time.
We can reduce the effort to lookup the default vaule at run time in
conntrack-tp.c.  Same thing can apply to the update function.


> @@ -1540,14 +1689,14 @@ conntrack_clean(struct conntrack *ct, long long now)
>   * - We want to reduce the number of wakeups and batch connection cleanup
>   *   when the load is not very high.  CT_CLEAN_INTERVAL ensures that if we
>   *   are coping with the current cleanup tasks, then we wait at least
> - *   5 seconds to do further cleanup.
> + *   1 seconds to do further cleanup.
>   *
>   * - We don't want to keep the map locked too long, as we might prevent
>   *   traffic from flowing.  CT_CLEAN_MIN_INTERVAL ensures that if cleanup is
>   *   behind, there is at least some 200ms blocks of time when the map will be
>   *   left alone, so the datapath can operate unhindered.
>   */
> -#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
> +#define CT_CLEAN_INTERVAL 1000 /* 1 second */

Not sure if we need to change CT_CLEAN_INTERVAL from 5 to 1, given
that we set CT_TM_MIN dynamically.


> diff --git a/lib/conntrack.h b/lib/conntrack.h
> index b0d0fc8d9597..a55609bb2907 100644
> --- a/lib/conntrack.h
> +++ b/lib/conntrack.h
> @@ -20,6 +20,7 @@
>  #include <stdbool.h>
>
>  #include "cmap.h"
> +#include "ct-dpif.h"
>  #include "latch.h"
>  #include "odp-netlink.h"
>  #include "openvswitch/hmap.h"
> @@ -93,7 +94,7 @@ int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
>                        const struct ovs_key_ct_labels *setlabel,
>                        ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
>                        const struct nat_action_info_t *nat_action_info,
> -                      long long now);
> +                      long long now, uint32_t tpid);
>  void conntrack_clear(struct dp_packet *packet);
>
>  struct conntrack_dump {
> @@ -111,6 +112,11 @@ struct conntrack_zone_limit {
>      uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
>  };
>
> +struct timeout_policy {
> +    struct hmap_node node;
> +    struct ct_dpif_timeout_policy p;

It seems to be more clear to me to name it as 'tp' than 'p'.


> @@ -139,5 +145,11 @@ struct conntrack_zone_limit zone_limit_get(struct conntrack *ct,
>                                             int32_t zone);
>  int zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit);
>  int zone_limit_delete(struct conntrack *ct, uint16_t zone);
> +int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp);
> +int timeout_policy_delete(struct conntrack *ct, uint32_t tpid);
> +struct timeout_policy *timeout_policy_get(struct conntrack *ct, int32_t tpid);
> +struct timeout_policy *timeout_policy_lookup(struct conntrack *ct,
> +                                             int32_t tpid);
> +
>
>  #endif /* conntrack.h */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index e456cc9befbe..41e805a832c8 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -7337,6 +7337,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>          bool commit = false;
>          unsigned int left;
>          uint16_t zone = 0;
> +        uint32_t tpid = 0;
>          const char *helper = NULL;
>          const uint32_t *setmark = NULL;
>          const struct ovs_key_ct_labels *setlabel = NULL;
> @@ -7372,8 +7373,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>                   * netlink events. */
>                  break;
>              case OVS_CT_ATTR_TIMEOUT:
> -                /* Userspace datapath does not support customized timeout
> -                 * policy yet. */
> +                if (!str_to_uint(nl_attr_get_string(b), 10, &tpid)) {
> +                    VLOG_WARN("Invalid tpid %s.", nl_attr_get_string(b));
> +                }

Looks like if the OVS_CT_ATTR_TIMEOUT is not present, we will always
use timeout policy 0 rather than the default timeout policy, since
from ofproto-dpif.c the tiemout policy id pools allocate id number
from starting from 0.  One simple fix may be to adjust the timeout
policy id pool (backer->tpids) to start from 1, so that userspace can
use 0 as the reserved default timeout policy.  Kernel space is
agnostic to the id pool starting number.

Thanks,

-Yi-Hung
William Tu April 15, 2020, 8:45 p.m. UTC | #2
On Mon, Apr 13, 2020 at 4:32 PM Yi-Hung Wei <yihung.wei@gmail.com> wrote:
>
> On Tue, Apr 7, 2020 at 2:08 PM William Tu <u9012063@gmail.com> wrote:
> >
> > Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout
> > policy support") adds conntrack timeout policy for kernel datapath.
> > This patch enables support for the userspace datapath.  I tested
> > using the 'make check-system-userspace' which checks the timeout
> > policies for ICMP and UDP cases.
> >
> > Signed-off-by: William Tu <u9012063@gmail.com>
>
> Thanks William for implementing this feature in the userspace. I have
> some comments as below.
>
>
> > diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
> > index b3507bd1c7fa..ffcda37f9985 100644
> > --- a/Documentation/faq/releases.rst
> > +++ b/Documentation/faq/releases.rst
> > @@ -118,7 +118,7 @@ Q: Are all features available with all datapaths?
> >      ========================== ============== ============== ========= =======
> >      Connection tracking             4.3            2.5          2.6      YES
> >      Conntrack Fragment Reass.       4.3            2.6          2.12     YES
> > -    Conntrack Timeout Policies      5.2            2.12         NO       NO
> > +    Conntrack Timeout Policies      5.2            2.12         2.13     NO
>
> For the support of userspace timeout policies in userspace datapath,
> should it be 2.14?
>
Yes, thanks!

>
>
> > diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> > index 9a8ca3910157..852482303684 100644
> > --- a/lib/conntrack-private.h
> > +++ b/lib/conntrack-private.h
> > @@ -118,6 +118,8 @@ struct conn {
> >      /* Immutable data. */
> >      bool alg_related; /* True if alg data connection. */
> >      enum ct_conn_type conn_type;
> > +
> > +    uint32_t tpid; /* Timeout policy ID. */
>
> I have a minor suggestion here.  I would recommend to replace tpid
> with tp_id so that when we grep tp_it it would not be confused with
> vlan's tpid (tunnel protocol id) field.

OK, make sense.

>
>
> > @@ -143,9 +145,9 @@ enum ct_update_res {
> >      CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
> >      CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
> >
> > -/* The smallest of the above values: it is used as an upper bound for the
> > - * interval between two rounds of cleanup of expired entries */
> > -#define CT_TM_MIN (30 * 1000)
> > +/* This is used as an upper bound for the interval between two
> > + * rounds of cleanup of expired entries */
> > +#define CT_TM_MIN (1 * 1000)
>
> I understand that while this patch enables users to customize the
> timeout policy, users can set the minimum timeout configuration to 1
> second. Thus, updates CT_TM_MIN to 1 second would make the conntrack
> clean thread to check timer expiration every single second to meet
> that requirement.  However, this may increase the system load even
> without any customized timeout policy.  I would recommend to update
> CT_TM_MIN dynamically to the minimum customized timeout value on the
> fly.

ok, will do it in next version.
>
>
> > diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
> > new file mode 100644
> > index 000000000000..6be4b8276340
> > --- /dev/null
> > +++ b/lib/conntrack-tp.c
> > @@ -0,0 +1,247 @@
> > +/*
> > + * Copyright (c) 2020 VMware, Inc.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#include <config.h>
> > +
> > +#include "conntrack-private.h"
> > +#include "conntrack-tp.h"
> > +#include "dp-packet.h"
> > +
> > +#include "openvswitch/vlog.h"
> > +VLOG_DEFINE_THIS_MODULE(conntrack_tp);
> > +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> > +
> > +static const char *ct_timeout_str[] = {
> > +#define CT_TIMEOUT(NAME, VALUE) #NAME
> > +    CT_TIMEOUTS
> > +#undef CT_TIMEOUT
> > +};
> > +
> > +static inline bool
> > +check_present_and_set(struct timeout_policy *tp, uint32_t *v,
> > +                      enum ct_dpif_tp_attr attr)
> > +{
> > +    if (tp && (tp->p.present & (1 << attr))) {
> > +        *v = tp->p.attrs[attr];
> > +        return true;
> > +    }
> > +    return false;
> > +}
> > +
> > +#define TP_HAS_FUNC(ATTR)  \
> > +static bool \
> > +tp_has_##ATTR(struct timeout_policy *tp, uint32_t *v)              \
> > +{                                                                  \
> > +    return check_present_and_set(tp, v, CT_DPIF_TP_ATTR_##ATTR);   \
> > +}
> > +
> > +/* These functions check whether the timeout value is set from the
> > + * present bit.  If true, then set the value to '*v'.  For meaning
> > + * of each value, see CT_Timeout_Policy table at ovs-vswitchd.conf.db(5).
> > + */
> > +TP_HAS_FUNC(TCP_SYN_SENT)
> > +TP_HAS_FUNC(TCP_SYN_RECV)
> > +TP_HAS_FUNC(TCP_ESTABLISHED)
> > +TP_HAS_FUNC(TCP_FIN_WAIT)
> > +TP_HAS_FUNC(TCP_TIME_WAIT)
> > +TP_HAS_FUNC(TCP_CLOSE)
> > +TP_HAS_FUNC(UDP_FIRST)
> > +TP_HAS_FUNC(UDP_SINGLE)
> > +TP_HAS_FUNC(UDP_MULTIPLE)
> > +TP_HAS_FUNC(ICMP_FIRST)
> > +TP_HAS_FUNC(ICMP_REPLY)
> > +
> > +static bool
> > +conn_update_expiration_with_policy(struct conntrack *ct, struct conn *conn,
> > +                                   enum ct_timeout tm, long long now,
> > +                                   struct timeout_policy *tp)
> > +{
> > +    uint32_t val;
> > +
> > +    switch (tm) {
> > +    case CT_TM_TCP_FIRST_PACKET:
> > +        if (tp_has_TCP_SYN_SENT(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_OPENING:
> > +        if (tp_has_TCP_SYN_RECV(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_ESTABLISHED:
> > +        if (tp_has_TCP_ESTABLISHED(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_CLOSING:
> > +        if (tp_has_TCP_FIN_WAIT(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_FIN_WAIT:
> > +        if (tp_has_TCP_TIME_WAIT(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_CLOSED:
> > +        if (tp_has_TCP_CLOSE(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_FIRST:
> > +        if (tp_has_UDP_FIRST(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_BIDIR:
> > +        if (tp_has_UDP_SINGLE(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_MULTIPLE:
> > +        if (tp_has_UDP_MULTIPLE(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_ICMP_FIRST:
> > +        if (tp_has_ICMP_FIRST(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_ICMP_REPLY:
> > +        if (tp_has_ICMP_REPLY(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case N_CT_TM:
> > +    default:
> > +        OVS_NOT_REACHED();
> > +        break;
> > +    }
> > +    return false;
> > +
> > +update_with_val:
> > +    conn_update_expiration(ct, conn, tm, now, val, false);
> > +    return true;
> > +}
> > +
> > +static bool
> > +conn_init_expiration_with_policy(struct conntrack *ct, struct conn *conn,
> > +                                 enum ct_timeout tm, long long now,
> > +                                 struct timeout_policy *tp)
> > +{
> > +    uint32_t val;
> > +
> > +    switch (tm) {
> > +    case CT_TM_TCP_FIRST_PACKET:
> > +        if (tp_has_TCP_SYN_SENT(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_OPENING:
> > +        if (tp_has_TCP_SYN_RECV(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_ESTABLISHED:
> > +        if (tp_has_TCP_ESTABLISHED(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_CLOSING:
> > +        if (tp_has_TCP_FIN_WAIT(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_FIN_WAIT:
> > +        if (tp_has_TCP_TIME_WAIT(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_CLOSED:
> > +        if (tp_has_TCP_CLOSE(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_FIRST:
> > +        if (tp_has_UDP_FIRST(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_BIDIR:
> > +        if (tp_has_UDP_SINGLE(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_MULTIPLE:
> > +        if (tp_has_UDP_MULTIPLE(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_ICMP_FIRST:
> > +        if (tp_has_ICMP_FIRST(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_ICMP_REPLY:
> > +        if (tp_has_ICMP_REPLY(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case N_CT_TM:
> > +    default:
> > +        OVS_NOT_REACHED();
> > +        break;
> > +    }
> > +    return false;
> > +
> > +init_with_val:
> > +    conn_init_expiration(ct, conn, tm, now, val, false);
> > +    return true;
> > +}
> > +
> > +void
> > +conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> > +                             enum ct_timeout tm, long long now)
> > +{
> > +    struct timeout_policy *tp;
> > +
> > +    tp = timeout_policy_lookup(ct, conn->tpid);
> > +    if (tp && conn_init_expiration_with_policy(ct, conn, tm, now, tp)) {
> > +        VLOG_DBG_RL(&rl, "Init timeout %s with policy.",
> > +                    ct_timeout_str[tm]);
> > +    } else {
> > +        /* Init with default. */
> > +        conn_init_expiration(ct, conn, tm, now, 0, true);
> > +    }
> > +}
> > +
> > +void
> > +conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> > +                               enum ct_timeout tm, long long now)
> > +{
> > +    struct timeout_policy *tp;
> > +
> > +    tp = timeout_policy_lookup(ct, conn->tpid);
> > +    if (tp && conn_update_expiration_with_policy(ct, conn, tm, now, tp)) {
> > +        VLOG_DBG_RL(&rl, "Update timeout %s with policy.",
> > +                    ct_timeout_str[tm]);
> > +    } else {
> > +        /* Update with default. */
> > +        conn_update_expiration(ct, conn, tm, now, 0, true);
> > +    }
> > +}
>
>
> > diff --git a/lib/conntrack-tp.h b/lib/conntrack-tp.h
> > new file mode 100644
> > index 000000000000..34048ac2aeea
> > --- /dev/null
> > +++ b/lib/conntrack-tp.h
> > @@ -0,0 +1,24 @@
> > +/*
> > + * Copyright (c) 2020 VMware, Inc.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#ifndef CONNTRACK_TP_H
> > +#define CONNTRACK_TP_H 1
> > +
> > +void conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> > +                                  enum ct_timeout tm, long long now);
> > +void conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> > +                                    enum ct_timeout tm, long long now);
>
> I think introduce the _with_tp() version may cause confusion to choose
> the *with_tp() API or the existing API. Since our goal is to use
> customized timeout if it is available. I would suggest the following
> two change options.
>
> 1) Since both conn_init_expiration_with_tp() and
> conn_update_expiration_with_tp() did a lookup() on the timeout policy
> before init or udpate the expiration time. Maybe we can abstrct the
> lookup logic as a seperate function, and call the abstract funcion in
> existing conn_init_expiration() and conn_update_expiration()?
>
> 2) The other options is to maybe to rename existing
> conn_update_expiration() to conn_update_expiration__() and rename
> conn_update_expiration_with_tp() to conn_update_expiration()? Both
> option will avoid changes in the L4 specific conntrack code and have
> less confustion about whether to use the _with_tp() API or the
> existing API.
>
OK I will rename the existing one to conn_update_expiration__()

>
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index 0cbc8f6d2b25..de934bdf7169 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -430,6 +437,139 @@ zone_limit_delete(struct conntrack *ct, uint16_t zone)
> >      return 0;
> >  }
> >
> > +struct timeout_policy *
> > +timeout_policy_get(struct conntrack *ct, int32_t tpid)
> > +{
> > +    struct timeout_policy *tp;
> > +
> > +    ovs_mutex_lock(&ct->ct_lock);
> > +    tp = timeout_policy_lookup(ct, tpid);
> > +    if (!tp) {
> > +        ovs_mutex_unlock(&ct->ct_lock);
> > +        return NULL;
> > +    }
> > +
> > +    ovs_mutex_unlock(&ct->ct_lock);
> > +    return tp;
> > +}
> > +
> > +struct timeout_policy *
> > +timeout_policy_lookup(struct conntrack *ct, int32_t tpid)
> > +    OVS_REQUIRES(ct->ct_lock)
> > +{
> > +    struct timeout_policy *tp;
> > +    uint32_t hash;
> > +
> > +    hash = zone_key_hash(tpid, ct->hash_basis);
> > +    HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
> > +        if (tp->p.id == tpid) {
> > +            return tp;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +static bool
> > +is_valid_tpid(uint32_t tpid OVS_UNUSED)
> > +{
> > +    return true;
> > +}
>
> We can get rid of this function.
>
>
> > +
> > +static int
> > +timeout_policy_create(struct conntrack *ct,
> > +                      struct timeout_policy *new_tp)
> > +    OVS_REQUIRES(ct->ct_lock)
> > +{
> > +    uint32_t tpid = new_tp->p.id;
> > +    uint32_t hash;
> > +
> > +    if (is_valid_tpid(tpid)) {
> > +        struct timeout_policy *tp;
> > +
> > +        tp = xzalloc(sizeof *tp);
> > +        memcpy(&tp->p, &new_tp->p, sizeof tp->p);
>
> Instead of directly copy 'new_tp' into 'tp', if we can fill in the
> default values for the unpresent fields in the configuration time.
> We can reduce the effort to lookup the default vaule at run time in
> conntrack-tp.c.  Same thing can apply to the update function.

I see, looks like a better design.

>
>
> > @@ -1540,14 +1689,14 @@ conntrack_clean(struct conntrack *ct, long long now)
> >   * - We want to reduce the number of wakeups and batch connection cleanup
> >   *   when the load is not very high.  CT_CLEAN_INTERVAL ensures that if we
> >   *   are coping with the current cleanup tasks, then we wait at least
> > - *   5 seconds to do further cleanup.
> > + *   1 seconds to do further cleanup.
> >   *
> >   * - We don't want to keep the map locked too long, as we might prevent
> >   *   traffic from flowing.  CT_CLEAN_MIN_INTERVAL ensures that if cleanup is
> >   *   behind, there is at least some 200ms blocks of time when the map will be
> >   *   left alone, so the datapath can operate unhindered.
> >   */
> > -#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
> > +#define CT_CLEAN_INTERVAL 1000 /* 1 second */
>
> Not sure if we need to change CT_CLEAN_INTERVAL from 5 to 1, given
> that we set CT_TM_MIN dynamically.
>
>
> > diff --git a/lib/conntrack.h b/lib/conntrack.h
> > index b0d0fc8d9597..a55609bb2907 100644
> > --- a/lib/conntrack.h
> > +++ b/lib/conntrack.h
> > @@ -20,6 +20,7 @@
> >  #include <stdbool.h>
> >
> >  #include "cmap.h"
> > +#include "ct-dpif.h"
> >  #include "latch.h"
> >  #include "odp-netlink.h"
> >  #include "openvswitch/hmap.h"
> > @@ -93,7 +94,7 @@ int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
> >                        const struct ovs_key_ct_labels *setlabel,
> >                        ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
> >                        const struct nat_action_info_t *nat_action_info,
> > -                      long long now);
> > +                      long long now, uint32_t tpid);
> >  void conntrack_clear(struct dp_packet *packet);
> >
> >  struct conntrack_dump {
> > @@ -111,6 +112,11 @@ struct conntrack_zone_limit {
> >      uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
> >  };
> >
> > +struct timeout_policy {
> > +    struct hmap_node node;
> > +    struct ct_dpif_timeout_policy p;
>
> It seems to be more clear to me to name it as 'tp' than 'p'.
>
>
> > @@ -139,5 +145,11 @@ struct conntrack_zone_limit zone_limit_get(struct conntrack *ct,
> >                                             int32_t zone);
> >  int zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit);
> >  int zone_limit_delete(struct conntrack *ct, uint16_t zone);
> > +int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp);
> > +int timeout_policy_delete(struct conntrack *ct, uint32_t tpid);
> > +struct timeout_policy *timeout_policy_get(struct conntrack *ct, int32_t tpid);
> > +struct timeout_policy *timeout_policy_lookup(struct conntrack *ct,
> > +                                             int32_t tpid);
> > +
> >
> >  #endif /* conntrack.h */
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index e456cc9befbe..41e805a832c8 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -7337,6 +7337,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> >          bool commit = false;
> >          unsigned int left;
> >          uint16_t zone = 0;
> > +        uint32_t tpid = 0;
> >          const char *helper = NULL;
> >          const uint32_t *setmark = NULL;
> >          const struct ovs_key_ct_labels *setlabel = NULL;
> > @@ -7372,8 +7373,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> >                   * netlink events. */
> >                  break;
> >              case OVS_CT_ATTR_TIMEOUT:
> > -                /* Userspace datapath does not support customized timeout
> > -                 * policy yet. */
> > +                if (!str_to_uint(nl_attr_get_string(b), 10, &tpid)) {
> > +                    VLOG_WARN("Invalid tpid %s.", nl_attr_get_string(b));
> > +                }
>
> Looks like if the OVS_CT_ATTR_TIMEOUT is not present, we will always
> use timeout policy 0 rather than the default timeout policy, since
> from ofproto-dpif.c the tiemout policy id pools allocate id number
> from starting from 0.  One simple fix may be to adjust the timeout
> policy id pool (backer->tpids) to start from 1, so that userspace can
> use 0 as the reserved default timeout policy.  Kernel space is
> agnostic to the id pool starting number.
>
thanks!
William
diff mbox series

Patch

diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index b3507bd1c7fa..ffcda37f9985 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -118,7 +118,7 @@  Q: Are all features available with all datapaths?
     ========================== ============== ============== ========= =======
     Connection tracking             4.3            2.5          2.6      YES
     Conntrack Fragment Reass.       4.3            2.6          2.12     YES
-    Conntrack Timeout Policies      5.2            2.12         NO       NO
+    Conntrack Timeout Policies      5.2            2.12         2.13     NO
     Conntrack Zone Limit            4.18           2.10         2.13     YES
     Conntrack NAT                   4.6            2.6          2.8      YES
     Tunnel - LISP                   NO             2.11         NO       NO
diff --git a/NEWS b/NEWS
index 70bd17584594..1e6af8f57bdd 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,8 @@  Post-v2.13.0
      * Deprecated DPDK ring ports (dpdkr) are no longer supported.
    - Linux datapath:
      * Support for kernel versions up to 5.5.x.
+   - Userspace datapath:
+     * Add support for conntrack zone-based timeout policy.
 
 
 v2.13.0 - 14 Feb 2020
diff --git a/lib/automake.mk b/lib/automake.mk
index 95925b57c351..86940ccd2f9e 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -53,6 +53,8 @@  lib_libopenvswitch_la_SOURCES = \
 	lib/conntrack-icmp.c \
 	lib/conntrack-private.h \
 	lib/conntrack-tcp.c \
+	lib/conntrack-tp.c \
+	lib/conntrack-tp.h \
 	lib/conntrack-other.c \
 	lib/conntrack.c \
 	lib/conntrack.h \
diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c
index 63246f0124d0..81393a6f1846 100644
--- a/lib/conntrack-icmp.c
+++ b/lib/conntrack-icmp.c
@@ -22,6 +22,7 @@ 
 #include <netinet/icmp6.h>
 
 #include "conntrack-private.h"
+#include "conntrack-tp.h"
 #include "dp-packet.h"
 
 enum OVS_PACKED_ENUM icmp_state {
@@ -51,7 +52,8 @@  icmp_conn_update(struct conntrack *ct, struct conn *conn_,
 {
     struct conn_icmp *conn = conn_icmp_cast(conn_);
     conn->state = reply ? ICMPS_REPLY : ICMPS_FIRST;
-    conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
+    conn_update_expiration_with_tp(ct, &conn->up, icmp_timeouts[conn->state],
+                                   now);
 
     return CT_UPDATE_VALID;
 }
@@ -80,7 +82,8 @@  icmp_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED,
 {
     struct conn_icmp *conn = xzalloc(sizeof *conn);
     conn->state = ICMPS_FIRST;
-    conn_init_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
+    conn_init_expiration_with_tp(ct, &conn->up, icmp_timeouts[conn->state],
+                                 now);
 
     return &conn->up;
 }
diff --git a/lib/conntrack-other.c b/lib/conntrack-other.c
index de22ef87cc19..cabd51964c08 100644
--- a/lib/conntrack-other.c
+++ b/lib/conntrack-other.c
@@ -17,6 +17,7 @@ 
 #include <config.h>
 
 #include "conntrack-private.h"
+#include "conntrack-tp.h"
 #include "dp-packet.h"
 
 enum OVS_PACKED_ENUM other_state {
@@ -56,8 +57,9 @@  other_conn_update(struct conntrack *ct, struct conn *conn_,
         ret = CT_UPDATE_VALID_NEW;
     }
 
-    conn_update_expiration(ct, &conn->up, other_timeouts[conn->state], now);
-
+    conn_update_expiration_with_tp(ct, &conn->up,
+                                   other_timeouts[conn->state],
+                                   now);
     return ret;
 }
 
@@ -76,7 +78,8 @@  other_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED,
     conn = xzalloc(sizeof *conn);
     conn->state = OTHERS_FIRST;
 
-    conn_init_expiration(ct, &conn->up, other_timeouts[conn->state], now);
+    conn_init_expiration_with_tp(ct, &conn->up, other_timeouts[conn->state],
+                                 now);
 
     return &conn->up;
 }
diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 9a8ca3910157..852482303684 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -118,6 +118,8 @@  struct conn {
     /* Immutable data. */
     bool alg_related; /* True if alg data connection. */
     enum ct_conn_type conn_type;
+
+    uint32_t tpid; /* Timeout policy ID. */
 };
 
 enum ct_update_res {
@@ -143,9 +145,9 @@  enum ct_update_res {
     CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
     CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
 
-/* The smallest of the above values: it is used as an upper bound for the
- * interval between two rounds of cleanup of expired entries */
-#define CT_TM_MIN (30 * 1000)
+/* This is used as an upper bound for the interval between two
+ * rounds of cleanup of expired entries */
+#define CT_TM_MIN (1 * 1000)
 
 #define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN);
     CT_TIMEOUTS
@@ -163,6 +165,7 @@  struct conntrack {
     struct cmap conns OVS_GUARDED;
     struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
     struct hmap zone_limits OVS_GUARDED;
+    struct hmap timeout_policies OVS_GUARDED;
     uint32_t hash_basis; /* Salt for hashing a connection key. */
     pthread_t clean_thread; /* Periodically cleans up connection tracker. */
     struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */
@@ -212,16 +215,22 @@  extern long long ct_timeout_val[];
 /* ct_lock must be held. */
 static inline void
 conn_init_expiration(struct conntrack *ct, struct conn *conn,
-                     enum ct_timeout tm, long long now)
+                     enum ct_timeout tm, long long now,
+                     uint32_t tp_value, bool use_default)
 {
-    conn->expiration = now + ct_timeout_val[tm];
+    if (use_default) {
+        conn->expiration = now + ct_timeout_val[tm];
+    } else {
+        conn->expiration = now + tp_value * 1000;
+    }
     ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
 }
 
 /* The conn entry lock must be held on entry and exit. */
 static inline void
 conn_update_expiration(struct conntrack *ct, struct conn *conn,
-                       enum ct_timeout tm, long long now)
+                       enum ct_timeout tm, long long now,
+                       uint32_t tp_value, bool use_default)
     OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     ovs_mutex_unlock(&conn->lock);
@@ -229,7 +238,11 @@  conn_update_expiration(struct conntrack *ct, struct conn *conn,
     ovs_mutex_lock(&ct->ct_lock);
     ovs_mutex_lock(&conn->lock);
     if (!conn->cleaned) {
-        conn->expiration = now + ct_timeout_val[tm];
+        if (use_default) {
+            conn->expiration = now + ct_timeout_val[tm];
+        } else {
+            conn->expiration = now + tp_value * 1000;
+        }
         ovs_list_remove(&conn->exp_node);
         ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
     }
diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c
index 47261c7551d1..194c5a1ba410 100644
--- a/lib/conntrack-tcp.c
+++ b/lib/conntrack-tcp.c
@@ -39,6 +39,7 @@ 
 #include <config.h>
 
 #include "conntrack-private.h"
+#include "conntrack-tp.h"
 #include "coverage.h"
 #include "ct-dpif.h"
 #include "dp-packet.h"
@@ -188,7 +189,8 @@  tcp_conn_update(struct conntrack *ct, struct conn *conn_,
             return CT_UPDATE_NEW;
         } else if (src->state <= CT_DPIF_TCPS_SYN_SENT) {
             src->state = CT_DPIF_TCPS_SYN_SENT;
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIRST_PACKET, now);
+            conn_update_expiration_with_tp(ct, &conn->up,
+                                           CT_TM_TCP_FIRST_PACKET, now);
             return CT_UPDATE_VALID_NEW;
         }
     }
@@ -339,18 +341,23 @@  tcp_conn_update(struct conntrack *ct, struct conn *conn_,
 
         if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2
             && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_CLOSED, now);
+            conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_CLOSED,
+                                           now);
         } else if (src->state >= CT_DPIF_TCPS_CLOSING
                    && dst->state >= CT_DPIF_TCPS_CLOSING) {
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIN_WAIT, now);
+            conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_FIN_WAIT,
+                                           now);
         } else if (src->state < CT_DPIF_TCPS_ESTABLISHED
                    || dst->state < CT_DPIF_TCPS_ESTABLISHED) {
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_OPENING, now);
+            conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_OPENING,
+                                           now);
         } else if (src->state >= CT_DPIF_TCPS_CLOSING
                    || dst->state >= CT_DPIF_TCPS_CLOSING) {
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_CLOSING, now);
+            conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_CLOSING,
+                                           now);
         } else {
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_ESTABLISHED, now);
+            conn_update_expiration_with_tp(ct, &conn->up,
+                                           CT_TM_TCP_ESTABLISHED, now);
         }
     } else if ((dst->state < CT_DPIF_TCPS_SYN_SENT
                 || dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
@@ -471,7 +478,8 @@  tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now)
     src->state = CT_DPIF_TCPS_SYN_SENT;
     dst->state = CT_DPIF_TCPS_CLOSED;
 
-    conn_init_expiration(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET, now);
+    conn_init_expiration_with_tp(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET,
+                                 now);
 
     return &newconn->up;
 }
diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
new file mode 100644
index 000000000000..6be4b8276340
--- /dev/null
+++ b/lib/conntrack-tp.c
@@ -0,0 +1,247 @@ 
+/*
+ * Copyright (c) 2020 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "conntrack-private.h"
+#include "conntrack-tp.h"
+#include "dp-packet.h"
+
+#include "openvswitch/vlog.h"
+VLOG_DEFINE_THIS_MODULE(conntrack_tp);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+static const char *ct_timeout_str[] = {
+#define CT_TIMEOUT(NAME, VALUE) #NAME
+    CT_TIMEOUTS
+#undef CT_TIMEOUT
+};
+
+static inline bool
+check_present_and_set(struct timeout_policy *tp, uint32_t *v,
+                      enum ct_dpif_tp_attr attr)
+{
+    if (tp && (tp->p.present & (1 << attr))) {
+        *v = tp->p.attrs[attr];
+        return true;
+    }
+    return false;
+}
+
+#define TP_HAS_FUNC(ATTR)  \
+static bool \
+tp_has_##ATTR(struct timeout_policy *tp, uint32_t *v)              \
+{                                                                  \
+    return check_present_and_set(tp, v, CT_DPIF_TP_ATTR_##ATTR);   \
+}
+
+/* These functions check whether the timeout value is set from the
+ * present bit.  If true, then set the value to '*v'.  For meaning
+ * of each value, see CT_Timeout_Policy table at ovs-vswitchd.conf.db(5).
+ */
+TP_HAS_FUNC(TCP_SYN_SENT)
+TP_HAS_FUNC(TCP_SYN_RECV)
+TP_HAS_FUNC(TCP_ESTABLISHED)
+TP_HAS_FUNC(TCP_FIN_WAIT)
+TP_HAS_FUNC(TCP_TIME_WAIT)
+TP_HAS_FUNC(TCP_CLOSE)
+TP_HAS_FUNC(UDP_FIRST)
+TP_HAS_FUNC(UDP_SINGLE)
+TP_HAS_FUNC(UDP_MULTIPLE)
+TP_HAS_FUNC(ICMP_FIRST)
+TP_HAS_FUNC(ICMP_REPLY)
+
+static bool
+conn_update_expiration_with_policy(struct conntrack *ct, struct conn *conn,
+                                   enum ct_timeout tm, long long now,
+                                   struct timeout_policy *tp)
+{
+    uint32_t val;
+
+    switch (tm) {
+    case CT_TM_TCP_FIRST_PACKET:
+        if (tp_has_TCP_SYN_SENT(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_TCP_OPENING:
+        if (tp_has_TCP_SYN_RECV(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_TCP_ESTABLISHED:
+        if (tp_has_TCP_ESTABLISHED(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_TCP_CLOSING:
+        if (tp_has_TCP_FIN_WAIT(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_TCP_FIN_WAIT:
+        if (tp_has_TCP_TIME_WAIT(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_TCP_CLOSED:
+        if (tp_has_TCP_CLOSE(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_OTHER_FIRST:
+        if (tp_has_UDP_FIRST(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_OTHER_BIDIR:
+        if (tp_has_UDP_SINGLE(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_OTHER_MULTIPLE:
+        if (tp_has_UDP_MULTIPLE(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_ICMP_FIRST:
+        if (tp_has_ICMP_FIRST(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_ICMP_REPLY:
+        if (tp_has_ICMP_REPLY(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case N_CT_TM:
+    default:
+        OVS_NOT_REACHED();
+        break;
+    }
+    return false;
+
+update_with_val:
+    conn_update_expiration(ct, conn, tm, now, val, false);
+    return true;
+}
+
+static bool
+conn_init_expiration_with_policy(struct conntrack *ct, struct conn *conn,
+                                 enum ct_timeout tm, long long now,
+                                 struct timeout_policy *tp)
+{
+    uint32_t val;
+
+    switch (tm) {
+    case CT_TM_TCP_FIRST_PACKET:
+        if (tp_has_TCP_SYN_SENT(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_TCP_OPENING:
+        if (tp_has_TCP_SYN_RECV(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_TCP_ESTABLISHED:
+        if (tp_has_TCP_ESTABLISHED(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_TCP_CLOSING:
+        if (tp_has_TCP_FIN_WAIT(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_TCP_FIN_WAIT:
+        if (tp_has_TCP_TIME_WAIT(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_TCP_CLOSED:
+        if (tp_has_TCP_CLOSE(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_OTHER_FIRST:
+        if (tp_has_UDP_FIRST(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_OTHER_BIDIR:
+        if (tp_has_UDP_SINGLE(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_OTHER_MULTIPLE:
+        if (tp_has_UDP_MULTIPLE(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_ICMP_FIRST:
+        if (tp_has_ICMP_FIRST(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_ICMP_REPLY:
+        if (tp_has_ICMP_REPLY(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case N_CT_TM:
+    default:
+        OVS_NOT_REACHED();
+        break;
+    }
+    return false;
+
+init_with_val:
+    conn_init_expiration(ct, conn, tm, now, val, false);
+    return true;
+}
+
+void
+conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+                             enum ct_timeout tm, long long now)
+{
+    struct timeout_policy *tp;
+
+    tp = timeout_policy_lookup(ct, conn->tpid);
+    if (tp && conn_init_expiration_with_policy(ct, conn, tm, now, tp)) {
+        VLOG_DBG_RL(&rl, "Init timeout %s with policy.",
+                    ct_timeout_str[tm]);
+    } else {
+        /* Init with default. */
+        conn_init_expiration(ct, conn, tm, now, 0, true);
+    }
+}
+
+void
+conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+                               enum ct_timeout tm, long long now)
+{
+    struct timeout_policy *tp;
+
+    tp = timeout_policy_lookup(ct, conn->tpid);
+    if (tp && conn_update_expiration_with_policy(ct, conn, tm, now, tp)) {
+        VLOG_DBG_RL(&rl, "Update timeout %s with policy.",
+                    ct_timeout_str[tm]);
+    } else {
+        /* Update with default. */
+        conn_update_expiration(ct, conn, tm, now, 0, true);
+    }
+}
diff --git a/lib/conntrack-tp.h b/lib/conntrack-tp.h
new file mode 100644
index 000000000000..34048ac2aeea
--- /dev/null
+++ b/lib/conntrack-tp.h
@@ -0,0 +1,24 @@ 
+/*
+ * Copyright (c) 2020 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONNTRACK_TP_H
+#define CONNTRACK_TP_H 1
+
+void conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+                                  enum ct_timeout tm, long long now);
+void conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+                                    enum ct_timeout tm, long long now);
+#endif
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 0cbc8f6d2b25..de934bdf7169 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -143,6 +143,12 @@  detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx,
 static void
 expectation_clean(struct conntrack *ct, const struct conn_key *master_key);
 
+static void timeout_policy_init(struct conntrack *ct);
+static int timeout_policy_create(struct conntrack *ct,
+                                 struct timeout_policy *tp);
+static void timeout_policy_clean(struct conntrack *ct,
+                                 struct timeout_policy *tp);
+
 static struct ct_l4_proto *l4_protos[] = {
     [IPPROTO_TCP] = &ct_proto_tcp,
     [IPPROTO_UDP] = &ct_proto_other,
@@ -312,6 +318,7 @@  conntrack_init(void)
     }
     hmap_init(&ct->zone_limits);
     ct->zone_limit_seq = 0;
+    timeout_policy_init(ct);
     ovs_mutex_unlock(&ct->ct_lock);
 
     ct->hash_basis = random_uint32();
@@ -430,6 +437,139 @@  zone_limit_delete(struct conntrack *ct, uint16_t zone)
     return 0;
 }
 
+struct timeout_policy *
+timeout_policy_get(struct conntrack *ct, int32_t tpid)
+{
+    struct timeout_policy *tp;
+
+    ovs_mutex_lock(&ct->ct_lock);
+    tp = timeout_policy_lookup(ct, tpid);
+    if (!tp) {
+        ovs_mutex_unlock(&ct->ct_lock);
+        return NULL;
+    }
+
+    ovs_mutex_unlock(&ct->ct_lock);
+    return tp;
+}
+
+struct timeout_policy *
+timeout_policy_lookup(struct conntrack *ct, int32_t tpid)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct timeout_policy *tp;
+    uint32_t hash;
+
+    hash = zone_key_hash(tpid, ct->hash_basis);
+    HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
+        if (tp->p.id == tpid) {
+            return tp;
+        }
+    }
+    return NULL;
+}
+
+static bool
+is_valid_tpid(uint32_t tpid OVS_UNUSED)
+{
+    return true;
+}
+
+static int
+timeout_policy_create(struct conntrack *ct,
+                      struct timeout_policy *new_tp)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    uint32_t tpid = new_tp->p.id;
+    uint32_t hash;
+
+    if (is_valid_tpid(tpid)) {
+        struct timeout_policy *tp;
+
+        tp = xzalloc(sizeof *tp);
+        memcpy(&tp->p, &new_tp->p, sizeof tp->p);
+
+        hash = zone_key_hash(tpid, ct->hash_basis);
+        hmap_insert(&ct->timeout_policies, &tp->node, hash);
+
+        return 0;
+    } else {
+        return EINVAL;
+    }
+}
+
+static void
+update_existing_tp(struct timeout_policy *tp_dst,
+                   struct timeout_policy *tp_src)
+{
+    struct ct_dpif_timeout_policy *dst, *src;
+    int i;
+
+    dst = &tp_dst->p;
+    src = &tp_src->p;
+
+    /* Set the value to dst if present bit in src is set. */
+    for (i = 0; i < ARRAY_SIZE(dst->attrs); i++) {
+        if (src->present & (1 << i)) {
+            dst->attrs[i] = src->attrs[i];
+            dst->present |= (1 << i);
+        }
+    }
+}
+
+int
+timeout_policy_update(struct conntrack *ct, struct timeout_policy *new_tp)
+{
+    int err = 0;
+    uint32_t tpid = new_tp->p.id;
+
+    ovs_mutex_lock(&ct->ct_lock);
+    struct timeout_policy *tp = timeout_policy_lookup(ct, tpid);
+    if (tp) {
+        VLOG_INFO("Changed timeout policy of existing tpid %d", tpid);
+        update_existing_tp(tp, new_tp);
+    } else {
+        err = timeout_policy_create(ct, new_tp);
+        if (err) {
+            VLOG_WARN("Request to create timeout policy failed");
+        } else {
+            VLOG_INFO("Created timeout policy tpid %d", tpid);
+        }
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+    return err;
+}
+
+static void
+timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    hmap_remove(&ct->timeout_policies, &tp->node);
+    free(tp);
+}
+
+int
+timeout_policy_delete(struct conntrack *ct, uint32_t tpid)
+{
+    ovs_mutex_lock(&ct->ct_lock);
+    struct timeout_policy *tp = timeout_policy_lookup(ct, tpid);
+    if (tp) {
+        VLOG_INFO("Deleted timeout policy for id %d", tpid);
+        timeout_policy_clean(ct, tp);
+    } else {
+        VLOG_INFO("Attempted delete of non-existent timeout policy: zone %d",
+                  tpid);
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+    return 0;
+}
+
+void
+timeout_policy_init(struct conntrack *ct)
+{
+    hmap_init(&ct->timeout_policies);
+}
+
 static void
 conn_clean_cmn(struct conntrack *ct, struct conn *conn)
     OVS_REQUIRES(ct->ct_lock)
@@ -502,6 +642,12 @@  conntrack_destroy(struct conntrack *ct)
     }
     hmap_destroy(&ct->zone_limits);
 
+    struct timeout_policy *tp;
+    HMAP_FOR_EACH_POP (tp, node, &ct->timeout_policies) {
+        free(tp);
+    }
+    hmap_destroy(&ct->timeout_policies);
+
     ovs_mutex_unlock(&ct->ct_lock);
     ovs_mutex_destroy(&ct->ct_lock);
 
@@ -1275,7 +1421,8 @@  process_one(struct conntrack *ct, struct dp_packet *pkt,
             bool force, bool commit, long long now, const uint32_t *setmark,
             const struct ovs_key_ct_labels *setlabel,
             const struct nat_action_info_t *nat_action_info,
-            ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper)
+            ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
+            uint32_t tpid)
 {
     /* Reset ct_state whenever entering a new zone. */
     if (pkt->md.ct_state && pkt->md.ct_zone != zone) {
@@ -1323,6 +1470,7 @@  process_one(struct conntrack *ct, struct dp_packet *pkt,
                                               nat_action_info,
                                               ct_alg_ctl, now,
                                               &create_new_conn))) {
+            conn->tpid = tpid;
             create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
         }
         if (nat_action_info && !create_new_conn) {
@@ -1395,7 +1543,7 @@  conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
                   const struct ovs_key_ct_labels *setlabel,
                   ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
                   const struct nat_action_info_t *nat_action_info,
-                  long long now)
+                  long long now, uint32_t tpid)
 {
     ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone,
                              ct->hash_basis);
@@ -1417,7 +1565,8 @@  conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
             write_ct_md(packet, zone, NULL, NULL, NULL);
         } else {
             process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
-                        setlabel, nat_action_info, tp_src, tp_dst, helper);
+                        setlabel, nat_action_info, tp_src, tp_dst, helper,
+                        tpid);
         }
     }
 
@@ -1540,14 +1689,14 @@  conntrack_clean(struct conntrack *ct, long long now)
  * - We want to reduce the number of wakeups and batch connection cleanup
  *   when the load is not very high.  CT_CLEAN_INTERVAL ensures that if we
  *   are coping with the current cleanup tasks, then we wait at least
- *   5 seconds to do further cleanup.
+ *   1 seconds to do further cleanup.
  *
  * - We don't want to keep the map locked too long, as we might prevent
  *   traffic from flowing.  CT_CLEAN_MIN_INTERVAL ensures that if cleanup is
  *   behind, there is at least some 200ms blocks of time when the map will be
  *   left alone, so the datapath can operate unhindered.
  */
-#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
+#define CT_CLEAN_INTERVAL 1000 /* 1 second */
 #define CT_CLEAN_MIN_INTERVAL 200  /* 0.2 seconds */
 
 static void *
diff --git a/lib/conntrack.h b/lib/conntrack.h
index b0d0fc8d9597..a55609bb2907 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -20,6 +20,7 @@ 
 #include <stdbool.h>
 
 #include "cmap.h"
+#include "ct-dpif.h"
 #include "latch.h"
 #include "odp-netlink.h"
 #include "openvswitch/hmap.h"
@@ -93,7 +94,7 @@  int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
                       const struct ovs_key_ct_labels *setlabel,
                       ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
                       const struct nat_action_info_t *nat_action_info,
-                      long long now);
+                      long long now, uint32_t tpid);
 void conntrack_clear(struct dp_packet *packet);
 
 struct conntrack_dump {
@@ -111,6 +112,11 @@  struct conntrack_zone_limit {
     uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
 };
 
+struct timeout_policy {
+    struct hmap_node node;
+    struct ct_dpif_timeout_policy p;
+};
+
 enum {
     INVALID_ZONE = -2,
     DEFAULT_ZONE = -1, /* Default zone for zone limit management. */
@@ -139,5 +145,11 @@  struct conntrack_zone_limit zone_limit_get(struct conntrack *ct,
                                            int32_t zone);
 int zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit);
 int zone_limit_delete(struct conntrack *ct, uint16_t zone);
+int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp);
+int timeout_policy_delete(struct conntrack *ct, uint32_t tpid);
+struct timeout_policy *timeout_policy_get(struct conntrack *ct, int32_t tpid);
+struct timeout_policy *timeout_policy_lookup(struct conntrack *ct,
+                                             int32_t tpid);
+
 
 #endif /* conntrack.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e456cc9befbe..41e805a832c8 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7337,6 +7337,7 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         bool commit = false;
         unsigned int left;
         uint16_t zone = 0;
+        uint32_t tpid = 0;
         const char *helper = NULL;
         const uint32_t *setmark = NULL;
         const struct ovs_key_ct_labels *setlabel = NULL;
@@ -7372,8 +7373,9 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
                  * netlink events. */
                 break;
             case OVS_CT_ATTR_TIMEOUT:
-                /* Userspace datapath does not support customized timeout
-                 * policy yet. */
+                if (!str_to_uint(nl_attr_get_string(b), 10, &tpid)) {
+                    VLOG_WARN("Invalid tpid %s.", nl_attr_get_string(b));
+                }
                 break;
             case OVS_CT_ATTR_NAT: {
                 const struct nlattr *b_nest;
@@ -7459,7 +7461,7 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         conntrack_execute(dp->conntrack, packets_, aux->flow->dl_type, force,
                           commit, zone, setmark, setlabel, aux->flow->tp_src,
                           aux->flow->tp_dst, helper, nat_action_info_ref,
-                          pmd->ctx.now / 1000);
+                          pmd->ctx.now / 1000, tpid);
         break;
     }
 
@@ -7693,6 +7695,64 @@  dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED,
 }
 
 static int
+dpif_netdev_ct_set_timeout_policy(struct dpif *dpif,
+                                  const struct ct_dpif_timeout_policy *dpif_tp)
+{
+    struct timeout_policy tp;
+    struct dp_netdev *dp;
+    int err = 0;
+
+    dp = get_dp_netdev(dpif);
+    memcpy(&tp.p, dpif_tp, sizeof tp.p);
+    err = timeout_policy_update(dp->conntrack, &tp);
+    return err;
+}
+
+static int
+dpif_netdev_ct_get_timeout_policy(struct dpif *dpif, uint32_t tp_id,
+                                  struct ct_dpif_timeout_policy *dpif_tp)
+{
+    struct timeout_policy *tp;
+    struct dp_netdev *dp;
+    int err = 0;
+
+    dp = get_dp_netdev(dpif);
+    tp = timeout_policy_get(dp->conntrack, tp_id);
+    if (!tp) {
+        return EINVAL;
+    }
+    memcpy(dpif_tp, &tp->p, sizeof tp->p);
+    return err;
+}
+
+static int
+dpif_netdev_ct_del_timeout_policy(struct dpif *dpif,
+                                  uint32_t tp_id)
+{
+    struct dp_netdev *dp;
+    int err = 0;
+
+    dp = get_dp_netdev(dpif);
+    err = timeout_policy_delete(dp->conntrack, tp_id);
+    return err;
+}
+
+static int
+dpif_netdev_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
+                                       uint32_t tp_id,
+                                       uint16_t dl_type OVS_UNUSED,
+                                       uint8_t nw_proto OVS_UNUSED,
+                                       char **tp_name, bool *is_generic)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+
+    ds_put_format(&ds, "%"PRIu32, tp_id);
+    *tp_name = ds_steal_cstr(&ds);
+    *is_generic = true;
+    return 0;
+}
+
+static int
 dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
@@ -7802,13 +7862,13 @@  const struct dpif_class dpif_netdev_class = {
     dpif_netdev_ct_set_limits,
     dpif_netdev_ct_get_limits,
     dpif_netdev_ct_del_limits,
-    NULL,                       /* ct_set_timeout_policy */
-    NULL,                       /* ct_get_timeout_policy */
-    NULL,                       /* ct_del_timeout_policy */
+    dpif_netdev_ct_set_timeout_policy,
+    dpif_netdev_ct_get_timeout_policy,
+    dpif_netdev_ct_del_timeout_policy,
     NULL,                       /* ct_timeout_policy_dump_start */
     NULL,                       /* ct_timeout_policy_dump_next */
     NULL,                       /* ct_timeout_policy_dump_done */
-    NULL,                       /* ct_get_timeout_policy_name */
+    dpif_netdev_ct_get_timeout_policy_name,
     dpif_netdev_ipf_set_enabled,
     dpif_netdev_ipf_set_min_frag,
     dpif_netdev_ipf_set_max_nfrags,
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 4a39c929c207..a61c934aaf2f 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -3301,8 +3301,10 @@  udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
 AT_CHECK([ovs-appctl dpctl/flush-conntrack])
 
 dnl Shorten the udp_single and icmp_first timeout in zone 5
+dnl Userspace datapath uses udp_first and icmp_reply, and
+dnl kernel datapath uses udp_single and icmp_first
 VSCTL_ADD_DATAPATH_TABLE()
-AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3])
+AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_first=3 udp_single=3 icmp_first=3 icmp_reply=3])
 
 dnl Send ICMP and UDP traffic
 NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
index ba7f4102f494..72c84b9c7c82 100644
--- a/tests/system-userspace-macros.at
+++ b/tests/system-userspace-macros.at
@@ -99,12 +99,8 @@  m4_define([CHECK_CONNTRACK_NAT])
 # CHECK_CONNTRACK_TIMEOUT()
 #
 # Perform requirements checks for running conntrack customized timeout tests.
-* The userspace datapath does not support this feature yet.
 #
-m4_define([CHECK_CONNTRACK_TIMEOUT],
-[
-    AT_SKIP_IF([:])
-])
+m4_define([CHECK_CONNTRACK_TIMEOUT])
 
 # CHECK_CT_DPIF_SET_GET_MAXCONNS()
 #
diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
index f77ee75e38df..e7c73220aef5 100644
--- a/tests/test-conntrack.c
+++ b/tests/test-conntrack.c
@@ -90,7 +90,7 @@  ct_thread_main(void *aux_)
     ovs_barrier_block(&barrier);
     for (i = 0; i < n_pkts; i += batch_size) {
         conntrack_execute(ct, pkt_batch, dl_type, false, true, 0, NULL, NULL,
-                          0, 0, NULL, NULL, now);
+                          0, 0, NULL, NULL, now, 0);
     }
     ovs_barrier_block(&barrier);
     destroy_packets(pkt_batch);
@@ -174,7 +174,7 @@  pcap_batch_execute_conntrack(struct conntrack *ct_,
 
         if (flow.dl_type != dl_type) {
             conntrack_execute(ct_, &new_batch, dl_type, false, true, 0,
-                              NULL, NULL, 0, 0, NULL, NULL, now);
+                              NULL, NULL, 0, 0, NULL, NULL, now, 0);
             dp_packet_batch_init(&new_batch);
         }
         dp_packet_batch_add(&new_batch, packet);
@@ -182,7 +182,7 @@  pcap_batch_execute_conntrack(struct conntrack *ct_,
 
     if (!dp_packet_batch_is_empty(&new_batch)) {
         conntrack_execute(ct_, &new_batch, dl_type, false, true, 0, NULL, NULL,
-                          0, 0, NULL, NULL, now);
+                          0, 0, NULL, NULL, now, 0);
     }
 
 }