Message ID | 1567030469-120137-10-git-send-email-yihung.wei@gmail.com |
---|---|
State | Accepted |
Commit | 187bb41fbf447acf9fb6ac117dc923bbe649e78c |
Headers | show |
Series | Support zone-based conntrack timeout policy | expand |
> On Aug 28, 2019, at 3:14 PM, Yi-Hung Wei <yihung.wei@gmail.com> wrote: > > diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h > index e988626ea05b..4a599c8eb866 100644 > --- a/lib/dpif-provider.h > +++ b/lib/dpif-provider.h > @@ -542,6 +542,16 @@ struct dpif_class { > struct ct_dpif_timeout_policy *tp); > int (*ct_timeout_policy_dump_done)(struct dpif *, void *state); > > + /* Gets timeout policy based on 'tp_id', 'dl_type' and 'nw_proto'. > + * On success, returns 0, stores the timeout policy name in 'tp_name', > + * and sets 'is_generic'. 'is_generic' is false if the returned timeout > + * policy in the 'dpif' is specific to 'dl_type' and 'nw_proto' in the > + * datapath, i.e. in kernel datapath. Sets 'is_generic' to true, if > + * the timeout policy supports all OVS supported L3/L4 protocols. */ > + int (*ct_get_timeout_policy_name)(struct dpif *, uint32_t tp_id, > + uint16_t dl_type, uint8_t nw_proto, > + struct ds *tp_name, bool *is_generic); I don't have a great justification, but we've generally used "char **" instead of "struct ds" in these interfaces. Let me know what you think of the attached incremental. It ended up being a more invasive change than I expected, but I think it will be more consistent in our API. > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c > index 17800f3c8a3f..c8e508bca2c8 100644 > --- a/ofproto/ofproto-dpif-xlate.c > +++ b/ofproto/ofproto-dpif-xlate.c > @@ -5983,6 +5983,25 @@ put_ct_helper(struct xlate_ctx *ctx, > } > > static void > +put_ct_timeout(struct ofpbuf *odp_actions, const struct dpif_backer *backer, > + const struct flow *flow, struct flow_wildcards *wc, > + uint16_t zone_id) > +{ > + struct ds tp_name = DS_EMPTY_INITIALIZER; > + bool unwildcard; > + > + if (ofproto_dpif_ct_zone_timeout_policy_get_name(backer, zone_id, > + ntohs(flow->dl_type), flow->nw_proto, &tp_name, &unwildcard)) { > + nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, ds_cstr(&tp_name)); > + > + if (unwildcard) { > + memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); I think it's worth mentioning again why we're unwildcarding "nw_proto" and why it wasn't necessary to also unwildcard "dl_type". How about something like the following: /* The underlying datapath requires separate timeout * policies for different Ethertypes and IP protocols. We * don't need to unwildcard 'wc->masks.dl_type' since that * field is always unwildcarded in megaflows. */ memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); Sorry for the long delay in finishing this review. Assuming you're happy with the changes, I'll merge the series into master. Thanks, --Justin -=-=-=-=-=-=-=-=-=-=- diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c index 2181c83157ad..23db72dbbbb1 100644 --- a/lib/ct-dpif.c +++ b/lib/ct-dpif.c @@ -866,7 +866,7 @@ ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state) int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, uint16_t dl_type, uint8_t nw_proto, - struct ds *tp_name, bool *is_generic) + char **tp_name, bool *is_generic) { return (dpif->dpif_class->ct_get_timeout_policy_name ? dpif->dpif_class->ct_get_timeout_policy_name( diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h index b95e6ac762ab..bbcb537ba46d 100644 --- a/lib/ct-dpif.h +++ b/lib/ct-dpif.h @@ -320,6 +320,6 @@ int ct_dpif_timeout_policy_dump_next(struct dpif *dpif, void *state, int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state); int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, uint16_t dl_type, uint8_t nw_proto, - struct ds *tp_name, bool *is_generic); + char **tp_name, bool *is_generic); #endif /* CT_DPIF_H */ diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 8f1c6d1ffde7..71d9cdbabc5b 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -3056,28 +3056,32 @@ static struct dpif_netlink_timeout_policy_protocol tp_protos[] = { static void dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num, - struct ds *tp_name) + char **tp_name) { - ds_clear(tp_name); - ds_put_format(tp_name, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id); - ct_dpif_format_ipproto(tp_name, l4num); + struct ds ds = DS_EMPTY_INITIALIZER; + ds_put_format(&ds, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id); + ct_dpif_format_ipproto(&ds, l4num); if (l3num == AF_INET) { - ds_put_cstr(tp_name, "4"); + ds_put_cstr(&ds, "4"); } else if (l3num == AF_INET6 && l4num != IPPROTO_ICMPV6) { - ds_put_cstr(tp_name, "6"); + ds_put_cstr(&ds, "6"); } - ovs_assert(tp_name->length < CTNL_TIMEOUT_NAME_MAX); + ovs_assert(ds.length < CTNL_TIMEOUT_NAME_MAX); + + *tp_name = ds_steal_cstr(&ds); } static int dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED, - uint32_t tp_id, uint16_t dl_type, uint8_t nw_proto, struct ds *tp_name, - bool *is_generic) + uint32_t tp_id, uint16_t dl_type, + uint8_t nw_proto, char **tp_name, + bool *is_generic) { dpif_netlink_format_tp_name(tp_id, - dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6, nw_proto, tp_name); + dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6, + nw_proto, tp_name); *is_generic = false; return 0; } @@ -3275,14 +3279,17 @@ static int dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED, const struct ct_dpif_timeout_policy *tp) { - struct nl_ct_timeout_policy nl_tp; - struct ds nl_tp_name = DS_EMPTY_INITIALIZER; int err = 0; for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) { + struct nl_ct_timeout_policy nl_tp; + char *nl_tp_name; + dpif_netlink_format_tp_name(tp->id, tp_protos[i].l3num, tp_protos[i].l4num, &nl_tp_name); - ovs_strlcpy(nl_tp.name, ds_cstr(&nl_tp_name), sizeof nl_tp.name); + ovs_strlcpy(nl_tp.name, nl_tp_name, sizeof nl_tp.name); + free(nl_tp_name); + nl_tp.l3num = tp_protos[i].l3num; nl_tp.l4num = tp_protos[i].l4num; dpif_netlink_get_nl_tp_attrs(tp, tp_protos[i].l4num, &nl_tp); @@ -3295,7 +3302,6 @@ dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED, } out: - ds_destroy(&nl_tp_name); return err; } @@ -3304,27 +3310,29 @@ dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED, uint32_t tp_id, struct ct_dpif_timeout_policy *tp) { - struct nl_ct_timeout_policy nl_tp; - struct ds nl_tp_name = DS_EMPTY_INITIALIZER; int err = 0; tp->id = tp_id; tp->present = 0; for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) { + struct nl_ct_timeout_policy nl_tp; + char *nl_tp_name; + dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num, tp_protos[i].l4num, &nl_tp_name); - err = nl_ct_get_timeout_policy(ds_cstr(&nl_tp_name), &nl_tp); + err = nl_ct_get_timeout_policy(nl_tp_name, &nl_tp); if (err) { VLOG_WARN_RL(&error_rl, "failed to get timeout policy %s (%s)", - nl_tp.name, ovs_strerror(err)); + nl_tp_name, ovs_strerror(err)); + free(nl_tp_name); goto out; } + free(nl_tp_name); dpif_netlink_set_ct_dpif_tp_attrs(&nl_tp, tp); } out: - ds_destroy(&nl_tp_name); return err; } @@ -3334,25 +3342,25 @@ static int dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED, uint32_t tp_id) { - struct ds nl_tp_name = DS_EMPTY_INITIALIZER; int ret = 0; for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) { + char *nl_tp_name; dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num, tp_protos[i].l4num, &nl_tp_name); - int err = nl_ct_del_timeout_policy(ds_cstr(&nl_tp_name)); + int err = nl_ct_del_timeout_policy(nl_tp_name); if (err == ENOENT) { err = 0; } if (err) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(6, 6); VLOG_INFO_RL(&rl, "failed to delete timeout policy %s (%s)", - ds_cstr(&nl_tp_name), ovs_strerror(err)); + nl_tp_name, ovs_strerror(err)); ret = 1; } + free(nl_tp_name); } - ds_destroy(&nl_tp_name); return ret; } diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 4a599c8eb866..61feb94e8fca 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -546,11 +546,14 @@ struct dpif_class { * On success, returns 0, stores the timeout policy name in 'tp_name', * and sets 'is_generic'. 'is_generic' is false if the returned timeout * policy in the 'dpif' is specific to 'dl_type' and 'nw_proto' in the - * datapath, i.e. in kernel datapath. Sets 'is_generic' to true, if - * the timeout policy supports all OVS supported L3/L4 protocols. */ + * datapath (e.g., the Linux kernel datapath). Sets 'is_generic' to + * true, if the timeout policy supports all OVS supported L3/L4 + * protocols. + * + * The caller is responsible for freeing 'tp_name'. */ int (*ct_get_timeout_policy_name)(struct dpif *, uint32_t tp_id, uint16_t dl_type, uint8_t nw_proto, - struct ds *tp_name, bool *is_generic); + char **tp_name, bool *is_generic); /* IP Fragmentation. */ diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index c8e508bca2c8..a9d5aa754ebc 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -5987,18 +5987,22 @@ put_ct_timeout(struct ofpbuf *odp_actions, const struct dpif_backer *backer, const struct flow *flow, struct flow_wildcards *wc, uint16_t zone_id) { - struct ds tp_name = DS_EMPTY_INITIALIZER; bool unwildcard; + char *tp_name = NULL; if (ofproto_dpif_ct_zone_timeout_policy_get_name(backer, zone_id, ntohs(flow->dl_type), flow->nw_proto, &tp_name, &unwildcard)) { - nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, ds_cstr(&tp_name)); + nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, tp_name); if (unwildcard) { - memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); + /* The underlying datapath requires separate timeout + * policies for different Ethertypes and IP protocols. We + * don't need to unwildcard 'wc->masks.dl_type' since that + * field is always unwildcarded in megaflows. */ + memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); } } - ds_destroy(&tp_name); + free(tp_name); } static void diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 53c3515d15db..496a16c8a4af 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -5441,19 +5441,19 @@ ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone_id) } /* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and - * 'nw_proto'. Returns true if the zoned-based timeout policy is configured. + * 'nw_proto'. Returns true if the zone-based timeout policy is configured. * On success, stores the timeout policy name in 'tp_name', and sets * 'unwildcard' based on the dpif implementation. If 'unwildcard' is true, * the returned timeout policy is 'dl_type' and 'nw_proto' specific, and OVS * needs to unwildcard the datapath flow for this timeout policy in flow - * translation. */ + * translation. + * + * The caller is responsible for freeing 'tp_name'. */ bool ofproto_dpif_ct_zone_timeout_policy_get_name( const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type, - uint8_t nw_proto, struct ds *tp_name, bool *unwildcard) + uint8_t nw_proto, char **tp_name, bool *unwildcard) { - bool is_generic_tp; - if (!ct_dpif_timeout_policy_support_ipproto(nw_proto)) { return false; } @@ -5463,14 +5463,15 @@ ofproto_dpif_ct_zone_timeout_policy_get_name( return false; } + bool is_generic; if (ct_dpif_get_timeout_policy_name(backer->dpif, - ct_zone->ct_tp->tp_id, dl_type, - nw_proto, tp_name, &is_generic_tp)) { + ct_zone->ct_tp->tp_id, dl_type, + nw_proto, tp_name, &is_generic)) { return false; } /* Unwildcard datapath flow if it is not a generic timeout policy. */ - *unwildcard = !is_generic_tp; + *unwildcard = !is_generic; return true; } diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index 7703be9899e6..cd453636c920 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -380,6 +380,6 @@ bool ovs_native_tunneling_is_on(struct ofproto_dpif *); bool ofproto_dpif_ct_zone_timeout_policy_get_name( const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type, - uint8_t nw_proto, struct ds *tp_name, bool *unwildcard); + uint8_t nw_proto, char **tp_name, bool *unwildcard); #endif /* ofproto-dpif.h */
On Wed, Sep 25, 2019 at 3:59 PM Justin Pettit <jpettit@ovn.org> wrote: > > > > On Aug 28, 2019, at 3:14 PM, Yi-Hung Wei <yihung.wei@gmail.com> wrote: > > > > diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h > > index e988626ea05b..4a599c8eb866 100644 > > --- a/lib/dpif-provider.h > > +++ b/lib/dpif-provider.h > > @@ -542,6 +542,16 @@ struct dpif_class { > > struct ct_dpif_timeout_policy *tp); > > int (*ct_timeout_policy_dump_done)(struct dpif *, void *state); > > > > + /* Gets timeout policy based on 'tp_id', 'dl_type' and 'nw_proto'. > > + * On success, returns 0, stores the timeout policy name in 'tp_name', > > + * and sets 'is_generic'. 'is_generic' is false if the returned timeout > > + * policy in the 'dpif' is specific to 'dl_type' and 'nw_proto' in the > > + * datapath, i.e. in kernel datapath. Sets 'is_generic' to true, if > > + * the timeout policy supports all OVS supported L3/L4 protocols. */ > > + int (*ct_get_timeout_policy_name)(struct dpif *, uint32_t tp_id, > > + uint16_t dl_type, uint8_t nw_proto, > > + struct ds *tp_name, bool *is_generic); > > I don't have a great justification, but we've generally used "char **" instead of "struct ds" in these interfaces. Let me know what you think of the attached incremental. It ended up being a more invasive change than I expected, but I think it will be more consistent in our API. > > > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c > > index 17800f3c8a3f..c8e508bca2c8 100644 > > --- a/ofproto/ofproto-dpif-xlate.c > > +++ b/ofproto/ofproto-dpif-xlate.c > > @@ -5983,6 +5983,25 @@ put_ct_helper(struct xlate_ctx *ctx, > > } > > > > static void > > +put_ct_timeout(struct ofpbuf *odp_actions, const struct dpif_backer *backer, > > + const struct flow *flow, struct flow_wildcards *wc, > > + uint16_t zone_id) > > +{ > > + struct ds tp_name = DS_EMPTY_INITIALIZER; > > + bool unwildcard; > > + > > + if (ofproto_dpif_ct_zone_timeout_policy_get_name(backer, zone_id, > > + ntohs(flow->dl_type), flow->nw_proto, &tp_name, &unwildcard)) { > > + nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, ds_cstr(&tp_name)); > > + > > + if (unwildcard) { > > + memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); > > I think it's worth mentioning again why we're unwildcarding "nw_proto" and why it wasn't necessary to also unwildcard "dl_type". How about something like the following: > > /* The underlying datapath requires separate timeout > * policies for different Ethertypes and IP protocols. We > * don't need to unwildcard 'wc->masks.dl_type' since that > * field is always unwildcarded in megaflows. */ > memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); > > Sorry for the long delay in finishing this review. Assuming you're happy with the changes, I'll merge the series into master. > > Thanks, > > --Justin Thanks Justin for the review. The proposed diff looks good to me. Thanks, -Yi-Hung
> On Sep 25, 2019, at 4:34 PM, Yi-Hung Wei <yihung.wei@gmail.com> wrote: > > Thanks Justin for the review. The proposed diff looks good to me. Thanks. I pushed the series to master. --Justin
diff --git a/NEWS b/NEWS index c5caa13d6374..9f7fbb852e08 100644 --- a/NEWS +++ b/NEWS @@ -69,6 +69,7 @@ v2.12.0 - xx xxx xxxx - Linux datapath: * Support for the kernel versions 4.19.x and 4.20.x. * Support for the kernel version 5.0.x. + * Add support for conntrack zone-based timeout policy. - 'ovs-dpctl dump-flows' is no longer suitable for dumping offloaded flows. 'ovs-appctl dpctl/dump-flows' should be used instead. - Add L2 GRE tunnel over IPv6 support. diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c index be59f34ff995..2181c83157ad 100644 --- a/lib/ct-dpif.c +++ b/lib/ct-dpif.c @@ -862,3 +862,14 @@ ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state) ? dpif->dpif_class->ct_timeout_policy_dump_done(dpif, state) : EOPNOTSUPP); } + +int +ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, + uint16_t dl_type, uint8_t nw_proto, + struct ds *tp_name, bool *is_generic) +{ + return (dpif->dpif_class->ct_get_timeout_policy_name + ? dpif->dpif_class->ct_get_timeout_policy_name( + dpif, tp_id, dl_type, nw_proto, tp_name, is_generic) + : EOPNOTSUPP); +} diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h index aabd6962f2c0..b95e6ac762ab 100644 --- a/lib/ct-dpif.h +++ b/lib/ct-dpif.h @@ -318,5 +318,8 @@ int ct_dpif_timeout_policy_dump_start(struct dpif *dpif, void **statep); int ct_dpif_timeout_policy_dump_next(struct dpif *dpif, void *state, struct ct_dpif_timeout_policy *tp); int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state); +int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, + uint16_t dl_type, uint8_t nw_proto, + struct ds *tp_name, bool *is_generic); #endif /* CT_DPIF_H */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 7240a3e6f3c8..36637052e598 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -7539,6 +7539,7 @@ const struct dpif_class dpif_netdev_class = { NULL, /* ct_timeout_policy_dump_start */ NULL, /* ct_timeout_policy_dump_next */ NULL, /* ct_timeout_policy_dump_done */ + NULL, /* ct_get_timeout_policy_name */ dpif_netdev_ipf_set_enabled, dpif_netdev_ipf_set_min_frag, dpif_netdev_ipf_set_max_nfrags, diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 85827cd65503..8f1c6d1ffde7 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -3071,6 +3071,17 @@ dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num, ovs_assert(tp_name->length < CTNL_TIMEOUT_NAME_MAX); } +static int +dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED, + uint32_t tp_id, uint16_t dl_type, uint8_t nw_proto, struct ds *tp_name, + bool *is_generic) +{ + dpif_netlink_format_tp_name(tp_id, + dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6, nw_proto, tp_name); + *is_generic = false; + return 0; +} + #define CT_DPIF_NL_TP_TCP_MAPPINGS \ CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \ CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \ @@ -3905,6 +3916,7 @@ const struct dpif_class dpif_netlink_class = { dpif_netlink_ct_timeout_policy_dump_start, dpif_netlink_ct_timeout_policy_dump_next, dpif_netlink_ct_timeout_policy_dump_done, + dpif_netlink_ct_get_timeout_policy_name, NULL, /* ipf_set_enabled */ NULL, /* ipf_set_min_frag */ NULL, /* ipf_set_max_nfrags */ diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index e988626ea05b..4a599c8eb866 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -542,6 +542,16 @@ struct dpif_class { struct ct_dpif_timeout_policy *tp); int (*ct_timeout_policy_dump_done)(struct dpif *, void *state); + /* Gets timeout policy based on 'tp_id', 'dl_type' and 'nw_proto'. + * On success, returns 0, stores the timeout policy name in 'tp_name', + * and sets 'is_generic'. 'is_generic' is false if the returned timeout + * policy in the 'dpif' is specific to 'dl_type' and 'nw_proto' in the + * datapath, i.e. in kernel datapath. Sets 'is_generic' to true, if + * the timeout policy supports all OVS supported L3/L4 protocols. */ + int (*ct_get_timeout_policy_name)(struct dpif *, uint32_t tp_id, + uint16_t dl_type, uint8_t nw_proto, + struct ds *tp_name, bool *is_generic); + /* IP Fragmentation. */ /* Disables or enables conntrack fragment reassembly. The default diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 17800f3c8a3f..c8e508bca2c8 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -5983,6 +5983,25 @@ put_ct_helper(struct xlate_ctx *ctx, } static void +put_ct_timeout(struct ofpbuf *odp_actions, const struct dpif_backer *backer, + const struct flow *flow, struct flow_wildcards *wc, + uint16_t zone_id) +{ + struct ds tp_name = DS_EMPTY_INITIALIZER; + bool unwildcard; + + if (ofproto_dpif_ct_zone_timeout_policy_get_name(backer, zone_id, + ntohs(flow->dl_type), flow->nw_proto, &tp_name, &unwildcard)) { + nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, ds_cstr(&tp_name)); + + if (unwildcard) { + memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); + } + } + ds_destroy(&tp_name); +} + +static void put_ct_nat(struct xlate_ctx *ctx) { struct ofpact_nat *ofn = ctx->ct_nat_action; @@ -6072,6 +6091,10 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc, nl_msg_put_u32(ctx->odp_actions, OVS_CT_ATTR_EVENTMASK, OVS_CT_EVENTMASK_DEFAULT); } + if (ctx->xbridge->support.ct_timeout) { + put_ct_timeout(ctx->odp_actions, ctx->xbridge->ofproto->backer, + &ctx->xin->flow, ctx->wc, zone); + } } nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone); put_ct_mark(&ctx->xin->flow, ctx->odp_actions, ctx->wc); diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 4b4c4d722645..eb9dea3528c6 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1323,6 +1323,67 @@ check_ct_clear(struct dpif_backer *backer) return supported; } +/* Tests whether 'backer''s datapath supports the OVS_CT_ATTR_TIMEOUT + * attribute in OVS_ACTION_ATTR_CT. */ +static bool +check_ct_timeout_policy(struct dpif_backer *backer) +{ + struct dpif_execute execute; + struct dp_packet packet; + struct ofpbuf actions; + struct flow flow = { + .dl_type = CONSTANT_HTONS(ETH_TYPE_IP), + .nw_proto = IPPROTO_UDP, + .nw_ttl = 64, + /* Use the broadcast address on the loopback address range 127/8 to + * avoid hitting any real conntrack entries. We leave the UDP ports to + * zeroes for the same purpose. */ + .nw_src = CONSTANT_HTONL(0x7fffffff), + .nw_dst = CONSTANT_HTONL(0x7fffffff), + }; + size_t ct_start; + int error; + + /* Compose CT action with timeout policy attribute and check if datapath + * can decode the message. */ + ofpbuf_init(&actions, 64); + ct_start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_CT); + /* Timeout policy has no effect without the commit flag, but currently the + * datapath will accept a timeout policy even without commit. This is + * useful as we do not want to persist the probe connection in the + * conntrack table. */ + nl_msg_put_string(&actions, OVS_CT_ATTR_TIMEOUT, "ovs_test_tp"); + nl_msg_end_nested(&actions, ct_start); + + /* Compose a dummy UDP packet. */ + dp_packet_init(&packet, 0); + flow_compose(&packet, &flow, NULL, 64); + + /* Execute the actions. On older datapaths this fails with EINVAL, on + * newer datapaths it succeeds. */ + execute.actions = actions.data; + execute.actions_len = actions.size; + execute.packet = &packet; + execute.flow = &flow; + execute.needs_help = false; + execute.probe = true; + execute.mtu = 0; + + error = dpif_execute(backer->dpif, &execute); + + dp_packet_uninit(&packet); + ofpbuf_uninit(&actions); + + if (error) { + VLOG_INFO("%s: Datapath does not support timeout policy in conntrack " + "action", dpif_name(backer->dpif)); + } else { + VLOG_INFO("%s: Datapath supports timeout policy in conntrack action", + dpif_name(backer->dpif)); + } + + return !error; +} /* Tests whether 'backer''s datapath supports the * OVS_ACTION_ATTR_CHECK_PKT_LEN action. */ @@ -1473,6 +1534,7 @@ check_support(struct dpif_backer *backer) backer->rt_support.ct_clear = check_ct_clear(backer); backer->rt_support.max_hash_alg = check_max_dp_hash_alg(backer); backer->rt_support.check_pkt_len = check_check_pkt_len(backer); + backer->rt_support.ct_timeout = check_ct_timeout_policy(backer); /* Flow fields. */ backer->rt_support.odp.ct_state = check_ct_state(backer); @@ -5378,6 +5440,40 @@ ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone) } } +/* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and + * 'nw_proto'. Returns true if the zoned-based timeout policy is configured. + * On success, stores the timeout policy name in 'tp_name', and sets + * 'unwildcard' based on the dpif implementation. If 'unwildcard' is true, + * the returned timeout policy is 'dl_type' and 'nw_proto' specific, and OVS + * needs to unwildcard the datapath flow for this timeout policy in flow + * translation. */ +bool +ofproto_dpif_ct_zone_timeout_policy_get_name( + const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type, + uint8_t nw_proto, struct ds *tp_name, bool *unwildcard) +{ + bool is_generic_tp; + + if (!ct_dpif_timeout_policy_support_ipproto(nw_proto)) { + return false; + } + + struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone); + if (!ct_zone) { + return false; + } + + if (ct_dpif_get_timeout_policy_name(backer->dpif, + ct_zone->ct_tp->tp_id, dl_type, + nw_proto, tp_name, &is_generic_tp)) { + return false; + } + + /* Unwildcard datapath flow if it is not a generic timeout policy. */ + *unwildcard = !is_generic_tp; + return true; +} + static bool set_frag_handling(struct ofproto *ofproto_, enum ofputil_frag_handling frag_handling) diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index 0dd7a45fe550..7703be9899e6 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -194,8 +194,12 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *, /* Highest supported dp_hash algorithm. */ \ DPIF_SUPPORT_FIELD(size_t, max_hash_alg, "Max dp_hash algorithm") \ \ - /* True if the datapath supports OVS_ACTION_ATTR_CHECK_PKT_LEN. */ \ - DPIF_SUPPORT_FIELD(bool, check_pkt_len, "Check pkt length action") + /* True if the datapath supports OVS_ACTION_ATTR_CHECK_PKT_LEN. */ \ + DPIF_SUPPORT_FIELD(bool, check_pkt_len, "Check pkt length action") \ + \ + /* True if the datapath supports OVS_CT_ATTR_TIMEOUT in \ + * OVS_ACTION_ATTR_CT action. */ \ + DPIF_SUPPORT_FIELD(bool, ct_timeout, "Conntrack timeout policy") /* Stores the various features which the corresponding backer supports. */ struct dpif_backer_support { @@ -374,4 +378,8 @@ int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *, bool ovs_native_tunneling_is_on(struct ofproto_dpif *); +bool ofproto_dpif_ct_zone_timeout_policy_get_name( + const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type, + uint8_t nw_proto, struct ds *tp_name, bool *unwildcard); + #endif /* ofproto-dpif.h */ diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at index 48e94642b948..75b59ae66ba7 100644 --- a/tests/system-kmod-macros.at +++ b/tests/system-kmod-macros.at @@ -100,6 +100,17 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP], # m4_define([CHECK_CONNTRACK_NAT]) +# CHECK_CONNTRACK_TIMEOUT() +# +# Perform requirements checks for running conntrack customized timeout tests. +# +m4_define([CHECK_CONNTRACK_TIMEOUT], +[ + AT_SKIP_IF([! cat /boot/config-$(uname -r) | grep NF_CONNTRACK_TIMEOUT | grep '=y' > /dev/null]) + modprobe nfnetlink_cttimeout + on_exit 'modprobe -r nfnetlink_cttimeout' +]) + # CHECK_CT_DPIF_PER_ZONE_LIMIT() # # Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per @@ -185,3 +196,12 @@ m4_define([OVS_CHECK_KERNEL_EXCL], sublevel=$(uname -r | sed -e 's/\./ /g' | awk '{print $ 2}') AT_SKIP_IF([ ! ( test $version -lt $1 || ( test $version -eq $1 && test $sublevel -lt $2 ) || test $version -gt $3 || ( test $version -eq $3 && test $sublevel -gt $4 ) ) ]) ]) + +# VSCTL_ADD_DATAPATH_TABLE() +# +# Create system datapath table "system" for kernel tests in ovsdb +m4_define([VSCTL_ADD_DATAPATH_TABLE], +[ + AT_CHECK([ovs-vsctl -- --id=@m create Datapath datapath_version=0 -- set Open_vSwitch . datapaths:"system"=@m], [0], [stdout]) + DP_TYPE=$(echo "system") +]) diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 1a04199dcfe9..f2870e922906 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -3179,6 +3179,72 @@ NXST_FLOW reply: OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - zone-based timeout policy]) +CHECK_CONNTRACK() +CHECK_CONNTRACK_TIMEOUT() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +AT_DATA([flows.txt], [dnl +priority=1,action=drop +priority=10,arp,action=normal +priority=100,in_port=1,ip,action=ct(zone=5, table=1) +priority=100,in_port=2,ip,action=ct(zone=5, table=1) +table=1,in_port=2,ip,ct_state=+trk+est,action=1 +table=1,in_port=1,ip,ct_state=+trk+new,action=ct(commit,zone=5),2 +table=1,in_port=1,ip,ct_state=+trk+est,action=2 +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +dnl Test with default timeout +dnl The default udp_single and icmp_first timeouts are 30 seconds in +dnl kernel DP, and 60 seconds in userspace DP. + +dnl Send ICMP and UDP traffic +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) + +sleep 4 + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl +icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5 +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=5 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +dnl Shorten the udp_single and icmp_first timeout in zone 5 +VSCTL_ADD_DATAPATH_TABLE() +AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3]) + +dnl Send ICMP and UDP traffic +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl +icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5 +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=5 +]) + +dnl Wait until the timeout expire. +dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired. +sleep 4 + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_BANNER([conntrack - L7]) AT_SETUP([conntrack - IPv4 HTTP]) diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index a411e3d8919d..0a21db88a998 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -98,6 +98,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP]) # m4_define([CHECK_CONNTRACK_NAT]) +# CHECK_CONNTRACK_TIMEOUT() +# +# Perform requirements checks for running conntrack customized timeout tests. +* The userspace datapath does not support this feature yet. +# +m4_define([CHECK_CONNTRACK_TIMEOUT], +[ + AT_SKIP_IF([:]) +]) + # CHECK_CT_DPIF_PER_ZONE_LIMIT() # # Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per @@ -295,3 +305,12 @@ m4_define([OVS_CHECK_KERNEL_EXCL], [ AT_SKIP_IF([:]) ]) + +# VSCTL_ADD_DATAPATH_TABLE() +# +# Create datapath table "netdev" for userspace tests in ovsdb +m4_define([VSCTL_ADD_DATAPATH_TABLE], +[ + AT_CHECK([ovs-vsctl -- --id=@m create Datapath datapath_version=0 -- set Open_vSwitch . datapaths:"netdev"=@m], [0], [stdout]) + DP_TYPE=$(echo "netdev") +])