Message ID | 1564097054-72663-8-git-send-email-yihung.wei@gmail.com |
---|---|
State | Changes Requested |
Headers | show |
Series | Support zone-based conntrack timeout policy | expand |
On Thu, Jul 25, 2019 at 04:24:09PM -0700, Yi-Hung Wei wrote: > This patch implements all the conntrack timeout policy related functions > defined in dpif_class for dpif-netlink class in Linux kernel datapath. > > In Linux kernel, the timeout policy is maintained per L3/L4 protocol, > and it is identified by 32 bytes null terminated string. However, > in vswitchd, the timeout policy is a generic one that consists of all > the supported L4 protocols. Therefore, the main task for this patch > is to break down the generic timeout policy into 6 sub policies ( > ipv4 tcp, udp, icmp, and ipv6 tcp, udp, icmp) in dpif-netlink.c > and push down the configuration using the netlink API in > netlink-conntrack.c. > > This patch also adds missing symbols in the windows datapath so > that the build on windows can pass. > > Appveyor CI: > * https://ci.appveyor.com/project/YiHungWei/ovs/builds/26250549 > > Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com> > --- > datapath-windows/include/OvsDpInterfaceCtExt.h | 114 ++++++ > datapath-windows/ovsext/Netlink/NetlinkProto.h | 1 + > include/windows/automake.mk | 1 + > .../windows/linux/netfilter/nfnetlink_cttimeout.h | 0 > lib/dpif-netlink.c | 432 ++++++++++++++++++++- > lib/dpif-netlink.h | 2 +- > lib/netlink-conntrack.c | 363 +++++++++++++++++ > lib/netlink-conntrack.h | 29 ++ > lib/netlink-protocol.h | 1 + > 9 files changed, 936 insertions(+), 7 deletions(-) > create mode 100644 include/windows/linux/netfilter/nfnetlink_cttimeout.h > > diff --git a/datapath-windows/include/OvsDpInterfaceCtExt.h b/datapath-windows/include/OvsDpInterfaceCtExt.h > index 3b947782e90c..4379855bb8dd 100644 > --- a/datapath-windows/include/OvsDpInterfaceCtExt.h > +++ b/datapath-windows/include/OvsDpInterfaceCtExt.h > @@ -421,4 +421,118 @@ struct nf_ct_tcp_flags { > UINT8 mask; > }; > > +/* File: nfnetlink_cttimeout.h */ > +enum ctnl_timeout_msg_types { > + IPCTNL_MSG_TIMEOUT_NEW, > + IPCTNL_MSG_TIMEOUT_GET, > + IPCTNL_MSG_TIMEOUT_DELETE, > + 
IPCTNL_MSG_TIMEOUT_DEFAULT_SET, > + IPCTNL_MSG_TIMEOUT_DEFAULT_GET, > + > + IPCTNL_MSG_TIMEOUT_MAX > +}; > + > +enum ctattr_timeout { > + CTA_TIMEOUT_UNSPEC, > + CTA_TIMEOUT_NAME, > + CTA_TIMEOUT_L3PROTO, > + CTA_TIMEOUT_L4PROTO, > + CTA_TIMEOUT_DATA, > + CTA_TIMEOUT_USE, > + __CTA_TIMEOUT_MAX > +}; > +#define CTA_TIMEOUT_MAX (__CTA_TIMEOUT_MAX - 1) > + > +enum ctattr_timeout_generic { > + CTA_TIMEOUT_GENERIC_UNSPEC, > + CTA_TIMEOUT_GENERIC_TIMEOUT, > + __CTA_TIMEOUT_GENERIC_MAX > +}; > +#define CTA_TIMEOUT_GENERIC_MAX (__CTA_TIMEOUT_GENERIC_MAX - 1) > + > +enum ctattr_timeout_tcp { > + CTA_TIMEOUT_TCP_UNSPEC, > + CTA_TIMEOUT_TCP_SYN_SENT, > + CTA_TIMEOUT_TCP_SYN_RECV, > + CTA_TIMEOUT_TCP_ESTABLISHED, > + CTA_TIMEOUT_TCP_FIN_WAIT, > + CTA_TIMEOUT_TCP_CLOSE_WAIT, > + CTA_TIMEOUT_TCP_LAST_ACK, > + CTA_TIMEOUT_TCP_TIME_WAIT, > + CTA_TIMEOUT_TCP_CLOSE, > + CTA_TIMEOUT_TCP_SYN_SENT2, > + CTA_TIMEOUT_TCP_RETRANS, > + CTA_TIMEOUT_TCP_UNACK, > + __CTA_TIMEOUT_TCP_MAX > +}; > +#define CTA_TIMEOUT_TCP_MAX (__CTA_TIMEOUT_TCP_MAX - 1) > + > +enum ctattr_timeout_udp { > + CTA_TIMEOUT_UDP_UNSPEC, > + CTA_TIMEOUT_UDP_UNREPLIED, > + CTA_TIMEOUT_UDP_REPLIED, > + __CTA_TIMEOUT_UDP_MAX > +}; > +#define CTA_TIMEOUT_UDP_MAX (__CTA_TIMEOUT_UDP_MAX - 1) > + > +enum ctattr_timeout_udplite { > + CTA_TIMEOUT_UDPLITE_UNSPEC, > + CTA_TIMEOUT_UDPLITE_UNREPLIED, > + CTA_TIMEOUT_UDPLITE_REPLIED, > + __CTA_TIMEOUT_UDPLITE_MAX > +}; > +#define CTA_TIMEOUT_UDPLITE_MAX (__CTA_TIMEOUT_UDPLITE_MAX - 1) > + > +enum ctattr_timeout_icmp { > + CTA_TIMEOUT_ICMP_UNSPEC, > + CTA_TIMEOUT_ICMP_TIMEOUT, > + __CTA_TIMEOUT_ICMP_MAX > +}; > +#define CTA_TIMEOUT_ICMP_MAX (__CTA_TIMEOUT_ICMP_MAX - 1) > + > +enum ctattr_timeout_dccp { > + CTA_TIMEOUT_DCCP_UNSPEC, > + CTA_TIMEOUT_DCCP_REQUEST, > + CTA_TIMEOUT_DCCP_RESPOND, > + CTA_TIMEOUT_DCCP_PARTOPEN, > + CTA_TIMEOUT_DCCP_OPEN, > + CTA_TIMEOUT_DCCP_CLOSEREQ, > + CTA_TIMEOUT_DCCP_CLOSING, > + CTA_TIMEOUT_DCCP_TIMEWAIT, > + __CTA_TIMEOUT_DCCP_MAX > +}; > +#define 
CTA_TIMEOUT_DCCP_MAX (__CTA_TIMEOUT_DCCP_MAX - 1) > + > +enum ctattr_timeout_sctp { > + CTA_TIMEOUT_SCTP_UNSPEC, > + CTA_TIMEOUT_SCTP_CLOSED, > + CTA_TIMEOUT_SCTP_COOKIE_WAIT, > + CTA_TIMEOUT_SCTP_COOKIE_ECHOED, > + CTA_TIMEOUT_SCTP_ESTABLISHED, > + CTA_TIMEOUT_SCTP_SHUTDOWN_SENT, > + CTA_TIMEOUT_SCTP_SHUTDOWN_RECD, > + CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, > + CTA_TIMEOUT_SCTP_HEARTBEAT_SENT, > + CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, > + __CTA_TIMEOUT_SCTP_MAX > +}; > +#define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) > + > +enum ctattr_timeout_icmpv6 { > + CTA_TIMEOUT_ICMPV6_UNSPEC, > + CTA_TIMEOUT_ICMPV6_TIMEOUT, > + __CTA_TIMEOUT_ICMPV6_MAX > +}; > +#define CTA_TIMEOUT_ICMPV6_MAX (__CTA_TIMEOUT_ICMPV6_MAX - 1) > + > +enum ctattr_timeout_gre { > + CTA_TIMEOUT_GRE_UNSPEC, > + CTA_TIMEOUT_GRE_UNREPLIED, > + CTA_TIMEOUT_GRE_REPLIED, > + __CTA_TIMEOUT_GRE_MAX > +}; > +#define CTA_TIMEOUT_GRE_MAX (__CTA_TIMEOUT_GRE_MAX - 1) > + > +#define CTNL_TIMEOUT_NAME_MAX 32 > + > #endif /* __OVS_DP_INTERFACE_CT_EXT_H_ */ > diff --git a/datapath-windows/ovsext/Netlink/NetlinkProto.h b/datapath-windows/ovsext/Netlink/NetlinkProto.h > index 59b56565c1dc..db1fa2bacae8 100644 > --- a/datapath-windows/ovsext/Netlink/NetlinkProto.h > +++ b/datapath-windows/ovsext/Netlink/NetlinkProto.h > @@ -51,6 +51,7 @@ > #define NLM_F_ECHO 0x008 > > #define NLM_F_ROOT 0x100 > +#define NLM_F_REPLACE 0x100 > #define NLM_F_MATCH 0x200 > #define NLM_F_EXCL 0x200 > #define NLM_F_ATOMIC 0x400 > diff --git a/include/windows/automake.mk b/include/windows/automake.mk > index 382627b51787..883bbbf5d97c 100644 > --- a/include/windows/automake.mk > +++ b/include/windows/automake.mk > @@ -15,6 +15,7 @@ noinst_HEADERS += \ > include/windows/linux/netfilter/nf_conntrack_tcp.h \ > include/windows/linux/netfilter/nfnetlink.h \ > include/windows/linux/netfilter/nfnetlink_conntrack.h \ > + include/windows/linux/netfilter/nfnetlink_cttimeout.h \ > include/windows/linux/pkt_sched.h \ > include/windows/linux/types.h \ 
> include/windows/net/if.h \ > diff --git a/include/windows/linux/netfilter/nfnetlink_cttimeout.h b/include/windows/linux/netfilter/nfnetlink_cttimeout.h > new file mode 100644 > index 000000000000..e69de29bb2d1 > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c > index 9825ce46a7f5..abfad9543c3b 100644 > --- a/lib/dpif-netlink.c > +++ b/lib/dpif-netlink.c > @@ -50,6 +50,7 @@ > #include "odp-util.h" > #include "openvswitch/dynamic-string.h" > #include "openvswitch/flow.h" > +#include "openvswitch/hmap.h" > #include "openvswitch/match.h" > #include "openvswitch/ofpbuf.h" > #include "openvswitch/poll-loop.h" > @@ -3028,6 +3029,425 @@ dpif_netlink_ct_del_limits(struct dpif *dpif OVS_UNUSED, > ofpbuf_delete(request); > return err; > } > + > +#define NL_TP_NAME_PREFIX "ovs_tp_" > + > +struct dpif_netlink_timeout_policy_protocol { > + uint16_t l3num; > + uint8_t l4num; > +}; > + > +enum OVS_PACKED_ENUM dpif_netlink_support_timeout_policy_protocol { > + DPIF_NL_TP_AF_INET_TCP, > + DPIF_NL_TP_AF_INET_UDP, > + DPIF_NL_TP_AF_INET_ICMP, > + DPIF_NL_TP_AF_INET6_TCP, > + DPIF_NL_TP_AF_INET6_UDP, > + DPIF_NL_TP_AF_INET6_ICMPV6, > + DPIF_NL_TP_MAX > +}; > + > +#define DPIF_NL_ALL_TP 0x3F > + > +static struct dpif_netlink_timeout_policy_protocol tp_protos[] = { > + [DPIF_NL_TP_AF_INET_TCP] = { .l3num = AF_INET, .l4num = IPPROTO_TCP }, > + [DPIF_NL_TP_AF_INET_UDP] = { .l3num = AF_INET, .l4num = IPPROTO_UDP }, > + [DPIF_NL_TP_AF_INET_ICMP] = { .l3num = AF_INET, .l4num = IPPROTO_ICMP }, > + [DPIF_NL_TP_AF_INET6_TCP] = { .l3num = AF_INET6, .l4num = IPPROTO_TCP }, > + [DPIF_NL_TP_AF_INET6_UDP] = { .l3num = AF_INET6, .l4num = IPPROTO_UDP }, > + [DPIF_NL_TP_AF_INET6_ICMPV6] = { .l3num = AF_INET6, > + .l4num = IPPROTO_ICMPV6 }, > +}; > + > +static void > +dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num, > + struct ds *tp_name) > +{ > + ds_clear(tp_name); > + ds_put_format(tp_name, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id); > + ct_dpif_format_ipproto(tp_name, 
l4num); > + > + if (l3num == AF_INET) { > + ds_put_cstr(tp_name, "4"); > + } else if (l3num == AF_INET6 && l4num != IPPROTO_ICMPV6) { Why excluding IPPROTO_ICMPV6 above? > + ds_put_cstr(tp_name, "6"); > + } > + > + ovs_assert(tp_name->length < CTNL_TIMEOUT_NAME_MAX); > +} > + > +#define CT_DPIF_TO_NL_TP_TCP_MAPPINGS \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, ESTABLISHED, ESTABLISHED) \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, FIN_WAIT, FIN_WAIT) \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, CLOSE_WAIT, CLOSE_WAIT) \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, LAST_ACK, LAST_ACK) \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, TIME_WAIT, TIME_WAIT) \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, CLOSE, CLOSE) \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, SYN_SENT2, SYN_SENT2) \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, RETRANSMIT, RETRANS) \ > + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, UNACK, UNACK) > + > +#define CT_DPIF_TO_NL_TP_UDP_MAPPINGS \ > + CT_DPIF_TO_NL_TP_MAPPING(UDP, UDP, FIRST, UNREPLIED) \ > + CT_DPIF_TO_NL_TP_MAPPING(UDP, UDP, MULTIPLE, REPLIED) > + > +#define CT_DPIF_TO_NL_TP_ICMP_MAPPINGS \ > + CT_DPIF_TO_NL_TP_MAPPING(ICMP, ICMP, FIRST, TIMEOUT) > + > +#define CT_DPIF_TO_NL_TP_ICMPV6_MAPPINGS \ > + CT_DPIF_TO_NL_TP_MAPPING(ICMP, ICMPV6, FIRST, TIMEOUT) > + > + > +#define CT_DPIF_TO_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2) \ > +if (tp->present & (1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1)) { \ > + nl_tp->present |= 1 << CTA_TIMEOUT_##PROTO2##_##ATTR2; \ > + nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2] = \ > + tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1]; \ > +} > + > +static void > +dpif_netlink_get_nl_tp_tcp_attrs(const struct ct_dpif_timeout_policy *tp, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + CT_DPIF_TO_NL_TP_TCP_MAPPINGS > +} > + > +static void > +dpif_netlink_get_nl_tp_udp_attrs(const struct ct_dpif_timeout_policy *tp, > + struct nl_ct_timeout_policy 
*nl_tp) > +{ > + CT_DPIF_TO_NL_TP_UDP_MAPPINGS > +} > + > +static void > +dpif_netlink_get_nl_tp_icmp_attrs(const struct ct_dpif_timeout_policy *tp, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + CT_DPIF_TO_NL_TP_ICMP_MAPPINGS > +} > + > +static void > +dpif_netlink_get_nl_tp_icmpv6_attrs(const struct ct_dpif_timeout_policy *tp, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + CT_DPIF_TO_NL_TP_ICMPV6_MAPPINGS > +} > + > +#undef CT_DPIF_TO_NL_TP_MAPPING > + > +static void > +dpif_netlink_get_nl_tp_attrs(const struct ct_dpif_timeout_policy *tp, > + uint8_t l4num, struct nl_ct_timeout_policy *nl_tp) > +{ > + nl_tp->present = 0; > + > + if (l4num == IPPROTO_TCP) { > + dpif_netlink_get_nl_tp_tcp_attrs(tp, nl_tp); > + } else if (l4num == IPPROTO_UDP) { > + dpif_netlink_get_nl_tp_udp_attrs(tp, nl_tp); > + } else if (l4num == IPPROTO_ICMP) { > + dpif_netlink_get_nl_tp_icmp_attrs(tp, nl_tp); > + } else if (l4num == IPPROTO_ICMPV6) { > + dpif_netlink_get_nl_tp_icmpv6_attrs(tp, nl_tp); > + } > +} > + > +#define CT_DPIF_TO_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2) \ > +if (nl_tp->present & (1 << CTA_TIMEOUT_##PROTO2##_##ATTR2)) { \ > + tp->present |= 1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1; \ > + tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] = \ > + nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2]; \ > + } > + > +static void > +dpif_netlink_set_ct_dpif_tp_tcp_attrs(const struct nl_ct_timeout_policy *nl_tp, > + struct ct_dpif_timeout_policy *tp) > +{ > + CT_DPIF_TO_NL_TP_TCP_MAPPINGS Is this better to renamed as CT_DPIF_FROM_NL_TP_TCP_MAPPINGS? 
You're using the same macro name for both directions: one sets nl_tp->attrs from tp->attrs, and the other sets tp->attrs from nl_tp->attrs. > +} > + > +static void > +dpif_netlink_set_ct_dpif_tp_udp_attrs(const struct nl_ct_timeout_policy *nl_tp, > + struct ct_dpif_timeout_policy *tp) > +{ > + CT_DPIF_TO_NL_TP_UDP_MAPPINGS > +} > + > +static void > +dpif_netlink_set_ct_dpif_tp_icmp_attrs( > + const struct nl_ct_timeout_policy *nl_tp, > + struct ct_dpif_timeout_policy *tp) > +{ > + CT_DPIF_TO_NL_TP_ICMP_MAPPINGS > +} > + > +static void > +dpif_netlink_set_ct_dpif_tp_icmpv6_attrs( > + const struct nl_ct_timeout_policy *nl_tp, > + struct ct_dpif_timeout_policy *tp) > +{ > + CT_DPIF_TO_NL_TP_ICMPV6_MAPPINGS > +} > + > +#undef CT_DPIF_TO_NL_TP_MAPPING > + > +static void > +dpif_netlink_set_ct_dpif_tp_attrs(const struct nl_ct_timeout_policy *nl_tp, > + struct ct_dpif_timeout_policy *tp) > +{ > + if (nl_tp->l4num == IPPROTO_TCP) { > + dpif_netlink_set_ct_dpif_tp_tcp_attrs(nl_tp, tp); > + } else if (nl_tp->l4num == IPPROTO_UDP) { > + dpif_netlink_set_ct_dpif_tp_udp_attrs(nl_tp, tp); > + } else if (nl_tp->l4num == IPPROTO_ICMP) { > + dpif_netlink_set_ct_dpif_tp_icmp_attrs(nl_tp, tp); > + } else if (nl_tp->l4num == IPPROTO_ICMPV6) { > + dpif_netlink_set_ct_dpif_tp_icmpv6_attrs(nl_tp, tp); > + } > +} > + > +static int > +dpif_netlink_ct_add_timeout_policy(struct dpif *dpif OVS_UNUSED, > + bool is_default, > + const struct ct_dpif_timeout_policy *tp) > +{ > +#ifdef _WIN32 > + return EOPNOTSUPP; > +#else > + struct nl_ct_timeout_policy nl_tp; > + struct ds ds = DS_EMPTY_INITIALIZER; > + int i, err; > + > + for (i = 0; i < ARRAY_SIZE(tp_protos); ++i) { > + dpif_netlink_format_tp_name(tp->id, tp_protos[i].l3num, > + tp_protos[i].l4num, &ds); > + ovs_strlcpy(nl_tp.name, ds_cstr(&ds), sizeof nl_tp.name); > + nl_tp.l3num = tp_protos[i].l3num; > + nl_tp.l4num = tp_protos[i].l4num; > + dpif_netlink_get_nl_tp_attrs(tp, tp_protos[i].l4num, &nl_tp); > + if (!is_default) { > + err = 
nl_ct_set_timeout_policy(&nl_tp); > + } else if (tp_protos[i].l3num == AF_INET) { > + /* The default timeout policy is shared between AF_INET and > + * AF_INET6 in the kernel. So configure AF_INET is sufficient. */ > + err = nl_ct_set_default_timeout_policy(&nl_tp); > + } > + if (err) { > + VLOG_INFO("failed to set timeout policy %s (%s)", nl_tp.name, > + ovs_strerror(err)); ds_destroy(&ds); > + return err; > + } > + } > + > + ds_destroy(&ds); > + return 0; > +#endif > +} > + > +static int > +dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED, > + bool is_default, uint32_t tp_id, > + struct ct_dpif_timeout_policy *tp) > +{ > +#ifdef _WIN32 > + return EOPNOTSUPP; > +#else If the _WIN32 case always returns EOPNOTSUPP, would it be better to aggregate all six functions under one larger conditional block? For example: #ifdef _WIN32 // all six functions return EOPNOTSUPP #else // actual implementations #endif > + struct nl_ct_timeout_policy nl_tp; > + struct ds nl_tp_name = DS_EMPTY_INITIALIZER; > + int i, err; > + > + tp->id = tp_id; > + tp->present = 0; > + for (i = 0; i < ARRAY_SIZE(tp_protos); ++i) { > + if (!is_default) { > + dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num, > + tp_protos[i].l4num, &nl_tp_name); > + err = nl_ct_get_timeout_policy(ds_cstr(&nl_tp_name), &nl_tp); > + } else if (tp_protos[i].l3num == AF_INET) { > + /* The default timeout is shared between AF_INET and AF_INET6 > + * in the kernel. So get from AF_INET is sufficient. */ Then why check 'tp_protos[i].l3num == AF_INET'? What happens when tp_protos[i].l3num == AF_INET6? In that case 'err' is left uninitialized. 
> + err = nl_ct_get_default_timeout_policy(tp_protos[i].l3num, > + tp_protos[i].l4num, &nl_tp); > + } > + if (err) { ds_destroy(&nl_tp_name); > + return err; > + } > + dpif_netlink_set_ct_dpif_tp_attrs(&nl_tp, tp); > + } > + > + ds_destroy(&nl_tp_name); > + return 0; > +#endif > +} > + > +static int > +dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED, > + uint32_t tp_id) > +{ > +#ifdef _WIN32 > + return EOPNOTSUPP; > +#else > + struct ds nl_tp_name = DS_EMPTY_INITIALIZER; > + int i, err; > + > + if (!tp_id) { > + return EINVAL; > + } > + > + for (i = 0; i < ARRAY_SIZE(tp_protos); ++i) { > + dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num, > + tp_protos[i].l4num, &nl_tp_name); > + err = nl_ct_del_timeout_policy(ds_cstr(&nl_tp_name)); > + if (err) { > + VLOG_INFO("failed to delete timeout policy %s (%s)", > + ds_cstr(&nl_tp_name), ovs_strerror(err)); Use VLOG_WARN? or VLOG_WARN_RL? ds_destroy(&nl_tp_name); > + return err; > + } > + } > + > + ds_destroy(&nl_tp_name); > + return 0; > +#endif > +} > + > +struct dpif_netlink_ct_timeout_policy_dump_state { > + struct nl_ct_timeout_policy_dump_state *nl_dump_state; > + struct hmap tp_dump_map; > +}; > + > +struct dpif_netlink_tp_dump_node { > + struct hmap_node hmap_node; /* node in tp_dump_map. 
*/ > + struct ct_dpif_timeout_policy *tp; > + uint32_t present; > +}; > + > +static struct dpif_netlink_tp_dump_node * > +get_dpif_netlink_tp_dump_node_by_tp_id(uint32_t tp_id, > + struct hmap *tp_dump_map) > +{ > + struct dpif_netlink_tp_dump_node *tp_dump_node; > + > + HMAP_FOR_EACH_WITH_HASH (tp_dump_node, hmap_node, hash_int(tp_id, 0), > + tp_dump_map) { > + if (tp_dump_node->tp->id == tp_id) { > + return tp_dump_node; > + } > + } > + return NULL; > +} > + > +static void > +update_dpif_netlink_tp_dump_node( > + const struct nl_ct_timeout_policy *nl_tp, > + struct dpif_netlink_tp_dump_node *tp_dump_node) > +{ > + int i; > + > + dpif_netlink_set_ct_dpif_tp_attrs(nl_tp, tp_dump_node->tp); > + for (i = 0; i < DPIF_NL_TP_MAX; ++i) { > + if (nl_tp->l3num == tp_protos[i].l3num && > + nl_tp->l4num == tp_protos[i].l4num) { > + tp_dump_node->present |= 1 << i; > + break; > + } > + } > +} > + > +static int > +dpif_netlink_ct_timeout_policy_dump_start(struct dpif *dpif OVS_UNUSED, > + void **statep) > +{ > +#ifdef _WIN32 > + return EOPNOTSUPP; > +#else > + struct dpif_netlink_ct_timeout_policy_dump_state *dump_state; > + int err; > + > + *statep = dump_state = xzalloc(sizeof *dump_state); > + err = nl_ct_timeout_policy_dump_start(&dump_state->nl_dump_state); > + if (err) { > + free(dump_state); > + return err; > + } > + hmap_init(&dump_state->tp_dump_map); > + return 0; > +#endif > +} > + > +static int > +dpif_netlink_ct_timeout_policy_dump_next(struct dpif *dpif OVS_UNUSED, > + void *state, > + struct ct_dpif_timeout_policy **tp) > +{ > +#ifdef _WIN32 > + return EOPNOTSUPP; > +#else > + struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state; > + struct dpif_netlink_tp_dump_node *tp_dump_node; > + struct nl_ct_timeout_policy nl_tp; > + uint32_t tp_id; > + int err; > + > + do { > + err = nl_ct_timeout_policy_dump_next(dump_state->nl_dump_state, > + &nl_tp); > + if (err) { > + break; > + } > + > + if (!ovs_scan(nl_tp.name, NL_TP_NAME_PREFIX"%"PRIu32, &tp_id)) { 
> + continue; > + } > + > + tp_dump_node = get_dpif_netlink_tp_dump_node_by_tp_id( > + tp_id, &dump_state->tp_dump_map); > + if (!tp_dump_node) { > + tp_dump_node = xzalloc(sizeof *tp_dump_node); > + tp_dump_node->tp = xzalloc(sizeof *tp_dump_node->tp); > + tp_dump_node->tp->id = tp_id; > + hmap_insert(&dump_state->tp_dump_map, &tp_dump_node->hmap_node, > + hash_int(tp_id, 0)); > + } > + > + update_dpif_netlink_tp_dump_node(&nl_tp, tp_dump_node); > + if (tp_dump_node->present == DPIF_NL_ALL_TP) { > + hmap_remove(&dump_state->tp_dump_map, &tp_dump_node->hmap_node); > + *tp = tp_dump_node->tp; > + free(tp_dump_node); Do we have to remove and free tp_dump_node here? Isn't it done at dpif_netlink_ct_timeout_policy_dump_done()? > + break; > + } > + } while (true); > + return err; > +#endif > +} > + > +static int > +dpif_netlink_ct_timeout_policy_dump_done(struct dpif *dpif OVS_UNUSED, > + void *state) > +{ > +#ifdef _WIN32 > + return EOPNOTSUPP; > +#else > + struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state; > + struct dpif_netlink_tp_dump_node *tp_dump_node; > + int err; > + > + err = nl_ct_timeout_policy_dump_done(dump_state->nl_dump_state); > + /* clear dump map, modulize */ > + HMAP_FOR_EACH_POP (tp_dump_node, hmap_node, &dump_state->tp_dump_map) { > + VLOG_INFO("Partial timeout policy in dpif-netlink %"PRIu32, > + tp_dump_node->tp->id); > + free(tp_dump_node->tp); > + free(tp_dump_node); > + } > + hmap_destroy(&dump_state->tp_dump_map); > + free(dump_state); > + return err; > +#endif > +} > + > > /* Meters */ > > @@ -3434,12 +3854,12 @@ const struct dpif_class dpif_netlink_class = { > dpif_netlink_ct_set_limits, > dpif_netlink_ct_get_limits, > dpif_netlink_ct_del_limits, > - NULL, /* ct_set_timeout_policy */ > - NULL, /* ct_get_timeout_policy */ > - NULL, /* ct_del_timeout_policy */ > - NULL, /* ct_timeout_policy_dump_start */ > - NULL, /* ct_timeout_policy_dump_next */ > - NULL, /* ct_timeout_policy_dump_done */ > + 
dpif_netlink_ct_add_timeout_policy, > + dpif_netlink_ct_get_timeout_policy, > + dpif_netlink_ct_del_timeout_policy, > + dpif_netlink_ct_timeout_policy_dump_start, > + dpif_netlink_ct_timeout_policy_dump_next, > + dpif_netlink_ct_timeout_policy_dump_done, > NULL, /* ipf_set_enabled */ > NULL, /* ipf_set_min_frag */ > NULL, /* ipf_set_max_nfrags */ > diff --git a/lib/dpif-netlink.h b/lib/dpif-netlink.h > index 0a9628088275..7e75120161f6 100644 > --- a/lib/dpif-netlink.h > +++ b/lib/dpif-netlink.h > @@ -23,6 +23,7 @@ > #include "odp-netlink.h" > > #include "flow.h" > +#include "netlink-conntrack.h" > > struct ofpbuf; > > @@ -50,7 +51,6 @@ struct dpif_netlink_vport { > }; > > void dpif_netlink_vport_init(struct dpif_netlink_vport *); > - > int dpif_netlink_vport_transact(const struct dpif_netlink_vport *request, > struct dpif_netlink_vport *reply, > struct ofpbuf **bufp); > diff --git a/lib/netlink-conntrack.c b/lib/netlink-conntrack.c > index 7631ba5d5d31..9bc0ddb66248 100644 > --- a/lib/netlink-conntrack.c > +++ b/lib/netlink-conntrack.c > @@ -840,6 +840,369 @@ nl_ct_parse_helper(struct nlattr *nla, struct ct_dpif_helper *helper) > return parsed; > } > > +static int nl_ct_timeout_policy_max_attr[] = { > + [IPPROTO_TCP] = CTA_TIMEOUT_TCP_MAX, > + [IPPROTO_UDP] = CTA_TIMEOUT_UDP_MAX, > + [IPPROTO_ICMP] = CTA_TIMEOUT_ICMP_MAX, > + [IPPROTO_ICMPV6] = CTA_TIMEOUT_ICMPV6_MAX > +}; > + > +static void > +nl_ct_set_timeout_policy_attr(struct nl_ct_timeout_policy *nl_tp, > + uint32_t attr, uint32_t val) > +{ > + nl_tp->present |= 1 << attr; > + nl_tp->attrs[attr] = val; > +} > + > +static int > +nl_ct_parse_tcp_timeout_policy_data(struct nlattr *nla, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + static const struct nl_policy policy[] = { > + [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NL_A_BE32, > + .optional = false }, > + [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NL_A_BE32, > + .optional = false }, > + [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NL_A_BE32, > + .optional = false }, 
> + [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NL_A_BE32, > + .optional = false }, > + [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NL_A_BE32, > + .optional = false }, > + [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NL_A_BE32, > + .optional = false }, > + [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NL_A_BE32, > + .optional = false }, > + [CTA_TIMEOUT_TCP_CLOSE] = { .type = NL_A_BE32, > + .optional = false }, > + [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NL_A_BE32, > + .optional = false }, > + [CTA_TIMEOUT_TCP_RETRANS] = { .type = NL_A_BE32, > + .optional = false }, > + [CTA_TIMEOUT_TCP_UNACK] = { .type = NL_A_BE32, > + .optional = false }, > + }; > + struct nlattr *attrs[ARRAY_SIZE(policy)]; > + int i; > + > + if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { > + VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. " > + "Possibly incompatible Linux kernel version."); > + return EINVAL; > + } > + > + for (i = CTA_TIMEOUT_TCP_SYN_SENT; i <= CTA_TIMEOUT_TCP_UNACK; i++) { > + nl_ct_set_timeout_policy_attr(nl_tp, i, > + ntohl(nl_attr_get_be32(attrs[i]))); > + } > + return 0; > +} > + > +static int > +nl_ct_parse_udp_timeout_policy_data(struct nlattr *nla, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + static const struct nl_policy policy[] = { > + [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NL_A_BE32, > + .optional = false }, > + [CTA_TIMEOUT_UDP_REPLIED] = { .type = NL_A_BE32, > + .optional = false }, > + }; > + struct nlattr *attrs[ARRAY_SIZE(policy)]; > + int i; > + > + if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { > + VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. 
" > + "Possibly incompatible Linux kernel version."); > + return EINVAL; > + } > + > + for (i = CTA_TIMEOUT_UDP_UNREPLIED; i <= CTA_TIMEOUT_UDP_REPLIED; i++) { > + nl_ct_set_timeout_policy_attr(nl_tp, i, > + ntohl(nl_attr_get_be32(attrs[i]))); > + } > + return 0; > +} > + > +static int > +nl_ct_parse_icmp_timeout_policy_data(struct nlattr *nla, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + static const struct nl_policy policy[] = { > + [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NL_A_BE32, > + .optional = false }, > + }; > + struct nlattr *attrs[ARRAY_SIZE(policy)]; > + > + if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { > + VLOG_ERR_RL(&rl, "Could not parse nested icmp timeout options. " > + "Possibly incompatible Linux kernel version."); > + return EINVAL; > + } > + > + nl_ct_set_timeout_policy_attr( > + nl_tp, CTA_TIMEOUT_ICMP_TIMEOUT, > + ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMP_TIMEOUT]))); > + return 0; > +} > + > +static int > +nl_ct_parse_icmpv6_timeout_policy_data(struct nlattr *nla, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + static const struct nl_policy policy[] = { > + [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NL_A_BE32, > + .optional = false }, > + }; > + struct nlattr *attrs[ARRAY_SIZE(policy)]; > + > + if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { > + VLOG_ERR_RL(&rl, "Could not parse nested icmpv6 timeout options. 
" > + "Possibly incompatible Linux kernel version."); > + return EINVAL; > + } > + > + nl_ct_set_timeout_policy_attr( > + nl_tp, CTA_TIMEOUT_ICMPV6_TIMEOUT, > + ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMPV6_TIMEOUT]))); > + return 0; > +} > + > +static int > +nl_ct_parse_timeout_policy_data(struct nlattr *nla, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + switch (nl_tp->l4num) { > + case IPPROTO_TCP: > + return nl_ct_parse_tcp_timeout_policy_data(nla, nl_tp); > + case IPPROTO_UDP: > + return nl_ct_parse_udp_timeout_policy_data(nla, nl_tp); > + case IPPROTO_ICMP: > + return nl_ct_parse_icmp_timeout_policy_data(nla, nl_tp); > + case IPPROTO_ICMPV6: > + return nl_ct_parse_icmpv6_timeout_policy_data(nla, nl_tp); > + default: > + return EINVAL; > + } > +} > + > +static int > +nl_ct_timeout_policy_from_ofpbuf(struct ofpbuf *buf, > + struct nl_ct_timeout_policy *nl_tp, > + bool default_tp) > +{ > + static const struct nl_policy policy[] = { > + [CTA_TIMEOUT_NAME] = { .type = NL_A_STRING, .optional = false }, > + [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false }, > + [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false }, > + [CTA_TIMEOUT_DATA] = { .type = NL_A_NESTED, .optional = false } > + }; > + static const struct nl_policy policy_default_tp[] = { > + [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false }, > + [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false }, > + [CTA_TIMEOUT_DATA] = { .type = NL_A_NESTED, .optional = false } > + }; > + > + struct nlattr *attrs[ARRAY_SIZE(policy)]; > + struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size); > + struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); > + struct nfgenmsg *nfmsg = ofpbuf_try_pull(&b, sizeof *nfmsg); > + int err; > + > + if (!nlmsg || !nfmsg > + || NFNL_SUBSYS_ID(nlmsg->nlmsg_type) != NFNL_SUBSYS_CTNETLINK_TIMEOUT > + || nfmsg->version != NFNETLINK_V0 > + || !nl_policy_parse(&b, 0, default_tp ? 
policy_default_tp : policy, > + attrs, default_tp ? ARRAY_SIZE(policy_default_tp) : > + ARRAY_SIZE(policy))) { > + return EINVAL; > + } > + > + if (!default_tp) { > + ovs_strlcpy(nl_tp->name, nl_attr_get_string(attrs[CTA_TIMEOUT_NAME]), > + sizeof nl_tp->name); > + } > + nl_tp->l3num = ntohs(nl_attr_get_be16(attrs[CTA_TIMEOUT_L3PROTO])); > + nl_tp->l4num = nl_attr_get_u8(attrs[CTA_TIMEOUT_L4PROTO]); > + nl_tp->present = 0; > + > + err = nl_ct_parse_timeout_policy_data(attrs[CTA_TIMEOUT_DATA], nl_tp); > + return err; > +} > + > +int > +nl_ct_set_timeout_policy(const struct nl_ct_timeout_policy *nl_tp) > +{ > + struct ofpbuf buf; > + size_t offset; > + int i, err; > + > + ofpbuf_init(&buf, 512); > + nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, > + IPCTNL_MSG_TIMEOUT_NEW, NLM_F_REQUEST | NLM_F_CREATE > + | NLM_F_ACK | NLM_F_REPLACE); > + > + nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, nl_tp->name); > + nl_msg_put_be16(&buf, CTA_TIMEOUT_L3PROTO, htons(nl_tp->l3num)); > + nl_msg_put_u8(&buf, CTA_TIMEOUT_L4PROTO, nl_tp->l4num); > + > + offset = nl_msg_start_nested(&buf, CTA_TIMEOUT_DATA); > + for (i = 1; i <= nl_ct_timeout_policy_max_attr[nl_tp->l4num]; ++i) { > + if (nl_tp->present & 1 << i) { > + nl_msg_put_be32(&buf, i, htonl(nl_tp->attrs[i])); > + } > + } > + nl_msg_end_nested(&buf, offset); > + > + err = nl_transact(NETLINK_NETFILTER, &buf, NULL); > + ofpbuf_uninit(&buf); > + return err; > +} > + > +int > +nl_ct_set_default_timeout_policy(const struct nl_ct_timeout_policy *nl_tp) > +{ > + struct ofpbuf buf; > + size_t offset; > + int i, err; > + > + ofpbuf_init(&buf, 512); > + nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, > + IPCTNL_MSG_TIMEOUT_DEFAULT_SET, NLM_F_REQUEST > + | NLM_F_ACK | NLM_F_REPLACE); > + > + nl_msg_put_be16(&buf, CTA_TIMEOUT_L3PROTO, htons(nl_tp->l3num)); > + nl_msg_put_u8(&buf, CTA_TIMEOUT_L4PROTO, nl_tp->l4num); > + > + offset = nl_msg_start_nested(&buf, CTA_TIMEOUT_DATA); > + for (i = 1; i <= 
nl_ct_timeout_policy_max_attr[nl_tp->l4num]; ++i) { > + if (nl_tp->present & 1 << i) { > + nl_msg_put_be32(&buf, i, htonl(nl_tp->attrs[i])); > + } > + } > + nl_msg_end_nested(&buf, offset); > + > + err = nl_transact(NETLINK_NETFILTER, &buf, NULL); > + ofpbuf_uninit(&buf); > + return err; > +} > + > +int > +nl_ct_get_timeout_policy(const char *tp_name, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + struct ofpbuf request, *reply; > + int err; > + > + ofpbuf_init(&request, 512); > + nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, > + IPCTNL_MSG_TIMEOUT_GET, NLM_F_REQUEST | NLM_F_ACK); > + nl_msg_put_string(&request, CTA_TIMEOUT_NAME, tp_name); > + err = nl_transact(NETLINK_NETFILTER, &request, &reply); > + if (err) { > + goto out; > + } > + > + err = nl_ct_timeout_policy_from_ofpbuf(reply, nl_tp, false); > + > +out: > + ofpbuf_uninit(&request); > + ofpbuf_delete(reply); > + return err; > +} > + > +int > +nl_ct_get_default_timeout_policy(uint16_t l3num, uint8_t l4num, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + struct ofpbuf request, *reply; > + int err; > + > + ofpbuf_init(&request, 512); > + nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, > + IPCTNL_MSG_TIMEOUT_DEFAULT_GET, > + NLM_F_REQUEST | NLM_F_ACK); > + > + nl_msg_put_be16(&request, CTA_TIMEOUT_L3PROTO, htons(l3num)); > + nl_msg_put_u8(&request, CTA_TIMEOUT_L4PROTO, l4num); > + err = nl_transact(NETLINK_NETFILTER, &request, &reply); > + if (err) { > + goto out; > + } > + > + err = nl_ct_timeout_policy_from_ofpbuf(reply, nl_tp, true); > + > +out: > + ofpbuf_uninit(&request); > + ofpbuf_delete(reply); > + return err; > +} > + > +int > +nl_ct_del_timeout_policy(const char *tp_name) > +{ > + struct ofpbuf buf; > + int err; > + > + ofpbuf_init(&buf, 64); > + nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, > + IPCTNL_MSG_TIMEOUT_DELETE, NLM_F_REQUEST | NLM_F_ACK); > + > + nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, tp_name); > + err = 
nl_transact(NETLINK_NETFILTER, &buf, NULL); > + ofpbuf_uninit(&buf); > + return err; > +} > + > +struct nl_ct_timeout_policy_dump_state { > + struct nl_dump dump; > + struct ofpbuf buf; > +}; > + > +int > +nl_ct_timeout_policy_dump_start( > + struct nl_ct_timeout_policy_dump_state **statep) > +{ > + struct ofpbuf request; > + struct nl_ct_timeout_policy_dump_state *state; > + > + *statep = state = xzalloc(sizeof *state); > + ofpbuf_init(&request, 512); > + nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, > + IPCTNL_MSG_TIMEOUT_GET, > + NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP); > + > + nl_dump_start(&state->dump, NETLINK_NETFILTER, &request); > + ofpbuf_uninit(&request); > + ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE); > + return 0; > +} > + > +int > +nl_ct_timeout_policy_dump_next(struct nl_ct_timeout_policy_dump_state *state, > + struct nl_ct_timeout_policy *nl_tp) > +{ > + struct ofpbuf reply; > + int err; > + > + if (!nl_dump_next(&state->dump, &reply, &state->buf)) { > + return EOF; > + } > + err = nl_ct_timeout_policy_from_ofpbuf(&reply, nl_tp, false); > + ofpbuf_uninit(&reply); > + return err; > +} > + > +int > +nl_ct_timeout_policy_dump_done(struct nl_ct_timeout_policy_dump_state *state) > +{ > + int err = nl_dump_done(&state->dump); > + ofpbuf_uninit(&state->buf); > + free(state); > + return err; > +} > + > /* Translate netlink entry status flags to CT_DPIF_TCP status flags. 
*/ > static uint32_t > ips_status_to_dpif_flags(uint32_t status) > diff --git a/lib/netlink-conntrack.h b/lib/netlink-conntrack.h > index 8b536fd65ba8..ae6e428e0929 100644 > --- a/lib/netlink-conntrack.h > +++ b/lib/netlink-conntrack.h > @@ -17,9 +17,12 @@ > #ifndef NETLINK_CONNTRACK_H > #define NETLINK_CONNTRACK_H > > +#include <linux/netfilter/nfnetlink_cttimeout.h> > + > #include "byte-order.h" > #include "compiler.h" > #include "ct-dpif.h" > +#include "netlink-socket.h" > #include "openvswitch/dynamic-string.h" > #include "openvswitch/hmap.h" > #include "openvswitch/ofpbuf.h" > @@ -33,7 +36,18 @@ enum nl_ct_event_type { > NL_CT_EVENT_DELETE = 1 << 2, > }; > > +#define NL_CT_TIMEOUT_POLICY_MAX_ATTR (CTA_TIMEOUT_TCP_MAX + 1) > + > +struct nl_ct_timeout_policy { > + char name[CTNL_TIMEOUT_NAME_MAX]; > + uint16_t l3num; > + uint8_t l4num; > + uint32_t attrs[NL_CT_TIMEOUT_POLICY_MAX_ATTR]; > + uint32_t present; > +}; > + > struct nl_ct_dump_state; > +struct nl_ct_timeout_policy_dump_state; > > int nl_ct_dump_start(struct nl_ct_dump_state **, const uint16_t *zone, > int *ptot_bkts); > @@ -44,6 +58,21 @@ int nl_ct_flush(void); > int nl_ct_flush_zone(uint16_t zone); > int nl_ct_flush_tuple(const struct ct_dpif_tuple *, uint16_t zone); > > +int nl_ct_set_timeout_policy(const struct nl_ct_timeout_policy *nl_tp); > +int nl_ct_set_default_timeout_policy(const struct nl_ct_timeout_policy *nl_tp); > +int nl_ct_get_timeout_policy(const char *tp_name, > + struct nl_ct_timeout_policy *nl_tp); > +int nl_ct_get_default_timeout_policy(uint16_t l3num, uint8_t l4num, > + struct nl_ct_timeout_policy *nl_tp); > +int nl_ct_del_timeout_policy(const char *tp_name); > +int nl_ct_timeout_policy_dump_start( > + struct nl_ct_timeout_policy_dump_state **statep); > +int nl_ct_timeout_policy_dump_next( > + struct nl_ct_timeout_policy_dump_state *state, > + struct nl_ct_timeout_policy *nl_tp); > +int nl_ct_timeout_policy_dump_done( > + struct nl_ct_timeout_policy_dump_state *state); > + > bool 
nl_ct_parse_entry(struct ofpbuf *, struct ct_dpif_entry *, > enum nl_ct_event_type *); > void nl_ct_format_event_entry(const struct ct_dpif_entry *, > diff --git a/lib/netlink-protocol.h b/lib/netlink-protocol.h > index c0617dfad21f..bf631b1a14d0 100644 > --- a/lib/netlink-protocol.h > +++ b/lib/netlink-protocol.h > @@ -48,6 +48,7 @@ > #define NLM_F_ECHO 0x008 > > #define NLM_F_ROOT 0x100 > +#define NLM_F_REPLACE 0x100 > #define NLM_F_MATCH 0x200 > #define NLM_F_EXCL 0x200 > #define NLM_F_ATOMIC 0x400 > -- > 2.7.4 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On Fri, Jul 26, 2019 at 10:15 AM William Tu <u9012063@gmail.com> wrote: > > +static void > > +dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num, > > + struct ds *tp_name) > > +{ > > + ds_clear(tp_name); > > + ds_put_format(tp_name, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id); > > + ct_dpif_format_ipproto(tp_name, l4num); > > + > > + if (l3num == AF_INET) { > > + ds_put_cstr(tp_name, "4"); > > + } else if (l3num == AF_INET6 && l4num != IPPROTO_ICMPV6) { > > Why excluding IPPROTO_ICMPV6 above? Thanks for the review. It is because ct_dpif_format_ipproto returns "icmpv6" for IPPROTO_ICMPV6 and "icmp" for "IPPROTO_ICMP", and I found it to be confusing to have ovs_tp_<tp_id>_icmpv66 as the timeout policy name. > > +#define CT_DPIF_TO_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2) \ > > +if (nl_tp->present & (1 << CTA_TIMEOUT_##PROTO2##_##ATTR2)) { \ > > + tp->present |= 1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1; \ > > + tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] = \ > > + nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2]; \ > > + } > > + > > +static void > > +dpif_netlink_set_ct_dpif_tp_tcp_attrs(const struct nl_ct_timeout_policy *nl_tp, > > + struct ct_dpif_timeout_policy *tp) > > +{ > > + CT_DPIF_TO_NL_TP_TCP_MAPPINGS > > Is this better to renamed as CT_DPIF_FROM_NL_TP_TCP_MAPPINGS? > > You're using the same macro name, one for > setting the nl_tp->attrs from tp->attrs, the other for > setting the tp->attrs from nl_tp_attrs Thanks for the suggestion. As per our offline discussion, it is confusing to have "_TO_" in the macro name, so I will get rid of it. 
> > +static int > > +dpif_netlink_ct_add_timeout_policy(struct dpif *dpif OVS_UNUSED, > > + bool is_default, > > + const struct ct_dpif_timeout_policy *tp) > > +{ > > +#ifdef _WIN32 > > + return EOPNOTSUPP; > > +#else > > + struct nl_ct_timeout_policy nl_tp; > > + struct ds ds = DS_EMPTY_INITIALIZER; > > + int i, err; > > + > > + for (i = 0; i < ARRAY_SIZE(tp_protos); ++i) { > > + dpif_netlink_format_tp_name(tp->id, tp_protos[i].l3num, > > + tp_protos[i].l4num, &ds); > > + ovs_strlcpy(nl_tp.name, ds_cstr(&ds), sizeof nl_tp.name); > > + nl_tp.l3num = tp_protos[i].l3num; > > + nl_tp.l4num = tp_protos[i].l4num; > > + dpif_netlink_get_nl_tp_attrs(tp, tp_protos[i].l4num, &nl_tp); > > + if (!is_default) { > > + err = nl_ct_set_timeout_policy(&nl_tp); > > + } else if (tp_protos[i].l3num == AF_INET) { > > + /* The default timeout policy is shared between AF_INET and > > + * AF_INET6 in the kernel. So configure AF_INET is sufficient. */ > > + err = nl_ct_set_default_timeout_policy(&nl_tp); > > + } > > + if (err) { > > + VLOG_INFO("failed to set timeout policy %s (%s)", nl_tp.name, > > + ovs_strerror(err)); > ds_destroy(&ds); Thanks, I will destroy the dynamic string properly in all the following cases in v2. > > +static int > > +dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED, > > + bool is_default, uint32_t tp_id, > > + struct ct_dpif_timeout_policy *tp) > > +{ > > +#ifdef _WIN32 > > + return EOPNOTSUPP; > > +#else > if _WIN32 is alway return EOPNOTSUPP, > is it better if we aggregate all 6 functions and have a larger > #ifdef _WIN32 > // all six functions return EOPNOTSUPP > #else > // actual implementations > #endif Sure, I will make the proper change so that the code looks clearer in the next version. 
> > + struct nl_ct_timeout_policy nl_tp; > > + struct ds nl_tp_name = DS_EMPTY_INITIALIZER; > > + int i, err; > > + > > + tp->id = tp_id; > > + tp->present = 0; > > + for (i = 0; i < ARRAY_SIZE(tp_protos); ++i) { > > + if (!is_default) { > > + dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num, > > + tp_protos[i].l4num, &nl_tp_name); > > + err = nl_ct_get_timeout_policy(ds_cstr(&nl_tp_name), &nl_tp); > > + } else if (tp_protos[i].l3num == AF_INET) { > > + /* The default timeout is shared between AF_INET and AF_INET6 > > + * in the kernel. So get from AF_INET is sufficient. */ > Then why checking 'tp_protos[i].l3num == AF_INET'? > What happens when tp_protos[i].l3num == AF_INET6? then 'err' becomes uninitialized. This function is called from ct-dpif to query the timeout policy stored in the kernel. It will loop through all L3/L4 pairs (ipv4 tcp/udp/icmp and ipv6 tcp/udp/icmpv6). The main purpose of this check is to skip the AF_INET6 cases for the default timeout, since the kernel does not distinguish between the ipv4 and ipv6 cases. > > +static int > > +dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED, > > + uint32_t tp_id) > > +{ > > +#ifdef _WIN32 > > + return EOPNOTSUPP; > > +#else > > + struct ds nl_tp_name = DS_EMPTY_INITIALIZER; > > + int i, err; > > + > > + if (!tp_id) { > > + return EINVAL; > > + } > > + > > + for (i = 0; i < ARRAY_SIZE(tp_protos); ++i) { > > + dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num, > > + tp_protos[i].l4num, &nl_tp_name); > > + err = nl_ct_del_timeout_policy(ds_cstr(&nl_tp_name)); > > + if (err) { > > + VLOG_INFO("failed to delete timeout policy %s (%s)", > > + ds_cstr(&nl_tp_name), ovs_strerror(err)); > Use VLOG_WARN? or VLOG_WARN_RL? I will change that to VLOG_WARN_RL in v2. 
> > +static int > > +dpif_netlink_ct_timeout_policy_dump_next(struct dpif *dpif OVS_UNUSED, > > + void *state, > > + struct ct_dpif_timeout_policy **tp) > > +{ > > +#ifdef _WIN32 > > + return EOPNOTSUPP; > > +#else > > + struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state; > > + struct dpif_netlink_tp_dump_node *tp_dump_node; > > + struct nl_ct_timeout_policy nl_tp; > > + uint32_t tp_id; > > + int err; > > + > > + do { > > + err = nl_ct_timeout_policy_dump_next(dump_state->nl_dump_state, > > + &nl_tp); > > + if (err) { > > + break; > > + } > > + > > + if (!ovs_scan(nl_tp.name, NL_TP_NAME_PREFIX"%"PRIu32, &tp_id)) { > > + continue; > > + } > > + > > + tp_dump_node = get_dpif_netlink_tp_dump_node_by_tp_id( > > + tp_id, &dump_state->tp_dump_map); > > + if (!tp_dump_node) { > > + tp_dump_node = xzalloc(sizeof *tp_dump_node); > > + tp_dump_node->tp = xzalloc(sizeof *tp_dump_node->tp); > > + tp_dump_node->tp->id = tp_id; > > + hmap_insert(&dump_state->tp_dump_map, &tp_dump_node->hmap_node, > > + hash_int(tp_id, 0)); > > + } > > + > > + update_dpif_netlink_tp_dump_node(&nl_tp, tp_dump_node); > > + if (tp_dump_node->present == DPIF_NL_ALL_TP) { > > + hmap_remove(&dump_state->tp_dump_map, &tp_dump_node->hmap_node); > > + *tp = tp_dump_node->tp; > > + free(tp_dump_node); > Do we have to remove and free tp_dump_node here? > Isn't it done at dpif_netlink_ct_timeout_policy_dump_done()? This is the case where we gather all of the 6 sub timeout policies and return them to the ct-dpif layer. Once a full profile is gathered, we will report it to the ct-dpif layer and remove the tp_dump_node. What we free in dpif_netlink_ct_timeout_policy_dump_done() are the incomplete timeout policies. Thanks, -Yi-Hung
diff --git a/datapath-windows/include/OvsDpInterfaceCtExt.h b/datapath-windows/include/OvsDpInterfaceCtExt.h index 3b947782e90c..4379855bb8dd 100644 --- a/datapath-windows/include/OvsDpInterfaceCtExt.h +++ b/datapath-windows/include/OvsDpInterfaceCtExt.h @@ -421,4 +421,118 @@ struct nf_ct_tcp_flags { UINT8 mask; }; +/* File: nfnetlink_cttimeout.h */ +enum ctnl_timeout_msg_types { + IPCTNL_MSG_TIMEOUT_NEW, + IPCTNL_MSG_TIMEOUT_GET, + IPCTNL_MSG_TIMEOUT_DELETE, + IPCTNL_MSG_TIMEOUT_DEFAULT_SET, + IPCTNL_MSG_TIMEOUT_DEFAULT_GET, + + IPCTNL_MSG_TIMEOUT_MAX +}; + +enum ctattr_timeout { + CTA_TIMEOUT_UNSPEC, + CTA_TIMEOUT_NAME, + CTA_TIMEOUT_L3PROTO, + CTA_TIMEOUT_L4PROTO, + CTA_TIMEOUT_DATA, + CTA_TIMEOUT_USE, + __CTA_TIMEOUT_MAX +}; +#define CTA_TIMEOUT_MAX (__CTA_TIMEOUT_MAX - 1) + +enum ctattr_timeout_generic { + CTA_TIMEOUT_GENERIC_UNSPEC, + CTA_TIMEOUT_GENERIC_TIMEOUT, + __CTA_TIMEOUT_GENERIC_MAX +}; +#define CTA_TIMEOUT_GENERIC_MAX (__CTA_TIMEOUT_GENERIC_MAX - 1) + +enum ctattr_timeout_tcp { + CTA_TIMEOUT_TCP_UNSPEC, + CTA_TIMEOUT_TCP_SYN_SENT, + CTA_TIMEOUT_TCP_SYN_RECV, + CTA_TIMEOUT_TCP_ESTABLISHED, + CTA_TIMEOUT_TCP_FIN_WAIT, + CTA_TIMEOUT_TCP_CLOSE_WAIT, + CTA_TIMEOUT_TCP_LAST_ACK, + CTA_TIMEOUT_TCP_TIME_WAIT, + CTA_TIMEOUT_TCP_CLOSE, + CTA_TIMEOUT_TCP_SYN_SENT2, + CTA_TIMEOUT_TCP_RETRANS, + CTA_TIMEOUT_TCP_UNACK, + __CTA_TIMEOUT_TCP_MAX +}; +#define CTA_TIMEOUT_TCP_MAX (__CTA_TIMEOUT_TCP_MAX - 1) + +enum ctattr_timeout_udp { + CTA_TIMEOUT_UDP_UNSPEC, + CTA_TIMEOUT_UDP_UNREPLIED, + CTA_TIMEOUT_UDP_REPLIED, + __CTA_TIMEOUT_UDP_MAX +}; +#define CTA_TIMEOUT_UDP_MAX (__CTA_TIMEOUT_UDP_MAX - 1) + +enum ctattr_timeout_udplite { + CTA_TIMEOUT_UDPLITE_UNSPEC, + CTA_TIMEOUT_UDPLITE_UNREPLIED, + CTA_TIMEOUT_UDPLITE_REPLIED, + __CTA_TIMEOUT_UDPLITE_MAX +}; +#define CTA_TIMEOUT_UDPLITE_MAX (__CTA_TIMEOUT_UDPLITE_MAX - 1) + +enum ctattr_timeout_icmp { + CTA_TIMEOUT_ICMP_UNSPEC, + CTA_TIMEOUT_ICMP_TIMEOUT, + __CTA_TIMEOUT_ICMP_MAX +}; +#define CTA_TIMEOUT_ICMP_MAX 
(__CTA_TIMEOUT_ICMP_MAX - 1) + +enum ctattr_timeout_dccp { + CTA_TIMEOUT_DCCP_UNSPEC, + CTA_TIMEOUT_DCCP_REQUEST, + CTA_TIMEOUT_DCCP_RESPOND, + CTA_TIMEOUT_DCCP_PARTOPEN, + CTA_TIMEOUT_DCCP_OPEN, + CTA_TIMEOUT_DCCP_CLOSEREQ, + CTA_TIMEOUT_DCCP_CLOSING, + CTA_TIMEOUT_DCCP_TIMEWAIT, + __CTA_TIMEOUT_DCCP_MAX +}; +#define CTA_TIMEOUT_DCCP_MAX (__CTA_TIMEOUT_DCCP_MAX - 1) + +enum ctattr_timeout_sctp { + CTA_TIMEOUT_SCTP_UNSPEC, + CTA_TIMEOUT_SCTP_CLOSED, + CTA_TIMEOUT_SCTP_COOKIE_WAIT, + CTA_TIMEOUT_SCTP_COOKIE_ECHOED, + CTA_TIMEOUT_SCTP_ESTABLISHED, + CTA_TIMEOUT_SCTP_SHUTDOWN_SENT, + CTA_TIMEOUT_SCTP_SHUTDOWN_RECD, + CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, + CTA_TIMEOUT_SCTP_HEARTBEAT_SENT, + CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, + __CTA_TIMEOUT_SCTP_MAX +}; +#define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) + +enum ctattr_timeout_icmpv6 { + CTA_TIMEOUT_ICMPV6_UNSPEC, + CTA_TIMEOUT_ICMPV6_TIMEOUT, + __CTA_TIMEOUT_ICMPV6_MAX +}; +#define CTA_TIMEOUT_ICMPV6_MAX (__CTA_TIMEOUT_ICMPV6_MAX - 1) + +enum ctattr_timeout_gre { + CTA_TIMEOUT_GRE_UNSPEC, + CTA_TIMEOUT_GRE_UNREPLIED, + CTA_TIMEOUT_GRE_REPLIED, + __CTA_TIMEOUT_GRE_MAX +}; +#define CTA_TIMEOUT_GRE_MAX (__CTA_TIMEOUT_GRE_MAX - 1) + +#define CTNL_TIMEOUT_NAME_MAX 32 + #endif /* __OVS_DP_INTERFACE_CT_EXT_H_ */ diff --git a/datapath-windows/ovsext/Netlink/NetlinkProto.h b/datapath-windows/ovsext/Netlink/NetlinkProto.h index 59b56565c1dc..db1fa2bacae8 100644 --- a/datapath-windows/ovsext/Netlink/NetlinkProto.h +++ b/datapath-windows/ovsext/Netlink/NetlinkProto.h @@ -51,6 +51,7 @@ #define NLM_F_ECHO 0x008 #define NLM_F_ROOT 0x100 +#define NLM_F_REPLACE 0x100 #define NLM_F_MATCH 0x200 #define NLM_F_EXCL 0x200 #define NLM_F_ATOMIC 0x400 diff --git a/include/windows/automake.mk b/include/windows/automake.mk index 382627b51787..883bbbf5d97c 100644 --- a/include/windows/automake.mk +++ b/include/windows/automake.mk @@ -15,6 +15,7 @@ noinst_HEADERS += \ include/windows/linux/netfilter/nf_conntrack_tcp.h \ 
include/windows/linux/netfilter/nfnetlink.h \ include/windows/linux/netfilter/nfnetlink_conntrack.h \ + include/windows/linux/netfilter/nfnetlink_cttimeout.h \ include/windows/linux/pkt_sched.h \ include/windows/linux/types.h \ include/windows/net/if.h \ diff --git a/include/windows/linux/netfilter/nfnetlink_cttimeout.h b/include/windows/linux/netfilter/nfnetlink_cttimeout.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 9825ce46a7f5..abfad9543c3b 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -50,6 +50,7 @@ #include "odp-util.h" #include "openvswitch/dynamic-string.h" #include "openvswitch/flow.h" +#include "openvswitch/hmap.h" #include "openvswitch/match.h" #include "openvswitch/ofpbuf.h" #include "openvswitch/poll-loop.h" @@ -3028,6 +3029,425 @@ dpif_netlink_ct_del_limits(struct dpif *dpif OVS_UNUSED, ofpbuf_delete(request); return err; } + +#define NL_TP_NAME_PREFIX "ovs_tp_" + +struct dpif_netlink_timeout_policy_protocol { + uint16_t l3num; + uint8_t l4num; +}; + +enum OVS_PACKED_ENUM dpif_netlink_support_timeout_policy_protocol { + DPIF_NL_TP_AF_INET_TCP, + DPIF_NL_TP_AF_INET_UDP, + DPIF_NL_TP_AF_INET_ICMP, + DPIF_NL_TP_AF_INET6_TCP, + DPIF_NL_TP_AF_INET6_UDP, + DPIF_NL_TP_AF_INET6_ICMPV6, + DPIF_NL_TP_MAX +}; + +#define DPIF_NL_ALL_TP 0x3F + +static struct dpif_netlink_timeout_policy_protocol tp_protos[] = { + [DPIF_NL_TP_AF_INET_TCP] = { .l3num = AF_INET, .l4num = IPPROTO_TCP }, + [DPIF_NL_TP_AF_INET_UDP] = { .l3num = AF_INET, .l4num = IPPROTO_UDP }, + [DPIF_NL_TP_AF_INET_ICMP] = { .l3num = AF_INET, .l4num = IPPROTO_ICMP }, + [DPIF_NL_TP_AF_INET6_TCP] = { .l3num = AF_INET6, .l4num = IPPROTO_TCP }, + [DPIF_NL_TP_AF_INET6_UDP] = { .l3num = AF_INET6, .l4num = IPPROTO_UDP }, + [DPIF_NL_TP_AF_INET6_ICMPV6] = { .l3num = AF_INET6, + .l4num = IPPROTO_ICMPV6 }, +}; + +static void +dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num, + struct ds *tp_name) +{ + 
ds_clear(tp_name); + ds_put_format(tp_name, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id); + ct_dpif_format_ipproto(tp_name, l4num); + + if (l3num == AF_INET) { + ds_put_cstr(tp_name, "4"); + } else if (l3num == AF_INET6 && l4num != IPPROTO_ICMPV6) { + ds_put_cstr(tp_name, "6"); + } + + ovs_assert(tp_name->length < CTNL_TIMEOUT_NAME_MAX); +} + +#define CT_DPIF_TO_NL_TP_TCP_MAPPINGS \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, ESTABLISHED, ESTABLISHED) \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, FIN_WAIT, FIN_WAIT) \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, CLOSE_WAIT, CLOSE_WAIT) \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, LAST_ACK, LAST_ACK) \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, TIME_WAIT, TIME_WAIT) \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, CLOSE, CLOSE) \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, SYN_SENT2, SYN_SENT2) \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, RETRANSMIT, RETRANS) \ + CT_DPIF_TO_NL_TP_MAPPING(TCP, TCP, UNACK, UNACK) + +#define CT_DPIF_TO_NL_TP_UDP_MAPPINGS \ + CT_DPIF_TO_NL_TP_MAPPING(UDP, UDP, FIRST, UNREPLIED) \ + CT_DPIF_TO_NL_TP_MAPPING(UDP, UDP, MULTIPLE, REPLIED) + +#define CT_DPIF_TO_NL_TP_ICMP_MAPPINGS \ + CT_DPIF_TO_NL_TP_MAPPING(ICMP, ICMP, FIRST, TIMEOUT) + +#define CT_DPIF_TO_NL_TP_ICMPV6_MAPPINGS \ + CT_DPIF_TO_NL_TP_MAPPING(ICMP, ICMPV6, FIRST, TIMEOUT) + + +#define CT_DPIF_TO_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2) \ +if (tp->present & (1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1)) { \ + nl_tp->present |= 1 << CTA_TIMEOUT_##PROTO2##_##ATTR2; \ + nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2] = \ + tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1]; \ +} + +static void +dpif_netlink_get_nl_tp_tcp_attrs(const struct ct_dpif_timeout_policy *tp, + struct nl_ct_timeout_policy *nl_tp) +{ + CT_DPIF_TO_NL_TP_TCP_MAPPINGS +} + +static void +dpif_netlink_get_nl_tp_udp_attrs(const struct ct_dpif_timeout_policy *tp, + struct nl_ct_timeout_policy *nl_tp) +{ + 
CT_DPIF_TO_NL_TP_UDP_MAPPINGS +} + +static void +dpif_netlink_get_nl_tp_icmp_attrs(const struct ct_dpif_timeout_policy *tp, + struct nl_ct_timeout_policy *nl_tp) +{ + CT_DPIF_TO_NL_TP_ICMP_MAPPINGS +} + +static void +dpif_netlink_get_nl_tp_icmpv6_attrs(const struct ct_dpif_timeout_policy *tp, + struct nl_ct_timeout_policy *nl_tp) +{ + CT_DPIF_TO_NL_TP_ICMPV6_MAPPINGS +} + +#undef CT_DPIF_TO_NL_TP_MAPPING + +static void +dpif_netlink_get_nl_tp_attrs(const struct ct_dpif_timeout_policy *tp, + uint8_t l4num, struct nl_ct_timeout_policy *nl_tp) +{ + nl_tp->present = 0; + + if (l4num == IPPROTO_TCP) { + dpif_netlink_get_nl_tp_tcp_attrs(tp, nl_tp); + } else if (l4num == IPPROTO_UDP) { + dpif_netlink_get_nl_tp_udp_attrs(tp, nl_tp); + } else if (l4num == IPPROTO_ICMP) { + dpif_netlink_get_nl_tp_icmp_attrs(tp, nl_tp); + } else if (l4num == IPPROTO_ICMPV6) { + dpif_netlink_get_nl_tp_icmpv6_attrs(tp, nl_tp); + } +} + +#define CT_DPIF_TO_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2) \ +if (nl_tp->present & (1 << CTA_TIMEOUT_##PROTO2##_##ATTR2)) { \ + tp->present |= 1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1; \ + tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] = \ + nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2]; \ + } + +static void +dpif_netlink_set_ct_dpif_tp_tcp_attrs(const struct nl_ct_timeout_policy *nl_tp, + struct ct_dpif_timeout_policy *tp) +{ + CT_DPIF_TO_NL_TP_TCP_MAPPINGS +} + +static void +dpif_netlink_set_ct_dpif_tp_udp_attrs(const struct nl_ct_timeout_policy *nl_tp, + struct ct_dpif_timeout_policy *tp) +{ + CT_DPIF_TO_NL_TP_UDP_MAPPINGS +} + +static void +dpif_netlink_set_ct_dpif_tp_icmp_attrs( + const struct nl_ct_timeout_policy *nl_tp, + struct ct_dpif_timeout_policy *tp) +{ + CT_DPIF_TO_NL_TP_ICMP_MAPPINGS +} + +static void +dpif_netlink_set_ct_dpif_tp_icmpv6_attrs( + const struct nl_ct_timeout_policy *nl_tp, + struct ct_dpif_timeout_policy *tp) +{ + CT_DPIF_TO_NL_TP_ICMPV6_MAPPINGS +} + +#undef CT_DPIF_TO_NL_TP_MAPPING + +static void 
+dpif_netlink_set_ct_dpif_tp_attrs(const struct nl_ct_timeout_policy *nl_tp, + struct ct_dpif_timeout_policy *tp) +{ + if (nl_tp->l4num == IPPROTO_TCP) { + dpif_netlink_set_ct_dpif_tp_tcp_attrs(nl_tp, tp); + } else if (nl_tp->l4num == IPPROTO_UDP) { + dpif_netlink_set_ct_dpif_tp_udp_attrs(nl_tp, tp); + } else if (nl_tp->l4num == IPPROTO_ICMP) { + dpif_netlink_set_ct_dpif_tp_icmp_attrs(nl_tp, tp); + } else if (nl_tp->l4num == IPPROTO_ICMPV6) { + dpif_netlink_set_ct_dpif_tp_icmpv6_attrs(nl_tp, tp); + } +} + +static int +dpif_netlink_ct_add_timeout_policy(struct dpif *dpif OVS_UNUSED, + bool is_default, + const struct ct_dpif_timeout_policy *tp) +{ +#ifdef _WIN32 + return EOPNOTSUPP; +#else + struct nl_ct_timeout_policy nl_tp; + struct ds ds = DS_EMPTY_INITIALIZER; + int i, err; + + for (i = 0; i < ARRAY_SIZE(tp_protos); ++i) { + dpif_netlink_format_tp_name(tp->id, tp_protos[i].l3num, + tp_protos[i].l4num, &ds); + ovs_strlcpy(nl_tp.name, ds_cstr(&ds), sizeof nl_tp.name); + nl_tp.l3num = tp_protos[i].l3num; + nl_tp.l4num = tp_protos[i].l4num; + dpif_netlink_get_nl_tp_attrs(tp, tp_protos[i].l4num, &nl_tp); + if (!is_default) { + err = nl_ct_set_timeout_policy(&nl_tp); + } else if (tp_protos[i].l3num == AF_INET) { + /* The default timeout policy is shared between AF_INET and + * AF_INET6 in the kernel. So configure AF_INET is sufficient. 
*/ + err = nl_ct_set_default_timeout_policy(&nl_tp); + } + if (err) { + VLOG_INFO("failed to set timeout policy %s (%s)", nl_tp.name, + ovs_strerror(err)); + return err; + } + } + + ds_destroy(&ds); + return 0; +#endif +} + +static int +dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED, + bool is_default, uint32_t tp_id, + struct ct_dpif_timeout_policy *tp) +{ +#ifdef _WIN32 + return EOPNOTSUPP; +#else + struct nl_ct_timeout_policy nl_tp; + struct ds nl_tp_name = DS_EMPTY_INITIALIZER; + int i, err; + + tp->id = tp_id; + tp->present = 0; + for (i = 0; i < ARRAY_SIZE(tp_protos); ++i) { + if (!is_default) { + dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num, + tp_protos[i].l4num, &nl_tp_name); + err = nl_ct_get_timeout_policy(ds_cstr(&nl_tp_name), &nl_tp); + } else if (tp_protos[i].l3num == AF_INET) { + /* The default timeout is shared between AF_INET and AF_INET6 + * in the kernel. So get from AF_INET is sufficient. */ + err = nl_ct_get_default_timeout_policy(tp_protos[i].l3num, + tp_protos[i].l4num, &nl_tp); + } + if (err) { + return err; + } + dpif_netlink_set_ct_dpif_tp_attrs(&nl_tp, tp); + } + + ds_destroy(&nl_tp_name); + return 0; +#endif +} + +static int +dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED, + uint32_t tp_id) +{ +#ifdef _WIN32 + return EOPNOTSUPP; +#else + struct ds nl_tp_name = DS_EMPTY_INITIALIZER; + int i, err; + + if (!tp_id) { + return EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(tp_protos); ++i) { + dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num, + tp_protos[i].l4num, &nl_tp_name); + err = nl_ct_del_timeout_policy(ds_cstr(&nl_tp_name)); + if (err) { + VLOG_INFO("failed to delete timeout policy %s (%s)", + ds_cstr(&nl_tp_name), ovs_strerror(err)); + return err; + } + } + + ds_destroy(&nl_tp_name); + return 0; +#endif +} + +struct dpif_netlink_ct_timeout_policy_dump_state { + struct nl_ct_timeout_policy_dump_state *nl_dump_state; + struct hmap tp_dump_map; +}; + +struct dpif_netlink_tp_dump_node { + struct 
hmap_node hmap_node; /* node in tp_dump_map. */ + struct ct_dpif_timeout_policy *tp; + uint32_t present; +}; + +static struct dpif_netlink_tp_dump_node * +get_dpif_netlink_tp_dump_node_by_tp_id(uint32_t tp_id, + struct hmap *tp_dump_map) +{ + struct dpif_netlink_tp_dump_node *tp_dump_node; + + HMAP_FOR_EACH_WITH_HASH (tp_dump_node, hmap_node, hash_int(tp_id, 0), + tp_dump_map) { + if (tp_dump_node->tp->id == tp_id) { + return tp_dump_node; + } + } + return NULL; +} + +static void +update_dpif_netlink_tp_dump_node( + const struct nl_ct_timeout_policy *nl_tp, + struct dpif_netlink_tp_dump_node *tp_dump_node) +{ + int i; + + dpif_netlink_set_ct_dpif_tp_attrs(nl_tp, tp_dump_node->tp); + for (i = 0; i < DPIF_NL_TP_MAX; ++i) { + if (nl_tp->l3num == tp_protos[i].l3num && + nl_tp->l4num == tp_protos[i].l4num) { + tp_dump_node->present |= 1 << i; + break; + } + } +} + +static int +dpif_netlink_ct_timeout_policy_dump_start(struct dpif *dpif OVS_UNUSED, + void **statep) +{ +#ifdef _WIN32 + return EOPNOTSUPP; +#else + struct dpif_netlink_ct_timeout_policy_dump_state *dump_state; + int err; + + *statep = dump_state = xzalloc(sizeof *dump_state); + err = nl_ct_timeout_policy_dump_start(&dump_state->nl_dump_state); + if (err) { + free(dump_state); + return err; + } + hmap_init(&dump_state->tp_dump_map); + return 0; +#endif +} + +static int +dpif_netlink_ct_timeout_policy_dump_next(struct dpif *dpif OVS_UNUSED, + void *state, + struct ct_dpif_timeout_policy **tp) +{ +#ifdef _WIN32 + return EOPNOTSUPP; +#else + struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state; + struct dpif_netlink_tp_dump_node *tp_dump_node; + struct nl_ct_timeout_policy nl_tp; + uint32_t tp_id; + int err; + + do { + err = nl_ct_timeout_policy_dump_next(dump_state->nl_dump_state, + &nl_tp); + if (err) { + break; + } + + if (!ovs_scan(nl_tp.name, NL_TP_NAME_PREFIX"%"PRIu32, &tp_id)) { + continue; + } + + tp_dump_node = get_dpif_netlink_tp_dump_node_by_tp_id( + tp_id, &dump_state->tp_dump_map); + 
if (!tp_dump_node) { + tp_dump_node = xzalloc(sizeof *tp_dump_node); + tp_dump_node->tp = xzalloc(sizeof *tp_dump_node->tp); + tp_dump_node->tp->id = tp_id; + hmap_insert(&dump_state->tp_dump_map, &tp_dump_node->hmap_node, + hash_int(tp_id, 0)); + } + + update_dpif_netlink_tp_dump_node(&nl_tp, tp_dump_node); + if (tp_dump_node->present == DPIF_NL_ALL_TP) { + hmap_remove(&dump_state->tp_dump_map, &tp_dump_node->hmap_node); + *tp = tp_dump_node->tp; + free(tp_dump_node); + break; + } + } while (true); + return err; +#endif +} + +static int +dpif_netlink_ct_timeout_policy_dump_done(struct dpif *dpif OVS_UNUSED, + void *state) +{ +#ifdef _WIN32 + return EOPNOTSUPP; +#else + struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state; + struct dpif_netlink_tp_dump_node *tp_dump_node; + int err; + + err = nl_ct_timeout_policy_dump_done(dump_state->nl_dump_state); + /* clear dump map, modulize */ + HMAP_FOR_EACH_POP (tp_dump_node, hmap_node, &dump_state->tp_dump_map) { + VLOG_INFO("Partial timeout policy in dpif-netlink %"PRIu32, + tp_dump_node->tp->id); + free(tp_dump_node->tp); + free(tp_dump_node); + } + hmap_destroy(&dump_state->tp_dump_map); + free(dump_state); + return err; +#endif +} + /* Meters */ @@ -3434,12 +3854,12 @@ const struct dpif_class dpif_netlink_class = { dpif_netlink_ct_set_limits, dpif_netlink_ct_get_limits, dpif_netlink_ct_del_limits, - NULL, /* ct_set_timeout_policy */ - NULL, /* ct_get_timeout_policy */ - NULL, /* ct_del_timeout_policy */ - NULL, /* ct_timeout_policy_dump_start */ - NULL, /* ct_timeout_policy_dump_next */ - NULL, /* ct_timeout_policy_dump_done */ + dpif_netlink_ct_add_timeout_policy, + dpif_netlink_ct_get_timeout_policy, + dpif_netlink_ct_del_timeout_policy, + dpif_netlink_ct_timeout_policy_dump_start, + dpif_netlink_ct_timeout_policy_dump_next, + dpif_netlink_ct_timeout_policy_dump_done, NULL, /* ipf_set_enabled */ NULL, /* ipf_set_min_frag */ NULL, /* ipf_set_max_nfrags */ diff --git a/lib/dpif-netlink.h 
b/lib/dpif-netlink.h index 0a9628088275..7e75120161f6 100644 --- a/lib/dpif-netlink.h +++ b/lib/dpif-netlink.h @@ -23,6 +23,7 @@ #include "odp-netlink.h" #include "flow.h" +#include "netlink-conntrack.h" struct ofpbuf; @@ -50,7 +51,6 @@ struct dpif_netlink_vport { }; void dpif_netlink_vport_init(struct dpif_netlink_vport *); - int dpif_netlink_vport_transact(const struct dpif_netlink_vport *request, struct dpif_netlink_vport *reply, struct ofpbuf **bufp); diff --git a/lib/netlink-conntrack.c b/lib/netlink-conntrack.c index 7631ba5d5d31..9bc0ddb66248 100644 --- a/lib/netlink-conntrack.c +++ b/lib/netlink-conntrack.c @@ -840,6 +840,369 @@ nl_ct_parse_helper(struct nlattr *nla, struct ct_dpif_helper *helper) return parsed; } +static int nl_ct_timeout_policy_max_attr[] = { + [IPPROTO_TCP] = CTA_TIMEOUT_TCP_MAX, + [IPPROTO_UDP] = CTA_TIMEOUT_UDP_MAX, + [IPPROTO_ICMP] = CTA_TIMEOUT_ICMP_MAX, + [IPPROTO_ICMPV6] = CTA_TIMEOUT_ICMPV6_MAX +}; + +static void +nl_ct_set_timeout_policy_attr(struct nl_ct_timeout_policy *nl_tp, + uint32_t attr, uint32_t val) +{ + nl_tp->present |= 1 << attr; + nl_tp->attrs[attr] = val; +} + +static int +nl_ct_parse_tcp_timeout_policy_data(struct nlattr *nla, + struct nl_ct_timeout_policy *nl_tp) +{ + static const struct nl_policy policy[] = { + [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_TCP_CLOSE] = { .type = NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_TCP_RETRANS] = { .type 
= NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_TCP_UNACK] = { .type = NL_A_BE32, + .optional = false }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + int i; + + if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { + VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. " + "Possibly incompatible Linux kernel version."); + return EINVAL; + } + + for (i = CTA_TIMEOUT_TCP_SYN_SENT; i <= CTA_TIMEOUT_TCP_UNACK; i++) { + nl_ct_set_timeout_policy_attr(nl_tp, i, + ntohl(nl_attr_get_be32(attrs[i]))); + } + return 0; +} + +static int +nl_ct_parse_udp_timeout_policy_data(struct nlattr *nla, + struct nl_ct_timeout_policy *nl_tp) +{ + static const struct nl_policy policy[] = { + [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NL_A_BE32, + .optional = false }, + [CTA_TIMEOUT_UDP_REPLIED] = { .type = NL_A_BE32, + .optional = false }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + int i; + + if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { + VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. " + "Possibly incompatible Linux kernel version."); + return EINVAL; + } + + for (i = CTA_TIMEOUT_UDP_UNREPLIED; i <= CTA_TIMEOUT_UDP_REPLIED; i++) { + nl_ct_set_timeout_policy_attr(nl_tp, i, + ntohl(nl_attr_get_be32(attrs[i]))); + } + return 0; +} + +static int +nl_ct_parse_icmp_timeout_policy_data(struct nlattr *nla, + struct nl_ct_timeout_policy *nl_tp) +{ + static const struct nl_policy policy[] = { + [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NL_A_BE32, + .optional = false }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + + if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { + VLOG_ERR_RL(&rl, "Could not parse nested icmp timeout options. 
" + "Possibly incompatible Linux kernel version."); + return EINVAL; + } + + nl_ct_set_timeout_policy_attr( + nl_tp, CTA_TIMEOUT_ICMP_TIMEOUT, + ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMP_TIMEOUT]))); + return 0; +} + +static int +nl_ct_parse_icmpv6_timeout_policy_data(struct nlattr *nla, + struct nl_ct_timeout_policy *nl_tp) +{ + static const struct nl_policy policy[] = { + [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NL_A_BE32, + .optional = false }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + + if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) { + VLOG_ERR_RL(&rl, "Could not parse nested icmpv6 timeout options. " + "Possibly incompatible Linux kernel version."); + return EINVAL; + } + + nl_ct_set_timeout_policy_attr( + nl_tp, CTA_TIMEOUT_ICMPV6_TIMEOUT, + ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMPV6_TIMEOUT]))); + return 0; +} + +static int +nl_ct_parse_timeout_policy_data(struct nlattr *nla, + struct nl_ct_timeout_policy *nl_tp) +{ + switch (nl_tp->l4num) { + case IPPROTO_TCP: + return nl_ct_parse_tcp_timeout_policy_data(nla, nl_tp); + case IPPROTO_UDP: + return nl_ct_parse_udp_timeout_policy_data(nla, nl_tp); + case IPPROTO_ICMP: + return nl_ct_parse_icmp_timeout_policy_data(nla, nl_tp); + case IPPROTO_ICMPV6: + return nl_ct_parse_icmpv6_timeout_policy_data(nla, nl_tp); + default: + return EINVAL; + } +} + +static int +nl_ct_timeout_policy_from_ofpbuf(struct ofpbuf *buf, + struct nl_ct_timeout_policy *nl_tp, + bool default_tp) +{ + static const struct nl_policy policy[] = { + [CTA_TIMEOUT_NAME] = { .type = NL_A_STRING, .optional = false }, + [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false }, + [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false }, + [CTA_TIMEOUT_DATA] = { .type = NL_A_NESTED, .optional = false } + }; + static const struct nl_policy policy_default_tp[] = { + [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false }, + [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false }, + 
[CTA_TIMEOUT_DATA] = { .type = NL_A_NESTED, .optional = false } + }; + + struct nlattr *attrs[ARRAY_SIZE(policy)]; + struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size); + struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + struct nfgenmsg *nfmsg = ofpbuf_try_pull(&b, sizeof *nfmsg); + int err; + + if (!nlmsg || !nfmsg + || NFNL_SUBSYS_ID(nlmsg->nlmsg_type) != NFNL_SUBSYS_CTNETLINK_TIMEOUT + || nfmsg->version != NFNETLINK_V0 + || !nl_policy_parse(&b, 0, default_tp ? policy_default_tp : policy, + attrs, default_tp ? ARRAY_SIZE(policy_default_tp) : + ARRAY_SIZE(policy))) { + return EINVAL; + } + + if (!default_tp) { + ovs_strlcpy(nl_tp->name, nl_attr_get_string(attrs[CTA_TIMEOUT_NAME]), + sizeof nl_tp->name); + } + nl_tp->l3num = ntohs(nl_attr_get_be16(attrs[CTA_TIMEOUT_L3PROTO])); + nl_tp->l4num = nl_attr_get_u8(attrs[CTA_TIMEOUT_L4PROTO]); + nl_tp->present = 0; + + err = nl_ct_parse_timeout_policy_data(attrs[CTA_TIMEOUT_DATA], nl_tp); + return err; +} + +int +nl_ct_set_timeout_policy(const struct nl_ct_timeout_policy *nl_tp) +{ + struct ofpbuf buf; + size_t offset; + int i, err; + + ofpbuf_init(&buf, 512); + nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, + IPCTNL_MSG_TIMEOUT_NEW, NLM_F_REQUEST | NLM_F_CREATE + | NLM_F_ACK | NLM_F_REPLACE); + + nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, nl_tp->name); + nl_msg_put_be16(&buf, CTA_TIMEOUT_L3PROTO, htons(nl_tp->l3num)); + nl_msg_put_u8(&buf, CTA_TIMEOUT_L4PROTO, nl_tp->l4num); + + offset = nl_msg_start_nested(&buf, CTA_TIMEOUT_DATA); + for (i = 1; i <= nl_ct_timeout_policy_max_attr[nl_tp->l4num]; ++i) { + if (nl_tp->present & 1 << i) { + nl_msg_put_be32(&buf, i, htonl(nl_tp->attrs[i])); + } + } + nl_msg_end_nested(&buf, offset); + + err = nl_transact(NETLINK_NETFILTER, &buf, NULL); + ofpbuf_uninit(&buf); + return err; +} + +int +nl_ct_set_default_timeout_policy(const struct nl_ct_timeout_policy *nl_tp) +{ + struct ofpbuf buf; + size_t offset; + int i, err; + + 
ofpbuf_init(&buf, 512); + nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, + IPCTNL_MSG_TIMEOUT_DEFAULT_SET, NLM_F_REQUEST + | NLM_F_ACK | NLM_F_REPLACE); + + nl_msg_put_be16(&buf, CTA_TIMEOUT_L3PROTO, htons(nl_tp->l3num)); + nl_msg_put_u8(&buf, CTA_TIMEOUT_L4PROTO, nl_tp->l4num); + + offset = nl_msg_start_nested(&buf, CTA_TIMEOUT_DATA); + for (i = 1; i <= nl_ct_timeout_policy_max_attr[nl_tp->l4num]; ++i) { + if (nl_tp->present & 1 << i) { + nl_msg_put_be32(&buf, i, htonl(nl_tp->attrs[i])); + } + } + nl_msg_end_nested(&buf, offset); + + err = nl_transact(NETLINK_NETFILTER, &buf, NULL); + ofpbuf_uninit(&buf); + return err; +} + +int +nl_ct_get_timeout_policy(const char *tp_name, + struct nl_ct_timeout_policy *nl_tp) +{ + struct ofpbuf request, *reply; + int err; + + ofpbuf_init(&request, 512); + nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, + IPCTNL_MSG_TIMEOUT_GET, NLM_F_REQUEST | NLM_F_ACK); + nl_msg_put_string(&request, CTA_TIMEOUT_NAME, tp_name); + err = nl_transact(NETLINK_NETFILTER, &request, &reply); + if (err) { + goto out; + } + + err = nl_ct_timeout_policy_from_ofpbuf(reply, nl_tp, false); + +out: + ofpbuf_uninit(&request); + ofpbuf_delete(reply); + return err; +} + +int +nl_ct_get_default_timeout_policy(uint16_t l3num, uint8_t l4num, + struct nl_ct_timeout_policy *nl_tp) +{ + struct ofpbuf request, *reply; + int err; + + ofpbuf_init(&request, 512); + nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, + IPCTNL_MSG_TIMEOUT_DEFAULT_GET, + NLM_F_REQUEST | NLM_F_ACK); + + nl_msg_put_be16(&request, CTA_TIMEOUT_L3PROTO, htons(l3num)); + nl_msg_put_u8(&request, CTA_TIMEOUT_L4PROTO, l4num); + err = nl_transact(NETLINK_NETFILTER, &request, &reply); + if (err) { + goto out; + } + + err = nl_ct_timeout_policy_from_ofpbuf(reply, nl_tp, true); + +out: + ofpbuf_uninit(&request); + ofpbuf_delete(reply); + return err; +} + +int +nl_ct_del_timeout_policy(const char *tp_name) +{ + struct ofpbuf buf; 
+ int err; + + ofpbuf_init(&buf, 64); + nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, + IPCTNL_MSG_TIMEOUT_DELETE, NLM_F_REQUEST | NLM_F_ACK); + + nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, tp_name); + err = nl_transact(NETLINK_NETFILTER, &buf, NULL); + ofpbuf_uninit(&buf); + return err; +} + +struct nl_ct_timeout_policy_dump_state { + struct nl_dump dump; + struct ofpbuf buf; +}; + +int +nl_ct_timeout_policy_dump_start( + struct nl_ct_timeout_policy_dump_state **statep) +{ + struct ofpbuf request; + struct nl_ct_timeout_policy_dump_state *state; + + *statep = state = xzalloc(sizeof *state); + ofpbuf_init(&request, 512); + nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT, + IPCTNL_MSG_TIMEOUT_GET, + NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP); + + nl_dump_start(&state->dump, NETLINK_NETFILTER, &request); + ofpbuf_uninit(&request); + ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE); + return 0; +} + +int +nl_ct_timeout_policy_dump_next(struct nl_ct_timeout_policy_dump_state *state, + struct nl_ct_timeout_policy *nl_tp) +{ + struct ofpbuf reply; + int err; + + if (!nl_dump_next(&state->dump, &reply, &state->buf)) { + return EOF; + } + err = nl_ct_timeout_policy_from_ofpbuf(&reply, nl_tp, false); + ofpbuf_uninit(&reply); + return err; +} + +int +nl_ct_timeout_policy_dump_done(struct nl_ct_timeout_policy_dump_state *state) +{ + int err = nl_dump_done(&state->dump); + ofpbuf_uninit(&state->buf); + free(state); + return err; +} + /* Translate netlink entry status flags to CT_DPIF_TCP status flags. 
*/ static uint32_t ips_status_to_dpif_flags(uint32_t status) diff --git a/lib/netlink-conntrack.h b/lib/netlink-conntrack.h index 8b536fd65ba8..ae6e428e0929 100644 --- a/lib/netlink-conntrack.h +++ b/lib/netlink-conntrack.h @@ -17,9 +17,12 @@ #ifndef NETLINK_CONNTRACK_H #define NETLINK_CONNTRACK_H +#include <linux/netfilter/nfnetlink_cttimeout.h> + #include "byte-order.h" #include "compiler.h" #include "ct-dpif.h" +#include "netlink-socket.h" #include "openvswitch/dynamic-string.h" #include "openvswitch/hmap.h" #include "openvswitch/ofpbuf.h" @@ -33,7 +36,18 @@ enum nl_ct_event_type { NL_CT_EVENT_DELETE = 1 << 2, }; +#define NL_CT_TIMEOUT_POLICY_MAX_ATTR (CTA_TIMEOUT_TCP_MAX + 1) + +struct nl_ct_timeout_policy { + char name[CTNL_TIMEOUT_NAME_MAX]; + uint16_t l3num; + uint8_t l4num; + uint32_t attrs[NL_CT_TIMEOUT_POLICY_MAX_ATTR]; + uint32_t present; +}; + struct nl_ct_dump_state; +struct nl_ct_timeout_policy_dump_state; int nl_ct_dump_start(struct nl_ct_dump_state **, const uint16_t *zone, int *ptot_bkts); @@ -44,6 +58,21 @@ int nl_ct_flush(void); int nl_ct_flush_zone(uint16_t zone); int nl_ct_flush_tuple(const struct ct_dpif_tuple *, uint16_t zone); +int nl_ct_set_timeout_policy(const struct nl_ct_timeout_policy *nl_tp); +int nl_ct_set_default_timeout_policy(const struct nl_ct_timeout_policy *nl_tp); +int nl_ct_get_timeout_policy(const char *tp_name, + struct nl_ct_timeout_policy *nl_tp); +int nl_ct_get_default_timeout_policy(uint16_t l3num, uint8_t l4num, + struct nl_ct_timeout_policy *nl_tp); +int nl_ct_del_timeout_policy(const char *tp_name); +int nl_ct_timeout_policy_dump_start( + struct nl_ct_timeout_policy_dump_state **statep); +int nl_ct_timeout_policy_dump_next( + struct nl_ct_timeout_policy_dump_state *state, + struct nl_ct_timeout_policy *nl_tp); +int nl_ct_timeout_policy_dump_done( + struct nl_ct_timeout_policy_dump_state *state); + bool nl_ct_parse_entry(struct ofpbuf *, struct ct_dpif_entry *, enum nl_ct_event_type *); void 
nl_ct_format_event_entry(const struct ct_dpif_entry *, diff --git a/lib/netlink-protocol.h b/lib/netlink-protocol.h index c0617dfad21f..bf631b1a14d0 100644 --- a/lib/netlink-protocol.h +++ b/lib/netlink-protocol.h @@ -48,6 +48,7 @@ #define NLM_F_ECHO 0x008 #define NLM_F_ROOT 0x100 +#define NLM_F_REPLACE 0x100 #define NLM_F_MATCH 0x200 #define NLM_F_EXCL 0x200 #define NLM_F_ATOMIC 0x400
This patch implements all the conntrack timeout policy related functions defined in dpif_class for the dpif-netlink class in the Linux kernel datapath. In the Linux kernel, the timeout policy is maintained per L3/L4 protocol, and it is identified by a 32-byte null-terminated string. However, in vswitchd, the timeout policy is a generic one that consists of all the supported L4 protocols. Therefore, the main task for this patch is to break down the generic timeout policy into 6 sub-policies (ipv4 tcp, udp, icmp, and ipv6 tcp, udp, icmp) in dpif-netlink.c and push down the configuration using the netlink API in netlink-conntrack.c. This patch also adds missing symbols in the Windows datapath so that the build on Windows can pass. Appveyor CI: * https://ci.appveyor.com/project/YiHungWei/ovs/builds/26250549 Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com> --- datapath-windows/include/OvsDpInterfaceCtExt.h | 114 ++++++ datapath-windows/ovsext/Netlink/NetlinkProto.h | 1 + include/windows/automake.mk | 1 + .../windows/linux/netfilter/nfnetlink_cttimeout.h | 0 lib/dpif-netlink.c | 432 ++++++++++++++++++++- lib/dpif-netlink.h | 2 +- lib/netlink-conntrack.c | 363 +++++++++++++++++ lib/netlink-conntrack.h | 29 ++ lib/netlink-protocol.h | 1 + 9 files changed, 936 insertions(+), 7 deletions(-) create mode 100644 include/windows/linux/netfilter/nfnetlink_cttimeout.h