@@ -341,6 +341,8 @@ if LINUX
lib_libopenvswitch_la_SOURCES += \
lib/dpif-netlink.c \
lib/dpif-netlink.h \
+ lib/tc.h \
+ lib/tc.c \
lib/dpif-hw-netlink.c \
lib/dpif-hw-netlink.h \
lib/if-notifier.c \
new file mode 100644
@@ -0,0 +1,796 @@
+
+#include <config.h>
+
+#include <errno.h>
+#include <linux/rtnetlink.h>
+#include <net/if.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <linux/gen_stats.h>
+#include "timeval.h"
+#include "netlink-socket.h"
+#include "netlink.h"
+#include "ofpbuf.h"
+#include "rtnetlink.h"
+#include "openvswitch/vlog.h"
+#include "tc.h"
+
+/* Selects the offload flag that tc_replace_flower() attaches to each filter:
+ * TCA_CLS_FLAGS_SKIP_HW when true, TCA_CLS_FLAGS_SKIP_SW when false.
+ * Written by tc_set_skip_hw(). */
+bool SKIP_HW = false;
+
+/* Registers this file as the "tc" logging module for the VLOG_*() calls. */
+VLOG_DEFINE_THIS_MODULE(tc);
+
+/* Builds the tc handle 'major':'minor' by combining 'major', shifted left by
+ * 16 bits, with 'minor' through TC_H_MAKE(). */
+static unsigned int
+tc_make_handle(unsigned int major, unsigned int minor)
+{
+    unsigned int major_bits = major << 16;
+
+    return TC_H_MAKE(major_bits, minor);
+}
+
+/* Initializes 'request' as a fresh 512-byte buffer and fills it with a
+ * netlink header of the given 'type' (flags are NLM_F_REQUEST | 'flags')
+ * followed by a zeroed struct tcmsg for 'ifindex'.
+ *
+ * Returns a pointer to the tcmsg inside 'request'.  The caller is expected to
+ * fill in tcm_handle and tcm_parent.  nlmsg_len, nlmsg_seq and nlmsg_pid are
+ * left at 0 here (presumably completed when the message is sent). */
+static struct tcmsg *
+hw_tc_make_request(int ifindex, int type, unsigned int flags,
+                   struct ofpbuf *request)
+{
+    struct nlmsghdr *hdr;
+    struct tcmsg *tcm;
+
+    ofpbuf_init(request, 512);
+    ovs_assert(request->size == 0);
+
+    /* Make room for both headers up front, then append them in order. */
+    nl_msg_reserve(request, NLMSG_HDRLEN + sizeof *tcm);
+
+    hdr = nl_msg_put_uninit(request, NLMSG_HDRLEN);
+    hdr->nlmsg_type = type;
+    hdr->nlmsg_flags = NLM_F_REQUEST | flags;
+    hdr->nlmsg_len = 0;
+    hdr->nlmsg_seq = 0;
+    hdr->nlmsg_pid = 0;
+
+    tcm = ofpbuf_put_zeros(request, sizeof *tcm);
+    tcm->tcm_ifindex = ifindex;
+    tcm->tcm_family = AF_UNSPEC;
+
+    return tcm;
+}
+
+/* Sends 'request' with nl_transact() over NETLINK_ROUTE, handing 'replyp'
+ * through unchanged, and then uninitializes 'request' whether or not the
+ * transaction succeeded.  Returns the value nl_transact() returned. */
+static int
+tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
+{
+    int retval;
+
+    retval = nl_transact(NETLINK_ROUTE, request, replyp);
+    ofpbuf_uninit(request);
+
+    return retval;
+}
+
+/* Highest action order index looked for inside TCA_FLOWER_ACT. */
+enum { TC_FLOWER_MAX_ACT_ORDER = 32 };
+
+/* Logs the Ethernet address 'ea' at debug level, prefixed by 'name'. */
+static void
+tc_flow_log_eth(const char *name, const struct eth_addr *ea)
+{
+    VLOG_DBG("%s: %02x:%02x:%02x:%02x:%02x:%02x\n\n", name,
+             ea->ea[0], ea->ea[1], ea->ea[2],
+             ea->ea[3], ea->ea[4], ea->ea[5]);
+}
+
+/* Copies the Ethernet address carried by 'key_attr' into '*key' and the one
+ * carried by 'mask_attr' into '*mask'.  Does nothing if 'key_attr' is NULL.
+ *
+ * The flower policy marks the mask attributes optional, so 'mask_attr' may be
+ * NULL even when the key is present; in that case the mask is set to all-ones
+ * (exact match), mirroring what is done for missing port masks. */
+static void
+tc_flow_parse_eth(const struct nlattr *key_attr,
+                  const struct nlattr *mask_attr,
+                  const char *key_name, const char *mask_name,
+                  struct eth_addr *key, struct eth_addr *mask)
+{
+    const struct eth_addr *ea;
+
+    if (!key_attr) {
+        return;
+    }
+
+    ea = nl_attr_get_unspec(key_attr, ETH_ALEN);
+    tc_flow_log_eth(key_name, ea);
+    memcpy(key, ea, sizeof *key);
+
+    if (mask_attr) {
+        ea = nl_attr_get_unspec(mask_attr, ETH_ALEN);
+        tc_flow_log_eth(mask_name, ea);
+        memcpy(mask, ea, sizeof *mask);
+    } else {
+        memset(mask, 0xFF, sizeof *mask);
+    }
+}
+
+/* Stores the port from 'key_attr' (if any) into '*port' and the mask from
+ * 'mask_attr' into '*port_mask'.  A missing mask becomes all-ones. */
+static void
+tc_flow_parse_port(const struct nlattr *key_attr,
+                   const struct nlattr *mask_attr,
+                   uint16_t *port, uint16_t *port_mask)
+{
+    if (key_attr) {
+        *port = nl_attr_get_be16(key_attr);
+    }
+    if (mask_attr) {
+        *port_mask = nl_attr_get_be16(mask_attr);
+    } else {
+        memset(port_mask, 0xFF, sizeof *port_mask);
+    }
+}
+
+/* Fills the IP protocol, IP version, IPv4 addresses and TCP/UDP ports of
+ * 'tc_flow' from the parsed flower attributes 'attrs'.  Nothing is read
+ * unless TCA_FLOWER_KEY_IP_PROTO is present.
+ *
+ * NOTE(review): the IPv6 address attributes are accepted by the policy but
+ * are not copied into 'tc_flow' here. */
+static void
+tc_flow_parse_ip(struct nlattr *attrs[], ovs_be16 eth_type,
+                 struct tc_flow *tc_flow)
+{
+    int proto;
+
+    if (!attrs[TCA_FLOWER_KEY_IP_PROTO]) {
+        return;
+    }
+
+    proto = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_IP_PROTO]);
+    tc_flow->ip_proto = proto;
+
+    if (eth_type == htons(ETH_P_IP)) {
+        tc_flow->ip_type = 4;
+    } else if (eth_type == htons(ETH_P_IPV6)) {
+        tc_flow->ip_type = 6;
+    }
+
+    VLOG_DBG("ip_proto: %d (0x%x), ip_type: %d\n", proto, proto,
+             tc_flow->ip_type);
+
+    if (attrs[TCA_FLOWER_KEY_IPV4_SRC]) {
+        tc_flow->ipv4.ipv4_src =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC]);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]) {
+        tc_flow->ipv4.ipv4_src_mask =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV4_DST]) {
+        tc_flow->ipv4.ipv4_dst =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_DST]);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]) {
+        tc_flow->ipv4.ipv4_dst_mask =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]);
+    }
+
+    if (proto == IPPROTO_TCP) {
+        tc_flow_parse_port(attrs[TCA_FLOWER_KEY_TCP_SRC],
+                           attrs[TCA_FLOWER_KEY_TCP_SRC_MASK],
+                           &tc_flow->src_port, &tc_flow->src_port_mask);
+        tc_flow_parse_port(attrs[TCA_FLOWER_KEY_TCP_DST],
+                           attrs[TCA_FLOWER_KEY_TCP_DST_MASK],
+                           &tc_flow->dst_port, &tc_flow->dst_port_mask);
+    } else if (proto == IPPROTO_UDP) {
+        tc_flow_parse_port(attrs[TCA_FLOWER_KEY_UDP_SRC],
+                           attrs[TCA_FLOWER_KEY_UDP_SRC_MASK],
+                           &tc_flow->src_port, &tc_flow->src_port_mask);
+        tc_flow_parse_port(attrs[TCA_FLOWER_KEY_UDP_DST],
+                           attrs[TCA_FLOWER_KEY_UDP_DST_MASK],
+                           &tc_flow->dst_port, &tc_flow->dst_port_mask);
+    }
+}
+
+/* Parses the options of a "gact" action.  Only logs the action verdict;
+ * updates 'tc_flow->lastused' from the action's timestamps.  Returns 0 on
+ * success, EPROTO if the options cannot be parsed. */
+static int
+tc_flow_parse_gact(struct nlattr *options, struct tc_flow *tc_flow)
+{
+    static const struct nl_policy gact_policy[TCA_GACT_MAX + 1] = {
+        [TCA_GACT_PARMS] = { .type = NL_A_UNSPEC,
+                             .min_len = sizeof(struct tc_gact),
+                             .optional = false },
+        [TCA_GACT_PROB] = { .type = NL_A_UNSPEC,
+                            .min_len = sizeof(struct tc_gact_p),
+                            .optional = true },
+        [TCA_GACT_TM] = { .type = NL_A_UNSPEC,
+                          .min_len = sizeof(struct tcf_t),
+                          .optional = false },
+    };
+    struct nlattr *gact_attrs[ARRAY_SIZE(gact_policy)];
+    unsigned long long int lastuse, now;
+    const struct tc_gact *p;
+    const struct tcf_t *tm;
+
+    if (!nl_parse_nested(options, gact_policy, gact_attrs,
+                         ARRAY_SIZE(gact_policy))) {
+        VLOG_ERR("failed to parse gact action options");
+        return EPROTO;
+    }
+
+    /* PARMS and TM are required by the policy, so both are present here. */
+    p = nl_attr_get_unspec(gact_attrs[TCA_GACT_PARMS], sizeof *p);
+    if (p->action == TC_ACT_SHOT) {
+        VLOG_DBG("kind gact - dropping packet\n");
+    } else {
+        VLOG_ERR("unkown actions: %d\n", p->action);
+    }
+
+    tm = nl_attr_get_unspec(gact_attrs[TCA_GACT_TM], sizeof *tm);
+    lastuse = tm->lastuse * 10;
+    now = time_msec();
+    if (lastuse < 1000000000) {
+        tc_flow->lastused = now - lastuse;
+        VLOG_DBG("lastuse: %llu ms, now - lastuse: %llu\n",
+                 lastuse, now - lastuse);
+    } else {
+        VLOG_DBG("excessive lastuse: %llu ms\n", lastuse);
+    }
+
+    return 0;
+}
+
+/* Parses the options of a "mirred" action.  An egress redirect stores its
+ * target in 'tc_flow->ifindex_out'; 'tc_flow->lastused' is updated from the
+ * action's timestamps.  Returns 0 on success, EPROTO if the options cannot be
+ * parsed. */
+static int
+tc_flow_parse_mirred(struct nlattr *options, struct tc_flow *tc_flow)
+{
+    /* Sized by the mirred attribute space (the gact maximum was used here
+     * before, which only worked because it happens to be larger). */
+    static const struct nl_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
+        [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
+                               .min_len = sizeof(struct tc_mirred),
+                               .optional = false },
+        [TCA_MIRRED_TM] = { .type = NL_A_UNSPEC,
+                            .min_len = sizeof(struct tcf_t),
+                            .optional = false },
+    };
+    struct nlattr *mirred_attrs[ARRAY_SIZE(mirred_policy)];
+    unsigned long long int lastuse, now;
+    const struct tc_mirred *m;
+    const struct tcf_t *tm;
+
+    if (!nl_parse_nested(options, mirred_policy, mirred_attrs,
+                         ARRAY_SIZE(mirred_policy))) {
+        VLOG_ERR("failed to parse mirred action options");
+        return EPROTO;
+    }
+
+    /* PARMS and TM are required by the policy, so both are present here. */
+    m = nl_attr_get_unspec(mirred_attrs[TCA_MIRRED_PARMS], sizeof *m);
+    if (m->action == TC_ACT_STOLEN && m->eaction == TCA_EGRESS_REDIR
+        && m->ifindex) {
+        VLOG_DBG("mirred - redirect to ifinex: %d\n", m->ifindex);
+        tc_flow->ifindex_out = m->ifindex;
+    } else {
+        VLOG_ERR("unkown mirred actions: %d, %d, %d\n",
+                 m->action, m->eaction, m->ifindex);
+    }
+
+    tm = nl_attr_get_unspec(mirred_attrs[TCA_MIRRED_TM], sizeof *tm);
+    lastuse = tm->lastuse * 10;
+    now = time_msec();
+    if (lastuse < 1000000000) {
+        VLOG_DBG("lastuse: %llu ms, now - lastuse: %llu\n",
+                 lastuse, now - lastuse);
+        tc_flow->lastused = now - lastuse;
+    } else {
+        VLOG_ERR("excessive lastuse: %llu ms\n", lastuse);
+    }
+
+    return 0;
+}
+
+/* Copies the basic packet/byte counters nested in 'act_stats' into
+ * 'tc_flow->stats'.  Returns 0 on success, EPROTO if 'act_stats' cannot be
+ * parsed. */
+static int
+tc_flow_parse_act_stats(struct nlattr *act_stats, struct tc_flow *tc_flow)
+{
+    static const struct nl_policy stats_policy[TCA_STATS_MAX + 1] = {
+        [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC,
+                              .min_len = sizeof(struct gnet_stats_basic),
+                              .optional = true },
+    };
+    struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)];
+    struct ovs_flow_stats *stats = &tc_flow->stats;
+    const struct gnet_stats_basic *bs;
+
+    if (!nl_parse_nested(act_stats, stats_policy, stats_attrs,
+                         ARRAY_SIZE(stats_policy))) {
+        VLOG_ERR("failed to parse action's TCA_ACT_STATS policy");
+        return EPROTO;
+    }
+
+    if (!stats_attrs[TCA_STATS_BASIC]) {
+        VLOG_ERR("missing tca action basic stats (TCA_STATS_BASIC)\n");
+        return 0;
+    }
+
+    bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs);
+    VLOG_DBG("basic stats packets (gnet_stats_basic): %u, %llu\n",
+             bs->packets, bs->bytes);
+
+    stats->n_packets.lo = bs->packets;
+    stats->n_packets.hi = 0;
+
+    stats->n_bytes.hi = bs->bytes >> 32;
+    stats->n_bytes.lo = bs->bytes & 0x00000000FFFFFFFF;
+
+    return 0;
+}
+
+/* Walks every action nested in 'actions' (TCA_FLOWER_ACT) and folds its
+ * contents into 'tc_flow'.  Returns 0 on success, EPROTO on a parse error. */
+static int
+tc_flow_parse_actions(struct nlattr *actions, struct tc_flow *tc_flow)
+{
+    static const struct nl_policy act_policy[TCA_ACT_MAX + 1] = {
+        [TCA_ACT_KIND] = { .type = NL_A_STRING, .optional = false },
+        [TCA_ACT_OPTIONS] = { .type = NL_A_NESTED, .optional = false },
+        [TCA_ACT_STATS] = { .type = NL_A_NESTED, .optional = true },
+    };
+    /* Built on the stack on each call: every order slot is an optional
+     * nested attribute, so no mutable static state is shared. */
+    struct nl_policy orders_policy[TC_FLOWER_MAX_ACT_ORDER + 1];
+    struct nlattr *orders[ARRAY_SIZE(orders_policy)];
+    size_t i;
+
+    memset(orders_policy, 0, sizeof orders_policy);
+    for (i = 0; i < ARRAY_SIZE(orders_policy); i++) {
+        orders_policy[i].type = NL_A_NESTED;
+        orders_policy[i].optional = true;
+    }
+
+    if (!nl_parse_nested(actions, orders_policy, orders,
+                         ARRAY_SIZE(orders_policy))) {
+        VLOG_ERR("failed to parse action orders (TCA_FLOWER_ACT)");
+        return EPROTO;
+    }
+
+    /* Visit every parsed slot, including the last one (order 32), which the
+     * policy covers but the previous loop bound skipped. */
+    for (i = 0; i < ARRAY_SIZE(orders); i++) {
+        struct nlattr *action_attrs[ARRAY_SIZE(act_policy)];
+        const char *act_kind;
+        int error = 0;
+
+        if (!orders[i]) {
+            continue;
+        }
+
+        if (!nl_parse_nested(orders[i], act_policy, action_attrs,
+                             ARRAY_SIZE(act_policy))) {
+            VLOG_ERR("failed to parse single action options ");
+            return EPROTO;
+        }
+
+        act_kind = nl_attr_get_string(action_attrs[TCA_ACT_KIND]);
+        if (!strcmp(act_kind, "gact")) {
+            error = tc_flow_parse_gact(action_attrs[TCA_ACT_OPTIONS],
+                                       tc_flow);
+        } else if (!strcmp(act_kind, "mirred")) {
+            error = tc_flow_parse_mirred(action_attrs[TCA_ACT_OPTIONS],
+                                         tc_flow);
+        } else {
+            VLOG_ERR("unkown TCA_ACT_KIND attribute: %s\n", act_kind);
+        }
+        if (error) {
+            return error;
+        }
+
+        if (action_attrs[TCA_ACT_STATS]) {
+            error = tc_flow_parse_act_stats(action_attrs[TCA_ACT_STATS],
+                                            tc_flow);
+            if (error) {
+                return error;
+            }
+        } else {
+            VLOG_ERR("missing action stats (TCA_ACT_STATS)\n");
+        }
+    }
+
+    return 0;
+}
+
+/* Parses a tc flower filter message 'reply' (netlink header + struct tcmsg +
+ * attributes) into 'tc_flow', which is zeroed first.
+ *
+ * Returns 0 on success, -1 if 'reply' is too short to hold the tcmsg header,
+ * EAGAIN if the message carries a zero handle, and EPROTO if the attributes
+ * are malformed, the classifier is not "flower", or TCA_OPTIONS is absent.
+ * On failure 'tc_flow' may be partially filled in. */
+int
+parse_tc_flow(struct ofpbuf *reply, struct tc_flow *tc_flow)
+{
+    static const struct nl_policy tca_policy[] = {
+        [TCA_KIND] = { .type = NL_A_STRING, .optional = false },
+        [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
+        [TCA_STATS] = { .type = NL_A_UNSPEC,
+                        .min_len = sizeof(struct tc_stats),
+                        .optional = true },
+        [TCA_STATS2] = { .type = NL_A_NESTED, .optional = true },
+    };
+    static const struct nl_policy tca_flower_policy[TCA_FLOWER_MAX + 1] = {
+        [TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true },
+        [TCA_FLOWER_INDEV] = { .type = NL_A_STRING, .max_len = IFNAMSIZ,
+                               .optional = true },
+
+        [TCA_FLOWER_KEY_ETH_DST] = { .type = NL_A_UNSPEC,
+                                     .min_len = ETH_ALEN,
+                                     .optional = true },
+        [TCA_FLOWER_KEY_ETH_DST_MASK] = { .type = NL_A_UNSPEC,
+                                          .min_len = ETH_ALEN,
+                                          .optional = true },
+        [TCA_FLOWER_KEY_ETH_SRC] = { .type = NL_A_UNSPEC,
+                                     .min_len = ETH_ALEN,
+                                     .optional = true },
+        [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .type = NL_A_UNSPEC,
+                                          .min_len = ETH_ALEN,
+                                          .optional = true },
+        [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false },
+
+        [TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false },
+        [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false },
+
+        [TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true },
+
+        [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NL_A_U32, .optional = true },
+        [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NL_A_U32,
+                                           .optional = true },
+        [TCA_FLOWER_KEY_IPV4_DST] = { .type = NL_A_U32, .optional = true },
+        [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NL_A_U32,
+                                           .optional = true },
+
+        [TCA_FLOWER_KEY_IPV6_SRC] = { .type = NL_A_UNSPEC,
+                                      .min_len = sizeof(struct in6_addr),
+                                      .optional = true },
+        [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
+                                           .min_len = sizeof(struct in6_addr),
+                                           .optional = true },
+        [TCA_FLOWER_KEY_IPV6_DST] = { .type = NL_A_UNSPEC,
+                                      .min_len = sizeof(struct in6_addr),
+                                      .optional = true },
+        [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
+                                           .min_len = sizeof(struct in6_addr),
+                                           .optional = true },
+
+        [TCA_FLOWER_KEY_TCP_SRC] = { .type = NL_A_U16, .optional = true },
+        [TCA_FLOWER_KEY_TCP_DST] = { .type = NL_A_U16, .optional = true },
+        [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NL_A_U16,
+                                          .optional = true },
+        [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NL_A_U16,
+                                          .optional = true },
+
+        [TCA_FLOWER_KEY_UDP_SRC] = { .type = NL_A_U16, .optional = true },
+        [TCA_FLOWER_KEY_UDP_DST] = { .type = NL_A_U16, .optional = true },
+        [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NL_A_U16,
+                                          .optional = true },
+        [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NL_A_U16,
+                                          .optional = true },
+    };
+    struct nlattr *attrs[ARRAY_SIZE(tca_flower_policy)];
+    struct nlattr *ta[ARRAY_SIZE(tca_policy)];
+    const struct tcmsg *tc;
+    ovs_be16 eth_type;
+    uint32_t flags;
+
+    memset(tc_flow, 0, sizeof *tc_flow);
+
+    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
+        VLOG_ERR("parse error, offset + size <= b->size @ %d + %zu <= %u\n",
+                 NLMSG_HDRLEN, sizeof *tc, (unsigned int) reply->size);
+        return -1;
+    }
+
+    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+    tc_flow->handle = tc->tcm_handle;
+    tc_flow->eth_type = TC_H_MIN(tc->tcm_info);
+    VLOG_DBG("SUCCESS, handle: 0x%x, %d, eth_type: 0x%x\n", tc->tcm_handle,
+             tc->tcm_handle, ntohs(tc_flow->eth_type));
+    if (!tc_flow->handle) {
+        return EAGAIN;
+    }
+
+    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof(struct tcmsg),
+                         tca_policy, ta, ARRAY_SIZE(ta))) {
+        VLOG_ERR("failed to parse tca policy");
+        return EPROTO;
+    }
+    if (!ta[TCA_OPTIONS]) {
+        VLOG_ERR("TCA_OPTIONS IS MISSING!\n");
+        return EPROTO;
+    }
+    if (strcmp(nl_attr_get_string(ta[TCA_KIND]), "flower")) {
+        VLOG_ERR("error, TCA_KIND not flower!\n");
+        return EPROTO;
+    }
+
+    if (!nl_parse_nested(ta[TCA_OPTIONS], tca_flower_policy,
+                         attrs, ARRAY_SIZE(tca_flower_policy))) {
+        VLOG_ERR("failed to parse flower classifier options");
+        return EPROTO;
+    }
+
+    flags = nl_attr_get_u32(attrs[TCA_FLOWER_FLAGS]);
+    VLOG_DBG("flags: 0x%x, skip_sw: %d skip_hw: %d\n", flags,
+             flags & TCA_CLS_FLAGS_SKIP_SW ? 1 : 0,
+             flags & TCA_CLS_FLAGS_SKIP_HW ? 1 : 0);
+
+    if (attrs[TCA_FLOWER_INDEV]) {
+        VLOG_DBG("indev: %s\n",
+                 nl_attr_get_string(attrs[TCA_FLOWER_INDEV]));
+    }
+
+    /* The attribute payload is already in network byte order. */
+    eth_type = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]);
+    VLOG_DBG("flower eth_type: %x\n", ntohs(eth_type));
+    tc_flow->eth_type = eth_type;
+
+    tc_flow_parse_eth(attrs[TCA_FLOWER_KEY_ETH_SRC],
+                      attrs[TCA_FLOWER_KEY_ETH_SRC_MASK],
+                      "eth_src", "eth_src_mask",
+                      &tc_flow->src_mac, &tc_flow->src_mac_mask);
+    tc_flow_parse_eth(attrs[TCA_FLOWER_KEY_ETH_DST],
+                      attrs[TCA_FLOWER_KEY_ETH_DST_MASK],
+                      "eth_dst", "eth_dst_mask",
+                      &tc_flow->dst_mac, &tc_flow->dst_mac_mask);
+
+    tc_flow_parse_ip(attrs, eth_type, tc_flow);
+
+    /* TCA_FLOWER_ACT is required by the policy, so it is present here. */
+    return tc_flow_parse_actions(attrs[TCA_FLOWER_ACT], tc_flow);
+}
+
+/* Starts a netlink dump of the filters attached to parent handle ffff:0 on
+ * 'ifindex', storing the dump state in 'dump'.  The request buffer is
+ * released once the dump has been started.  Always returns 0. */
+int
+tc_dump_flower_start(int ifindex, struct nl_dump *dump)
+{
+    struct ofpbuf dump_request;
+    struct tcmsg *hdr;
+
+    hdr = hw_tc_make_request(ifindex, RTM_GETTFILTER,
+                             NLM_F_REQUEST | NLM_F_DUMP, &dump_request);
+    hdr->tcm_handle = 0;
+    hdr->tcm_info = tc_make_handle(0, 0);
+    hdr->tcm_parent = tc_make_handle(0xffff, 0);
+
+    nl_dump_start(dump, NETLINK_ROUTE, &dump_request);
+    ofpbuf_uninit(&dump_request);
+
+    return 0;
+}
+
+/* Sends an RTM_DELTFILTER request for parent handle ffff:0 on 'ifindex' with
+ * no specific handle or priority set.  Returns 0 on success, otherwise the
+ * error reported by the netlink transaction. */
+int
+tc_flush_flower(int ifindex)
+{
+    struct ofpbuf flush_request;
+    struct tcmsg *hdr;
+    int retval;
+
+    VLOG_DBG("%s %d %s: flusing ifindex: %d\n", __FILE__, __LINE__, __func__,
+             ifindex);
+
+    hdr = hw_tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ACK,
+                             &flush_request);
+    hdr->tcm_info = tc_make_handle(0, 0);
+    hdr->tcm_parent = tc_make_handle(0xffff, 0);
+
+    retval = tc_transact(&flush_request, NULL);
+    if (!retval) {
+        return 0;
+    }
+
+    VLOG_ERR("%s %d: tc error: %d\n", __func__, __LINE__, retval);
+    return retval;
+}
+
+/* Deletes the filter identified by 'handle' and 'prio' from parent handle
+ * ffff:0 on 'ifindex'.  Returns 0 on success, otherwise the error reported
+ * by the netlink transaction. */
+int
+tc_del_flower(int ifindex, int handle, int prio)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    int error;
+
+    tcmsg = hw_tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(prio, 0);
+    tcmsg->tcm_handle = handle;
+
+    /* The reply is never inspected, so do not ask for one: a reply buffer
+     * handed back by the transaction would have to be freed here, and it
+     * was previously leaked on every successful delete. */
+    error = tc_transact(&request, NULL);
+    if (error) {
+        VLOG_ERR("%s %d: tc error: %d\n", __func__, __LINE__, error);
+        return error;
+    }
+    return 0;
+}
+
+/* Queries the filter identified by 'handle' and 'prio' on parent handle
+ * ffff:0 of 'ifindex' and parses the reply into 'tc_flow'.
+ *
+ * Returns 0 on success.  On failure returns the error from the netlink
+ * transaction or, if the reply could not be parsed, the nonzero value
+ * returned by parse_tc_flow(). */
+int
+tc_get_flower(int ifindex, int handle, int prio, struct tc_flow *tc_flow)
+{
+    struct ofpbuf request;
+    struct ofpbuf *reply;
+    struct tcmsg *tcmsg;
+    int error;
+
+    tcmsg = hw_tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(prio, 0);
+    tcmsg->tcm_handle = handle;
+
+    error = tc_transact(&request, &reply);
+    if (error) {
+        VLOG_ERR("%s %d %s: tc error: %d\n", __FILE__, __LINE__, __func__,
+                 error);
+        return error;
+    }
+
+    /* Report parse failures instead of returning success with a partially
+     * filled 'tc_flow', and release the reply, which this function owns. */
+    error = parse_tc_flow(reply, tc_flow);
+    ofpbuf_delete(reply);
+
+    return error;
+}
+
+/* Sets the global SKIP_HW switch to 'set' and logs the new value at INFO
+ * level. */
+void
+tc_set_skip_hw(bool set)
+{
+    const char *state = set ? "true" : "false";
+
+    SKIP_HW = set;
+    VLOG_INFO("********* ************ setting SKIP_HW = %s\n", state);
+}
+
+/* Returns true if any of the 'n' bytes starting at 'p' is nonzero. */
+static bool
+tc_any_byte_set(const void *p, size_t n)
+{
+    const uint8_t *bytes = p;
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        if (bytes[i]) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Installs (creating or replacing) a flower filter described by 'tc_flow' on
+ * parent handle ffff:0 of 'tc_flow->ifindex'.
+ *
+ * The match is built from the Ethernet addresses, Ethernet type, IP protocol,
+ * TCP/UDP ports and IPv4/IPv6 addresses in 'tc_flow'; a key is only sent when
+ * it is nonzero.  The single action is a "mirred" egress redirect to
+ * 'tc_flow->ifindex_out', or a "gact" drop when 'ifindex_out' is 0.  The
+ * global SKIP_HW switch selects between the SKIP_HW and SKIP_SW filter flags.
+ *
+ * If 'tc_flow->handle' is nonzero it is requested as the filter handle.  On
+ * success the handle found in the reply is stored back into
+ * 'tc_flow->handle'.
+ *
+ * Returns 0 on success, otherwise the error from the netlink transaction. */
+int
+tc_replace_flower(struct tc_flow *tc_flow)
+{
+    size_t basic_offset, act_offset, order_offset, opt_offset;
+    uint16_t eth_type = ntohs(tc_flow->eth_type);
+    struct ofpbuf request;
+    struct ofpbuf *reply;
+    struct tcmsg *tcmsg;
+    struct tcmsg *tc;
+    int error;
+
+    VLOG_DBG("%s %d %s: eth_type %x ip_proto %d (%x), ifindex fwd: %d -> %d\n",
+             __FILE__, __LINE__, __func__, ntohs(tc_flow->eth_type),
+             tc_flow->ip_proto, tc_flow->ip_proto, tc_flow->ifindex,
+             tc_flow->ifindex_out);
+
+    tcmsg = hw_tc_make_request(tc_flow->ifindex, RTM_NEWTFILTER,
+                               NLM_F_CREATE | NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info =
+        tc_make_handle((OVS_FORCE uint16_t) tc_flow->eth_type,
+                       (OVS_FORCE uint16_t) tc_flow->eth_type);
+    if (tc_flow->handle) {
+        VLOG_DBG("requested handle: %d (%x) (replace?, handle will be "
+                 "replaced if exists, add NLM_F_EXCL to not touch existing)",
+                 tc_flow->handle, tc_flow->handle);
+        tcmsg->tcm_handle = tc_flow->handle;
+    }
+    /* 'tcmsg' points into 'request' and must not be used past this point:
+     * appending attributes may reallocate the buffer. */
+
+    nl_msg_put_string(&request, TCA_KIND, "flower");
+    basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
+
+    /* Test the whole address, not just its first byte, so that addresses
+     * such as 00:11:22:33:44:55 are not silently left out of the match. */
+    if (tc_any_byte_set(&tc_flow->dst_mac, ETH_ALEN)) {
+        VLOG_DBG("putting dst_mac/mask\n");
+        nl_msg_put_unspec(&request, TCA_FLOWER_KEY_ETH_DST,
+                          &tc_flow->dst_mac, ETH_ALEN);
+        nl_msg_put_unspec(&request, TCA_FLOWER_KEY_ETH_DST_MASK,
+                          &tc_flow->dst_mac_mask, ETH_ALEN);
+    }
+    if (tc_any_byte_set(&tc_flow->src_mac, ETH_ALEN)) {
+        VLOG_DBG("putting src_mac/mask\n");
+        nl_msg_put_unspec(&request, TCA_FLOWER_KEY_ETH_SRC,
+                          &tc_flow->src_mac, ETH_ALEN);
+        nl_msg_put_unspec(&request, TCA_FLOWER_KEY_ETH_SRC_MASK,
+                          &tc_flow->src_mac_mask, ETH_ALEN);
+    }
+
+    if (eth_type == ETH_P_IP || eth_type == ETH_P_IPV6) {
+        VLOG_DBG("flower, protocol is ipv4/v6, proto: %d\n",
+                 tc_flow->ip_proto);
+
+        if (tc_flow->ip_proto) {
+            VLOG_DBG("adding ip proto\n");
+            nl_msg_put_u8(&request, TCA_FLOWER_KEY_IP_PROTO,
+                          tc_flow->ip_proto);
+
+            if (tc_flow->ip_proto == IPPROTO_UDP) {
+                VLOG_DBG("adding udp ports %d/%x, %d/%x\n",
+                         ntohs(tc_flow->src_port),
+                         ntohs(tc_flow->src_port_mask),
+                         ntohs(tc_flow->dst_port),
+                         ntohs(tc_flow->dst_port_mask));
+                if (tc_flow->src_port) {
+                    VLOG_DBG("adding udp src port/msk\n");
+                    nl_msg_put_be16(&request, TCA_FLOWER_KEY_UDP_SRC,
+                                    tc_flow->src_port);
+                    nl_msg_put_be16(&request, TCA_FLOWER_KEY_UDP_SRC_MASK,
+                                    tc_flow->src_port_mask);
+                }
+                if (tc_flow->dst_port) {
+                    VLOG_DBG("adding udp dst port/msk\n");
+                    nl_msg_put_be16(&request, TCA_FLOWER_KEY_UDP_DST,
+                                    tc_flow->dst_port);
+                    nl_msg_put_be16(&request, TCA_FLOWER_KEY_UDP_DST_MASK,
+                                    tc_flow->dst_port_mask);
+                }
+            } else if (tc_flow->ip_proto == IPPROTO_TCP) {
+                VLOG_DBG("adding tcp ports %d/%x, %d/%x\n",
+                         ntohs(tc_flow->src_port), tc_flow->src_port_mask,
+                         ntohs(tc_flow->dst_port), tc_flow->dst_port_mask);
+
+                if (tc_flow->src_port) {
+                    VLOG_DBG("adding tcp src port/msk\n");
+                    nl_msg_put_be16(&request, TCA_FLOWER_KEY_TCP_SRC,
+                                    tc_flow->src_port);
+                    /* Same bytes on the wire as before; be16 matches the
+                     * other port and mask attributes. */
+                    nl_msg_put_be16(&request, TCA_FLOWER_KEY_TCP_SRC_MASK,
+                                    tc_flow->src_port_mask);
+                }
+                if (tc_flow->dst_port) {
+                    VLOG_DBG("adding tcp dst port/msk\n");
+                    nl_msg_put_be16(&request, TCA_FLOWER_KEY_TCP_DST,
+                                    tc_flow->dst_port);
+                    nl_msg_put_be16(&request, TCA_FLOWER_KEY_TCP_DST_MASK,
+                                    tc_flow->dst_port_mask);
+                }
+            } else if (tc_flow->ip_proto == IPPROTO_ICMP) {
+                VLOG_DBG("proto is icmp\n");
+            }
+        }
+
+        if (eth_type == ETH_P_IP && tc_flow->ip_type == 4) {
+            VLOG_DBG("ip_proto is ip, checking ips\n");
+            if (tc_flow->ipv4.ipv4_src) {
+                VLOG_DBG("putting ipv4 src/msk, %d/%d\n",
+                         tc_flow->ipv4.ipv4_src,
+                         tc_flow->ipv4.ipv4_src_mask);
+                nl_msg_put_be32(&request, TCA_FLOWER_KEY_IPV4_SRC,
+                                tc_flow->ipv4.ipv4_src);
+                nl_msg_put_be32(&request, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+                                tc_flow->ipv4.ipv4_src_mask);
+            }
+            if (tc_flow->ipv4.ipv4_dst) {
+                VLOG_DBG("putting ipv4 dst/msk %d/%d\n",
+                         tc_flow->ipv4.ipv4_dst,
+                         tc_flow->ipv4.ipv4_dst_mask);
+                nl_msg_put_be32(&request, TCA_FLOWER_KEY_IPV4_DST,
+                                tc_flow->ipv4.ipv4_dst);
+                nl_msg_put_be32(&request, TCA_FLOWER_KEY_IPV4_DST_MASK,
+                                tc_flow->ipv4.ipv4_dst_mask);
+            }
+        } else if (tc_flow->ip_type == 6) {
+            VLOG_DBG("proto ipv6 ip\n");
+            if (tc_any_byte_set(tc_flow->ipv6.ipv6_src,
+                                sizeof tc_flow->ipv6.ipv6_src)) {
+                nl_msg_put_unspec(&request, TCA_FLOWER_KEY_IPV6_SRC,
+                                  tc_flow->ipv6.ipv6_src,
+                                  sizeof tc_flow->ipv6.ipv6_src);
+                nl_msg_put_unspec(&request, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+                                  tc_flow->ipv6.ipv6_src_mask,
+                                  sizeof tc_flow->ipv6.ipv6_src_mask);
+            }
+            if (tc_any_byte_set(tc_flow->ipv6.ipv6_dst,
+                                sizeof tc_flow->ipv6.ipv6_dst)) {
+                /* The destination goes in the DST attributes; it used to
+                 * be sent as a second SRC key and mask. */
+                nl_msg_put_unspec(&request, TCA_FLOWER_KEY_IPV6_DST,
+                                  tc_flow->ipv6.ipv6_dst,
+                                  sizeof tc_flow->ipv6.ipv6_dst);
+                nl_msg_put_unspec(&request, TCA_FLOWER_KEY_IPV6_DST_MASK,
+                                  tc_flow->ipv6.ipv6_dst_mask,
+                                  sizeof tc_flow->ipv6.ipv6_dst_mask);
+            }
+        }
+    }
+
+    VLOG_DBG("putting eth_type: %x (nthos)\n", ntohs(tc_flow->eth_type));
+    nl_msg_put_be16(&request, TCA_FLOWER_KEY_ETH_TYPE, tc_flow->eth_type);
+
+    if (SKIP_HW) {
+        VLOG_DBG("putting SKIP_HW to avoid using counters, firmware bugs\n");
+        nl_msg_put_u32(&request, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_HW);
+    } else {
+        nl_msg_put_u32(&request, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
+    }
+
+    act_offset = nl_msg_start_nested(&request, TCA_FLOWER_ACT);
+
+    /* The one and only action, nested under order index 1. */
+    order_offset = nl_msg_start_nested(&request, 1);
+    if (!tc_flow->ifindex_out) {
+        struct tc_gact p;
+
+        VLOG_DBG("flower: dropping\n");
+        nl_msg_put_string(&request, TCA_ACT_KIND, "gact");
+        opt_offset = nl_msg_start_nested(&request, TCA_ACT_OPTIONS);
+
+        memset(&p, 0, sizeof p);
+        p.action = TC_ACT_SHOT;
+        nl_msg_put_unspec(&request, TCA_GACT_PARMS, &p, sizeof p);
+
+        nl_msg_end_nested(&request, opt_offset);
+    } else {
+        struct tc_mirred m;
+
+        VLOG_DBG("flower: reidrecting\n");
+        nl_msg_put_string(&request, TCA_ACT_KIND, "mirred");
+        opt_offset = nl_msg_start_nested(&request, TCA_ACT_OPTIONS);
+
+        memset(&m, 0, sizeof m);
+        m.eaction = TCA_EGRESS_REDIR;
+        m.action = TC_ACT_STOLEN;
+        m.ifindex = tc_flow->ifindex_out;
+        nl_msg_put_unspec(&request, TCA_MIRRED_PARMS, &m, sizeof m);
+
+        nl_msg_end_nested(&request, opt_offset);
+    }
+    nl_msg_end_nested(&request, order_offset);
+
+    /* more actions here */
+
+    nl_msg_end_nested(&request, act_offset);
+    nl_msg_end_nested(&request, basic_offset);
+
+    error = tc_transact(&request, &reply);
+    if (error) {
+        VLOG_ERR("%s %d: tc error: %d\n", __func__, __LINE__, error);
+        return error;
+    }
+
+    VLOG_DBG("REPLY SIZE: %d\n", (int) reply->size);
+
+    /* ofpbuf_at() yields NULL for an empty or truncated reply instead of
+     * aborting the process. */
+    tc = ofpbuf_at(reply, NLMSG_HDRLEN, sizeof *tc);
+    if (tc) {
+        VLOG_DBG("SUCCESS, handle: %x\n", tc->tcm_handle);
+        tc_flow->handle = tc->tcm_handle;
+    }
+
+    /* The reply buffer belongs to this function; it used to be leaked. */
+    ofpbuf_delete(reply);
+
+    return 0;
+}
new file mode 100644
@@ -0,0 +1,102 @@
+#ifndef TC_H
+#define TC_H 1
+
+#include "odp-netlink.h"
+
+/* Length in bytes of an Ethernet address, as used for the
+ * TCA_FLOWER_KEY_ETH_* attributes. */
+#define ETH_ALEN 6
+
+/* Netlink attribute types nested inside TCA_OPTIONS for the "flower"
+ * classifier.  The trailing comment on each key gives its payload type.
+ *
+ * NOTE(review): these values are implied by declaration order and must match
+ * the kernel the filters are sent to -- verify against that kernel's
+ * <linux/pkt_cls.h>, in particular the position of the *_MASK port
+ * attributes that follow TCA_FLOWER_FLAGS. */
+enum {
+ TCA_FLOWER_UNSPEC,
+ TCA_FLOWER_CLASSID,
+ TCA_FLOWER_INDEV,
+ TCA_FLOWER_ACT,
+ TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_TYPE, /* be16 */
+ TCA_FLOWER_KEY_IP_PROTO, /* u8 */
+ TCA_FLOWER_KEY_IPV4_SRC, /* be32 */
+ TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */
+ TCA_FLOWER_KEY_IPV4_DST, /* be32 */
+ TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */
+ TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */
+ TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */
+ TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */
+ TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */
+ TCA_FLOWER_KEY_TCP_SRC, /* be16 */
+ TCA_FLOWER_KEY_TCP_DST, /* be16 */
+ TCA_FLOWER_KEY_UDP_SRC, /* be16 */
+ TCA_FLOWER_KEY_UDP_DST, /* be16 */
+
+ TCA_FLOWER_FLAGS,
+
+ TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */
+ TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */
+ TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */
+ TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */
+
+ /* One past the last valid attribute; see TCA_FLOWER_MAX. */
+ __TCA_FLOWER_MAX,
+
+};
+
+/* Bits carried in the TCA_FLOWER_FLAGS attribute. */
+#define TCA_CLS_FLAGS_SKIP_HW (1 << 0)
+#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
+
+/* Highest valid TCA_FLOWER_* attribute type. */
+#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
+
+/* Forward declarations for the pointer types used below, so that this header
+ * does not depend on the order in which it is included. */
+struct netdev;
+struct nl_dump;
+struct ofpbuf;
+
+/* Description of one tc flower filter: its identity, match fields, output
+ * and statistics.  Filled in by parse_tc_flow() and consumed by
+ * tc_replace_flower(). */
+struct tc_flow {
+ /* Filter handle (tcm_handle); 0 lets tc_replace_flower() leave it unset. */
+ uint32_t handle;
+ /* NOTE(review): not read or written by tc.c. */
+ uint32_t priority;
+
+ /* NOTE(review): the port and netdev fields below are not used by tc.c;
+ * presumably maintained by the caller. */
+ odp_port_t ovs_inport;
+ odp_port_t ovs_outport;
+
+ struct netdev *indev;
+ struct netdev *outdev;
+ /* Interface the filter is installed on. */
+ int ifindex;
+ /* Redirect target for the mirred action; 0 means the flow is dropped. */
+ int ifindex_out;
+
+ /* Ethernet type in network byte order. */
+ ovs_be16 eth_type;
+ uint8_t ip_proto;
+
+ struct eth_addr dst_mac;
+ struct eth_addr dst_mac_mask;
+ struct eth_addr src_mac;
+ struct eth_addr src_mac_mask;
+
+ /* TCP/UDP ports and masks.  Declared uint16_t but stored in network byte
+ * order (they are read and written as be16 netlink attributes). */
+ uint16_t src_port;
+ uint16_t src_port_mask;
+ uint16_t dst_port;
+ uint16_t dst_port_mask;
+
+ /* 4 or 6 selects which member of the union below is meaningful. */
+ int ip_type;
+ union {
+ struct {
+ ovs_be32 ipv4_src;
+ ovs_be32 ipv4_src_mask;
+ ovs_be32 ipv4_dst;
+ ovs_be32 ipv4_dst_mask;
+ } ipv4;
+ struct {
+ ovs_be32 ipv6_src[4];
+ ovs_be32 ipv6_src_mask[4];
+ ovs_be32 ipv6_dst[4];
+ ovs_be32 ipv6_dst_mask[4];
+ } ipv6;
+ };
+
+ /* Packet and byte counters taken from the action's basic statistics. */
+ struct ovs_flow_stats stats;
+ /* Computed by parse_tc_flow() as time_msec() minus the action's 'lastuse'
+ * scaled by 10. */
+ uint64_t lastused;
+};
+
+/* Creates or replaces the flower filter described by 'flow'. */
+int tc_replace_flower(struct tc_flow *flow);
+/* Deletes the filter with the given 'handle' and 'prio' on 'ifindex'. */
+int tc_del_flower(int ifindex, int handle, int prio);
+/* Fetches the filter with the given 'handle' and 'prio' on 'ifindex' and
+ * parses it into 'tc_flow'. */
+int tc_get_flower(int ifindex, int handle, int prio, struct tc_flow *tc_flow);
+/* Sends a delete request with no handle or priority for 'ifindex'. */
+int tc_flush_flower(int ifindex);
+/* Starts a netlink dump of the filters on 'ifindex' into 'dump'. */
+int tc_dump_flower_start(int ifindex, struct nl_dump *dump);
+/* Parses one flower filter message 'reply' into 'tc_flow'. */
+int parse_tc_flow(struct ofpbuf *reply, struct tc_flow *tc_flow);
+/* Sets the global switch choosing between the SKIP_HW and SKIP_SW flags. */
+void tc_set_skip_hw(bool set);
+
+#endif /* tc.h */