@@ -105,6 +105,7 @@ enum tca_id {
TCA_ID_IFE = TCA_ACT_IFE,
TCA_ID_SAMPLE = TCA_ACT_SAMPLE,
/* other actions go here */
+ TCA_ID_CTINFO,
__TCA_ID_MAX = 255
};
new file mode 100644
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __UAPI_TC_CTINFO_H
+#define __UAPI_TC_CTINFO_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+struct tc_ctinfo {
+ tc_gen;
+};
+
+enum {
+ TCA_CTINFO_UNSPEC,
+ TCA_CTINFO_PAD,
+ TCA_CTINFO_TM,
+ TCA_CTINFO_ACT,
+ TCA_CTINFO_ZONE,
+ TCA_CTINFO_PARMS_DSCP_MASK,
+ TCA_CTINFO_PARMS_DSCP_STATEMASK,
+ TCA_CTINFO_PARMS_CPMARK_MASK,
+ TCA_CTINFO_STATS_DSCP_SET,
+ TCA_CTINFO_STATS_DSCP_ERROR,
+ TCA_CTINFO_STATS_CPMARK_SET,
+ __TCA_CTINFO_MAX
+};
+
+#define TCA_CTINFO_MAX (__TCA_CTINFO_MAX - 1)
+
+enum {
+ CTINFO_MODE_DSCP = BIT(0),
+ CTINFO_MODE_CPMARK = BIT(1)
+};
+
+#endif
new file mode 100644
@@ -0,0 +1,170 @@
+.TH "ctinfo action in tc" 8 "4 Jun 2019" "iproute2" "Linux"
+.SH NAME
+ctinfo \- tc connmark processing action
+.SH SYNOPSIS
+.B tc ... action ctinfo
+[
+.B dscp
+MASK [STATEMASK] ] [
+.B cpmark
+[MASK] ] [
+.B zone
+ZONE ] [
+.B CONTROL
+] [
+.B index
+<INDEX>
+]
+
+.SH DESCRIPTION
+CTINFO (Conntrack Information) is a tc action for retrieving data from
+conntrack marks into various fields. At present it has two independent
+processing modes which may be viewed as sub-functions.
+
+DSCP mode copies a DSCP stored in conntrack's connmark into the IPv4/v6 diffserv
+field. The copying may conditionally occur based on a flag also stored in the
+connmark. DSCP mode was designed to assist in restoring packet classifications on
+ingress, classifications which may then be used by qdiscs such as CAKE. It may be
+used in any circumstance where ingress classification needs to be maintained across
+links that otherwise bleach or remap according to their own policies.
+
+CPMARK (copymark) mode copies the conntrack connmark into the packet's mark field. Without
+additional parameters it is functionally completely equivalent to the existing
+connmark action. An optional mask may be specified to mask which bits of the
+connmark are restored. This may be useful when DSCP and CPMARK modes are combined.
+
+Simple statistics (tc -s) on DSCP restores and CPMARK copies are maintained where values for
+set indicate a count of packets altered for that mode. DSCP includes an error count
+where the destination packet's diffserv field was unwriteable.
+.SH PARAMETERS
+.SS DSCP mode parameters:
+.IP mask
+A mask of 6 contiguous bits indicating where the DSCP value is located in the 32 bit
+conntrack mark field. A mask must be provided for this mode. mask is a 32 bit
+unsigned value.
+.IP statemask
+A mask of at least 1 bit indicating where a conditional restore flag is located in the
+32 bit conntrack mark field. The statemask bit/s must NOT overlap the mask bits. The
+DSCP will be restored if the conntrack mark logically ANDed with the statemask yields
+a non-zero result. statemask is an optional unsigned 32 bit value.
+.SS CPMARK mode parameters:
+.IP mask
+Store the logically ANDed result of conntrack mark and mask into the packet's mark
+field. Default is 0xffffffff i.e. the whole mark field. mask is an optional unsigned 32 bit
+value
+.SS Overall action parameters:
+.IP zone
+Specify the conntrack zone when doing conntrack lookups for packets.
+zone is a 16bit unsigned decimal value.
+Default is 0.
+.IP CONTROL
+The following keywords allow to control how the tree of qdisc, classes,
+filters and actions is further traversed after this action.
+.RS
+.TP
+.B reclassify
+Restart with the first filter in the current list.
+.TP
+.B pipe
+Continue with the next action attached to the same filter.
+.TP
+.B drop
+Drop the packet.
+.TP
+.B shot
+synonym for
+.B drop
+.TP
+.B continue
+Continue classification with the next filter in line.
+.TP
+.B pass
+Finish classification process and return to calling qdisc for further packet
+processing. This is the default.
+.RE
+.IP index
+Specify an index for this action in order to being able to identify it in later
+commands. index is a 32bit unsigned decimal value.
+.SH EXAMPLES
+Example showing conditional restoration of DSCP on ingress via an IFB
+.RS
+.EX
+
+#Set up the IFB interface
+.br
+tc qdisc add dev ifb4eth0 handle ffff: ingress
+
+#Put CAKE qdisc on it
+.br
+tc qdisc add dev ifb4eth0 root cake bandwidth 40mbit
+
+#Set interface UP
+.br
+ip link set dev ifb4eth0 up
+
+#Add 2 actions, ctinfo to restore dscp & mirred to redirect the packets to IFB
+.br
+tc filter add dev eth0 parent ffff: protocol all prio 10 u32 \\
+ match u32 0 0 flowid 1:1 action \\
+ ctinfo dscp 0xfc000000 0x01000000 \\
+ mirred egress redirect dev ifb4eth0
+
+tc -s qdisc show dev eth0 ingress
+
+ filter parent ffff: protocol all pref 10 u32 chain 0
+ filter parent ffff: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1
+ filter parent ffff: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw
+ match 00000000/00000000 at 0
+ action order 1: ctinfo zone 0 pipe
+ index 2 ref 1 bind 1 dscp 0xfc000000 0x01000000 installed 72 sec used 0 sec DSCP set 1333 error 0 CPMARK set 0
+ Action statistics:
+ Sent 658484 bytes 1833 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+
+ action order 2: mirred (Egress Redirect to device ifb4eth0) stolen
+ index 1 ref 1 bind 1 installed 72 sec used 0 sec
+ Action statistics:
+ Sent 658484 bytes 1833 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+.EE
+.RE
+
+Example showing conditional restoration of DSCP on egress
+
+This may appear nonsensical since iptables marking of egress packets is easy
+to achieve, however the iptables flow classification rules may be extensive
+and so some sort of set once and forget may be useful especially on cpu
+constrained devices.
+.RS
+.EX
+
+# Send unmarked connections to a marking chain which needs to store a DSCP
+and set statemask bit in the connmark
+.br
+iptables -t mangle -A POSTROUTING -o eth0 -m connmark \\
+ --mark 0x00000000/0x01000000 -g CLASS_MARKING_CHAIN
+
+# Apply marked DSCP to the packets
+.br
+tc filter add dev eth0 protocol all prio 10 u32 \\
+ match u32 0 0 flowid 1:1 action \\
+ ctinfo dscp 0xfc000000 0x01000000
+
+tc -s filter show dev eth0
+ filter parent 800e: protocol all pref 10 u32 chain 0
+ filter parent 800e: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1
+ filter parent 800e: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw
+ match 00000000/00000000 at 0
+ action order 1: ctinfo zone 0 pipe
+ index 1 ref 1 bind 1 dscp 0xfc000000 0x01000000 installed 7414 sec used 0 sec DSCP set 53404 error 0 CPMARK set 0
+ Action statistics:
+ Sent 32890260 bytes 120441 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+.br
+.SH SEE ALSO
+.BR tc (8),
+.BR tc-cake (8)
+.BR tc-connmark (8)
+.BR tc-mirred (8)
+.SH AUTHORS
+ctinfo was written by Kevin Darbyshire-Bryant.
@@ -48,6 +48,7 @@ TCMODULES += m_csum.o
TCMODULES += m_simple.o
TCMODULES += m_vlan.o
TCMODULES += m_connmark.o
+TCMODULES += m_ctinfo.o
TCMODULES += m_bpf.o
TCMODULES += m_tunnel_key.o
TCMODULES += m_sample.o
new file mode 100644
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * m_ctinfo.c netfilter ctinfo mark action
+ *
+ * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include "utils.h"
+#include "tc_util.h"
+#include <linux/tc_act/tc_ctinfo.h>
+
+static void
+explain(void)
+{
+ fprintf(stderr,
+ "Usage: ... ctinfo [dscp mask [statemask]] [cpmark [mask]] [zone ZONE] [CONTROL] [index <INDEX>]\n"
+ "where :\n"
+ "\tdscp MASK bitmask location of stored DSCP\n"
+ "\t STATEMASK bitmask to determine conditional restoring\n"
+ "\tcpmark MASK mask applied to mark on restoration\n"
+ "\tZONE is the conntrack zone\n"
+ "\tCONTROL := reclassify | pipe | drop | continue | ok |\n"
+ "\t goto chain <CHAIN_INDEX>\n");
+}
+
+static void
+usage(void)
+{
+ explain();
+ exit(-1);
+}
+
+static int
+parse_ctinfo(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
+ struct nlmsghdr *n)
+{
+ unsigned int cpmarkmask = 0, dscpmask = 0, dscpstatemask = 0;
+ struct tc_ctinfo sel = {};
+ unsigned short zone = 0;
+ char **argv = *argv_p;
+ struct rtattr *tail;
+ int argc = *argc_p;
+ int ok = 0;
+ __u8 i;
+
+ while (argc > 0) {
+ if (matches(*argv, "ctinfo") == 0) {
+ ok = 1;
+ NEXT_ARG_FWD();
+ } else if (matches(*argv, "help") == 0) {
+ usage();
+ } else {
+ break;
+ }
+
+ }
+
+ if (!ok) {
+ explain();
+ return -1;
+ }
+
+ if (argc) {
+ if (matches(*argv, "dscp") == 0) {
+ NEXT_ARG();
+ if (get_u32(&dscpmask, *argv, 0)) {
+ fprintf(stderr,
+ "ctinfo: Illegal dscp \"mask\"\n");
+ return -1;
+ }
+ if (NEXT_ARG_OK()) {
+ NEXT_ARG_FWD();
+ if (!get_u32(&dscpstatemask, *argv, 0))
+ NEXT_ARG_FWD(); /* was a statemask */
+ } else {
+ NEXT_ARG_FWD();
+ }
+ }
+ }
+
+ /* cpmark has optional mask parameter, so the next arg might not */
+ /* exist, or it might be the next option, or it may actually be a */
+ /* 32bit mask */
+ if (argc) {
+ if (matches(*argv, "cpmark") == 0) {
+ cpmarkmask = ~0;
+ if (NEXT_ARG_OK()) {
+ NEXT_ARG_FWD();
+ if (!get_u32(&cpmarkmask, *argv, 0))
+ NEXT_ARG_FWD(); /* was a mask */
+ } else {
+ NEXT_ARG_FWD();
+ }
+ }
+ }
+
+ if (argc) {
+ if (matches(*argv, "zone") == 0) {
+ NEXT_ARG();
+ if (get_u16(&zone, *argv, 10)) {
+ fprintf(stderr, "ctinfo: Illegal \"zone\"\n");
+ return -1;
+ }
+ NEXT_ARG_FWD();
+ }
+ }
+
+ parse_action_control_dflt(&argc, &argv, &sel.action,
+ false, TC_ACT_PIPE);
+
+ if (argc) {
+ if (matches(*argv, "index") == 0) {
+ NEXT_ARG();
+ if (get_u32(&sel.index, *argv, 10)) {
+ fprintf(stderr, "ctinfo: Illegal \"index\"\n");
+ return -1;
+ }
+ NEXT_ARG_FWD();
+ }
+ }
+
+ if (dscpmask & dscpstatemask) {
+ fprintf(stderr,
+ "ctinfo: dscp mask & statemask must NOT overlap\n");
+ return -1;
+ }
+
+ i = ffs(dscpmask);
+ if (i && ((~0 & (dscpmask >> (i - 1))) != 0x3f)) {
+ fprintf(stderr,
+ "ctinfo: dscp mask must be 6 contiguous bits long\n");
+ return -1;
+ }
+
+ tail = addattr_nest(n, MAX_MSG, tca_id);
+ addattr_l(n, MAX_MSG, TCA_CTINFO_ACT, &sel, sizeof(sel));
+ addattr16(n, MAX_MSG, TCA_CTINFO_ZONE, zone);
+
+ if (dscpmask)
+ addattr32(n, MAX_MSG,
+ TCA_CTINFO_PARMS_DSCP_MASK, dscpmask);
+
+ if (dscpstatemask)
+ addattr32(n, MAX_MSG,
+ TCA_CTINFO_PARMS_DSCP_STATEMASK, dscpstatemask);
+
+ if (cpmarkmask)
+ addattr32(n, MAX_MSG,
+ TCA_CTINFO_PARMS_CPMARK_MASK, cpmarkmask);
+
+ addattr_nest_end(n, tail);
+
+ *argc_p = argc;
+ *argv_p = argv;
+ return 0;
+}
+
+static void print_ctinfo_stats(FILE *f, struct rtattr *tb[TCA_CTINFO_MAX + 1])
+{
+ struct tcf_t *tm;
+
+ if (tb[TCA_CTINFO_TM]) {
+ tm = RTA_DATA(tb[TCA_CTINFO_TM]);
+
+ print_tm(f, tm);
+ }
+
+ if (tb[TCA_CTINFO_STATS_DSCP_SET])
+ print_lluint(PRINT_ANY, "dscpset", " DSCP set %llu",
+ rta_getattr_u64(tb[TCA_CTINFO_STATS_DSCP_SET]));
+ if (tb[TCA_CTINFO_STATS_DSCP_ERROR])
+ print_lluint(PRINT_ANY, "dscperror", " error %llu",
+ rta_getattr_u64(tb[TCA_CTINFO_STATS_DSCP_ERROR]));
+
+ if (tb[TCA_CTINFO_STATS_CPMARK_SET])
+ print_lluint(PRINT_ANY, "cpmarkset", " CPMARK set %llu",
+ rta_getattr_u64(tb[TCA_CTINFO_STATS_CPMARK_SET]));
+}
+
+static int print_ctinfo(struct action_util *au, FILE *f, struct rtattr *arg)
+{
+ unsigned int cpmarkmask = ~0, dscpmask = 0, dscpstatemask = 0;
+ struct rtattr *tb[TCA_CTINFO_MAX + 1];
+ unsigned short zone = 0;
+ struct tc_ctinfo *ci;
+
+ if (arg == NULL)
+ return -1;
+
+ parse_rtattr_nested(tb, TCA_CTINFO_MAX, arg);
+ if (!tb[TCA_CTINFO_ACT]) {
+ print_string(PRINT_FP, NULL, "%s",
+ "[NULL ctinfo action parameters]");
+ return -1;
+ }
+
+ ci = RTA_DATA(tb[TCA_CTINFO_ACT]);
+
+ if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
+ if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_DSCP_MASK]) >=
+ sizeof(__u32))
+ dscpmask = rta_getattr_u32(
+ tb[TCA_CTINFO_PARMS_DSCP_MASK]);
+ else
+ print_string(PRINT_FP, NULL, "%s",
+ "[invalid dscp mask parameter]");
+ }
+
+ if (tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) {
+ if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) >=
+ sizeof(__u32))
+ dscpstatemask = rta_getattr_u32(
+ tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]);
+ else
+ print_string(PRINT_FP, NULL, "%s",
+ "[invalid dscp statemask parameter]");
+ }
+
+ if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) {
+ if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_CPMARK_MASK]) >=
+ sizeof(__u32))
+ cpmarkmask = rta_getattr_u32(
+ tb[TCA_CTINFO_PARMS_CPMARK_MASK]);
+ else
+ print_string(PRINT_FP, NULL, "%s",
+ "[invalid cpmark mask parameter]");
+ }
+
+ if (tb[TCA_CTINFO_ZONE] && RTA_PAYLOAD(tb[TCA_CTINFO_ZONE]) >=
+ sizeof(__u16))
+ zone = rta_getattr_u16(tb[TCA_CTINFO_ZONE]);
+
+ print_string(PRINT_ANY, "kind", "%s ", "ctinfo");
+ print_hu(PRINT_ANY, "zone", "zone %u", zone);
+ print_action_control(f, " ", ci->action, "");
+
+ print_string(PRINT_FP, NULL, "%s", _SL_);
+ print_uint(PRINT_ANY, "index", "\t index %u", ci->index);
+ print_int(PRINT_ANY, "ref", " ref %d", ci->refcnt);
+ print_int(PRINT_ANY, "bind", " bind %d", ci->bindcnt);
+
+ if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
+ print_0xhex(PRINT_ANY, "dscpmask", " dscp %#010llx", dscpmask);
+ print_0xhex(PRINT_ANY, "dscpstatemask", " %#010llx",
+ dscpstatemask);
+ }
+
+ if (tb[TCA_CTINFO_PARMS_CPMARK_MASK])
+ print_0xhex(PRINT_ANY, "cpmark", " cpmark %#010llx",
+ cpmarkmask);
+
+ if (show_stats)
+ print_ctinfo_stats(f, tb);
+
+ print_string(PRINT_FP, NULL, "%s", _SL_);
+
+ return 0;
+}
+
+struct action_util ctinfo_action_util = {
+ .id = "ctinfo",
+ .parse_aopt = parse_ctinfo,
+ .print_aopt = print_ctinfo,
+};