@@ -1490,6 +1490,31 @@ parse_put_flow_ct_action(struct tc_flower *flower,
return 0;
}
+/* This function returns true if the tc layer will add a l4 checksum action
+ * for this set action. Refer to the csum_update_flag() function for
+ * detailed logic. Note that even the kernel only supports updating TCP,
+ * UDP and ICMPv6.
+ */
+static bool
+will_tc_add_l4_checksum(struct tc_flower *flower, int type)
+{
+ switch (type) {
+ case OVS_KEY_ATTR_IPV4:
+ case OVS_KEY_ATTR_IPV6:
+ case OVS_KEY_ATTR_TCP:
+ case OVS_KEY_ATTR_UDP:
+ switch (flower->key.ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_ICMPV6:
+ case IPPROTO_UDPLITE:
+ return true;
+ }
+ break;
+ }
+ return false;
+}
+
static int
parse_put_flow_set_masked_action(struct tc_flower *flower,
struct tc_action *action,
@@ -1522,6 +1547,14 @@ parse_put_flow_set_masked_action(struct tc_flower *flower,
return EOPNOTSUPP;
}
+ if (flower->key.flags & TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
+ && will_tc_add_l4_checksum(flower, type)) {
+ VLOG_DBG_RL(&rl, "set action type %d not supported on fragments "
+ "due to checksum limitation", type);
+ ofpbuf_uninit(&set_buf);
+ return EOPNOTSUPP;
+ }
+
for (i = 0; i < ARRAY_SIZE(set_flower_map[type]); i++) {
struct netlink_field *f = &set_flower_map[type][i];
@@ -2447,6 +2480,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
}
mask->nw_frag = 0;
+ } else {
+ mask->nw_frag = FLOW_NW_FRAG_MASK;
}
if (key->nw_proto == IPPROTO_TCP) {
@@ -2958,7 +2958,10 @@ csum_update_flag(struct tc_flower *flower,
* eth(dst=<mac>),eth_type(0x0800) actions=set(ipv4(src=<new_ip>))
* we need to force a more specific flow as this can, for example,
* need a recalculation of icmp checksum if the packet that passes
- * is ICMPv6 and tcp checksum if its tcp. */
+ * is ICMPv6 and tcp checksum if its tcp.
+ *
+ * Ensure that you keep the pre-check function in netdev-offload-tc.c,
+ * will_tc_add_l4_checksum(), in sync if you make any changes here. */
switch (htype) {
case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
@@ -2436,6 +2436,99 @@ recirc_id(<recirc>),in_port(2),eth_type(0x86dd),ipv6(proto=58,frag=no),icmpv6(ty
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([datapath - mod_nw_src/set_field on IP fragments])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", 36:b1:ee:7c:01:03)
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", 36:b1:ee:7c:01:02)
+
+AT_DATA([flows.txt], [dnl
+ in_port=ovs-p0,ip,nw_src=10.1.1.1 actions=mod_nw_src=11.1.1.1,ovs-p1
+ in_port=ovs-p0,ipv6,ipv6_src=fc00::1 actions=set_field:fc00::100->ipv6_src,ovs-p1
+])
+
+AT_CHECK([ovs-ofctl del-flows br0])
+AT_CHECK([ovs-ofctl -Oopenflow13 add-flows br0 flows.txt])
+
+NETNS_DAEMONIZE([at_ns1],
+ [tcpdump -l -nn -xx -U -i p1 -w p1.pcap 2> tcpdump.err],
+ [tcpdump.pid])
+OVS_WAIT_UNTIL([grep "listening" tcpdump.err])
+
+dnl IPv4 Packet content:
+dnl Ethernet II, Src: 36:b1:ee:7c:01:03, Dst: 36:b1:ee:7c:01:02
+dnl Type: IPv4 (0x0800)
+dnl Internet Protocol Version 4, Src: 10.1.1.1, Dst: 10.1.1.2
+dnl 0100 .... = Version: 4
+dnl .... 0101 = Header Length: 20 bytes (5)
+dnl Differentiated Services Field: 0x00 (DSCP: CS0, ECN: Not-ECT)
+dnl Total Length: 38
+dnl Identification: 0x0001 (1)
+dnl 001. .... = Flags: 0x1, More fragments
+dnl 0... .... = Reserved bit: Not set
+dnl .0.. .... = Don't fragment: Not set
+dnl ..1. .... = More fragments: Set
+dnl ...0 0000 0000 0000 = Fragment Offset: 0
+dnl Time to Live: 64
+dnl Protocol: UDP (17)
+dnl Header Checksum: 0x44c2
+dnl Data (18 bytes)
+eth="36 b1 ee 7c 01 02 36 b1 ee 7c 01 03 08 00"
+ip="45 00 00 26 00 01 20 00 40 11 44 c2 0a 01 01 01 0a 01 01 02"
+data="0b c4 08 84 00 26 e9 64 01 02 03 04 05 06 07 08 09 0a"
+packet="${eth} ${ip} ${data}"
+
+dnl We send each packet multiple times, ones for learning which will flow
+dnl through the used datapath for learning, and the others will go through the
+dnl actual datapath.
+for i in 1 2 3 4 5; do
+ NS_CHECK_EXEC([at_ns0],
+ [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null])
+done
+
+dnl Update source address, and checksums in original packet for comparison.
+packet=$(echo "$packet" | sed -e 's/ //g' \
+ -e 's/0a010101/0b010101/g' -e 's/44c2/43c2/g' -e 's/e964/e864/g')
+OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c "${packet}") -eq 5])
+
+dnl Repeat similar test with IPv6.
+dnl Packet content:
+dnl Ethernet II, Src: 36:b1:ee:7c:01:03, Dst: 36:b1:ee:7c:01:02
+dnl Type: IPv6 (0x86dd)
+dnl Internet Protocol Version 6, Src: fc00::1, Dst: fc00::2
+dnl Payload Length: 24
+dnl Next Header: Fragment Header for IPv6 (44)
+dnl Hop Limit: 64
+dnl Fragment Header for IPv6
+dnl Next header: UDP (17)
+dnl Reserved octet: 0x00
+dnl 0000 0000 0000 0... = Offset: 0 (0 bytes)
+dnl .... .... .... .00. = Reserved bits: 0
+dnl .... .... .... ...1 = More Fragments: Yes
+dnl Identification: 0x2316ab36
+dnl Data (16 bytes)
+eth="36 b1 ee 7c 01 02 36 b1 ee 7c 01 03 86 dd"
+ip="60 00 00 00 00 18 2c 40 fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \
+ fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 02 11 00 00 01 23 16 ab 36"
+data="0b c4 08 84 00 26 07 65 01 02 03 04 05 06 07 08"
+packet="${eth} ${ip} ${data}"
+
+for i in 1 2 3 4 5; do
+ NS_CHECK_EXEC([at_ns0],
+ [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null])
+done
+
+dnl Update checksum and source address in original packet for comparison.
+packet=$(echo "$packet" | sed -e 's/ //g' -e 's/0765/0666/g' -e \
+ 's/fc000000000000000000000000000001/fc000000000000000000000000000100/g')
+OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c "${packet}") -eq 5])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_BANNER([MPLS])
AT_SETUP([mpls - encap header dp-support])
While offloading header modifications to TC, OVS is using {TCA_PEDIT} + {TCA_CSUM} combination as that it the only way to represent header rewrite. However, {TCA_CSUM} is unable to calculate L4 checksums for IP fragments. Since TC already applies fragmentation bit masking, this patch simply needs to prevent these packets from being processed through TC. Reported-at: https://issues.redhat.com/browse/FDP-545 Signed-off-by: Eelco Chaudron <echaudro@redhat.com> --- v3: - Fixed some comment style issue. - Add the nw_frag mask if not set. v2: - Fixed and added some comments. - Use ovs-pcap to compare packets. --- lib/netdev-offload-tc.c | 35 ++++++++++++++++ lib/tc.c | 5 ++- tests/system-traffic.at | 93 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+), 1 deletion(-)