From patchwork Sun Oct 16 23:06:12 2011
X-Patchwork-Submitter: Dan Siemon
X-Patchwork-Id: 120079
X-Patchwork-Delegate: davem@davemloft.net
Subject: [PATCH] cls_flow: Add tunnel support to the flow classifier
From: Dan Siemon
To: netdev
Date: Sun, 16 Oct 2011 19:06:12 -0400
Message-ID: <1318806373.7169.35.camel@ganymede>

When used on an interface carrying tunneled traffic, the flow classifier
cannot look inside the tunnels, so all of the traffic within a tunnel is
treated as a single flow. This prevents any kind of intelligent queuing
from being applied to that traffic.

This patch adds new keys to the flow classifier which look inside the
tunnel. Presently IP-IP, IP-IPv6, IPv6-IPv6 and IPv6-IP tunnels are
supported.

If you are interested, I have posted some background and experimental
results at:

http://www.coverfire.com/archives/2011/10/16/making-the-linux-flow-classifier-tunnel-aware/

The related iproute2 patch can be found at the above URL as well.
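To make the new keys concrete, here is a minimal standalone userspace sketch
(not part of the patch; the sample packet bytes and the read_be* helper names
are made up for illustration) that walks an IPv4-in-IPv4 packet held in a flat
buffer and extracts the same values the new FLOW_KEY_TUNNEL_* keys compute:
the inner addresses, the inner protocol, and the inner transport ports. The
kernel code in the diff performs the equivalent walk on an skb via
pskb_network_may_pull() and the header accessors.

/*
 * Standalone sketch: parse an IPv4-in-IPv4 packet in a flat buffer and
 * report the inner header fields that the new flow keys hash on.
 * The packet contents below are fabricated for demonstration only.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

static uint32_t read_be32(const uint8_t *p)
{
	uint32_t v;
	memcpy(&v, p, sizeof(v));	/* memcpy avoids unaligned access */
	return ntohl(v);
}

static uint16_t read_be16(const uint8_t *p)
{
	uint16_t v;
	memcpy(&v, p, sizeof(v));
	return ntohs(v);
}

int main(void)
{
	/* outer IPv4 (20 bytes) + inner IPv4 (20 bytes) + 4 bytes of UDP ports */
	uint8_t pkt[44] = {0};

	pkt[0] = 0x45;			/* outer: version 4, ihl 5 */
	pkt[9] = 4;			/* outer protocol = IPPROTO_IPIP */

	pkt[20] = 0x45;			/* inner: version 4, ihl 5 */
	pkt[29] = 17;			/* inner protocol = UDP */
	pkt[32] = 10; pkt[35] = 1;	/* inner saddr 10.0.0.1 */
	pkt[36] = 10; pkt[39] = 2;	/* inner daddr 10.0.0.2 */
	pkt[40] = 0x30; pkt[41] = 0x39;	/* inner source port 12345 */
	pkt[42] = 0x00; pkt[43] = 0x35;	/* inner dest port 53 */

	if (pkt[9] == 4) {		/* IPIP: inner header follows the outer one */
		const uint8_t *inner = pkt + (pkt[0] & 0x0f) * 4;
		int inner_ihl = (inner[0] & 0x0f) * 4;

		printf("tunnel_src       = 0x%08x\n", (unsigned)read_be32(inner + 12));
		printf("tunnel_dst       = 0x%08x\n", (unsigned)read_be32(inner + 16));
		printf("tunnel_proto     = %u\n", (unsigned)inner[9]);
		printf("tunnel_proto_src = %u\n", (unsigned)read_be16(inner + inner_ihl));
		printf("tunnel_proto_dst = %u\n", (unsigned)read_be16(inner + inner_ihl + 2));
	}
	return 0;
}

Running it should print the inner 10.0.0.1/10.0.0.2 addresses, protocol 17 and
ports 12345/53, which is the per-flow information the classifier can now hash
on for tunneled packets.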
Signed-off-by: Dan Siemon

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index defbde2..2f80fa0 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -333,6 +333,11 @@ enum {
 	FLOW_KEY_SKGID,
 	FLOW_KEY_VLAN_TAG,
 	FLOW_KEY_RXHASH,
+	FLOW_KEY_TUNNEL_SRC,
+	FLOW_KEY_TUNNEL_DST,
+	FLOW_KEY_TUNNEL_PROTO,
+	FLOW_KEY_TUNNEL_PROTO_SRC,
+	FLOW_KEY_TUNNEL_PROTO_DST,
 	__FLOW_KEY_MAX,
 };
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6994214..f0bd3ad 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -311,6 +311,301 @@ static u32 flow_get_rxhash(struct sk_buff *skb)
 	return skb_get_rxhash(skb);
 }
 
+static u32 tunnel_inner_ip_src(struct sk_buff *skb)
+{
+	if (pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    sizeof(struct iphdr))) {
+		return ntohl(ipip_hdr(skb)->saddr);
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ipv6_src(struct sk_buff *skb)
+{
+	if (pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    sizeof(struct ipv6hdr))) {
+		struct ipv6hdr *iph = (struct ipv6hdr *)
+			skb_transport_header(skb);
+		return ntohl(iph->saddr.s6_addr32[3]);
+	}
+
+	return 0;
+}
+
+static u32 flow_get_tunnel_src(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (pskb_network_may_pull(skb, sizeof(struct iphdr))) {
+			if (ip_hdr(skb)->protocol == IPPROTO_IPIP) {
+				return tunnel_inner_ip_src(skb);
+			} else if (ip_hdr(skb)->protocol == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_src(skb);
+			}
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
+			if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPIP) {
+				return tunnel_inner_ip_src(skb);
+			} else if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_src(skb);
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ip_dst(struct sk_buff *skb)
+{
+	if (pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    sizeof(struct iphdr))) {
+		return ntohl(ipip_hdr(skb)->daddr);
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ipv6_dst(struct sk_buff *skb)
+{
+	if (pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    sizeof(struct ipv6hdr))) {
+		struct ipv6hdr *iph = (struct ipv6hdr *)
+			skb_transport_header(skb);
+		return ntohl(iph->daddr.s6_addr32[3]);
+	}
+
+	return 0;
+}
+
+static u32 flow_get_tunnel_dst(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (pskb_network_may_pull(skb, sizeof(struct iphdr))) {
+			if (ip_hdr(skb)->protocol == IPPROTO_IPIP) {
+				return tunnel_inner_ip_dst(skb);
+			} else if (ip_hdr(skb)->protocol == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_dst(skb);
+			}
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
+			if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPIP) {
+				return tunnel_inner_ip_dst(skb);
+			} else if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_dst(skb);
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ip_proto(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    sizeof(struct iphdr))) {
+		return 0;
+	}
+
+	iph = ipip_hdr(skb);
+
+	return iph->protocol;
+}
+
+static u32 tunnel_inner_ipv6_proto(struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    sizeof(struct ipv6hdr))) {
+		return 0;
+	}
+
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+
+	return ipv6h->nexthdr;
+}
+
+static u32 flow_get_tunnel_proto(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (pskb_network_may_pull(skb, sizeof(struct iphdr))) {
+			if (ip_hdr(skb)->protocol == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto(skb);
+			} else if (ip_hdr(skb)->protocol == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto(skb);
+			}
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
+			if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto(skb);
+			} else if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto(skb);
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ip_proto_src(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    sizeof(struct iphdr))) {
+		return 0;
+	}
+
+	iph = ipip_hdr(skb);
+
+	if (ip_is_fragment(iph))
+		return 0;
+
+	poff = proto_ports_offset(iph->protocol);
+	if (poff >= 0 && pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    iph->ihl * 4 + 2 + poff)) {
+		return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + poff));
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ipv6_proto_src(struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    sizeof(struct ipv6hdr))) {
+		return 0;
+	}
+
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+
+	poff = proto_ports_offset(ipv6h->nexthdr);
+	if (poff >= 0 &&
+	    pskb_network_may_pull(skb, sizeof(*ipv6h) + poff + 2)) {
+		return ntohs(*(__be16 *)((void *)ipv6h + sizeof(*ipv6h) +
+					 poff));
+	}
+
+	return 0;
+}
+
+static u32 flow_get_tunnel_proto_src(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (pskb_network_may_pull(skb, sizeof(struct iphdr))) {
+			if (ip_hdr(skb)->protocol == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto_src(skb);
+			} else if (ip_hdr(skb)->protocol == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto_src(skb);
+			}
+			return 0;
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
+			if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto_src(skb);
+			} else if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto_src(skb);
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ip_proto_dst(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    sizeof(struct iphdr))) {
+		return 0;
+	}
+
+	iph = ipip_hdr(skb);
+
+	if (ip_is_fragment(iph))
+		return 0;
+
+	poff = proto_ports_offset(iph->protocol);
+	if (poff >= 0 && pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    iph->ihl * 4 + 4 + poff)) {
+		return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2 + poff));
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ipv6_proto_dst(struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+	    sizeof(struct ipv6hdr))) {
+		return 0;
+	}
+
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+
+	poff = proto_ports_offset(ipv6h->nexthdr);
+	if (poff >= 0 &&
+	    pskb_network_may_pull(skb, sizeof(*ipv6h) + poff + 4)) {
+		return ntohs(*(__be16 *)((void *)ipv6h + sizeof(*ipv6h) +
+					 poff + 2));
+	}
+
+	return 0;
+}
+
+static u32 flow_get_tunnel_proto_dst(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (pskb_network_may_pull(skb, sizeof(struct iphdr))) {
+			if (ip_hdr(skb)->protocol == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto_dst(skb);
+			} else if (ip_hdr(skb)->protocol == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto_dst(skb);
+			}
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
+			if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto_dst(skb);
+			} else if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto_dst(skb);
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
 static u32 flow_key_get(struct sk_buff *skb, int key)
 {
 	switch (key) {
@@ -350,6 +645,16 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
 		return flow_get_vlan_tag(skb);
 	case FLOW_KEY_RXHASH:
 		return flow_get_rxhash(skb);
+	case FLOW_KEY_TUNNEL_SRC:
+		return flow_get_tunnel_src(skb);
+	case FLOW_KEY_TUNNEL_DST:
+		return flow_get_tunnel_dst(skb);
+	case FLOW_KEY_TUNNEL_PROTO:
+		return flow_get_tunnel_proto(skb);
+	case FLOW_KEY_TUNNEL_PROTO_SRC:
+		return flow_get_tunnel_proto_src(skb);
+	case FLOW_KEY_TUNNEL_PROTO_DST:
+		return flow_get_tunnel_proto_dst(skb);
 	default:
 		WARN_ON(1);
 		return 0;