From patchwork Tue Mar 15 09:17:04 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wei Xu X-Patchwork-Id: 597395 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3qPTYN42dNz9sds for ; Tue, 15 Mar 2016 20:18:00 +1100 (AEDT) Received: from localhost ([::1]:46566 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1afl70-0002dj-Bl for incoming@patchwork.ozlabs.org; Tue, 15 Mar 2016 05:17:58 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:39170) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1afl6R-0001lZ-JV for qemu-devel@nongnu.org; Tue, 15 Mar 2016 05:17:25 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1afl6P-0003Pg-OI for qemu-devel@nongnu.org; Tue, 15 Mar 2016 05:17:23 -0400 Received: from mx1.redhat.com ([209.132.183.28]:45064) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1afl6P-0003Pa-EV for qemu-devel@nongnu.org; Tue, 15 Mar 2016 05:17:21 -0400 Received: from int-mx11.intmail.prod.int.phx2.redhat.com (int-mx11.intmail.prod.int.phx2.redhat.com [10.5.11.24]) by mx1.redhat.com (Postfix) with ESMTPS id 2BB51A149A for ; Tue, 15 Mar 2016 09:17:21 +0000 (UTC) Received: from wei-thinkpad.nay.redhat.com (vpn1-6-161.pek2.redhat.com [10.72.6.161]) by int-mx11.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id u2F9HBFB024405; Tue, 15 Mar 2016 05:17:18 -0400 From: wexu@redhat.com To: qemu-devel@nongnu.org Date: Tue, 15 Mar 2016 17:17:04 +0800 Message-Id: <1458033424-25414-3-git-send-email-wexu@redhat.com> In-Reply-To: <1458033424-25414-1-git-send-email-wexu@redhat.com> References: 
<1458033424-25414-1-git-send-email-wexu@redhat.com> X-Scanned-By: MIMEDefang 2.68 on 10.5.11.24 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x X-Received-From: 209.132.183.28 Cc: victork@redhat.com, mst@redhat.com, jasowang@redhat.com, yvugenfi@redhat.com, Wei Xu , marcel@redhat.com, dfleytma@redhat.com Subject: [Qemu-devel] [ Patch 2/2] virtio-net rsc: support coalescing ipv6 tcp traffic X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org From: Wei Xu Most of the handling is the same as for ipv4, except for one significant difference between ipv4 and ipv6: the length field in the ipv4 header includes the ip header itself, while the payload length in the ipv6 header does not, which means an ipv6 packet can carry a real '65535' unit. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 146 ++++++++++++++++++++++++++++++++++++++++----- include/hw/virtio/virtio.h | 5 +- 2 files changed, 135 insertions(+), 16 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index c23b45f..ef61b74 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -52,9 +52,14 @@ #define MAX_IP4_PAYLOAD (65535 - IP4_HDR_SZ) #define MAX_TCP_PAYLOAD 65535 -/* max payload with virtio header */ +#define IP6_HDR_SZ (sizeof(struct ip6_header)) +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ) +#define IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ +#define MAX_IP6_PAYLOAD MAX_TCP_PAYLOAD + +/* ip6 max payload, payload in ipv6 doesn't include the header */ #define MAX_VIRTIO_PAYLOAD (sizeof(struct virtio_net_hdr_mrg_rxbuf) \ - + ETH_HDR_SZ + MAX_TCP_PAYLOAD) + + ETH_IP6_HDR_SZ + MAX_IP6_PAYLOAD) #define IP4_HEADER_LEN 5 /* header lenght value in ip header without option */ @@ -1722,14 +1727,27 @@ static void virtio_net_rsc_extract_unit4(NetRscChain *chain, { uint16_t ip_hdrlen; - unit->ip = (struct
ip_header *)(buf + chain->hdr_size + ETH_HDR_SZ); - ip_hdrlen = ((0xF & unit->ip->ip_ver_len) << 2); - unit->ip_plen = &unit->ip->ip_len; - unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); + unit->u_ip.ip = (struct ip_header *)(buf + chain->hdr_size + ETH_HDR_SZ); + ip_hdrlen = ((0xF & unit->u_ip.ip->ip_ver_len) << 2); + unit->ip_plen = &unit->u_ip.ip->ip_len; + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->u_ip.ip) + ip_hdrlen); unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; } +static void virtio_net_rsc_extract_unit6(NetRscChain *chain, + const uint8_t *buf, NetRscUnit* unit) +{ + unit->u_ip.ip6 = (struct ip6_header *)(buf + chain->hdr_size + ETH_HDR_SZ); + unit->ip_plen = &(unit->u_ip.ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->u_ip.ip6)\ + + IP6_HDR_SZ); + unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; + /* There is a difference between payload length in ipv4 and v6, + ip header is excluded in ipv6 */ + unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; +} + static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip) { uint32_t sum; @@ -1743,7 +1761,10 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg) { int ret; - virtio_net_rsc_ipv4_checksum(seg->unit.ip); + if ((chain->proto == ETH_P_IP) && seg->is_coalesced) { + virtio_net_rsc_ipv4_checksum(seg->unit.u_ip.ip); + } + ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); QTAILQ_REMOVE(&chain->buffers, seg, next); g_free(seg->buf); @@ -1807,7 +1828,11 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); chain->stat.cache++; - virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); + if (chain->proto == ETH_P_IP) { + virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); + } else { +
virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); + } } static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg, @@ -1930,8 +1955,8 @@ coalesce: static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg, const uint8_t *buf, size_t size, NetRscUnit *unit) { - if ((unit->ip->ip_src ^ seg->unit.ip->ip_src) - || (unit->ip->ip_dst ^ seg->unit.ip->ip_dst) + if ((unit->u_ip.ip->ip_src ^ seg->unit.u_ip.ip->ip_src) + || (unit->u_ip.ip->ip_dst ^ seg->unit.u_ip.ip->ip_dst) || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { chain->stat.no_match++; @@ -1941,6 +1966,22 @@ static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg, return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); } +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg *seg, + const uint8_t *buf, size_t size, NetRscUnit *unit) +{ + if (memcmp(&unit->u_ip.ip6->ip6_src, &seg->unit.u_ip.ip6->ip6_src, + sizeof(struct in6_address)) + || memcmp(&unit->u_ip.ip6->ip6_dst, &seg->unit.u_ip.ip6->ip6_dst, + sizeof(struct in6_address)) + || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) + || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { + chain->stat.no_match++; + return RSC_NO_MATCH; + } + + return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); +} + /* Pakcets with 'SYN' should bypass, other flag should be sent after drain * to prevent out of order */ static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain, @@ -1983,7 +2024,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRscChain *chain, NetClientState *nc, NetRscSeg *seg, *nseg; QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { - ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); + if (chain->proto == ETH_P_IP) { + ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); + } else { + ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); + } if (ret == RSC_FINAL) { if 
(virtio_net_rsc_drain_seg(chain, seg) == 0) { @@ -2082,7 +2127,8 @@ static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, chain = (NetRscChain *)opq; virtio_net_rsc_extract_unit4(chain, buf, &unit); - if (RSC_WANT != virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)) { + if (RSC_WANT != virtio_net_rsc_sanity_check4(chain, + unit.u_ip.ip, buf, size)) { return virtio_net_do_receive(nc, buf, size); } @@ -2102,13 +2148,74 @@ static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); } +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain, + struct ip6_header *ip, const uint8_t *buf, size_t size) +{ + uint16_t ip_len; + + if (size < (chain->hdr_size + ETH_IP6_HDR_SZ + TCP_HDR_SZ)) { + return RSC_BYPASS; + } + + if (((0xF0 & ip->ip6_ctlun.ip6_un1.ip6_un1_flow) >> 4) + != IP_HEADER_VERSION_6) { + return RSC_BYPASS; + } + + /* Both option and protocol is checked in this */ + if (ip->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { + chain->stat.bypass_not_tcp++; + return RSC_BYPASS; + } + + /* Sanity check */ + ip_len = htons(ip->ip6_ctlun.ip6_un1.ip6_un1_plen); + if (ip_len < TCP_HDR_SZ + || ip_len > (size - chain->hdr_size - ETH_IP6_HDR_SZ)) { + chain->stat.ip_hacked++; + return RSC_BYPASS; + } + + return RSC_WANT; +} + +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc, + const uint8_t *buf, size_t size) +{ + int32_t ret; + NetRscChain *chain; + NetRscUnit unit; + + chain = (NetRscChain *)opq; + virtio_net_rsc_extract_unit6(chain, buf, &unit); + if (RSC_WANT != virtio_net_rsc_sanity_check6(chain, + unit.u_ip.ip6, buf, size)) { + return virtio_net_do_receive(nc, buf, size); + } + + ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); + if (ret == RSC_BYPASS) { + return virtio_net_do_receive(nc, buf, size); + } else if (ret == RSC_FINAL) { + return virtio_net_rsc_drain_flow(chain, nc, buf, size, + ((chain->hdr_size + ETH_HDR_SZ) + 8), IP6_ADDR_SIZE, + 
(chain->hdr_size + ETH_IP6_HDR_SZ), TCP_PORT_SIZE); + } + + if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) { + return size; + } + + return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); +} + static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n, NetClientState *nc, uint16_t proto) { NetRscChain *chain; /* Only handle IPv4/6 */ - if (proto != (uint16_t)ETH_P_IP) { + if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { return NULL; } @@ -2121,7 +2228,11 @@ static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n, chain = g_malloc(sizeof(*chain)); chain->hdr_size = n->guest_hdr_len; chain->proto = proto; - chain->max_payload = MAX_IP4_PAYLOAD; + if (proto == (uint16_t)ETH_P_IP) { + chain->max_payload = MAX_IP4_PAYLOAD; + } else { + chain->max_payload = MAX_IP6_PAYLOAD; + } chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, virtio_net_rsc_purge, chain); memset(&chain->stat, 0, sizeof(chain->stat)); @@ -2153,7 +2264,12 @@ static ssize_t virtio_net_rsc_receive(NetClientState *nc, return virtio_net_do_receive(nc, buf, size); } else { chain->stat.received++; - return virtio_net_rsc_receive4(chain, nc, buf, size); + + if (proto == (uint16_t)ETH_P_IP) { + return virtio_net_rsc_receive4(chain, nc, buf, size); + } else { + return virtio_net_rsc_receive6(chain, nc, buf, size); + } } } diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 3b1dfa8..13d20a4 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -170,7 +170,10 @@ typedef struct NetRscStat { /* Rsc unit general info used to checking if can coalescing */ typedef struct NetRscUnit { - struct ip_header *ip; /* ip header */ + union { + struct ip_header *ip; /* ip header */ + struct ip6_header *ip6; /* ip6 header */ + } u_ip; uint16_t *ip_plen; /* data len pointer in ip header field */ struct tcp_header *tcp; /* tcp header */ uint16_t tcp_hdrlen; /* tcp header len */