From patchwork Sun Jan 31 18:13:23 2016
X-Patchwork-Submitter: Wei Xu
X-Patchwork-Id: 576231
From: wexu@redhat.com
To: qemu-devel@nongnu.org
Cc: Wei Xu, victork@redhat.com, mst@redhat.com, jasowang@redhat.com,
    yvugenfi@redhat.com, Wei Xu, marcel@redhat.com, dfleytma@redhat.com
Date: Mon, 1 Feb 2016 02:13:23 +0800
Message-Id: <1454264009-24094-5-git-send-email-wexu@redhat.com>
In-Reply-To: <1454264009-24094-1-git-send-email-wexu@redhat.com>
References: <1454264009-24094-1-git-send-email-wexu@redhat.com>
Subject: [Qemu-devel] [RFC Patch v2 04/10] virtio-net rsc: Detailed IPv4 and General TCP data coalescing

From: Wei Xu

Since this feature also needs to support IPv6, and the IPv4 and IPv6 headers
have protocol-specific differences, the interface is kept general: the
IPv4/IPv6 callers set up both the new and the old IP/TCP headers and pass the
real payload length before invoking the common TCP coalescing routine.

The main TCP handler performs the window update, the duplicated-ACK check and
the actual data coalescing once the new segment has passed the validity checks
and is identified as an expected one. An expected segment means:

1. The segment is within the current window and its sequence number is the
   expected one.
2. The ACK of the segment is within the valid window.
3. If the ACK in the segment is a duplicated one, the duplicate count must be
   less than 2; otherwise the segment is sent out so the upper-layer TCP stack
   can start retransmission, as the spec requires.
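The three conditions above come down to unsigned wrap-around comparisons on the
sequence/ACK numbers plus a one-deep duplicated-ACK counter. A minimal
standalone sketch of that classification follows; demo_classify, demo_seg and
the DEMO_* names are made up for illustration only and are not part of this
patch:

/* Illustration only: classify a segment as "coalesce" or "send out now"
 * following the three rules from the commit message. */
#include <stdint.h>
#include <stdio.h>

#define DEMO_TCP_WINDOW 65535

enum { DEMO_COALESCE, DEMO_FINAL };

struct demo_seg {
    uint32_t seq;           /* sequence number of the coalesced segment */
    uint32_t ack;           /* last ACK seen on it                      */
    uint16_t win;           /* last window seen on it                   */
    uint16_t payload;       /* TCP payload bytes coalesced so far       */
    int dup_ack_count;      /* duplicated ACKs absorbed so far          */
};

static int demo_classify(struct demo_seg *s, uint32_t seq, uint32_t ack,
                         uint16_t win, uint16_t data)
{
    /* Rule 2: the ACK must stay inside the valid window; the subtraction
     * wraps modulo 2^32, matching the unsigned arithmetic in the patch. */
    if (ack - s->ack >= DEMO_TCP_WINDOW) {
        return DEMO_FINAL;
    }

    if (seq == s->seq) {
        /* Same sequence: pure ACK, window update or duplicated ACK. */
        if (ack == s->ack && win == s->win && data == 0) {
            /* Rule 3: absorb at most one duplicated ACK; the second one is
             * sent out so the upper-layer TCP can start retransmission. */
            return (s->dup_ack_count++ < 1) ? DEMO_COALESCE : DEMO_FINAL;
        }
        return DEMO_COALESCE;
    }

    /* Rule 1: otherwise the sequence must be exactly the expected one,
     * i.e. the old sequence advanced by the payload already coalesced. */
    if (seq - s->seq > DEMO_TCP_WINDOW || seq - s->seq != s->payload) {
        return DEMO_FINAL;   /* out of order or retransmission */
    }
    return DEMO_COALESCE;
}

int main(void)
{
    struct demo_seg s = { .seq = 1000, .ack = 500, .win = 4096, .payload = 0 };

    /* In-sequence data on the expected sequence number: coalesce (prints 0). */
    printf("%d\n", demo_classify(&s, 1000, 500, 4096, 1460));

    s.payload = 1460;
    /* A gap in the sequence space: flush the segment (prints 1). */
    printf("%d\n", demo_classify(&s, 3000, 500, 4096, 1460));
    return 0;
}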
Signed-off-by: Wei Xu
---
 hw/net/virtio-net.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 124 insertions(+), 3 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index cfbac6d..4f77fbe 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -41,6 +41,10 @@
 #define VIRTIO_HEADER 12 /* Virtio net header size */
 #define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
 
+#define TCP_WINDOW 65535
+
+/* IPv4 max payload, 16 bits in the header */
+#define MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
 #define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET)
 
@@ -1670,13 +1674,130 @@ out:
     return 0;
 }
 
+static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
+    const uint8_t *buf, struct tcp_header *n_tcp,
+    struct tcp_header *o_tcp)
+{
+    uint32_t nack, oack;
+    uint16_t nwin, owin;
+
+    nack = htonl(n_tcp->th_ack);
+    nwin = htons(n_tcp->th_win);
+    oack = htonl(o_tcp->th_ack);
+    owin = htons(o_tcp->th_win);
+
+    if ((nack - oack) >= TCP_WINDOW) {
+        return RSC_FINAL;
+    } else if (nack == oack) {
+        /* duplicated ack or window probe */
+        if (nwin == owin) {
+            /* duplicated ack, bump the dup-ack count (up to 1) for the whql test */
+            if (seg->dup_ack_count == 0) {
+                seg->dup_ack_count++;
+                return RSC_COALESCE;
+            } else {
+                /* Spec says it should be sent out directly */
+                return RSC_FINAL;
+            }
+        } else {
+            /* Coalesce window update */
+            o_tcp->th_win = n_tcp->th_win;
+            return RSC_COALESCE;
+        }
+    } else {
+        /* pure ack, update ack */
+        o_tcp->th_ack = n_tcp->th_ack;
+        return RSC_COALESCE;
+    }
+}
+
+static int32_t virtio_net_rsc_coalesce_tcp(NetRscChain *chain, NetRscSeg *seg,
+    const uint8_t *buf, struct tcp_header *n_tcp, uint16_t n_tcp_len,
+    uint16_t n_data, struct tcp_header *o_tcp, uint16_t o_tcp_len,
+    uint16_t o_data, uint16_t *p_ip_len, uint16_t max_data)
+{
+    void *data;
+    uint16_t o_ip_len;
+    uint32_t nseq, oseq;
+
+    o_ip_len = htons(*p_ip_len);
+    nseq = htonl(n_tcp->th_seq);
+    oseq = htonl(o_tcp->th_seq);
+
+    /* Ignore packet with more/larger tcp options */
+    if (n_tcp_len > o_tcp_len) {
+        return RSC_FINAL;
+    }
+
+    /* out of order or retransmitted */
+    if ((nseq - oseq) > TCP_WINDOW) {
+        return RSC_FINAL;
+    }
+
+    data = ((uint8_t *)n_tcp) + n_tcp_len;
+    if (nseq == oseq) {
+        if ((0 == o_data) && n_data) {
+            /* From no payload to payload, normal case, not a dup ack etc. */
+            goto coalesce;
+        } else {
+            return virtio_net_rsc_handle_ack(chain, seg, buf, n_tcp, o_tcp);
+        }
+    } else if ((nseq - oseq) != o_data) {
+        /* Not a consistent packet, out of order */
+        return RSC_FINAL;
+    } else {
+coalesce:
+        if ((o_ip_len + n_data) > max_data) {
+            return RSC_FINAL;
+        }
+
+        /* Here comes the right data; the payload length field differs between
+           v4/v6, so use the field value to do the update */
+        *p_ip_len = htons(o_ip_len + n_data); /* Update new data len */
+        o_tcp->th_offset_flags = n_tcp->th_offset_flags; /* Bring the 'PUSH' bit */
+        o_tcp->th_ack = n_tcp->th_ack;
+        o_tcp->th_win = n_tcp->th_win;
+
+        memmove(seg->buf + seg->size, data, n_data);
+        seg->size += n_data;
+        return RSC_COALESCE;
+    }
+}
 
 static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, NetRscSeg *seg,
     const uint8_t *buf, size_t size)
 {
-    /* This real part of this function will be introduced in next patch, just
-     * return a 'final' to feed the compilation. */
-    return RSC_FINAL;
+    uint16_t o_ip_len, n_ip_len; /* len in ip header field */
+    uint16_t n_ip_hdrlen, o_ip_hdrlen; /* ipv4 header len */
+    uint16_t n_tcp_len, o_tcp_len; /* tcp header len */
+    uint16_t o_data, n_data; /* payload without virtio/eth/ip/tcp */
+    struct ip_header *n_ip, *o_ip;
+    struct tcp_header *n_tcp, *o_tcp;
+
+    n_ip = (struct ip_header *)(buf + IP_OFFSET);
+    n_ip_hdrlen = ((0xF & n_ip->ip_ver_len) << 2);
+    n_ip_len = htons(n_ip->ip_len);
+    n_tcp = (struct tcp_header *)(((uint8_t *)n_ip) + n_ip_hdrlen);
+    n_tcp_len = (htons(n_tcp->th_offset_flags) & 0xF000) >> 10;
+    n_data = n_ip_len - n_ip_hdrlen - n_tcp_len;
+
+    o_ip = (struct ip_header *)(seg->buf + IP_OFFSET);
+    o_ip_hdrlen = ((0xF & o_ip->ip_ver_len) << 2);
+    o_ip_len = htons(o_ip->ip_len);
+    o_tcp = (struct tcp_header *)(((uint8_t *)o_ip) + o_ip_hdrlen);
+    o_tcp_len = (htons(o_tcp->th_offset_flags) & 0xF000) >> 10;
+    o_data = o_ip_len - o_ip_hdrlen - o_tcp_len;
+
+    if ((n_ip->ip_src ^ o_ip->ip_src) || (n_ip->ip_dst ^ o_ip->ip_dst)
+        || (n_tcp->th_sport ^ o_tcp->th_sport)
+        || (n_tcp->th_dport ^ o_tcp->th_dport)) {
+        return RSC_NO_MATCH;
+    }
+
+    return virtio_net_rsc_coalesce_tcp(chain, seg, buf,
+               n_tcp, n_tcp_len, n_data, o_tcp, o_tcp_len,
+               o_data, &o_ip->ip_len, MAX_IP4_PAYLOAD);
 }
 
 static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,