From patchwork Tue May 15 15:12:45 2018
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Patchwork-Submitter: =?utf-8?q?Toke_H=C3=B8iland-J=C3=B8rgensen?=
 <toke@toke.dk>
X-Patchwork-Id: 913738
X-Patchwork-Delegate: davem@davemloft.net
Return-Path: <netdev-owner@vger.kernel.org>
X-Original-To: patchwork-incoming-netdev@ozlabs.org
Delivered-To: patchwork-incoming-netdev@ozlabs.org
Authentication-Results: ozlabs.org;
	spf=none (mailfrom) smtp.mailfrom=vger.kernel.org
	(client-ip=209.132.180.67; helo=vger.kernel.org;
	envelope-from=netdev-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Authentication-Results: ozlabs.org;
	dmarc=pass (p=none dis=none) header.from=toke.dk
Authentication-Results: ozlabs.org; dkim=pass (2048-bit key;
	secure) header.d=toke.dk header.i=@toke.dk header.b="oLTWk7E3";
	dkim-atps=neutral
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 40lh13418Jz9rxs
	for <patchwork-incoming-netdev@ozlabs.org>;
	Wed, 16 May 2018 01:13:59 +1000 (AEST)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1753836AbeEOPMx (ORCPT
	<rfc822;patchwork-incoming-netdev@ozlabs.org>);
	Tue, 15 May 2018 11:12:53 -0400
Received: from mail.toke.dk ([52.28.52.200]:37391 "EHLO mail.toke.dk"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1753160AbeEOPMt (ORCPT <rfc822;netdev@vger.kernel.org>);
	Tue, 15 May 2018 11:12:49 -0400
Subject: [PATCH net-next v11 6/7] sch_cake: Add overhead compensation
	support to the rate shaper
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=toke.dk; s=20161023;
	t=1526397166; bh=dy5lR19U7uqXvNVivgznrRwfmNBue5IAv0Yn1SVz0xY=;
	h=Subject:From:To:Cc:Date:In-Reply-To:References:From;
	b=oLTWk7E3daN6rhFDhHKbj9hj2ZDEVSFJKh/f1W6hnbP+M9tC96zlcORWGaL6zXnDl
	eA6NjNgoPpBybu0NxVhqfbPYnAc7RQqMh0U9yKstMe+hLUZFdieEAJFOfFVWteOnnd
	vnp+LEuGolMGcUfpAnSD+FBC8EGBvGG8nT8d2kjHIsBBea4irdffrUGbNBTUMnX0sG
	CEkOwfemh1odS62aJ9mbkq9SfiMnZ+A/ziUQMKbe7Ws90RdRc99FZig3zy168L5vth
	XbNg9RU0n1c6DfSKTHrHIchf6lybc4UJvHxh9mJpyLeHmG4GWhSFWyTGo9Z4SxiI+T
	Vq69vrlHPlunw==
From: Toke =?utf-8?q?H=C3=B8iland-J=C3=B8rgensen?= <toke@toke.dk>
To: netdev@vger.kernel.org
Cc: cake@lists.bufferbloat.net
Date: Tue, 15 May 2018 17:12:45 +0200
X-Clacks-Overhead: GNU Terry Pratchett
Message-ID: <152639716516.2525.617012955602358464.stgit@alrua-kau>
In-Reply-To: <152639705919.2525.3718182212727523960.stgit@alrua-kau>
References: <152639705919.2525.3718182212727523960.stgit@alrua-kau>
MIME-Version: 1.0
Sender: netdev-owner@vger.kernel.org
Precedence: bulk
List-ID: <netdev.vger.kernel.org>
X-Mailing-List: netdev@vger.kernel.org

This commit adds configurable overhead compensation support to the rate
shaper. With this feature, userspace can configure the actual bottleneck
link overhead and encapsulation mode used, which will be used by the shaper
to calculate the precise duration of each packet on the wire.

This feature is needed because CAKE is often deployed one or two hops
upstream of the actual bottleneck (which can be, e.g., inside a DSL or
cable modem). In this case, the link layer characteristics and overhead
reported by the kernel does not match the actual bottleneck. Being able to
set the actual values in use makes it possible to configure the shaper rate
much closer to the actual bottleneck rate (our experience shows it is
possible to get with 0.1% of the actual physical bottleneck rate), thus
keeping latency low without sacrificing bandwidth.

The overhead compensation has three tunables: A fixed per-packet overhead
size (which, if set, will be accounted from the IP packet header), a
minimum packet size (MPU) and a framing mode supporting either ATM or PTM
framing. We include a set of common keywords in TC to help users configure
the right parameters. If no overhead value is set, the value reported by
the kernel is used.

Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
 net/sched/sch_cake.c |  124 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+), 1 deletion(-)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 4b13345f85dc..a4aad577bf8e 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -275,6 +275,7 @@ enum {
 
 struct cobalt_skb_cb {
 	cobalt_time_t enqueue_time;
+	u32           adjusted_len;
 };
 
 static cobalt_time_t cobalt_get_time(void)
@@ -1129,6 +1130,88 @@ static cobalt_time_t cake_ewma(cobalt_time_t avg, cobalt_time_t sample,
 	return avg;
 }
 
+static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
+{
+	if (q->rate_flags & CAKE_FLAG_OVERHEAD)
+		len -= off;
+
+	if (q->max_netlen < len)
+		q->max_netlen = len;
+	if (q->min_netlen > len)
+		q->min_netlen = len;
+
+	len += q->rate_overhead;
+
+	if (len < q->rate_mpu)
+		len = q->rate_mpu;
+
+	if (q->atm_mode == CAKE_ATM_ATM) {
+		len += 47;
+		len /= 48;
+		len *= 53;
+	} else if (q->atm_mode == CAKE_ATM_PTM) {
+		/* Add one byte per 64 bytes or part thereof.
+		 * This is conservative and easier to calculate than the
+		 * precise value.
+		 */
+		len += (len + 63) / 64;
+	}
+
+	if (q->max_adjlen < len)
+		q->max_adjlen = len;
+	if (q->min_adjlen > len)
+		q->min_adjlen = len;
+
+	return len;
+}
+
+static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	unsigned int hdr_len, last_len = 0;
+	u32 off = skb_network_offset(skb);
+	u32 len = qdisc_pkt_len(skb);
+	u16 segs = 1;
+
+	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
+
+	if (!shinfo->gso_size)
+		return cake_calc_overhead(q, len, off);
+
+	/* borrowed from qdisc_pkt_len_init() */
+	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+	/* + transport layer */
+	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
+						SKB_GSO_TCPV6))) {
+		const struct tcphdr *th;
+		struct tcphdr _tcphdr;
+
+		th = skb_header_pointer(skb, skb_transport_offset(skb),
+					sizeof(_tcphdr), &_tcphdr);
+		if (likely(th))
+			hdr_len += __tcp_hdrlen(th);
+	} else {
+		struct udphdr _udphdr;
+
+		if (skb_header_pointer(skb, skb_transport_offset(skb),
+				       sizeof(_udphdr), &_udphdr))
+			hdr_len += sizeof(struct udphdr);
+	}
+
+	if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
+		segs = DIV_ROUND_UP(skb->len - hdr_len,
+				    shinfo->gso_size);
+	else
+		segs = shinfo->gso_segs;
+
+	len = shinfo->gso_size + hdr_len;
+	last_len = skb->len - shinfo->gso_size * (segs - 1);
+
+	return (cake_calc_overhead(q, len, off) * (segs - 1) +
+		cake_calc_overhead(q, last_len, off));
+}
+
 static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
 {
 	struct cake_heap_entry ii = q->overflow_heap[i];
@@ -1206,7 +1289,7 @@ static int cake_advance_shaper(struct cake_sched_data *q,
 			       struct sk_buff *skb,
 			       u64 now, bool drop)
 {
-	u32 len = qdisc_pkt_len(skb);
+	u32 len = get_cobalt_cb(skb)->adjusted_len;
 
 	/* charge packet bandwidth to this tin
 	 * and to the global shaper.
@@ -1395,6 +1478,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		b->max_skblen = len;
 
 	cobalt_set_enqueue_time(skb, now);
+	get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
 	flow_queue_add(flow, skb);
 
 	if (q->ack_filter)
@@ -2171,6 +2255,31 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 			!!nla_get_u32(tb[TCA_CAKE_NAT]);
 	}
 
+	if (tb[TCA_CAKE_ATM])
+		q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+
+	if (tb[TCA_CAKE_OVERHEAD]) {
+		q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
+		q->rate_flags |= CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_RAW]) {
+		q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_MPU])
+		q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+
 	if (tb[TCA_CAKE_RTT]) {
 		q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
 
@@ -2347,6 +2456,19 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
 			!!(q->rate_flags & CAKE_FLAG_WASH)))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+		goto nla_put_failure;
+
+	if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+		if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
+			goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+		goto nla_put_failure;
+
 	return nla_nest_end(skb, opts);
 
 nla_put_failure: