From patchwork Mon May 28 22:28:18 2018
X-Patchwork-Id: 921714
From: Toke Høiland-Jørgensen
To: netdev@vger.kernel.org
Cc: cake@lists.bufferbloat.net
Subject: [PATCH net-next v17 7/8] sch_cake: Add overhead compensation support to the rate shaper
Date: Tue, 29 May 2018 00:28:18 +0200
Message-ID: <152754649862.18169.5997929441147340596.stgit@alrua-kau>
In-Reply-To: <152754646847.18169.14016621463603092505.stgit@alrua-kau>
References: <152754646847.18169.14016621463603092505.stgit@alrua-kau>

This commit adds configurable overhead compensation support to the rate
shaper. With this feature, userspace can configure the actual bottleneck
link overhead and encapsulation mode in use, which the shaper will use to
calculate the precise duration of each packet on the wire.

This feature is needed because CAKE is often deployed one or two hops
upstream of the actual bottleneck (which can be, e.g., inside a DSL or
cable modem). In this case, the link layer characteristics and overhead
reported by the kernel do not match those of the actual bottleneck. Being
able to set the actual values in use makes it possible to configure the
shaper rate much closer to the actual bottleneck rate (our experience
shows it is possible to get within 0.1% of the actual physical bottleneck
rate), thus keeping latency low without sacrificing bandwidth.

The overhead compensation has three tunables: a fixed per-packet overhead
size (which, if set, will be accounted from the IP packet header), a
minimum packet size (MPU), and a framing mode supporting either ATM or
PTM framing.
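To make the framing arithmetic concrete, here is a minimal standalone
sketch of the per-packet adjustment (illustrative only; the helper and
enum names are hypothetical, and the in-kernel version below additionally
subtracts the network offset and tracks min/max statistics on
struct cake_sched_data):

    /* Illustrative userspace sketch of the overhead adjustment.
     * Example: a 1500 byte packet with 22 bytes of overhead becomes
     * 1522 bytes, which needs 32 ATM cells of 48 payload bytes each,
     * i.e. 32 * 53 = 1696 bytes of line time.
     */
    #include <stdio.h>

    enum framing { FRAMING_NONE, FRAMING_ATM, FRAMING_PTM };

    static unsigned int adjusted_len(unsigned int len, int overhead,
                                     unsigned int mpu, enum framing mode)
    {
            len += overhead;        /* fixed per-packet overhead */
            if (len < mpu)          /* minimum packet size */
                    len = mpu;

            if (mode == FRAMING_ATM) {
                    /* round up to whole 48-byte cells, 53 bytes each on the wire */
                    len = (len + 47) / 48 * 53;
            } else if (mode == FRAMING_PTM) {
                    /* add one byte per 64 bytes or part thereof (64/65 encoding) */
                    len += (len + 63) / 64;
            }
            return len;
    }

    int main(void)
    {
            printf("%u\n", adjusted_len(1500, 22, 64, FRAMING_ATM)); /* 1696 */
            printf("%u\n", adjusted_len(1500, 22, 64, FRAMING_PTM)); /* 1546 */
            return 0;
    }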
We include a set of common keywords in TC to help users configure the
right parameters. If no overhead value is set, the value reported by the
kernel is used.

Signed-off-by: Toke Høiland-Jørgensen
---
 net/sched/sch_cake.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index b2760bc1a34f..10a968d43d44 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -270,6 +270,7 @@ enum {
 
 struct cobalt_skb_cb {
 	ktime_t enqueue_time;
+	u32 adjusted_len;
 };
 
 static u64 us_to_ns(u64 us)
@@ -1282,6 +1283,88 @@ static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
 	return avg;
 }
 
+static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
+{
+	if (q->rate_flags & CAKE_FLAG_OVERHEAD)
+		len -= off;
+
+	if (q->max_netlen < len)
+		q->max_netlen = len;
+	if (q->min_netlen > len)
+		q->min_netlen = len;
+
+	len += q->rate_overhead;
+
+	if (len < q->rate_mpu)
+		len = q->rate_mpu;
+
+	if (q->atm_mode == CAKE_ATM_ATM) {
+		len += 47;
+		len /= 48;
+		len *= 53;
+	} else if (q->atm_mode == CAKE_ATM_PTM) {
+		/* Add one byte per 64 bytes or part thereof.
+		 * This is conservative and easier to calculate than the
+		 * precise value.
+		 */
+		len += (len + 63) / 64;
+	}
+
+	if (q->max_adjlen < len)
+		q->max_adjlen = len;
+	if (q->min_adjlen > len)
+		q->min_adjlen = len;
+
+	return len;
+}
+
+static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	unsigned int hdr_len, last_len = 0;
+	u32 off = skb_network_offset(skb);
+	u32 len = qdisc_pkt_len(skb);
+	u16 segs = 1;
+
+	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
+
+	if (!shinfo->gso_size)
+		return cake_calc_overhead(q, len, off);
+
+	/* borrowed from qdisc_pkt_len_init() */
+	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+	/* + transport layer */
+	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
+				       SKB_GSO_TCPV6))) {
+		const struct tcphdr *th;
+		struct tcphdr _tcphdr;
+
+		th = skb_header_pointer(skb, skb_transport_offset(skb),
+					sizeof(_tcphdr), &_tcphdr);
+		if (likely(th))
+			hdr_len += __tcp_hdrlen(th);
+	} else {
+		struct udphdr _udphdr;
+
+		if (skb_header_pointer(skb, skb_transport_offset(skb),
+				       sizeof(_udphdr), &_udphdr))
+			hdr_len += sizeof(struct udphdr);
+	}
+
+	if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
+		segs = DIV_ROUND_UP(skb->len - hdr_len,
+				    shinfo->gso_size);
+	else
+		segs = shinfo->gso_segs;
+
+	len = shinfo->gso_size + hdr_len;
+	last_len = skb->len - shinfo->gso_size * (segs - 1);
+
+	return (cake_calc_overhead(q, len, off) * (segs - 1) +
+		cake_calc_overhead(q, last_len, off));
+}
+
 static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
 {
 	struct cake_heap_entry ii = q->overflow_heap[i];
@@ -1359,7 +1442,7 @@ static int cake_advance_shaper(struct cake_sched_data *q,
			       struct sk_buff *skb,
			       ktime_t now, bool drop)
 {
-	u32 len = qdisc_pkt_len(skb);
+	u32 len = get_cobalt_cb(skb)->adjusted_len;
 
 	/* charge packet bandwidth to this tin
 	 * and to the global shaper.
@@ -1556,6 +1639,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		b->max_skblen = len;
 
 	cobalt_set_enqueue_time(skb, now);
+	get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
 	flow_queue_add(flow, skb);
 
 	if (q->ack_filter)
@@ -2356,6 +2440,31 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 		q->flow_mode = (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
 				CAKE_FLOW_MASK);
 
+	if (tb[TCA_CAKE_ATM])
+		q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+
+	if (tb[TCA_CAKE_OVERHEAD]) {
+		q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
+		q->rate_flags |= CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_RAW]) {
+		q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_MPU])
+		q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+
 	if (tb[TCA_CAKE_RTT]) {
 		q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
 
@@ -2532,6 +2641,19 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
 			!!(q->rate_flags & CAKE_FLAG_WASH)))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+		goto nla_put_failure;
+
+	if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+		if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
+			goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+		goto nla_put_failure;
+
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
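
For reference, the GSO handling in cake_overhead() above charges a
super-packet as (segs - 1) full-size segments plus one final (possibly
shorter) segment, each adjusted individually rather than as one oversized
packet. A minimal standalone sketch of that accounting follows
(hypothetical names; fixed overhead and MPU only, ignoring the
network-offset subtraction and ATM/PTM framing for brevity):

    /* Illustrative sketch of per-segment GSO accounting.
     * Example: 4 full 1448 byte segments plus a 1000 byte tail,
     * 66 bytes of headers per segment, 22 bytes of shaper overhead.
     */
    #include <stdio.h>

    static unsigned int segment_cost(unsigned int seg_len, int overhead,
                                     unsigned int mpu)
    {
            seg_len += overhead;
            if (seg_len < mpu)
                    seg_len = mpu;
            return seg_len;
    }

    static unsigned int gso_cost(unsigned int skb_len, unsigned int gso_size,
                                 unsigned int hdr_len, unsigned int segs,
                                 int overhead, unsigned int mpu)
    {
            /* every full segment carries gso_size payload plus the headers */
            unsigned int full_len = gso_size + hdr_len;
            /* whatever remains after (segs - 1) full payloads is the last segment */
            unsigned int last_len = skb_len - gso_size * (segs - 1);

            return segment_cost(full_len, overhead, mpu) * (segs - 1) +
                   segment_cost(last_len, overhead, mpu);
    }

    int main(void)
    {
            unsigned int skb_len = 66 + 4 * 1448 + 1000;

            printf("%u\n", gso_cost(skb_len, 1448, 66, 5, 22, 64)); /* 7232 */
            return 0;
    }

Charging per segment matters because a GSO super-packet is segmented
before it reaches the wire, so each segment carries its own link-layer
overhead; charging the aggregate length once would under-count that
overhead by (segs - 1) times the per-packet amount.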