diff mbox series

[net-next,v14,6/7] sch_cake: Add overhead compensation support to the rate shaper

Message ID 152693495874.32668.11244869690098290078.stgit@alrua-kau
State Superseded, archived
Delegated to: David Miller
Headers show
Series sched: Add Common Applications Kept Enhanced (cake) qdisc | expand

Commit Message

Toke Høiland-Jørgensen May 21, 2018, 8:35 p.m. UTC
This commit adds configurable overhead compensation support to the rate
shaper. With this feature, userspace can configure the actual bottleneck
link overhead and encapsulation mode used, which will be used by the shaper
to calculate the precise duration of each packet on the wire.

This feature is needed because CAKE is often deployed one or two hops
upstream of the actual bottleneck (which can be, e.g., inside a DSL or
cable modem). In this case, the link layer characteristics and overhead
reported by the kernel do not match the actual bottleneck. Being able to
set the actual values in use makes it possible to configure the shaper rate
much closer to the actual bottleneck rate (our experience shows it is
possible to get within 0.1% of the actual physical bottleneck rate), thus
keeping latency low without sacrificing bandwidth.

The overhead compensation has three tunables: A fixed per-packet overhead
size (which, if set, will be accounted from the IP packet header), a
minimum packet size (MPU) and a framing mode supporting either ATM or PTM
framing. We include a set of common keywords in TC to help users configure
the right parameters. If no overhead value is set, the value reported by
the kernel is used.

Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
 net/sched/sch_cake.c |  124 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+), 1 deletion(-)

Comments

Marcelo Ricardo Leitner May 21, 2018, 11:45 p.m. UTC | #1
On Mon, May 21, 2018 at 10:35:58PM +0200, Toke Høiland-Jørgensen wrote:
> +static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
> +{
> +	const struct skb_shared_info *shinfo = skb_shinfo(skb);
> +	unsigned int hdr_len, last_len = 0;
> +	u32 off = skb_network_offset(skb);
> +	u32 len = qdisc_pkt_len(skb);
> +	u16 segs = 1;
> +
> +	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
> +
> +	if (!shinfo->gso_size)
> +		return cake_calc_overhead(q, len, off);
> +
> +	/* borrowed from qdisc_pkt_len_init() */
> +	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
> +
> +	/* + transport layer */
> +	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
> +						SKB_GSO_TCPV6))) {
> +		const struct tcphdr *th;
> +		struct tcphdr _tcphdr;
> +
> +		th = skb_header_pointer(skb, skb_transport_offset(skb),
> +					sizeof(_tcphdr), &_tcphdr);
> +		if (likely(th))
> +			hdr_len += __tcp_hdrlen(th);
> +	} else {

I didn't see some code limiting GSO packets to just TCP or UDP. Is it
safe to assume that this packet is an UDP one, and not SCTP or ESP,
for example?

> +		struct udphdr _udphdr;
> +
> +		if (skb_header_pointer(skb, skb_transport_offset(skb),
> +				       sizeof(_udphdr), &_udphdr))
> +			hdr_len += sizeof(struct udphdr);
> +	}
> +
> +	if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
> +		segs = DIV_ROUND_UP(skb->len - hdr_len,
> +				    shinfo->gso_size);
> +	else
> +		segs = shinfo->gso_segs;
> +
> +	len = shinfo->gso_size + hdr_len;
> +	last_len = skb->len - shinfo->gso_size * (segs - 1);
> +
> +	return (cake_calc_overhead(q, len, off) * (segs - 1) +
> +		cake_calc_overhead(q, last_len, off));
> +}
> +
Toke Høiland-Jørgensen May 22, 2018, 8:44 a.m. UTC | #2
On 22 May 2018 01:45:13 CEST, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> wrote:
>On Mon, May 21, 2018 at 10:35:58PM +0200, Toke Høiland-Jørgensen wrote:
>> +static u32 cake_overhead(struct cake_sched_data *q, const struct
>sk_buff *skb)
>> +{
>> +	const struct skb_shared_info *shinfo = skb_shinfo(skb);
>> +	unsigned int hdr_len, last_len = 0;
>> +	u32 off = skb_network_offset(skb);
>> +	u32 len = qdisc_pkt_len(skb);
>> +	u16 segs = 1;
>> +
>> +	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
>> +
>> +	if (!shinfo->gso_size)
>> +		return cake_calc_overhead(q, len, off);
>> +
>> +	/* borrowed from qdisc_pkt_len_init() */
>> +	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
>> +
>> +	/* + transport layer */
>> +	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
>> +						SKB_GSO_TCPV6))) {
>> +		const struct tcphdr *th;
>> +		struct tcphdr _tcphdr;
>> +
>> +		th = skb_header_pointer(skb, skb_transport_offset(skb),
>> +					sizeof(_tcphdr), &_tcphdr);
>> +		if (likely(th))
>> +			hdr_len += __tcp_hdrlen(th);
>> +	} else {
>
>I didn't see some code limiting GSO packets to just TCP or UDP. Is it
>safe to assume that this packet is an UDP one, and not SCTP or ESP,
>for example?

As the comment says, I nicked this from the qdisc init code.
So I assume it's safe? :)

>> +		struct udphdr _udphdr;
>> +
>> +		if (skb_header_pointer(skb, skb_transport_offset(skb),
>> +				       sizeof(_udphdr), &_udphdr))
>> +			hdr_len += sizeof(struct udphdr);
>> +	}
>> +
>> +	if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
>> +		segs = DIV_ROUND_UP(skb->len - hdr_len,
>> +				    shinfo->gso_size);
>> +	else
>> +		segs = shinfo->gso_segs;
>> +
>> +	len = shinfo->gso_size + hdr_len;
>> +	last_len = skb->len - shinfo->gso_size * (segs - 1);
>> +
>> +	return (cake_calc_overhead(q, len, off) * (segs - 1) +
>> +		cake_calc_overhead(q, last_len, off));
>> +}
>> +
Marcelo Ricardo Leitner May 22, 2018, 8:22 p.m. UTC | #3
On Tue, May 22, 2018 at 10:44:53AM +0200, Toke Høiland-Jørgensen wrote:
> 
> 
> On 22 May 2018 01:45:13 CEST, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> wrote:
> >On Mon, May 21, 2018 at 10:35:58PM +0200, Toke Høiland-Jørgensen wrote:
> >> +static u32 cake_overhead(struct cake_sched_data *q, const struct
> >sk_buff *skb)
> >> +{
> >> +	const struct skb_shared_info *shinfo = skb_shinfo(skb);
> >> +	unsigned int hdr_len, last_len = 0;
> >> +	u32 off = skb_network_offset(skb);
> >> +	u32 len = qdisc_pkt_len(skb);
> >> +	u16 segs = 1;
> >> +
> >> +	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
> >> +
> >> +	if (!shinfo->gso_size)
> >> +		return cake_calc_overhead(q, len, off);
> >> +
> >> +	/* borrowed from qdisc_pkt_len_init() */
> >> +	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
> >> +
> >> +	/* + transport layer */
> >> +	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
> >> +						SKB_GSO_TCPV6))) {
> >> +		const struct tcphdr *th;
> >> +		struct tcphdr _tcphdr;
> >> +
> >> +		th = skb_header_pointer(skb, skb_transport_offset(skb),
> >> +					sizeof(_tcphdr), &_tcphdr);
> >> +		if (likely(th))
> >> +			hdr_len += __tcp_hdrlen(th);
> >> +	} else {
> >
> >I didn't see some code limiting GSO packets to just TCP or UDP. Is it
> >safe to assume that this packet is an UDP one, and not SCTP or ESP,
> >for example?
> 
> As the comment says, I nicked this from the qdisc init code.
> So I assume it's safe? :)

As long as it doesn't go further than this, it is. As in, it is just
validating if it can contain an UDP header, and if so, account for its
size, without actually reading the header.

Considering everything !TCP as UDP works as an approximation, which is
quite accurate. SCTP header is just 4 bytes bigger than UDP header and
is equal to ESP header size.

> 
> >> +		struct udphdr _udphdr;
> >> +
> >> +		if (skb_header_pointer(skb, skb_transport_offset(skb),
> >> +				       sizeof(_udphdr), &_udphdr))
> >> +			hdr_len += sizeof(struct udphdr);
> >> +	}
> >> +
> >> +	if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
> >> +		segs = DIV_ROUND_UP(skb->len - hdr_len,
> >> +				    shinfo->gso_size);
> >> +	else
> >> +		segs = shinfo->gso_segs;
> >> +
> >> +	len = shinfo->gso_size + hdr_len;
> >> +	last_len = skb->len - shinfo->gso_size * (segs - 1);
> >> +
> >> +	return (cake_calc_overhead(q, len, off) * (segs - 1) +
> >> +		cake_calc_overhead(q, last_len, off));
> >> +}
> >> +
>
Toke Høiland-Jørgensen May 22, 2018, 10:06 p.m. UTC | #4
Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> writes:

> On Tue, May 22, 2018 at 10:44:53AM +0200, Toke Høiland-Jørgensen wrote:
>> 
>> 
>> On 22 May 2018 01:45:13 CEST, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> wrote:
>> >On Mon, May 21, 2018 at 10:35:58PM +0200, Toke Høiland-Jørgensen wrote:
>> >> +static u32 cake_overhead(struct cake_sched_data *q, const struct
>> >sk_buff *skb)
>> >> +{
>> >> +	const struct skb_shared_info *shinfo = skb_shinfo(skb);
>> >> +	unsigned int hdr_len, last_len = 0;
>> >> +	u32 off = skb_network_offset(skb);
>> >> +	u32 len = qdisc_pkt_len(skb);
>> >> +	u16 segs = 1;
>> >> +
>> >> +	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
>> >> +
>> >> +	if (!shinfo->gso_size)
>> >> +		return cake_calc_overhead(q, len, off);
>> >> +
>> >> +	/* borrowed from qdisc_pkt_len_init() */
>> >> +	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
>> >> +
>> >> +	/* + transport layer */
>> >> +	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
>> >> +						SKB_GSO_TCPV6))) {
>> >> +		const struct tcphdr *th;
>> >> +		struct tcphdr _tcphdr;
>> >> +
>> >> +		th = skb_header_pointer(skb, skb_transport_offset(skb),
>> >> +					sizeof(_tcphdr), &_tcphdr);
>> >> +		if (likely(th))
>> >> +			hdr_len += __tcp_hdrlen(th);
>> >> +	} else {
>> >
>> >I didn't see some code limiting GSO packets to just TCP or UDP. Is it
>> >safe to assume that this packet is an UDP one, and not SCTP or ESP,
>> >for example?
>> 
>> As the comment says, I nicked this from the qdisc init code.
>> So I assume it's safe? :)
>
> As long as it doesn't go further than this, it is. As in, it is just
> validating if it can contain an UDP header, and if so, account for its
> size, without actually reading the header.
>
> Considering everything !TCP as UDP works as an approximation, which is
> quite accurate. SCTP header is just 4 bytes bigger than UDP header and
> is equal to ESP header size.

Yup, that seems close enough for our purposes. Thanks for explaining.
Didn't actually know that GSO handles other protocols as well :)

-Toke
diff mbox series

Patch

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 687fa9a38a0d..21785dc31acc 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -272,6 +272,7 @@  enum {
 
 struct cobalt_skb_cb {
 	ktime_t enqueue_time;
+	u32     adjusted_len;
 };
 
 static u64 us_to_ns(u64 us)
@@ -1290,6 +1291,88 @@  static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
 	return avg;
 }
 
+static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
+{
+	if (q->rate_flags & CAKE_FLAG_OVERHEAD)
+		len -= off;
+
+	if (q->max_netlen < len)
+		q->max_netlen = len;
+	if (q->min_netlen > len)
+		q->min_netlen = len;
+
+	len += q->rate_overhead;
+
+	if (len < q->rate_mpu)
+		len = q->rate_mpu;
+
+	if (q->atm_mode == CAKE_ATM_ATM) {
+		len += 47;
+		len /= 48;
+		len *= 53;
+	} else if (q->atm_mode == CAKE_ATM_PTM) {
+		/* Add one byte per 64 bytes or part thereof.
+		 * This is conservative and easier to calculate than the
+		 * precise value.
+		 */
+		len += (len + 63) / 64;
+	}
+
+	if (q->max_adjlen < len)
+		q->max_adjlen = len;
+	if (q->min_adjlen > len)
+		q->min_adjlen = len;
+
+	return len;
+}
+
+static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	unsigned int hdr_len, last_len = 0;
+	u32 off = skb_network_offset(skb);
+	u32 len = qdisc_pkt_len(skb);
+	u16 segs = 1;
+
+	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
+
+	if (!shinfo->gso_size)
+		return cake_calc_overhead(q, len, off);
+
+	/* borrowed from qdisc_pkt_len_init() */
+	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+	/* + transport layer */
+	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
+						SKB_GSO_TCPV6))) {
+		const struct tcphdr *th;
+		struct tcphdr _tcphdr;
+
+		th = skb_header_pointer(skb, skb_transport_offset(skb),
+					sizeof(_tcphdr), &_tcphdr);
+		if (likely(th))
+			hdr_len += __tcp_hdrlen(th);
+	} else {
+		struct udphdr _udphdr;
+
+		if (skb_header_pointer(skb, skb_transport_offset(skb),
+				       sizeof(_udphdr), &_udphdr))
+			hdr_len += sizeof(struct udphdr);
+	}
+
+	if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
+		segs = DIV_ROUND_UP(skb->len - hdr_len,
+				    shinfo->gso_size);
+	else
+		segs = shinfo->gso_segs;
+
+	len = shinfo->gso_size + hdr_len;
+	last_len = skb->len - shinfo->gso_size * (segs - 1);
+
+	return (cake_calc_overhead(q, len, off) * (segs - 1) +
+		cake_calc_overhead(q, last_len, off));
+}
+
 static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
 {
 	struct cake_heap_entry ii = q->overflow_heap[i];
@@ -1367,7 +1450,7 @@  static int cake_advance_shaper(struct cake_sched_data *q,
 			       struct sk_buff *skb,
 			       ktime_t now, bool drop)
 {
-	u32 len = qdisc_pkt_len(skb);
+	u32 len = get_cobalt_cb(skb)->adjusted_len;
 
 	/* charge packet bandwidth to this tin
 	 * and to the global shaper.
@@ -1564,6 +1647,7 @@  static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		b->max_skblen = len;
 
 	cobalt_set_enqueue_time(skb, now);
+	get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
 	flow_queue_add(flow, skb);
 
 	if (q->ack_filter)
@@ -2364,6 +2448,31 @@  static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 		q->flow_mode = (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
 				CAKE_FLOW_MASK);
 
+	if (tb[TCA_CAKE_ATM])
+		q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+
+	if (tb[TCA_CAKE_OVERHEAD]) {
+		q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
+		q->rate_flags |= CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_RAW]) {
+		q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_MPU])
+		q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+
 	if (tb[TCA_CAKE_RTT]) {
 		q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
 
@@ -2540,6 +2649,19 @@  static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
 			!!(q->rate_flags & CAKE_FLAG_WASH)))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+		goto nla_put_failure;
+
+	if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+		if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
+			goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+		goto nla_put_failure;
+
 	return nla_nest_end(skb, opts);
 
 nla_put_failure: