From patchwork Fri Mar 9 14:10:28 2018
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Patchwork-Submitter: Ilpo Järvinen
X-Patchwork-Id: 883656
X-Patchwork-Delegate: davem@davemloft.net
From: Ilpo Järvinen
To: netdev@vger.kernel.org
Cc: Yuchung Cheng, Neal Cardwell, Eric Dumazet
Subject: [PATCH v2 net 1/5] tcp: feed correct number of pkts acked to cc modules also in recovery
Date: Fri, 9 Mar 2018 16:10:28 +0200
Message-Id: <1520604632-21185-2-git-send-email-ilpo.jarvinen@helsinki.fi>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1520604632-21185-1-git-send-email-ilpo.jarvinen@helsinki.fi>
References: <1520604632-21185-1-git-send-email-ilpo.jarvinen@helsinki.fi>

A miscalculation for the number of acknowledged packets occurs during
RTO recovery whenever SACK is not enabled and a cumulative ACK covers
any non-retransmitted skbs. The reason is that the pkts_acked value
calculated in tcp_clean_rtx_queue is not correct for slow start after
RTO: it may include segments that were not lost and therefore did not
need to be retransmitted in the slow start following the RTO.
tcp_slow_start then adds the excess into cwnd, bloating it and
triggering a burst.

Instead, we want to pass only the number of retransmitted segments that
were covered by the cumulative ACK (and potentially newly sent data
segments too, if the cumulative ACK covers that far).

Signed-off-by: Ilpo Järvinen
---
 net/ipv4/tcp_input.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9a1b3c1..0305f6d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3027,6 +3027,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 	long seq_rtt_us = -1L;
 	long ca_rtt_us = -1L;
 	u32 pkts_acked = 0;
+	u32 rexmit_acked = 0;
+	u32 newdata_acked = 0;
 	u32 last_in_flight = 0;
 	bool rtt_update;
 	int flag = 0;
@@ -3056,8 +3058,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 		}
 
 		if (unlikely(sacked & TCPCB_RETRANS)) {
-			if (sacked & TCPCB_SACKED_RETRANS)
+			if (sacked & TCPCB_SACKED_RETRANS) {
 				tp->retrans_out -= acked_pcount;
+				rexmit_acked += acked_pcount;
+			}
 			flag |= FLAG_RETRANS_DATA_ACKED;
 		} else if (!(sacked & TCPCB_SACKED_ACKED)) {
 			last_ackt = skb->skb_mstamp;
@@ -3068,8 +3072,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 			last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
 			if (before(start_seq, reord))
 				reord = start_seq;
-			if (!after(scb->end_seq, tp->high_seq))
+			if (!after(scb->end_seq, tp->high_seq)) {
 				flag |= FLAG_ORIG_SACK_ACKED;
+			} else {
+				newdata_acked += acked_pcount;
+			}
 		}
 
 		if (sacked & TCPCB_SACKED_ACKED) {
@@ -3151,6 +3158,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 	}
 
 	if (tcp_is_reno(tp)) {
+		/* Due to discontinuity on RTO in the artificial
+		 * sacked_out calculations, TCP must restrict
+		 * pkts_acked without SACK to rexmits and new data
+		 * segments
+		 */
+		if (icsk->icsk_ca_state == TCP_CA_Loss)
+			pkts_acked = rexmit_acked + newdata_acked;
+
 		tcp_remove_reno_sacks(sk, pkts_acked);
 	} else {
 		int delta;

From patchwork Fri Mar 9 14:10:29 2018
X-Patchwork-Submitter: Ilpo Järvinen
X-Patchwork-Id: 883654
X-Patchwork-Delegate: davem@davemloft.net
From: Ilpo Järvinen
To: netdev@vger.kernel.org
Cc: Yuchung Cheng, Neal Cardwell, Eric Dumazet
Subject: [PATCH v2 net 2/5] tcp: prevent bogus FRTO undos with non-SACK flows
Date: Fri, 9 Mar 2018 16:10:29 +0200
Message-Id: <1520604632-21185-3-git-send-email-ilpo.jarvinen@helsinki.fi>
In-Reply-To: <1520604632-21185-1-git-send-email-ilpo.jarvinen@helsinki.fi>
References: <1520604632-21185-1-git-send-email-ilpo.jarvinen@helsinki.fi>

If SACK is not enabled and the first cumulative ACK after the RTO
retransmission covers more than the retransmitted skb, a spurious FRTO
undo will trigger (assuming FRTO is enabled for that RTO). The reason
is that any acknowledged non-retransmitted segment will set
FLAG_ORIG_SACK_ACKED in tcp_clean_rtx_queue even if there is no
indication that it was delivered for real (the scoreboard is not kept
with TCPCB_SACKED_ACKED bits in the non-SACK case, so the check for
that bit cannot help like it does with SACK). Having
FLAG_ORIG_SACK_ACKED set results in the spurious FRTO undo in
tcp_process_loss.

We need to use a stricter condition for the non-SACK case and check
that none of the cumulatively ACKed segments were retransmitted, to
prove that progress is due to original transmissions. Only then keep
FLAG_ORIG_SACK_ACKED set, allowing the FRTO undo to proceed in the
non-SACK case.
Signed-off-by: Ilpo Järvinen
---
 net/ipv4/tcp_input.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0305f6d..1277afb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3167,6 +3167,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 			pkts_acked = rexmit_acked + newdata_acked;
 
 		tcp_remove_reno_sacks(sk, pkts_acked);
+
+		/* If any of the cumulatively ACKed segments was
+		 * retransmitted, non-SACK case cannot confirm that
+		 * progress was due to original transmission due to
+		 * lack of TCPCB_SACKED_ACKED bits even if some of
+		 * the packets may have been never retransmitted.
+		 */
+		if (flag & FLAG_RETRANS_DATA_ACKED)
+			flag &= ~FLAG_ORIG_SACK_ACKED;
 	} else {
 		int delta;

From patchwork Fri Mar 9 14:10:30 2018
X-Patchwork-Submitter: Ilpo Järvinen
X-Patchwork-Id: 883651
X-Patchwork-Delegate: davem@davemloft.net
From: Ilpo Järvinen
To: netdev@vger.kernel.org
Cc: Yuchung Cheng, Neal Cardwell, Eric Dumazet
Subject: [PATCH v2 net 3/5] tcp: move false FR condition into tcp_false_fast_retrans_possible()
Date: Fri, 9 Mar 2018 16:10:30 +0200
Message-Id: <1520604632-21185-4-git-send-email-ilpo.jarvinen@helsinki.fi>
In-Reply-To: <1520604632-21185-1-git-send-email-ilpo.jarvinen@helsinki.fi>
References: <1520604632-21185-1-git-send-email-ilpo.jarvinen@helsinki.fi>

No functional changes. This change slightly simplifies the next one.

Signed-off-by: Ilpo Järvinen
---
 net/ipv4/tcp_input.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1277afb..4ccba94 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2211,6 +2211,17 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 	}
 }
 
+/* False fast retransmits may occur when SACK is not in use under certain
+ * conditions (RFC6582). The sender MUST hold old state until something
+ * *above* high_seq is ACKed to prevent triggering such false fast
+ * retransmits. SACK TCP is safe.
+ */
+static bool tcp_false_fast_retrans_possible(const struct tcp_sock *tp,
+					    const u32 snd_una)
+{
+	return tcp_is_reno(tp) && (snd_una == tp->high_seq);
+}
+
 static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
 {
 	return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
@@ -2350,10 +2361,10 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 	} else if (tp->rack.reo_wnd_persist) {
 		tp->rack.reo_wnd_persist--;
 	}
-	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
-		/* Hold old state until something *above* high_seq
-		 * is ACKed. For Reno it is MUST to prevent false
-		 * fast retransmits (RFC2582). SACK TCP is safe. */
+	if (tcp_false_fast_retrans_possible(tp, tp->snd_una)) {
+		/* Hold old state until something *above* high_seq is ACKed
+		 * if false fast retransmit is possible.
+		 */
 		if (!tcp_any_retrans_done(sk))
 			tp->retrans_stamp = 0;
 		return true;

From patchwork Fri Mar 9 14:10:31 2018
X-Patchwork-Submitter: Ilpo Järvinen
X-Patchwork-Id: 883652
X-Patchwork-Delegate: davem@davemloft.net
From: Ilpo Järvinen
To: netdev@vger.kernel.org
Cc: Yuchung Cheng, Neal Cardwell, Eric Dumazet
Subject: [PATCH v2 net 4/5] tcp: prevent bogus undos when SACK is not enabled
Date: Fri, 9 Mar 2018 16:10:31 +0200
Message-Id: <1520604632-21185-5-git-send-email-ilpo.jarvinen@helsinki.fi>
In-Reply-To: <1520604632-21185-1-git-send-email-ilpo.jarvinen@helsinki.fi>
References: <1520604632-21185-1-git-send-email-ilpo.jarvinen@helsinki.fi>

When a cumulative ACK lands to high_seq at the end of loss recovery and
SACK is not enabled, the sender needs to avoid false fast retransmits
(RFC6582). The avoidance mechanism is implemented by remaining in the
loss recovery CA state until one additional cumulative ACK arrives.
During the operation of this avoidance mechanism, there is an internal
transient in the use of state variables which will always trigger a
bogus undo.

When we enter this transient state in tcp_try_undo_recovery,
tcp_any_retrans_done is often (always?) false, resulting in clearing
retrans_stamp. On the next cumulative ACK, tcp_try_undo_recovery
executes again because the CA state still remains in the same recovery
state and tcp_may_undo will always return true because
tcp_packet_delayed has this condition:

    return !tp->retrans_stamp || ...

Check in tcp_packet_delayed whether the false fast retransmit avoidance
transient is in progress, to avoid bogus undos.
Since snd_una has already advanced on this ACK but the CA state still
remains unchanged (the CA state is updated slightly later than undo is
checked), prior_snd_una needs to be passed to tcp_packet_delayed
(instead of tp->snd_una). Passing prior_snd_una around to
tcp_packet_delayed makes this change look more involved than it really
is.

The additional checks done in this change only affect the non-SACK
case; the SACK case remains the same.

Signed-off-by: Ilpo Järvinen
---
 net/ipv4/tcp_input.c | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4ccba94..e67551a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2241,10 +2241,17 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
-static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
+static inline bool tcp_packet_delayed(const struct tcp_sock *tp,
+				      const u32 prior_snd_una)
 {
-	return !tp->retrans_stamp ||
-	       tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
+	if (!tp->retrans_stamp) {
+		/* Sender will be in a transient state with cleared
+		 * retrans_stamp during false fast retransmit prevention
+		 * mechanism
+		 */
+		return !tcp_false_fast_retrans_possible(tp, prior_snd_una);
+	}
+	return tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
 }
 
 /* Undo procedures. */
@@ -2334,17 +2341,19 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 	tp->rack.advanced = 1; /* Force RACK to re-exam losses */
 }
 
-static inline bool tcp_may_undo(const struct tcp_sock *tp)
+static inline bool tcp_may_undo(const struct tcp_sock *tp,
+				const u32 prior_snd_una)
 {
-	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
+	return tp->undo_marker &&
+	       (!tp->undo_retrans || tcp_packet_delayed(tp, prior_snd_una));
 }
 
 /* People celebrate: "We love our President!" */
-static bool tcp_try_undo_recovery(struct sock *sk)
+static bool tcp_try_undo_recovery(struct sock *sk, const u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tcp_may_undo(tp)) {
+	if (tcp_may_undo(tp, prior_snd_una)) {
 		int mib_idx;
 
 		/* Happy end! We did not retransmit anything
@@ -2391,11 +2400,12 @@ static bool tcp_try_undo_dsack(struct sock *sk)
 }
 
 /* Undo during loss recovery after partial ACK or using F-RTO. */
-static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
+static bool tcp_try_undo_loss(struct sock *sk, const u32 prior_snd_una,
+			      bool frto_undo)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (frto_undo || tcp_may_undo(tp)) {
+	if (frto_undo || tcp_may_undo(tp, prior_snd_una)) {
 		tcp_undo_cwnd_reduction(sk, true);
 
 		DBGUNDO(sk, "partial loss");
@@ -2628,13 +2638,13 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
  * recovered or spurious. Otherwise retransmits more on partial ACKs.
  */
 static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
-			     int *rexmit)
+			     int *rexmit, const u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool recovered = !before(tp->snd_una, tp->high_seq);
 
 	if ((flag & FLAG_SND_UNA_ADVANCED) &&
-	    tcp_try_undo_loss(sk, false))
+	    tcp_try_undo_loss(sk, prior_snd_una, false))
 		return;
 
 	if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
@@ -2642,7 +2652,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 		 * lost, i.e., never-retransmitted data are (s)acked.
 		 */
 		if ((flag & FLAG_ORIG_SACK_ACKED) &&
-		    tcp_try_undo_loss(sk, true))
+		    tcp_try_undo_loss(sk, prior_snd_una, true))
 			return;
 
 		if (after(tp->snd_nxt, tp->high_seq)) {
@@ -2665,7 +2675,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 
 	if (recovered) {
 		/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
-		tcp_try_undo_recovery(sk);
+		tcp_try_undo_recovery(sk, prior_snd_una);
 		return;
 	}
 	if (tcp_is_reno(tp)) {
@@ -2685,7 +2695,7 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->undo_marker && tcp_packet_delayed(tp)) {
+	if (tp->undo_marker && tcp_packet_delayed(tp, prior_snd_una)) {
 		/* Plain luck! Hole if filled with delayed
 		 * packet, rather than with a retransmit. Check reordering.
 		 */
@@ -2788,7 +2798,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 	case TCP_CA_Recovery:
 		if (tcp_is_reno(tp))
 			tcp_reset_reno_sack(tp);
-		if (tcp_try_undo_recovery(sk))
+		if (tcp_try_undo_recovery(sk, prior_snd_una))
 			return;
 		tcp_end_cwnd_reduction(sk);
 		break;
@@ -2815,7 +2825,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 		tcp_rack_identify_loss(sk, ack_flag);
 		break;
 	case TCP_CA_Loss:
-		tcp_process_loss(sk, flag, is_dupack, rexmit);
+		tcp_process_loss(sk, flag, is_dupack, rexmit, prior_snd_una);
 		tcp_rack_identify_loss(sk, ack_flag);
 		if (!(icsk->icsk_ca_state == TCP_CA_Open ||
 		      (*ack_flag & FLAG_LOST_RETRANS)))

From patchwork Fri Mar 9 14:10:32 2018
X-Patchwork-Submitter: Ilpo Järvinen
X-Patchwork-Id: 883655
X-Patchwork-Delegate: davem@davemloft.net
From: Ilpo Järvinen
To: netdev@vger.kernel.org
Cc: Yuchung Cheng, Neal Cardwell, Eric Dumazet
Subject: [PATCH v2 net 5/5] tcp: send real dupACKs by locking advertized window for non-SACK flows
Date: Fri, 9 Mar 2018 16:10:32 +0200
Message-Id: <1520604632-21185-6-git-send-email-ilpo.jarvinen@helsinki.fi>
In-Reply-To: <1520604632-21185-1-git-send-email-ilpo.jarvinen@helsinki.fi>
References: <1520604632-21185-1-git-send-email-ilpo.jarvinen@helsinki.fi>

Currently, the TCP code is overly eager to increase the window on
almost every ACK. This makes the ACKs that the receiver should send as
dupACKs look like window updates, which the non-SACK sender-side code
does not consider real dupACKs. Therefore the sender needs to resort to
RTO to recover losses, as fast retransmit/fast recovery cannot be
triggered by such masked duplicate ACKs.
This change makes sure that when an ofo segment is received, no change
to the window is applied if we are going to send a dupACK. Even with
this change, the window updates keep being maintained, but that occurs
"behind the scenes". That is, this change does not interfere with the
memory management of the flow, which could have a long-term impact on
the progress of the flow; it only prevents the updates from being seen
on the wire in the short term.

It is ok to change the window for non-dupACKs, such as the first ACK
after ofo arrivals start if that ACK was delayed, and also whenever the
ack field advances. As the ack field typically advances once per RTT as
the first hole is retransmitted, the window updates are not withheld
entirely during long recoveries.

Even before this fix, tcp_select_window did not allow an ACK to shrink
the window for duplicate ACKs (that was previously even called
"treason", but the old charming message is gone now). The advertized
window can only shrink when the ack field also changes, which will not
be blocked by this change as that is not a duplicate ACK.

Signed-off-by: Ilpo Järvinen
---
 include/linux/tcp.h   |  3 ++-
 net/ipv4/tcp_input.c  |  5 ++++-
 net/ipv4/tcp_output.c | 43 +++++++++++++++++++++++++------------------
 3 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8f4c549..e239662 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -225,7 +225,8 @@ struct tcp_sock {
 		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
 		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
 		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */
-		unused:2;
+		dupack_wnd_lock :1, /* Non-SACK constant rwnd dupacks needed? */
+		unused:1;
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		unused1	    : 1,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e67551a..ca49b62 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4627,6 +4627,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	bool fragstolen;
 	int eaten;
 
@@ -4670,7 +4671,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		 * gap in queue is filled.
 		 */
 		if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
-			inet_csk(sk)->icsk_ack.pingpong = 0;
+			icsk->icsk_ack.pingpong = 0;
 	}
 
 	if (tp->rx_opt.num_sacks)
@@ -4720,6 +4721,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		goto queue_and_out;
 	}
 
+	if (tcp_is_reno(tp) && !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+		tp->dupack_wnd_lock = 1;
 	tcp_data_queue_ofo(sk, skb);
 }
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6818042..45fbe92 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -249,25 +249,32 @@ static u16 tcp_select_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 old_win = tp->rcv_wnd;
-	u32 cur_win = tcp_receive_window(tp);
-	u32 new_win = __tcp_select_window(sk);
-
-	/* Never shrink the offered window */
-	if (new_win < cur_win) {
-		/* Danger Will Robinson!
-		 * Don't update rcv_wup/rcv_wnd here or else
-		 * we will not be able to advertise a zero
-		 * window in time.  --DaveM
-		 *
-		 * Relax Will Robinson.
-		 */
-		if (new_win == 0)
-			NET_INC_STATS(sock_net(sk),
-				      LINUX_MIB_TCPWANTZEROWINDOWADV);
-		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
+	u32 cur_win;
+	u32 new_win;
+
+	if (tp->dupack_wnd_lock) {
+		new_win = old_win;
+		tp->dupack_wnd_lock = 0;
+	} else {
+		cur_win = tcp_receive_window(tp);
+		new_win = __tcp_select_window(sk);
+		/* Never shrink the offered window */
+		if (new_win < cur_win) {
+			/* Danger Will Robinson!
+			 * Don't update rcv_wup/rcv_wnd here or else
+			 * we will not be able to advertise a zero
+			 * window in time.  --DaveM
+			 *
+			 * Relax Will Robinson.
+			 */
+			if (new_win == 0)
+				NET_INC_STATS(sock_net(sk),
+					      LINUX_MIB_TCPWANTZEROWINDOWADV);
+			new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
+		}
+		tp->rcv_wnd = new_win;
+		tp->rcv_wup = tp->rcv_nxt;
 	}
-	tp->rcv_wnd = new_win;
-	tp->rcv_wup = tp->rcv_nxt;
 
 	/* Make sure we do not exceed the maximum possible
 	 * scaled window.