
[PATCHv4,net-next,3/3] ipv4: Create probe timer for tcp PMTU as per RFC4821

Message ID 1425374361-21047-4-git-send-email-fan.du@intel.com
State Changes Requested, archived
Delegated to: David Miller

Commit Message

Fan Du March 3, 2015, 9:19 a.m. UTC
As per RFC4821 7.3 "Selecting Probe Size", a probe timer should
be armed once probing has converged. Once this timer expires, probe
again to take advantage of any path MTU change. The recommended
probing interval is 10 minutes per RFC1981. The probing interval can
be configured via sysctl_tcp_probe_interval.

Eric Dumazet suggested implementing a pseudo timer based on the
32-bit jiffies tcp_time_stamp instead of a classic timer for such a
rare event.

Signed-off-by: Fan Du <fan.du@intel.com>
---
v3:
  - Fix pseudo timer delta calculation.
v2:
  - Create a pseudo timer based on the 32-bit jiffies tcp_time_stamp
    to control when to reprobe, as suggested by Eric.
---
 include/net/inet_connection_sock.h |    2 ++
 include/net/netns/ipv4.h           |    1 +
 include/net/tcp.h                  |    3 +++
 net/ipv4/sysctl_net_ipv4.c         |    7 +++++++
 net/ipv4/tcp_ipv4.c                |    1 +
 net/ipv4/tcp_output.c              |   34 ++++++++++++++++++++++++++++++++--
 net/ipv4/tcp_timer.c               |    1 +
 7 files changed, 47 insertions(+), 2 deletions(-)
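
The pseudo timer mentioned above amounts to comparing two 32-bit jiffies
timestamps through a signed delta, which stays correct across counter
wrap-around, so no real timer needs to be armed for such a rare event. A
minimal user-space sketch of that pattern (hypothetical names; the HZ value
is assumed purely for illustration):

#include <stdbool.h>
#include <stdint.h>

#define HZ 1000U	/* assumed tick rate, illustration only */

/* True once at least interval_secs have elapsed between two 32-bit
 * jiffies-style timestamps.  The signed delta keeps the comparison
 * correct even after the u32 counter wraps.
 */
static bool probe_interval_elapsed(uint32_t now, uint32_t then,
				   uint32_t interval_secs)
{
	int32_t delta = (int32_t)(now - then);

	return delta >= (int32_t)(interval_secs * HZ);
}

tcp_mtu_check_reprobe() in the patch below applies the same idea using
tcp_time_stamp and sysctl_tcp_probe_interval.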

Comments

John Heffner March 3, 2015, 4:39 p.m. UTC | #1
On Tue, Mar 3, 2015 at 4:19 AM, Fan Du <fan.du@intel.com> wrote:
> As per RFC4821 7.3.  Selecting Probe Size, a probe timer should
> be armed once probing has converged. Once this timer expired,
> probing again to take advantage of any path PMTU change. The
> recommended probing interval is 10 minutes per RFC1981. Probing
> interval could be sysctled by sysctl_tcp_probe_interval.
>
> Eric Dumazet suggested to implement pseudo timer based on 32bits
> jiffies tcp_time_stamp instead of using classic timer for such
> rare event.
>
> Signed-off-by: Fan Du <fan.du@intel.com>

> @@ -1823,6 +1825,31 @@ send_now:
>         return false;
>  }
>
> +static inline void tcp_mtu_check_reprobe(struct sock *sk)
> +{
> +       struct inet_connection_sock *icsk = inet_csk(sk);
> +       struct tcp_sock *tp = tcp_sk(sk);
> +       struct net *net = sock_net(sk);
> +       u32 interval;
> +       s32 delta;
> +
> +       interval = net->ipv4.sysctl_tcp_probe_interval;
> +       delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
> +       if (unlikely(delta >= interval * HZ)) {
> +               int mss = tcp_current_mss(sk);
> +
> +               /* Update current search range */
> +               icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
> +                       sizeof(struct tcphdr) +
> +                       icsk->icsk_af_ops->net_header_len;
> +               icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
> +               icsk->icsk_mtup.probe_size = 0;
> +
> +               /* Update probe time stamp */
> +               icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
> +       }
> +}
> +
>  /* Create a new MTU probe if we are ready.
>   * MTU probe is regularly attempting to increase the path MTU by
>   * deliberately sending larger packets.  This discovers routing

I think the only update to the search range required here is
potentially moving search_high upward.  Touching search_low seems
unnecessary, and probe_size better be zero when executing this anyway.
(We don't want to be changing the state while a probe is in flight.)
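
A rough sketch (not code from this thread) of one possible reading of this
suggestion, reusing the identifiers from the patch: only re-open the top of
the search range and leave search_low and probe_size untouched.

static void tcp_mtu_check_reprobe(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	s32 delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;

	if (delta < (s32)(net->ipv4.sysctl_tcp_probe_interval * HZ))
		return;

	/* Only raise search_high back to the maximum; probe_size is
	 * expected to be 0 already when no probe is in flight.
	 */
	icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
				      sizeof(struct tcphdr) +
				      icsk->icsk_af_ops->net_header_len;
	icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
}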



> @@ -1866,8 +1893,11 @@ static int tcp_mtu_probe(struct sock *sk)
>         size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
>         interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
>         if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
> -           interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) {
> -               /* TODO: set timer for probe_converge_event */
> +               interval < net->ipv4.sysctl_tcp_probe_threshold) {
> +               /* Check whether enough time has elaplased for
> +                * another round of probing.
> +                */
> +               tcp_mtu_check_reprobe(sk);
>                 return -1;
>         }

The way this check works, I think putting it here may not be exactly
what we want.  The comment was to set a timer here, but that's not
exactly what tcp_mtu_check_reprobe does.  Since it may update the
search range, I think it would be better to call prior to comparing
the candidate probe_size to search_high.

Thanks,
  -John
FengYu LeiDian March 4, 2015, 2:11 a.m. UTC | #2
On 2015/03/04 00:39, John Heffner wrote:
> On Tue, Mar 3, 2015 at 4:19 AM, Fan Du <fan.du@intel.com> wrote:
>> As per RFC4821 7.3.  Selecting Probe Size, a probe timer should
>> be armed once probing has converged. Once this timer expired,
>> probing again to take advantage of any path PMTU change. The
>> recommended probing interval is 10 minutes per RFC1981. Probing
>> interval could be sysctled by sysctl_tcp_probe_interval.
>>
>> Eric Dumazet suggested to implement pseudo timer based on 32bits
>> jiffies tcp_time_stamp instead of using classic timer for such
>> rare event.
>>
>> Signed-off-by: Fan Du <fan.du@intel.com>
>
>> @@ -1823,6 +1825,31 @@ send_now:
>>          return false;
>>   }
>>
>> +static inline void tcp_mtu_check_reprobe(struct sock *sk)
>> +{
>> +       struct inet_connection_sock *icsk = inet_csk(sk);
>> +       struct tcp_sock *tp = tcp_sk(sk);
>> +       struct net *net = sock_net(sk);
>> +       u32 interval;
>> +       s32 delta;
>> +
>> +       interval = net->ipv4.sysctl_tcp_probe_interval;
>> +       delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
>> +       if (unlikely(delta >= interval * HZ)) {
>> +               int mss = tcp_current_mss(sk);
>> +
>> +               /* Update current search range */
>> +               icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
>> +                       sizeof(struct tcphdr) +
>> +                       icsk->icsk_af_ops->net_header_len;
>> +               icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
>> +               icsk->icsk_mtup.probe_size = 0;
>> +
>> +               /* Update probe time stamp */
>> +               icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
>> +       }
>> +}
>> +
>>   /* Create a new MTU probe if we are ready.
>>    * MTU probe is regularly attempting to increase the path MTU by
>>    * deliberately sending larger packets.  This discovers routing
>
> I think the only update to the search range required here is
> potentially moving search_high upward.  Touching search_low seems
> unnecessary,
No.

If you restore search_low to its original value, the first probe will
probably be no better than using the current mss. Since the current mss is
already working, any better mss lies between mtu(current mss) and
search_high, so reprobing all the way from search_low up to search_high is
not worth the effort.

> and probe_size better be zero when executing this anyway.
> (We don't want to be changing the state while a probe is in flight.)
Good point.
probe_size should be set to zero first, then the search range updated, to
keep the state consistent.
>
>
>> @@ -1866,8 +1893,11 @@ static int tcp_mtu_probe(struct sock *sk)
>>          size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
>>          interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
>>          if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
>> -           interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) {
>> -               /* TODO: set timer for probe_converge_event */
>> +               interval < net->ipv4.sysctl_tcp_probe_threshold) {
>> +               /* Check whether enough time has elaplased for
>> +                * another round of probing.
>> +                */
>> +               tcp_mtu_check_reprobe(sk);
>>                  return -1;
>>          }
>
> The way this check works, I think putting it here may not be exactly
> what we want.  The comment was to set a timer here, but that's not
> exactly what tcp_mtu_check_reprobe does.  Since it may update the
> search range, I think it would be better to call prior to comparing
> the candidate probe_size to search_high.
Semantically speaking, the reprobe check should be placed where a decision
not to probe has just been made, so that we reprobe from a stable state ;)
If we move the reprobe check earlier, the current probe attempt may be
legal, yet the reprobe check would rule it out.

So I don't see any compelling reason to move the reprobe check outside.

> Thanks,
>    -John
>

John Heffner March 4, 2015, 1:26 p.m. UTC | #3
On Tue, Mar 3, 2015 at 9:11 PM, Fan Du <fengyuleidian0615@gmail.com> wrote:
> On 2015/03/04 00:39, John Heffner wrote:
>
>> On Tue, Mar 3, 2015 at 4:19 AM, Fan Du <fan.du@intel.com> wrote:
>>>
>>> As per RFC4821 7.3.  Selecting Probe Size, a probe timer should
>>> be armed once probing has converged. Once this timer expired,
>>> probing again to take advantage of any path PMTU change. The
>>> recommended probing interval is 10 minutes per RFC1981. Probing
>>> interval could be sysctled by sysctl_tcp_probe_interval.
>>>
>>> Eric Dumazet suggested to implement pseudo timer based on 32bits
>>> jiffies tcp_time_stamp instead of using classic timer for such
>>> rare event.
>>>
>>> Signed-off-by: Fan Du <fan.du@intel.com>
>>
>>
>>> @@ -1823,6 +1825,31 @@ send_now:
>>>          return false;
>>>   }
>>>
>>> +static inline void tcp_mtu_check_reprobe(struct sock *sk)
>>> +{
>>> +       struct inet_connection_sock *icsk = inet_csk(sk);
>>> +       struct tcp_sock *tp = tcp_sk(sk);
>>> +       struct net *net = sock_net(sk);
>>> +       u32 interval;
>>> +       s32 delta;
>>> +
>>> +       interval = net->ipv4.sysctl_tcp_probe_interval;
>>> +       delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
>>> +       if (unlikely(delta >= interval * HZ)) {
>>> +               int mss = tcp_current_mss(sk);
>>> +
>>> +               /* Update current search range */
>>> +               icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
>>> +                       sizeof(struct tcphdr) +
>>> +                       icsk->icsk_af_ops->net_header_len;
>>> +               icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
>>> +               icsk->icsk_mtup.probe_size = 0;
>>> +
>>> +               /* Update probe time stamp */
>>> +               icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
>>> +       }
>>> +}
>>> +
>>>   /* Create a new MTU probe if we are ready.
>>>    * MTU probe is regularly attempting to increase the path MTU by
>>>    * deliberately sending larger packets.  This discovers routing
>>
>>
>> I think the only update to the search range required here is
>> potentially moving search_high upward.  Touching search_low seems
>> unnecessary,
>
> No.
>
> If you restoring search_low to its original, the first probe will probably
> be no better than using current mss, because current mss is working good,
> there may be better mss between mtu(current mss) and search_high. Reprobing
> from search_low to search_high is definitely not worth the effort.

Set search_high to the maximal MSS.  The next probe will be halfway
between search_low and MSS.  There's no reason to reduce search_low
when the current MSS == search_low.
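
For context, quoting from memory the candidate-size selection introduced
earlier in this series (so treat the exact line as approximate):

	probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
					 icsk->icsk_mtup.search_low) >> 1);

With search_low left at, say, 1024 and search_high re-opened to 1500, the
first re-probe therefore targets an MTU around 1262.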


>>> @@ -1866,8 +1893,11 @@ static int tcp_mtu_probe(struct sock *sk)
>>>          size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
>>>          interval = icsk->icsk_mtup.search_high -
>>> icsk->icsk_mtup.search_low;
>>>          if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)
>>> ||
>>> -           interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) {
>>> -               /* TODO: set timer for probe_converge_event */
>>> +               interval < net->ipv4.sysctl_tcp_probe_threshold) {
>>> +               /* Check whether enough time has elaplased for
>>> +                * another round of probing.
>>> +                */
>>> +               tcp_mtu_check_reprobe(sk);
>>>                  return -1;
>>>          }
>>
>>
>> The way this check works, I think putting it here may not be exactly
>> what we want.  The comment was to set a timer here, but that's not
>> exactly what tcp_mtu_check_reprobe does.  Since it may update the
>> search range, I think it would be better to call prior to comparing
>> the candidate probe_size to search_high.
>
> Semantically speaking, reprobe checking should be placed inside where
> a decision to not probe has been made, reprobing from a stable state ;)
> If we moving the reporobe checking outside, current probing may be legal,
> but the reprobe check will rule it out.
>
> So I don't see any compelling reason to move the reprobe checking outside.

The reprobe check should happen when you know you don't already have a
pending probe, not when a decision has already been made not to probe.
The impact is relatively minor because practically you're just
extending the duration of a long timer a bit more, but there's not a
good reason to do so.
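
A hedged sketch (not code from this thread) of where such a call could sit
inside tcp_mtu_probe(), before the candidate probe_size is computed and
compared against search_high:

	/* No probe is in flight here; re-open the search range first if
	 * the reprobe interval has elapsed, then pick the candidate.
	 */
	tcp_mtu_check_reprobe(sk);

	probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
					 icsk->icsk_mtup.search_low) >> 1);
	/* ... the existing probe_size > search_high and probe-threshold
	 * checks then follow unchanged ...
	 */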

Thanks,
  -John
FengYu LeiDian March 5, 2015, 2:36 a.m. UTC | #4
On 2015/03/04 21:26, John Heffner wrote:
> On Tue, Mar 3, 2015 at 9:11 PM, Fan Du <fengyuleidian0615@gmail.com> wrote:
>> On 2015/03/04 00:39, John Heffner wrote:
>>
>>> On Tue, Mar 3, 2015 at 4:19 AM, Fan Du <fan.du@intel.com> wrote:
>>>>
>>>> As per RFC4821 7.3.  Selecting Probe Size, a probe timer should
>>>> be armed once probing has converged. Once this timer expired,
>>>> probing again to take advantage of any path PMTU change. The
>>>> recommended probing interval is 10 minutes per RFC1981. Probing
>>>> interval could be sysctled by sysctl_tcp_probe_interval.
>>>>
>>>> Eric Dumazet suggested to implement pseudo timer based on 32bits
>>>> jiffies tcp_time_stamp instead of using classic timer for such
>>>> rare event.
>>>>
>>>> Signed-off-by: Fan Du <fan.du@intel.com>
>>>
>>>
>>>> @@ -1823,6 +1825,31 @@ send_now:
>>>>           return false;
>>>>    }
>>>>
>>>> +static inline void tcp_mtu_check_reprobe(struct sock *sk)
>>>> +{
>>>> +       struct inet_connection_sock *icsk = inet_csk(sk);
>>>> +       struct tcp_sock *tp = tcp_sk(sk);
>>>> +       struct net *net = sock_net(sk);
>>>> +       u32 interval;
>>>> +       s32 delta;
>>>> +
>>>> +       interval = net->ipv4.sysctl_tcp_probe_interval;
>>>> +       delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
>>>> +       if (unlikely(delta >= interval * HZ)) {
>>>> +               int mss = tcp_current_mss(sk);
>>>> +
>>>> +               /* Update current search range */
>>>> +               icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
>>>> +                       sizeof(struct tcphdr) +
>>>> +                       icsk->icsk_af_ops->net_header_len;
>>>> +               icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
>>>> +               icsk->icsk_mtup.probe_size = 0;
>>>> +
>>>> +               /* Update probe time stamp */
>>>> +               icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
>>>> +       }
>>>> +}
>>>> +
>>>>    /* Create a new MTU probe if we are ready.
>>>>     * MTU probe is regularly attempting to increase the path MTU by
>>>>     * deliberately sending larger packets.  This discovers routing
>>>
>>>
>>> I think the only update to the search range required here is
>>> potentially moving search_high upward.  Touching search_low seems
>>> unnecessary,
>>
>> No.
>>
>> If you restoring search_low to its original, the first probe will probably
>> be no better than using current mss, because current mss is working good,
>> there may be better mss between mtu(current mss) and search_high. Reprobing
>> from search_low to search_high is definitely not worth the effort.
>
> Set search_high to the maximal MSS.  The next probe will be halfway
> between search_low and MSS.  There's no reason to reduce search_low
> when the current MSS == search_low.

I think you misinterpret the code and my intention here. To put it simply:
the search range is 512 ~ 1500 and the current mss is 1024. For another
round of reprobing, should we start from A or B?
A: 1024 ~ 1500
B: 512  ~ 1500

The modification chooses to search from A, because a more optimal mss lies
between the current mss and search_high.

In the case of current mss == search_low, the modification also covers that
scenario: again, reprobe from the current mss up to search_high.

>
>>>> @@ -1866,8 +1893,11 @@ static int tcp_mtu_probe(struct sock *sk)
>>>>           size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
>>>>           interval = icsk->icsk_mtup.search_high -
>>>> icsk->icsk_mtup.search_low;
>>>>           if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)
>>>> ||
>>>> -           interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) {
>>>> -               /* TODO: set timer for probe_converge_event */
>>>> +               interval < net->ipv4.sysctl_tcp_probe_threshold) {
>>>> +               /* Check whether enough time has elaplased for
>>>> +                * another round of probing.
>>>> +                */
>>>> +               tcp_mtu_check_reprobe(sk);
>>>>                   return -1;
>>>>           }
>>>
>>>
>>> The way this check works, I think putting it here may not be exactly
>>> what we want.  The comment was to set a timer here, but that's not
>>> exactly what tcp_mtu_check_reprobe does.  Since it may update the
>>> search range, I think it would be better to call prior to comparing
>>> the candidate probe_size to search_high.
>>
>> Semantically speaking, reprobe checking should be placed inside where
>> a decision to not probe has been made, reprobing from a stable state ;)
>> If we moving the reporobe checking outside, current probing may be legal,
>> but the reprobe check will rule it out.
>>
>> So I don't see any compelling reason to move the reprobe checking outside.
>
> The reprobe check should happen when you know you don't already have a
> pending probe, not when a decision has already been made not to probe.
> The impact is relatively minor because practically you're just
> extending the duration of a long timer a bit more, but there's not a
> good reason to do so.
When the reprobe check tells us enough time has elapsed, it's time for
another probe attempt. But *if* we already happen to be probing at that
moment, we are closer to the optimal mss than we would be by reprobing from
the very beginning. Why throw away the current probing effort?

Restarting the probe exactly on the interval sounds logical, but not in a
scenario where an *active* probe is already in flight.

> Thanks,
>    -John
>


Patch

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 5976bde..b9a6b0a 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -126,6 +126,8 @@  struct inet_connection_sock {
 
 		/* Information on the current probe. */
 		int		  probe_size;
+
+		u32		  probe_timestamp;
 	} icsk_mtup;
 	u32			  icsk_ca_priv[16];
 	u32			  icsk_user_timeout;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 374bf3f..6323a22 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -86,6 +86,7 @@  struct netns_ipv4 {
 	int sysctl_tcp_mtu_probing;
 	int sysctl_tcp_base_mss;
 	int sysctl_tcp_probe_threshold;
+	u32 sysctl_tcp_probe_interval;
 
 	struct ping_group_range ping_group_range;
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d269c91..dd6adbc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -67,6 +67,9 @@  void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS		1024
 
+/* probing interval, default to 10 minutes as per RFC4821 */
+#define TCP_PROBE_INTERVAL	600
+
 /* Specify interval when tcp mtu probing will stop */
 #define TCP_PROBE_THRESHOLD	8
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d3c09c1..fdf8991 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -890,6 +890,13 @@  static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "tcp_probe_interval",
+		.data		= &init_net.ipv4.sysctl_tcp_probe_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 35790d9..f0c6fc3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2461,6 +2461,7 @@  static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_ecn = 2;
 	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
 	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
+	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
 	return 0;
 
 fail:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 46acddc..e99edf2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1354,6 +1354,8 @@  void tcp_mtup_init(struct sock *sk)
 			       icsk->icsk_af_ops->net_header_len;
 	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
 	icsk->icsk_mtup.probe_size = 0;
+	if (icsk->icsk_mtup.enabled)
+		icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
 }
 EXPORT_SYMBOL(tcp_mtup_init);
 
@@ -1823,6 +1825,31 @@  send_now:
 	return false;
 }
 
+static inline void tcp_mtu_check_reprobe(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct net *net = sock_net(sk);
+	u32 interval;
+	s32 delta;
+
+	interval = net->ipv4.sysctl_tcp_probe_interval;
+	delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
+	if (unlikely(delta >= interval * HZ)) {
+		int mss = tcp_current_mss(sk);
+
+		/* Update current search range */
+		icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
+			sizeof(struct tcphdr) +
+			icsk->icsk_af_ops->net_header_len;
+		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+		icsk->icsk_mtup.probe_size = 0;
+
+		/* Update probe time stamp */
+		icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+	}
+}
+
 /* Create a new MTU probe if we are ready.
  * MTU probe is regularly attempting to increase the path MTU by
  * deliberately sending larger packets.  This discovers routing
@@ -1866,8 +1893,11 @@  static int tcp_mtu_probe(struct sock *sk)
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 	interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
 	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
-	    interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) {
-		/* TODO: set timer for probe_converge_event */
+		interval < net->ipv4.sysctl_tcp_probe_threshold) {
+		/* Check whether enough time has elapsed for
+		 * another round of probing.
+		 */
+		tcp_mtu_check_reprobe(sk);
 		return -1;
 	}
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 0732b78..1550593 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -107,6 +107,7 @@  static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 	if (net->ipv4.sysctl_tcp_mtu_probing) {
 		if (!icsk->icsk_mtup.enabled) {
 			icsk->icsk_mtup.enabled = 1;
+			icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
 			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		} else {
 			struct net *net = sock_net(sk);