diff mbox series

[net-next,v5,1/6] net: tcp: Add trace events for TCP congestion window tracing

Message ID 151390833345.13277.9194686808843116185.stgit@devbox
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series net: tcp: sctp: dccp: Replace jprobe usage with trace events | expand

Commit Message

Masami Hiramatsu (Google) Dec. 22, 2017, 2:05 a.m. UTC
This adds an event to trace TCP stat variables with a
slightly intrusive trace event. It uses the ftrace/perf
event log buffer to trace those variables, so there is no
need to prepare a separate ring buffer or custom user apps.

Users can use ftrace to trace this event as shown below:

  # cd /sys/kernel/debug/tracing
  # echo 1 > events/tcp/tcp_probe/enable
  (run workloads)
  # cat trace

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
 Changes in v3:
  - Fix build errors caused by including events/tcp.h twice.
  - Sort out the included headers.
---
 include/trace/events/tcp.h |   80 ++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_input.c       |    3 ++
 2 files changed, 83 insertions(+)

Comments

David Miller Dec. 26, 2017, 11:51 p.m. UTC | #1
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 22 Dec 2017 11:05:33 +0900

> This adds an event to trace TCP stat variables with
> slightly intrusive trace-event. This uses ftrace/perf
> event log buffer to trace those state, no needs to
> prepare own ring-buffer, nor custom user apps.
> 
> User can use ftrace to trace this event as below;
> 
>   # cd /sys/kernel/debug/tracing
>   # echo 1 > events/tcp/tcp_probe/enable
>   (run workloads)
>   # cat trace
> 
> Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
 ...
> +	TP_fast_assign(
> +		const struct tcp_sock *tp = tcp_sk(sk);
> +		const struct inet_sock *inet = inet_sk(sk);
> +
> +		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
> +		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
> +
> +		if (sk->sk_family == AF_INET) {
> +			struct sockaddr_in *v4 = (void *)__entry->saddr;
> +
> +			v4->sin_family = AF_INET;
> +			v4->sin_port = inet->inet_sport;
> +			v4->sin_addr.s_addr = inet->inet_saddr;
> +			v4 = (void *)__entry->daddr;
> +			v4->sin_family = AF_INET;
> +			v4->sin_port = inet->inet_dport;
> +			v4->sin_addr.s_addr = inet->inet_daddr;
> +#if IS_ENABLED(CONFIG_IPV6)
> +		} else if (sk->sk_family == AF_INET6) {

It looks like doing this ifdef test inside of a trace macro is very
undesirable because it upsets sparse.

Please see the following commit which just went into 'net'.

====================
commit 6a6b0b9914e73a8a54253dd5f6f5e5dd5e4a756c
Author: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date:   Thu Dec 21 10:29:09 2017 -0800

    tcp: Avoid preprocessor directives in tracepoint macro args
    
    Using a preprocessor directive to check for CONFIG_IPV6 in the middle of
    a DECLARE_EVENT_CLASS macro's arg list causes sparse to report a series
    of errors:
    
    ./include/trace/events/tcp.h:68:1: error: directive in argument list
    ./include/trace/events/tcp.h:75:1: error: directive in argument list
    ./include/trace/events/tcp.h:144:1: error: directive in argument list
    ./include/trace/events/tcp.h:151:1: error: directive in argument list
    ./include/trace/events/tcp.h:216:1: error: directive in argument list
    ./include/trace/events/tcp.h:223:1: error: directive in argument list
    ./include/trace/events/tcp.h:274:1: error: directive in argument list
    ./include/trace/events/tcp.h:281:1: error: directive in argument list
    
    Once sparse finds an error, it stops printing warnings for the file it
    is checking. This masks any sparse warnings that would normally be
    reported for the core TCP code.
    
    Instead, handle the preprocessor conditionals in a couple of auxiliary
    macros. This also has the benefit of reducing duplicate code.
    
    Cc: David Ahern <dsahern@gmail.com>
    Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 07cccca..ab34c56 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -25,6 +25,35 @@
 		tcp_state_name(TCP_CLOSING),		\
 		tcp_state_name(TCP_NEW_SYN_RECV))
 
+#define TP_STORE_V4MAPPED(__entry, saddr, daddr)		\
+	do {							\
+		struct in6_addr *pin6;				\
+								\
+		pin6 = (struct in6_addr *)__entry->saddr_v6;	\
+		ipv6_addr_set_v4mapped(saddr, pin6);		\
+		pin6 = (struct in6_addr *)__entry->daddr_v6;	\
+		ipv6_addr_set_v4mapped(daddr, pin6);		\
+	} while (0)
+
+#if IS_ENABLED(CONFIG_IPV6)
+#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)		\
+	do {								\
+		if (sk->sk_family == AF_INET6) {			\
+			struct in6_addr *pin6;				\
+									\
+			pin6 = (struct in6_addr *)__entry->saddr_v6;	\
+			*pin6 = saddr6;					\
+			pin6 = (struct in6_addr *)__entry->daddr_v6;	\
+			*pin6 = daddr6;					\
+		} else {						\
+			TP_STORE_V4MAPPED(__entry, saddr, daddr);	\
+		}							\
+	} while (0)
+#else
+#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)	\
+	TP_STORE_V4MAPPED(__entry, saddr, daddr)
+#endif
+
 /*
  * tcp event with arguments sk and skb
  *
@@ -50,7 +79,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
 
 	TP_fast_assign(
 		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *pin6;
 		__be32 *p32;
 
 		__entry->skbaddr = skb;
@@ -65,20 +93,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
 		p32 = (__be32 *) __entry->daddr;
 		*p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == AF_INET6) {
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			*pin6 = sk->sk_v6_rcv_saddr;
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			*pin6 = sk->sk_v6_daddr;
-		} else
-#endif
-		{
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-		}
+		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+			      sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
 	),
 
 	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -127,7 +143,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
 
 	TP_fast_assign(
 		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *pin6;
 		__be32 *p32;
 
 		__entry->skaddr = sk;
@@ -141,20 +156,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
 		p32 = (__be32 *) __entry->daddr;
 		*p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == AF_INET6) {
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			*pin6 = sk->sk_v6_rcv_saddr;
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			*pin6 = sk->sk_v6_daddr;
-		} else
-#endif
-		{
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-		}
+		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
 	),
 
 	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -197,7 +200,6 @@ TRACE_EVENT(tcp_set_state,
 
 	TP_fast_assign(
 		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *pin6;
 		__be32 *p32;
 
 		__entry->skaddr = sk;
@@ -213,20 +215,8 @@ TRACE_EVENT(tcp_set_state,
 		p32 = (__be32 *) __entry->daddr;
 		*p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == AF_INET6) {
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			*pin6 = sk->sk_v6_rcv_saddr;
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			*pin6 = sk->sk_v6_daddr;
-		} else
-#endif
-		{
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-		}
+		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
 	),
 
 	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
@@ -256,7 +246,6 @@ TRACE_EVENT(tcp_retransmit_synack,
 
 	TP_fast_assign(
 		struct inet_request_sock *ireq = inet_rsk(req);
-		struct in6_addr *pin6;
 		__be32 *p32;
 
 		__entry->skaddr = sk;
@@ -271,20 +260,8 @@ TRACE_EVENT(tcp_retransmit_synack,
 		p32 = (__be32 *) __entry->daddr;
 		*p32 = ireq->ir_rmt_addr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == AF_INET6) {
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			*pin6 = ireq->ir_v6_loc_addr;
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			*pin6 = ireq->ir_v6_rmt_addr;
-		} else
-#endif
-		{
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6);
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6);
-		}
+		TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr,
+			      ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr);
 	),
 
 	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
Masami Hiramatsu (Google) Dec. 27, 2017, 5:43 a.m. UTC | #2
On Tue, 26 Dec 2017 18:51:55 -0500 (EST)
David Miller <davem@davemloft.net> wrote:

> From: Masami Hiramatsu <mhiramat@kernel.org>
> Date: Fri, 22 Dec 2017 11:05:33 +0900
> 
> > This adds an event to trace TCP stat variables with
> > slightly intrusive trace-event. This uses ftrace/perf
> > event log buffer to trace those state, no needs to
> > prepare own ring-buffer, nor custom user apps.
> > 
> > User can use ftrace to trace this event as below;
> > 
> >   # cd /sys/kernel/debug/tracing
> >   # echo 1 > events/tcp/tcp_probe/enable
> >   (run workloads)
> >   # cat trace
> > 
> > Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
>  ...
> > +	TP_fast_assign(
> > +		const struct tcp_sock *tp = tcp_sk(sk);
> > +		const struct inet_sock *inet = inet_sk(sk);
> > +
> > +		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
> > +		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
> > +
> > +		if (sk->sk_family == AF_INET) {
> > +			struct sockaddr_in *v4 = (void *)__entry->saddr;
> > +
> > +			v4->sin_family = AF_INET;
> > +			v4->sin_port = inet->inet_sport;
> > +			v4->sin_addr.s_addr = inet->inet_saddr;
> > +			v4 = (void *)__entry->daddr;
> > +			v4->sin_family = AF_INET;
> > +			v4->sin_port = inet->inet_dport;
> > +			v4->sin_addr.s_addr = inet->inet_daddr;
> > +#if IS_ENABLED(CONFIG_IPV6)
> > +		} else if (sk->sk_family == AF_INET6) {
> 
> It looks like doing this ifdef test inside of a trace macro is very
> undesirable because it upsets sparse.
> 
> Please see the following commit which just went into 'net'.

OK, that helps me see how to avoid it :)

I'll update the series.

Thank you,

> 
> ====================
> commit 6a6b0b9914e73a8a54253dd5f6f5e5dd5e4a756c
> Author: Mat Martineau <mathew.j.martineau@linux.intel.com>
> Date:   Thu Dec 21 10:29:09 2017 -0800
> 
>     tcp: Avoid preprocessor directives in tracepoint macro args
>     
>     Using a preprocessor directive to check for CONFIG_IPV6 in the middle of
>     a DECLARE_EVENT_CLASS macro's arg list causes sparse to report a series
>     of errors:
>     
>     ./include/trace/events/tcp.h:68:1: error: directive in argument list
>     ./include/trace/events/tcp.h:75:1: error: directive in argument list
>     ./include/trace/events/tcp.h:144:1: error: directive in argument list
>     ./include/trace/events/tcp.h:151:1: error: directive in argument list
>     ./include/trace/events/tcp.h:216:1: error: directive in argument list
>     ./include/trace/events/tcp.h:223:1: error: directive in argument list
>     ./include/trace/events/tcp.h:274:1: error: directive in argument list
>     ./include/trace/events/tcp.h:281:1: error: directive in argument list
>     
>     Once sparse finds an error, it stops printing warnings for the file it
>     is checking. This masks any sparse warnings that would normally be
>     reported for the core TCP code.
>     
>     Instead, handle the preprocessor conditionals in a couple of auxiliary
>     macros. This also has the benefit of reducing duplicate code.
>     
>     Cc: David Ahern <dsahern@gmail.com>
>     Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
>     Signed-off-by: David S. Miller <davem@davemloft.net>
> 
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index 07cccca..ab34c56 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -25,6 +25,35 @@
>  		tcp_state_name(TCP_CLOSING),		\
>  		tcp_state_name(TCP_NEW_SYN_RECV))
>  
> +#define TP_STORE_V4MAPPED(__entry, saddr, daddr)		\
> +	do {							\
> +		struct in6_addr *pin6;				\
> +								\
> +		pin6 = (struct in6_addr *)__entry->saddr_v6;	\
> +		ipv6_addr_set_v4mapped(saddr, pin6);		\
> +		pin6 = (struct in6_addr *)__entry->daddr_v6;	\
> +		ipv6_addr_set_v4mapped(daddr, pin6);		\
> +	} while (0)
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)		\
> +	do {								\
> +		if (sk->sk_family == AF_INET6) {			\
> +			struct in6_addr *pin6;				\
> +									\
> +			pin6 = (struct in6_addr *)__entry->saddr_v6;	\
> +			*pin6 = saddr6;					\
> +			pin6 = (struct in6_addr *)__entry->daddr_v6;	\
> +			*pin6 = daddr6;					\
> +		} else {						\
> +			TP_STORE_V4MAPPED(__entry, saddr, daddr);	\
> +		}							\
> +	} while (0)
> +#else
> +#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)	\
> +	TP_STORE_V4MAPPED(__entry, saddr, daddr)
> +#endif
> +
>  /*
>   * tcp event with arguments sk and skb
>   *
> @@ -50,7 +79,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
>  
>  	TP_fast_assign(
>  		struct inet_sock *inet = inet_sk(sk);
> -		struct in6_addr *pin6;
>  		__be32 *p32;
>  
>  		__entry->skbaddr = skb;
> @@ -65,20 +93,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
>  		p32 = (__be32 *) __entry->daddr;
>  		*p32 =  inet->inet_daddr;
>  
> -#if IS_ENABLED(CONFIG_IPV6)
> -		if (sk->sk_family == AF_INET6) {
> -			pin6 = (struct in6_addr *)__entry->saddr_v6;
> -			*pin6 = sk->sk_v6_rcv_saddr;
> -			pin6 = (struct in6_addr *)__entry->daddr_v6;
> -			*pin6 = sk->sk_v6_daddr;
> -		} else
> -#endif
> -		{
> -			pin6 = (struct in6_addr *)__entry->saddr_v6;
> -			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
> -			pin6 = (struct in6_addr *)__entry->daddr_v6;
> -			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
> -		}
> +		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
> +			      sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
>  	),
>  
>  	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
> @@ -127,7 +143,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
>  
>  	TP_fast_assign(
>  		struct inet_sock *inet = inet_sk(sk);
> -		struct in6_addr *pin6;
>  		__be32 *p32;
>  
>  		__entry->skaddr = sk;
> @@ -141,20 +156,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
>  		p32 = (__be32 *) __entry->daddr;
>  		*p32 =  inet->inet_daddr;
>  
> -#if IS_ENABLED(CONFIG_IPV6)
> -		if (sk->sk_family == AF_INET6) {
> -			pin6 = (struct in6_addr *)__entry->saddr_v6;
> -			*pin6 = sk->sk_v6_rcv_saddr;
> -			pin6 = (struct in6_addr *)__entry->daddr_v6;
> -			*pin6 = sk->sk_v6_daddr;
> -		} else
> -#endif
> -		{
> -			pin6 = (struct in6_addr *)__entry->saddr_v6;
> -			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
> -			pin6 = (struct in6_addr *)__entry->daddr_v6;
> -			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
> -		}
> +		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
> +			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
>  	),
>  
>  	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
> @@ -197,7 +200,6 @@ TRACE_EVENT(tcp_set_state,
>  
>  	TP_fast_assign(
>  		struct inet_sock *inet = inet_sk(sk);
> -		struct in6_addr *pin6;
>  		__be32 *p32;
>  
>  		__entry->skaddr = sk;
> @@ -213,20 +215,8 @@ TRACE_EVENT(tcp_set_state,
>  		p32 = (__be32 *) __entry->daddr;
>  		*p32 =  inet->inet_daddr;
>  
> -#if IS_ENABLED(CONFIG_IPV6)
> -		if (sk->sk_family == AF_INET6) {
> -			pin6 = (struct in6_addr *)__entry->saddr_v6;
> -			*pin6 = sk->sk_v6_rcv_saddr;
> -			pin6 = (struct in6_addr *)__entry->daddr_v6;
> -			*pin6 = sk->sk_v6_daddr;
> -		} else
> -#endif
> -		{
> -			pin6 = (struct in6_addr *)__entry->saddr_v6;
> -			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
> -			pin6 = (struct in6_addr *)__entry->daddr_v6;
> -			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
> -		}
> +		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
> +			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
>  	),
>  
>  	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
> @@ -256,7 +246,6 @@ TRACE_EVENT(tcp_retransmit_synack,
>  
>  	TP_fast_assign(
>  		struct inet_request_sock *ireq = inet_rsk(req);
> -		struct in6_addr *pin6;
>  		__be32 *p32;
>  
>  		__entry->skaddr = sk;
> @@ -271,20 +260,8 @@ TRACE_EVENT(tcp_retransmit_synack,
>  		p32 = (__be32 *) __entry->daddr;
>  		*p32 = ireq->ir_rmt_addr;
>  
> -#if IS_ENABLED(CONFIG_IPV6)
> -		if (sk->sk_family == AF_INET6) {
> -			pin6 = (struct in6_addr *)__entry->saddr_v6;
> -			*pin6 = ireq->ir_v6_loc_addr;
> -			pin6 = (struct in6_addr *)__entry->daddr_v6;
> -			*pin6 = ireq->ir_v6_rmt_addr;
> -		} else
> -#endif
> -		{
> -			pin6 = (struct in6_addr *)__entry->saddr_v6;
> -			ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6);
> -			pin6 = (struct in6_addr *)__entry->daddr_v6;
> -			ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6);
> -		}
> +		TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr,
> +			      ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr);
>  	),
>  
>  	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
diff mbox series

Patch

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 07cccca6cbf1..14ad60b468fb 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -1,3 +1,4 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM tcp
 
@@ -8,6 +9,7 @@ 
 #include <linux/tcp.h>
 #include <linux/tracepoint.h>
 #include <net/ipv6.h>
+#include <net/tcp.h>
 
 #define tcp_state_name(state)	{ state, #state }
 #define show_tcp_state_name(val)			\
@@ -293,6 +295,84 @@  TRACE_EVENT(tcp_retransmit_synack,
 		  __entry->saddr_v6, __entry->daddr_v6)
 );
 
+TRACE_EVENT(tcp_probe,
+
+	TP_PROTO(struct sock *sk, struct sk_buff *skb),
+
+	TP_ARGS(sk, skb),
+
+	TP_STRUCT__entry(
+		/* sockaddr_in6 is always bigger than sockaddr_in */
+		__array(__u8, saddr, sizeof(struct sockaddr_in6))
+		__array(__u8, daddr, sizeof(struct sockaddr_in6))
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__field(__u32, mark)
+		__field(__u16, length)
+		__field(__u32, snd_nxt)
+		__field(__u32, snd_una)
+		__field(__u32, snd_cwnd)
+		__field(__u32, ssthresh)
+		__field(__u32, snd_wnd)
+		__field(__u32, srtt)
+		__field(__u32, rcv_wnd)
+	),
+
+	TP_fast_assign(
+		const struct tcp_sock *tp = tcp_sk(sk);
+		const struct inet_sock *inet = inet_sk(sk);
+
+		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+		if (sk->sk_family == AF_INET) {
+			struct sockaddr_in *v4 = (void *)__entry->saddr;
+
+			v4->sin_family = AF_INET;
+			v4->sin_port = inet->inet_sport;
+			v4->sin_addr.s_addr = inet->inet_saddr;
+			v4 = (void *)__entry->daddr;
+			v4->sin_family = AF_INET;
+			v4->sin_port = inet->inet_dport;
+			v4->sin_addr.s_addr = inet->inet_daddr;
+#if IS_ENABLED(CONFIG_IPV6)
+		} else if (sk->sk_family == AF_INET6) {
+			struct sockaddr_in6 *v6 = (void *)__entry->saddr;
+
+			v6->sin6_family = AF_INET6;
+			v6->sin6_port = inet->inet_sport;
+			v6->sin6_addr = inet6_sk(sk)->saddr;
+			v6 = (void *)__entry->daddr;
+			v6->sin6_family = AF_INET6;
+			v6->sin6_port = inet->inet_dport;
+			v6->sin6_addr = sk->sk_v6_daddr;
+#endif
+		}
+
+		/* For filtering use */
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+		__entry->mark = skb->mark;
+
+		__entry->length = skb->len;
+		__entry->snd_nxt = tp->snd_nxt;
+		__entry->snd_una = tp->snd_una;
+		__entry->snd_cwnd = tp->snd_cwnd;
+		__entry->snd_wnd = tp->snd_wnd;
+		__entry->rcv_wnd = tp->rcv_wnd;
+		__entry->ssthresh = tcp_current_ssthresh(sk);
+		__entry->srtt = tp->srtt_us >> 3;
+	),
+
+	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x "
+		  "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u "
+		  "rcv_wnd=%u",
+		  __entry->saddr, __entry->daddr, __entry->mark,
+		  __entry->length, __entry->snd_nxt, __entry->snd_una,
+		  __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
+		  __entry->srtt, __entry->rcv_wnd)
+);
+
 #endif /* _TRACE_TCP_H */
 
 /* This part must be outside protection */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4d55c4b338ee..ff71b18d9682 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5299,6 +5299,9 @@  void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	unsigned int len = skb->len;
 	struct tcp_sock *tp = tcp_sk(sk);
 
+	/* TCP congestion window tracking */
+	trace_tcp_probe(sk, skb);
+
 	tcp_mstamp_refresh(tp);
 	if (unlikely(!sk->sk_rx_dst))
 		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);