diff mbox series

[v5,net-next] net/tcp: trace all TCP/IP state transition with tcp_set_state tracepoint

Message ID 1512655842-17784-1-git-send-email-laoar.shao@gmail.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [v5,net-next] net/tcp: trace all TCP/IP state transition with tcp_set_state tracepoint | expand

Commit Message

Yafang Shao Dec. 7, 2017, 2:10 p.m. UTC
The TCP/IP transition from TCP_LISTEN to TCP_SYN_RECV and some other
transitions are not traced with tcp_set_state tracepoint.

In order to trace the whole tcp lifespans, two helpers are introduced,
void sk_set_state(struct sock *sk, int state);
void sk_state_store(struct sock *sk, int newstate);

When do TCP/IP state transition, we should use these two helpers or use
tcp_set_state() other than assigning a value to sk_state directly.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Acked-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
v4->v5: Trace only TCP sockets, whatever it is stream socket or raw socket.
v3->v4: Do not trace DCCP socket
v2->v3: Per suggestion from Marcelo Ricardo Leitner, inverting  __
		 to sk_state_store.
---
 include/net/sock.h              |  8 ++++++--
 net/core/sock.c                 | 15 +++++++++++++++
 net/ipv4/inet_connection_sock.c |  5 +++--
 net/ipv4/inet_hashtables.c      |  2 +-
 net/ipv4/tcp.c                  |  2 +-
 5 files changed, 26 insertions(+), 6 deletions(-)

Comments

Marcelo Ricardo Leitner Dec. 7, 2017, 8:02 p.m. UTC | #1
On Thu, Dec 07, 2017 at 02:10:42PM +0000, Yafang Shao wrote:
> The TCP/IP transition from TCP_LISTEN to TCP_SYN_RECV and some other
> transitions are not traced with tcp_set_state tracepoint.
> 
> In order to trace the whole tcp lifespans, two helpers are introduced,
> void sk_set_state(struct sock *sk, int state);
> void sk_state_store(struct sock *sk, int newstate);
> 
> When do TCP/IP state transition, we should use these two helpers or use
> tcp_set_state() other than assigning a value to sk_state directly.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> Acked-by: Song Liu <songliubraving@fb.com>
> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
> v4->v5: Trace only TCP sockets, whatever it is stream socket or raw socket.
> v3->v4: Do not trace DCCP socket
> v2->v3: Per suggestion from Marcelo Ricardo Leitner, inverting  __
> 		 to sk_state_store.
> ---
>  include/net/sock.h              |  8 ++++++--
>  net/core/sock.c                 | 15 +++++++++++++++
>  net/ipv4/inet_connection_sock.c |  5 +++--
>  net/ipv4/inet_hashtables.c      |  2 +-
>  net/ipv4/tcp.c                  |  2 +-
>  5 files changed, 26 insertions(+), 6 deletions(-)
> 
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 79e1a2c..1cf7685 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2349,18 +2349,22 @@ static inline int sk_state_load(const struct sock *sk)
>  }
>  
>  /**
> - * sk_state_store - update sk->sk_state
> + * __sk_state_store - update sk->sk_state
>   * @sk: socket pointer
>   * @newstate: new state
>   *
>   * Paired with sk_state_load(). Should be used in contexts where
>   * state change might impact lockless readers.
>   */
> -static inline void sk_state_store(struct sock *sk, int newstate)
> +static inline void __sk_state_store(struct sock *sk, int newstate)
>  {
>  	smp_store_release(&sk->sk_state, newstate);
>  }
>  
> +/* For tcp_set_state tracepoint */
> +void sk_state_store(struct sock *sk, int newstate);
> +void sk_set_state(struct sock *sk, int state);
> +
>  void sock_enable_timestamp(struct sock *sk, int flag);
>  int sock_get_timestamp(struct sock *, struct timeval __user *);
>  int sock_get_timestampns(struct sock *, struct timespec __user *);
> diff --git a/net/core/sock.c b/net/core/sock.c
> index c0b5b2f..61841a2 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -138,6 +138,7 @@
>  #include <net/sock_reuseport.h>
>  
>  #include <trace/events/sock.h>
> +#include <trace/events/tcp.h>
>  
>  #include <net/tcp.h>
>  #include <net/busy_poll.h>
> @@ -2859,6 +2860,20 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
>  }
>  EXPORT_SYMBOL(sock_get_timestampns);
>  
> +void sk_state_store(struct sock *sk, int newstate)
> +{
> +	if (sk->sk_protocol == IPPROTO_TCP)
> +		trace_tcp_set_state(sk, sk->sk_state, newstate);

I think this is going in the wrong way. When Dave said to not define a
sock generic function in tcp code on v3, you moved it all from tcp.h
to sock.h. But now sock.h gets to deal with more tcp code, which also
isn't nice.

Instead, if you move it back to tcp.h but rename the function to
tcp_state_store (instead of the original sk_state_store), it won't be
a generic sock code and will fit nicely into tcp.h.

You may then even keep sk_state_store() as it is now, and just define
tcp_state_store():

tcp_state_store()
{
	trace_tcp_...();
	sk_state_store();
}

Making it very clear that this code is only to be used by tcp stack.

> +	__sk_state_store(sk, newstate);
> +}
> +
> +void sk_set_state(struct sock *sk, int state)

Same about this one.

Dave?

> +{
> +	if (sk->sk_protocol == IPPROTO_TCP) 
> +		trace_tcp_set_state(sk, sk->sk_state, state);
> +	sk->sk_state = state;
> +}
> +
>  void sock_enable_timestamp(struct sock *sk, int flag)
>  {
>  	if (!sock_flag(sk, flag)) {
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 4ca46dc..41f9c87 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -783,7 +783,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
>  	if (newsk) {
>  		struct inet_connection_sock *newicsk = inet_csk(newsk);
>  
> -		newsk->sk_state = TCP_SYN_RECV;
> +		sk_set_state(newsk, TCP_SYN_RECV);
>  		newicsk->icsk_bind_hash = NULL;
>  
>  		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
> @@ -888,7 +888,8 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
>  			return 0;
>  	}
>  
> -	sk->sk_state = TCP_CLOSE;
> +	sk_set_state(sk, TCP_CLOSE);
> +
>  	return err;
>  }
>  EXPORT_SYMBOL_GPL(inet_csk_listen_start);
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index f6f5810..5973693 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -544,7 +544,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
>  		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
>  	} else {
>  		percpu_counter_inc(sk->sk_prot->orphan_count);
> -		sk->sk_state = TCP_CLOSE;
> +		sk_set_state(sk, TCP_CLOSE);
>  		sock_set_flag(sk, SOCK_DEAD);
>  		inet_csk_destroy_sock(sk);
>  	}
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 1803116..ac98dc6 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2065,7 +2065,7 @@ void tcp_set_state(struct sock *sk, int state)
>  	/* Change state AFTER socket is unhashed to avoid closed
>  	 * socket sitting in hash tables.
>  	 */
> -	sk_state_store(sk, state);
> +	__sk_state_store(sk, state);
>  
>  #ifdef STATE_TRACE
>  	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
> -- 
> 1.8.3.1
>
David Miller Dec. 7, 2017, 8:04 p.m. UTC | #2
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Thu, 7 Dec 2017 18:02:46 -0200

> Dave?

Yeah that will untangle this mess much more nicely.
Yafang Shao Dec. 8, 2017, 1:41 a.m. UTC | #3
2017-12-08 4:02 GMT+08:00 Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>:
> On Thu, Dec 07, 2017 at 02:10:42PM +0000, Yafang Shao wrote:
>> The TCP/IP transition from TCP_LISTEN to TCP_SYN_RECV and some other
>> transitions are not traced with tcp_set_state tracepoint.
>>
>> In order to trace the whole tcp lifespans, two helpers are introduced,
>> void sk_set_state(struct sock *sk, int state);
>> void sk_state_store(struct sock *sk, int newstate);
>>
>> When do TCP/IP state transition, we should use these two helpers or use
>> tcp_set_state() other than assigning a value to sk_state directly.
>>
>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>> Acked-by: Song Liu <songliubraving@fb.com>
>> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>> ---
>> v4->v5: Trace only TCP sockets, whatever it is stream socket or raw socket.
>> v3->v4: Do not trace DCCP socket
>> v2->v3: Per suggestion from Marcelo Ricardo Leitner, inverting  __
>>                to sk_state_store.
>> ---
>>  include/net/sock.h              |  8 ++++++--
>>  net/core/sock.c                 | 15 +++++++++++++++
>>  net/ipv4/inet_connection_sock.c |  5 +++--
>>  net/ipv4/inet_hashtables.c      |  2 +-
>>  net/ipv4/tcp.c                  |  2 +-
>>  5 files changed, 26 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/net/sock.h b/include/net/sock.h
>> index 79e1a2c..1cf7685 100644
>> --- a/include/net/sock.h
>> +++ b/include/net/sock.h
>> @@ -2349,18 +2349,22 @@ static inline int sk_state_load(const struct sock *sk)
>>  }
>>
>>  /**
>> - * sk_state_store - update sk->sk_state
>> + * __sk_state_store - update sk->sk_state
>>   * @sk: socket pointer
>>   * @newstate: new state
>>   *
>>   * Paired with sk_state_load(). Should be used in contexts where
>>   * state change might impact lockless readers.
>>   */
>> -static inline void sk_state_store(struct sock *sk, int newstate)
>> +static inline void __sk_state_store(struct sock *sk, int newstate)
>>  {
>>       smp_store_release(&sk->sk_state, newstate);
>>  }
>>
>> +/* For tcp_set_state tracepoint */
>> +void sk_state_store(struct sock *sk, int newstate);
>> +void sk_set_state(struct sock *sk, int state);
>> +
>>  void sock_enable_timestamp(struct sock *sk, int flag);
>>  int sock_get_timestamp(struct sock *, struct timeval __user *);
>>  int sock_get_timestampns(struct sock *, struct timespec __user *);
>> diff --git a/net/core/sock.c b/net/core/sock.c
>> index c0b5b2f..61841a2 100644
>> --- a/net/core/sock.c
>> +++ b/net/core/sock.c
>> @@ -138,6 +138,7 @@
>>  #include <net/sock_reuseport.h>
>>
>>  #include <trace/events/sock.h>
>> +#include <trace/events/tcp.h>
>>
>>  #include <net/tcp.h>
>>  #include <net/busy_poll.h>
>> @@ -2859,6 +2860,20 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
>>  }
>>  EXPORT_SYMBOL(sock_get_timestampns);
>>
>> +void sk_state_store(struct sock *sk, int newstate)
>> +{
>> +     if (sk->sk_protocol == IPPROTO_TCP)
>> +             trace_tcp_set_state(sk, sk->sk_state, newstate);
>
> I think this is going in the wrong way. When Dave said to not define a
> sock generic function in tcp code on v3, you moved it all from tcp.h
> to sock.h. But now sock.h gets to deal with more tcp code, which also
> isn't nice.
>
> Instead, if you move it back to tcp.h but rename the function to
> tcp_state_store (instead of the original sk_state_store), it won't be
> a generic sock code and will fit nicely into tcp.h.
>
> You may then even keep sk_state_store() as it is now, and just define
> tcp_state_store():
>
> tcp_state_store()
> {
>         trace_tcp_...();
>         sk_state_store();
> }
>
> Making it very clear that this code is only to be used by tcp stack.
>

Then we have to do bellow 'if' test in inet_connection_sock.c and
/inet_hashtables.

if (sk->sk_protocol == IPPROTO_TCP)
    tcp_state_store(sk, TCP_CLOSE)
else
    sk->sk_state = TCP_CLOSE;

And same code about other changes.

Is that proper ?


>> +     __sk_state_store(sk, newstate);
>> +}
>> +
>> +void sk_set_state(struct sock *sk, int state)
>
> Same about this one.
>
> Dave?
>
>> +{
>> +     if (sk->sk_protocol == IPPROTO_TCP)
>> +             trace_tcp_set_state(sk, sk->sk_state, state);
>> +     sk->sk_state = state;
>> +}
>> +
>>  void sock_enable_timestamp(struct sock *sk, int flag)
>>  {
>>       if (!sock_flag(sk, flag)) {
>> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
>> index 4ca46dc..41f9c87 100644
>> --- a/net/ipv4/inet_connection_sock.c
>> +++ b/net/ipv4/inet_connection_sock.c
>> @@ -783,7 +783,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
>>       if (newsk) {
>>               struct inet_connection_sock *newicsk = inet_csk(newsk);
>>
>> -             newsk->sk_state = TCP_SYN_RECV;
>> +             sk_set_state(newsk, TCP_SYN_RECV);
>>               newicsk->icsk_bind_hash = NULL;
>>
>>               inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
>> @@ -888,7 +888,8 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
>>                       return 0;
>>       }
>>
>> -     sk->sk_state = TCP_CLOSE;
>> +     sk_set_state(sk, TCP_CLOSE);
>> +
>>       return err;
>>  }
>>  EXPORT_SYMBOL_GPL(inet_csk_listen_start);
>> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
>> index f6f5810..5973693 100644
>> --- a/net/ipv4/inet_hashtables.c
>> +++ b/net/ipv4/inet_hashtables.c
>> @@ -544,7 +544,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
>>               sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
>>       } else {
>>               percpu_counter_inc(sk->sk_prot->orphan_count);
>> -             sk->sk_state = TCP_CLOSE;
>> +             sk_set_state(sk, TCP_CLOSE);
>>               sock_set_flag(sk, SOCK_DEAD);
>>               inet_csk_destroy_sock(sk);
>>       }
>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>> index 1803116..ac98dc6 100644
>> --- a/net/ipv4/tcp.c
>> +++ b/net/ipv4/tcp.c
>> @@ -2065,7 +2065,7 @@ void tcp_set_state(struct sock *sk, int state)
>>       /* Change state AFTER socket is unhashed to avoid closed
>>        * socket sitting in hash tables.
>>        */
>> -     sk_state_store(sk, state);
>> +     __sk_state_store(sk, state);
>>
>>  #ifdef STATE_TRACE
>>       SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
>> --
>> 1.8.3.1
>>
Yafang Shao Dec. 8, 2017, 3:40 a.m. UTC | #4
2017-12-08 9:41 GMT+08:00 Yafang Shao <laoar.shao@gmail.com>:
> 2017-12-08 4:02 GMT+08:00 Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>:
>> On Thu, Dec 07, 2017 at 02:10:42PM +0000, Yafang Shao wrote:
>>> The TCP/IP transition from TCP_LISTEN to TCP_SYN_RECV and some other
>>> transitions are not traced with tcp_set_state tracepoint.
>>>
>>> In order to trace the whole tcp lifespans, two helpers are introduced,
>>> void sk_set_state(struct sock *sk, int state);
>>> void sk_state_store(struct sock *sk, int newstate);
>>>
>>> When do TCP/IP state transition, we should use these two helpers or use
>>> tcp_set_state() other than assigning a value to sk_state directly.
>>>
>>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>>> Acked-by: Song Liu <songliubraving@fb.com>
>>> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
>>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>>> ---
>>> v4->v5: Trace only TCP sockets, whatever it is stream socket or raw socket.
>>> v3->v4: Do not trace DCCP socket
>>> v2->v3: Per suggestion from Marcelo Ricardo Leitner, inverting  __
>>>                to sk_state_store.
>>> ---
>>>  include/net/sock.h              |  8 ++++++--
>>>  net/core/sock.c                 | 15 +++++++++++++++
>>>  net/ipv4/inet_connection_sock.c |  5 +++--
>>>  net/ipv4/inet_hashtables.c      |  2 +-
>>>  net/ipv4/tcp.c                  |  2 +-
>>>  5 files changed, 26 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/include/net/sock.h b/include/net/sock.h
>>> index 79e1a2c..1cf7685 100644
>>> --- a/include/net/sock.h
>>> +++ b/include/net/sock.h
>>> @@ -2349,18 +2349,22 @@ static inline int sk_state_load(const struct sock *sk)
>>>  }
>>>
>>>  /**
>>> - * sk_state_store - update sk->sk_state
>>> + * __sk_state_store - update sk->sk_state
>>>   * @sk: socket pointer
>>>   * @newstate: new state
>>>   *
>>>   * Paired with sk_state_load(). Should be used in contexts where
>>>   * state change might impact lockless readers.
>>>   */
>>> -static inline void sk_state_store(struct sock *sk, int newstate)
>>> +static inline void __sk_state_store(struct sock *sk, int newstate)
>>>  {
>>>       smp_store_release(&sk->sk_state, newstate);
>>>  }
>>>
>>> +/* For tcp_set_state tracepoint */
>>> +void sk_state_store(struct sock *sk, int newstate);
>>> +void sk_set_state(struct sock *sk, int state);
>>> +
>>>  void sock_enable_timestamp(struct sock *sk, int flag);
>>>  int sock_get_timestamp(struct sock *, struct timeval __user *);
>>>  int sock_get_timestampns(struct sock *, struct timespec __user *);
>>> diff --git a/net/core/sock.c b/net/core/sock.c
>>> index c0b5b2f..61841a2 100644
>>> --- a/net/core/sock.c
>>> +++ b/net/core/sock.c
>>> @@ -138,6 +138,7 @@
>>>  #include <net/sock_reuseport.h>
>>>
>>>  #include <trace/events/sock.h>
>>> +#include <trace/events/tcp.h>
>>>
>>>  #include <net/tcp.h>
>>>  #include <net/busy_poll.h>
>>> @@ -2859,6 +2860,20 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
>>>  }
>>>  EXPORT_SYMBOL(sock_get_timestampns);
>>>
>>> +void sk_state_store(struct sock *sk, int newstate)
>>> +{
>>> +     if (sk->sk_protocol == IPPROTO_TCP)
>>> +             trace_tcp_set_state(sk, sk->sk_state, newstate);
>>
>> I think this is going in the wrong way. When Dave said to not define a
>> sock generic function in tcp code on v3, you moved it all from tcp.h
>> to sock.h. But now sock.h gets to deal with more tcp code, which also
>> isn't nice.
>>
>> Instead, if you move it back to tcp.h but rename the function to
>> tcp_state_store (instead of the original sk_state_store), it won't be
>> a generic sock code and will fit nicely into tcp.h.
>>
>> You may then even keep sk_state_store() as it is now, and just define
>> tcp_state_store():
>>
>> tcp_state_store()
>> {
>>         trace_tcp_...();
>>         sk_state_store();
>> }
>>
>> Making it very clear that this code is only to be used by tcp stack.
>>
>
> Then we have to do bellow 'if' test in inet_connection_sock.c and
> /inet_hashtables.
>
> if (sk->sk_protocol == IPPROTO_TCP)
>     tcp_state_store(sk, TCP_CLOSE)
> else
>     sk->sk_state = TCP_CLOSE;
>
> And same code about other changes.
>
> Is that proper ?
>
>

It will looks like these,

    if (sk->sk_protocol == IPPROTO_TCP)
        __tcp_set_state(newsk, TCP_SYN_RECV);
    else
        newsk->sk_state = TCP_SYN_RECV;


    if (sk->sk_protocol == IPPROTO_TCP)
          __tcp_set_state(sk, TCP_CLOSE);
    else
          sk->sk_state = TCP_CLOSE;

    if (sk->sk_protocol == IPPROTO_TCP)
          tcp_state_store(sk,  state);
    else
          sk_state_store(sk, state);


Some redundant code.

IMO, put these similar code into a wrapper is more nice.

Thanks
Yafang
Marcelo Ricardo Leitner Dec. 8, 2017, 11:03 a.m. UTC | #5
On Fri, Dec 08, 2017 at 11:40:23AM +0800, Yafang Shao wrote:
> It will looks like these,
> 
>     if (sk->sk_protocol == IPPROTO_TCP)
>         __tcp_set_state(newsk, TCP_SYN_RECV);
>     else
>         newsk->sk_state = TCP_SYN_RECV;
> 
> 
>     if (sk->sk_protocol == IPPROTO_TCP)
>           __tcp_set_state(sk, TCP_CLOSE);
>     else
>           sk->sk_state = TCP_CLOSE;
> 
>     if (sk->sk_protocol == IPPROTO_TCP)
>           tcp_state_store(sk,  state);
>     else
>           sk_state_store(sk, state);
> 
> 
> Some redundant code.
> 
> IMO, put these similar code into a wrapper is more nice.

Agreed. Hmpf, looks like one way or another, we have to add the sk_
functions to do such check, and then the tcp_* won't help much.

I'm okay with this v5 then, can't see a better way around it.

  Marcelo
David Miller Dec. 8, 2017, 3:42 p.m. UTC | #6
From: Yafang Shao <laoar.shao@gmail.com>
Date: Fri, 8 Dec 2017 11:40:23 +0800

> It will looks like these,
> 
>     if (sk->sk_protocol == IPPROTO_TCP)
>         __tcp_set_state(newsk, TCP_SYN_RECV);
>     else
>         newsk->sk_state = TCP_SYN_RECV;
> 
> 
>     if (sk->sk_protocol == IPPROTO_TCP)
>           __tcp_set_state(sk, TCP_CLOSE);
>     else
>           sk->sk_state = TCP_CLOSE;
> 
>     if (sk->sk_protocol == IPPROTO_TCP)
>           tcp_state_store(sk,  state);
>     else
>           sk_state_store(sk, state);
> 
> 
> Some redundant code.
> 
> IMO, put these similar code into a wrapper is more nice.

I think this discussion and how ugly this is getting shows that
tracing the state transitions of a socket is perhaps not best as a TCP
specific feature.
Yafang Shao Dec. 8, 2017, 3:50 p.m. UTC | #7
2017-12-08 23:42 GMT+08:00 David Miller <davem@davemloft.net>:
> From: Yafang Shao <laoar.shao@gmail.com>
> Date: Fri, 8 Dec 2017 11:40:23 +0800
>
>> It will looks like these,
>>
>>     if (sk->sk_protocol == IPPROTO_TCP)
>>         __tcp_set_state(newsk, TCP_SYN_RECV);
>>     else
>>         newsk->sk_state = TCP_SYN_RECV;
>>
>>
>>     if (sk->sk_protocol == IPPROTO_TCP)
>>           __tcp_set_state(sk, TCP_CLOSE);
>>     else
>>           sk->sk_state = TCP_CLOSE;
>>
>>     if (sk->sk_protocol == IPPROTO_TCP)
>>           tcp_state_store(sk,  state);
>>     else
>>           sk_state_store(sk, state);
>>
>>
>> Some redundant code.
>>
>> IMO, put these similar code into a wrapper is more nice.
>
> I think this discussion and how ugly this is getting shows that
> tracing the state transitions of a socket is perhaps not best as a TCP
> specific feature.

Do you mean that tcp_set_state tracepoint should be replaced with
sk_set_state tracepoint and move that tracepoint to
trace/events/sock.h ?

Thanks
Yafang
David Miller Dec. 8, 2017, 4:03 p.m. UTC | #8
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Fri, 8 Dec 2017 09:03:56 -0200

> I'm okay with this v5 then, can't see a better way around it.

I would suggest not making this a TCP specific feature.

Because socket state changes are not a TCP specific behavior.
David Miller Dec. 8, 2017, 4:28 p.m. UTC | #9
From: Yafang Shao <laoar.shao@gmail.com>
Date: Fri, 8 Dec 2017 23:50:44 +0800

> 2017-12-08 23:42 GMT+08:00 David Miller <davem@davemloft.net>:
>> From: Yafang Shao <laoar.shao@gmail.com>
>> Date: Fri, 8 Dec 2017 11:40:23 +0800
>>
>>> It will looks like these,
>>>
>>>     if (sk->sk_protocol == IPPROTO_TCP)
>>>         __tcp_set_state(newsk, TCP_SYN_RECV);
>>>     else
>>>         newsk->sk_state = TCP_SYN_RECV;
>>>
>>>
>>>     if (sk->sk_protocol == IPPROTO_TCP)
>>>           __tcp_set_state(sk, TCP_CLOSE);
>>>     else
>>>           sk->sk_state = TCP_CLOSE;
>>>
>>>     if (sk->sk_protocol == IPPROTO_TCP)
>>>           tcp_state_store(sk,  state);
>>>     else
>>>           sk_state_store(sk, state);
>>>
>>>
>>> Some redundant code.
>>>
>>> IMO, put these similar code into a wrapper is more nice.
>>
>> I think this discussion and how ugly this is getting shows that
>> tracing the state transitions of a socket is perhaps not best as a TCP
>> specific feature.
> 
> Do you mean that tcp_set_state tracepoint should be replaced with
> sk_set_state tracepoint and move that tracepoint to
> trace/events/sock.h ?

Yes, something like that.

It will avoid all of these protocol specific checks and weird
dependencies.
Yafang Shao Dec. 9, 2017, 12:47 a.m. UTC | #10
2017-12-09 0:28 GMT+08:00 David Miller <davem@davemloft.net>:
> From: Yafang Shao <laoar.shao@gmail.com>
> Date: Fri, 8 Dec 2017 23:50:44 +0800
>
>> 2017-12-08 23:42 GMT+08:00 David Miller <davem@davemloft.net>:
>>> From: Yafang Shao <laoar.shao@gmail.com>
>>> Date: Fri, 8 Dec 2017 11:40:23 +0800
>>>
>>>> It will looks like these,
>>>>
>>>>     if (sk->sk_protocol == IPPROTO_TCP)
>>>>         __tcp_set_state(newsk, TCP_SYN_RECV);
>>>>     else
>>>>         newsk->sk_state = TCP_SYN_RECV;
>>>>
>>>>
>>>>     if (sk->sk_protocol == IPPROTO_TCP)
>>>>           __tcp_set_state(sk, TCP_CLOSE);
>>>>     else
>>>>           sk->sk_state = TCP_CLOSE;
>>>>
>>>>     if (sk->sk_protocol == IPPROTO_TCP)
>>>>           tcp_state_store(sk,  state);
>>>>     else
>>>>           sk_state_store(sk, state);
>>>>
>>>>
>>>> Some redundant code.
>>>>
>>>> IMO, put these similar code into a wrapper is more nice.
>>>
>>> I think this discussion and how ugly this is getting shows that
>>> tracing the state transitions of a socket is perhaps not best as a TCP
>>> specific feature.
>>
>> Do you mean that tcp_set_state tracepoint should be replaced with
>> sk_set_state tracepoint and move that tracepoint to
>> trace/events/sock.h ?
>
> Yes, something like that.
>
> It will avoid all of these protocol specific checks and weird
> dependencies.

That looks like a good idea.
I will try to reimplemnet it.


Thanks
Yafang
diff mbox series

Patch

diff --git a/include/net/sock.h b/include/net/sock.h
index 79e1a2c..1cf7685 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2349,18 +2349,22 @@  static inline int sk_state_load(const struct sock *sk)
 }
 
 /**
- * sk_state_store - update sk->sk_state
+ * __sk_state_store - update sk->sk_state
  * @sk: socket pointer
  * @newstate: new state
  *
  * Paired with sk_state_load(). Should be used in contexts where
  * state change might impact lockless readers.
  */
-static inline void sk_state_store(struct sock *sk, int newstate)
+static inline void __sk_state_store(struct sock *sk, int newstate)
 {
 	smp_store_release(&sk->sk_state, newstate);
 }
 
+/* For tcp_set_state tracepoint */
+void sk_state_store(struct sock *sk, int newstate);
+void sk_set_state(struct sock *sk, int state);
+
 void sock_enable_timestamp(struct sock *sk, int flag);
 int sock_get_timestamp(struct sock *, struct timeval __user *);
 int sock_get_timestampns(struct sock *, struct timespec __user *);
diff --git a/net/core/sock.c b/net/core/sock.c
index c0b5b2f..61841a2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -138,6 +138,7 @@ 
 #include <net/sock_reuseport.h>
 
 #include <trace/events/sock.h>
+#include <trace/events/tcp.h>
 
 #include <net/tcp.h>
 #include <net/busy_poll.h>
@@ -2859,6 +2860,20 @@  int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
 }
 EXPORT_SYMBOL(sock_get_timestampns);
 
+void sk_state_store(struct sock *sk, int newstate)
+{
+	if (sk->sk_protocol == IPPROTO_TCP)
+		trace_tcp_set_state(sk, sk->sk_state, newstate);
+	__sk_state_store(sk, newstate);
+}
+
+void sk_set_state(struct sock *sk, int state)
+{
+	if (sk->sk_protocol == IPPROTO_TCP) 
+		trace_tcp_set_state(sk, sk->sk_state, state);
+	sk->sk_state = state;
+}
+
 void sock_enable_timestamp(struct sock *sk, int flag)
 {
 	if (!sock_flag(sk, flag)) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4ca46dc..41f9c87 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -783,7 +783,7 @@  struct sock *inet_csk_clone_lock(const struct sock *sk,
 	if (newsk) {
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 
-		newsk->sk_state = TCP_SYN_RECV;
+		sk_set_state(newsk, TCP_SYN_RECV);
 		newicsk->icsk_bind_hash = NULL;
 
 		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
@@ -888,7 +888,8 @@  int inet_csk_listen_start(struct sock *sk, int backlog)
 			return 0;
 	}
 
-	sk->sk_state = TCP_CLOSE;
+	sk_set_state(sk, TCP_CLOSE);
+
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f6f5810..5973693 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -544,7 +544,7 @@  bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	} else {
 		percpu_counter_inc(sk->sk_prot->orphan_count);
-		sk->sk_state = TCP_CLOSE;
+		sk_set_state(sk, TCP_CLOSE);
 		sock_set_flag(sk, SOCK_DEAD);
 		inet_csk_destroy_sock(sk);
 	}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1803116..ac98dc6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2065,7 +2065,7 @@  void tcp_set_state(struct sock *sk, int state)
 	/* Change state AFTER socket is unhashed to avoid closed
 	 * socket sitting in hash tables.
 	 */
-	sk_state_store(sk, state);
+	__sk_state_store(sk, state);
 
 #ifdef STATE_TRACE
 	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);