diff mbox series

[bpf,3/9] bpf: sockmap/tls, push write_space updates through ulp updates

Message ID 157851808101.1732.11616068811837364406.stgit@ubuntu3-kvm2
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series Fixes for sockmap/tls from more complex BPF progs | expand

Commit Message

John Fastabend Jan. 8, 2020, 9:14 p.m. UTC
When a sockmap sock with TLS enabled is removed, we clean up bpf/psock state
and call tcp_update_ulp() to push updates to the TLS ULP on top. However, we
don't push the write_space callback up and instead simply overwrite the
op with the previous op stored in the psock. This may or may not be correct,
so to ensure we don't overwrite the TLS write space hook, pass this field to
the ULP and have it fix up the ctx.

This completes a previous fix that pushed the ops through to the ULP
but at the time missed doing this for write_space, presumably because
write_space TLS hook was added around the same time.

Fixes: 95fa145479fbc ("bpf: sockmap/tls, close can race with map free")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 include/linux/skmsg.h |   12 ++++++++----
 include/net/tcp.h     |    6 ++++--
 net/ipv4/tcp_ulp.c    |    6 ++++--
 net/tls/tls_main.c    |   10 +++++++---
 4 files changed, 23 insertions(+), 11 deletions(-)

Comments

Jakub Sitnicki Jan. 9, 2020, 10:33 a.m. UTC | #1
On Wed, Jan 08, 2020 at 10:14 PM CET, John Fastabend wrote:
> When sockmap sock with TLS enabled is removed we cleanup bpf/psock state
> and call tcp_update_ulp() to push updates to TLS ULP on top. However, we
> don't push the write_space callback up and instead simply overwrite the
> op with the psock stored previous op. This may or may not be correct so
> to ensure we don't overwrite the TLS write space hook pass this field to
> the ULP and have it fixup the ctx.
>
> This completes a previous fix that pushed the ops through to the ULP
> but at the time missed doing this for write_space, presumably because
> write_space TLS hook was added around the same time.
>
> Fixes: 95fa145479fbc ("bpf: sockmap/tls, close can race with map free")
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  include/linux/skmsg.h |   12 ++++++++----
>  include/net/tcp.h     |    6 ++++--
>  net/ipv4/tcp_ulp.c    |    6 ++++--
>  net/tls/tls_main.c    |   10 +++++++---
>  4 files changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index b6afe01f8592..14d61bba0b79 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -359,17 +359,21 @@ static inline void sk_psock_restore_proto(struct sock *sk,
>  					  struct sk_psock *psock)
>  {
>  	sk->sk_prot->unhash = psock->saved_unhash;
> -	sk->sk_write_space = psock->saved_write_space;
>
>  	if (psock->sk_proto) {
>  		struct inet_connection_sock *icsk = inet_csk(sk);
>  		bool has_ulp = !!icsk->icsk_ulp_data;
>
> -		if (has_ulp)
> -			tcp_update_ulp(sk, psock->sk_proto);
> -		else
> +		if (has_ulp) {
> +			tcp_update_ulp(sk, psock->sk_proto,
> +				       psock->saved_write_space);
> +		} else {
>  			sk->sk_prot = psock->sk_proto;
> +			sk->sk_write_space = psock->saved_write_space;
> +		}

I'm wondering if we need the above fallback branch for the no-ULP case?
tcp_update_ulp repeats the ULP check and has the same fallback. Perhaps
it can be reduced to:

	if (psock->sk_proto) {
		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
		psock->sk_proto = NULL;
	} else {
		sk->sk_write_space = psock->saved_write_space;
	}

Then there's the question of whether it's okay to leave psock->sk_proto set
and potentially restore it more than once. Reading tls_update, the only user
of the ULP 'update' callback, it looks fine.

Can sk_psock_restore_proto be as simple as:

static inline void sk_psock_restore_proto(struct sock *sk,
					  struct sk_psock *psock)
{
	tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
}

... or am I missing something?

Asking because I have a patch [0] like this in the queue and haven't
seen issues with it during testing.

-jkbs

[0] https://github.com/jsitnicki/linux/commit/2d2152593c8e6c5f38548796501a81a6ba20b6dc

>  		psock->sk_proto = NULL;
> +	} else {
> +		sk->sk_write_space = psock->saved_write_space;
>  	}
>  }
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index e460ea7f767b..e6f48384dc71 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2147,7 +2147,8 @@ struct tcp_ulp_ops {
>  	/* initialize ulp */
>  	int (*init)(struct sock *sk);
>  	/* update ulp */
> -	void (*update)(struct sock *sk, struct proto *p);
> +	void (*update)(struct sock *sk, struct proto *p,
> +		       void (*write_space)(struct sock *sk));
>  	/* cleanup ulp */
>  	void (*release)(struct sock *sk);
>  	/* diagnostic */
> @@ -2162,7 +2163,8 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type);
>  int tcp_set_ulp(struct sock *sk, const char *name);
>  void tcp_get_available_ulp(char *buf, size_t len);
>  void tcp_cleanup_ulp(struct sock *sk);
> -void tcp_update_ulp(struct sock *sk, struct proto *p);
> +void tcp_update_ulp(struct sock *sk, struct proto *p,
> +		    void (*write_space)(struct sock *sk));
>
>  #define MODULE_ALIAS_TCP_ULP(name)				\
>  	__MODULE_INFO(alias, alias_userspace, name);		\
> diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
> index 12ab5db2b71c..38d3ad141161 100644
> --- a/net/ipv4/tcp_ulp.c
> +++ b/net/ipv4/tcp_ulp.c
> @@ -99,17 +99,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen)
>  	rcu_read_unlock();
>  }
>
> -void tcp_update_ulp(struct sock *sk, struct proto *proto)
> +void tcp_update_ulp(struct sock *sk, struct proto *proto,
> +		    void (*write_space)(struct sock *sk))
>  {
>  	struct inet_connection_sock *icsk = inet_csk(sk);
>
>  	if (!icsk->icsk_ulp_ops) {
> +		sk->sk_write_space = write_space;
>  		sk->sk_prot = proto;
>  		return;
>  	}
>
>  	if (icsk->icsk_ulp_ops->update)
> -		icsk->icsk_ulp_ops->update(sk, proto);
> +		icsk->icsk_ulp_ops->update(sk, proto, write_space);
>  }
>
>  void tcp_cleanup_ulp(struct sock *sk)
> diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
> index dac24c7aa7d4..94774c0e5ff3 100644
> --- a/net/tls/tls_main.c
> +++ b/net/tls/tls_main.c
> @@ -732,15 +732,19 @@ static int tls_init(struct sock *sk)
>  	return rc;
>  }
>
> -static void tls_update(struct sock *sk, struct proto *p)
> +static void tls_update(struct sock *sk, struct proto *p,
> +		       void (*write_space)(struct sock *sk))
>  {
>  	struct tls_context *ctx;
>
>  	ctx = tls_get_ctx(sk);
> -	if (likely(ctx))
> +	if (likely(ctx)) {
> +		ctx->sk_write_space = write_space;
>  		ctx->sk_proto = p;
> -	else
> +	} else {
>  		sk->sk_prot = p;
> +		sk->sk_write_space = write_space;
> +	}
>  }
>
>  static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
John Fastabend Jan. 9, 2020, 9:22 p.m. UTC | #2
Jakub Sitnicki wrote:
> On Wed, Jan 08, 2020 at 10:14 PM CET, John Fastabend wrote:
> > When sockmap sock with TLS enabled is removed we cleanup bpf/psock state
> > and call tcp_update_ulp() to push updates to TLS ULP on top. However, we
> > don't push the write_space callback up and instead simply overwrite the
> > op with the psock stored previous op. This may or may not be correct so
> > to ensure we don't overwrite the TLS write space hook pass this field to
> > the ULP and have it fixup the ctx.
> >
> > This completes a previous fix that pushed the ops through to the ULP
> > but at the time missed doing this for write_space, presumably because
> > write_space TLS hook was added around the same time.
> >
> > Fixes: 95fa145479fbc ("bpf: sockmap/tls, close can race with map free")
> > Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> > ---
> >  include/linux/skmsg.h |   12 ++++++++----
> >  include/net/tcp.h     |    6 ++++--
> >  net/ipv4/tcp_ulp.c    |    6 ++++--
> >  net/tls/tls_main.c    |   10 +++++++---
> >  4 files changed, 23 insertions(+), 11 deletions(-)
> >
> > diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> > index b6afe01f8592..14d61bba0b79 100644
> > --- a/include/linux/skmsg.h
> > +++ b/include/linux/skmsg.h
> > @@ -359,17 +359,21 @@ static inline void sk_psock_restore_proto(struct sock *sk,
> >  					  struct sk_psock *psock)
> >  {
> >  	sk->sk_prot->unhash = psock->saved_unhash;
> > -	sk->sk_write_space = psock->saved_write_space;
> >
> >  	if (psock->sk_proto) {
> >  		struct inet_connection_sock *icsk = inet_csk(sk);
> >  		bool has_ulp = !!icsk->icsk_ulp_data;
> >
> > -		if (has_ulp)
> > -			tcp_update_ulp(sk, psock->sk_proto);
> > -		else
> > +		if (has_ulp) {
> > +			tcp_update_ulp(sk, psock->sk_proto,
> > +				       psock->saved_write_space);
> > +		} else {
> >  			sk->sk_prot = psock->sk_proto;
> > +			sk->sk_write_space = psock->saved_write_space;
> > +		}
> 
> I'm wondering if we need the above fallback branch for no-ULP case?
> tcp_update_ulp repeats the ULP check and has the same fallback. Perhaps
> it can be reduced to:
> 
> 	if (psock->sk_proto) {
> 		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
> 		psock->sk_proto = NULL;
> 	} else {
> 		sk->sk_write_space = psock->saved_write_space;
> 	}

Yeah, that is a bit nicer. How about pushing it for bpf-next? I'm not
sure it's needed for bpf and the patch I pushed is the minimal change
needed for the fix and pushes the saved_write_space around.

> 
> Then there's the question if it's okay to leave psock->sk_proto set and
> potentially restore it more than once? Reading tls_update, the only user
> ULP 'update' callback, it looks fine.
> 
> Can sk_psock_restore_proto be as simple as:
> 
> static inline void sk_psock_restore_proto(struct sock *sk,
> 					  struct sk_psock *psock)
> {
> 	tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
> }
> 
> ... or am I missing something?

I think that is good. bpf-next?

> 
> Asking because I have a patch [0] like this in the queue and haven't
> seen issues with it during testing.

+1 Want to push it after we sort out this series?

> 
> -jkbs
> 
> [0] https://github.com/jsitnicki/linux/commit/2d2152593c8e6c5f38548796501a81a6ba20b6dc
> 
> >  		psock->sk_proto = NULL;
> > +	} else {
> > +		sk->sk_write_space = psock->saved_write_space;
> >  	}
> >  }
> >
> > diff --git a/include/net/tcp.h b/include/net/tcp.h
> > index e460ea7f767b..e6f48384dc71 100644
> > --- a/include/net/tcp.h
> > +++ b/include/net/tcp.h
> > @@ -2147,7 +2147,8 @@ struct tcp_ulp_ops {
> >  	/* initialize ulp */
> >  	int (*init)(struct sock *sk);
> >  	/* update ulp */
> > -	void (*update)(struct sock *sk, struct proto *p);
> > +	void (*update)(struct sock *sk, struct proto *p,
> > +		       void (*write_space)(struct sock *sk));
> >  	/* cleanup ulp */
> >  	void (*release)(struct sock *sk);
> >  	/* diagnostic */
> > @@ -2162,7 +2163,8 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type);
> >  int tcp_set_ulp(struct sock *sk, const char *name);
> >  void tcp_get_available_ulp(char *buf, size_t len);
> >  void tcp_cleanup_ulp(struct sock *sk);
> > -void tcp_update_ulp(struct sock *sk, struct proto *p);
> > +void tcp_update_ulp(struct sock *sk, struct proto *p,
> > +		    void (*write_space)(struct sock *sk));
> >
> >  #define MODULE_ALIAS_TCP_ULP(name)				\
> >  	__MODULE_INFO(alias, alias_userspace, name);		\
> > diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
> > index 12ab5db2b71c..38d3ad141161 100644
> > --- a/net/ipv4/tcp_ulp.c
> > +++ b/net/ipv4/tcp_ulp.c
> > @@ -99,17 +99,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen)
> >  	rcu_read_unlock();
> >  }
> >
> > -void tcp_update_ulp(struct sock *sk, struct proto *proto)
> > +void tcp_update_ulp(struct sock *sk, struct proto *proto,
> > +		    void (*write_space)(struct sock *sk))
> >  {
> >  	struct inet_connection_sock *icsk = inet_csk(sk);
> >
> >  	if (!icsk->icsk_ulp_ops) {
> > +		sk->sk_write_space = write_space;
> >  		sk->sk_prot = proto;
> >  		return;
> >  	}
> >
> >  	if (icsk->icsk_ulp_ops->update)
> > -		icsk->icsk_ulp_ops->update(sk, proto);
> > +		icsk->icsk_ulp_ops->update(sk, proto, write_space);
> >  }
> >
> >  void tcp_cleanup_ulp(struct sock *sk)
> > diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
> > index dac24c7aa7d4..94774c0e5ff3 100644
> > --- a/net/tls/tls_main.c
> > +++ b/net/tls/tls_main.c
> > @@ -732,15 +732,19 @@ static int tls_init(struct sock *sk)
> >  	return rc;
> >  }
> >
> > -static void tls_update(struct sock *sk, struct proto *p)
> > +static void tls_update(struct sock *sk, struct proto *p,
> > +		       void (*write_space)(struct sock *sk))
> >  {
> >  	struct tls_context *ctx;
> >
> >  	ctx = tls_get_ctx(sk);
> > -	if (likely(ctx))
> > +	if (likely(ctx)) {
> > +		ctx->sk_write_space = write_space;
> >  		ctx->sk_proto = p;
> > -	else
> > +	} else {
> >  		sk->sk_prot = p;
> > +		sk->sk_write_space = write_space;
> > +	}
> >  }
> >
> >  static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
Jakub Sitnicki Jan. 10, 2020, 1:40 p.m. UTC | #3
On Thu, Jan 09, 2020 at 10:22 PM CET, John Fastabend wrote:
> Jakub Sitnicki wrote:
>> On Wed, Jan 08, 2020 at 10:14 PM CET, John Fastabend wrote:
>> > When sockmap sock with TLS enabled is removed we cleanup bpf/psock state
>> > and call tcp_update_ulp() to push updates to TLS ULP on top. However, we
>> > don't push the write_space callback up and instead simply overwrite the
>> > op with the psock stored previous op. This may or may not be correct so
>> > to ensure we don't overwrite the TLS write space hook pass this field to
>> > the ULP and have it fixup the ctx.
>> >
>> > This completes a previous fix that pushed the ops through to the ULP
>> > but at the time missed doing this for write_space, presumably because
>> > write_space TLS hook was added around the same time.
>> >
>> > Fixes: 95fa145479fbc ("bpf: sockmap/tls, close can race with map free")
>> > Signed-off-by: John Fastabend <john.fastabend@gmail.com>
>> > ---
>> >  include/linux/skmsg.h |   12 ++++++++----
>> >  include/net/tcp.h     |    6 ++++--
>> >  net/ipv4/tcp_ulp.c    |    6 ++++--
>> >  net/tls/tls_main.c    |   10 +++++++---
>> >  4 files changed, 23 insertions(+), 11 deletions(-)
>> >
>> > diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
>> > index b6afe01f8592..14d61bba0b79 100644
>> > --- a/include/linux/skmsg.h
>> > +++ b/include/linux/skmsg.h
>> > @@ -359,17 +359,21 @@ static inline void sk_psock_restore_proto(struct sock *sk,
>> >  					  struct sk_psock *psock)
>> >  {
>> >  	sk->sk_prot->unhash = psock->saved_unhash;
>> > -	sk->sk_write_space = psock->saved_write_space;
>> >
>> >  	if (psock->sk_proto) {
>> >  		struct inet_connection_sock *icsk = inet_csk(sk);
>> >  		bool has_ulp = !!icsk->icsk_ulp_data;
>> >
>> > -		if (has_ulp)
>> > -			tcp_update_ulp(sk, psock->sk_proto);
>> > -		else
>> > +		if (has_ulp) {
>> > +			tcp_update_ulp(sk, psock->sk_proto,
>> > +				       psock->saved_write_space);
>> > +		} else {
>> >  			sk->sk_prot = psock->sk_proto;
>> > +			sk->sk_write_space = psock->saved_write_space;
>> > +		}
>>
>> I'm wondering if we need the above fallback branch for no-ULP case?
>> tcp_update_ulp repeats the ULP check and has the same fallback. Perhaps
>> it can be reduced to:
>>
>> 	if (psock->sk_proto) {
>> 		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
>> 		psock->sk_proto = NULL;
>> 	} else {
>> 		sk->sk_write_space = psock->saved_write_space;
>> 	}
>
> Yeah that is a bit nicer. How about pushing it for bpf-next? I'm not
> sure it's needed for bpf and the patch I pushed is the minimal change
> needed for the fix and pushes the saved_write_space around.

Yeah, this is bpf-next material.

>> Then there's the question if it's okay to leave psock->sk_proto set and
>> potentially restore it more than once? Reading tls_update, the only user
>> ULP 'update' callback, it looks fine.
>>
>> Can sk_psock_restore_proto be as simple as:
>>
>> static inline void sk_psock_restore_proto(struct sock *sk,
>> 					  struct sk_psock *psock)
>> {
>> 	tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
>> }
>>
>> ... or am I missing something?
>
> I think that is good. bpf-next?

Great, I needed to confirm my thinking.

>> Asking because I have a patch [0] like this in the queue and haven't
>> seen issues with it during testing.
>
> +1 Want to push it after we sort out this series?

I've actually pushed it earlier today with the next iteration of "Extend
SOCKMAP to store listening sockets" to collect feedback [0]. I will
adapt it once it shows up in bpf-next (or split it out and submit
separately).

-jkbs

[0] https://lore.kernel.org/bpf/20200110105027.257877-1-jakub@cloudflare.com/
diff mbox series

Patch

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index b6afe01f8592..14d61bba0b79 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -359,17 +359,21 @@  static inline void sk_psock_restore_proto(struct sock *sk,
 					  struct sk_psock *psock)
 {
 	sk->sk_prot->unhash = psock->saved_unhash;
-	sk->sk_write_space = psock->saved_write_space;
 
 	if (psock->sk_proto) {
 		struct inet_connection_sock *icsk = inet_csk(sk);
 		bool has_ulp = !!icsk->icsk_ulp_data;
 
-		if (has_ulp)
-			tcp_update_ulp(sk, psock->sk_proto);
-		else
+		if (has_ulp) {
+			tcp_update_ulp(sk, psock->sk_proto,
+				       psock->saved_write_space);
+		} else {
 			sk->sk_prot = psock->sk_proto;
+			sk->sk_write_space = psock->saved_write_space;
+		}
 		psock->sk_proto = NULL;
+	} else {
+		sk->sk_write_space = psock->saved_write_space;
 	}
 }
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e460ea7f767b..e6f48384dc71 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2147,7 +2147,8 @@  struct tcp_ulp_ops {
 	/* initialize ulp */
 	int (*init)(struct sock *sk);
 	/* update ulp */
-	void (*update)(struct sock *sk, struct proto *p);
+	void (*update)(struct sock *sk, struct proto *p,
+		       void (*write_space)(struct sock *sk));
 	/* cleanup ulp */
 	void (*release)(struct sock *sk);
 	/* diagnostic */
@@ -2162,7 +2163,8 @@  void tcp_unregister_ulp(struct tcp_ulp_ops *type);
 int tcp_set_ulp(struct sock *sk, const char *name);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
-void tcp_update_ulp(struct sock *sk, struct proto *p);
+void tcp_update_ulp(struct sock *sk, struct proto *p,
+		    void (*write_space)(struct sock *sk));
 
 #define MODULE_ALIAS_TCP_ULP(name)				\
 	__MODULE_INFO(alias, alias_userspace, name);		\
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 12ab5db2b71c..38d3ad141161 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -99,17 +99,19 @@  void tcp_get_available_ulp(char *buf, size_t maxlen)
 	rcu_read_unlock();
 }
 
-void tcp_update_ulp(struct sock *sk, struct proto *proto)
+void tcp_update_ulp(struct sock *sk, struct proto *proto,
+		    void (*write_space)(struct sock *sk))
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	if (!icsk->icsk_ulp_ops) {
+		sk->sk_write_space = write_space;
 		sk->sk_prot = proto;
 		return;
 	}
 
 	if (icsk->icsk_ulp_ops->update)
-		icsk->icsk_ulp_ops->update(sk, proto);
+		icsk->icsk_ulp_ops->update(sk, proto, write_space);
 }
 
 void tcp_cleanup_ulp(struct sock *sk)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index dac24c7aa7d4..94774c0e5ff3 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -732,15 +732,19 @@  static int tls_init(struct sock *sk)
 	return rc;
 }
 
-static void tls_update(struct sock *sk, struct proto *p)
+static void tls_update(struct sock *sk, struct proto *p,
+		       void (*write_space)(struct sock *sk))
 {
 	struct tls_context *ctx;
 
 	ctx = tls_get_ctx(sk);
-	if (likely(ctx))
+	if (likely(ctx)) {
+		ctx->sk_write_space = write_space;
 		ctx->sk_proto = p;
-	else
+	} else {
 		sk->sk_prot = p;
+		sk->sk_write_space = write_space;
+	}
 }
 
 static int tls_get_info(const struct sock *sk, struct sk_buff *skb)