Message ID | 157851808101.1732.11616068811837364406.stgit@ubuntu3-kvm2 |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | Fixes for sockmap/tls from more complex BPF progs | expand |
On Wed, Jan 08, 2020 at 10:14 PM CET, John Fastabend wrote: > When sockmap sock with TLS enabled is removed we cleanup bpf/psock state > and call tcp_update_ulp() to push updates to TLS ULP on top. However, we > don't push the write_space callback up and instead simply overwrite the > op with the psock stored previous op. This may or may not be correct so > to ensure we don't overwrite the TLS write space hook pass this field to > the ULP and have it fixup the ctx. > > This completes a previous fix that pushed the ops through to the ULP > but at the time missed doing this for write_space, presumably because > write_space TLS hook was added around the same time. > > Fixes: 95fa145479fbc ("bpf: sockmap/tls, close can race with map free") > Signed-off-by: John Fastabend <john.fastabend@gmail.com> > --- > include/linux/skmsg.h | 12 ++++++++---- > include/net/tcp.h | 6 ++++-- > net/ipv4/tcp_ulp.c | 6 ++++-- > net/tls/tls_main.c | 10 +++++++--- > 4 files changed, 23 insertions(+), 11 deletions(-) > > diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h > index b6afe01f8592..14d61bba0b79 100644 > --- a/include/linux/skmsg.h > +++ b/include/linux/skmsg.h > @@ -359,17 +359,21 @@ static inline void sk_psock_restore_proto(struct sock *sk, > struct sk_psock *psock) > { > sk->sk_prot->unhash = psock->saved_unhash; > - sk->sk_write_space = psock->saved_write_space; > > if (psock->sk_proto) { > struct inet_connection_sock *icsk = inet_csk(sk); > bool has_ulp = !!icsk->icsk_ulp_data; > > - if (has_ulp) > - tcp_update_ulp(sk, psock->sk_proto); > - else > + if (has_ulp) { > + tcp_update_ulp(sk, psock->sk_proto, > + psock->saved_write_space); > + } else { > sk->sk_prot = psock->sk_proto; > + sk->sk_write_space = psock->saved_write_space; > + } I'm wondering if we need the above fallback branch for no-ULP case? tcp_update_ulp repeats the ULP check and has the same fallback. 
Perhaps it can be reduced to: if (psock->sk_proto) { tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); psock->sk_proto = NULL; } else { sk->sk_write_space = psock->saved_write_space; } Then there's the question if it's okay to leave psock->sk_proto set and potentially restore it more than once? Reading tls_update, the only user ULP 'update' callback, it looks fine. Can sk_psock_restore_proto be as simple as: static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); } ... or am I missing something? Asking because I have a patch [0] like this in the queue and haven't seen issues with it during testing. -jkbs [0] https://github.com/jsitnicki/linux/commit/2d2152593c8e6c5f38548796501a81a6ba20b6dc > psock->sk_proto = NULL; > + } else { > + sk->sk_write_space = psock->saved_write_space; > } > } > > diff --git a/include/net/tcp.h b/include/net/tcp.h > index e460ea7f767b..e6f48384dc71 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -2147,7 +2147,8 @@ struct tcp_ulp_ops { > /* initialize ulp */ > int (*init)(struct sock *sk); > /* update ulp */ > - void (*update)(struct sock *sk, struct proto *p); > + void (*update)(struct sock *sk, struct proto *p, > + void (*write_space)(struct sock *sk)); > /* cleanup ulp */ > void (*release)(struct sock *sk); > /* diagnostic */ > @@ -2162,7 +2163,8 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type); > int tcp_set_ulp(struct sock *sk, const char *name); > void tcp_get_available_ulp(char *buf, size_t len); > void tcp_cleanup_ulp(struct sock *sk); > -void tcp_update_ulp(struct sock *sk, struct proto *p); > +void tcp_update_ulp(struct sock *sk, struct proto *p, > + void (*write_space)(struct sock *sk)); > > #define MODULE_ALIAS_TCP_ULP(name) \ > __MODULE_INFO(alias, alias_userspace, name); \ > diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c > index 12ab5db2b71c..38d3ad141161 100644 > --- a/net/ipv4/tcp_ulp.c > +++ 
b/net/ipv4/tcp_ulp.c > @@ -99,17 +99,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen) > rcu_read_unlock(); > } > > -void tcp_update_ulp(struct sock *sk, struct proto *proto) > +void tcp_update_ulp(struct sock *sk, struct proto *proto, > + void (*write_space)(struct sock *sk)) > { > struct inet_connection_sock *icsk = inet_csk(sk); > > if (!icsk->icsk_ulp_ops) { > + sk->sk_write_space = write_space; > sk->sk_prot = proto; > return; > } > > if (icsk->icsk_ulp_ops->update) > - icsk->icsk_ulp_ops->update(sk, proto); > + icsk->icsk_ulp_ops->update(sk, proto, write_space); > } > > void tcp_cleanup_ulp(struct sock *sk) > diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c > index dac24c7aa7d4..94774c0e5ff3 100644 > --- a/net/tls/tls_main.c > +++ b/net/tls/tls_main.c > @@ -732,15 +732,19 @@ static int tls_init(struct sock *sk) > return rc; > } > > -static void tls_update(struct sock *sk, struct proto *p) > +static void tls_update(struct sock *sk, struct proto *p, > + void (*write_space)(struct sock *sk)) > { > struct tls_context *ctx; > > ctx = tls_get_ctx(sk); > - if (likely(ctx)) > + if (likely(ctx)) { > + ctx->sk_write_space = write_space; > ctx->sk_proto = p; > - else > + } else { > sk->sk_prot = p; > + sk->sk_write_space = write_space; > + } > } > > static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
Jakub Sitnicki wrote: > On Wed, Jan 08, 2020 at 10:14 PM CET, John Fastabend wrote: > > When sockmap sock with TLS enabled is removed we cleanup bpf/psock state > > and call tcp_update_ulp() to push updates to TLS ULP on top. However, we > > don't push the write_space callback up and instead simply overwrite the > > op with the psock stored previous op. This may or may not be correct so > > to ensure we don't overwrite the TLS write space hook pass this field to > > the ULP and have it fixup the ctx. > > > > This completes a previous fix that pushed the ops through to the ULP > > but at the time missed doing this for write_space, presumably because > > write_space TLS hook was added around the same time. > > > > Fixes: 95fa145479fbc ("bpf: sockmap/tls, close can race with map free") > > Signed-off-by: John Fastabend <john.fastabend@gmail.com> > > --- > > include/linux/skmsg.h | 12 ++++++++---- > > include/net/tcp.h | 6 ++++-- > > net/ipv4/tcp_ulp.c | 6 ++++-- > > net/tls/tls_main.c | 10 +++++++--- > > 4 files changed, 23 insertions(+), 11 deletions(-) > > > > diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h > > index b6afe01f8592..14d61bba0b79 100644 > > --- a/include/linux/skmsg.h > > +++ b/include/linux/skmsg.h > > @@ -359,17 +359,21 @@ static inline void sk_psock_restore_proto(struct sock *sk, > > struct sk_psock *psock) > > { > > sk->sk_prot->unhash = psock->saved_unhash; > > - sk->sk_write_space = psock->saved_write_space; > > > > if (psock->sk_proto) { > > struct inet_connection_sock *icsk = inet_csk(sk); > > bool has_ulp = !!icsk->icsk_ulp_data; > > > > - if (has_ulp) > > - tcp_update_ulp(sk, psock->sk_proto); > > - else > > + if (has_ulp) { > > + tcp_update_ulp(sk, psock->sk_proto, > > + psock->saved_write_space); > > + } else { > > sk->sk_prot = psock->sk_proto; > > + sk->sk_write_space = psock->saved_write_space; > > + } > > I'm wondering if we need the above fallback branch for no-ULP case? 
> tcp_update_ulp repeats the ULP check and has the same fallback. Perhaps > it can be reduced to: > > if (psock->sk_proto) { > tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); > psock->sk_proto = NULL; > } else { > sk->sk_write_space = psock->saved_write_space; > } Yeah that is a bit nicer. How about pushing it for bpf-next? I'm not sure it's needed for bpf and the patch I pushed is the minimal change needed for the fix and pushes the saved_write_space around. > > Then there's the question if it's okay to leave psock->sk_proto set and > potentially restore it more than once? Reading tls_update, the only user > ULP 'update' callback, it looks fine. > > Can sk_psock_restore_proto be as simple as: > > static inline void sk_psock_restore_proto(struct sock *sk, > struct sk_psock *psock) > { > tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); > } > > ... or am I missing something? I think that is good. bpf-next? > > Asking because I have a patch [0] like this in the queue and haven't > seen issues with it during testing. +1 Want to push it after we sort out this series? 
> > -jkbs > > [0] https://github.com/jsitnicki/linux/commit/2d2152593c8e6c5f38548796501a81a6ba20b6dc > > > psock->sk_proto = NULL; > > + } else { > > + sk->sk_write_space = psock->saved_write_space; > > } > > } > > > > diff --git a/include/net/tcp.h b/include/net/tcp.h > > index e460ea7f767b..e6f48384dc71 100644 > > --- a/include/net/tcp.h > > +++ b/include/net/tcp.h > > @@ -2147,7 +2147,8 @@ struct tcp_ulp_ops { > > /* initialize ulp */ > > int (*init)(struct sock *sk); > > /* update ulp */ > > - void (*update)(struct sock *sk, struct proto *p); > > + void (*update)(struct sock *sk, struct proto *p, > > + void (*write_space)(struct sock *sk)); > > /* cleanup ulp */ > > void (*release)(struct sock *sk); > > /* diagnostic */ > > @@ -2162,7 +2163,8 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type); > > int tcp_set_ulp(struct sock *sk, const char *name); > > void tcp_get_available_ulp(char *buf, size_t len); > > void tcp_cleanup_ulp(struct sock *sk); > > -void tcp_update_ulp(struct sock *sk, struct proto *p); > > +void tcp_update_ulp(struct sock *sk, struct proto *p, > > + void (*write_space)(struct sock *sk)); > > > > #define MODULE_ALIAS_TCP_ULP(name) \ > > __MODULE_INFO(alias, alias_userspace, name); \ > > diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c > > index 12ab5db2b71c..38d3ad141161 100644 > > --- a/net/ipv4/tcp_ulp.c > > +++ b/net/ipv4/tcp_ulp.c > > @@ -99,17 +99,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen) > > rcu_read_unlock(); > > } > > > > -void tcp_update_ulp(struct sock *sk, struct proto *proto) > > +void tcp_update_ulp(struct sock *sk, struct proto *proto, > > + void (*write_space)(struct sock *sk)) > > { > > struct inet_connection_sock *icsk = inet_csk(sk); > > > > if (!icsk->icsk_ulp_ops) { > > + sk->sk_write_space = write_space; > > sk->sk_prot = proto; > > return; > > } > > > > if (icsk->icsk_ulp_ops->update) > > - icsk->icsk_ulp_ops->update(sk, proto); > > + icsk->icsk_ulp_ops->update(sk, proto, write_space); > > } > > > > 
void tcp_cleanup_ulp(struct sock *sk) > > diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c > > index dac24c7aa7d4..94774c0e5ff3 100644 > > --- a/net/tls/tls_main.c > > +++ b/net/tls/tls_main.c > > @@ -732,15 +732,19 @@ static int tls_init(struct sock *sk) > > return rc; > > } > > > > -static void tls_update(struct sock *sk, struct proto *p) > > +static void tls_update(struct sock *sk, struct proto *p, > > + void (*write_space)(struct sock *sk)) > > { > > struct tls_context *ctx; > > > > ctx = tls_get_ctx(sk); > > - if (likely(ctx)) > > + if (likely(ctx)) { > > + ctx->sk_write_space = write_space; > > ctx->sk_proto = p; > > - else > > + } else { > > sk->sk_prot = p; > > + sk->sk_write_space = write_space; > > + } > > } > > > > static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
On Thu, Jan 09, 2020 at 10:22 PM CET, John Fastabend wrote: > Jakub Sitnicki wrote: >> On Wed, Jan 08, 2020 at 10:14 PM CET, John Fastabend wrote: >> > When sockmap sock with TLS enabled is removed we cleanup bpf/psock state >> > and call tcp_update_ulp() to push updates to TLS ULP on top. However, we >> > don't push the write_space callback up and instead simply overwrite the >> > op with the psock stored previous op. This may or may not be correct so >> > to ensure we don't overwrite the TLS write space hook pass this field to >> > the ULP and have it fixup the ctx. >> > >> > This completes a previous fix that pushed the ops through to the ULP >> > but at the time missed doing this for write_space, presumably because >> > write_space TLS hook was added around the same time. >> > >> > Fixes: 95fa145479fbc ("bpf: sockmap/tls, close can race with map free") >> > Signed-off-by: John Fastabend <john.fastabend@gmail.com> >> > --- >> > include/linux/skmsg.h | 12 ++++++++---- >> > include/net/tcp.h | 6 ++++-- >> > net/ipv4/tcp_ulp.c | 6 ++++-- >> > net/tls/tls_main.c | 10 +++++++--- >> > 4 files changed, 23 insertions(+), 11 deletions(-) >> > >> > diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h >> > index b6afe01f8592..14d61bba0b79 100644 >> > --- a/include/linux/skmsg.h >> > +++ b/include/linux/skmsg.h >> > @@ -359,17 +359,21 @@ static inline void sk_psock_restore_proto(struct sock *sk, >> > struct sk_psock *psock) >> > { >> > sk->sk_prot->unhash = psock->saved_unhash; >> > - sk->sk_write_space = psock->saved_write_space; >> > >> > if (psock->sk_proto) { >> > struct inet_connection_sock *icsk = inet_csk(sk); >> > bool has_ulp = !!icsk->icsk_ulp_data; >> > >> > - if (has_ulp) >> > - tcp_update_ulp(sk, psock->sk_proto); >> > - else >> > + if (has_ulp) { >> > + tcp_update_ulp(sk, psock->sk_proto, >> > + psock->saved_write_space); >> > + } else { >> > sk->sk_prot = psock->sk_proto; >> > + sk->sk_write_space = psock->saved_write_space; >> > + } >> >> I'm wondering 
if we need the above fallback branch for no-ULP case? >> tcp_update_ulp repeats the ULP check and has the same fallback. Perhaps >> it can be reduced to: >> >> if (psock->sk_proto) { >> tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); >> psock->sk_proto = NULL; >> } else { >> sk->sk_write_space = psock->saved_write_space; >> } > > Yeah that is a bit nicer. How about pushing it for bpf-next? I'm not > sure it's needed for bpf and the patch I pushed is the minimal change > needed for the fix and pushes the saved_write_space around. Yeah, this is bpf-next material. >> Then there's the question if it's okay to leave psock->sk_proto set and >> potentially restore it more than once? Reading tls_update, the only user >> ULP 'update' callback, it looks fine. >> >> Can sk_psock_restore_proto be as simple as: >> >> static inline void sk_psock_restore_proto(struct sock *sk, >> struct sk_psock *psock) >> { >> tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); >> } >> >> ... or am I missing something? > > I think that is good. bpf-next? Great, I needed to confirm my thinking. >> Asking because I have a patch [0] like this in the queue and haven't >> seen issues with it during testing. > > +1 Want to push it after we sort out this series? I've actually pushed it earlier today with the next iteration of "Extend SOCKMAP to store listening sockets" to collect feedback [0]. I will adapt it once it shows up in bpf-next (or split it out and submit separately). -jkbs [0] https://lore.kernel.org/bpf/20200110105027.257877-1-jakub@cloudflare.com/
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index b6afe01f8592..14d61bba0b79 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -359,17 +359,21 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - sk->sk_write_space = psock->saved_write_space; if (psock->sk_proto) { struct inet_connection_sock *icsk = inet_csk(sk); bool has_ulp = !!icsk->icsk_ulp_data; - if (has_ulp) - tcp_update_ulp(sk, psock->sk_proto); - else + if (has_ulp) { + tcp_update_ulp(sk, psock->sk_proto, + psock->saved_write_space); + } else { sk->sk_prot = psock->sk_proto; + sk->sk_write_space = psock->saved_write_space; + } psock->sk_proto = NULL; + } else { + sk->sk_write_space = psock->saved_write_space; } } diff --git a/include/net/tcp.h b/include/net/tcp.h index e460ea7f767b..e6f48384dc71 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2147,7 +2147,8 @@ struct tcp_ulp_ops { /* initialize ulp */ int (*init)(struct sock *sk); /* update ulp */ - void (*update)(struct sock *sk, struct proto *p); + void (*update)(struct sock *sk, struct proto *p, + void (*write_space)(struct sock *sk)); /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ @@ -2162,7 +2163,8 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); -void tcp_update_ulp(struct sock *sk, struct proto *p); +void tcp_update_ulp(struct sock *sk, struct proto *p, + void (*write_space)(struct sock *sk)); #define MODULE_ALIAS_TCP_ULP(name) \ __MODULE_INFO(alias, alias_userspace, name); \ diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 12ab5db2b71c..38d3ad141161 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -99,17 +99,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen) rcu_read_unlock(); } -void tcp_update_ulp(struct sock *sk, struct proto 
*proto) +void tcp_update_ulp(struct sock *sk, struct proto *proto, + void (*write_space)(struct sock *sk)) { struct inet_connection_sock *icsk = inet_csk(sk); if (!icsk->icsk_ulp_ops) { + sk->sk_write_space = write_space; sk->sk_prot = proto; return; } if (icsk->icsk_ulp_ops->update) - icsk->icsk_ulp_ops->update(sk, proto); + icsk->icsk_ulp_ops->update(sk, proto, write_space); } void tcp_cleanup_ulp(struct sock *sk) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index dac24c7aa7d4..94774c0e5ff3 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -732,15 +732,19 @@ static int tls_init(struct sock *sk) return rc; } -static void tls_update(struct sock *sk, struct proto *p) +static void tls_update(struct sock *sk, struct proto *p, + void (*write_space)(struct sock *sk)) { struct tls_context *ctx; ctx = tls_get_ctx(sk); - if (likely(ctx)) + if (likely(ctx)) { + ctx->sk_write_space = write_space; ctx->sk_proto = p; - else + } else { sk->sk_prot = p; + sk->sk_write_space = write_space; + } } static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
When a sockmap sock with TLS enabled is removed, we clean up bpf/psock state and call tcp_update_ulp() to push updates to the TLS ULP on top. However, we don't push the write_space callback up and instead simply overwrite the op with the psock stored previous op. This may or may not be correct, so to ensure we don't overwrite the TLS write space hook, pass this field to the ULP and have it fix up the ctx. This completes a previous fix that pushed the ops through to the ULP but at the time missed doing this for write_space, presumably because the write_space TLS hook was added around the same time. Fixes: 95fa145479fbc ("bpf: sockmap/tls, close can race with map free") Signed-off-by: John Fastabend <john.fastabend@gmail.com> --- include/linux/skmsg.h | 12 ++++++++---- include/net/tcp.h | 6 ++++-- net/ipv4/tcp_ulp.c | 6 ++++-- net/tls/tls_main.c | 10 +++++++--- 4 files changed, 23 insertions(+), 11 deletions(-)