Message ID | 20200730205723.3353838-1-kafai@fb.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | BPF TCP header options | expand |
On Thu, Jul 30, 2020 at 1:58 PM Martin KaFai Lau <kafai@fb.com> wrote: > > In the latter patch, the bpf prog only wants to be called to handle > a header option if that particular header option cannot be handled by > the kernel. This unknown option could be written by the peer's bpf-prog. > It could also be a new standard option that the running kernel does not > support it while a bpf-prog can handle it. > > In a latter patch, the bpf prog will be called from tcp_validate_incoming() > if there is unknown option and a flag is set in tp->bpf_sock_ops_cb_flags. > > Instead of using skb->cb[] in an earlier attempt, this patch > adds an optional arg "bool *unknown_opt" to tcp_parse_options(). > The bool will be set to true if it has encountered an option > that the kernel does not recognize. > > Signed-off-by: Martin KaFai Lau <kafai@fb.com> > --- > drivers/infiniband/hw/cxgb4/cm.c | 2 +- > include/net/tcp.h | 3 ++- > net/ipv4/syncookies.c | 2 +- > net/ipv4/tcp_input.c | 40 +++++++++++++++++++++----------- > net/ipv4/tcp_minisocks.c | 4 ++-- > net/ipv6/syncookies.c | 2 +- > 6 files changed, 34 insertions(+), 19 deletions(-) > > diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c > index 30e08bcc9afb..dedca6576bb9 100644 > --- a/drivers/infiniband/hw/cxgb4/cm.c > +++ b/drivers/infiniband/hw/cxgb4/cm.c > @@ -3949,7 +3949,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) > */ > memset(&tmp_opt, 0, sizeof(tmp_opt)); > tcp_clear_options(&tmp_opt); > - tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL); > + tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL, NULL); > > req = __skb_push(skb, sizeof(*req)); > memset(req, 0, sizeof(*req)); > diff --git a/include/net/tcp.h b/include/net/tcp.h > index 895e7aabf136..d49d8f1c961a 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -413,7 +413,8 @@ int tcp_mmap(struct file *file, struct socket *sock, > #endif > void tcp_parse_options(const struct net *net, 
const struct sk_buff *skb, > struct tcp_options_received *opt_rx, > - int estab, struct tcp_fastopen_cookie *foc); > + int estab, struct tcp_fastopen_cookie *foc, > + bool *unknown_opt); > const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); > Instead of changing the signatures of many functions (and making future stable backports challenging), how about adding a field into 'struct tcp_options_received'? Sorry for not suggesting this earlier :/
On Fri, Jul 31, 2020 at 09:12:10AM -0700, Eric Dumazet wrote: > On Thu, Jul 30, 2020 at 1:58 PM Martin KaFai Lau <kafai@fb.com> wrote: > > > > In the latter patch, the bpf prog only wants to be called to handle > > a header option if that particular header option cannot be handled by > > the kernel. This unknown option could be written by the peer's bpf-prog. > > It could also be a new standard option that the running kernel does not > > support it while a bpf-prog can handle it. > > > > In a latter patch, the bpf prog will be called from tcp_validate_incoming() > > if there is unknown option and a flag is set in tp->bpf_sock_ops_cb_flags. > > > > Instead of using skb->cb[] in an earlier attempt, this patch > > adds an optional arg "bool *unknown_opt" to tcp_parse_options(). > > The bool will be set to true if it has encountered an option > > that the kernel does not recognize. > > > > Signed-off-by: Martin KaFai Lau <kafai@fb.com> > > --- > > drivers/infiniband/hw/cxgb4/cm.c | 2 +- > > include/net/tcp.h | 3 ++- > > net/ipv4/syncookies.c | 2 +- > > net/ipv4/tcp_input.c | 40 +++++++++++++++++++++----------- > > net/ipv4/tcp_minisocks.c | 4 ++-- > > net/ipv6/syncookies.c | 2 +- > > 6 files changed, 34 insertions(+), 19 deletions(-) > > > > diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c > > index 30e08bcc9afb..dedca6576bb9 100644 > > --- a/drivers/infiniband/hw/cxgb4/cm.c > > +++ b/drivers/infiniband/hw/cxgb4/cm.c > > @@ -3949,7 +3949,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) > > */ > > memset(&tmp_opt, 0, sizeof(tmp_opt)); > > tcp_clear_options(&tmp_opt); > > - tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL); > > + tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL, NULL); > > > > req = __skb_push(skb, sizeof(*req)); > > memset(req, 0, sizeof(*req)); > > diff --git a/include/net/tcp.h b/include/net/tcp.h > > index 895e7aabf136..d49d8f1c961a 100644 > > --- a/include/net/tcp.h > > +++ 
b/include/net/tcp.h > > @@ -413,7 +413,8 @@ int tcp_mmap(struct file *file, struct socket *sock, > > #endif > > void tcp_parse_options(const struct net *net, const struct sk_buff *skb, > > struct tcp_options_received *opt_rx, > > - int estab, struct tcp_fastopen_cookie *foc); > > + int estab, struct tcp_fastopen_cookie *foc, > > + bool *unknown_opt); > > const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); > > > > Instead of changing signatures of many functions (and make future > stable backports challenging) > how about adding a field into 'struct tcp_options_received' ? Sounds good. There is a one-byte hole in 'struct tcp_options_received', so it won't matter much even though there is an "rx_opt" in "struct tcp_sock".
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 30e08bcc9afb..dedca6576bb9 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3949,7 +3949,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) */ memset(&tmp_opt, 0, sizeof(tmp_opt)); tcp_clear_options(&tmp_opt); - tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL); + tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL, NULL); req = __skb_push(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); diff --git a/include/net/tcp.h b/include/net/tcp.h index 895e7aabf136..d49d8f1c961a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -413,7 +413,8 @@ int tcp_mmap(struct file *file, struct socket *sock, #endif void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab, struct tcp_fastopen_cookie *foc); + int estab, struct tcp_fastopen_cookie *foc, + bool *unknown_opt); const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9a4f6b16c9bc..fd39aed4fcd3 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -313,7 +313,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { tsoff = secure_tcp_ts_off(sock_net(sk), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6c38ca9de17e..d9c878001be2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3799,7 +3799,7 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, foc->exp = exp_opt; } -static void smc_parse_options(const struct tcphdr *th, +static bool smc_parse_options(const struct tcphdr *th, struct 
tcp_options_received *opt_rx, const unsigned char *ptr, int opsize) @@ -3808,10 +3808,13 @@ static void smc_parse_options(const struct tcphdr *th, if (static_branch_unlikely(&tcp_have_smc)) { if (th->syn && !(opsize & 1) && opsize >= TCPOLEN_EXP_SMC_BASE && - get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) + get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) { opt_rx->smc_ok = 1; + return true; + } } #endif + return false; } /* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped @@ -3864,7 +3867,8 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, - struct tcp_fastopen_cookie *foc) + struct tcp_fastopen_cookie *foc, + bool *unknown_opt) { const unsigned char *ptr; const struct tcphdr *th = tcp_hdr(skb); @@ -3962,15 +3966,23 @@ void tcp_parse_options(const struct net *net, */ if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE && get_unaligned_be16(ptr) == - TCPOPT_FASTOPEN_MAGIC) + TCPOPT_FASTOPEN_MAGIC) { tcp_parse_fastopen_option(opsize - TCPOLEN_EXP_FASTOPEN_BASE, ptr + 2, th->syn, foc, true); - else - smc_parse_options(th, opt_rx, ptr, - opsize); + break; + } + + if (smc_parse_options(th, opt_rx, ptr, opsize)) + break; + + if (unknown_opt) + *unknown_opt = true; break; + default: + if (unknown_opt) + *unknown_opt = true; } ptr += opsize-2; length -= opsize; @@ -4003,7 +4015,8 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr */ static bool tcp_fast_parse_options(const struct net *net, const struct sk_buff *skb, - const struct tcphdr *th, struct tcp_sock *tp) + const struct tcphdr *th, struct tcp_sock *tp, + bool *unknown_opt) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. 
@@ -4017,7 +4030,7 @@ static bool tcp_fast_parse_options(const struct net *net, return true; } - tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL); + tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL, unknown_opt); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5492,9 +5505,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); bool rst_seq_match = false; + bool unknown_opt = false; /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) && + if (tcp_fast_parse_options(sock_net(sk), skb, th, tp, &unknown_opt) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { @@ -5866,7 +5880,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; - tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL); + tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL, NULL); mss = opt.mss_clamp; } @@ -5951,7 +5965,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; bool fastopen_fail; - tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc); + tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc, NULL); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -6685,7 +6699,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tmp_opt.mss_clamp = af_ops->mss_clamp; tmp_opt.user_mss = tp->rx_opt.user_mss; tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, - want_cookie ? NULL : &foc); + want_cookie ? 
NULL : &foc, NULL); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 495dda2449fe..61f9194802c4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -98,7 +98,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); + tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL, NULL); if (tmp_opt.saw_tstamp) { if (tmp_opt.rcv_tsecr) @@ -580,7 +580,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 13235a012388..f22961a73c2b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -157,7 +157,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { tsoff = secure_tcpv6_ts_off(sock_net(sk),
In a later patch, the bpf prog only wants to be called to handle a header option if that particular header option cannot be handled by the kernel. This unknown option could be written by the peer's bpf-prog. It could also be a new standard option that the running kernel does not support but a bpf-prog can handle. In a later patch, the bpf prog will be called from tcp_validate_incoming() if there is an unknown option and a flag is set in tp->bpf_sock_ops_cb_flags. Instead of using skb->cb[] as in an earlier attempt, this patch adds an optional arg "bool *unknown_opt" to tcp_parse_options(). The bool will be set to true if it has encountered an option that the kernel does not recognize. Signed-off-by: Martin KaFai Lau <kafai@fb.com> --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- include/net/tcp.h | 3 ++- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_input.c | 40 +++++++++++++++++++++----------- net/ipv4/tcp_minisocks.c | 4 ++-- net/ipv6/syncookies.c | 2 +- 6 files changed, 34 insertions(+), 19 deletions(-)