From patchwork Thu Nov 5 17:01:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Florian Westphal X-Patchwork-Id: 1395131 X-Patchwork-Delegate: fw@strlen.de Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.01.org (client-ip=2001:19d0:306:5::1; helo=ml01.01.org; envelope-from=mptcp-bounces@lists.01.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=strlen.de Received: from ml01.01.org (ml01.01.org [IPv6:2001:19d0:306:5::1]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4CRqYt3D2Zz9sSn for ; Fri, 6 Nov 2020 04:01:54 +1100 (AEDT) Received: from ml01.vlan13.01.org (localhost [IPv6:::1]) by ml01.01.org (Postfix) with ESMTP id 3B20016646E30; Thu, 5 Nov 2020 09:01:50 -0800 (PST) Received-SPF: Pass (mailfrom) identity=mailfrom; client-ip=2a0a:51c0:0:12e:520::1; helo=chamillionaire.breakpoint.cc; envelope-from=fw@breakpoint.cc; receiver= Received: from Chamillionaire.breakpoint.cc (Chamillionaire.breakpoint.cc [IPv6:2a0a:51c0:0:12e:520::1]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)) (No client certificate requested) by ml01.01.org (Postfix) with ESMTPS id 6B81D16646E30 for ; Thu, 5 Nov 2020 09:01:47 -0800 (PST) Received: from fw by Chamillionaire.breakpoint.cc with local (Exim 4.92) (envelope-from ) id 1kaidt-0007D9-ME; Thu, 05 Nov 2020 18:01:45 +0100 From: Florian Westphal To: Cc: Florian Westphal Date: Thu, 5 Nov 2020 18:01:24 +0100 Message-Id: <20201105170126.5627-4-fw@strlen.de> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201105170126.5627-1-fw@strlen.de> References: <20201105170126.5627-1-fw@strlen.de> MIME-Version: 1.0 Message-ID-Hash: 7VKOR65WVPGQG4QLOTAN3HMWS4AD6MP5 X-Message-ID-Hash: 7VKOR65WVPGQG4QLOTAN3HMWS4AD6MP5 X-MailFrom: fw@breakpoint.cc X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; suspicious-header X-Mailman-Version: 3.1.1 Precedence: list Subject: [MPTCP] [PATCH MPTCP 3/5] mptcp: add mptcp reset option support List-Id: Discussions regarding MPTCP upstreaming Archived-At: List-Archive: List-Help: List-Post: List-Subscribe: List-Unsubscribe: The MPTCP reset option allows to carry a mptcp-specific error code that provides more information on the nature of a connection reset. The Reset option data received gets stored in the mptcp skb extension structure so it can be consumed by e.g. path management. When a subflow is closed, the desired error code that should be sent to the peer is placed in the subflow context structure. If close happens before a suitable tcp socket has been created (for example, when HMAC fails validation), the reset code can be placed in the mptcp skb extension which then gets added to the TCP reset skb. Signed-off-by: Florian Westphal --- include/net/mptcp.h | 6 ++++-- include/net/tcp.h | 3 +++ net/ipv4/tcp_ipv4.c | 21 ++++++++++++++++++++- net/ipv6/tcp_ipv6.c | 19 +++++++++++++++++++ net/mptcp/options.c | 42 +++++++++++++++++++++++++++++++++++++----- net/mptcp/protocol.c | 12 +++++++++--- net/mptcp/protocol.h | 18 ++++++++++++++++++ net/mptcp/subflow.c | 27 ++++++++++++++++++++++++--- 8 files changed, 134 insertions(+), 14 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 3d57607982fa..0aed06330a25 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -30,8 +30,8 @@ struct mptcp_ext { ack64:1, mpc_map:1, frozen:1, - __unused:1; - /* one byte hole */ + reset_transient:1; + u8 reset_reason:4; }; struct mptcp_out_options { @@ -50,6 +50,8 @@ struct mptcp_out_options { u8 rm_id; u8 join_id; u8 backup; + u8 reset_reason:4; + u8 reset_transient:1; u32 nonce; u64 thmac; u32 token; diff --git a/include/net/tcp.h b/include/net/tcp.h index c0fef9e9ba20..899f87346b49 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -193,6 +193,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_FASTOPEN_MAGIC 0xF989 #define TCPOPT_SMC_MAGIC 0xE2D4C3D9 +/* MPTCP suboptions used in TCP */ +#define MPTCPOPT_RST 8 /* * TCP option lengths */ @@ -216,6 +218,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_MD5SIG_ALIGNED 20 #define TCPOLEN_MSS_ALIGNED 4 #define TCPOLEN_EXP_SMC_BASE_ALIGNED 8 +#define TCPOLEN_MPTCP_RST 4 /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7352c097ae48..c96aea5514c6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -660,9 +660,11 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; + __be32 opt[(TCPOLEN_MPTCP_RST >> 2) #ifdef CONFIG_TCP_MD5SIG - __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; + + (TCPOLEN_MD5SIG_ALIGNED >> 2) #endif + ]; } rep; struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG @@ -770,6 +772,23 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ip_hdr(skb)->daddr, &rep.th); } #endif + /* Can't co-exist with TCPMD5, hence check rep.opt[0] */ + if (sk && sk_fullsock(sk) && sk_is_mptcp(sk) && rep.opt[0] == 0) { + const struct mptcp_ext *ext = mptcp_get_ext(skb); + u8 flags = 0, reason = 0; + + if (ext) { + flags = ext->reset_transient; + reason = ext->reset_reason; + } + + rep.opt[0] = mptcp_option(MPTCPOPT_RST, TCPOLEN_MPTCP_RST, + flags, reason); + + arg.iov[0].iov_len += TCPOLEN_MPTCP_RST; + rep.th.doff = arg.iov[0].iov_len / 4; + } + arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8db59f4e5f13..cfe8d6b4c34c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -868,6 +868,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); + bool mptcp_reset = false; struct dst_entry *dst; __be32 *topt; __u32 mark = 0; @@ -879,6 +880,11 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 tot_len += TCPOLEN_MD5SIG_ALIGNED; #endif + if (rst && sk && sk_fullsock(sk) && sk_is_mptcp(sk) && !key) { + tot_len += TCPOLEN_MPTCP_RST; + mptcp_reset = true; + } + buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, GFP_ATOMIC); if (!buff) @@ -909,6 +915,19 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 *topt++ = htonl(tsecr); } + if (mptcp_reset) { + const struct mptcp_ext *ext = mptcp_get_ext(skb); + u8 flags = 0, reason = 0; + + if (ext) { + flags = ext->reset_transient; + reason = ext->reset_reason; + } + + *topt++ = mptcp_option(MPTCPOPT_RST, TCPOLEN_MPTCP_RST, + flags, reason); + } + #ifdef CONFIG_TCP_MD5SIG if (key) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 248e3930c0cb..785a9f4e7da8 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -281,7 +281,17 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rm_id = *ptr++; pr_debug("RM_ADDR: id=%d", mp_opt->rm_id); break; + case MPTCPOPT_RST: + if (opsize != TCPOLEN_MPTCP_RST) + break; + if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) + break; + mp_opt->reset = 1; + flags = *ptr++; + mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; + mp_opt->reset_reason = *ptr; + break; default: break; } @@ -302,6 +312,7 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->port = 0; mp_opt->rm_addr = 0; mp_opt->dss = 0; + mp_opt->reset = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -660,6 +671,22 @@ static bool mptcp_established_options_rm_addr(struct sock *sk, return true; } +static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb, + unsigned int *size, + unsigned int remaining, + struct mptcp_out_options *opts) +{ + const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + + if (remaining < TCPOLEN_MPTCP_RST) + return; + + *size = TCPOLEN_MPTCP_RST; + opts->suboptions |= OPTION_MPTCP_RST; + opts->reset_transient = subflow->reset_transient; + opts->reset_reason = subflow->reset_reason; +} + bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) @@ -672,11 +699,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, if (unlikely(mptcp_check_fallback(sk))) return false; - /* prevent adding of any MPTCP related options on reset packet - * until we support MP_TCPRST/MP_FASTCLOSE - */ - if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) - return false; + if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { + mptcp_established_options_rst(sk, skb, size, remaining, opts); + return true; + } if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts)) ret = true; @@ -1137,6 +1163,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, ptr += 5; } + if (OPTION_MPTCP_RST & opts->suboptions) + *ptr++ = mptcp_option(MPTCPOPT_RST, + TCPOLEN_MPTCP_RST, + opts->reset_transient, + opts->reset_reason); + if (opts->ext_copy.use_ack || opts->ext_copy.use_map) { struct mptcp_ext *mpext = &opts->ext_copy; u8 len = TCPOLEN_MPTCP_DSS_BASE; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a6bd06c724d5..71e556540161 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2608,14 +2608,18 @@ bool mptcp_finish_join(struct sock *ssk) pr_debug("msk=%p, subflow=%p", msk, subflow); /* mptcp socket already closing? */ - if (!mptcp_is_fully_established(parent)) + if (!mptcp_is_fully_established(parent)) { + subflow->reset_reason = MPTCP_RST_EMPTCP; return false; + } if (!msk->pm.server_side) return true; - if (!mptcp_pm_allow_new_subflow(msk)) + if (!mptcp_pm_allow_new_subflow(msk)) { + subflow->reset_reason = MPTCP_RST_EPROHIBIT; return false; + } /* active connections are already on conn_list, and we can't acquire * msk lock here. @@ -2629,8 +2633,10 @@ bool mptcp_finish_join(struct sock *ssk) sock_hold(ssk); } spin_unlock_bh(&msk->join_list_lock); - if (!ret) + if (!ret) { + subflow->reset_reason = MPTCP_RST_EPROHIBIT; return false; + } /* attach to msk socket only after we are sure he will deal with us * at close time diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 66bd4d096753..8a247e50d326 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -23,6 +23,7 @@ #define OPTION_MPTCP_ADD_ADDR BIT(6) #define OPTION_MPTCP_ADD_ADDR6 BIT(7) #define OPTION_MPTCP_RM_ADDR BIT(8) +#define OPTION_MPTCP_RST BIT(9) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 @@ -84,6 +85,18 @@ #define MPTCP_ADDR_IPVERSION_4 4 #define MPTCP_ADDR_IPVERSION_6 6 +/* MPTCP TCPRST flags */ +#define MPTCP_RST_TRANSIENT BIT(0) + +/* MPTCP Reset reason codes, rfc8684 */ +#define MPTCP_RST_EUNSPEC 0 +#define MPTCP_RST_EMPTCP 1 +#define MPTCP_RST_ERESOURCE 2 +#define MPTCP_RST_EPROHIBIT 3 +#define MPTCP_RST_EWQ2BIG 4 +#define MPTCP_RST_EBADPERF 5 +#define MPTCP_RST_EMIDDLEBOX 6 + /* MPTCP socket flags */ #define MPTCP_DATA_READY 0 #define MPTCP_NOSPACE 1 @@ -109,6 +122,7 @@ struct mptcp_options_received { u16 data_len; u16 mp_capable : 1, mp_join : 1, + reset : 1, dss : 1, add_addr : 1, rm_addr : 1, @@ -129,6 +143,8 @@ struct mptcp_options_received { __unused:2; u8 addr_id; u8 rm_id; + u8 reset_reason:4; + u8 reset_transient:1; union { struct in_addr addr; #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -365,6 +381,8 @@ struct mptcp_subflow_context { u8 hmac[MPTCPOPT_HMAC_LEN]; u8 local_id; u8 remote_id; + u8 reset_transient:1; + u8 reset_reason:4; struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1e9a72af67dc..e0da6712a5c3 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -325,8 +325,10 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; - if (!mp_opt.mp_join) + if (!mp_opt.mp_join) { + subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; + } subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; @@ -335,6 +337,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (!subflow_thmac_valid(subflow)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); + subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } @@ -356,6 +359,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) return; do_reset: + subflow->reset_transient = 0; mptcp_subflow_reset(sk); } @@ -505,6 +509,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; struct sock *new_msk = NULL; + struct mptcp_ext *mpext; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -565,8 +570,15 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * to reset the context to non MPTCP status. */ if (!ctx || fallback) { - if (fallback_is_fatal) + if (fallback_is_fatal) { + mpext = skb_ext_add(skb, SKB_EXT_MPTCP); + if (mpext) { + memset(mpext, 0, sizeof(*mpext)); + mpext->reset_reason = MPTCP_RST_EMPTCP; + } + goto dispose_child; + } subflow_drop_ctx(child); goto out; @@ -600,8 +612,15 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_sock *owner; owner = subflow_req->msk; - if (!owner) + if (!owner) { + mpext = skb_ext_add(skb, SKB_EXT_MPTCP); + if (mpext) { + memset(mpext, 0, sizeof(*mpext)); + mpext->reset_reason = MPTCP_RST_EPROHIBIT; + } + goto dispose_child; + } /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; @@ -936,6 +955,8 @@ static bool subflow_check_data_avail(struct sock *ssk) smp_wmb(); ssk->sk_error_report(ssk); tcp_set_state(ssk, TCP_CLOSE); + subflow->reset_transient = 0; + subflow->reset_reason = MPTCP_RST_EMPTCP; tcp_send_active_reset(ssk, GFP_ATOMIC); subflow->data_avail = 0; return false;