@@ -30,8 +30,8 @@ struct mptcp_ext {
ack64:1,
mpc_map:1,
frozen:1,
- __unused:1;
- /* one byte hole */
+ reset_transient:1;
+ u8 reset_reason:4;
};
struct mptcp_out_options {
@@ -50,6 +50,8 @@ struct mptcp_out_options {
u8 rm_id;
u8 join_id;
u8 backup;
+ u8 reset_reason:4;
+ u8 reset_transient:1;
u32 nonce;
u64 thmac;
u32 token;
@@ -193,6 +193,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_FASTOPEN_MAGIC 0xF989
#define TCPOPT_SMC_MAGIC 0xE2D4C3D9
+/* MPTCP suboptions used in TCP */
+#define MPTCPOPT_TCPRST 8
/*
* TCP option lengths
*/
@@ -216,6 +218,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_MD5SIG_ALIGNED 20
#define TCPOLEN_MSS_ALIGNED 4
#define TCPOLEN_EXP_SMC_BASE_ALIGNED 8
+#define TCPOLEN_MPTCP_TCPRST 4
/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
@@ -660,9 +660,11 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
+ __be32 opt[(TCPOLEN_MPTCP_TCPRST >> 2)
#ifdef CONFIG_TCP_MD5SIG
- __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
+ + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
+ ];
} rep;
struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
@@ -770,6 +772,23 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
ip_hdr(skb)->daddr, &rep.th);
}
#endif
+ /* Can't co-exist with TCPMD5, hence check rep.opt[0] */
+ if (sk && sk_fullsock(sk) && sk_is_mptcp(sk) && rep.opt[0] == 0) {
+ const struct mptcp_ext *ext = mptcp_get_ext(skb);
+ u8 flags = 0, reason = 0;
+
+ if (ext) {
+ flags = ext->reset_transient;
+ reason = ext->reset_reason;
+ }
+
+ rep.opt[0] = mptcp_option(MPTCPOPT_TCPRST, TCPOLEN_MPTCP_TCPRST,
+ flags, reason);
+
+ arg.iov[0].iov_len += TCPOLEN_MPTCP_TCPRST;
+ rep.th.doff = arg.iov[0].iov_len / 4;
+ }
+
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
@@ -868,6 +868,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
struct sock *ctl_sk = net->ipv6.tcp_sk;
unsigned int tot_len = sizeof(struct tcphdr);
+ bool mptcp_reset = false;
struct dst_entry *dst;
__be32 *topt;
__u32 mark = 0;
@@ -879,6 +880,11 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
+ if (rst && sk && sk_fullsock(sk) && sk_is_mptcp(sk) && !key) {
+ tot_len += TCPOLEN_MPTCP_TCPRST;
+ mptcp_reset = true;
+ }
+
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
GFP_ATOMIC);
if (!buff)
@@ -909,6 +915,19 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
*topt++ = htonl(tsecr);
}
+ if (mptcp_reset) {
+ const struct mptcp_ext *ext = mptcp_get_ext(skb);
+ u8 flags = 0, reason = 0;
+
+ if (ext) {
+ flags = ext->reset_transient;
+ reason = ext->reset_reason;
+ }
+
+ *topt++ = mptcp_option(MPTCPOPT_TCPRST, TCPOLEN_MPTCP_TCPRST,
+ flags, reason);
+ }
+
#ifdef CONFIG_TCP_MD5SIG
if (key) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
@@ -280,7 +280,17 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rm_id = *ptr++;
pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
break;
+ case MPTCPOPT_TCPRST:
+ if (opsize != TCPOLEN_MPTCP_TCPRST)
+ break;
+ if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
+ break;
+ mp_opt->reset = 1;
+ flags = *ptr++;
+ mp_opt->reset_transient = flags & MPTCP_TCPRST_TRANSIENT;
+ mp_opt->reset_reason = *ptr;
+ break;
default:
break;
}
@@ -299,6 +309,7 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->add_addr = 0;
mp_opt->rm_addr = 0;
mp_opt->dss = 0;
+ mp_opt->reset = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -477,6 +488,22 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
}
}
+static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
+ unsigned int *size, /* out: set to TCPOLEN_MPTCP_TCPRST when the option is queued */
+ unsigned int remaining, /* must be >= TCPOLEN_MPTCP_TCPRST for the option to be queued */
+ struct mptcp_out_options *opts) /* out: receives the TCPRST suboption bit, flag and reason */
+{ /* Queue an MPTCP TCPRST suboption carrying the reset flag and reason stored in sk's subflow context; skb is not used in the body. */
+ const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+ if (remaining < TCPOLEN_MPTCP_TCPRST) /* not enough room: leave *size and opts untouched */
+ return;
+
+ *size = TCPOLEN_MPTCP_TCPRST; /* NOTE(review): assigns rather than adds; the DSS caller has already stored its own length in *size before calling here -- confirm this is intended */
+ opts->suboptions |= OPTION_MPTCP_TCPRST; /* mptcp_write_options() tests this bit before emitting the suboption */
+ opts->reset_transient = subflow->reset_transient;
+ opts->reset_reason = subflow->reset_reason;
+}
+
static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
unsigned int *size,
unsigned int remaining,
@@ -535,6 +562,10 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
dss_size += ack_size;
*size = ALIGN(dss_size, 4);
+
+ if (unlikely(skb && (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)))
+ mptcp_established_options_rst(sk, skb, size, remaining, opts);
+
return true;
}
@@ -1065,6 +1096,12 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
ptr += 5;
}
+ if (OPTION_MPTCP_TCPRST & opts->suboptions)
+ *ptr++ = mptcp_option(MPTCPOPT_TCPRST,
+ TCPOLEN_MPTCP_TCPRST,
+ opts->reset_transient,
+ opts->reset_reason);
+
if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
struct mptcp_ext *mpext = &opts->ext_copy;
u8 len = TCPOLEN_MPTCP_DSS_BASE;
@@ -2402,14 +2402,18 @@ bool mptcp_finish_join(struct sock *sk)
pr_debug("msk=%p, subflow=%p", msk, subflow);
/* mptcp socket already closing? */
- if (!mptcp_is_fully_established(parent))
+ if (!mptcp_is_fully_established(parent)) {
+ subflow->reset_reason = MPTCP_TCPRST_EMPTCP;
return false;
+ }
if (!msk->pm.server_side)
return true;
- if (!mptcp_pm_allow_new_subflow(msk))
+ if (!mptcp_pm_allow_new_subflow(msk)) {
+ subflow->reset_reason = MPTCP_TCPRST_EPROHIBIT;
return false;
+ }
/* active connections are already on conn_list, and we can't acquire
* msk lock here.
@@ -2421,8 +2425,10 @@ bool mptcp_finish_join(struct sock *sk)
if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node)))
list_add_tail(&subflow->node, &msk->join_list);
spin_unlock_bh(&msk->join_list_lock);
- if (!ret)
+ if (!ret) {
+ subflow->reset_reason = MPTCP_TCPRST_EPROHIBIT;
return false;
+ }
/* attach to msk socket only after we are sure he will deal with us
* at close time
@@ -23,6 +23,7 @@
#define OPTION_MPTCP_ADD_ADDR BIT(6)
#define OPTION_MPTCP_ADD_ADDR6 BIT(7)
#define OPTION_MPTCP_RM_ADDR BIT(8)
+#define OPTION_MPTCP_TCPRST BIT(9)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
@@ -84,6 +85,18 @@
#define MPTCP_ADDR_IPVERSION_4 4
#define MPTCP_ADDR_IPVERSION_6 6
+/* MPTCP TCPRST flags */
+#define MPTCP_TCPRST_TRANSIENT BIT(0)
+
+/* MPTCP TCPRST reason codes */
+#define MPTCP_TCPRST_EUNSPEC 0
+#define MPTCP_TCPRST_EMPTCP 1
+#define MPTCP_TCPRST_ERESOURCE 2
+#define MPTCP_TCPRST_EPROHIBIT 3
+#define MPTCP_TCPRST_EWQ2BIG 4
+#define MPTCP_TCPRST_EBADPERF 5
+#define MPTCP_TCPRST_EMIDDLEBOX 6
+
/* MPTCP socket flags */
#define MPTCP_DATA_READY 0
#define MPTCP_SEND_SPACE 1
@@ -100,6 +113,7 @@ struct mptcp_options_received {
u16 data_len;
u16 mp_capable : 1,
mp_join : 1,
+ reset : 1,
dss : 1,
add_addr : 1,
rm_addr : 1,
@@ -120,6 +134,8 @@ struct mptcp_options_received {
__unused:2;
u8 addr_id;
u8 rm_id;
+ u8 reset_reason:4;
+ u8 reset_transient:1;
union {
struct in_addr addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -314,6 +330,8 @@ struct mptcp_subflow_context {
u8 hmac[MPTCPOPT_HMAC_LEN];
u8 local_id;
u8 remote_id;
+ u8 reset_transient:1;
+ u8 reset_reason:4;
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
@@ -312,8 +312,10 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
- if (!mp_opt.mp_join)
+ if (!mp_opt.mp_join) {
+ subflow->reset_reason = MPTCP_TCPRST_EMPTCP;
goto do_reset;
+ }
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
@@ -322,6 +324,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (!subflow_thmac_valid(subflow)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
+ subflow->reset_reason = MPTCP_TCPRST_EMPTCP;
goto do_reset;
}
@@ -343,6 +346,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
return;
do_reset:
+ subflow->reset_transient = 0;
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_done(sk);
}
@@ -493,6 +497,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_options_received mp_opt;
bool fallback, fallback_is_fatal;
struct sock *new_msk = NULL;
+ struct mptcp_ext *mpext;
struct sock *child;
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -553,8 +558,15 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* to reset the context to non MPTCP status.
*/
if (!ctx || fallback) {
- if (fallback_is_fatal)
+ if (fallback_is_fatal) {
+ mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
+ if (mpext) {
+ memset(mpext, 0, sizeof(*mpext));
+ mpext->reset_reason = MPTCP_TCPRST_EMPTCP;
+ }
+
goto dispose_child;
+ }
subflow_drop_ctx(child);
goto out;
@@ -584,8 +596,15 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_sock *owner;
owner = subflow_req->msk;
- if (!owner)
+ if (!owner) {
+ mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
+ if (mpext) {
+ memset(mpext, 0, sizeof(*mpext));
+ mpext->reset_reason = MPTCP_TCPRST_EPROHIBIT;
+ }
+
goto dispose_child;
+ }
/* move the msk reference ownership to the subflow */
subflow_req->msk = NULL;
@@ -911,6 +930,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
smp_wmb();
ssk->sk_error_report(ssk);
tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_TCPRST_EMPTCP;
tcp_send_active_reset(ssk, GFP_ATOMIC);
subflow->data_avail = 0;
return false;
Reset option data that is received will be stored in the mptcp skb extension structure so it can be consumed by e.g. path management. When a subflow gets closed, the desired error code is stored in the subflow context structure. When the close happens before a suitable tcp socket has been created (for example, when HMAC fails validation), it's possible to attach the mptcp skb extension and store the reset reason code there. Signed-off-by: Florian Westphal <fw@strlen.de> --- include/net/mptcp.h | 6 ++++-- include/net/tcp.h | 3 +++ net/ipv4/tcp_ipv4.c | 21 ++++++++++++++++++++- net/ipv6/tcp_ipv6.c | 19 +++++++++++++++++++ net/mptcp/options.c | 37 +++++++++++++++++++++++++++++++++++++ net/mptcp/protocol.c | 12 +++++++++--- net/mptcp/protocol.h | 18 ++++++++++++++++++ net/mptcp/subflow.c | 27 ++++++++++++++++++++++++--- 8 files changed, 134 insertions(+), 9 deletions(-)