From patchwork Wed Mar 24 13:15:46 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Florian Westphal X-Patchwork-Id: 1457885 X-Patchwork-Delegate: pabeni@redhat.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.01.org (client-ip=198.145.21.10; helo=ml01.01.org; envelope-from=mptcp-bounces@lists.01.org; receiver=) Received: from ml01.01.org (ml01.01.org [198.145.21.10]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4F57zl3T4Yz9sWV for ; Thu, 25 Mar 2021 00:16:35 +1100 (AEDT) Received: from ml01.vlan13.01.org (localhost [IPv6:::1]) by ml01.01.org (Postfix) with ESMTP id 77FA2100EB355; Wed, 24 Mar 2021 06:16:33 -0700 (PDT) Received-SPF: Pass (mailfrom) identity=mailfrom; client-ip=2a0a:51c0:0:12e:520::1; helo=chamillionaire.breakpoint.cc; envelope-from=fw@breakpoint.cc; receiver= Received: from Chamillionaire.breakpoint.cc (Chamillionaire.breakpoint.cc [IPv6:2a0a:51c0:0:12e:520::1]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)) (No client certificate requested) by ml01.01.org (Postfix) with ESMTPS id 32AED100EB353 for ; Wed, 24 Mar 2021 06:16:31 -0700 (PDT) Received: from fw by Chamillionaire.breakpoint.cc with local (Exim 4.92) (envelope-from ) id 1lP3N7-0003ll-M0; Wed, 24 Mar 2021 14:16:29 +0100 From: Florian Westphal To: Date: Wed, 24 Mar 2021 14:15:46 +0100 Message-Id: <20210324131546.13730-9-fw@strlen.de> X-Mailer: git-send-email 2.26.3 In-Reply-To: <20210324131546.13730-1-fw@strlen.de> References: <20210324131546.13730-1-fw@strlen.de> MIME-Version: 1.0 Message-ID-Hash: 76DDI4N75KUZQAQKQJBWNCPLRL36WJO3 X-Message-ID-Hash: 76DDI4N75KUZQAQKQJBWNCPLRL36WJO3 X-MailFrom: fw@breakpoint.cc 
X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; suspicious-header CC: mptcp@lists.01.org X-Mailman-Version: 3.1.1 Precedence: list Subject: [MPTCP] [RFC PATCH mptcp-next v2 8/8] mptcp: sockopt: add TCP_CONGESTION and TCP_INFO List-Id: Discussions regarding MPTCP upstreaming Archived-At: List-Archive: List-Help: List-Post: List-Subscribe: List-Unsubscribe: TCP_CONGESTION is set for all subflows. The mptcp socket gains icsk_ca_ops too so it can be used to keep the authoritative state that should be set on new/future subflows. TCP_INFO will return data for the first subflow only. The out-of-tree kernel has an MPTCP_INFO getsockopt; this could be added later on. Signed-off-by: Florian Westphal --- net/mptcp/protocol.c | 4 ++ net/mptcp/sockopt.c | 105 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 15beb99f559d..3aefc52ab8f1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2402,6 +2402,8 @@ static int __mptcp_init_sock(struct sock *sk) timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); + tcp_assign_congestion_control(sk); + #if IS_ENABLED(CONFIG_KASAN) sock_set_flag(sk, SOCK_RCU_FREE); #endif @@ -2598,6 +2600,8 @@ static void __mptcp_destroy_sock(struct sock *sk) WARN_ON_ONCE(msk->rmem_released); sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); + + tcp_cleanup_congestion_control(sk); sk_refcnt_debug_release(sk); mptcp_dispose_initial_subflow(msk); sock_put(sk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index d345ebc3947a..2c9aabe631c2 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -481,6 +481,63 @@ static bool mptcp_supported_sockopt(int level, int optname) return false; } +static int 
mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen) +{ /* TCP_CONGESTION setter: copies the algorithm name from optval, applies it to every subflow while holding the msk lock and, if no subflow call failed, to the MPTCP socket itself. Returns 0, -EINVAL for optlen < 1, -EFAULT if the name cannot be copied, or the first subflow error. */ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + char name[TCP_CA_NAME_MAX]; + bool cap_net_admin; + int ret; + + if (optlen < 1) + return -EINVAL; + + ret = strncpy_from_sockptr(name, optval, + min_t(long, TCP_CA_NAME_MAX-1, optlen)); /* cap the copy at TCP_CA_NAME_MAX-1 bytes; name[] holds TCP_CA_NAME_MAX */ + if (ret < 0) + return -EFAULT; + + name[ret] = 0; /* ret indexes the terminator here; presumably the copied length - confirm against strncpy_from_sockptr() */ + + cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); /* evaluated once, before any socket lock is taken */ + + ret = 0; /* reused from here on as the aggregated result */ + lock_sock(sk); + msk->setsockopt_seq++; + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err; + + lock_sock(ssk); + err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); + if (err < 0 && ret == 0) + ret = err; /* only the first failure is kept; the remaining subflows are still attempted */ + subflow->setsockopt_seq = msk->setsockopt_seq; /* stamp the subflow with the msk sequence number bumped above */ + release_sock(ssk); + } + + if (ret == 0) + tcp_set_congestion_control(sk, name, false, cap_net_admin); /* NOTE(review): return value is ignored here, unlike the per-subflow calls - confirm this is intended */ + + release_sock(sk); + return ret; +} + + +static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) +{ /* SOL_TCP setsockopt dispatcher: only TCP_CONGESTION is handled; TCP_ULP and every other option yield -EOPNOTSUPP. */ + switch (optname) { + case TCP_ULP: + return -EOPNOTSUPP; /* listed explicitly although the result matches the fallback below */ + case TCP_CONGESTION: + return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); + } + + return -EOPNOTSUPP; +} + int mptcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -510,6 +567,49 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname, if (level == SOL_IPV6) return mptcp_setsockopt_v6(msk, optname, optval, optlen); + if (level == SOL_TCP) + return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); + + return -EOPNOTSUPP; +} + +static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, + char __user *optval, int __user *optlen) +{ /* Answers a getsockopt from a single TCP socket while holding the msk lock: msk->first when it is set, otherwise the socket returned by __mptcp_nmpc_socket(). Returns -EINVAL when neither is available, else the tcp_getsockopt() result. */ + struct sock *sk = (struct sock *)msk; + struct socket *ssock; + int ret = -EINVAL; /* result when no socket can be found below */ + struct sock *ssk; + + lock_sock(sk); + ssk = msk->first; + if (ssk) { + ret = tcp_getsockopt(ssk, level, 
optname, optval, optlen); + goto out; + } + + ssock = __mptcp_nmpc_socket(msk); /* fallback used only when msk->first is NULL */ + if (!ssock) + goto out; + + ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); + +out: + release_sock(sk); + return ret; +} + +static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, + char __user *optval, int __user *optlen) +{ /* SOL_TCP getsockopt dispatcher: TCP_ULP, TCP_CONGESTION, TCP_INFO and TCP_CC_INFO are answered by mptcp_getsockopt_first_sf_only(); anything else yields -EOPNOTSUPP. */ + switch (optname) { + case TCP_ULP: + case TCP_CONGESTION: + case TCP_INFO: + case TCP_CC_INFO: + return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, + optval, optlen); + } return -EOPNOTSUPP; } @@ -533,6 +633,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname, if (ssk) return tcp_getsockopt(ssk, level, optname, optval, option); + if (level == SOL_TCP) + return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); return -EOPNOTSUPP; } @@ -569,6 +671,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) } sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); + + if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) + tcp_set_congestion_control(ssk, inet_csk(sk)->icsk_ca_ops->name, false, true); } void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)