diff mbox series

[v2,2/2] net: socket: implement SO_DESCRIPTION

Message ID 20200822032827.6386-2-kalou@tfz.net
State Changes Requested
Delegated to: David Miller
Headers show
Series [v2,1/2] mm: add GFP mask param to strndup_user | expand

Commit Message

Pascal Bouchareine Aug. 22, 2020, 3:28 a.m. UTC
This command attaches the zero terminated string in optval to the
socket for troubleshooting purposes. The free string is displayed in the
process fdinfo file for that fd (/proc/<pid>/fdinfo/<fd>).

One intended usage is to allow processes to self-document sockets
for netstat and friends to report.

We ignore optlen and constrain the string to a static max size.

Signed-off-by: Pascal Bouchareine <kalou@tfz.net>
---
 include/net/sock.h                |  4 +++
 include/uapi/asm-generic/socket.h |  2 ++
 net/core/sock.c                   | 53 +++++++++++++++++++++++++++++++
 net/socket.c                      |  5 +++
 4 files changed, 64 insertions(+)

Comments

kernel test robot Aug. 22, 2020, 6:57 a.m. UTC | #1
Hi Pascal,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on security/next-testing]
[also build test ERROR on linux/master]
[cannot apply to mmotm/master tip/perf/core linus/master v5.9-rc1 next-20200821]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Pascal-Bouchareine/mm-add-GFP-mask-param-to-strndup_user/20200822-122903
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git next-testing
config: alpha-randconfig-r025-20200822 (attached as .config)
compiler: alpha-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=alpha 

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   net/core/sock.c: In function 'sock_setsockopt':
>> net/core/sock.c:896:17: error: 'SO_DESCRIPTION' undeclared (first use in this function); did you mean 'MODULE_DESCRIPTION'?
     896 |  if (optname == SO_DESCRIPTION)
         |                 ^~~~~~~~~~~~~~
         |                 MODULE_DESCRIPTION
   net/core/sock.c:896:17: note: each undeclared identifier is reported only once for each function it appears in
   net/core/sock.c: In function 'sock_getsockopt':
   net/core/sock.c:1663:7: error: 'SO_DESCRIPTION' undeclared (first use in this function); did you mean 'MODULE_DESCRIPTION'?
    1663 |  case SO_DESCRIPTION:
         |       ^~~~~~~~~~~~~~
         |       MODULE_DESCRIPTION

# https://github.com/0day-ci/linux/commit/35dcbc957b52151274a9e06b2d6c4739b5061622
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Pascal-Bouchareine/mm-add-GFP-mask-param-to-strndup_user/20200822-122903
git checkout 35dcbc957b52151274a9e06b2d6c4739b5061622
vim +896 net/core/sock.c

   873	
   874	/*
   875	 *	This is meant for all protocols to use and covers goings on
   876	 *	at the socket level. Everything here is generic.
   877	 */
   878	
   879	int sock_setsockopt(struct socket *sock, int level, int optname,
   880			    char __user *optval, unsigned int optlen)
   881	{
   882		struct sock_txtime sk_txtime;
   883		struct sock *sk = sock->sk;
   884		int val;
   885		int valbool;
   886		struct linger ling;
   887		int ret = 0;
   888	
   889		/*
   890		 *	Options without arguments
   891		 */
   892	
   893		if (optname == SO_BINDTODEVICE)
   894			return sock_setbindtodevice(sk, optval, optlen);
   895	
 > 896		if (optname == SO_DESCRIPTION)
   897			return sock_set_description(sk, optval);
   898	
   899		if (optlen < sizeof(int))
   900			return -EINVAL;
   901	
   902		if (get_user(val, (int __user *)optval))
   903			return -EFAULT;
   904	
   905		valbool = val ? 1 : 0;
   906	
   907		lock_sock(sk);
   908	
   909		switch (optname) {
   910		case SO_DEBUG:
   911			if (val && !capable(CAP_NET_ADMIN))
   912				ret = -EACCES;
   913			else
   914				sock_valbool_flag(sk, SOCK_DBG, valbool);
   915			break;
   916		case SO_REUSEADDR:
   917			sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
   918			break;
   919		case SO_REUSEPORT:
   920			sk->sk_reuseport = valbool;
   921			break;
   922		case SO_TYPE:
   923		case SO_PROTOCOL:
   924		case SO_DOMAIN:
   925		case SO_ERROR:
   926			ret = -ENOPROTOOPT;
   927			break;
   928		case SO_DONTROUTE:
   929			sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
   930			sk_dst_reset(sk);
   931			break;
   932		case SO_BROADCAST:
   933			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
   934			break;
   935		case SO_SNDBUF:
   936			/* Don't error on this BSD doesn't and if you think
   937			 * about it this is right. Otherwise apps have to
   938			 * play 'guess the biggest size' games. RCVBUF/SNDBUF
   939			 * are treated in BSD as hints
   940			 */
   941			val = min_t(u32, val, sysctl_wmem_max);
   942	set_sndbuf:
   943			/* Ensure val * 2 fits into an int, to prevent max_t()
   944			 * from treating it as a negative value.
   945			 */
   946			val = min_t(int, val, INT_MAX / 2);
   947			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
   948			WRITE_ONCE(sk->sk_sndbuf,
   949				   max_t(int, val * 2, SOCK_MIN_SNDBUF));
   950			/* Wake up sending tasks if we upped the value. */
   951			sk->sk_write_space(sk);
   952			break;
   953	
   954		case SO_SNDBUFFORCE:
   955			if (!capable(CAP_NET_ADMIN)) {
   956				ret = -EPERM;
   957				break;
   958			}
   959	
   960			/* No negative values (to prevent underflow, as val will be
   961			 * multiplied by 2).
   962			 */
   963			if (val < 0)
   964				val = 0;
   965			goto set_sndbuf;
   966	
   967		case SO_RCVBUF:
   968			/* Don't error on this BSD doesn't and if you think
   969			 * about it this is right. Otherwise apps have to
   970			 * play 'guess the biggest size' games. RCVBUF/SNDBUF
   971			 * are treated in BSD as hints
   972			 */
   973			__sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
   974			break;
   975	
   976		case SO_RCVBUFFORCE:
   977			if (!capable(CAP_NET_ADMIN)) {
   978				ret = -EPERM;
   979				break;
   980			}
   981	
   982			/* No negative values (to prevent underflow, as val will be
   983			 * multiplied by 2).
   984			 */
   985			__sock_set_rcvbuf(sk, max(val, 0));
   986			break;
   987	
   988		case SO_KEEPALIVE:
   989			if (sk->sk_prot->keepalive)
   990				sk->sk_prot->keepalive(sk, valbool);
   991			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
   992			break;
   993	
   994		case SO_OOBINLINE:
   995			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
   996			break;
   997	
   998		case SO_NO_CHECK:
   999			sk->sk_no_check_tx = valbool;
  1000			break;
  1001	
  1002		case SO_PRIORITY:
  1003			if ((val >= 0 && val <= 6) ||
  1004			    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
  1005				sk->sk_priority = val;
  1006			else
  1007				ret = -EPERM;
  1008			break;
  1009	
  1010		case SO_LINGER:
  1011			if (optlen < sizeof(ling)) {
  1012				ret = -EINVAL;	/* 1003.1g */
  1013				break;
  1014			}
  1015			if (copy_from_user(&ling, optval, sizeof(ling))) {
  1016				ret = -EFAULT;
  1017				break;
  1018			}
  1019			if (!ling.l_onoff)
  1020				sock_reset_flag(sk, SOCK_LINGER);
  1021			else {
  1022	#if (BITS_PER_LONG == 32)
  1023				if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
  1024					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
  1025				else
  1026	#endif
  1027					sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
  1028				sock_set_flag(sk, SOCK_LINGER);
  1029			}
  1030			break;
  1031	
  1032		case SO_BSDCOMPAT:
  1033			sock_warn_obsolete_bsdism("setsockopt");
  1034			break;
  1035	
  1036		case SO_PASSCRED:
  1037			if (valbool)
  1038				set_bit(SOCK_PASSCRED, &sock->flags);
  1039			else
  1040				clear_bit(SOCK_PASSCRED, &sock->flags);
  1041			break;
  1042	
  1043		case SO_TIMESTAMP_OLD:
  1044			__sock_set_timestamps(sk, valbool, false, false);
  1045			break;
  1046		case SO_TIMESTAMP_NEW:
  1047			__sock_set_timestamps(sk, valbool, true, false);
  1048			break;
  1049		case SO_TIMESTAMPNS_OLD:
  1050			__sock_set_timestamps(sk, valbool, false, true);
  1051			break;
  1052		case SO_TIMESTAMPNS_NEW:
  1053			__sock_set_timestamps(sk, valbool, true, true);
  1054			break;
  1055		case SO_TIMESTAMPING_NEW:
  1056			sock_set_flag(sk, SOCK_TSTAMP_NEW);
  1057			/* fall through */
  1058		case SO_TIMESTAMPING_OLD:
  1059			if (val & ~SOF_TIMESTAMPING_MASK) {
  1060				ret = -EINVAL;
  1061				break;
  1062			}
  1063	
  1064			if (val & SOF_TIMESTAMPING_OPT_ID &&
  1065			    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
  1066				if (sk->sk_protocol == IPPROTO_TCP &&
  1067				    sk->sk_type == SOCK_STREAM) {
  1068					if ((1 << sk->sk_state) &
  1069					    (TCPF_CLOSE | TCPF_LISTEN)) {
  1070						ret = -EINVAL;
  1071						break;
  1072					}
  1073					sk->sk_tskey = tcp_sk(sk)->snd_una;
  1074				} else {
  1075					sk->sk_tskey = 0;
  1076				}
  1077			}
  1078	
  1079			if (val & SOF_TIMESTAMPING_OPT_STATS &&
  1080			    !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
  1081				ret = -EINVAL;
  1082				break;
  1083			}
  1084	
  1085			sk->sk_tsflags = val;
  1086			if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
  1087				sock_enable_timestamp(sk,
  1088						      SOCK_TIMESTAMPING_RX_SOFTWARE);
  1089			else {
  1090				if (optname == SO_TIMESTAMPING_NEW)
  1091					sock_reset_flag(sk, SOCK_TSTAMP_NEW);
  1092	
  1093				sock_disable_timestamp(sk,
  1094						       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
  1095			}
  1096			break;
  1097	
  1098		case SO_RCVLOWAT:
  1099			if (val < 0)
  1100				val = INT_MAX;
  1101			if (sock->ops->set_rcvlowat)
  1102				ret = sock->ops->set_rcvlowat(sk, val);
  1103			else
  1104				WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
  1105			break;
  1106	
  1107		case SO_RCVTIMEO_OLD:
  1108		case SO_RCVTIMEO_NEW:
  1109			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD);
  1110			break;
  1111	
  1112		case SO_SNDTIMEO_OLD:
  1113		case SO_SNDTIMEO_NEW:
  1114			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD);
  1115			break;
  1116	
  1117		case SO_ATTACH_FILTER:
  1118			ret = -EINVAL;
  1119			if (optlen == sizeof(struct sock_fprog)) {
  1120				struct sock_fprog fprog;
  1121	
  1122				ret = -EFAULT;
  1123				if (copy_from_user(&fprog, optval, sizeof(fprog)))
  1124					break;
  1125	
  1126				ret = sk_attach_filter(&fprog, sk);
  1127			}
  1128			break;
  1129	
  1130		case SO_ATTACH_BPF:
  1131			ret = -EINVAL;
  1132			if (optlen == sizeof(u32)) {
  1133				u32 ufd;
  1134	
  1135				ret = -EFAULT;
  1136				if (copy_from_user(&ufd, optval, sizeof(ufd)))
  1137					break;
  1138	
  1139				ret = sk_attach_bpf(ufd, sk);
  1140			}
  1141			break;
  1142	
  1143		case SO_ATTACH_REUSEPORT_CBPF:
  1144			ret = -EINVAL;
  1145			if (optlen == sizeof(struct sock_fprog)) {
  1146				struct sock_fprog fprog;
  1147	
  1148				ret = -EFAULT;
  1149				if (copy_from_user(&fprog, optval, sizeof(fprog)))
  1150					break;
  1151	
  1152				ret = sk_reuseport_attach_filter(&fprog, sk);
  1153			}
  1154			break;
  1155	
  1156		case SO_ATTACH_REUSEPORT_EBPF:
  1157			ret = -EINVAL;
  1158			if (optlen == sizeof(u32)) {
  1159				u32 ufd;
  1160	
  1161				ret = -EFAULT;
  1162				if (copy_from_user(&ufd, optval, sizeof(ufd)))
  1163					break;
  1164	
  1165				ret = sk_reuseport_attach_bpf(ufd, sk);
  1166			}
  1167			break;
  1168	
  1169		case SO_DETACH_REUSEPORT_BPF:
  1170			ret = reuseport_detach_prog(sk);
  1171			break;
  1172	
  1173		case SO_DETACH_FILTER:
  1174			ret = sk_detach_filter(sk);
  1175			break;
  1176	
  1177		case SO_LOCK_FILTER:
  1178			if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
  1179				ret = -EPERM;
  1180			else
  1181				sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
  1182			break;
  1183	
  1184		case SO_PASSSEC:
  1185			if (valbool)
  1186				set_bit(SOCK_PASSSEC, &sock->flags);
  1187			else
  1188				clear_bit(SOCK_PASSSEC, &sock->flags);
  1189			break;
  1190		case SO_MARK:
  1191			if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
  1192				ret = -EPERM;
  1193			} else if (val != sk->sk_mark) {
  1194				sk->sk_mark = val;
  1195				sk_dst_reset(sk);
  1196			}
  1197			break;
  1198	
  1199		case SO_RXQ_OVFL:
  1200			sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
  1201			break;
  1202	
  1203		case SO_WIFI_STATUS:
  1204			sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
  1205			break;
  1206	
  1207		case SO_PEEK_OFF:
  1208			if (sock->ops->set_peek_off)
  1209				ret = sock->ops->set_peek_off(sk, val);
  1210			else
  1211				ret = -EOPNOTSUPP;
  1212			break;
  1213	
  1214		case SO_NOFCS:
  1215			sock_valbool_flag(sk, SOCK_NOFCS, valbool);
  1216			break;
  1217	
  1218		case SO_SELECT_ERR_QUEUE:
  1219			sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
  1220			break;
  1221	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
David Miller Aug. 22, 2020, 7:36 p.m. UTC | #2
From: Pascal Bouchareine <kalou@tfz.net>
Date: Fri, 21 Aug 2020 20:28:27 -0700

> This command attaches the zero terminated string in optval to the
> socket for troubleshooting purposes. The free string is displayed in the
> process fdinfo file for that fd (/proc/<pid>/fdinfo/<fd>).
> 
> One intended usage is to allow processes to self-document sockets
> for netstat and friends to report
> 
> We ignore optlen and constrain the string to a static max size
> 
> Signed-off-by: Pascal Bouchareine <kalou@tfz.net>

This change is really a non-starter unless the information gets
published somewhere where people actually look at dumped sockets, and
that's inet_diag and friends.
Pascal Bouchareine Aug. 22, 2020, 7:59 p.m. UTC | #3
Thank you,

On Sat, Aug 22, 2020 at 12:36 PM David Miller <davem@davemloft.net> wrote:
> > We ignore optlen and constrain the string to a static max size
> >
> > Signed-off-by: Pascal Bouchareine <kalou@tfz.net>
>
> This change is really a non-starter unless the information gets
> published somewhere where people actually look at dumped sockets, and
> that's inet_diag and friends.

Would it make sense to also make UDIAG_SHOW_NAME use sk_description?
(And keep the existing change - setsockopt + show_fd_info via
/proc/.../fdinfo/..)

I feel that adding pid information (and whatever else I am missing
here) to inet_diag might also be a good improvement then?

I understand that users have to scan /proc to find the FDs, matching
the inode number for the socket to find the owning process today.

If that's of interest, I can explore that too.
Pascal Bouchareine Aug. 22, 2020, 8:19 p.m. UTC | #4
On Sat, Aug 22, 2020 at 12:59 PM Pascal Bouchareine <kalou@tfz.net> wrote:

> Would it make sense to also make UDIAG_SHOW_NAME use sk_description?
> (And keep the existing change - setsockopt + show_fd_info via
> /proc/.../fdinfo/..)


Ah, very wrong example - to be more precise, I suppose that'd be adding
a couple of idiag_ext entries for sk_description and pid instead, if possible.
Pascal Bouchareine Aug. 22, 2020, 8:53 p.m. UTC | #5
On Sat, Aug 22, 2020 at 1:19 PM Pascal Bouchareine <kalou@tfz.net> wrote:
>
> On Sat, Aug 22, 2020 at 12:59 PM Pascal Bouchareine <kalou@tfz.net> wrote:
>
> > Would it make sense to also make UDIAG_SHOW_NAME use sk_description?
> > (And keep the existing change - setsockopt + show_fd_info via
> > /proc/.../fdinfo/..)
>
>
> Ah,very wrong example - to be more precise, I suppose that'd be adding
> a couple idiag_ext for sk_description and pid if possible instead

About the pid part -
On top of multiple pids to scan for a given socket, there's also the
security provided by /proc - I'm not sure what inet_diag does for that.
So maybe users calling it will need to scan /proc for a long time anyway...

Or is that doable?
David Miller Aug. 22, 2020, 9:01 p.m. UTC | #6
From: Pascal Bouchareine <kalou@tfz.net>
Date: Sat, 22 Aug 2020 13:53:03 -0700

> On Sat, Aug 22, 2020 at 1:19 PM Pascal Bouchareine <kalou@tfz.net> wrote:
>>
>> On Sat, Aug 22, 2020 at 12:59 PM Pascal Bouchareine <kalou@tfz.net> wrote:
>>
>> > Would it make sense to also make UDIAG_SHOW_NAME use sk_description?
>> > (And keep the existing change - setsockopt + show_fd_info via
>> > /proc/.../fdinfo/..)
>>
>>
>> Ah,very wrong example - to be more precise, I suppose that'd be adding
>> a couple idiag_ext for sk_description and pid if possible instead
> 
> About the pid part -
> On top of multiple pids to scan for a given socket, there's also the
> security provided by /proc - I'm not sure what inet_diag does for that
> So maybe users calling it will need to scan /proc for a long time anyway...
> 
> Or is that doable?

I'd like to kindly ask that you do more research into how this kind of
information is advertised to the user using modern interfaces, and what
kinds of permissions and checks are done for those.

You are proposing a new UAPI for the Linux kernel, and with that comes
some level of responsibility.

Thank you.
Pascal Bouchareine Aug. 23, 2020, 10:28 p.m. UTC | #7
On Sat, Aug 22, 2020 at 2:01 PM David Miller <davem@davemloft.net> wrote:
> > About the pid part -
> > On top of multiple pids to scan for a given socket, there's also the
> > security provided by /proc - I'm not sure what inet_diag does for that
> > So maybe users calling it will need to scan /proc for a long time anyway...
> >
> > Or is that doable?
>
> I'd like to kindly ask that you do more research into how this kind of
> information is advertised to the user using modern interfaces, and what
> kinds of permissions and checks are done for those.

If we wanted to get rid of having to scan /proc from userland when
using sock_diag to identify associated processes,
I suppose scanning for pids would be the most annoying part?

I understand sock_diag uses CAP_NET_ADMIN for some sensitive bits.

I thought it would require an additional bit of logic to let an
unprivileged user access its own socket "sensitive" data.

Your message makes me think I need to read a lot more about it, so
I'll try that - but more importantly
as you mention APIs and modern interfaces, I think eBPF is going to be
of great help to try and hack
around this data without disturbing existing APIs.

Thanks for taking the time to look into it
diff mbox series

Patch

diff --git a/include/net/sock.h b/include/net/sock.h
index 1183507df95b..6b4fd1383282 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -342,6 +342,7 @@  struct bpf_sk_storage;
   *	@sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
   *	@sk_txtime_report_errors: set report errors mode for SO_TXTIME
   *	@sk_txtime_unused: unused txtime flags
+  *	@sk_description: user-supplied description string, set with SO_DESCRIPTION
   */
 struct sock {
 	/*
@@ -519,6 +520,9 @@  struct sock {
 	struct bpf_sk_storage __rcu	*sk_bpf_storage;
 #endif
 	struct rcu_head		sk_rcu;
+
+#define	SK_MAX_DESC_SIZE	256
+	char			*sk_description;
 };
 
 enum sk_pacing {
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 77f7c1638eb1..fb51c4bb7a12 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -119,6 +119,8 @@ 
 
 #define SO_DETACH_REUSEPORT_BPF 68
 
+#define SO_DESCRIPTION		69
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/net/core/sock.c b/net/core/sock.c
index 2e5b7870e5d3..b8bad57338d8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -828,6 +828,49 @@  void sock_set_rcvbuf(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(sock_set_rcvbuf);
 
+static int sock_set_description(struct sock *sk, char __user *user_desc)
+{	/* SO_DESCRIPTION setter: replace sk->sk_description with a kernel copy of user_desc; returns 0 or a negative errno. */
+	char *old, *desc;
+
+	desc = strndup_user(user_desc, SK_MAX_DESC_SIZE, GFP_KERNEL_ACCOUNT);	/* copy bounded by SK_MAX_DESC_SIZE; no optlen is passed in or consulted */
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);	/* propagate the strndup_user() error code */
+
+	lock_sock(sk);	/* swap the pointer under the socket lock */
+	old = sk->sk_description;
+	sk->sk_description = desc;	/* NOTE(review): stored as-is; sock_show_fdinfo prints it via %s, so embedded newlines are not filtered here */
+	release_sock(sk);
+
+	kfree(old);	/* free the previous description (if any) after dropping the lock */
+
+	return 0;
+}
+
+static int sock_get_description(struct sock *sk, char __user *optval,
+				int __user *optlen, int len)
+{	/* SO_DESCRIPTION getter: copy at most len bytes of the description to optval and store the copied length in optlen; returns 0 or -EFAULT. */
+	char desc[SK_MAX_DESC_SIZE];	/* on-stack snapshot so copy_to_user() runs after release_sock() */
+
+	lock_sock(sk);
+	if (sk->sk_description) {
+		/* The stored string plus its NUL fits in desc: strndup_user() capped it at SK_MAX_DESC_SIZE. */
+		len = min_t(unsigned int, len,
+			    strlen(sk->sk_description) + 1);	/* NOTE(review): unsigned cast - a negative len would select strlen + 1; presumably the caller rejects len < 0, confirm */
+		memcpy(desc, sk->sk_description, len);	/* NOTE(review): when len < strlen + 1 the result is truncated and not NUL-terminated */
+	} else {
+		len = 0;	/* no description set: report a zero length */
+	}
+	release_sock(sk);
+
+	if (copy_to_user(optval, desc, len))	/* len is 0 in the unset case, so the uninitialized desc is not copied */
+		return -EFAULT;
+
+	if (put_user(len, optlen))	/* report the number of bytes written to optval */
+		return -EFAULT;
+
+	return 0;
+}
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
@@ -850,6 +893,9 @@  int sock_setsockopt(struct socket *sock, int level, int optname,
 	if (optname == SO_BINDTODEVICE)
 		return sock_setbindtodevice(sk, optval, optlen);
 
+	if (optname == SO_DESCRIPTION)
+		return sock_set_description(sk, optval);
+
 	if (optlen < sizeof(int))
 		return -EINVAL;
 
@@ -1614,6 +1660,9 @@  int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_bound_dev_if;
 		break;
 
+	case SO_DESCRIPTION:
+		return sock_get_description(sk, optval, optlen, len);
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -1792,6 +1841,8 @@  static void __sk_destruct(struct rcu_head *head)
 		RCU_INIT_POINTER(sk->sk_filter, NULL);
 	}
 
+	kfree(sk->sk_description);
+
 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
 #ifdef CONFIG_BPF_SYSCALL
@@ -1964,6 +2015,8 @@  struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		if (sk_user_data_is_nocopy(newsk))
 			newsk->sk_user_data = NULL;
 
+		newsk->sk_description = NULL;
+
 		newsk->sk_err	   = 0;
 		newsk->sk_err_soft = 0;
 		newsk->sk_priority = 0;
diff --git a/net/socket.c b/net/socket.c
index 976426d03f09..4f2c1a7744b0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -134,6 +134,11 @@  static void sock_show_fdinfo(struct seq_file *m, struct file *f)
 {
 	struct socket *sock = f->private_data;
 
+	lock_sock(sock->sk);
+	if (sock->sk->sk_description)
+		seq_printf(m, "desc:\t%s\n", sock->sk->sk_description);
+	release_sock(sock->sk);
+
 	if (sock->ops->show_fdinfo)
 		sock->ops->show_fdinfo(m, sock);
 }