From patchwork Fri Sep 2 18:09:23 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Linus Torvalds X-Patchwork-Id: 665281 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3sQnFm1Y6zz9sBg for ; Sat, 3 Sep 2016 04:09:32 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=fail reason="signature verification failed" (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b=yjQiiQ2s; dkim-atps=neutral Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752653AbcIBSJ1 (ORCPT ); Fri, 2 Sep 2016 14:09:27 -0400 Received: from mail-pa0-f68.google.com ([209.85.220.68]:32958 "EHLO mail-pa0-f68.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751845AbcIBSJ0 (ORCPT ); Fri, 2 Sep 2016 14:09:26 -0400 Received: by mail-pa0-f68.google.com with SMTP id vy10so5814905pac.0 for ; Fri, 02 Sep 2016 11:09:25 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=sender:date:from:to:cc:subject:message-id:user-agent:mime-version; bh=49AUYbr3IBcYi0t1A8e1NgZyWAF+qr8iaOjXWUBySss=; b=yjQiiQ2s1y7zm/ALrETlAI9S2FGkpixcBpTMruI8sL5G6j/pjOlZTbj178/KqUyCOO pagsx0XR57E2xe9XNhpy9aWSv3bCRRI+/8EOdO42Yn4OKpKu5u7nSsQIVJDQK8HVBPGI t6XF8Atfp+LU3WKEAPqqGddXukaDLCA32ET0kr3Ivgv8pyewyX0QEK14gfwFxOiLuX0k gTebZgDG4q5Ej0EtnEy4hRaiN5iCmzmUCIslNsL8ObgmRV+YFXgFCis6Pknqf4vGV0jM YfzCeSwgEeRyiSzPMoSbrZ+R1mVej/syvk3yrDWJ6Fq/uin59/sg3WS8yVl3Gfmzc2J4 KrpQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:sender:date:from:to:cc:subject:message-id :user-agent:mime-version; bh=49AUYbr3IBcYi0t1A8e1NgZyWAF+qr8iaOjXWUBySss=; b=SPT/IKllXw5eSXYn7z2mjRRzholtaZiwX75gPJ8KPqk6IgdP6CUS9o92KjLVCiO7Fq pE+zGBmgfrCjbXNXzJ5MNffBcAPSX02wlqQ5lCC+YaAVWWc2TuEvNOhXI9I7cgjiul/i X0/glSqFM+6CSAPMLNlxBEuL0uqBA6ptBDzjASMQA2yAN0kYYxl/77Cn8aM06f1nf2jc Vl6AhjBOELULYZhRsdOWuN13kui2KXHqx9vbWhqlcmoYHCv2NH3sn6vdDaL3TSXff5gZ hpIL/cu17yf53M1UtuUfRE/O7Rnzerr1lhN3M/0C50VO8sXZBACSKyI6i6U4WR421JT/ wQKg== X-Gm-Message-State: AE9vXwMuxKm0bASNfq6p0uSArygF/OnZhwgA4HNPQKC2rtbbiPKscINRkCipozcCJtgrhQ== X-Received: by 10.66.156.72 with SMTP id wc8mr38301096pab.53.1472839765169; Fri, 02 Sep 2016 11:09:25 -0700 (PDT) Received: from i7 (c-67-168-201-187.hsd1.or.comcast.net. [67.168.201.187]) by smtp.gmail.com with ESMTPSA id m24sm16406454pfi.34.2016.09.02.11.09.24 (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Fri, 02 Sep 2016 11:09:24 -0700 (PDT) Date: Fri, 2 Sep 2016 11:09:23 -0700 (PDT) From: Linus Torvalds X-X-Sender: torvalds@i7 To: "David S. Miller" cc: Hannes Frederic Sowa , Rainer Weikusat , Eric Dumazet , willy tarreau , netdev@vger.kernel.org Subject: [PATCH 1/2] Revert "af_unix: Fix splice-bind deadlock" Message-ID: User-Agent: Alpine 2.20 (LFD 67 2015-01-07) MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Linus Torvalds Date: Thu, 1 Sep 2016 14:56:49 -0700 Subject: [PATCH 1/2] Revert "af_unix: Fix splice-bind deadlock" This reverts commit c845acb324aa85a39650a14e7696982ceea75dc1. It turns out that it just replaces one deadlock with another one: we can still get the wrong lock ordering with the readlock due to overlayfs calling back into the filesystem layer and still taking the vfs locks after the readlock. The proper solution ends up being to just split the readlock into two pieces: the bind lock (taken *outside* the vfs locks) and the IO lock (taken *inside* the filesystem locks). The two locks are independent anyway. Signed-off-by: Linus Torvalds Reviewed-by: Shmulik Ladkani --- This is not a completely clean revert, because other changes had happened in this area since that commit, but the conflicts were pretty trivial. The next patch actually fixes the problem as described above ("proper solution"). Also, David, if you'd prefer I just apply these directly, you can just tell me so. But I really wanted some AF_UNIX people to look at the next patch regardless. net/unix/af_unix.c | 66 +++++++++++++++++++++--------------------------------- 1 file changed, 26 insertions(+), 40 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f1dffe84f0d5..433ae1bbef97 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -954,20 +954,32 @@ fail: return NULL; } -static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode, - struct path *res) +static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) { - int err; + struct dentry *dentry; + struct path path; + int err = 0; + /* + * Get the parent directory, calculate the hash for last + * component. + */ + dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + return err; - err = security_path_mknod(path, dentry, mode, 0); + /* + * All right, let's create it. + */ + err = security_path_mknod(&path, dentry, mode, 0); if (!err) { - err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0); + err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); if (!err) { - res->mnt = mntget(path->mnt); + res->mnt = mntget(path.mnt); res->dentry = dget(dentry); } } - + done_path_create(&path, dentry); return err; } @@ -978,12 +990,10 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; - int err, name_err; + int err; unsigned int hash; struct unix_address *addr; struct hlist_head *list; - struct path path; - struct dentry *dentry; err = -EINVAL; if (sunaddr->sun_family != AF_UNIX) @@ -999,34 +1009,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; - name_err = 0; - dentry = NULL; - if (sun_path[0]) { - /* Get the parent directory, calculate the hash for last - * component. - */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - - if (IS_ERR(dentry)) { - /* delay report until after 'already bound' check */ - name_err = PTR_ERR(dentry); - dentry = NULL; - } - } - err = mutex_lock_interruptible(&u->readlock); if (err) - goto out_path; + goto out; err = -EINVAL; if (u->addr) goto out_up; - if (name_err) { - err = name_err == -EEXIST ? -EADDRINUSE : name_err; - goto out_up; - } - err = -ENOMEM; addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); if (!addr) @@ -1037,11 +1027,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) addr->hash = hash ^ sk->sk_type; atomic_set(&addr->refcnt, 1); - if (dentry) { - struct path u_path; + if (sun_path[0]) { + struct path path; umode_t mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(dentry, &path, mode, &u_path); + err = unix_mknod(sun_path, mode, &path); if (err) { if (err == -EEXIST) err = -EADDRINUSE; @@ -1049,9 +1039,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out_up; } addr->hash = UNIX_HASH_SIZE; - hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); + hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); - u->path = u_path; + u->path = path; list = &unix_socket_table[hash]; } else { spin_lock(&unix_table_lock); @@ -1074,10 +1064,6 @@ out_unlock: spin_unlock(&unix_table_lock); out_up: mutex_unlock(&u->readlock); -out_path: - if (dentry) - done_path_create(&path, dentry); - out: return err; }