@@ -971,7 +971,8 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
*/
int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
{
- int err, lnum, offs;
+ int err, lnum, offs, i = 0;
+ struct inode **xinos = NULL;
struct ubifs_ino_node *ino, *ino_start;
struct ubifs_inode *ui = ubifs_inode(inode);
int sync = 0, write_len = 0, ilen = UBIFS_INO_NODE_SZ;
@@ -982,44 +983,24 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
if (kill_xattrs) {
+ union ubifs_key key;
+ struct fscrypt_name nm = {0};
+ struct ubifs_dent_node *xent, *pxent = NULL;
+ struct super_block *sb = c->vfs_sb;
+
if (ui->xattr_cnt > ubifs_xattr_max_cnt(c)) {
ubifs_err(c, "Cannot delete inode, it has too much xattrs!");
ubifs_ro_mode(c, err);
return -EPERM;
}
- }
-
- /*
- * If the inode is being deleted, do not write the attached data. No
- * need to synchronize the write-buffer either.
- */
- if (!last_reference) {
- ilen += ui->data_len;
- sync = IS_SYNC(inode);
- } else if (kill_xattrs) {
- write_len += UBIFS_INO_NODE_SZ * ui->xattr_cnt;
- }
-
- if (ubifs_authenticated(c))
- write_len += ALIGN(ilen, 8) + ubifs_auth_node_sz(c);
- else
- write_len += ilen;
- ino_start = ino = kmalloc(write_len, GFP_NOFS);
- if (!ino)
- return -ENOMEM;
-
- /* Make reservation before allocating sequence numbers */
- err = make_reservation(c, BASEHD, write_len);
- if (err)
- goto out_free;
-
- if (kill_xattrs) {
- union ubifs_key key;
- struct fscrypt_name nm = {0};
- struct inode *xino;
- struct ubifs_dent_node *xent, *pxent = NULL;
+ xinos = kmalloc(sizeof(*xinos) * ui->xattr_cnt, GFP_NOFS);
+ if (!xinos) {
+ ubifs_err(c, "Cannot allocate memory for xattr inodes");
+ return -ENOMEM;
+ }
+ i = 0;
lowest_xent_key(c, &key, inode->i_ino);
while (1) {
xent = ubifs_tnc_next_ent(c, &key, &nm);
@@ -1029,34 +1010,71 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
break;
kfree(pxent);
- goto out_release;
+ goto out_iput;
}
fname_name(&nm) = xent->name;
fname_len(&nm) = le16_to_cpu(xent->nlen);
- xino = ubifs_iget(c->vfs_sb, le64_to_cpu(xent->inum));
- if (IS_ERR(xino)) {
- err = PTR_ERR(xino);
+ xinos[i] = ubifs_iget(sb, le64_to_cpu(xent->inum));
+ if (IS_ERR(xinos[i])) {
+ err = PTR_ERR(xinos[i]);
ubifs_err(c, "dead directory entry '%s', error %d",
xent->name, err);
ubifs_ro_mode(c, err);
kfree(pxent);
kfree(xent);
- goto out_release;
+ goto out_iput;
}
- ubifs_assert(c, ubifs_inode(xino)->xattr);
-
- clear_nlink(xino);
- pack_inode(c, ino, xino, 0);
- ino = (void *)ino + UBIFS_INO_NODE_SZ;
- iput(xino);
+ ubifs_assert(c, ubifs_inode(xinos[i])->xattr);
+ ubifs_assert(c, i < ui->xattr_cnt);
kfree(pxent);
pxent = xent;
key_read(c, &xent->key, &key);
+ i++;
}
kfree(pxent);
+ ubifs_assert(c, i == ui->xattr_cnt);
+ }
+
+ /*
+ * If the inode is being deleted, do not write the attached data. No
+ * need to synchronize the write-buffer either.
+ */
+ if (!last_reference) {
+ ilen += ui->data_len;
+ sync = IS_SYNC(inode);
+ } else if (kill_xattrs) {
+ write_len += UBIFS_INO_NODE_SZ * ui->xattr_cnt;
+ }
+
+ if (ubifs_authenticated(c))
+ write_len += ALIGN(ilen, 8) + ubifs_auth_node_sz(c);
+ else
+ write_len += ilen;
+
+ ino_start = ino = kmalloc(write_len, GFP_NOFS);
+ if (!ino) {
+ err = -ENOMEM;
+ goto out_iput;
+ }
+
+ /* Make reservation before allocating sequence numbers */
+ err = make_reservation(c, BASEHD, write_len);
+ if (err)
+ goto out_free;
+
+ if (kill_xattrs) {
+ for (i = 0; i < ui->xattr_cnt; i++) {
+ clear_nlink(xinos[i]);
+ pack_inode(c, ino, xinos[i], 0);
+ ino = (void *)ino + UBIFS_INO_NODE_SZ;
+ iput(xinos[i]);
+ }
+
+ kfree(xinos);
+ xinos = NULL;
}
pack_inode(c, ino, inode, 1);
@@ -1103,6 +1121,12 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
finish_reservation(c);
out_free:
kfree(ino_start);
+out_iput:
+ if (xinos) {
+ while (--i >= 0)
+ iput(xinos[i]);
+ kfree(xinos);
+ }
return err;
}
The inodes reclaiming process(See function prune_icache_sb) collects all reclaimable inodes and mark them with I_FREEING flag at first, at that time, other processes will be stuck if they try getting these inodes(See function find_inode_fast), then the reclaiming process destroy the inodes. In deleted inode writing function ubifs_jnl_write_inode(), UBIFS holds BASEHD's wbuf->io_mutex while traversing all xattr inodes, which could race with inodes reclaiming process(The reclaiming process could try locking BASEHD's wbuf->io_mutex in inode evicting function), then an ABBA deadlock problem would happens as following: 1. File A has inode ia and a xattr(with inode ixa), regular file B has inode ib and a xattr. 2. getfattr(A, xattr_buf) // ixa is added into lru // lru->ixa 3. Then, following three processes running like this: PA PB PC echo 2 > /proc/sys/vm/drop_caches // ib and ia area added into lru, lru->ixa->ib->ia shrink_slab prune_icache_sb list_lru_walk_one inode_lru_isolate ixa->inode->i_state |= I_FREEING // set inode state inode_lru_isolate __iget(ib) spin_unlock(&ib->i_lock) spin_unlock(lru_lock) rm file B ib->nlink = 0 iput(ib) rm file A iput(ia) ubifs_evict_inode(ia) ubifs_jnl_delete_inode(ia) ubifs_jnl_write_inode(ia) make_reservation(BASEHD) // Lock wbuf->io_mutex ubifs_iget(ixa->i_ino) iget_locked find_inode_fast __wait_on_freeing_inode(ixa) | iput(ib) // ib->nlink is 0, do evict | ubifs_evict_inode | ubifs_jnl_delete_inode(ib) ↓ ubifs_jnl_write_inode ABBA deadlock ←-----make_reservation(BASEHD) dispose_list // cannot be executed by prune_icache_sb wake_up_bit(&inode->i_state) The root cause is that UBIFS tries getting xattr inode with holding BASEHD's wbuf->io_mutex, so the problem can be fixed by getting all xattr inodes before locking BASEHD's wbuf->io_mutex. Fixes: 7959cf3a7506 ("ubifs: journal: Handle xattrs like files") Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=219022 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> --- fs/ubifs/journal.c | 110 +++++++++++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 43 deletions(-)