@@ -4853,6 +4853,7 @@ static void ext4_update_other_inodes_time(struct super_block *sb,
unsigned long ino;
int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
int inode_size = EXT4_INODE_SIZE(sb);
+ struct inode *res;
oi.orig_ino = orig_ino;
/*
@@ -4865,7 +4866,10 @@ static void ext4_update_other_inodes_time(struct super_block *sb,
if (ino == orig_ino)
continue;
oi.raw_inode = (struct ext4_inode *) buf;
- (void)find_inode_nowait(sb, ino, other_inode_match, &oi, false);
+ /* Try to find inode, stop if inode_hash_lock is congested. */
+ res = find_inode_nowait(sb, ino, other_inode_match, &oi, true);
+ if (res == ERR_PTR(-EAGAIN))
+ break;
}
}
Function ext4_update_other_inodes_time() implements an optimization which
opportunistically updates times for inodes within the same inode table block.

For now, concurrent inode lookup by number does not scale well because the
inode hash table is protected by a single spinlock. It can become very hot
under concurrent writes to fast NVMe when the inode cache holds enough inodes.

Let's use the non-blocking variant of find_inode_nowait() here and skip
opportunistic inode updates if the spinlock is congested.

Synthetic testcase by Dmitry Monakhov:

modprobe brd rd_size=10240000 rd_nr=1
mkfs.ext4 -F -I 256 -b 4096 -q /dev/ram0
mkdir -p m
mount /dev/ram0 m -o lazytime,noload,barrier=0
mkdir m/{0..31}
fio --ioengine=ftruncate --direct=1 --bs=4k --filesize=16m --time_based=1 \
 --runtime=10 --numjobs=32 --group_reporting --norandommap --name=write-32 \
 --rw=randwrite --directory=m --filename_format='$jobnum/t'
umount m
rmdir m
rmmod brd

Before patch: 50k op/s
After patch: 3000k op/s

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
Link: https://lore.kernel.org/lkml/158031264567.6836.126132376018905207.stgit@buzz/T/#u (v1)
---
 fs/ext4/inode.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)