Message ID | 20240809064606.3490994-2-zhangshida@kylinos.cn |
---|---|
State | Superseded |
Headers | show |
Series | Fix an error caused by improperly dirtied buffer | expand |
On Fri 09-08-24 14:46:05, zhangshida wrote: > From: Shida Zhang <zhangshida@kylinos.cn> > > On an old kernel version(4.19, ext3, data=journal, pagesize=64k), > an assertion failure will occasionally be triggered by the line below: > ----------- > jbd2_journal_commit_transaction > { > ... > J_ASSERT_BH(bh, !buffer_dirty(bh)); > /* > * The buffer on BJ_Forget list and not jbddirty means > ... > } > ----------- > > The same condition may also be applied to the lattest kernel version. Maybe let me shorten the following part of the changelog a bit: When blocksize < pagesize and we truncate a file, there can be buffers in the mapping tail page beyond i_size. These buffers will be filed to transaction's BJ_Forget list by ext4_journalled_invalidatepage() during truncation. When the transaction doing truncate starts committing, we can grow the file again. This calls __block_write_begin() which allocates new blocks under these buffers in the tail page we go through the branch: if (buffer_new(bh)) { clean_bdev_bh_alias(bh); if (folio_test_uptodate(folio)) { clear_buffer_new(bh); set_buffer_uptodate(bh); mark_buffer_dirty(bh); continue; } ... } Hence buffers on BJ_Forget list of the committing transaction get marked dirty and this triggers the jbd2 assertion. Teach ext4_block_write_begin() to properly handle files with data journalling by avoiding dirtying them directly. Instead of folio_zero_new_buffers() we use ext4_journalled_zero_new_buffers() which takes care of handling journalling. We also don't need to mark new uptodate buffers as dirty in ext4_block_write_begin(). That will be either done either by block_commit_write() in case of success or by folio_zero_new_buffers() in case of failure. > Reported-by: Baolin Liu <liubaolin@kylinos.cn> > Suggested-by: Jan Kara <jack@suse.cz> > Signed-off-by: Shida Zhang <zhangshida@kylinos.cn> > --- > fs/ext4/inode.c | 30 ++++++++++++++++++++++++------ > 1 file changed, 24 insertions(+), 6 deletions(-) > > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 941c1c0d5c6e..de46c0a6842a 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -49,6 +49,11 @@ > > #include <trace/events/ext4.h> > > +static void ext4_journalled_zero_new_buffers(handle_t *handle, > + struct inode *inode, > + struct folio *folio, > + unsigned from, unsigned to); > + > static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, > struct ext4_inode_info *ei) > { > @@ -1042,7 +1047,8 @@ int do_journal_get_write_access(handle_t *handle, struct inode *inode, > } > > #ifdef CONFIG_FS_ENCRYPTION > -static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, > +static int ext4_block_write_begin(handle_t *handle, struct folio *folio, > + loff_t pos, unsigned len, > get_block_t *get_block) > { > unsigned from = pos & (PAGE_SIZE - 1); > @@ -1056,6 +1062,7 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, > struct buffer_head *bh, *head, *wait[2]; > int nr_wait = 0; > int i; > + bool should_journal_data = ext4_should_journal_data(inode); > > BUG_ON(!folio_test_locked(folio)); > BUG_ON(from > PAGE_SIZE); > @@ -1084,11 +1091,16 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, > err = get_block(inode, block, bh, 1); > if (err) > break; > + if (should_journal_data) > + do_journal_get_write_access(handle, inode, bh); > if (buffer_new(bh)) { > if (folio_test_uptodate(folio)) { > clear_buffer_new(bh); > set_buffer_uptodate(bh); > - mark_buffer_dirty(bh); > + if (should_journal_data) > + ext4_dirty_journalled_data(handle, bh); > + else > + mark_buffer_dirty(bh); This hunk is not needed. We can just do: if (folio_test_uptodate(folio)) { - clear_buffer_new(bh); set_buffer_uptodate(bh); - mark_buffer_dirty(bh); continue; } > continue; > } > if (block_end > to || block_start < from) > @@ -1118,7 +1130,11 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, > err = -EIO; > } > if (unlikely(err)) { > - folio_zero_new_buffers(folio, from, to); > + if (should_journal_data) > + ext4_journalled_zero_new_buffers(handle, inode, folio, > + from, to); > + else > + folio_zero_new_buffers(folio, from, to); > } else if (fscrypt_inode_uses_fs_layer_crypto(inode)) { > for (i = 0; i < nr_wait; i++) { > int err2; This looks good. Honza
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 941c1c0d5c6e..de46c0a6842a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -49,6 +49,11 @@ #include <trace/events/ext4.h> +static void ext4_journalled_zero_new_buffers(handle_t *handle, + struct inode *inode, + struct folio *folio, + unsigned from, unsigned to); + static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, struct ext4_inode_info *ei) { @@ -1042,7 +1047,8 @@ int do_journal_get_write_access(handle_t *handle, struct inode *inode, } #ifdef CONFIG_FS_ENCRYPTION -static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, +static int ext4_block_write_begin(handle_t *handle, struct folio *folio, + loff_t pos, unsigned len, get_block_t *get_block) { unsigned from = pos & (PAGE_SIZE - 1); @@ -1056,6 +1062,7 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, struct buffer_head *bh, *head, *wait[2]; int nr_wait = 0; int i; + bool should_journal_data = ext4_should_journal_data(inode); BUG_ON(!folio_test_locked(folio)); BUG_ON(from > PAGE_SIZE); @@ -1084,11 +1091,16 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, err = get_block(inode, block, bh, 1); if (err) break; + if (should_journal_data) + do_journal_get_write_access(handle, inode, bh); if (buffer_new(bh)) { if (folio_test_uptodate(folio)) { clear_buffer_new(bh); set_buffer_uptodate(bh); - mark_buffer_dirty(bh); + if (should_journal_data) + ext4_dirty_journalled_data(handle, bh); + else + mark_buffer_dirty(bh); continue; } if (block_end > to || block_start < from) @@ -1118,7 +1130,11 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, err = -EIO; } if (unlikely(err)) { - folio_zero_new_buffers(folio, from, to); + if (should_journal_data) + ext4_journalled_zero_new_buffers(handle, inode, folio, + from, to); + else + folio_zero_new_buffers(folio, from, to); } else if (fscrypt_inode_uses_fs_layer_crypto(inode)) { for (i = 0; i < nr_wait; i++) { int err2; @@ -1218,10 +1234,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, #ifdef CONFIG_FS_ENCRYPTION if (ext4_should_dioread_nolock(inode)) - ret = ext4_block_write_begin(folio, pos, len, + ret = ext4_block_write_begin(handle, folio, pos, len, ext4_get_block_unwritten); else - ret = ext4_block_write_begin(folio, pos, len, ext4_get_block); + ret = ext4_block_write_begin(handle, folio, pos, len, + ext4_get_block); #else if (ext4_should_dioread_nolock(inode)) ret = __block_write_begin(&folio->page, pos, len, @@ -2962,7 +2979,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, return PTR_ERR(folio); #ifdef CONFIG_FS_ENCRYPTION - ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep); + ret = ext4_block_write_begin(NULL, folio, pos, len, + ext4_da_get_block_prep); #else ret = __block_write_begin(&folio->page, pos, len, ext4_da_get_block_prep); #endif