diff mbox series

[v2,8/8] ext4: mark the group block bitmap as corrupted before reporting an error

Message ID 20231221150558.2740823-9-libaokun1@huawei.com
State Superseded
Headers show
Series ext4: fix divide error in mb_update_avg_fragment_size() | expand

Commit Message

Baokun Li Dec. 21, 2023, 3:05 p.m. UTC
Mark the group block bitmap as corrupted before calling
ext4_grp_locked_error(). Otherwise, unlocking the group in
ext4_grp_locked_error() may allow other processes to modify the core block
bitmap that is known to be corrupt.

Signed-off-by: Baokun Li <libaokun1@huawei.com>
---
 fs/ext4/mballoc.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

Comments

Jan Kara Jan. 4, 2024, 10:51 a.m. UTC | #1
On Thu 21-12-23 23:05:58, Baokun Li wrote:
> Otherwise unlocking the group in ext4_grp_locked_error may allow other
> processes to modify the core block bitmap that is known to be corrupt.
> 
> Signed-off-by: Baokun Li <libaokun1@huawei.com>

I'm not opposed, but I don't think this matters much.
ext4_grp_locked_error() unlocks the group only in the errors=remount-ro case
these days, and in that case we abort the journal, so none of the changes
should make it to disk anyway. Anyway, in the name of defensive programming,
feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

:)

								Honza

> ---
>  fs/ext4/mballoc.c | 23 +++++++++++------------
>  1 file changed, 11 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index b862ca2750fd..c43eefebdaa3 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -564,14 +564,14 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
>  
>  			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
>  			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
> +			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
> +					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>  			ext4_grp_locked_error(sb, e4b->bd_group,
>  					      inode ? inode->i_ino : 0,
>  					      blocknr,
>  					      "freeing block already freed "
>  					      "(bit %u)",
>  					      first + i);
> -			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
> -					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>  		}
>  		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
>  	}
> @@ -1926,14 +1926,13 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
>  		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
>  		blocknr += EXT4_C2B(sbi, block);
>  		if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
> +			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
> +					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>  			ext4_grp_locked_error(sb, e4b->bd_group,
>  					      inode ? inode->i_ino : 0,
>  					      blocknr,
>  					      "freeing already freed block (bit %u); block bitmap corrupt.",
>  					      block);
> -			ext4_mark_group_bitmap_corrupted(
> -				sb, e4b->bd_group,
> -				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>  		} else {
>  			mb_regenerate_buddy(e4b);
>  			goto check;
> @@ -2410,12 +2409,12 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
>  
>  		k = mb_find_next_zero_bit(buddy, max, 0);
>  		if (k >= max) {
> +			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
> +					e4b->bd_group,
> +					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>  			ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
>  				"%d free clusters of order %d. But found 0",
>  				grp->bb_counters[i], i);
> -			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
> -					 e4b->bd_group,
> -					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>  			break;
>  		}
>  		ac->ac_found++;
> @@ -2466,12 +2465,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
>  			 * free blocks even though group info says we
>  			 * have free blocks
>  			 */
> +			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
> +					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>  			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
>  					"%d free clusters as per "
>  					"group info. But bitmap says 0",
>  					free);
> -			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
> -					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>  			break;
>  		}
>  
> @@ -2497,12 +2496,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
>  		if (WARN_ON(ex.fe_len <= 0))
>  			break;
>  		if (free < ex.fe_len) {
> +			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
> +					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>  			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
>  					"%d free clusters as per "
>  					"group info. But got %d blocks",
>  					free, ex.fe_len);
> -			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
> -					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>  			/*
>  			 * The number of free blocks differs. This mostly
>  			 * indicate that the bitmap is corrupt. So exit
> -- 
> 2.31.1
>
Baokun Li Jan. 4, 2024, 12:14 p.m. UTC | #2
On 2024/1/4 18:51, Jan Kara wrote:
> On Thu 21-12-23 23:05:58, Baokun Li wrote:
>> Otherwise unlocking the group in ext4_grp_locked_error may allow other
>> processes to modify the core block bitmap that is known to be corrupt.
>>
>> Signed-off-by: Baokun Li <libaokun1@huawei.com>
> I'm not opposed but I don't think this matters much.
> ext4_grp_locked_error() unlocks the group only in errors=remount-ro case
> these days and in that case we abort the journal so none of the changes
> should make it to disk anyway. Anyway, in the name of defensive programming
> feel free to add:
>
> Reviewed-by: Jan Kara <jack@suse.cz>
>
> :)
>
> 								Honza
Thank you very much for your review!

Yes, the unlock gap here does not cause corrupted data to be written
to disk, which is why no issues have been reported here before.

My concern is that core block bitmap corruption may cause a kernel
crash in some corner cases. That's why inode bitmap corruption is not a
concern here, since there is no core inode bitmap.

We encounter all sorts of hard-to-reproduce kernel problems every
day, and hopefully, by fixing these trivial little issues, we can reduce
the number of difficult problems that arise when several such issues
happen to stack up.

Cheers!😊
>> ---
>>   fs/ext4/mballoc.c | 23 +++++++++++------------
>>   1 file changed, 11 insertions(+), 12 deletions(-)
>>
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index b862ca2750fd..c43eefebdaa3 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -564,14 +564,14 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
>>   
>>   			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
>>   			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
>> +			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
>> +					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>>   			ext4_grp_locked_error(sb, e4b->bd_group,
>>   					      inode ? inode->i_ino : 0,
>>   					      blocknr,
>>   					      "freeing block already freed "
>>   					      "(bit %u)",
>>   					      first + i);
>> -			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
>> -					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>>   		}
>>   		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
>>   	}
>> @@ -1926,14 +1926,13 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
>>   		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
>>   		blocknr += EXT4_C2B(sbi, block);
>>   		if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
>> +			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
>> +					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>>   			ext4_grp_locked_error(sb, e4b->bd_group,
>>   					      inode ? inode->i_ino : 0,
>>   					      blocknr,
>>   					      "freeing already freed block (bit %u); block bitmap corrupt.",
>>   					      block);
>> -			ext4_mark_group_bitmap_corrupted(
>> -				sb, e4b->bd_group,
>> -				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>>   		} else {
>>   			mb_regenerate_buddy(e4b);
>>   			goto check;
>> @@ -2410,12 +2409,12 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
>>   
>>   		k = mb_find_next_zero_bit(buddy, max, 0);
>>   		if (k >= max) {
>> +			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
>> +					e4b->bd_group,
>> +					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>>   			ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
>>   				"%d free clusters of order %d. But found 0",
>>   				grp->bb_counters[i], i);
>> -			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
>> -					 e4b->bd_group,
>> -					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>>   			break;
>>   		}
>>   		ac->ac_found++;
>> @@ -2466,12 +2465,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
>>   			 * free blocks even though group info says we
>>   			 * have free blocks
>>   			 */
>> +			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
>> +					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>>   			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
>>   					"%d free clusters as per "
>>   					"group info. But bitmap says 0",
>>   					free);
>> -			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
>> -					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>>   			break;
>>   		}
>>   
>> @@ -2497,12 +2496,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
>>   		if (WARN_ON(ex.fe_len <= 0))
>>   			break;
>>   		if (free < ex.fe_len) {
>> +			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
>> +					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>>   			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
>>   					"%d free clusters as per "
>>   					"group info. But got %d blocks",
>>   					free, ex.fe_len);
>> -			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
>> -					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
>>   			/*
>>   			 * The number of free blocks differs. This mostly
>>   			 * indicate that the bitmap is corrupt. So exit
>> -- 
>> 2.31.1
>>
>>
Thanks!
diff mbox series

Patch

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b862ca2750fd..c43eefebdaa3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -564,14 +564,14 @@  static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
 
 			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
 			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
+			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			ext4_grp_locked_error(sb, e4b->bd_group,
 					      inode ? inode->i_ino : 0,
 					      blocknr,
 					      "freeing block already freed "
 					      "(bit %u)",
 					      first + i);
-			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
-					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		}
 		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
 	}
@@ -1926,14 +1926,13 @@  static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
 		blocknr += EXT4_C2B(sbi, block);
 		if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
+			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			ext4_grp_locked_error(sb, e4b->bd_group,
 					      inode ? inode->i_ino : 0,
 					      blocknr,
 					      "freeing already freed block (bit %u); block bitmap corrupt.",
 					      block);
-			ext4_mark_group_bitmap_corrupted(
-				sb, e4b->bd_group,
-				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		} else {
 			mb_regenerate_buddy(e4b);
 			goto check;
@@ -2410,12 +2409,12 @@  void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
 
 		k = mb_find_next_zero_bit(buddy, max, 0);
 		if (k >= max) {
+			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
+					e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
 				"%d free clusters of order %d. But found 0",
 				grp->bb_counters[i], i);
-			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
-					 e4b->bd_group,
-					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			break;
 		}
 		ac->ac_found++;
@@ -2466,12 +2465,12 @@  void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 			 * free blocks even though group info says we
 			 * have free blocks
 			 */
+			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
 					"%d free clusters as per "
 					"group info. But bitmap says 0",
 					free);
-			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
-					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			break;
 		}
 
@@ -2497,12 +2496,12 @@  void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 		if (WARN_ON(ex.fe_len <= 0))
 			break;
 		if (free < ex.fe_len) {
+			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
 					"%d free clusters as per "
 					"group info. But got %d blocks",
 					free, ex.fe_len);
-			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
-					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			/*
 			 * The number of free blocks differs. This mostly
 			 * indicate that the bitmap is corrupt. So exit