diff mbox series

[02/20] ext4: prevent partial update of the extents path

Message ID 20240710040654.1714672-3-libaokun@huaweicloud.com
State Superseded
Headers show
Series ext4: some bugfixes and cleanups for ext4 extents path | expand

Commit Message

Baokun Li July 10, 2024, 4:06 a.m. UTC
From: Baokun Li <libaokun1@huawei.com>

In ext4_ext_rm_idx() and ext4_ext_correct_indexes(), there is no proper
rollback of already executed updates when updating a level of the extents
path fails, so we may get an inconsistent extents tree, which may trigger
some bad things in errors=continue mode.

Hence clear the verified bit of modified extents buffers if the tree fails
to be updated in ext4_ext_rm_idx() or ext4_ext_correct_indexes(), which
forces the extents buffers to be checked in ext4_valid_extent_entries(),
ensuring that the extents tree is consistent.

Signed-off-by: zhanchengbin <zhanchengbin1@huawei.com>
Link: https://lore.kernel.org/r/20230213080514.535568-3-zhanchengbin1@huawei.com/
Signed-off-by: Baokun Li <libaokun1@huawei.com>
---
 fs/ext4/extents.c | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

Comments

Ojaswin Mujoo July 14, 2024, 3:42 p.m. UTC | #1
On Wed, Jul 10, 2024 at 12:06:36PM +0800, libaokun@huaweicloud.com wrote:
> From: Baokun Li <libaokun1@huawei.com>
> 
> In ext4_ext_rm_idx() and ext4_ext_correct_indexes(), there is no proper
> rollback of already executed updates when updating a level of the extents
> path fails, so we may get an inconsistent extents tree, which may trigger
> some bad things in errors=continue mode.
> 
> Hence clear the verified bit of modified extents buffers if the tree fails
> to be updated in ext4_ext_rm_idx() or ext4_ext_correct_indexes(), which
> forces the extents buffers to be checked in ext4_valid_extent_entries(),
> ensuring that the extents tree is consistent.
> 
> Signed-off-by: zhanchengbin <zhanchengbin1@huawei.com>
> Link: https://lore.kernel.org/r/20230213080514.535568-3-zhanchengbin1@huawei.com/
> Signed-off-by: Baokun Li <libaokun1@huawei.com>
> ---
>  fs/ext4/extents.c | 31 +++++++++++++++++++++++++++----
>  1 file changed, 27 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index bff3666c891a..4d589d34b30e 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -1749,12 +1749,23 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
>       break;
>     err = ext4_ext_get_access(handle, inode, path + k);
>     if (err)
> -     break;
> +     goto clean;
>     path[k].p_idx->ei_block = border;
>     err = ext4_ext_dirty(handle, inode, path + k);
>     if (err)
> -     break;
> +     goto clean;
>   }
> + return 0;
> +
> +clean:
> + /*
> +  * The path[k].p_bh is either unmodified or with no verified bit
> +  * set (see ext4_ext_get_access()). So just clear the verified bit
> +  * of the successfully modified extents buffers, which will force
> +  * these extents to be checked to avoid using inconsistent data.
> +  */
> + while (++k < depth)
> +   clear_buffer_verified(path[k].p_bh);
>  
>   return err;
>  }
> @@ -2312,12 +2323,24 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>       break;
>     err = ext4_ext_get_access(handle, inode, path + k);
>     if (err)
> -     break;
> +     goto clean;
>     path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block;
>     err = ext4_ext_dirty(handle, inode, path + k);
>     if (err)
> -     break;
> +     goto clean;
>   }
> + return 0;
> +
> +clean:
> + /*
> +  * The path[k].p_bh is either unmodified or with no verified bit
> +  * set (see ext4_ext_get_access()). So just clear the verified bit
> +  * of the successfully modified extents buffers, which will force
> +  * these extents to be checked to avoid using inconsistent data.
> +  */
> + while (++k < depth)
> +   clear_buffer_verified(path[k].p_bh);
> +
>   return err;
>  }

Hi Baokun,

So I wanted to understand the extent handling paths for a whil and thought this
patchset was a good chance to finally take sometime and do that.

I do have a question based on my understanding of this extent deletion code:

So IIUC, ext4_find_extent() will return a path where buffer of each node is
verified (via bh = read_extent_tree_block()). So imagine we have the following
path (d=depth, blk=idx.ei_block, v=verified, nv=not-verified):

+------+     +------+     +------+     +------+     +------+
|d=0   |     |d=1   |     |d=2   |     |d=3   |     |      |
|blk=1 | --> |blk=1 | --> |blk=1 | --> |blk=1 | --> |pblk=1|
|(v)   |     |(v)   |     |(v)   |     |(v)   |     |      |
+------+     +------+     +------+     +------+     +------+
                                       |d=3   |     +------+
                                       |blk=2 | --> |      |
                                       |(v)   |     |pblk=2|
                                       +------+     |      |
                                                    +------+

Here, the the last column are the leaf nodes with only 1 extent in them.  Now,
say we want to punch the leaf having pblk=1. We'll eventually call
ext4_ext_rm_leaf() -> ext4_ext_rm_idx() to remove the index at depth = 3 and
try fixin up the indices in path with ei_block = 2

Suppose we error out at depth == 1. After the cleanup (introduced in this
patch), we'll mark depth = 1 to 4 as non verified:

+------+     +------+     +------+     +------+     +------+
|d=0   |     |d=1   |     |d=2   |     |d=3   |     |      |
|blk=1 | --> |blk=1 | --> |blk=2 | --> |blk=2 | --> |pblk=2|
|(v)   |     |(nv)  |     |(nv)  |     |(nv)  |     |(nv)  |
+------+     +------+     +------+     +------+     +------+

And we return and we won't actually mark the FS corrupt until we try to check
the bh at depth = 1 above. In this case, the first node is still pointing to
wrong ei_block but is marked valid. Aren't we silently leaving the tree in an
inconsistent state which might lead to corrupted lookups until we actually
touch the affected bh and realise that there's a corruption.

Am I missing some codepath here? Should we maybe try to clear_valid for the
whole tree?

(I hope the formatting of diagram comes out correct :) )

Regards,
ojaswin

>  
> -- 
> 2.39.2
>
Baokun Li July 15, 2024, 12:33 p.m. UTC | #2
Hi Ojaswin!

On 2024/7/14 23:42, Ojaswin Mujoo wrote:
> On Wed, Jul 10, 2024 at 12:06:36PM +0800, libaokun@huaweicloud.com wrote:
>> From: Baokun Li <libaokun1@huawei.com>
>>
>> In ext4_ext_rm_idx() and ext4_ext_correct_indexes(), there is no proper
>> rollback of already executed updates when updating a level of the extents
>> path fails, so we may get an inconsistent extents tree, which may trigger
>> some bad things in errors=continue mode.
>>
>> Hence clear the verified bit of modified extents buffers if the tree fails
>> to be updated in ext4_ext_rm_idx() or ext4_ext_correct_indexes(), which
>> forces the extents buffers to be checked in ext4_valid_extent_entries(),
>> ensuring that the extents tree is consistent.
>>
>> Signed-off-by: zhanchengbin <zhanchengbin1@huawei.com>
>> Link: https://lore.kernel.org/r/20230213080514.535568-3-zhanchengbin1@huawei.com/
>> Signed-off-by: Baokun Li <libaokun1@huawei.com>
>> ---
>>   fs/ext4/extents.c | 31 +++++++++++++++++++++++++++----
>>   1 file changed, 27 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index bff3666c891a..4d589d34b30e 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -1749,12 +1749,23 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
>>        break;
>>      err = ext4_ext_get_access(handle, inode, path + k);
>>      if (err)
>> -     break;
>> +     goto clean;
>>      path[k].p_idx->ei_block = border;
>>      err = ext4_ext_dirty(handle, inode, path + k);
>>      if (err)
>> -     break;
>> +     goto clean;
>>    }
>> + return 0;
>> +
>> +clean:
>> + /*
>> +  * The path[k].p_bh is either unmodified or with no verified bit
>> +  * set (see ext4_ext_get_access()). So just clear the verified bit
>> +  * of the successfully modified extents buffers, which will force
>> +  * these extents to be checked to avoid using inconsistent data.
>> +  */
>> + while (++k < depth)
>> +   clear_buffer_verified(path[k].p_bh);
>>   
>>    return err;
>>   }
>> @@ -2312,12 +2323,24 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>        break;
>>      err = ext4_ext_get_access(handle, inode, path + k);
>>      if (err)
>> -     break;
>> +     goto clean;
>>      path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block;
>>      err = ext4_ext_dirty(handle, inode, path + k);
>>      if (err)
>> -     break;
>> +     goto clean;
>>    }
>> + return 0;
>> +
>> +clean:
>> + /*
>> +  * The path[k].p_bh is either unmodified or with no verified bit
>> +  * set (see ext4_ext_get_access()). So just clear the verified bit
>> +  * of the successfully modified extents buffers, which will force
>> +  * these extents to be checked to avoid using inconsistent data.
>> +  */
>> + while (++k < depth)
>> +   clear_buffer_verified(path[k].p_bh);
>> +
>>    return err;
>>   }
> Hi Baokun,
>
> So I wanted to understand the extent handling paths for a whil and thought this
> patchset was a good chance to finally take sometime and do that.
>
> I do have a question based on my understanding of this extent deletion code:
>
> So IIUC, ext4_find_extent() will return a path where buffer of each node is
> verified (via bh = read_extent_tree_block()). So imagine we have the following
> path (d=depth, blk=idx.ei_block, v=verified, nv=not-verified):
>
> +------+     +------+     +------+     +------+     +------+
> |d=0   |     |d=1   |     |d=2   |     |d=3   |     |      |
> |blk=1 | --> |blk=1 | --> |blk=1 | --> |blk=1 | --> |pblk=1|
> |(v)   |     |(v)   |     |(v)   |     |(v)   |     |      |
> +------+     +------+     +------+     +------+     +------+
>                                         |d=3   |     +------+
>                                         |blk=2 | --> |      |
>                                         |(v)   |     |pblk=2|
>                                         +------+     |      |
>                                                      +------+
>
> Here, the the last column are the leaf nodes with only 1 extent in them.  Now,
> say we want to punch the leaf having pblk=1. We'll eventually call
> ext4_ext_rm_leaf() -> ext4_ext_rm_idx() to remove the index at depth = 3 and
> try fixin up the indices in path with ei_block = 2
>
> Suppose we error out at depth == 1. After the cleanup (introduced in this
> patch), we'll mark depth = 1 to 4 as non verified:
>
> +------+     +------+     +------+     +------+     +------+
> |d=0   |     |d=1   |     |d=2   |     |d=3   |     |      |
> |blk=1 | --> |blk=1 | --> |blk=2 | --> |blk=2 | --> |pblk=2|
> |(v)   |     |(nv)  |     |(nv)  |     |(nv)  |     |(nv)  |
> +------+     +------+     +------+     +------+     +------+
Exactly right!
>
> And we return and we won't actually mark the FS corrupt until we try to check
> the bh at depth = 1 above. In this case, the first node is still pointing to
> wrong ei_block but is marked valid. Aren't we silently leaving the tree in an
> inconsistent state which might lead to corrupted lookups until we actually
> touch the affected bh and realise that there's a corruption.
>
> Am I missing some codepath here? Should we maybe try to clear_valid for the
> whole tree?
>
> (I hope the formatting of diagram comes out correct :) )
>
> Regards,
> ojaswin
But the journal will ensure the consistency of the extents path after
this patch.

When ext4_ext_get_access() or ext4_ext_dirty() returns an error in
ext4_ext_rm_idx() and ext4_ext_correct_indexes(), this may cause
the extents tree to be inconsistent. But the inconsistency just
exists in memory and doesn't land on disk.

For ext4_ext_get_access(), the handle must have been aborted
when it returned an error, as follows:

ext4_ext_get_access
ext4_journal_get_write_access
__ext4_journal_get_write_access
err = jbd2_journal_get_write_access
if (err)
ext4_journal_abort_handle

For ext4_ext_dirty(), since path->p_bh must not be null and handle
must be valid, handle is aborted anyway when an error is returned:

ext4_ext_dirty __ext4_ext_dirty if (path->p_bh) 
__ext4_handle_dirty_metadata if (ext4_handle_valid(handle)) err = 
jbd2_journal_dirty_metadata if (!is_handle_aborted(handle) && 
WARN_ON_ONCE(err)) ext4_journal_abort_handle
Thus the extents tree will only be inconsistent in memory, so only
the verified bit of the modified buffer needs to be cleared to avoid
these inconsistent data being used in memory.

Regards,
Baokun
Baokun Li July 15, 2024, 12:41 p.m. UTC | #3
On 2024/7/15 20:33, Baokun Li wrote:
> Hi Ojaswin!
>
> On 2024/7/14 23:42, Ojaswin Mujoo wrote:
>> On Wed, Jul 10, 2024 at 12:06:36PM +0800, libaokun@huaweicloud.com 
>> wrote:
>>> From: Baokun Li <libaokun1@huawei.com>
>>>
>>> In ext4_ext_rm_idx() and ext4_ext_correct_indexes(), there is no proper
>>> rollback of already executed updates when updating a level of the 
>>> extents
>>> path fails, so we may get an inconsistent extents tree, which may 
>>> trigger
>>> some bad things in errors=continue mode.
>>>
>>> Hence clear the verified bit of modified extents buffers if the tree 
>>> fails
>>> to be updated in ext4_ext_rm_idx() or ext4_ext_correct_indexes(), which
>>> forces the extents buffers to be checked in 
>>> ext4_valid_extent_entries(),
>>> ensuring that the extents tree is consistent.
>>>
>>> Signed-off-by: zhanchengbin <zhanchengbin1@huawei.com>
>>> Link: 
>>> https://lore.kernel.org/r/20230213080514.535568-3-zhanchengbin1@huawei.com/
>>> Signed-off-by: Baokun Li <libaokun1@huawei.com>
>>> ---
>>>   fs/ext4/extents.c | 31 +++++++++++++++++++++++++++----
>>>   1 file changed, 27 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>> index bff3666c891a..4d589d34b30e 100644
>>> --- a/fs/ext4/extents.c
>>> +++ b/fs/ext4/extents.c
>>> @@ -1749,12 +1749,23 @@ static int ext4_ext_correct_indexes(handle_t 
>>> *handle, struct inode *inode,
>>>        break;
>>>      err = ext4_ext_get_access(handle, inode, path + k);
>>>      if (err)
>>> -     break;
>>> +     goto clean;
>>>      path[k].p_idx->ei_block = border;
>>>      err = ext4_ext_dirty(handle, inode, path + k);
>>>      if (err)
>>> -     break;
>>> +     goto clean;
>>>    }
>>> + return 0;
>>> +
>>> +clean:
>>> + /*
>>> +  * The path[k].p_bh is either unmodified or with no verified bit
>>> +  * set (see ext4_ext_get_access()). So just clear the verified bit
>>> +  * of the successfully modified extents buffers, which will force
>>> +  * these extents to be checked to avoid using inconsistent data.
>>> +  */
>>> + while (++k < depth)
>>> +   clear_buffer_verified(path[k].p_bh);
>>>      return err;
>>>   }
>>> @@ -2312,12 +2323,24 @@ static int ext4_ext_rm_idx(handle_t *handle, 
>>> struct inode *inode,
>>>        break;
>>>      err = ext4_ext_get_access(handle, inode, path + k);
>>>      if (err)
>>> -     break;
>>> +     goto clean;
>>>      path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block;
>>>      err = ext4_ext_dirty(handle, inode, path + k);
>>>      if (err)
>>> -     break;
>>> +     goto clean;
>>>    }
>>> + return 0;
>>> +
>>> +clean:
>>> + /*
>>> +  * The path[k].p_bh is either unmodified or with no verified bit
>>> +  * set (see ext4_ext_get_access()). So just clear the verified bit
>>> +  * of the successfully modified extents buffers, which will force
>>> +  * these extents to be checked to avoid using inconsistent data.
>>> +  */
>>> + while (++k < depth)
>>> +   clear_buffer_verified(path[k].p_bh);
>>> +
>>>    return err;
>>>   }
>> Hi Baokun,
>>
>> So I wanted to understand the extent handling paths for a whil and 
>> thought this
>> patchset was a good chance to finally take sometime and do that.
>>
>> I do have a question based on my understanding of this extent 
>> deletion code:
>>
>> So IIUC, ext4_find_extent() will return a path where buffer of each 
>> node is
>> verified (via bh = read_extent_tree_block()). So imagine we have the 
>> following
>> path (d=depth, blk=idx.ei_block, v=verified, nv=not-verified):
>>
>> +------+     +------+     +------+     +------+     +------+
>> |d=0   |     |d=1   |     |d=2   |     |d=3   |     |      |
>> |blk=1 | --> |blk=1 | --> |blk=1 | --> |blk=1 | --> |pblk=1|
>> |(v)   |     |(v)   |     |(v)   |     |(v)   |     |      |
>> +------+     +------+     +------+     +------+     +------+
>>                                         |d=3   |     +------+
>>                                         |blk=2 | --> |      |
>>                                         |(v)   |     |pblk=2|
>>                                         +------+     |      |
>>                                                      +------+
>>
>> Here, the the last column are the leaf nodes with only 1 extent in 
>> them.  Now,
>> say we want to punch the leaf having pblk=1. We'll eventually call
>> ext4_ext_rm_leaf() -> ext4_ext_rm_idx() to remove the index at depth 
>> = 3 and
>> try fixin up the indices in path with ei_block = 2
>>
>> Suppose we error out at depth == 1. After the cleanup (introduced in 
>> this
>> patch), we'll mark depth = 1 to 4 as non verified:
>>
>> +------+     +------+     +------+     +------+     +------+
>> |d=0   |     |d=1   |     |d=2   |     |d=3   |     |      |
>> |blk=1 | --> |blk=1 | --> |blk=2 | --> |blk=2 | --> |pblk=2|
>> |(v)   |     |(nv)  |     |(nv)  |     |(nv)  |     |(nv)  |
>> +------+     +------+     +------+     +------+     +------+
> Exactly right!
>>
>> And we return and we won't actually mark the FS corrupt until we try 
>> to check
>> the bh at depth = 1 above. In this case, the first node is still 
>> pointing to
>> wrong ei_block but is marked valid. Aren't we silently leaving the 
>> tree in an
>> inconsistent state which might lead to corrupted lookups until we 
>> actually
>> touch the affected bh and realise that there's a corruption.
>>
>> Am I missing some codepath here? Should we maybe try to clear_valid 
>> for the
>> whole tree?
>>
>> (I hope the formatting of diagram comes out correct :) )
Uh, I'm sorry, my diagram is disordered. 😅
>>
>> Regards,
>> ojaswin
> But the journal will ensure the consistency of the extents path after
> this patch.
>
> When ext4_ext_get_access() or ext4_ext_dirty() returns an error in
> ext4_ext_rm_idx() and ext4_ext_correct_indexes(), this may cause
> the extents tree to be inconsistent. But the inconsistency just
> exists in memory and doesn't land on disk.
>
> For ext4_ext_get_access(), the handle must have been aborted
> when it returned an error, as follows:
ext4_ext_get_access
  ext4_journal_get_write_access
   __ext4_journal_get_write_access
    err = jbd2_journal_get_write_access
    if (err)
      ext4_journal_abort_handle
> For ext4_ext_dirty(), since path->p_bh must not be null and handle
> must be valid, handle is aborted anyway when an error is returned:
ext4_ext_dirty
  __ext4_ext_dirty
   if (path->p_bh)
     __ext4_handle_dirty_metadata
      if (ext4_handle_valid(handle))
        err = jbd2_journal_dirty_metadata
         if (!is_handle_aborted(handle) && WARN_ON_ONCE(err))
           ext4_journal_abort_handle
> Thus the extents tree will only be inconsistent in memory, so only
> the verified bit of the modified buffer needs to be cleared to avoid
> these inconsistent data being used in memory.
>
Regards,
Baokun
Ojaswin Mujoo July 16, 2024, 9:54 a.m. UTC | #4
> > But the journal will ensure the consistency of the extents path after
> > this patch.
> > 
> > When ext4_ext_get_access() or ext4_ext_dirty() returns an error in
> > ext4_ext_rm_idx() and ext4_ext_correct_indexes(), this may cause
> > the extents tree to be inconsistent. But the inconsistency just
> > exists in memory and doesn't land on disk.
> > 
> > For ext4_ext_get_access(), the handle must have been aborted
> > when it returned an error, as follows:
> ext4_ext_get_access
>  ext4_journal_get_write_access
>   __ext4_journal_get_write_access
>    err = jbd2_journal_get_write_access
>    if (err)
>      ext4_journal_abort_handle
> > For ext4_ext_dirty(), since path->p_bh must not be null and handle
> > must be valid, handle is aborted anyway when an error is returned:
> ext4_ext_dirty
>  __ext4_ext_dirty
>   if (path->p_bh)
>     __ext4_handle_dirty_metadata
>      if (ext4_handle_valid(handle))
>        err = jbd2_journal_dirty_metadata
>         if (!is_handle_aborted(handle) && WARN_ON_ONCE(err))
>           ext4_journal_abort_handle
> > Thus the extents tree will only be inconsistent in memory, so only
> > the verified bit of the modified buffer needs to be cleared to avoid
> > these inconsistent data being used in memory.
> > 
> Regards,
> Baokun

Thanks for the explanation Baokun, so basically we only have the
inconsitency in the memory.

I do have a followup questions:

So in the above example, after we have the error, we'll have the buffer
for depth=0 marked as valid but pointing to the wrong ei_block.

In this case, can we have something like below:

-----------------
ext4_ext_remove_space
  err = ext4_ext_rm_idx (error, path[0].p_bh inconsistent but verified)
  /* 
   * we release buffers of the path but path[0].p_bh is not cleaned up
   * due to other references to it (possible?)
   */

... at a later point...:

ext4_find_extent
  bh = read_extent_tree_block() 
    /* 
     * we get the bh that was left inconsistent previously
     * since its verified, we dont check it again corrupting
     * the lookup
     */

-----------------

Is the above scenario possible? Or would the path[0].p_bh that was
corrupted previously always be reread during the subsequent
ext4_find_extent() lookup?

Thanks again,
Ojaswin
Baokun Li July 16, 2024, 11:54 a.m. UTC | #5
Hi Ojaswin,

On 2024/7/16 17:54, Ojaswin Mujoo wrote:
>>> But the journal will ensure the consistency of the extents path after
>>> this patch.
>>>
>>> When ext4_ext_get_access() or ext4_ext_dirty() returns an error in
>>> ext4_ext_rm_idx() and ext4_ext_correct_indexes(), this may cause
>>> the extents tree to be inconsistent. But the inconsistency just
>>> exists in memory and doesn't land on disk.
>>>
>>> For ext4_ext_get_access(), the handle must have been aborted
>>> when it returned an error, as follows:
>> ext4_ext_get_access
>>   ext4_journal_get_write_access
>>    __ext4_journal_get_write_access
>>     err = jbd2_journal_get_write_access
>>     if (err)
>>       ext4_journal_abort_handle
>>> For ext4_ext_dirty(), since path->p_bh must not be null and handle
>>> must be valid, handle is aborted anyway when an error is returned:
>> ext4_ext_dirty
>>   __ext4_ext_dirty
>>    if (path->p_bh)
>>      __ext4_handle_dirty_metadata
>>       if (ext4_handle_valid(handle))
>>         err = jbd2_journal_dirty_metadata
>>          if (!is_handle_aborted(handle) && WARN_ON_ONCE(err))
>>            ext4_journal_abort_handle
>>> Thus the extents tree will only be inconsistent in memory, so only
>>> the verified bit of the modified buffer needs to be cleared to avoid
>>> these inconsistent data being used in memory.
>>>
>> Regards,
>> Baokun
> Thanks for the explanation Baokun, so basically we only have the
> inconsitency in the memory.
>
> I do have a followup questions:
>
> So in the above example, after we have the error, we'll have the buffer
> for depth=0 marked as valid but pointing to the wrong ei_block.
It looks wrong here. When there is an error, the ei_block of the
unmodified buffer with depth=0 is the correct one, it is indeed
'valid' and it is consistent with the disk. Only buffers that were
modified during the error process need to be checked.

Regards,
Baokun
>
> In this case, can we have something like below:
>
> -----------------
> ext4_ext_remove_space
>    err = ext4_ext_rm_idx (error, path[0].p_bh inconsistent but verified)
>    /*
>     * we release buffers of the path but path[0].p_bh is not cleaned up
>     * due to other references to it (possible?)
>     */
>
> ... at a later point...:
>
> ext4_find_extent
>    bh = read_extent_tree_block()
>      /*
>       * we get the bh that was left inconsistent previously
>       * since its verified, we dont check it again corrupting
>       * the lookup
>       */
>
> -----------------
>
> Is the above scenario possible? Or would the path[0].p_bh that was
> corrupted previously always be reread during the subsequent
> ext4_find_extent() lookup?
>
> Thanks again,
> Ojaswin
Ojaswin Mujoo July 17, 2024, 5:29 a.m. UTC | #6
On Tue, Jul 16, 2024 at 07:54:43PM +0800, Baokun Li wrote:
> Hi Ojaswin,
> 
> On 2024/7/16 17:54, Ojaswin Mujoo wrote:
> > > > But the journal will ensure the consistency of the extents path after
> > > > this patch.
> > > > 
> > > > When ext4_ext_get_access() or ext4_ext_dirty() returns an error in
> > > > ext4_ext_rm_idx() and ext4_ext_correct_indexes(), this may cause
> > > > the extents tree to be inconsistent. But the inconsistency just
> > > > exists in memory and doesn't land on disk.
> > > > 
> > > > For ext4_ext_get_access(), the handle must have been aborted
> > > > when it returned an error, as follows:
> > > ext4_ext_get_access
> > >   ext4_journal_get_write_access
> > >    __ext4_journal_get_write_access
> > >     err = jbd2_journal_get_write_access
> > >     if (err)
> > >       ext4_journal_abort_handle
> > > > For ext4_ext_dirty(), since path->p_bh must not be null and handle
> > > > must be valid, handle is aborted anyway when an error is returned:
> > > ext4_ext_dirty
> > >   __ext4_ext_dirty
> > >    if (path->p_bh)
> > >      __ext4_handle_dirty_metadata
> > >       if (ext4_handle_valid(handle))
> > >         err = jbd2_journal_dirty_metadata
> > >          if (!is_handle_aborted(handle) && WARN_ON_ONCE(err))
> > >            ext4_journal_abort_handle
> > > > Thus the extents tree will only be inconsistent in memory, so only
> > > > the verified bit of the modified buffer needs to be cleared to avoid
> > > > these inconsistent data being used in memory.
> > > > 
> > > Regards,
> > > Baokun
> > Thanks for the explanation Baokun, so basically we only have the
> > inconsitency in the memory.
> > 
> > I do have a followup questions:
> > 
> > So in the above example, after we have the error, we'll have the buffer
> > for depth=0 marked as valid but pointing to the wrong ei_block.
> It looks wrong here. When there is an error, the ei_block of the
> unmodified buffer with depth=0 is the correct one, it is indeed
> 'valid' and it is consistent with the disk. Only buffers that were

Hey Baokun,

Ahh I see now, I was looking at it the wrong way. So basically since
depth 1 to 4 is inconsistent to the disk we mark then non verified so
then subsequent lookups can act accordingly.

Thanks for the explanation! I am in the middle of testing this patchset
with xfstests on a POWERPC system with 64k page size. I'll let you know
how that goes!

Regards,
Ojaswin
> modified during the error process need to be checked.
> 
> Regards,
> Baokun

> > 
> > In this case, can we have something like below:
> > 
> > -----------------
> > ext4_ext_remove_space
> >    err = ext4_ext_rm_idx (error, path[0].p_bh inconsistent but verified)
> >    /*
> >     * we release buffers of the path but path[0].p_bh is not cleaned up
> >     * due to other references to it (possible?)
> >     */
> > 
> > ... at a later point...:
> > 
> > ext4_find_extent
> >    bh = read_extent_tree_block()
> >      /*
> >       * we get the bh that was left inconsistent previously
> >       * since its verified, we dont check it again corrupting
> >       * the lookup
> >       */
> > 
> > -----------------
> > 
> > Is the above scenario possible? Or would the path[0].p_bh that was
> > corrupted previously always be reread during the subsequent
> > ext4_find_extent() lookup?
> > 
> > Thanks again,
> > Ojaswin
>
Baokun Li July 17, 2024, 6:11 a.m. UTC | #7
On 2024/7/17 13:29, Ojaswin Mujoo wrote:
> On Tue, Jul 16, 2024 at 07:54:43PM +0800, Baokun Li wrote:
>> Hi Ojaswin,
>>
>> On 2024/7/16 17:54, Ojaswin Mujoo wrote:
>>>>> But the journal will ensure the consistency of the extents path after
>>>>> this patch.
>>>>>
>>>>> When ext4_ext_get_access() or ext4_ext_dirty() returns an error in
>>>>> ext4_ext_rm_idx() and ext4_ext_correct_indexes(), this may cause
>>>>> the extents tree to be inconsistent. But the inconsistency just
>>>>> exists in memory and doesn't land on disk.
>>>>>
>>>>> For ext4_ext_get_access(), the handle must have been aborted
>>>>> when it returned an error, as follows:
>>>> ext4_ext_get_access
>>>>    ext4_journal_get_write_access
>>>>     __ext4_journal_get_write_access
>>>>      err = jbd2_journal_get_write_access
>>>>      if (err)
>>>>        ext4_journal_abort_handle
>>>>> For ext4_ext_dirty(), since path->p_bh must not be null and handle
>>>>> must be valid, handle is aborted anyway when an error is returned:
>>>> ext4_ext_dirty
>>>>    __ext4_ext_dirty
>>>>     if (path->p_bh)
>>>>       __ext4_handle_dirty_metadata
>>>>        if (ext4_handle_valid(handle))
>>>>          err = jbd2_journal_dirty_metadata
>>>>           if (!is_handle_aborted(handle) && WARN_ON_ONCE(err))
>>>>             ext4_journal_abort_handle
>>>>> Thus the extents tree will only be inconsistent in memory, so only
>>>>> the verified bit of the modified buffer needs to be cleared to avoid
>>>>> these inconsistent data being used in memory.
>>>>>
>>>> Regards,
>>>> Baokun
>>> Thanks for the explanation Baokun, so basically we only have the
>>> inconsitency in the memory.
>>>
>>> I do have a followup questions:
>>>
>>> So in the above example, after we have the error, we'll have the buffer
>>> for depth=0 marked as valid but pointing to the wrong ei_block.
>> It looks wrong here. When there is an error, the ei_block of the
>> unmodified buffer with depth=0 is the correct one, it is indeed
>> 'valid' and it is consistent with the disk. Only buffers that were
> Hey Baokun,
>
> Ahh I see now, I was looking at it the wrong way. So basically since
> depth 1 to 4 is inconsistent to the disk we mark then non verified so
> then subsequent lookups can act accordingly.
>
> Thanks for the explanation! I am in the middle of testing this patchset
> with xfstests on a POWERPC system with 64k page size. I'll let you know
> how that goes!
>
> Regards,
> Ojaswin

Hi Ojaswin,

Thank you for the test and feedback!

Cheers,
Baokun
Ojaswin Mujoo July 24, 2024, 6:23 a.m. UTC | #8
On Wed, Jul 17, 2024 at 02:11:27PM +0800, Baokun Li wrote:
> On 2024/7/17 13:29, Ojaswin Mujoo wrote:
> > On Tue, Jul 16, 2024 at 07:54:43PM +0800, Baokun Li wrote:
> > > Hi Ojaswin,
> > > 
> > > On 2024/7/16 17:54, Ojaswin Mujoo wrote:
> > > > > > But the journal will ensure the consistency of the extents path after
> > > > > > this patch.
> > > > > > 
> > > > > > When ext4_ext_get_access() or ext4_ext_dirty() returns an error in
> > > > > > ext4_ext_rm_idx() and ext4_ext_correct_indexes(), this may cause
> > > > > > the extents tree to be inconsistent. But the inconsistency just
> > > > > > exists in memory and doesn't land on disk.
> > > > > > 
> > > > > > For ext4_ext_get_access(), the handle must have been aborted
> > > > > > when it returned an error, as follows:
> > > > > ext4_ext_get_access
> > > > >    ext4_journal_get_write_access
> > > > >     __ext4_journal_get_write_access
> > > > >      err = jbd2_journal_get_write_access
> > > > >      if (err)
> > > > >        ext4_journal_abort_handle
> > > > > > For ext4_ext_dirty(), since path->p_bh must not be null and handle
> > > > > > must be valid, handle is aborted anyway when an error is returned:
> > > > > ext4_ext_dirty
> > > > >    __ext4_ext_dirty
> > > > >     if (path->p_bh)
> > > > >       __ext4_handle_dirty_metadata
> > > > >        if (ext4_handle_valid(handle))
> > > > >          err = jbd2_journal_dirty_metadata
> > > > >           if (!is_handle_aborted(handle) && WARN_ON_ONCE(err))
> > > > >             ext4_journal_abort_handle
> > > > > > Thus the extents tree will only be inconsistent in memory, so only
> > > > > > the verified bit of the modified buffer needs to be cleared to avoid
> > > > > > these inconsistent data being used in memory.
> > > > > > 
> > > > > Regards,
> > > > > Baokun
> > > > Thanks for the explanation Baokun, so basically we only have the
> > > > inconsitency in the memory.
> > > > 
> > > > I do have a followup questions:
> > > > 
> > > > So in the above example, after we have the error, we'll have the buffer
> > > > for depth=0 marked as valid but pointing to the wrong ei_block.
> > > It looks wrong here. When there is an error, the ei_block of the
> > > unmodified buffer with depth=0 is the correct one, it is indeed
> > > 'valid' and it is consistent with the disk. Only buffers that were
> > Hey Baokun,
> > 
> > Ahh I see now, I was looking at it the wrong way. So basically since
> > depth 1 to 4 is inconsistent to the disk we mark then non verified so
> > then subsequent lookups can act accordingly.
> > 
> > Thanks for the explanation! I am in the middle of testing this patchset
> > with xfstests on a POWERPC system with 64k page size. I'll let you know
> > how that goes!
> > 
> > Regards,
> > Ojaswin
> 
> Hi Ojaswin,
> 
> Thank you for the test and feedback!
> 
> Cheers,
> Baokun

Hey Baokun,

The xfstests pass for sub page size as well as bs = page size for
POWERPC with no new regressions.

Although for this particular patch I doubt if we would be able to
exersice the error path using xfstests. We might need to artifically 
inject error in ext4_ext_get_access or ext4_ext_dirty.  Do you have any
other way of testing this? 

Also, just curious whether you came across this bug during code reading
or were you actually hitting it?

Regards,
Ojaswin
>
Jan Kara July 24, 2024, 6:53 p.m. UTC | #9
On Wed 10-07-24 12:06:36, libaokun@huaweicloud.com wrote:
> From: Baokun Li <libaokun1@huawei.com>
> 
> In ext4_ext_rm_idx() and ext4_ext_correct_indexes(), there is no proper
> rollback of already executed updates when updating a level of the extents
> path fails, so we may get an inconsistent extents tree, which may trigger
> some bad things in errors=continue mode.
> 
> Hence clear the verified bit of modified extents buffers if the tree fails
> to be updated in ext4_ext_rm_idx() or ext4_ext_correct_indexes(), which
> forces the extents buffers to be checked in ext4_valid_extent_entries(),
> ensuring that the extents tree is consistent.
> 
> Signed-off-by: zhanchengbin <zhanchengbin1@huawei.com>
> Link: https://lore.kernel.org/r/20230213080514.535568-3-zhanchengbin1@huawei.com/
> Signed-off-by: Baokun Li <libaokun1@huawei.com>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/ext4/extents.c | 31 +++++++++++++++++++++++++++----
>  1 file changed, 27 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index bff3666c891a..4d589d34b30e 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -1749,12 +1749,23 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
>  			break;
>  		err = ext4_ext_get_access(handle, inode, path + k);
>  		if (err)
> -			break;
> +			goto clean;
>  		path[k].p_idx->ei_block = border;
>  		err = ext4_ext_dirty(handle, inode, path + k);
>  		if (err)
> -			break;
> +			goto clean;
>  	}
> +	return 0;
> +
> +clean:
> +	/*
> +	 * The path[k].p_bh is either unmodified or with no verified bit
> +	 * set (see ext4_ext_get_access()). So just clear the verified bit
> +	 * of the successfully modified extents buffers, which will force
> +	 * these extents to be checked to avoid using inconsistent data.
> +	 */
> +	while (++k < depth)
> +		clear_buffer_verified(path[k].p_bh);
>  
>  	return err;
>  }
> @@ -2312,12 +2323,24 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>  			break;
>  		err = ext4_ext_get_access(handle, inode, path + k);
>  		if (err)
> -			break;
> +			goto clean;
>  		path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block;
>  		err = ext4_ext_dirty(handle, inode, path + k);
>  		if (err)
> -			break;
> +			goto clean;
>  	}
> +	return 0;
> +
> +clean:
> +	/*
> +	 * The path[k].p_bh is either unmodified or with no verified bit
> +	 * set (see ext4_ext_get_access()). So just clear the verified bit
> +	 * of the successfully modified extents buffers, which will force
> +	 * these extents to be checked to avoid using inconsistent data.
> +	 */
> +	while (++k < depth)
> +		clear_buffer_verified(path[k].p_bh);
> +
>  	return err;
>  }
>  
> -- 
> 2.39.2
>
Baokun Li July 25, 2024, 5:35 a.m. UTC | #10
On 2024/7/24 14:23, Ojaswin Mujoo wrote:
> On Wed, Jul 17, 2024 at 02:11:27PM +0800, Baokun Li wrote:
>> On 2024/7/17 13:29, Ojaswin Mujoo wrote:
>>> On Tue, Jul 16, 2024 at 07:54:43PM +0800, Baokun Li wrote:
>>>> Hi Ojaswin,
>>>>
>>>> On 2024/7/16 17:54, Ojaswin Mujoo wrote:
>>>>>>> But the journal will ensure the consistency of the extents path after
>>>>>>> this patch.
>>>>>>>
>>>>>>> When ext4_ext_get_access() or ext4_ext_dirty() returns an error in
>>>>>>> ext4_ext_rm_idx() and ext4_ext_correct_indexes(), this may cause
>>>>>>> the extents tree to be inconsistent. But the inconsistency just
>>>>>>> exists in memory and doesn't land on disk.
>>>>>>>
>>>>>>> For ext4_ext_get_access(), the handle must have been aborted
>>>>>>> when it returned an error, as follows:
>>>>>> ext4_ext_get_access
>>>>>>     ext4_journal_get_write_access
>>>>>>      __ext4_journal_get_write_access
>>>>>>       err = jbd2_journal_get_write_access
>>>>>>       if (err)
>>>>>>         ext4_journal_abort_handle
>>>>>>> For ext4_ext_dirty(), since path->p_bh must not be null and handle
>>>>>>> must be valid, handle is aborted anyway when an error is returned:
>>>>>> ext4_ext_dirty
>>>>>>     __ext4_ext_dirty
>>>>>>      if (path->p_bh)
>>>>>>        __ext4_handle_dirty_metadata
>>>>>>         if (ext4_handle_valid(handle))
>>>>>>           err = jbd2_journal_dirty_metadata
>>>>>>            if (!is_handle_aborted(handle) && WARN_ON_ONCE(err))
>>>>>>              ext4_journal_abort_handle
>>>>>>> Thus the extents tree will only be inconsistent in memory, so only
>>>>>>> the verified bit of the modified buffer needs to be cleared to avoid
>>>>>>> these inconsistent data being used in memory.
>>>>>>>
>>>>>> Regards,
>>>>>> Baokun
>>>>> Thanks for the explanation Baokun, so basically we only have the
>>>>> inconsitency in the memory.
>>>>>
>>>>> I do have a followup questions:
>>>>>
>>>>> So in the above example, after we have the error, we'll have the buffer
>>>>> for depth=0 marked as valid but pointing to the wrong ei_block.
>>>> It looks wrong here. When there is an error, the ei_block of the
>>>> unmodified buffer with depth=0 is the correct one, it is indeed
>>>> 'valid' and it is consistent with the disk. Only buffers that were
>>> Hey Baokun,
>>>
>>> Ahh I see now, I was looking at it the wrong way. So basically since
>>> depth 1 to 4 is inconsistent to the disk we mark then non verified so
>>> then subsequent lookups can act accordingly.
>>>
>>> Thanks for the explanation! I am in the middle of testing this patchset
>>> with xfstests on a POWERPC system with 64k page size. I'll let you know
>>> how that goes!
>>>
>>> Regards,
>>> Ojaswin
>> Hi Ojaswin,
>>
>> Thank you for the test and feedback!
>>
>> Cheers,
>> Baokun
> Hey Baokun,

Hi Ojaswin,

Sorry for the slow reply, I'm currently on a business trip.

> The xfstests pass for sub page size as well as bs = page size for
> POWERPC with no new regressions.
Thank you very much for your test!
>
> Although for this particular patch I doubt if we would be able to
> exersice the error path using xfstests. We might need to artifically
> inject error in ext4_ext_get_access or ext4_ext_dirty.  Do you have any
> other way of testing this?
The issues in this patch set can all be triggered by injecting EIO or
ENOMEM into ext4_find_extent(). So not only did I test kvm-xftests
several times on x86 to make sure there weren't any regressions,
but I also tested that running kvm-xfstests while randomly injecting
faults into ext4_find_extent() didn't crash the system.
>
> Also, just curious whether you came across this bug during code reading
> or were you actually hitting it?
The initial issue was that e2fsck was always reporting some sort of
extents tree exception after testing, so the processes in question
were troubleshooting and hardening, i.e. the first two patches.
The other issues were discovered during fault injection testing of
the processes in question.


Regards,
Baokun
Ojaswin Mujoo July 25, 2024, 8:48 a.m. UTC | #11
On Thu, Jul 25, 2024 at 01:35:10PM +0800, Baokun Li wrote:
> On 2024/7/24 14:23, Ojaswin Mujoo wrote:
> > On Wed, Jul 17, 2024 at 02:11:27PM +0800, Baokun Li wrote:
> > > On 2024/7/17 13:29, Ojaswin Mujoo wrote:
> > > > On Tue, Jul 16, 2024 at 07:54:43PM +0800, Baokun Li wrote:
> > > > > Hi Ojaswin,
> > > > > 
> > > > > On 2024/7/16 17:54, Ojaswin Mujoo wrote:
> > > > > > > > But the journal will ensure the consistency of the extents path after
> > > > > > > > this patch.
> > > > > > > > 
> > > > > > > > When ext4_ext_get_access() or ext4_ext_dirty() returns an error in
> > > > > > > > ext4_ext_rm_idx() and ext4_ext_correct_indexes(), this may cause
> > > > > > > > the extents tree to be inconsistent. But the inconsistency just
> > > > > > > > exists in memory and doesn't land on disk.
> > > > > > > > 
> > > > > > > > For ext4_ext_get_access(), the handle must have been aborted
> > > > > > > > when it returned an error, as follows:
> > > > > > > ext4_ext_get_access
> > > > > > >     ext4_journal_get_write_access
> > > > > > >      __ext4_journal_get_write_access
> > > > > > >       err = jbd2_journal_get_write_access
> > > > > > >       if (err)
> > > > > > >         ext4_journal_abort_handle
> > > > > > > > For ext4_ext_dirty(), since path->p_bh must not be null and handle
> > > > > > > > must be valid, handle is aborted anyway when an error is returned:
> > > > > > > ext4_ext_dirty
> > > > > > >     __ext4_ext_dirty
> > > > > > >      if (path->p_bh)
> > > > > > >        __ext4_handle_dirty_metadata
> > > > > > >         if (ext4_handle_valid(handle))
> > > > > > >           err = jbd2_journal_dirty_metadata
> > > > > > >            if (!is_handle_aborted(handle) && WARN_ON_ONCE(err))
> > > > > > >              ext4_journal_abort_handle
> > > > > > > > Thus the extents tree will only be inconsistent in memory, so only
> > > > > > > > the verified bit of the modified buffer needs to be cleared to avoid
> > > > > > > > these inconsistent data being used in memory.
> > > > > > > > 
> > > > > > > Regards,
> > > > > > > Baokun
> > > > > > Thanks for the explanation Baokun, so basically we only have the
> > > > > > inconsitency in the memory.
> > > > > > 
> > > > > > I do have a followup questions:
> > > > > > 
> > > > > > So in the above example, after we have the error, we'll have the buffer
> > > > > > for depth=0 marked as valid but pointing to the wrong ei_block.
> > > > > It looks wrong here. When there is an error, the ei_block of the
> > > > > unmodified buffer with depth=0 is the correct one, it is indeed
> > > > > 'valid' and it is consistent with the disk. Only buffers that were
> > > > Hey Baokun,
> > > > 
> > > > Ahh I see now, I was looking at it the wrong way. So basically since
> > > > depth 1 to 4 is inconsistent to the disk we mark then non verified so
> > > > then subsequent lookups can act accordingly.
> > > > 
> > > > Thanks for the explanation! I am in the middle of testing this patchset
> > > > with xfstests on a POWERPC system with 64k page size. I'll let you know
> > > > how that goes!
> > > > 
> > > > Regards,
> > > > Ojaswin
> > > Hi Ojaswin,
> > > 
> > > Thank you for the test and feedback!
> > > 
> > > Cheers,
> > > Baokun
> > Hey Baokun,
> 
> Hi Ojaswin,
> 
> Sorry for the slow reply, I'm currently on a business trip.
> 
> > The xfstests pass for sub page size as well as bs = page size for
> > POWERPC with no new regressions.
> Thank you very much for your test!
> > 
> > Although for this particular patch I doubt if we would be able to
> > exersice the error path using xfstests. We might need to artifically
> > inject error in ext4_ext_get_access or ext4_ext_dirty.  Do you have any
> > other way of testing this?
> The issues in this patch set can all be triggered by injecting EIO or
> ENOMEM into ext4_find_extent(). So not only did I test kvm-xftests
> several times on x86 to make sure there weren't any regressions,
> but I also tested that running kvm-xfstests while randomly injecting
> faults into ext4_find_extent() didn't crash the system.

Ahh got it, thanks. I think I understand the changes well enough now and 
it makes sense to me to mark them non verified in case of errors.
Furthermore, the tests also look fine. Feel free to add:

Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>

> > 
> > Also, just curious whether you came across this bug during code reading
> > or were you actually hitting it?
> The initial issue was that e2fsck was always reporting some sort of
> extents tree exception after testing, so the processes in question
> were troubleshooting and hardening, i.e. the first two patches.
> The other issues were discovered during fault injection testing of
> the processes in question.
> 
> 
> Regards,
> Baokun
>
diff mbox series

Patch

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bff3666c891a..4d589d34b30e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1749,12 +1749,23 @@  static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
 			break;
 		err = ext4_ext_get_access(handle, inode, path + k);
 		if (err)
-			break;
+			goto clean;
 		path[k].p_idx->ei_block = border;
 		err = ext4_ext_dirty(handle, inode, path + k);
 		if (err)
-			break;
+			goto clean;
 	}
+	return 0;
+
+clean:
+	/*
+	 * The path[k].p_bh is either unmodified or with no verified bit
+	 * set (see ext4_ext_get_access()). So just clear the verified bit
+	 * of the successfully modified extents buffers, which will force
+	 * these extents to be checked to avoid using inconsistent data.
+	 */
+	while (++k < depth)
+		clear_buffer_verified(path[k].p_bh);
 
 	return err;
 }
@@ -2312,12 +2323,24 @@  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 			break;
 		err = ext4_ext_get_access(handle, inode, path + k);
 		if (err)
-			break;
+			goto clean;
 		path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block;
 		err = ext4_ext_dirty(handle, inode, path + k);
 		if (err)
-			break;
+			goto clean;
 	}
+	return 0;
+
+clean:
+	/*
+	 * The path[k].p_bh is either unmodified or with no verified bit
+	 * set (see ext4_ext_get_access()). So just clear the verified bit
+	 * of the successfully modified extents buffers, which will force
+	 * these extents to be checked to avoid using inconsistent data.
+	 */
+	while (++k < depth)
+		clear_buffer_verified(path[k].p_bh);
+
 	return err;
 }