diff mbox series

[15/19] parallels: Remove unnecessary data_end field

Message ID 20231002085738.369684-16-alexander.ivanov@virtuozzo.com
State New
Headers show
Series parallels: Add full dirty bitmap support | expand

Commit Message

Alexander Ivanov Oct. 2, 2023, 8:57 a.m. UTC
Since we have used bitmap, field data_end in BDRVParallelsState is
redundant and can be removed.

Add parallels_data_end() helper and remove data_end handling.

Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
---
 block/parallels.c | 33 +++++++++++++--------------------
 block/parallels.h |  1 -
 2 files changed, 13 insertions(+), 21 deletions(-)

Comments

Mike Maslenkin Oct. 6, 2023, 7:43 p.m. UTC | #1
On Mon, Oct 2, 2023 at 12:01 PM Alexander Ivanov
<alexander.ivanov@virtuozzo.com> wrote:
>
> Since we have used bitmap, field data_end in BDRVParallelsState is
> redundant and can be removed.
>
> Add parallels_data_end() helper and remove data_end handling.
>
> Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
> ---
>  block/parallels.c | 33 +++++++++++++--------------------
>  block/parallels.h |  1 -
>  2 files changed, 13 insertions(+), 21 deletions(-)
>
> diff --git a/block/parallels.c b/block/parallels.c
> index 48ea5b3f03..80a7171b84 100644
> --- a/block/parallels.c
> +++ b/block/parallels.c
> @@ -265,6 +265,13 @@ static void parallels_free_used_bitmap(BlockDriverState *bs)
>      g_free(s->used_bmap);
>  }
>
> +static int64_t parallels_data_end(BDRVParallelsState *s)
> +{
> +    int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
> +    data_end += s->used_bmap_size * s->cluster_size;
> +    return data_end;
> +}
> +
>  int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>                                           int64_t *clusters)
>  {
> @@ -275,7 +282,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>
>      first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
>      if (first_free == s->used_bmap_size) {
> -        host_off = s->data_end * BDRV_SECTOR_SIZE;
> +        host_off = parallels_data_end(s);
>          prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
>          bytes = prealloc_clusters * s->cluster_size;
>
> @@ -297,9 +304,6 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>          s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
>                                            new_usedsize);
>          s->used_bmap_size = new_usedsize;
> -        if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
> -            s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
> -        }
>      } else {
>          next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
>
> @@ -315,8 +319,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>           * branch. In the other case we are likely re-using hole. Preallocate
>           * the space if required by the prealloc_mode.
>           */
> -        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
> -                host_off < s->data_end * BDRV_SECTOR_SIZE) {
> +        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
>              ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
>              if (ret < 0) {
>                  return ret;
> @@ -757,13 +760,7 @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
>          }
>      }
>
> -    if (high_off == 0) {
> -        res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
> -    } else {
> -        res->image_end_offset = high_off + s->cluster_size;
> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
> -    }
> -
> +    res->image_end_offset = parallels_data_end(s);
>      return 0;
>  }
>
> @@ -806,7 +803,6 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
>              res->check_errors++;
>              return ret;
>          }
> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
>
>          parallels_free_used_bitmap(bs);
>          ret = parallels_fill_used_bitmap(bs);
> @@ -1361,8 +1357,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
>      }
>
>      s->data_start = data_start;
> -    s->data_end = s->data_start;
> -    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
> +    if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
>          /*
>           * There is not enough unused space to fit to block align between BAT
>           * and actual data. We can't avoid read-modify-write...
> @@ -1403,11 +1398,10 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
>
>      for (i = 0; i < s->bat_size; i++) {
>          sector = bat2sect(s, i);
> -        if (sector + s->tracks > s->data_end) {
> -            s->data_end = sector + s->tracks;
> +        if (sector + s->tracks > file_nb_sectors) {
> +            need_check = true;
>          }
>      }
> -    need_check = need_check || s->data_end > file_nb_sectors;
>
>      ret = parallels_fill_used_bitmap(bs);
>      if (ret == -ENOMEM) {
> @@ -1461,7 +1455,6 @@ static int parallels_truncate_unused_clusters(BlockDriverState *bs)
>          end_off = (end_off + 1) * s->cluster_size;
>      }
>      end_off += s->data_start * BDRV_SECTOR_SIZE;
> -    s->data_end = end_off / BDRV_SECTOR_SIZE;
>      return bdrv_truncate(bs->file, end_off, true, PREALLOC_MODE_OFF, 0, NULL);
>  }
>
> diff --git a/block/parallels.h b/block/parallels.h
> index 18b4f8068e..a6a048d890 100644
> --- a/block/parallels.h
> +++ b/block/parallels.h
> @@ -79,7 +79,6 @@ typedef struct BDRVParallelsState {
>      unsigned int bat_size;
>
>      int64_t  data_start;
> -    int64_t  data_end;
>      uint64_t prealloc_size;
>      ParallelsPreallocMode prealloc_mode;
>
> --
> 2.34.1
>

Is it intended behavior?

Run:
1. ./qemu-img create -f parallels $TEST_IMG 1T
2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
3. ./qemu-img check  $TEST_IMG
       No errors were found on the image.
       Image end offset: 150994944

Without this patch `qemu-img check` reports:
       ERROR space leaked at the end of the image 145752064

      139 leaked clusters were found on the image.
      This means waste of disk space, but no harm to data.
      Image end offset: 5242880

Note: there is another issue caused by previous commits exists.
g_free asserts from parallels_free_used_bitmap() because of
s->used_bmap is NULL.

To reproduce this crash at revision before or without patch 15/19, run commands:
1. ./qemu-img create -f parallels $TEST_IMG 1T
2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
3. ./qemu-img check -r leaks $TEST_IMG

Regards,
Mike.
Alexander Ivanov Oct. 7, 2023, 10:18 a.m. UTC | #2
On 10/6/23 21:43, Mike Maslenkin wrote:
> On Mon, Oct 2, 2023 at 12:01 PM Alexander Ivanov
> <alexander.ivanov@virtuozzo.com> wrote:
>> Since we have used bitmap, field data_end in BDRVParallelsState is
>> redundant and can be removed.
>>
>> Add parallels_data_end() helper and remove data_end handling.
>>
>> Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
>> ---
>>   block/parallels.c | 33 +++++++++++++--------------------
>>   block/parallels.h |  1 -
>>   2 files changed, 13 insertions(+), 21 deletions(-)
>>
>> diff --git a/block/parallels.c b/block/parallels.c
>> index 48ea5b3f03..80a7171b84 100644
>> --- a/block/parallels.c
>> +++ b/block/parallels.c
>> @@ -265,6 +265,13 @@ static void parallels_free_used_bitmap(BlockDriverState *bs)
>>       g_free(s->used_bmap);
>>   }
>>
>> +static int64_t parallels_data_end(BDRVParallelsState *s)
>> +{
>> +    int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
>> +    data_end += s->used_bmap_size * s->cluster_size;
>> +    return data_end;
>> +}
>> +
>>   int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>                                            int64_t *clusters)
>>   {
>> @@ -275,7 +282,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>
>>       first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
>>       if (first_free == s->used_bmap_size) {
>> -        host_off = s->data_end * BDRV_SECTOR_SIZE;
>> +        host_off = parallels_data_end(s);
>>           prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
>>           bytes = prealloc_clusters * s->cluster_size;
>>
>> @@ -297,9 +304,6 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>           s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
>>                                             new_usedsize);
>>           s->used_bmap_size = new_usedsize;
>> -        if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
>> -            s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
>> -        }
>>       } else {
>>           next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
>>
>> @@ -315,8 +319,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>            * branch. In the other case we are likely re-using hole. Preallocate
>>            * the space if required by the prealloc_mode.
>>            */
>> -        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
>> -                host_off < s->data_end * BDRV_SECTOR_SIZE) {
>> +        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
>>               ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
>>               if (ret < 0) {
>>                   return ret;
>> @@ -757,13 +760,7 @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
>>           }
>>       }
>>
>> -    if (high_off == 0) {
>> -        res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
>> -    } else {
>> -        res->image_end_offset = high_off + s->cluster_size;
>> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
>> -    }
>> -
>> +    res->image_end_offset = parallels_data_end(s);
>>       return 0;
>>   }
>>
>> @@ -806,7 +803,6 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
>>               res->check_errors++;
>>               return ret;
>>           }
>> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
>>
>>           parallels_free_used_bitmap(bs);
>>           ret = parallels_fill_used_bitmap(bs);
>> @@ -1361,8 +1357,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
>>       }
>>
>>       s->data_start = data_start;
>> -    s->data_end = s->data_start;
>> -    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
>> +    if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
>>           /*
>>            * There is not enough unused space to fit to block align between BAT
>>            * and actual data. We can't avoid read-modify-write...
>> @@ -1403,11 +1398,10 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
>>
>>       for (i = 0; i < s->bat_size; i++) {
>>           sector = bat2sect(s, i);
>> -        if (sector + s->tracks > s->data_end) {
>> -            s->data_end = sector + s->tracks;
>> +        if (sector + s->tracks > file_nb_sectors) {
>> +            need_check = true;
>>           }
>>       }
>> -    need_check = need_check || s->data_end > file_nb_sectors;
>>
>>       ret = parallels_fill_used_bitmap(bs);
>>       if (ret == -ENOMEM) {
>> @@ -1461,7 +1455,6 @@ static int parallels_truncate_unused_clusters(BlockDriverState *bs)
>>           end_off = (end_off + 1) * s->cluster_size;
>>       }
>>       end_off += s->data_start * BDRV_SECTOR_SIZE;
>> -    s->data_end = end_off / BDRV_SECTOR_SIZE;
>>       return bdrv_truncate(bs->file, end_off, true, PREALLOC_MODE_OFF, 0, NULL);
>>   }
>>
>> diff --git a/block/parallels.h b/block/parallels.h
>> index 18b4f8068e..a6a048d890 100644
>> --- a/block/parallels.h
>> +++ b/block/parallels.h
>> @@ -79,7 +79,6 @@ typedef struct BDRVParallelsState {
>>       unsigned int bat_size;
>>
>>       int64_t  data_start;
>> -    int64_t  data_end;
>>       uint64_t prealloc_size;
>>       ParallelsPreallocMode prealloc_mode;
>>
>> --
>> 2.34.1
>>
> Is it intended behavior?
>
> Run:
> 1. ./qemu-img create -f parallels $TEST_IMG 1T
> 2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
> 3. ./qemu-img check  $TEST_IMG
>         No errors were found on the image.
>         Image end offset: 150994944
>
> Without this patch `qemu-img check` reports:
>         ERROR space leaked at the end of the image 145752064
>
>        139 leaked clusters were found on the image.
>        This means waste of disk space, but no harm to data.
>        Image end offset: 5242880
The original intention is do nothing at this point if an image is opened as
RO. In the next patch parallels_check_leak() was rewritten to detect
unused clusters at the image end.

But there is a bug: (end_off == s->used_bmap_size) case is handled in an
incorrect way. Will fix it, thank you.
>
> Note: there is another issue caused by previous commits exists.
> g_free asserts from parallels_free_used_bitmap() because of
> s->used_bmap is NULL.
Maybe I don't understand your point, but if you meant that g_free() could be
called with NULL argument, it's not a problem. GLib Manual says:

    void g_free (/|gpointer
    <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Basic-Types.php#gpointer>
    mem|/);

    If /|mem|/ is |NULL|
    <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Standard-Macros.php#NULL:CAPS>
    it simply returns, so there is no need to check /|mem|/ against
    |NULL|
    <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Standard-Macros.php#NULL:CAPS>
    before calling this function.

> To reproduce this crash at revision before or without patch 15/19, run commands:
> 1. ./qemu-img create -f parallels $TEST_IMG 1T
> 2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
> 3. ./qemu-img check -r leaks $TEST_IMG
Sorry, but I couldn't reproduce it. Reset to 14/19, made the three steps
and had such output:

    $ ./qemu-img create -f parallels $TEST_IMG 1T
    Formatting 'test.img', fmt=parallels size=1099511627776
    cluster_size=1048576

    $ dd if=/dev/zero of=$TEST_IMG seek=12  bs=1M count=128 conv=notrunc
    128+0 records in
    128+0 records out
    134217728 bytes (134 MB, 128 MiB) copied, 0.0797576 s, 1.7 GB/s

    $ ./qemu-img check -r leaks $TEST_IMG
    Repairing space leaked at the end of the image 141557760
    The following inconsistencies were found and repaired:

    135 leaked clusters
    0 corruptions

    Double checking the fixed image now...
    No errors were found on the image.
    Image end offset: 5242880

Checked with GCC and Clang.
> Regards,
> Mike.
Mike Maslenkin Oct. 7, 2023, 11:21 a.m. UTC | #3
On Sat, Oct 7, 2023 at 1:18 PM Alexander Ivanov
<alexander.ivanov@virtuozzo.com> wrote:
>
>
>
> On 10/6/23 21:43, Mike Maslenkin wrote:
> > On Mon, Oct 2, 2023 at 12:01 PM Alexander Ivanov
> > <alexander.ivanov@virtuozzo.com> wrote:
> >> Since we have used bitmap, field data_end in BDRVParallelsState is
> >> redundant and can be removed.
> >>
> >> Add parallels_data_end() helper and remove data_end handling.
> >>
> >> Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
> >> ---
> >>   block/parallels.c | 33 +++++++++++++--------------------
> >>   block/parallels.h |  1 -
> >>   2 files changed, 13 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/block/parallels.c b/block/parallels.c
> >> index 48ea5b3f03..80a7171b84 100644
> >> --- a/block/parallels.c
> >> +++ b/block/parallels.c
> >> @@ -265,6 +265,13 @@ static void parallels_free_used_bitmap(BlockDriverState *bs)
> >>       g_free(s->used_bmap);
> >>   }
> >>
> >> +static int64_t parallels_data_end(BDRVParallelsState *s)
> >> +{
> >> +    int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
> >> +    data_end += s->used_bmap_size * s->cluster_size;
> >> +    return data_end;
> >> +}
> >> +
> >>   int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
> >>                                            int64_t *clusters)
> >>   {
> >> @@ -275,7 +282,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
> >>
> >>       first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
> >>       if (first_free == s->used_bmap_size) {
> >> -        host_off = s->data_end * BDRV_SECTOR_SIZE;
> >> +        host_off = parallels_data_end(s);
> >>           prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
> >>           bytes = prealloc_clusters * s->cluster_size;
> >>
> >> @@ -297,9 +304,6 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
> >>           s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
> >>                                             new_usedsize);
> >>           s->used_bmap_size = new_usedsize;
> >> -        if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
> >> -            s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
> >> -        }
> >>       } else {
> >>           next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
> >>
> >> @@ -315,8 +319,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
> >>            * branch. In the other case we are likely re-using hole. Preallocate
> >>            * the space if required by the prealloc_mode.
> >>            */
> >> -        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
> >> -                host_off < s->data_end * BDRV_SECTOR_SIZE) {
> >> +        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
> >>               ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
> >>               if (ret < 0) {
> >>                   return ret;
> >> @@ -757,13 +760,7 @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
> >>           }
> >>       }
> >>
> >> -    if (high_off == 0) {
> >> -        res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
> >> -    } else {
> >> -        res->image_end_offset = high_off + s->cluster_size;
> >> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
> >> -    }
> >> -
> >> +    res->image_end_offset = parallels_data_end(s);
> >>       return 0;
> >>   }
> >>
> >> @@ -806,7 +803,6 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
> >>               res->check_errors++;
> >>               return ret;
> >>           }
> >> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
> >>
> >>           parallels_free_used_bitmap(bs);
> >>           ret = parallels_fill_used_bitmap(bs);
> >> @@ -1361,8 +1357,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
> >>       }
> >>
> >>       s->data_start = data_start;
> >> -    s->data_end = s->data_start;
> >> -    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
> >> +    if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
> >>           /*
> >>            * There is not enough unused space to fit to block align between BAT
> >>            * and actual data. We can't avoid read-modify-write...
> >> @@ -1403,11 +1398,10 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
> >>
> >>       for (i = 0; i < s->bat_size; i++) {
> >>           sector = bat2sect(s, i);
> >> -        if (sector + s->tracks > s->data_end) {
> >> -            s->data_end = sector + s->tracks;
> >> +        if (sector + s->tracks > file_nb_sectors) {
> >> +            need_check = true;
> >>           }
> >>       }
> >> -    need_check = need_check || s->data_end > file_nb_sectors;
> >>
> >>       ret = parallels_fill_used_bitmap(bs);
> >>       if (ret == -ENOMEM) {
> >> @@ -1461,7 +1455,6 @@ static int parallels_truncate_unused_clusters(BlockDriverState *bs)
> >>           end_off = (end_off + 1) * s->cluster_size;
> >>       }
> >>       end_off += s->data_start * BDRV_SECTOR_SIZE;
> >> -    s->data_end = end_off / BDRV_SECTOR_SIZE;
> >>       return bdrv_truncate(bs->file, end_off, true, PREALLOC_MODE_OFF, 0, NULL);
> >>   }
> >>
> >> diff --git a/block/parallels.h b/block/parallels.h
> >> index 18b4f8068e..a6a048d890 100644
> >> --- a/block/parallels.h
> >> +++ b/block/parallels.h
> >> @@ -79,7 +79,6 @@ typedef struct BDRVParallelsState {
> >>       unsigned int bat_size;
> >>
> >>       int64_t  data_start;
> >> -    int64_t  data_end;
> >>       uint64_t prealloc_size;
> >>       ParallelsPreallocMode prealloc_mode;
> >>
> >> --
> >> 2.34.1
> >>
> > Is it intended behavior?
> >
> > Run:
> > 1. ./qemu-img create -f parallels $TEST_IMG 1T
> > 2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
> > 3. ./qemu-img check  $TEST_IMG
> >         No errors were found on the image.
> >         Image end offset: 150994944
> >
> > Without this patch `qemu-img check` reports:
> >         ERROR space leaked at the end of the image 145752064
> >
> >        139 leaked clusters were found on the image.
> >        This means waste of disk space, but no harm to data.
> >        Image end offset: 5242880
> The original intention is do nothing at this point if an image is opened as
> RO. In the next patch parallels_check_leak() was rewritten to detect
> unused clusters at the image end.
>
> But there is a bug: (end_off == s->used_bmap_size) case is handled in an
> incorrect way. Will fix it, thank you.
> >
> > Note: there is another issue caused by previous commits exists.
> > g_free asserts from parallels_free_used_bitmap() because of
> > s->used_bmap is NULL.
> Maybe I don't understand your point, but if you meant that g_free() could be
> called with NULL argument, it's not a problem. GLib Manual says:
>
>     void g_free (/|gpointer
>     <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Basic-Types.php#gpointer>
>     mem|/);
>
>     If /|mem|/ is |NULL|
>     <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Standard-Macros.php#NULL:CAPS>
>     it simply returns, so there is no need to check /|mem|/ against
>     |NULL|
>     <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Standard-Macros.php#NULL:CAPS>
>     before calling this function.
>
> > To reproduce this crash at revision before or without patch 15/19, run commands:
> > 1. ./qemu-img create -f parallels $TEST_IMG 1T
> > 2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
> > 3. ./qemu-img check -r leaks $TEST_IMG
> Sorry, but I couldn't reproduce it. Reset to 14/19, made the three steps
> and had such output:
>
>     $ ./qemu-img create -f parallels $TEST_IMG 1T
>     Formatting 'test.img', fmt=parallels size=1099511627776
>     cluster_size=1048576
>
>     $ dd if=/dev/zero of=$TEST_IMG seek=12  bs=1M count=128 conv=notrunc
>     128+0 records in
>     128+0 records out
>     134217728 bytes (134 MB, 128 MiB) copied, 0.0797576 s, 1.7 GB/s
>
>     $ ./qemu-img check -r leaks $TEST_IMG
>     Repairing space leaked at the end of the image 141557760
>     The following inconsistencies were found and repaired:
>
>     135 leaked clusters
>     0 corruptions
>
>     Double checking the fixed image now...
>     No errors were found on the image.
>     Image end offset: 5242880

My comment regarding patch 15 is about 'check' operation is not able
to detect leaked data anymore.
So, after this patch I see:

$ ./qemu-img info   leak.bin
image: leak.bin
file format: parallels
virtual size: 1 TiB (1099511627776 bytes)
disk size: 145 MiB
Child node '/file':
    filename: leak.bin
    protocol type: file
    file length: 146 MiB (153092096 bytes)
    disk size: 145 MiB

$ ./qemu-img check -r leaks leak.bin
No errors were found on the image.
Image end offset: 153092096

After reverting this patch  'check' reports about:
ERROR space leaked at the end of the image 148897792

So, after reverting patch 15 I tried to repair such image
and got abort trap.

I rechecked with downloaded patches, rebuild from scratch and can tell
that there is no abort on master branch, but it appears after applying
patches[1-9].
Obviously It can be debugged and the reason is that
parallels_fill_used_bitmap() returns after

 s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size);
    if (s->used_bmap_size == 0) {
        return 0;
    }

Because DIV_ROUND_UP(payload_bytes, s->cluster_size); gives a 0;

So subsequent parallels_free_used_bitmap() called from
parallels_close() causes an assert.

So, the first invocation of parallels_free_used_bitmap is:
  * frame #0: 0x0000000100091830 qemu-img`parallels_check_leak
[inlined] parallels_free_used_bitmap(bs=0x0000000101011600) at
parallels.c:263:33 [opt]
    frame #1: 0x0000000100091830
qemu-img`parallels_check_leak(bs=0x0000000101011600,
res=0x000000016fdff5d8, fix=BDRV_FIX_LEAKS, explicit=true) at
parallels.c:811:9 [opt]
    frame #2: 0x0000000100090d80
qemu-img`parallels_co_check(bs=0x0000000101011600,
res=0x000000016fdff5d8, fix=BDRV_FIX_LEAKS) at parallels.c:1014:15
[opt]
    frame #3: 0x0000000100014f6c
qemu-img`bdrv_co_check_entry(opaque=0x000000016fdff560) at
block-gen.c:556:14 [opt]

And the second generates abort from there:
  * frame #0: 0x000000010008fef8 qemu-img`parallels_close [inlined]
parallels_free_used_bitmap(bs=<unavailable>) at parallels.c:263:33
[opt]
    frame #1: 0x000000010008fef8
qemu-img`parallels_close(bs=0x0000000101011600) at parallels.c:1501:5
[opt]
    frame #2: 0x0000000100019d3c qemu-img`bdrv_unref [inlined]
bdrv_close(bs=0x0000000101011600) at block.c:5164:13 [opt]

After the first parallels_free_used_bitmap(), there is an actual image
truncation happens, so there is no payload at the moment of the next
parallels_fill_used_bitmap(),

PS: there are a chances that some patches were not applied clearly,
I'll recheck this later.
It would be nice if it was possible to fetch changes from some repo,
rather than extracting  it from gmail.

Regards,
Mike.
Alexander Ivanov Oct. 7, 2023, 2:30 p.m. UTC | #4
On 10/7/23 13:21, Mike Maslenkin wrote:
> On Sat, Oct 7, 2023 at 1:18 PM Alexander Ivanov
> <alexander.ivanov@virtuozzo.com>  wrote:
>>
>> On 10/6/23 21:43, Mike Maslenkin wrote:
>>> On Mon, Oct 2, 2023 at 12:01 PM Alexander Ivanov
>>> <alexander.ivanov@virtuozzo.com>  wrote:
>>>> Since we have used bitmap, field data_end in BDRVParallelsState is
>>>> redundant and can be removed.
>>>>
>>>> Add parallels_data_end() helper and remove data_end handling.
>>>>
>>>> Signed-off-by: Alexander Ivanov<alexander.ivanov@virtuozzo.com>
>>>> ---
>>>>    block/parallels.c | 33 +++++++++++++--------------------
>>>>    block/parallels.h |  1 -
>>>>    2 files changed, 13 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/block/parallels.c b/block/parallels.c
>>>> index 48ea5b3f03..80a7171b84 100644
>>>> --- a/block/parallels.c
>>>> +++ b/block/parallels.c
>>>> @@ -265,6 +265,13 @@ static void parallels_free_used_bitmap(BlockDriverState *bs)
>>>>        g_free(s->used_bmap);
>>>>    }
>>>>
>>>> +static int64_t parallels_data_end(BDRVParallelsState *s)
>>>> +{
>>>> +    int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
>>>> +    data_end += s->used_bmap_size * s->cluster_size;
>>>> +    return data_end;
>>>> +}
>>>> +
>>>>    int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>>>                                             int64_t *clusters)
>>>>    {
>>>> @@ -275,7 +282,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>>>
>>>>        first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
>>>>        if (first_free == s->used_bmap_size) {
>>>> -        host_off = s->data_end * BDRV_SECTOR_SIZE;
>>>> +        host_off = parallels_data_end(s);
>>>>            prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
>>>>            bytes = prealloc_clusters * s->cluster_size;
>>>>
>>>> @@ -297,9 +304,6 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>>>            s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
>>>>                                              new_usedsize);
>>>>            s->used_bmap_size = new_usedsize;
>>>> -        if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
>>>> -            s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
>>>> -        }
>>>>        } else {
>>>>            next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
>>>>
>>>> @@ -315,8 +319,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>>>             * branch. In the other case we are likely re-using hole. Preallocate
>>>>             * the space if required by the prealloc_mode.
>>>>             */
>>>> -        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
>>>> -                host_off < s->data_end * BDRV_SECTOR_SIZE) {
>>>> +        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
>>>>                ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
>>>>                if (ret < 0) {
>>>>                    return ret;
>>>> @@ -757,13 +760,7 @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
>>>>            }
>>>>        }
>>>>
>>>> -    if (high_off == 0) {
>>>> -        res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
>>>> -    } else {
>>>> -        res->image_end_offset = high_off + s->cluster_size;
>>>> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
>>>> -    }
>>>> -
>>>> +    res->image_end_offset = parallels_data_end(s);
>>>>        return 0;
>>>>    }
>>>>
>>>> @@ -806,7 +803,6 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
>>>>                res->check_errors++;
>>>>                return ret;
>>>>            }
>>>> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
>>>>
>>>>            parallels_free_used_bitmap(bs);
>>>>            ret = parallels_fill_used_bitmap(bs);
>>>> @@ -1361,8 +1357,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
>>>>        }
>>>>
>>>>        s->data_start = data_start;
>>>> -    s->data_end = s->data_start;
>>>> -    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
>>>> +    if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
>>>>            /*
>>>>             * There is not enough unused space to fit to block align between BAT
>>>>             * and actual data. We can't avoid read-modify-write...
>>>> @@ -1403,11 +1398,10 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
>>>>
>>>>        for (i = 0; i < s->bat_size; i++) {
>>>>            sector = bat2sect(s, i);
>>>> -        if (sector + s->tracks > s->data_end) {
>>>> -            s->data_end = sector + s->tracks;
>>>> +        if (sector + s->tracks > file_nb_sectors) {
>>>> +            need_check = true;
>>>>            }
>>>>        }
>>>> -    need_check = need_check || s->data_end > file_nb_sectors;
>>>>
>>>>        ret = parallels_fill_used_bitmap(bs);
>>>>        if (ret == -ENOMEM) {
>>>> @@ -1461,7 +1455,6 @@ static int parallels_truncate_unused_clusters(BlockDriverState *bs)
>>>>            end_off = (end_off + 1) * s->cluster_size;
>>>>        }
>>>>        end_off += s->data_start * BDRV_SECTOR_SIZE;
>>>> -    s->data_end = end_off / BDRV_SECTOR_SIZE;
>>>>        return bdrv_truncate(bs->file, end_off, true, PREALLOC_MODE_OFF, 0, NULL);
>>>>    }
>>>>
>>>> diff --git a/block/parallels.h b/block/parallels.h
>>>> index 18b4f8068e..a6a048d890 100644
>>>> --- a/block/parallels.h
>>>> +++ b/block/parallels.h
>>>> @@ -79,7 +79,6 @@ typedef struct BDRVParallelsState {
>>>>        unsigned int bat_size;
>>>>
>>>>        int64_t  data_start;
>>>> -    int64_t  data_end;
>>>>        uint64_t prealloc_size;
>>>>        ParallelsPreallocMode prealloc_mode;
>>>>
>>>> --
>>>> 2.34.1
>>>>
>>> Is it intended behavior?
>>>
>>> Run:
>>> 1. ./qemu-img create -f parallels $TEST_IMG 1T
>>> 2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
>>> 3. ./qemu-img check  $TEST_IMG
>>>          No errors were found on the image.
>>>          Image end offset: 150994944
>>>
>>> Without this patch `qemu-img check` reports:
>>>          ERROR space leaked at the end of the image 145752064
>>>
>>>         139 leaked clusters were found on the image.
>>>         This means waste of disk space, but no harm to data.
>>>         Image end offset: 5242880
>> The original intention is do nothing at this point if an image is opened as
>> RO. In the next patch parallels_check_leak() was rewritten to detect
>> unused clusters at the image end.
>>
>> But there is a bug: (end_off == s->used_bmap_size) case is handled in an
>> incorrect way. Will fix it, thank you.
>>> Note: there is another issue caused by previous commits exists.
>>> g_free asserts from parallels_free_used_bitmap() because of
>>> s->used_bmap is NULL.
>> Maybe I don't understand your point, but if you meant that g_free() could be
>> called with NULL argument, it's not a problem. GLib Manual says:
>>
>>      void g_free (/|gpointer
>>      <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Basic-Types.php#gpointer>
>>      mem|/);
>>
>>      If /|mem|/ is|NULL|
>>      <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Standard-Macros.php#NULL:CAPS>
>>      it simply returns, so there is no need to check /|mem|/ against
>>      |NULL|
>>      <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Standard-Macros.php#NULL:CAPS>
>>      before calling this function.
>>
>>> To reproduce this crash at revision before or without patch 15/19, run commands:
>>> 1. ./qemu-img create -f parallels $TEST_IMG 1T
>>> 2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
>>> 3. ./qemu-img check -r leaks $TEST_IMG
>> Sorry, but I couldn't reproduce it. Reset to 14/19, made the three steps
>> and had such output:
>>
>>      $ ./qemu-img create -f parallels $TEST_IMG 1T
>>      Formatting 'test.img', fmt=parallels size=1099511627776
>>      cluster_size=1048576
>>
>>      $ dd if=/dev/zero of=$TEST_IMG seek=12  bs=1M count=128 conv=notrunc
>>      128+0 records in
>>      128+0 records out
>>      134217728 bytes (134 MB, 128 MiB) copied, 0.0797576 s, 1.7 GB/s
>>
>>      $ ./qemu-img check -r leaks $TEST_IMG
>>      Repairing space leaked at the end of the image 141557760
>>      The following inconsistencies were found and repaired:
>>
>>      135 leaked clusters
>>      0 corruptions
>>
>>      Double checking the fixed image now...
>>      No errors were found on the image.
>>      Image end offset: 5242880
> My comment regarding patch 15 is about 'check' operation is not able
> to detect leaked data anymore.
> So, after this patch I see:
>
> $ ./qemu-img info   leak.bin
> image: leak.bin
> file format: parallels
> virtual size: 1 TiB (1099511627776 bytes)
> disk size: 145 MiB
> Child node '/file':
>      filename: leak.bin
>      protocol type: file
>      file length: 146 MiB (153092096 bytes)
>      disk size: 145 MiB
>
> $ ./qemu-img check -r leaks leak.bin
> No errors were found on the image.
> Image end offset: 153092096
>
> After reverting this patch  'check' reports about:
> ERROR space leaked at the end of the image 148897792
>
> So, after reverting patch 15 I tried to repair such image
> and got abort trap.
Yes, I understand this part. OK, I think, I could place 16 patch before 
15 and
leaks would handle in the correct way at any point of the patch sequence.
>
> I rechecked with downloaded patches, rebuild from scratch and can tell
> that there is no abort on master branch, but it appears after applying
> patches[1-9].
Maybe I do something wrong, but I reset to the top of mainstream, applied
1-9 patches, rebuilt QEMU and didn't see any abort.

> Obviously It can be debugged and the reason is that
> parallels_fill_used_bitmap() returns after
>
>   s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size);
>      if (s->used_bmap_size == 0) {
>          return 0;
>      }
>
> Because DIV_ROUND_UP(payload_bytes, s->cluster_size); gives a 0;
>
> So subsequent parallels_free_used_bitmap() called from
> parallels_close() causes an assert.
>
> So, the first invocation of parallels_free_used_bitmap is:
>    * frame #0: 0x0000000100091830 qemu-img`parallels_check_leak
> [inlined] parallels_free_used_bitmap(bs=0x0000000101011600) at
> parallels.c:263:33 [opt]
>      frame #1: 0x0000000100091830
> qemu-img`parallels_check_leak(bs=0x0000000101011600,
> res=0x000000016fdff5d8, fix=BDRV_FIX_LEAKS, explicit=true) at
> parallels.c:811:9 [opt]
>      frame #2: 0x0000000100090d80
> qemu-img`parallels_co_check(bs=0x0000000101011600,
> res=0x000000016fdff5d8, fix=BDRV_FIX_LEAKS) at parallels.c:1014:15
> [opt]
>      frame #3: 0x0000000100014f6c
> qemu-img`bdrv_co_check_entry(opaque=0x000000016fdff560) at
> block-gen.c:556:14 [opt]
>
> And the second generates abort from there:
>    * frame #0: 0x000000010008fef8 qemu-img`parallels_close [inlined]
> parallels_free_used_bitmap(bs=<unavailable>) at parallels.c:263:33
In this line we have:

    BDRVParallelsState *s = bs->opaque;

So there is only one possibility to abort - incorrect bs. I don't know 
how it
could be possible.

> [opt]
>      frame #1: 0x000000010008fef8
> qemu-img`parallels_close(bs=0x0000000101011600) at parallels.c:1501:5
> [opt]
>      frame #2: 0x0000000100019d3c qemu-img`bdrv_unref [inlined]
> bdrv_close(bs=0x0000000101011600) at block.c:5164:13 [opt]
>
> After the first parallels_free_used_bitmap(), there is an actual image
> truncation happens, so there is no payload at the moment of the next
> parallels_fill_used_bitmap(),
>
> PS: there are a chances that some patches were not applied clearly,
> I'll recheck this later.
I just reset to the mainstream top and apply 1-9 patches:

    $ git reset --hard 2f3913f4b2ad74baeb5a6f1d36efbd9ecdf1057d
    HEAD is now at 2f3913f4b2 Merge tag 'for_upstream' of
    https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
    $ git am *.eml
    Applying: parallels: Move inactivation code to a separate function
    Applying: parallels: Add mark_unused() helper
    Applying: parallels: Move host clusters allocation to a separate
    function
    Applying: parallels: Set data_end value in parallels_check_leak()
    Applying: parallels: Recreate used bitmap in parallels_check_leak()
    Applying: parallels: Add a note about used bitmap in
    parallels_check_duplicate()
    Applying: parallels: Create used bitmap even if checks needed
    Applying: parallels: Make mark_used() and mark_unused() global functions
    Applying: parallels: Add dirty bitmaps saving

> It would be nice if it was possible to fetch changes from some repo,
> rather than extracting  it from gmail.
You can fetch it here (branch "parallels") - 
https://github.com/AlexanderIvanov-Virtuozzo/qemu.git
>
> Regards,
> Mike.
Mike Maslenkin Oct. 7, 2023, 5:54 p.m. UTC | #5
On Sat, Oct 7, 2023 at 5:30 PM Alexander Ivanov
<alexander.ivanov@virtuozzo.com> wrote:
>
>
>
> On 10/7/23 13:21, Mike Maslenkin wrote:
> > On Sat, Oct 7, 2023 at 1:18 PM Alexander Ivanov
> > <alexander.ivanov@virtuozzo.com>  wrote:
> >>
> >> On 10/6/23 21:43, Mike Maslenkin wrote:
> >>> On Mon, Oct 2, 2023 at 12:01 PM Alexander Ivanov
> >>> <alexander.ivanov@virtuozzo.com>  wrote:
> >>>> Since we have used bitmap, field data_end in BDRVParallelsState is
> >>>> redundant and can be removed.
> >>>>
> >>>> Add parallels_data_end() helper and remove data_end handling.
> >>>>
> >>>> Signed-off-by: Alexander Ivanov<alexander.ivanov@virtuozzo.com>
> >>>> ---
> >>>>    block/parallels.c | 33 +++++++++++++--------------------
> >>>>    block/parallels.h |  1 -
> >>>>    2 files changed, 13 insertions(+), 21 deletions(-)
> >>>>
> >>>> diff --git a/block/parallels.c b/block/parallels.c
> >>>> index 48ea5b3f03..80a7171b84 100644
> >>>> --- a/block/parallels.c
> >>>> +++ b/block/parallels.c
> >>>> @@ -265,6 +265,13 @@ static void parallels_free_used_bitmap(BlockDriverState *bs)
> >>>>        g_free(s->used_bmap);
> >>>>    }
> >>>>
> >>>> +static int64_t parallels_data_end(BDRVParallelsState *s)
> >>>> +{
> >>>> +    int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
> >>>> +    data_end += s->used_bmap_size * s->cluster_size;
> >>>> +    return data_end;
> >>>> +}
> >>>> +
> >>>>    int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
> >>>>                                             int64_t *clusters)
> >>>>    {
> >>>> @@ -275,7 +282,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
> >>>>
> >>>>        first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
> >>>>        if (first_free == s->used_bmap_size) {
> >>>> -        host_off = s->data_end * BDRV_SECTOR_SIZE;
> >>>> +        host_off = parallels_data_end(s);
> >>>>            prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
> >>>>            bytes = prealloc_clusters * s->cluster_size;
> >>>>
> >>>> @@ -297,9 +304,6 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
> >>>>            s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
> >>>>                                              new_usedsize);
> >>>>            s->used_bmap_size = new_usedsize;
> >>>> -        if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
> >>>> -            s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
> >>>> -        }
> >>>>        } else {
> >>>>            next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
> >>>>
> >>>> @@ -315,8 +319,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
> >>>>             * branch. In the other case we are likely re-using hole. Preallocate
> >>>>             * the space if required by the prealloc_mode.
> >>>>             */
> >>>> -        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
> >>>> -                host_off < s->data_end * BDRV_SECTOR_SIZE) {
> >>>> +        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
> >>>>                ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
> >>>>                if (ret < 0) {
> >>>>                    return ret;
> >>>> @@ -757,13 +760,7 @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
> >>>>            }
> >>>>        }
> >>>>
> >>>> -    if (high_off == 0) {
> >>>> -        res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
> >>>> -    } else {
> >>>> -        res->image_end_offset = high_off + s->cluster_size;
> >>>> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
> >>>> -    }
> >>>> -
> >>>> +    res->image_end_offset = parallels_data_end(s);
> >>>>        return 0;
> >>>>    }
> >>>>
> >>>> @@ -806,7 +803,6 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
> >>>>                res->check_errors++;
> >>>>                return ret;
> >>>>            }
> >>>> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
> >>>>
> >>>>            parallels_free_used_bitmap(bs);
> >>>>            ret = parallels_fill_used_bitmap(bs);
> >>>> @@ -1361,8 +1357,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
> >>>>        }
> >>>>
> >>>>        s->data_start = data_start;
> >>>> -    s->data_end = s->data_start;
> >>>> -    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
> >>>> +    if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
> >>>>            /*
> >>>>             * There is not enough unused space to fit to block align between BAT
> >>>>             * and actual data. We can't avoid read-modify-write...
> >>>> @@ -1403,11 +1398,10 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
> >>>>
> >>>>        for (i = 0; i < s->bat_size; i++) {
> >>>>            sector = bat2sect(s, i);
> >>>> -        if (sector + s->tracks > s->data_end) {
> >>>> -            s->data_end = sector + s->tracks;
> >>>> +        if (sector + s->tracks > file_nb_sectors) {
> >>>> +            need_check = true;
> >>>>            }
> >>>>        }
> >>>> -    need_check = need_check || s->data_end > file_nb_sectors;
> >>>>
> >>>>        ret = parallels_fill_used_bitmap(bs);
> >>>>        if (ret == -ENOMEM) {
> >>>> @@ -1461,7 +1455,6 @@ static int parallels_truncate_unused_clusters(BlockDriverState *bs)
> >>>>            end_off = (end_off + 1) * s->cluster_size;
> >>>>        }
> >>>>        end_off += s->data_start * BDRV_SECTOR_SIZE;
> >>>> -    s->data_end = end_off / BDRV_SECTOR_SIZE;
> >>>>        return bdrv_truncate(bs->file, end_off, true, PREALLOC_MODE_OFF, 0, NULL);
> >>>>    }
> >>>>
> >>>> diff --git a/block/parallels.h b/block/parallels.h
> >>>> index 18b4f8068e..a6a048d890 100644
> >>>> --- a/block/parallels.h
> >>>> +++ b/block/parallels.h
> >>>> @@ -79,7 +79,6 @@ typedef struct BDRVParallelsState {
> >>>>        unsigned int bat_size;
> >>>>
> >>>>        int64_t  data_start;
> >>>> -    int64_t  data_end;
> >>>>        uint64_t prealloc_size;
> >>>>        ParallelsPreallocMode prealloc_mode;
> >>>>
> >>>> --
> >>>> 2.34.1
> >>>>
> >>> Is it intended behavior?
> >>>
> >>> Run:
> >>> 1. ./qemu-img create -f parallels $TEST_IMG 1T
> >>> 2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
> >>> 3. ./qemu-img check  $TEST_IMG
> >>>          No errors were found on the image.
> >>>          Image end offset: 150994944
> >>>
> >>> Without this patch `qemu-img check` reports:
> >>>          ERROR space leaked at the end of the image 145752064
> >>>
> >>>         139 leaked clusters were found on the image.
> >>>         This means waste of disk space, but no harm to data.
> >>>         Image end offset: 5242880
> >> The original intention is do nothing at this point if an image is opened as
> >> RO. In the next patch parallels_check_leak() was rewritten to detect
> >> unused clusters at the image end.
> >>
> >> But there is a bug: (end_off == s->used_bmap_size) case is handled in an
> >> incorrect way. Will fix it, thank you.
> >>> Note: there is another issue caused by previous commits exists.
> >>> g_free asserts from parallels_free_used_bitmap() because of
> >>> s->used_bmap is NULL.
> >> Maybe I don't understand your point, but if you meant that g_free() could be
> >> called with NULL argument, it's not a problem. GLib Manual says:
> >>
> >>      void g_free (/|gpointer
> >>      <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Basic-Types.php#gpointer>
> >>      mem|/);
> >>
> >>      If /|mem|/ is|NULL|
> >>      <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Standard-Macros.php#NULL:CAPS>
> >>      it simply returns, so there is no need to check /|mem|/ against
> >>      |NULL|
> >>      <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Standard-Macros.php#NULL:CAPS>
> >>      before calling this function.
> >>
> >>> To reproduce this crash at revision before or without patch 15/19, run commands:
> >>> 1. ./qemu-img create -f parallels $TEST_IMG 1T
> >>> 2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
> >>> 3. ./qemu-img check -r leaks $TEST_IMG
> >> Sorry, but I couldn't reproduce it. Reset to 14/19, made the three steps
> >> and had such output:
> >>
> >>      $ ./qemu-img create -f parallels $TEST_IMG 1T
> >>      Formatting 'test.img', fmt=parallels size=1099511627776
> >>      cluster_size=1048576
> >>
> >>      $ dd if=/dev/zero of=$TEST_IMG seek=12  bs=1M count=128 conv=notrunc
> >>      128+0 records in
> >>      128+0 records out
> >>      134217728 bytes (134 MB, 128 MiB) copied, 0.0797576 s, 1.7 GB/s
> >>
> >>      $ ./qemu-img check -r leaks $TEST_IMG
> >>      Repairing space leaked at the end of the image 141557760
> >>      The following inconsistencies were found and repaired:
> >>
> >>      135 leaked clusters
> >>      0 corruptions
> >>
> >>      Double checking the fixed image now...
> >>      No errors were found on the image.
> >>      Image end offset: 5242880
> > My comment regarding patch 15 is about 'check' operation is not able
> > to detect leaked data anymore.
> > So, after this patch I see:
> >
> > $ ./qemu-img info   leak.bin
> > image: leak.bin
> > file format: parallels
> > virtual size: 1 TiB (1099511627776 bytes)
> > disk size: 145 MiB
> > Child node '/file':
> >      filename: leak.bin
> >      protocol type: file
> >      file length: 146 MiB (153092096 bytes)
> >      disk size: 145 MiB
> >
> > $ ./qemu-img check -r leaks leak.bin
> > No errors were found on the image.
> > Image end offset: 153092096
> >
> > After reverting this patch  'check' reports about:
> > ERROR space leaked at the end of the image 148897792
> >
> > So, after reverting patch 15 I tried to repair such image
> > and got abort trap.
> Yes, I understand this part. OK, I think, I could place 16 patch before
> 15 and
> leaks would handle in the correct way at any point of the patch sequence.
> >
> > I rechecked with downloaded patches, rebuild from scratch and can tell
> > that there is no abort on master branch, but it appears after applying
> > patches[1-9].
> Maybe I do something wrong, but I reset to the top of mainstream, applied
> 1-9 patches, rebuilt QEMU and didn't see any abort.
>
> > Obviously It can be debugged and the reason is that
> > parallels_fill_used_bitmap() returns after
> >
> >   s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size);
> >      if (s->used_bmap_size == 0) {
> >          return 0;
> >      }
> >
> > Because DIV_ROUND_UP(payload_bytes, s->cluster_size); gives a 0;
> >
> > So subsequent parallels_free_used_bitmap() called from
> > parallels_close() causes an assert.
> >
> > So, the first invocation of parallels_free_used_bitmap is:
> >    * frame #0: 0x0000000100091830 qemu-img`parallels_check_leak
> > [inlined] parallels_free_used_bitmap(bs=0x0000000101011600) at
> > parallels.c:263:33 [opt]
> >      frame #1: 0x0000000100091830
> > qemu-img`parallels_check_leak(bs=0x0000000101011600,
> > res=0x000000016fdff5d8, fix=BDRV_FIX_LEAKS, explicit=true) at
> > parallels.c:811:9 [opt]
> >      frame #2: 0x0000000100090d80
> > qemu-img`parallels_co_check(bs=0x0000000101011600,
> > res=0x000000016fdff5d8, fix=BDRV_FIX_LEAKS) at parallels.c:1014:15
> > [opt]
> >      frame #3: 0x0000000100014f6c
> > qemu-img`bdrv_co_check_entry(opaque=0x000000016fdff560) at
> > block-gen.c:556:14 [opt]
> >
> > And the second generates abort from there:
> >    * frame #0: 0x000000010008fef8 qemu-img`parallels_close [inlined]
> > parallels_free_used_bitmap(bs=<unavailable>) at parallels.c:263:33
> In this line we have:
>
>     BDRVParallelsState *s = bs->opaque;
>
> So there is only one possibility to abort - incorrect bs. I don't know
> how it
> could be possible.
>
> > [opt]
> >      frame #1: 0x000000010008fef8
> > qemu-img`parallels_close(bs=0x0000000101011600) at parallels.c:1501:5
> > [opt]
> >      frame #2: 0x0000000100019d3c qemu-img`bdrv_unref [inlined]
> > bdrv_close(bs=0x0000000101011600) at block.c:5164:13 [opt]
> >
> > After the first parallels_free_used_bitmap(), there is an actual image
> > truncation happens, so there is no payload at the moment of the next
> > parallels_fill_used_bitmap(),
> >
> > PS: there are a chances that some patches were not applied clearly,
> > I'll recheck this later.
> I just reset to the mainstream top and apply 1-9 patches:
>
>     $ git reset --hard 2f3913f4b2ad74baeb5a6f1d36efbd9ecdf1057d
>     HEAD is now at 2f3913f4b2 Merge tag 'for_upstream' of
>     https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
>     $ git am *.eml
>     Applying: parallels: Move inactivation code to a separate function
>     Applying: parallels: Add mark_unused() helper
>     Applying: parallels: Move host clusters allocation to a separate
>     function
>     Applying: parallels: Set data_end value in parallels_check_leak()
>     Applying: parallels: Recreate used bitmap in parallels_check_leak()
>     Applying: parallels: Add a note about used bitmap in
>     parallels_check_duplicate()
>     Applying: parallels: Create used bitmap even if checks needed
>     Applying: parallels: Make mark_used() and mark_unused() global functions
>     Applying: parallels: Add dirty bitmaps saving
>
> > It would be nice if it was possible to fetch changes from some repo,
> > rather than extracting  it from gmail.
> You can fetch it here (branch "parallels") -
> https://github.com/AlexanderIvanov-Virtuozzo/qemu.git
> >
> > Regards,
> > Mike.
>
> --
> Best regards,
> Alexander Ivanov
>
Thanks for the link. I've fetched your repo and reverted "parallels:
Remove unnecessary data_end field" as it hides reproduction,
because it makes 'check' blind for the case we are discussing.
So the situation is the same:
1. parallels_open calls parallels_fill_used_bitmap(). A this time file
size is 145M (i.e leaked clusters are there) and s->used_bmap_size =
139.
2  Then parallels_co_check()->parallels_check_leak () is invoked.
     At the first parallels_check_leak calls
bdrv_co_truncate(offset=5242880), that is true as we have only empty
BAT on the image.
     At this step image truncated to 5M i.e. contains only empty BAT.
     So, on line 809 s->data_end = 10240 i.e 5M (10240<<9)
      809:         s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;

      811:        parallels_free_used_bitmap(bs);
      812:        ret = parallels_fill_used_bitmap(bs);

Line 811 invalidates used_bmap and sets used_bmap_size to 0.
parallels_fill_used_bitmap Invoked on line 812  returns 0, because
payload_bytes = 0 (current file size 5M - s->data_start *
BDRV_SECTOR_SIZE ),
and s->used_bmap_size is NOT initialized.

3. parallels_close() invoked to finish work and exit process.
   parallels_close() calls  parallels_free_used_bitmap() unconditionally.

static void parallels_free_used_bitmap(BlockDriverState *bs)
{
    BDRVParallelsState *s = bs->opaque;
    s->used_bmap_size = 0;
ASSERT IS HERE >>>>  g_free(s->used_bmap);
}

The fix is trivial...

if (s->used_bmap_size) {
   g_free(s->used_bmap);
   s->used_bmap_size = 0;
}

PS: I retuned to your HEAD. Killed gdb thus made image marked is
incorrectly closed.
But 'qemu-img check' only removed  incorrectly closed flags and didn't
remove leaked clusters.

Regards,
Mike.
Alexander Ivanov Oct. 8, 2023, 1:54 p.m. UTC | #6
On 10/7/23 19:54, Mike Maslenkin wrote:
> On Sat, Oct 7, 2023 at 5:30 PM Alexander Ivanov
> <alexander.ivanov@virtuozzo.com> wrote:
>>
>>
>> On 10/7/23 13:21, Mike Maslenkin wrote:
>>> On Sat, Oct 7, 2023 at 1:18 PM Alexander Ivanov
>>> <alexander.ivanov@virtuozzo.com>  wrote:
>>>> On 10/6/23 21:43, Mike Maslenkin wrote:
>>>>> On Mon, Oct 2, 2023 at 12:01 PM Alexander Ivanov
>>>>> <alexander.ivanov@virtuozzo.com>  wrote:
>>>>>> Since we have used bitmap, field data_end in BDRVParallelsState is
>>>>>> redundant and can be removed.
>>>>>>
>>>>>> Add parallels_data_end() helper and remove data_end handling.
>>>>>>
>>>>>> Signed-off-by: Alexander Ivanov<alexander.ivanov@virtuozzo.com>
>>>>>> ---
>>>>>>     block/parallels.c | 33 +++++++++++++--------------------
>>>>>>     block/parallels.h |  1 -
>>>>>>     2 files changed, 13 insertions(+), 21 deletions(-)
>>>>>>
>>>>>> diff --git a/block/parallels.c b/block/parallels.c
>>>>>> index 48ea5b3f03..80a7171b84 100644
>>>>>> --- a/block/parallels.c
>>>>>> +++ b/block/parallels.c
>>>>>> @@ -265,6 +265,13 @@ static void parallels_free_used_bitmap(BlockDriverState *bs)
>>>>>>         g_free(s->used_bmap);
>>>>>>     }
>>>>>>
>>>>>> +static int64_t parallels_data_end(BDRVParallelsState *s)
>>>>>> +{
>>>>>> +    int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
>>>>>> +    data_end += s->used_bmap_size * s->cluster_size;
>>>>>> +    return data_end;
>>>>>> +}
>>>>>> +
>>>>>>     int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>>>>>                                              int64_t *clusters)
>>>>>>     {
>>>>>> @@ -275,7 +282,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>>>>>
>>>>>>         first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
>>>>>>         if (first_free == s->used_bmap_size) {
>>>>>> -        host_off = s->data_end * BDRV_SECTOR_SIZE;
>>>>>> +        host_off = parallels_data_end(s);
>>>>>>             prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
>>>>>>             bytes = prealloc_clusters * s->cluster_size;
>>>>>>
>>>>>> @@ -297,9 +304,6 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>>>>>             s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
>>>>>>                                               new_usedsize);
>>>>>>             s->used_bmap_size = new_usedsize;
>>>>>> -        if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
>>>>>> -            s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
>>>>>> -        }
>>>>>>         } else {
>>>>>>             next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
>>>>>>
>>>>>> @@ -315,8 +319,7 @@ int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
>>>>>>              * branch. In the other case we are likely re-using hole. Preallocate
>>>>>>              * the space if required by the prealloc_mode.
>>>>>>              */
>>>>>> -        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
>>>>>> -                host_off < s->data_end * BDRV_SECTOR_SIZE) {
>>>>>> +        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
>>>>>>                 ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
>>>>>>                 if (ret < 0) {
>>>>>>                     return ret;
>>>>>> @@ -757,13 +760,7 @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
>>>>>>             }
>>>>>>         }
>>>>>>
>>>>>> -    if (high_off == 0) {
>>>>>> -        res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
>>>>>> -    } else {
>>>>>> -        res->image_end_offset = high_off + s->cluster_size;
>>>>>> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
>>>>>> -    }
>>>>>> -
>>>>>> +    res->image_end_offset = parallels_data_end(s);
>>>>>>         return 0;
>>>>>>     }
>>>>>>
>>>>>> @@ -806,7 +803,6 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
>>>>>>                 res->check_errors++;
>>>>>>                 return ret;
>>>>>>             }
>>>>>> -        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
>>>>>>
>>>>>>             parallels_free_used_bitmap(bs);
>>>>>>             ret = parallels_fill_used_bitmap(bs);
>>>>>> @@ -1361,8 +1357,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
>>>>>>         }
>>>>>>
>>>>>>         s->data_start = data_start;
>>>>>> -    s->data_end = s->data_start;
>>>>>> -    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
>>>>>> +    if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
>>>>>>             /*
>>>>>>              * There is not enough unused space to fit to block align between BAT
>>>>>>              * and actual data. We can't avoid read-modify-write...
>>>>>> @@ -1403,11 +1398,10 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
>>>>>>
>>>>>>         for (i = 0; i < s->bat_size; i++) {
>>>>>>             sector = bat2sect(s, i);
>>>>>> -        if (sector + s->tracks > s->data_end) {
>>>>>> -            s->data_end = sector + s->tracks;
>>>>>> +        if (sector + s->tracks > file_nb_sectors) {
>>>>>> +            need_check = true;
>>>>>>             }
>>>>>>         }
>>>>>> -    need_check = need_check || s->data_end > file_nb_sectors;
>>>>>>
>>>>>>         ret = parallels_fill_used_bitmap(bs);
>>>>>>         if (ret == -ENOMEM) {
>>>>>> @@ -1461,7 +1455,6 @@ static int parallels_truncate_unused_clusters(BlockDriverState *bs)
>>>>>>             end_off = (end_off + 1) * s->cluster_size;
>>>>>>         }
>>>>>>         end_off += s->data_start * BDRV_SECTOR_SIZE;
>>>>>> -    s->data_end = end_off / BDRV_SECTOR_SIZE;
>>>>>>         return bdrv_truncate(bs->file, end_off, true, PREALLOC_MODE_OFF, 0, NULL);
>>>>>>     }
>>>>>>
>>>>>> diff --git a/block/parallels.h b/block/parallels.h
>>>>>> index 18b4f8068e..a6a048d890 100644
>>>>>> --- a/block/parallels.h
>>>>>> +++ b/block/parallels.h
>>>>>> @@ -79,7 +79,6 @@ typedef struct BDRVParallelsState {
>>>>>>         unsigned int bat_size;
>>>>>>
>>>>>>         int64_t  data_start;
>>>>>> -    int64_t  data_end;
>>>>>>         uint64_t prealloc_size;
>>>>>>         ParallelsPreallocMode prealloc_mode;
>>>>>>
>>>>>> --
>>>>>> 2.34.1
>>>>>>
>>>>> Is it intended behavior?
>>>>>
>>>>> Run:
>>>>> 1. ./qemu-img create -f parallels $TEST_IMG 1T
>>>>> 2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
>>>>> 3. ./qemu-img check  $TEST_IMG
>>>>>           No errors were found on the image.
>>>>>           Image end offset: 150994944
>>>>>
>>>>> Without this patch `qemu-img check` reports:
>>>>>           ERROR space leaked at the end of the image 145752064
>>>>>
>>>>>          139 leaked clusters were found on the image.
>>>>>          This means waste of disk space, but no harm to data.
>>>>>          Image end offset: 5242880
>>>> The original intention is do nothing at this point if an image is opened as
>>>> RO. In the next patch parallels_check_leak() was rewritten to detect
>>>> unused clusters at the image end.
>>>>
>>>> But there is a bug: (end_off == s->used_bmap_size) case is handled in an
>>>> incorrect way. Will fix it, thank you.
>>>>> Note: there is another issue caused by previous commits exists.
>>>>> g_free asserts from parallels_free_used_bitmap() because of
>>>>> s->used_bmap is NULL.
>>>> Maybe I don't understand your point, but if you meant that g_free() could be
>>>> called with NULL argument, it's not a problem. GLib Manual says:
>>>>
>>>>       void g_free (/|gpointer
>>>>       <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Basic-Types.php#gpointer>
>>>>       mem|/);
>>>>
>>>>       If /|mem|/ is|NULL|
>>>>       <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Standard-Macros.php#NULL:CAPS>
>>>>       it simply returns, so there is no need to check /|mem|/ against
>>>>       |NULL|
>>>>       <https://www.manpagez.com/html/glib/glib-2.56.0/glib-Standard-Macros.php#NULL:CAPS>
>>>>       before calling this function.
>>>>
>>>>> To reproduce this crash at revision before or without patch 15/19, run commands:
>>>>> 1. ./qemu-img create -f parallels $TEST_IMG 1T
>>>>> 2. dd if=/dev/zero of=$TEST_IMG oseek=12  bs=1M count=128 conv=notrunc
>>>>> 3. ./qemu-img check -r leaks $TEST_IMG
>>>> Sorry, but I couldn't reproduce it. Reset to 14/19, made the three steps
>>>> and had such output:
>>>>
>>>>       $ ./qemu-img create -f parallels $TEST_IMG 1T
>>>>       Formatting 'test.img', fmt=parallels size=1099511627776
>>>>       cluster_size=1048576
>>>>
>>>>       $ dd if=/dev/zero of=$TEST_IMG seek=12  bs=1M count=128 conv=notrunc
>>>>       128+0 records in
>>>>       128+0 records out
>>>>       134217728 bytes (134 MB, 128 MiB) copied, 0.0797576 s, 1.7 GB/s
>>>>
>>>>       $ ./qemu-img check -r leaks $TEST_IMG
>>>>       Repairing space leaked at the end of the image 141557760
>>>>       The following inconsistencies were found and repaired:
>>>>
>>>>       135 leaked clusters
>>>>       0 corruptions
>>>>
>>>>       Double checking the fixed image now...
>>>>       No errors were found on the image.
>>>>       Image end offset: 5242880
>>> My comment regarding patch 15 is about 'check' operation is not able
>>> to detect leaked data anymore.
>>> So, after this patch I see:
>>>
>>> $ ./qemu-img info   leak.bin
>>> image: leak.bin
>>> file format: parallels
>>> virtual size: 1 TiB (1099511627776 bytes)
>>> disk size: 145 MiB
>>> Child node '/file':
>>>       filename: leak.bin
>>>       protocol type: file
>>>       file length: 146 MiB (153092096 bytes)
>>>       disk size: 145 MiB
>>>
>>> $ ./qemu-img check -r leaks leak.bin
>>> No errors were found on the image.
>>> Image end offset: 153092096
>>>
>>> After reverting this patch  'check' reports about:
>>> ERROR space leaked at the end of the image 148897792
>>>
>>> So, after reverting patch 15 I tried to repair such image
>>> and got abort trap.
>> Yes, I understand this part. OK, I think, I could place 16 patch before
>> 15 and
>> leaks would handle in the correct way at any point of the patch sequence.
>>> I rechecked with downloaded patches, rebuild from scratch and can tell
>>> that there is no abort on master branch, but it appears after applying
>>> patches[1-9].
>> Maybe I do something wrong, but I reset to the top of mainstream, applied
>> 1-9 patches, rebuilt QEMU and didn't see any abort.
>>
>>> Obviously It can be debugged and the reason is that
>>> parallels_fill_used_bitmap() returns after
>>>
>>>    s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size);
>>>       if (s->used_bmap_size == 0) {
>>>           return 0;
>>>       }
>>>
>>> Because DIV_ROUND_UP(payload_bytes, s->cluster_size); gives a 0;
>>>
>>> So subsequent parallels_free_used_bitmap() called from
>>> parallels_close() causes an assert.
>>>
>>> So, the first invocation of parallels_free_used_bitmap is:
>>>     * frame #0: 0x0000000100091830 qemu-img`parallels_check_leak
>>> [inlined] parallels_free_used_bitmap(bs=0x0000000101011600) at
>>> parallels.c:263:33 [opt]
>>>       frame #1: 0x0000000100091830
>>> qemu-img`parallels_check_leak(bs=0x0000000101011600,
>>> res=0x000000016fdff5d8, fix=BDRV_FIX_LEAKS, explicit=true) at
>>> parallels.c:811:9 [opt]
>>>       frame #2: 0x0000000100090d80
>>> qemu-img`parallels_co_check(bs=0x0000000101011600,
>>> res=0x000000016fdff5d8, fix=BDRV_FIX_LEAKS) at parallels.c:1014:15
>>> [opt]
>>>       frame #3: 0x0000000100014f6c
>>> qemu-img`bdrv_co_check_entry(opaque=0x000000016fdff560) at
>>> block-gen.c:556:14 [opt]
>>>
>>> And the second generates abort from there:
>>>     * frame #0: 0x000000010008fef8 qemu-img`parallels_close [inlined]
>>> parallels_free_used_bitmap(bs=<unavailable>) at parallels.c:263:33
>> In this line we have:
>>
>>      BDRVParallelsState *s = bs->opaque;
>>
>> So there is only one possibility to abort - incorrect bs. I don't know
>> how it
>> could be possible.
>>
>>> [opt]
>>>       frame #1: 0x000000010008fef8
>>> qemu-img`parallels_close(bs=0x0000000101011600) at parallels.c:1501:5
>>> [opt]
>>>       frame #2: 0x0000000100019d3c qemu-img`bdrv_unref [inlined]
>>> bdrv_close(bs=0x0000000101011600) at block.c:5164:13 [opt]
>>>
>>> After the first parallels_free_used_bitmap(), there is an actual image
>>> truncation happens, so there is no payload at the moment of the next
>>> parallels_fill_used_bitmap(),
>>>
>>> PS: there are a chances that some patches were not applied clearly,
>>> I'll recheck this later.
>> I just reset to the mainstream top and apply 1-9 patches:
>>
>>      $ git reset --hard 2f3913f4b2ad74baeb5a6f1d36efbd9ecdf1057d
>>      HEAD is now at 2f3913f4b2 Merge tag 'for_upstream' of
>>      https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
>>      $ git am *.eml
>>      Applying: parallels: Move inactivation code to a separate function
>>      Applying: parallels: Add mark_unused() helper
>>      Applying: parallels: Move host clusters allocation to a separate
>>      function
>>      Applying: parallels: Set data_end value in parallels_check_leak()
>>      Applying: parallels: Recreate used bitmap in parallels_check_leak()
>>      Applying: parallels: Add a note about used bitmap in
>>      parallels_check_duplicate()
>>      Applying: parallels: Create used bitmap even if checks needed
>>      Applying: parallels: Make mark_used() and mark_unused() global functions
>>      Applying: parallels: Add dirty bitmaps saving
>>
>>> It would be nice if it was possible to fetch changes from some repo,
>>> rather than extracting  it from gmail.
>> You can fetch it here (branch "parallels") -
>> https://github.com/AlexanderIvanov-Virtuozzo/qemu.git
>>> Regards,
>>> Mike.
>> --
>> Best regards,
>> Alexander Ivanov
>>
> Thanks for the link. I've fetched your repo and reverted "parallels:
> Remove unnecessary data_end field" as it hides reproduction,
> because it makes 'check' blind for the case we are discussing.
> So the situation is the same:
> 1. parallels_open calls parallels_fill_used_bitmap(). A this time file
> size is 145M (i.e leaked clusters are there) and s->used_bmap_size =
> 139.
> 2  Then parallels_co_check()->parallels_check_leak () is invoked.
>       At the first parallels_check_leak calls
> bdrv_co_truncate(offset=5242880), that is true as we have only empty
> BAT on the image.
>       At this step image truncated to 5M i.e. contains only empty BAT.
>       So, on line 809 s->data_end = 10240 i.e 5M (10240<<9)
>        809:         s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
>
>        811:        parallels_free_used_bitmap(bs);
>        812:        ret = parallels_fill_used_bitmap(bs);
>
> Line 811 invalidates used_bmap and sets used_bmap_size to 0.
> parallels_fill_used_bitmap Invoked on line 812  returns 0, because
> payload_bytes = 0 (current file size 5M - s->data_start *
> BDRV_SECTOR_SIZE ),
> and s->used_bmap_size is NOT initialized.
>
> 3. parallels_close() invoked to finish work and exit process.
>     parallels_close() calls  parallels_free_used_bitmap() unconditionally.
>
> static void parallels_free_used_bitmap(BlockDriverState *bs)
> {
>      BDRVParallelsState *s = bs->opaque;
>      s->used_bmap_size = 0;
> ASSERT IS HERE >>>>  g_free(s->used_bmap);
Oh, now I see, s->used_bmap is not reset to NULL in 
parallels_free_used_bitmap().
That's why I could not reproduce the bug - on my system by a chance 
s->used_bmap
contained NULL.

Maybe it would be better to put NULL to s->used_bmap in
parallels_free_used_bitmap() and let g_free() handle NULL argument 
properly?
> }
>
> The fix is trivial...
>
> if (s->used_bmap_size) {
>     g_free(s->used_bmap);
>     s->used_bmap_size = 0;
> }
>
> PS: I retuned to your HEAD. Killed gdb thus made image marked is
> incorrectly closed.
> But 'qemu-img check' only removed  incorrectly closed flags and didn't
> remove leaked clusters.
Will work on it.
>
> Regards,
> Mike.
diff mbox series

Patch

diff --git a/block/parallels.c b/block/parallels.c
index 48ea5b3f03..80a7171b84 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -265,6 +265,13 @@  static void parallels_free_used_bitmap(BlockDriverState *bs)
     g_free(s->used_bmap);
 }
 
+static int64_t parallels_data_end(BDRVParallelsState *s)
+{
+    int64_t data_end = s->data_start * BDRV_SECTOR_SIZE;
+    data_end += s->used_bmap_size * s->cluster_size;
+    return data_end;
+}
+
 int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
                                          int64_t *clusters)
 {
@@ -275,7 +282,7 @@  int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
 
     first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
     if (first_free == s->used_bmap_size) {
-        host_off = s->data_end * BDRV_SECTOR_SIZE;
+        host_off = parallels_data_end(s);
         prealloc_clusters = *clusters + s->prealloc_size / s->tracks;
         bytes = prealloc_clusters * s->cluster_size;
 
@@ -297,9 +304,6 @@  int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
         s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
                                           new_usedsize);
         s->used_bmap_size = new_usedsize;
-        if (host_off + bytes > s->data_end * BDRV_SECTOR_SIZE) {
-            s->data_end = (host_off + bytes) / BDRV_SECTOR_SIZE;
-        }
     } else {
         next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
 
@@ -315,8 +319,7 @@  int64_t parallels_allocate_host_clusters(BlockDriverState *bs,
          * branch. In the other case we are likely re-using hole. Preallocate
          * the space if required by the prealloc_mode.
          */
-        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
-                host_off < s->data_end * BDRV_SECTOR_SIZE) {
+        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
             ret = bdrv_pwrite_zeroes(bs->file, host_off, bytes, 0);
             if (ret < 0) {
                 return ret;
@@ -757,13 +760,7 @@  parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
         }
     }
 
-    if (high_off == 0) {
-        res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
-    } else {
-        res->image_end_offset = high_off + s->cluster_size;
-        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
-    }
-
+    res->image_end_offset = parallels_data_end(s);
     return 0;
 }
 
@@ -806,7 +803,6 @@  parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
             res->check_errors++;
             return ret;
         }
-        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
 
         parallels_free_used_bitmap(bs);
         ret = parallels_fill_used_bitmap(bs);
@@ -1361,8 +1357,7 @@  static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     s->data_start = data_start;
-    s->data_end = s->data_start;
-    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
+    if (s->data_start < (s->header_size >> BDRV_SECTOR_BITS)) {
         /*
          * There is not enough unused space to fit to block align between BAT
          * and actual data. We can't avoid read-modify-write...
@@ -1403,11 +1398,10 @@  static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
 
     for (i = 0; i < s->bat_size; i++) {
         sector = bat2sect(s, i);
-        if (sector + s->tracks > s->data_end) {
-            s->data_end = sector + s->tracks;
+        if (sector + s->tracks > file_nb_sectors) {
+            need_check = true;
         }
     }
-    need_check = need_check || s->data_end > file_nb_sectors;
 
     ret = parallels_fill_used_bitmap(bs);
     if (ret == -ENOMEM) {
@@ -1461,7 +1455,6 @@  static int parallels_truncate_unused_clusters(BlockDriverState *bs)
         end_off = (end_off + 1) * s->cluster_size;
     }
     end_off += s->data_start * BDRV_SECTOR_SIZE;
-    s->data_end = end_off / BDRV_SECTOR_SIZE;
     return bdrv_truncate(bs->file, end_off, true, PREALLOC_MODE_OFF, 0, NULL);
 }
 
diff --git a/block/parallels.h b/block/parallels.h
index 18b4f8068e..a6a048d890 100644
--- a/block/parallels.h
+++ b/block/parallels.h
@@ -79,7 +79,6 @@  typedef struct BDRVParallelsState {
     unsigned int bat_size;
 
     int64_t  data_start;
-    int64_t  data_end;
     uint64_t prealloc_size;
     ParallelsPreallocMode prealloc_mode;