diff mbox series

[v2,2/4] qcow2: add configurations for zoned format extension

Message ID 20230814085802.61459-3-faithilikerun@gmail.com
State New
Headers show
Series Add full zoned storage emulation to qcow2 driver | expand

Commit Message

Sam Li Aug. 14, 2023, 8:58 a.m. UTC
Configuring the zoned format feature on the qcow2 driver requires
the following arguments: the device size, zoned profile, zoned
model, zone size, zone capacity, number of conventional zones,
and limits on zone resources (max append sectors, max open zones,
and max active zones). The zoned profile option is set to zns
when using the qcow2 file as a ZNS drive.

To create a qcow2 file with the zoned format, use a command like this:
$ qemu-img create -f qcow2 test.qcow2 -o size=768M -o
zone_size=64M -o zone_capacity=64M -o zone_nr_conv=0 -o
max_append_sectors=512 -o max_open_zones=0 -o max_active_zones=0
 -o zoned_profile=zbc/zns

Signed-off-by: Sam Li <faithilikerun@gmail.com>
---
 block/qcow2.c                    | 125 +++++++++++++++++++++++++++++++
 block/qcow2.h                    |  21 ++++++
 docs/interop/qcow2.txt           |  24 ++++++
 include/block/block-common.h     |   5 ++
 include/block/block_int-common.h |  16 ++++
 qapi/block-core.json             |  46 ++++++++----
 6 files changed, 223 insertions(+), 14 deletions(-)

Comments

Stefan Hajnoczi Aug. 16, 2023, 7:31 p.m. UTC | #1
On Mon, Aug 14, 2023 at 04:58:00PM +0800, Sam Li wrote:
> To configure the zoned format feature on the qcow2 driver, it
> requires following arguments: the device size, zoned profile,
> zoned model, zone size, zone capacity, number of conventional
> zones, limits on zone resources (max append sectors, max open
> zones, and max_active_zones). The zoned profile option is set
> to zns when using the qcow2 file as a ZNS drive.
> 
> To create a qcow2 file with zoned format, use command like this:
> $ qemu-img create -f qcow2 test.qcow2 -o size=768M -o
> zone_size=64M -o zone_capacity=64M -o zone_nr_conv=0 -o
> max_append_sectors=512 -o max_open_zones=0 -o max_active_zones=0
>  -o zoned_profile=zbc/zns
> 
> Signed-off-by: Sam Li <faithilikerun@gmail.com>
> ---
>  block/qcow2.c                    | 125 +++++++++++++++++++++++++++++++
>  block/qcow2.h                    |  21 ++++++
>  docs/interop/qcow2.txt           |  24 ++++++
>  include/block/block-common.h     |   5 ++
>  include/block/block_int-common.h |  16 ++++
>  qapi/block-core.json             |  46 ++++++++----
>  6 files changed, 223 insertions(+), 14 deletions(-)
> 
> diff --git a/block/qcow2.c b/block/qcow2.c
> index c51388e99d..c1077c4a4a 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -73,6 +73,7 @@ typedef struct {
>  #define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
>  #define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
>  #define  QCOW2_EXT_MAGIC_DATA_FILE 0x44415441
> +#define  QCOW2_EXT_MAGIC_ZONED_FORMAT 0x7a6264
>  
>  static int coroutine_fn
>  qcow2_co_preadv_compressed(BlockDriverState *bs,
> @@ -210,6 +211,7 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>      uint64_t offset;
>      int ret;
>      Qcow2BitmapHeaderExt bitmaps_ext;
> +    Qcow2ZonedHeaderExtension zoned_ext;
>  
>      if (need_update_header != NULL) {
>          *need_update_header = false;
> @@ -431,6 +433,38 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
>              break;
>          }
>  
> +        case QCOW2_EXT_MAGIC_ZONED_FORMAT:
> +        {
> +            if (ext.len != sizeof(zoned_ext)) {
> +                error_setg_errno(errp, -ret, "zoned_ext: "

ret does not contain a useful value. I suggest calling error_setg()
instead.

> +                                             "Invalid extension length");
> +                return -EINVAL;
> +            }
> +            ret = bdrv_pread(bs->file, offset, ext.len, &zoned_ext, 0);
> +            if (ret < 0) {
> +                error_setg_errno(errp, -ret, "zoned_ext: "
> +                                             "Could not read ext header");
> +                return ret;
> +            }
> +
> +            zoned_ext.zone_size = be32_to_cpu(zoned_ext.zone_size);
> +            zoned_ext.zone_capacity = be32_to_cpu(zoned_ext.zone_capacity);
> +            zoned_ext.nr_zones = be32_to_cpu(zoned_ext.nr_zones);
> +            zoned_ext.zone_nr_conv = be32_to_cpu(zoned_ext.zone_nr_conv);
> +            zoned_ext.max_open_zones = be32_to_cpu(zoned_ext.max_open_zones);
> +            zoned_ext.max_active_zones =
> +                be32_to_cpu(zoned_ext.max_active_zones);
> +            zoned_ext.max_append_sectors =
> +                be32_to_cpu(zoned_ext.max_append_sectors);
> +            s->zoned_header = zoned_ext;

I suggest adding checks here and refusing to open broken images:

  if (zone_size == 0) {
      error_setg(errp, "Zoned extension header zone_size field cannot be 0");
      return -EINVAL;
  }
  if (zone_capacity > zone_size) { ... }
  if (nr_zones != DIV_ROUND_UP(bs->total_size, zone_size)) { ... }

> +
> +#ifdef DEBUG_EXT
> +            printf("Qcow2: Got zoned format extension: "
> +                   "offset=%" PRIu32 "\n", offset);
> +#endif
> +            break;
> +        }
> +
>          default:
>              /* unknown magic - save it in case we need to rewrite the header */
>              /* If you add a new feature, make sure to also update the fast
> @@ -3089,6 +3123,31 @@ int qcow2_update_header(BlockDriverState *bs)
>          buflen -= ret;
>      }
>  
> +    /* Zoned devices header extension */
> +    if (s->zoned_header.zoned == BLK_Z_HM) {
> +        Qcow2ZonedHeaderExtension zoned_header = {
> +            .zoned_profile      = s->zoned_header.zoned_profile,
> +            .zoned              = s->zoned_header.zoned,
> +            .nr_zones           = cpu_to_be32(s->zoned_header.nr_zones),
> +            .zone_size          = cpu_to_be32(s->zoned_header.zone_size),
> +            .zone_capacity      = cpu_to_be32(s->zoned_header.zone_capacity),
> +            .zone_nr_conv       = cpu_to_be32(s->zoned_header.zone_nr_conv),
> +            .max_open_zones     = cpu_to_be32(s->zoned_header.max_open_zones),
> +            .max_active_zones   =
> +                cpu_to_be32(s->zoned_header.max_active_zones),
> +            .max_append_sectors =
> +                cpu_to_be32(s->zoned_header.max_append_sectors)
> +        };
> +        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_ZONED_FORMAT,
> +                             &zoned_header, sizeof(zoned_header),
> +                             buflen);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        buf += ret;
> +        buflen -= ret;
> +    }
> +
>      /* Keep unknown header extensions */
>      QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
>          ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
> @@ -3773,6 +3832,23 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
>          s->image_data_file = g_strdup(data_bs->filename);
>      }
>  
> +    if (qcow2_opts->zoned_profile) {
> +        BDRVQcow2State *s = blk_bs(blk)->opaque;
> +        if (!strcmp(qcow2_opts->zoned_profile, "zbc")) {
> +            s->zoned_header.zoned_profile = BLK_ZP_ZBC;
> +            s->zoned_header.zone_capacity = qcow2_opts->zone_size;
> +        } else if (!strcmp(qcow2_opts->zoned_profile, "zns")) {
> +            s->zoned_header.zoned_profile = BLK_ZP_ZNS;
> +            s->zoned_header.zone_capacity = qcow2_opts->zone_capacity;
> +        }
> +        s->zoned_header.zoned = BLK_Z_HM;
> +        s->zoned_header.zone_size = qcow2_opts->zone_size;
> +        s->zoned_header.zone_nr_conv = qcow2_opts->zone_nr_conv;
> +        s->zoned_header.max_open_zones = qcow2_opts->max_open_zones;
> +        s->zoned_header.max_active_zones = qcow2_opts->max_active_zones;
> +        s->zoned_header.max_append_sectors = qcow2_opts->max_append_sectors;

Please add input validation that rejects bad values. For example,
zone_size cannot be 0 and zone_capacity cannot be larger than zone_size.

> +    }
> +
>      /* Create a full header (including things like feature table) */
>      ret = qcow2_update_header(blk_bs(blk));
>      bdrv_graph_co_rdunlock();
> @@ -3891,6 +3967,13 @@ qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
>          qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
>      }
>  
> +    /* The available zoned-profile options are zbc, which stands for
> +     * ZBC/ZAC standards, and zns following NVMe ZNS spec. */
> +    val = qdict_get_try_str(qdict, BLOCK_OPT_Z_PROFILE);
> +    if (val) {
> +        qdict_put_str(qdict, BLOCK_OPT_Z_PROFILE, val);
> +    }
> +
>      /* Change legacy command line options into QMP ones */
>      static const QDictRenames opt_renames[] = {
>          { BLOCK_OPT_BACKING_FILE,       "backing-file" },
> @@ -3903,6 +3986,13 @@ qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
>          { BLOCK_OPT_COMPAT_LEVEL,       "version" },
>          { BLOCK_OPT_DATA_FILE_RAW,      "data-file-raw" },
>          { BLOCK_OPT_COMPRESSION_TYPE,   "compression-type" },
> +        { BLOCK_OPT_Z_PROFILE,          "zoned-profile"},
> +        { BLOCK_OPT_Z_NR_COV,           "zone-nr-conv"},
> +        { BLOCK_OPT_Z_MOZ,              "max-open-zones"},
> +        { BLOCK_OPT_Z_MAZ,              "max-active-zones"},
> +        { BLOCK_OPT_Z_MAS,              "max-append-sectors"},
> +        { BLOCK_OPT_Z_SIZE,             "zone-size"},
> +        { BLOCK_OPT_Z_CAP,              "zone-capacity"},
>          { NULL, NULL },
>      };
>  
> @@ -6066,6 +6156,41 @@ static QemuOptsList qcow2_create_opts = {
>              .help = "Compression method used for image cluster "        \
>                      "compression",                                      \
>              .def_value_str = "zlib"                                     \
> +        },                                                              \
> +            {

Indentation is off and the backslash is missing. I'm surprised this
works without the backslash because the preprocessor should interpret
the macro as ending on this line, weird.

> +            .name = BLOCK_OPT_Z_PROFILE,                                \
> +            .type = QEMU_OPT_STRING,                                    \
> +            .help = "zoned format option for the disk img",             \
> +        },                                                              \
> +            {                                                           \
> +            .name = BLOCK_OPT_Z_SIZE,                                   \
> +            .type = QEMU_OPT_SIZE,                                      \
> +            .help = "zone size",                                        \
> +        },                                                              \
> +        {                                                           \
> +            .name = BLOCK_OPT_Z_CAP,                                    \
> +            .type = QEMU_OPT_SIZE,                                      \
> +            .help = "zone capacity",                                    \
> +        },                                                              \
> +        {                                                               \
> +                .name = BLOCK_OPT_Z_NR_COV,                             \

Indentation is off. QEMU uses 4-space indentation.

> +                .type = QEMU_OPT_NUMBER,                                \
> +                .help = "numbers of conventional zones",                \
> +        },                                                              \
> +        {                                                               \
> +                .name = BLOCK_OPT_Z_MAS,                                \
> +                .type = QEMU_OPT_NUMBER,                                \
> +                .help = "max append sectors",                           \
> +        },                                                              \
> +        {                                                               \
> +                .name = BLOCK_OPT_Z_MAZ,                                \
> +                .type = QEMU_OPT_NUMBER,                                \
> +                .help = "max active zones",                             \
> +        },                                                              \
> +        {                                                               \
> +                .name = BLOCK_OPT_Z_MOZ,                                \
> +                .type = QEMU_OPT_NUMBER,                                \
> +                .help = "max open zones",                               \
>          },
>          QCOW_COMMON_OPTIONS,
>          { /* end of list */ }
> diff --git a/block/qcow2.h b/block/qcow2.h
> index f789ce3ae0..3694c8d217 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -236,6 +236,20 @@ typedef struct Qcow2CryptoHeaderExtension {
>      uint64_t length;
>  } QEMU_PACKED Qcow2CryptoHeaderExtension;
>  
> +typedef struct Qcow2ZonedHeaderExtension {
> +    /* Zoned device attributes */
> +    uint8_t zoned_profile;
> +    uint8_t zoned;
> +    uint16_t reserved16;
> +    uint32_t zone_size;
> +    uint32_t zone_capacity;
> +    uint32_t nr_zones;
> +    uint32_t zone_nr_conv;
> +    uint32_t max_active_zones;
> +    uint32_t max_open_zones;
> +    uint32_t max_append_sectors;
> +} QEMU_PACKED Qcow2ZonedHeaderExtension;
> +
>  typedef struct Qcow2UnknownHeaderExtension {
>      uint32_t magic;
>      uint32_t len;
> @@ -422,6 +436,13 @@ typedef struct BDRVQcow2State {
>       * is to convert the image with the desired compression type set.
>       */
>      Qcow2CompressionType compression_type;
> +
> +    /* States of zoned device */
> +    Qcow2ZonedHeaderExtension zoned_header;
> +    uint32_t nr_zones_exp_open;
> +    uint32_t nr_zones_imp_open;
> +    uint32_t nr_zones_closed;
> +    BlockZoneWps *wps;

Please add wps in the patch that uses this field. I thought wps was a
generic BlockDriverState field and didn't expect BDRVQcow2State to have
it.

>  } BDRVQcow2State;
>  
>  typedef struct Qcow2COWRegion {
> diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
> index 2c4618375a..ef2ba6f670 100644
> --- a/docs/interop/qcow2.txt
> +++ b/docs/interop/qcow2.txt
> @@ -331,6 +331,30 @@ The fields of the bitmaps extension are:
>                     Offset into the image file at which the bitmap directory
>                     starts. Must be aligned to a cluster boundary.
>  
> +== Zoned extension ==
> +
> +The zoned extension is an optional header extension. It is required when
> +using the qcow2 file as the backing image for zoned device.

It's not clear here that this is about emulating a zoned storage device
rather than using qcow2 on a zoned storage device. Also, the term
"backing image" will probably be confused with qcow2's backing files
feature.

I suggest: It contains fields for emulating the zoned storage model
(https://zonedstorage.io/).

> +
> +The fields of the zoned extension are:
> +    Byte  0:  zoned_profile
> +              Type of zoned format. Must be `zbc` or `zns`.
> +                  1: `zbc`
> +                  2: `zns`
> +
> +          1:  zoned
> +              Type of zone.
> +
> +          2 - 3:  Reserved, must be zero.
> +
> +          4 - 7:  zone_size
> +          8 - 11:  zone_capacity
> +          12 - 15:  nr_zones
> +          16 - 19:  zone_nr_conv
> +          20 - 23:  max_active_zones
> +          24 - 27:  max_open_zones
> +          28 - 31:  max_append_sectors

Please document these fields, their units, etc.

> +
>  == Full disk encryption header pointer ==
>  
>  The full disk encryption header must be present if, and only if, the
> diff --git a/include/block/block-common.h b/include/block/block-common.h
> index e15395f2cb..9f04a772f6 100644
> --- a/include/block/block-common.h
> +++ b/include/block/block-common.h
> @@ -108,6 +108,11 @@ typedef enum BlockZoneType {
>      BLK_ZT_SWP = 0x3, /* Sequential writes preferred */
>  } BlockZoneType;
>  
> +typedef enum BlockZonedProfile {
> +    BLK_ZP_ZBC = 0x1,
> +    BLK_ZP_ZNS = 0x2,
> +} BlockZonedProfile;
> +
>  /*
>   * Zone descriptor data structure.
>   * Provides information on a zone with all position and size values in bytes.
> diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
> index 74195c3004..1dbe820a9b 100644
> --- a/include/block/block_int-common.h
> +++ b/include/block/block_int-common.h
> @@ -57,6 +57,14 @@
>  #define BLOCK_OPT_DATA_FILE_RAW     "data_file_raw"
>  #define BLOCK_OPT_COMPRESSION_TYPE  "compression_type"
>  #define BLOCK_OPT_EXTL2             "extended_l2"
> +#define BLOCK_OPT_Z_PROFILE         "zoned_profile"
> +#define BLOCK_OPT_Z_MODEL           "zoned"
> +#define BLOCK_OPT_Z_SIZE            "zone_size"
> +#define BLOCK_OPT_Z_CAP             "zone_capacity"
> +#define BLOCK_OPT_Z_NR_COV          "zone_nr_conv"
> +#define BLOCK_OPT_Z_MAS             "max_append_sectors"
> +#define BLOCK_OPT_Z_MAZ             "max_active_zones"
> +#define BLOCK_OPT_Z_MOZ             "max_open_zones"
>  
>  #define BLOCK_PROBE_BUF_SIZE        512
>  
> @@ -872,12 +880,20 @@ typedef struct BlockLimits {
>       */
>      bool has_variable_length;
>  
> +    BlockZonedProfile zoned_profile;
> +
>      /* device zone model */
>      BlockZoneModel zoned;
>  
>      /* zone size expressed in bytes */
>      uint32_t zone_size;
>  
> +    /*
> +     * the number of usable logical blocks within the zone, expressed
> +     * in bytes. A zone capacity is smaller or equal to the zone size.
> +     */
> +    uint32_t zone_capacity;
> +
>      /* total number of zones */
>      uint32_t nr_zones;
>  
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 2b1d493d6e..0c97ae678b 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -5020,24 +5020,42 @@
>  #
>  # @compression-type: The image cluster compression method
>  #     (default: zlib, since 5.1)
> +# @zoned-profile: Two zoned device protocol options, zbc or zns
> +#                 (default: off, since 8.0)
> +# @zone-size: The size of a zone of the zoned device (since 8.0)
> +# @zone-capacity: The capacity of a zone of the zoned device (since 8.0)
> +# @zone-nr-conv: The number of conventional zones of the zoned device
> +#                (since 8.0)
> +# @max-open-zones: The maximal allowed open zones (since 8.0)
> +# @max-active-zones: The limit of the zones that have the implicit open,
> +#                    explicit open or closed state (since 8.0)
> +# @max-append-sectors: The maximal sectors that is allowed to append write
> +#                      (since 8.0)
>  #
>  # Since: 2.12
>  ##
>  { 'struct': 'BlockdevCreateOptionsQcow2',
> -  'data': { 'file':             'BlockdevRef',
> -            '*data-file':       'BlockdevRef',
> -            '*data-file-raw':   'bool',
> -            '*extended-l2':     'bool',
> -            'size':             'size',
> -            '*version':         'BlockdevQcow2Version',
> -            '*backing-file':    'str',
> -            '*backing-fmt':     'BlockdevDriver',
> -            '*encrypt':         'QCryptoBlockCreateOptions',
> -            '*cluster-size':    'size',
> -            '*preallocation':   'PreallocMode',
> -            '*lazy-refcounts':  'bool',
> -            '*refcount-bits':   'int',
> -            '*compression-type':'Qcow2CompressionType' } }
> +  'data': { 'file':                'BlockdevRef',
> +            '*data-file':          'BlockdevRef',
> +            '*data-file-raw':      'bool',
> +            '*extended-l2':        'bool',
> +            'size':                'size',
> +            '*version':            'BlockdevQcow2Version',
> +            '*backing-file':       'str',
> +            '*backing-fmt':        'BlockdevDriver',
> +            '*encrypt':            'QCryptoBlockCreateOptions',
> +            '*cluster-size':       'size',
> +            '*preallocation':      'PreallocMode',
> +            '*lazy-refcounts':     'bool',
> +            '*refcount-bits':      'int',
> +            '*compression-type':   'Qcow2CompressionType',
> +            '*zoned-profile':      'str',
> +            '*zone-size':          'size',
> +            '*zone-capacity':      'size',
> +            '*zone-nr-conv':       'uint32',
> +            '*max-open-zones':     'uint32',
> +            '*max-active-zones':   'uint32',
> +            '*max-append-sectors': 'uint32'}}
>  
>  ##
>  # @BlockdevCreateOptionsQed:
> -- 
> 2.40.1
>
Markus Armbruster Aug. 21, 2023, 1:13 p.m. UTC | #2
Sam Li <faithilikerun@gmail.com> writes:

> To configure the zoned format feature on the qcow2 driver, it
> requires following arguments: the device size, zoned profile,
> zoned model, zone size, zone capacity, number of conventional
> zones, limits on zone resources (max append sectors, max open
> zones, and max_active_zones). The zoned profile option is set
> to zns when using the qcow2 file as a ZNS drive.
>
> To create a qcow2 file with zoned format, use command like this:
> $ qemu-img create -f qcow2 test.qcow2 -o size=768M -o
> zone_size=64M -o zone_capacity=64M -o zone_nr_conv=0 -o
> max_append_sectors=512 -o max_open_zones=0 -o max_active_zones=0
>  -o zoned_profile=zbc/zns
>
> Signed-off-by: Sam Li <faithilikerun@gmail.com>

[...]

> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 2b1d493d6e..0c97ae678b 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -5020,24 +5020,42 @@
>  #
>  # @compression-type: The image cluster compression method
>  #     (default: zlib, since 5.1)
> +# @zoned-profile: Two zoned device protocol options, zbc or zns
> +#                 (default: off, since 8.0)

When a 'str' thing accepts a fixed set of (string) values, it most
likely should be an enum instead.  Have you considered making
@zoned-profile one?

> +# @zone-size: The size of a zone of the zoned device (since 8.0)
> +# @zone-capacity: The capacity of a zone of the zoned device (since 8.0)

In bytes, I presume?

What's the difference between size and capacity?

> +# @zone-nr-conv: The number of conventional zones of the zoned device
> +#                (since 8.0)
> +# @max-open-zones: The maximal allowed open zones (since 8.0)
> +# @max-active-zones: The limit of the zones that have the implicit open,
> +#                    explicit open or closed state (since 8.0)

Naming...  if I understand the comment correctly, then @zone-nr-conv,
@max-open-zones, and @max-active-zones are all counting zones.  Rename
@zone-nr-conv to @conventional-zones?

> +# @max-append-sectors: The maximal sectors that is allowed to append write

I'm not sure I understand the explanation.  Elaborate for me?

> +#                      (since 8.0)

Please format like

   #
   # @zoned-profile: Two zoned device protocol options, zbc or zns
   #     (default: off, since 8.0)
   #
   # @zone-size: The size of a zone of the zoned device (since 8.0)
   #
   # @zone-capacity: The capacity of a zone of the zoned device
   #     (since 8.0)
   #
   # @zone-nr-conv: The number of conventional zones of the zoned device
   #     (since 8.0)
   #
   # @max-open-zones: The maximal allowed open zones (since 8.0)
   #
   # @max-active-zones: The limit of the zones that have the implicit
   #     open, explicit open or closed state (since 8.0)
   #
   # @max-append-sectors: The maximal sectors that is allowed to append
   #     write (since 8.0)

to blend in with recent commit a937b6aa739 (qapi: Reformat doc comments
to conform to current conventions).

>  #
>  # Since: 2.12
>  ##
>  { 'struct': 'BlockdevCreateOptionsQcow2',
> -  'data': { 'file':             'BlockdevRef',
> -            '*data-file':       'BlockdevRef',
> -            '*data-file-raw':   'bool',
> -            '*extended-l2':     'bool',
> -            'size':             'size',
> -            '*version':         'BlockdevQcow2Version',
> -            '*backing-file':    'str',
> -            '*backing-fmt':     'BlockdevDriver',
> -            '*encrypt':         'QCryptoBlockCreateOptions',
> -            '*cluster-size':    'size',
> -            '*preallocation':   'PreallocMode',
> -            '*lazy-refcounts':  'bool',
> -            '*refcount-bits':   'int',
> -            '*compression-type':'Qcow2CompressionType' } }
> +  'data': { 'file':                'BlockdevRef',
> +            '*data-file':          'BlockdevRef',
> +            '*data-file-raw':      'bool',
> +            '*extended-l2':        'bool',
> +            'size':                'size',
> +            '*version':            'BlockdevQcow2Version',
> +            '*backing-file':       'str',
> +            '*backing-fmt':        'BlockdevDriver',
> +            '*encrypt':            'QCryptoBlockCreateOptions',
> +            '*cluster-size':       'size',
> +            '*preallocation':      'PreallocMode',
> +            '*lazy-refcounts':     'bool',
> +            '*refcount-bits':      'int',
> +            '*compression-type':   'Qcow2CompressionType',

I'd keep the existing lines unchanged.  Not a demand.

> +            '*zoned-profile':      'str',
> +            '*zone-size':          'size',
> +            '*zone-capacity':      'size',
> +            '*zone-nr-conv':       'uint32',
> +            '*max-open-zones':     'uint32',
> +            '*max-active-zones':   'uint32',
> +            '*max-append-sectors': 'uint32'}}

Keep the spaces around the } for consistency.  Yes, they're kind of
ugly.

>  
>  ##
>  # @BlockdevCreateOptionsQed:
Stefan Hajnoczi Aug. 21, 2023, 1:31 p.m. UTC | #3
On Mon, Aug 14, 2023 at 04:58:00PM +0800, Sam Li wrote:
> diff --git a/block/qcow2.h b/block/qcow2.h
> index f789ce3ae0..3694c8d217 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -236,6 +236,20 @@ typedef struct Qcow2CryptoHeaderExtension {
>      uint64_t length;
>  } QEMU_PACKED Qcow2CryptoHeaderExtension;
>  
> +typedef struct Qcow2ZonedHeaderExtension {
> +    /* Zoned device attributes */
> +    uint8_t zoned_profile;
> +    uint8_t zoned;
> +    uint16_t reserved16;
> +    uint32_t zone_size;
> +    uint32_t zone_capacity;

Should zone capacity be stored individually for each zone (alongside the
write pointer and other per zone metadata) instead of as a global value
for all zones? My understanding is that NVMe ZNS does not have a global
value and each zone could have a different zone capacity value.

> +    uint32_t nr_zones;

Is this field necessary since it can be derived from other image
options: nr_zones = DIV_ROUND_UP(total_length, zone_capacity)?
Sam Li Aug. 28, 2023, 9:05 a.m. UTC | #4
Markus Armbruster <armbru@redhat.com> 于2023年8月21日周一 21:13写道:
>
> Sam Li <faithilikerun@gmail.com> writes:
>
> > To configure the zoned format feature on the qcow2 driver, it
> > requires following arguments: the device size, zoned profile,
> > zoned model, zone size, zone capacity, number of conventional
> > zones, limits on zone resources (max append sectors, max open
> > zones, and max_active_zones). The zoned profile option is set
> > to zns when using the qcow2 file as a ZNS drive.
> >
> > To create a qcow2 file with zoned format, use command like this:
> > $ qemu-img create -f qcow2 test.qcow2 -o size=768M -o
> > zone_size=64M -o zone_capacity=64M -o zone_nr_conv=0 -o
> > max_append_sectors=512 -o max_open_zones=0 -o max_active_zones=0
> >  -o zoned_profile=zbc/zns
> >
> > Signed-off-by: Sam Li <faithilikerun@gmail.com>
>
> [...]
>
> > diff --git a/qapi/block-core.json b/qapi/block-core.json
> > index 2b1d493d6e..0c97ae678b 100644
> > --- a/qapi/block-core.json
> > +++ b/qapi/block-core.json
> > @@ -5020,24 +5020,42 @@
> >  #
> >  # @compression-type: The image cluster compression method
> >  #     (default: zlib, since 5.1)
> > +# @zoned-profile: Two zoned device protocol options, zbc or zns
> > +#                 (default: off, since 8.0)
>
> When a 'str' thing accepts a fixed set of (string) values, it most
> likely should be an enum instead.  Have you considered making
> @zoned-profile one?
>
> > +# @zone-size: The size of a zone of the zoned device (since 8.0)
> > +# @zone-capacity: The capacity of a zone of the zoned device (since 8.0)
>
> In bytes, I presume?

Yes.

>
> What's the difference between size and capacity?
>

Zone size is the total number of logical blocks within a zone, in bytes.
Zone capacity is the number of usable logical blocks within a zone, in
bytes. The zone capacity is always smaller than or equal to the zone size.
According to the ZBC/ZAC standards, the zone capacity is equal to the zone
size, while in the ZNS spec it can be smaller. I will add this
documentation, and the explanation below, in the next patches.

> > +# @zone-nr-conv: The number of conventional zones of the zoned device
> > +#                (since 8.0)
> > +# @max-open-zones: The maximal allowed open zones (since 8.0)
> > +# @max-active-zones: The limit of the zones that have the implicit open,
> > +#                    explicit open or closed state (since 8.0)
>
> Naming...  if I understand the comment correctly, then @zone-nr-conv,
> @max-open-zones, and @max-active-zones are all counting zones.  Rename
> @zone-nr-conv to @conventional-zones?
>
> > +# @max-append-sectors: The maximal sectors that is allowed to append write
>
> I'm not sure I understand the explanation.  Elaborate for me?

max_append_sectors is the maximum data size (in sectors) of a zone
append request that can be successfully issued to the device.  It is a
constraint on the maximum amount of data that can be appended to a
zone in a single request.

>
> > +#                      (since 8.0)
>
> Please format like
>
>    #
>    # @zoned-profile: Two zoned device protocol options, zbc or zns
>    #     (default: off, since 8.0)
>    #
>    # @zone-size: The size of a zone of the zoned device (since 8.0)
>    #
>    # @zone-capacity: The capacity of a zone of the zoned device
>    #     (since 8.0)
>    #
>    # @zone-nr-conv: The number of conventional zones of the zoned device
>    #     (since 8.0)
>    #
>    # @max-open-zones: The maximal allowed open zones (since 8.0)
>    #
>    # @max-active-zones: The limit of the zones that have the implicit
>    #     open, explicit open or closed state (since 8.0)
>    #
>    # @max-append-sectors: The maximal sectors that is allowed to append
>    #     write (since 8.0)
>
> to blend in with recent commit a937b6aa739 (qapi: Reformat doc comments
> to conform to current conventions).
>
> >  #
> >  # Since: 2.12
> >  ##
> >  { 'struct': 'BlockdevCreateOptionsQcow2',
> > -  'data': { 'file':             'BlockdevRef',
> > -            '*data-file':       'BlockdevRef',
> > -            '*data-file-raw':   'bool',
> > -            '*extended-l2':     'bool',
> > -            'size':             'size',
> > -            '*version':         'BlockdevQcow2Version',
> > -            '*backing-file':    'str',
> > -            '*backing-fmt':     'BlockdevDriver',
> > -            '*encrypt':         'QCryptoBlockCreateOptions',
> > -            '*cluster-size':    'size',
> > -            '*preallocation':   'PreallocMode',
> > -            '*lazy-refcounts':  'bool',
> > -            '*refcount-bits':   'int',
> > -            '*compression-type':'Qcow2CompressionType' } }
> > +  'data': { 'file':                'BlockdevRef',
> > +            '*data-file':          'BlockdevRef',
> > +            '*data-file-raw':      'bool',
> > +            '*extended-l2':        'bool',
> > +            'size':                'size',
> > +            '*version':            'BlockdevQcow2Version',
> > +            '*backing-file':       'str',
> > +            '*backing-fmt':        'BlockdevDriver',
> > +            '*encrypt':            'QCryptoBlockCreateOptions',
> > +            '*cluster-size':       'size',
> > +            '*preallocation':      'PreallocMode',
> > +            '*lazy-refcounts':     'bool',
> > +            '*refcount-bits':      'int',
> > +            '*compression-type':   'Qcow2CompressionType',
>
> I'd keep the existing lines unchanged.  Not a demand.
>
> > +            '*zoned-profile':      'str',
> > +            '*zone-size':          'size',
> > +            '*zone-capacity':      'size',
> > +            '*zone-nr-conv':       'uint32',
> > +            '*max-open-zones':     'uint32',
> > +            '*max-active-zones':   'uint32',
> > +            '*max-append-sectors': 'uint32'}}
>
> Keep the spaces around the } for consistency.  Yes, they're kind of
> ugly.

Thanks!

>
> >
> >  ##
> >  # @BlockdevCreateOptionsQed:
>
Sam Li Aug. 28, 2023, 9:22 a.m. UTC | #5
Stefan Hajnoczi <stefanha@redhat.com> 于2023年8月21日周一 21:31写道:
>
> On Mon, Aug 14, 2023 at 04:58:00PM +0800, Sam Li wrote:
> > diff --git a/block/qcow2.h b/block/qcow2.h
> > index f789ce3ae0..3694c8d217 100644
> > --- a/block/qcow2.h
> > +++ b/block/qcow2.h
> > @@ -236,6 +236,20 @@ typedef struct Qcow2CryptoHeaderExtension {
> >      uint64_t length;
> >  } QEMU_PACKED Qcow2CryptoHeaderExtension;
> >
> > +typedef struct Qcow2ZonedHeaderExtension {
> > +    /* Zoned device attributes */
> > +    uint8_t zoned_profile;
> > +    uint8_t zoned;
> > +    uint16_t reserved16;
> > +    uint32_t zone_size;
> > +    uint32_t zone_capacity;
>
> Should zone capacity be stored individually for each zone (alongside the
> write pointer and other per zone metadata) instead of as a global value
> for all zones? My understanding is that NVMe ZNS does not have a global
> value and each zone could have a different zone capacity value.

Though zone capacity is a per-zone attribute, it remains the same for all
zones in most cases. According to the NVMe ZNS spec, zone capacity
changes are associated with the RESET_ZONE op when the Variable Zone
Capacity bit is '1'. The spec does not say specifically what it changes to.
The current ZNS emulation does not change zone capacity either.

If the Variable Zone Capacity bit is cleared to ‘0’ in the Zone
Operation Characteristics field in the Zoned
Namespace Command Set specific Identify Namespace data structure, then
this field does not change without a change to the format of the zoned
namespace.

If the Variable Zone Capacity bit is set to ‘1’ in the Zone Operation
Characteristics field in the Zoned
Namespace Command Set specific Identify Namespace data structure, then
the zone capacity may
change upon successful completion of a Zone Management Send command
specifying the Zone Send
Action of Reset Zone.

>
> > +    uint32_t nr_zones;
>
> Is this field necessary since it can be derived from other image
> options: nr_zones = DIV_ROUND_UP(total_length, zone_capacity)?

It can be dropped. I added this to reduce duplication. Thanks!
Damien Le Moal Aug. 28, 2023, 10:12 a.m. UTC | #6
On 8/28/23 18:22, Sam Li wrote:
> Stefan Hajnoczi <stefanha@redhat.com> 于2023年8月21日周一 21:31写道:
>>
>> On Mon, Aug 14, 2023 at 04:58:00PM +0800, Sam Li wrote:
>>> diff --git a/block/qcow2.h b/block/qcow2.h
>>> index f789ce3ae0..3694c8d217 100644
>>> --- a/block/qcow2.h
>>> +++ b/block/qcow2.h
>>> @@ -236,6 +236,20 @@ typedef struct Qcow2CryptoHeaderExtension {
>>>      uint64_t length;
>>>  } QEMU_PACKED Qcow2CryptoHeaderExtension;
>>>
>>> +typedef struct Qcow2ZonedHeaderExtension {
>>> +    /* Zoned device attributes */
>>> +    uint8_t zoned_profile;
>>> +    uint8_t zoned;
>>> +    uint16_t reserved16;
>>> +    uint32_t zone_size;
>>> +    uint32_t zone_capacity;
>>
>> Should zone capacity be stored individually for each zone (alongside the
>> write pointer and other per zone metadata) instead of as a global value
>> for all zones? My understanding is that NVMe ZNS does not have a global
>> value and each zone could have a different zone capacity value.
> 
> Though zone capacity is per-zone attribute, it remains same for all
> zones in most cases. Referring to the NVMe ZNS spec, zone capacity
> changes associate to RESET_ZONE op when the variable zone capacity bit
> is '1'. It hasn't specifically tell what it is changed to. Current ZNS
> emulation doesn't change zone capacity as well.
> 
> If the Variable Zone Capacity bit is cleared to ‘0’ in the Zone
> Operation Characteristics field in the Zoned
> Namespace Command Set specific Identify Namespace data structure, then
> this field does not change without a change to the format of the zoned
> namespace.
> 
> If the Variable Zone Capacity bit is set to ‘1’ in the Zone Operation
> Characteristics field in the Zoned
> Namespace Command Set specific Identify Namespace data structure, then
> the zone capacity may
> change upon successful completion of a Zone Management Send command
> specifying the Zone Send
> Action of Reset Zone.

Regardless of the variable zone capacity feature, zone capacity is per zone and
may be different between zones. That is why it is reported per zone in zone
report. The IO path code should not assume that the zone capacity is the same
for all zones.

For this particular case though, given that this is QCow2 emulation, I think
limiting ourselves to the same zone capacity for all zones is fine. But that
should probably be clearly stated somewhere...

> 
>>
>>> +    uint32_t nr_zones;
>>
>> Is this field necessary since it can be derived from other image
>> options: nr_zones = DIV_ROUND_UP(total_length, zone_capacity)?
> 
> It can be dropped. I added this for reducing duplication. Thanks!
Sam Li Aug. 28, 2023, 10:18 a.m. UTC | #7
Damien Le Moal <dlemoal@kernel.org> 于2023年8月28日周一 18:13写道:
>
> On 8/28/23 18:22, Sam Li wrote:
> > Stefan Hajnoczi <stefanha@redhat.com> 于2023年8月21日周一 21:31写道:
> >>
> >> On Mon, Aug 14, 2023 at 04:58:00PM +0800, Sam Li wrote:
> >>> diff --git a/block/qcow2.h b/block/qcow2.h
> >>> index f789ce3ae0..3694c8d217 100644
> >>> --- a/block/qcow2.h
> >>> +++ b/block/qcow2.h
> >>> @@ -236,6 +236,20 @@ typedef struct Qcow2CryptoHeaderExtension {
> >>>      uint64_t length;
> >>>  } QEMU_PACKED Qcow2CryptoHeaderExtension;
> >>>
> >>> +typedef struct Qcow2ZonedHeaderExtension {
> >>> +    /* Zoned device attributes */
> >>> +    uint8_t zoned_profile;
> >>> +    uint8_t zoned;
> >>> +    uint16_t reserved16;
> >>> +    uint32_t zone_size;
> >>> +    uint32_t zone_capacity;
> >>
> >> Should zone capacity be stored individually for each zone (alongside the
> >> write pointer and other per zone metadata) instead of as a global value
> >> for all zones? My understanding is that NVMe ZNS does not have a global
> >> value and each zone could have a different zone capacity value.
> >
> > Though zone capacity is per-zone attribute, it remains same for all
> > zones in most cases. Referring to the NVMe ZNS spec, zone capacity
> > changes associate to RESET_ZONE op when the variable zone capacity bit
> > is '1'. It hasn't specifically tell what it is changed to. Current ZNS
> > emulation doesn't change zone capacity as well.
> >
> > If the Variable Zone Capacity bit is cleared to ‘0’ in the Zone
> > Operation Characteristics field in the Zoned
> > Namespace Command Set specific Identify Namespace data structure, then
> > this field does not change without a change to the format of the zoned
> > namespace.
> >
> > If the Variable Zone Capacity bit is set to ‘1’ in the Zone Operation
> > Characteristics field in the Zoned
> > Namespace Command Set specific Identify Namespace data structure, then
> > the zone capacity may
> > change upon successful completion of a Zone Management Send command
> > specifying the Zone Send
> > Action of Reset Zone.
>
> Regardless of the variable zone capacity feature, zone capacity is per zone and
> may be different between zones. That is why it is reported per zone in zone
> report. The IO path code should not assume that the zone capacity is the same
> for all zones.

How is zone capacity changed, by devices or commands? Can you give
some example please?

>
> For this particular case though, given that this is QCow2 emulation, limiting
> ourselves to the same zone capacity for all zones is I think fine. But that
> should be clearly stated somewhere may be...

I see. That can be added to the qcow2 documentation.

>
> >
> >>
> >>> +    uint32_t nr_zones;
> >>
> >> Is this field necessary since it can be derived from other image
> >> options: nr_zones = DIV_ROUND_UP(total_length, zone_capacity)?
> >
> > It can be dropped. I added this for reducing duplication. Thanks!
>
> --
> Damien Le Moal
> Western Digital Research
>
Damien Le Moal Aug. 28, 2023, 10:22 a.m. UTC | #8
On 8/28/23 19:18, Sam Li wrote:
> Damien Le Moal <dlemoal@kernel.org> 于2023年8月28日周一 18:13写道:
>>
>> On 8/28/23 18:22, Sam Li wrote:
>>> Stefan Hajnoczi <stefanha@redhat.com> 于2023年8月21日周一 21:31写道:
>>>>
>>>> On Mon, Aug 14, 2023 at 04:58:00PM +0800, Sam Li wrote:
>>>>> diff --git a/block/qcow2.h b/block/qcow2.h
>>>>> index f789ce3ae0..3694c8d217 100644
>>>>> --- a/block/qcow2.h
>>>>> +++ b/block/qcow2.h
>>>>> @@ -236,6 +236,20 @@ typedef struct Qcow2CryptoHeaderExtension {
>>>>>      uint64_t length;
>>>>>  } QEMU_PACKED Qcow2CryptoHeaderExtension;
>>>>>
>>>>> +typedef struct Qcow2ZonedHeaderExtension {
>>>>> +    /* Zoned device attributes */
>>>>> +    uint8_t zoned_profile;
>>>>> +    uint8_t zoned;
>>>>> +    uint16_t reserved16;
>>>>> +    uint32_t zone_size;
>>>>> +    uint32_t zone_capacity;
>>>>
>>>> Should zone capacity be stored individually for each zone (alongside the
>>>> write pointer and other per zone metadata) instead of as a global value
>>>> for all zones? My understanding is that NVMe ZNS does not have a global
>>>> value and each zone could have a different zone capacity value.
>>>
>>> Though zone capacity is per-zone attribute, it remains same for all
>>> zones in most cases. Referring to the NVMe ZNS spec, zone capacity
>>> changes associate to RESET_ZONE op when the variable zone capacity bit
>>> is '1'. It hasn't specifically tell what it is changed to. Current ZNS
>>> emulation doesn't change zone capacity as well.
>>>
>>> If the Variable Zone Capacity bit is cleared to ‘0’ in the Zone
>>> Operation Characteristics field in the Zoned
>>> Namespace Command Set specific Identify Namespace data structure, then
>>> this field does not change without a change to the format of the zoned
>>> namespace.
>>>
>>> If the Variable Zone Capacity bit is set to ‘1’ in the Zone Operation
>>> Characteristics field in the Zoned
>>> Namespace Command Set specific Identify Namespace data structure, then
>>> the zone capacity may
>>> change upon successful completion of a Zone Management Send command
>>> specifying the Zone Send
>>> Action of Reset Zone.
>>
>> Regardless of the variable zone capacity feature, zone capacity is per zone and
>> may be different between zones. That is why it is reported per zone in zone
>> report. The IO path code should not assume that the zone capacity is the same
>> for all zones.
> 
> How is zone capacity changed, by devices or commands? Can you give
> some example please?

If the device does not support variable zone capacity, the zone capacity is
fixed at device manufacturing time and never changes. It is reported per zone
and you have to make things work with whatever value you see. The user cannot
change device zone capacity.

For your qcow2 zoned image, the equivalent is to fix the zone capacity when the
image is created and not allow it to be changed. And for simplicity, the same
zone capacity value can be used for all zones, so having the zone capacity
value in the header is OK.

> 
>>
>> For this particular case though, given that this is QCow2 emulation, limiting
>> ourselves to the same zone capacity for all zones is I think fine. But that
>> should be clearly stated somewhere may be...
> 
> I see. The qcow2 documentaion can add that.
> 
>>
>>>
>>>>
>>>>> +    uint32_t nr_zones;
>>>>
>>>> Is this field necessary since it can be derived from other image
>>>> options: nr_zones = DIV_ROUND_UP(total_length, zone_capacity)?
>>>
>>> It can be dropped. I added this for reducing duplication. Thanks!
>>
>> --
>> Damien Le Moal
>> Western Digital Research
>>
Sam Li Aug. 28, 2023, 10:40 a.m. UTC | #9
Damien Le Moal <dlemoal@kernel.org> 于2023年8月28日周一 18:22写道:
>
> On 8/28/23 19:18, Sam Li wrote:
> > Damien Le Moal <dlemoal@kernel.org> 于2023年8月28日周一 18:13写道:
> >>
> >> On 8/28/23 18:22, Sam Li wrote:
> >>> Stefan Hajnoczi <stefanha@redhat.com> 于2023年8月21日周一 21:31写道:
> >>>>
> >>>> On Mon, Aug 14, 2023 at 04:58:00PM +0800, Sam Li wrote:
> >>>>> diff --git a/block/qcow2.h b/block/qcow2.h
> >>>>> index f789ce3ae0..3694c8d217 100644
> >>>>> --- a/block/qcow2.h
> >>>>> +++ b/block/qcow2.h
> >>>>> @@ -236,6 +236,20 @@ typedef struct Qcow2CryptoHeaderExtension {
> >>>>>      uint64_t length;
> >>>>>  } QEMU_PACKED Qcow2CryptoHeaderExtension;
> >>>>>
> >>>>> +typedef struct Qcow2ZonedHeaderExtension {
> >>>>> +    /* Zoned device attributes */
> >>>>> +    uint8_t zoned_profile;
> >>>>> +    uint8_t zoned;
> >>>>> +    uint16_t reserved16;
> >>>>> +    uint32_t zone_size;
> >>>>> +    uint32_t zone_capacity;
> >>>>
> >>>> Should zone capacity be stored individually for each zone (alongside the
> >>>> write pointer and other per zone metadata) instead of as a global value
> >>>> for all zones? My understanding is that NVMe ZNS does not have a global
> >>>> value and each zone could have a different zone capacity value.
> >>>
> >>> Though zone capacity is per-zone attribute, it remains same for all
> >>> zones in most cases. Referring to the NVMe ZNS spec, zone capacity
> >>> changes associate to RESET_ZONE op when the variable zone capacity bit
> >>> is '1'. It hasn't specifically tell what it is changed to. Current ZNS
> >>> emulation doesn't change zone capacity as well.
> >>>
> >>> If the Variable Zone Capacity bit is cleared to ‘0’ in the Zone
> >>> Operation Characteristics field in the Zoned
> >>> Namespace Command Set specific Identify Namespace data structure, then
> >>> this field does not change without a change to the format of the zoned
> >>> namespace.
> >>>
> >>> If the Variable Zone Capacity bit is set to ‘1’ in the Zone Operation
> >>> Characteristics field in the Zoned
> >>> Namespace Command Set specific Identify Namespace data structure, then
> >>> the zone capacity may
> >>> change upon successful completion of a Zone Management Send command
> >>> specifying the Zone Send
> >>> Action of Reset Zone.
> >>
> >> Regardless of the variable zone capacity feature, zone capacity is per zone and
> >> may be different between zones. That is why it is reported per zone in zone
> >> report. The IO path code should not assume that the zone capacity is the same
> >> for all zones.
> >
> > How is zone capacity changed, by devices or commands? Can you give
> > some example please?
>
> If the device does not support variable zone capacity, the zone capacity is
> fixed at device manufacturing time and never changes. It is reported per zone
> and you have to make things work with whatever value you see. The user cannot
> change device zone capacity.
>
> For you qcow2 zoned image, the equivalent is to fix the zone capacity when the
> image is created and not allowing to change it. And for simplicity, the same
> zone capacity value can be used for all zones, so having the zone capacity
> value in the header is OK.

Thanks!

>
> >
> >>
> >> For this particular case though, given that this is QCow2 emulation, limiting
> >> ourselves to the same zone capacity for all zones is I think fine. But that
> >> should be clearly stated somewhere may be...
> >
> > I see. The qcow2 documentaion can add that.
> >
> >>
> >>>
> >>>>
> >>>>> +    uint32_t nr_zones;
> >>>>
> >>>> Is this field necessary since it can be derived from other image
> >>>> options: nr_zones = DIV_ROUND_UP(total_length, zone_capacity)?
> >>>
> >>> It can be dropped. I added this for reducing duplication. Thanks!
> >>
> >> --
> >> Damien Le Moal
> >> Western Digital Research
> >>
>
> --
> Damien Le Moal
> Western Digital Research
>
Sam Li Aug. 28, 2023, 2:42 p.m. UTC | #10
Stefan Hajnoczi <stefanha@redhat.com> 于2023年8月21日周一 21:31写道:
>
> On Mon, Aug 14, 2023 at 04:58:00PM +0800, Sam Li wrote:
> > diff --git a/block/qcow2.h b/block/qcow2.h
> > index f789ce3ae0..3694c8d217 100644
> > --- a/block/qcow2.h
> > +++ b/block/qcow2.h
> > @@ -236,6 +236,20 @@ typedef struct Qcow2CryptoHeaderExtension {
> >      uint64_t length;
> >  } QEMU_PACKED Qcow2CryptoHeaderExtension;
> >
> > +typedef struct Qcow2ZonedHeaderExtension {
> > +    /* Zoned device attributes */
> > +    uint8_t zoned_profile;
> > +    uint8_t zoned;
> > +    uint16_t reserved16;
> > +    uint32_t zone_size;
> > +    uint32_t zone_capacity;
>
> Should zone capacity be stored individually for each zone (alongside the
> write pointer and other per zone metadata) instead of as a global value
> for all zones? My understanding is that NVMe ZNS does not have a global
> value and each zone could have a different zone capacity value.
>
> > +    uint32_t nr_zones;
>
> Is this field necessary since it can be derived from other image
> options: nr_zones = DIV_ROUND_UP(total_length, zone_capacity)?

Yes. The bs->total_sectors in refresh_limits is zero. Keeping a
persistent nr_zones helps assign the right value instead of zero.

The process is roughly like this:
*_qcow2_create: calculate nr_zones and write it to zoned_header
->  *_qcow2_update_header: update nr_zones
    ->  *_qcow2_read_extensions: read nr_zones from zoned_header into
Qcow2State and check that it is correct (the total size is valid here)
      -> *_refresh_limits(): set bl.nr_zones to zoned_header.nr_zones

Sam
diff mbox series

Patch

diff --git a/block/qcow2.c b/block/qcow2.c
index c51388e99d..c1077c4a4a 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -73,6 +73,7 @@  typedef struct {
 #define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
 #define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
 #define  QCOW2_EXT_MAGIC_DATA_FILE 0x44415441
+#define  QCOW2_EXT_MAGIC_ZONED_FORMAT 0x7a6264
 
 static int coroutine_fn
 qcow2_co_preadv_compressed(BlockDriverState *bs,
@@ -210,6 +211,7 @@  qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
     uint64_t offset;
     int ret;
     Qcow2BitmapHeaderExt bitmaps_ext;
+    Qcow2ZonedHeaderExtension zoned_ext;
 
     if (need_update_header != NULL) {
         *need_update_header = false;
@@ -431,6 +433,38 @@  qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
             break;
         }
 
+        case QCOW2_EXT_MAGIC_ZONED_FORMAT:
+        {
+            if (ext.len != sizeof(zoned_ext)) {
+                error_setg_errno(errp, -ret, "zoned_ext: "
+                                             "Invalid extension length");
+                return -EINVAL;
+            }
+            ret = bdrv_pread(bs->file, offset, ext.len, &zoned_ext, 0);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "zoned_ext: "
+                                             "Could not read ext header");
+                return ret;
+            }
+
+            zoned_ext.zone_size = be32_to_cpu(zoned_ext.zone_size);
+            zoned_ext.zone_capacity = be32_to_cpu(zoned_ext.zone_capacity);
+            zoned_ext.nr_zones = be32_to_cpu(zoned_ext.nr_zones);
+            zoned_ext.zone_nr_conv = be32_to_cpu(zoned_ext.zone_nr_conv);
+            zoned_ext.max_open_zones = be32_to_cpu(zoned_ext.max_open_zones);
+            zoned_ext.max_active_zones =
+                be32_to_cpu(zoned_ext.max_active_zones);
+            zoned_ext.max_append_sectors =
+                be32_to_cpu(zoned_ext.max_append_sectors);
+            s->zoned_header = zoned_ext;
+
+#ifdef DEBUG_EXT
+            printf("Qcow2: Got zoned format extension: "
+                   "offset=%" PRIu32 "\n", offset);
+#endif
+            break;
+        }
+
         default:
             /* unknown magic - save it in case we need to rewrite the header */
             /* If you add a new feature, make sure to also update the fast
@@ -3089,6 +3123,31 @@  int qcow2_update_header(BlockDriverState *bs)
         buflen -= ret;
     }
 
+    /* Zoned devices header extension */
+    if (s->zoned_header.zoned == BLK_Z_HM) {
+        Qcow2ZonedHeaderExtension zoned_header = {
+            .zoned_profile      = s->zoned_header.zoned_profile,
+            .zoned              = s->zoned_header.zoned,
+            .nr_zones           = cpu_to_be32(s->zoned_header.nr_zones),
+            .zone_size          = cpu_to_be32(s->zoned_header.zone_size),
+            .zone_capacity      = cpu_to_be32(s->zoned_header.zone_capacity),
+            .zone_nr_conv       = cpu_to_be32(s->zoned_header.zone_nr_conv),
+            .max_open_zones     = cpu_to_be32(s->zoned_header.max_open_zones),
+            .max_active_zones   =
+                cpu_to_be32(s->zoned_header.max_active_zones),
+            .max_append_sectors =
+                cpu_to_be32(s->zoned_header.max_append_sectors)
+        };
+        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_ZONED_FORMAT,
+                             &zoned_header, sizeof(zoned_header),
+                             buflen);
+        if (ret < 0) {
+            goto fail;
+        }
+        buf += ret;
+        buflen -= ret;
+    }
+
     /* Keep unknown header extensions */
     QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
         ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
@@ -3773,6 +3832,23 @@  qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
         s->image_data_file = g_strdup(data_bs->filename);
     }
 
+    if (qcow2_opts->zoned_profile) {
+        BDRVQcow2State *s = blk_bs(blk)->opaque;
+        if (!strcmp(qcow2_opts->zoned_profile, "zbc")) {
+            s->zoned_header.zoned_profile = BLK_ZP_ZBC;
+            s->zoned_header.zone_capacity = qcow2_opts->zone_size;
+        } else if (!strcmp(qcow2_opts->zoned_profile, "zns")) {
+            s->zoned_header.zoned_profile = BLK_ZP_ZNS;
+            s->zoned_header.zone_capacity = qcow2_opts->zone_capacity;
+        }
+        s->zoned_header.zoned = BLK_Z_HM;
+        s->zoned_header.zone_size = qcow2_opts->zone_size;
+        s->zoned_header.zone_nr_conv = qcow2_opts->zone_nr_conv;
+        s->zoned_header.max_open_zones = qcow2_opts->max_open_zones;
+        s->zoned_header.max_active_zones = qcow2_opts->max_active_zones;
+        s->zoned_header.max_append_sectors = qcow2_opts->max_append_sectors;
+    }
+
     /* Create a full header (including things like feature table) */
     ret = qcow2_update_header(blk_bs(blk));
     bdrv_graph_co_rdunlock();
@@ -3891,6 +3967,13 @@  qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
         qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
     }
 
+    /* The available zoned-profile options are zbc, which stands for
+     * ZBC/ZAC standards, and zns following NVMe ZNS spec. */
+    val = qdict_get_try_str(qdict, BLOCK_OPT_Z_PROFILE);
+    if (val) {
+        qdict_put_str(qdict, BLOCK_OPT_Z_PROFILE, val);
+    }
+
     /* Change legacy command line options into QMP ones */
     static const QDictRenames opt_renames[] = {
         { BLOCK_OPT_BACKING_FILE,       "backing-file" },
@@ -3903,6 +3986,13 @@  qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
         { BLOCK_OPT_COMPAT_LEVEL,       "version" },
         { BLOCK_OPT_DATA_FILE_RAW,      "data-file-raw" },
         { BLOCK_OPT_COMPRESSION_TYPE,   "compression-type" },
+        { BLOCK_OPT_Z_PROFILE,          "zoned-profile"},
+        { BLOCK_OPT_Z_NR_COV,           "zone-nr-conv"},
+        { BLOCK_OPT_Z_MOZ,              "max-open-zones"},
+        { BLOCK_OPT_Z_MAZ,              "max-active-zones"},
+        { BLOCK_OPT_Z_MAS,              "max-append-sectors"},
+        { BLOCK_OPT_Z_SIZE,             "zone-size"},
+        { BLOCK_OPT_Z_CAP,              "zone-capacity"},
         { NULL, NULL },
     };
 
@@ -6066,6 +6156,41 @@  static QemuOptsList qcow2_create_opts = {
             .help = "Compression method used for image cluster "        \
                     "compression",                                      \
             .def_value_str = "zlib"                                     \
+        },                                                              \
+            {
+            .name = BLOCK_OPT_Z_PROFILE,                                \
+            .type = QEMU_OPT_STRING,                                    \
+            .help = "zoned format option for the disk img",             \
+        },                                                              \
+            {                                                           \
+            .name = BLOCK_OPT_Z_SIZE,                                   \
+            .type = QEMU_OPT_SIZE,                                      \
+            .help = "zone size",                                        \
+        },                                                              \
+        {                                                           \
+            .name = BLOCK_OPT_Z_CAP,                                    \
+            .type = QEMU_OPT_SIZE,                                      \
+            .help = "zone capacity",                                    \
+        },                                                              \
+        {                                                               \
+                .name = BLOCK_OPT_Z_NR_COV,                             \
+                .type = QEMU_OPT_NUMBER,                                \
+                .help = "numbers of conventional zones",                \
+        },                                                              \
+        {                                                               \
+                .name = BLOCK_OPT_Z_MAS,                                \
+                .type = QEMU_OPT_NUMBER,                                \
+                .help = "max append sectors",                           \
+        },                                                              \
+        {                                                               \
+                .name = BLOCK_OPT_Z_MAZ,                                \
+                .type = QEMU_OPT_NUMBER,                                \
+                .help = "max active zones",                             \
+        },                                                              \
+        {                                                               \
+                .name = BLOCK_OPT_Z_MOZ,                                \
+                .type = QEMU_OPT_NUMBER,                                \
+                .help = "max open zones",                               \
         },
         QCOW_COMMON_OPTIONS,
         { /* end of list */ }
diff --git a/block/qcow2.h b/block/qcow2.h
index f789ce3ae0..3694c8d217 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -236,6 +236,20 @@  typedef struct Qcow2CryptoHeaderExtension {
     uint64_t length;
 } QEMU_PACKED Qcow2CryptoHeaderExtension;
 
+typedef struct Qcow2ZonedHeaderExtension {
+    /* Zoned device attributes */
+    uint8_t zoned_profile;
+    uint8_t zoned;
+    uint16_t reserved16;
+    uint32_t zone_size;
+    uint32_t zone_capacity;
+    uint32_t nr_zones;
+    uint32_t zone_nr_conv;
+    uint32_t max_active_zones;
+    uint32_t max_open_zones;
+    uint32_t max_append_sectors;
+} QEMU_PACKED Qcow2ZonedHeaderExtension;
+
 typedef struct Qcow2UnknownHeaderExtension {
     uint32_t magic;
     uint32_t len;
@@ -422,6 +436,13 @@  typedef struct BDRVQcow2State {
      * is to convert the image with the desired compression type set.
      */
     Qcow2CompressionType compression_type;
+
+    /* States of zoned device */
+    Qcow2ZonedHeaderExtension zoned_header;
+    uint32_t nr_zones_exp_open;
+    uint32_t nr_zones_imp_open;
+    uint32_t nr_zones_closed;
+    BlockZoneWps *wps;
 } BDRVQcow2State;
 
 typedef struct Qcow2COWRegion {
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index 2c4618375a..ef2ba6f670 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -331,6 +331,30 @@  The fields of the bitmaps extension are:
                    Offset into the image file at which the bitmap directory
                    starts. Must be aligned to a cluster boundary.
 
+== Zoned extension ==
+
+The zoned extension is an optional header extension. It is required when
+using the qcow2 file as the backing image for a zoned device.
+
+The fields of the zoned extension are:
+    Byte  0:  zoned_profile
+              Type of zoned format. Must be `zbc` or `zns`.
+                  1: `zbc`
+                  2: `zns`
+
+          1:  zoned
+              Type of zone.
+
+          2 - 3:  Reserved, must be zero.
+
+          4 - 7:  zone_size
+          8 - 11:  zone_capacity
+          12 - 15:  nr_zones
+          16 - 19:  zone_nr_conv
+          20 - 23:  max_active_zones
+          24 - 27:  max_open_zones
+          28 - 31:  max_append_sectors
+
 == Full disk encryption header pointer ==
 
 The full disk encryption header must be present if, and only if, the
diff --git a/include/block/block-common.h b/include/block/block-common.h
index e15395f2cb..9f04a772f6 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -108,6 +108,11 @@  typedef enum BlockZoneType {
     BLK_ZT_SWP = 0x3, /* Sequential writes preferred */
 } BlockZoneType;
 
+typedef enum BlockZonedProfile {
+    BLK_ZP_ZBC = 0x1,
+    BLK_ZP_ZNS = 0x2,
+} BlockZonedProfile;
+
 /*
  * Zone descriptor data structure.
  * Provides information on a zone with all position and size values in bytes.
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 74195c3004..1dbe820a9b 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -57,6 +57,14 @@ 
 #define BLOCK_OPT_DATA_FILE_RAW     "data_file_raw"
 #define BLOCK_OPT_COMPRESSION_TYPE  "compression_type"
 #define BLOCK_OPT_EXTL2             "extended_l2"
+#define BLOCK_OPT_Z_PROFILE         "zoned_profile"
+#define BLOCK_OPT_Z_MODEL           "zoned"
+#define BLOCK_OPT_Z_SIZE            "zone_size"
+#define BLOCK_OPT_Z_CAP             "zone_capacity"
+#define BLOCK_OPT_Z_NR_COV          "zone_nr_conv"
+#define BLOCK_OPT_Z_MAS             "max_append_sectors"
+#define BLOCK_OPT_Z_MAZ             "max_active_zones"
+#define BLOCK_OPT_Z_MOZ             "max_open_zones"
 
 #define BLOCK_PROBE_BUF_SIZE        512
 
@@ -872,12 +880,20 @@  typedef struct BlockLimits {
      */
     bool has_variable_length;
 
+    BlockZonedProfile zoned_profile;
+
     /* device zone model */
     BlockZoneModel zoned;
 
     /* zone size expressed in bytes */
     uint32_t zone_size;
 
+    /*
+     * the number of usable logical blocks within the zone, expressed
+     * in bytes. A zone capacity is smaller or equal to the zone size.
+     */
+    uint32_t zone_capacity;
+
     /* total number of zones */
     uint32_t nr_zones;
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2b1d493d6e..0c97ae678b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -5020,24 +5020,42 @@ 
 #
 # @compression-type: The image cluster compression method
 #     (default: zlib, since 5.1)
+# @zoned-profile: The zoned device protocol profile, either zbc or zns
+#                 (default: off, since 8.0)
+# @zone-size: The size of a zone of the zoned device (since 8.0)
+# @zone-capacity: The capacity of a zone of the zoned device (since 8.0)
+# @zone-nr-conv: The number of conventional zones of the zoned device
+#                (since 8.0)
+# @max-open-zones: The maximum number of open zones allowed (since 8.0)
+# @max-active-zones: The maximum number of zones in the implicit open,
+#                    explicit open or closed state (since 8.0)
+# @max-append-sectors: The maximum number of sectors allowed in an
+#                      append write (since 8.0)
 #
 # Since: 2.12
 ##
 { 'struct': 'BlockdevCreateOptionsQcow2',
-  'data': { 'file':             'BlockdevRef',
-            '*data-file':       'BlockdevRef',
-            '*data-file-raw':   'bool',
-            '*extended-l2':     'bool',
-            'size':             'size',
-            '*version':         'BlockdevQcow2Version',
-            '*backing-file':    'str',
-            '*backing-fmt':     'BlockdevDriver',
-            '*encrypt':         'QCryptoBlockCreateOptions',
-            '*cluster-size':    'size',
-            '*preallocation':   'PreallocMode',
-            '*lazy-refcounts':  'bool',
-            '*refcount-bits':   'int',
-            '*compression-type':'Qcow2CompressionType' } }
+  'data': { 'file':                'BlockdevRef',
+            '*data-file':          'BlockdevRef',
+            '*data-file-raw':      'bool',
+            '*extended-l2':        'bool',
+            'size':                'size',
+            '*version':            'BlockdevQcow2Version',
+            '*backing-file':       'str',
+            '*backing-fmt':        'BlockdevDriver',
+            '*encrypt':            'QCryptoBlockCreateOptions',
+            '*cluster-size':       'size',
+            '*preallocation':      'PreallocMode',
+            '*lazy-refcounts':     'bool',
+            '*refcount-bits':      'int',
+            '*compression-type':   'Qcow2CompressionType',
+            '*zoned-profile':      'str',
+            '*zone-size':          'size',
+            '*zone-capacity':      'size',
+            '*zone-nr-conv':       'uint32',
+            '*max-open-zones':     'uint32',
+            '*max-active-zones':   'uint32',
+            '*max-append-sectors': 'uint32'}}
 
 ##
 # @BlockdevCreateOptionsQed: