diff mbox

[1/6] block: Keep track of devices' I/O status

Message ID 1316715584-25557-2-git-send-email-lcapitulino@redhat.com
State New
Headers show

Commit Message

Luiz Capitulino Sept. 22, 2011, 6:19 p.m. UTC
This commit adds support to the BlockDriverState type to keep track
of devices' I/O status.

There are three possible status: BDRV_IOS_OK (no error), BDRV_IOS_ENOSPC
(no space error) and BDRV_IOS_FAILED (any other error). The distinction
between no space and other errors is important because a management
application may want to watch for no space in order to extend the
space assigned to the VM and put it to run again.

Qemu devices supporting the I/O status feature have to enable it
explicitly by calling bdrv_iostatus_enable() _and_ have to be
configured to stop the VM on errors (ie. werror=stop|enospc or
rerror=stop).

In case of multiple errors being triggered in sequence only the first
one is stored. The I/O status is always reset to BDRV_IOS_OK when the
'cont' command is issued.

Next commits will add support to some devices and extend the
query-block/info block commands to return the I/O status information.

Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
---
 block.c     |   32 ++++++++++++++++++++++++++++++++
 block.h     |    9 +++++++++
 block_int.h |    1 +
 monitor.c   |    6 ++++++
 4 files changed, 48 insertions(+), 0 deletions(-)

Comments

Zhiyong Wu Sept. 23, 2011, 7:06 a.m. UTC | #1
On Fri, Sep 23, 2011 at 2:19 AM, Luiz Capitulino <lcapitulino@redhat.com> wrote:
> This commit adds support to the BlockDriverState type to keep track
> of devices' I/O status.
>
> There are three possible status: BDRV_IOS_OK (no error), BDRV_IOS_ENOSPC
> (no space error) and BDRV_IOS_FAILED (any other error). The distinction
> between no space and other errors is important because a management
> application may want to watch for no space in order to extend the
> space assigned to the VM and put it to run again.
>
> Qemu devices supporting the I/O status feature have to enable it
> explicitly by calling bdrv_iostatus_enable() _and_ have to be
> configured to stop the VM on errors (ie. werror=stop|enospc or
> rerror=stop).
>
> In case of multiple errors being triggered in sequence only the first
> one is stored. The I/O status is always reset to BDRV_IOS_OK when the
> 'cont' command is issued.
>
> Next commits will add support to some devices and extend the
> query-block/info block commands to return the I/O status information.
>
> Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
> ---
>  block.c     |   32 ++++++++++++++++++++++++++++++++
>  block.h     |    9 +++++++++
>  block_int.h |    1 +
>  monitor.c   |    6 ++++++
>  4 files changed, 48 insertions(+), 0 deletions(-)
>
> diff --git a/block.c b/block.c
> index e3fe97f..fbd90b4 100644
> --- a/block.c
> +++ b/block.c
> @@ -221,6 +221,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>     if (device_name[0] != '\0') {
>         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
>     }
> +    bs->iostatus = BDRV_IOS_INVAL;
>     return bs;
>  }
bs->iostatus is set to BDRV_IOS_INVAL only in bdrv_new(), if the drive
is opened and closed repeatly, how about the field?
Moreover, it seems that it has not been reset when the drive is closed
via bdrv_close().

>
> @@ -3181,6 +3182,37 @@ int bdrv_in_use(BlockDriverState *bs)
>     return bs->in_use;
>  }
>
> +void bdrv_iostatus_enable(BlockDriverState *bs)
> +{
> +    assert(bs->iostatus == BDRV_IOS_INVAL);
> +    bs->iostatus = BDRV_IOS_OK;
> +}
> +
> +/* The I/O status is only enabled if the drive explicitly
> + * enables it _and_ the VM is configured to stop on errors */
> +bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
> +{
> +    return (bs->iostatus != BDRV_IOS_INVAL &&
> +           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
> +            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
> +            bs->on_read_error == BLOCK_ERR_STOP_ANY));
> +}
> +
> +void bdrv_iostatus_reset(BlockDriverState *bs)
> +{
> +    if (bdrv_iostatus_is_enabled(bs)) {
> +        bs->iostatus = BDRV_IOS_OK;
> +    }
> +}
> +
> +void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
> +{
> +    if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
> +        bs->iostatus = (abs(error) == ENOSPC) ? BDRV_IOS_ENOSPC :
> +                                                BDRV_IOS_FAILED;
> +    }
> +}
> +
>  void
>  bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
>         enum BlockAcctType type)
> diff --git a/block.h b/block.h
> index 16bfa0a..de74af0 100644
> --- a/block.h
> +++ b/block.h
> @@ -77,6 +77,15 @@ typedef enum {
>     BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
>  } BlockMonEventAction;
>
> +typedef enum {
> +    BDRV_IOS_INVAL, BDRV_IOS_OK, BDRV_IOS_FAILED, BDRV_IOS_ENOSPC,
> +    BDRV_IOS_MAX
> +} BlockIOStatus;
> +
> +void bdrv_iostatus_enable(BlockDriverState *bs);
> +void bdrv_iostatus_reset(BlockDriverState *bs);
> +bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
> +void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
>  void bdrv_mon_event(const BlockDriverState *bdrv,
>                     BlockMonEventAction action, int is_read);
>  void bdrv_info_print(Monitor *mon, const QObject *data);
> diff --git a/block_int.h b/block_int.h
> index 8c3b863..f2f4f2d 100644
> --- a/block_int.h
> +++ b/block_int.h
> @@ -199,6 +199,7 @@ struct BlockDriverState {
>        drivers. They are not used by the block driver */
>     int cyls, heads, secs, translation;
>     BlockErrorAction on_read_error, on_write_error;
> +    BlockIOStatus iostatus;
>     char device_name[32];
>     unsigned long *dirty_bitmap;
>     int64_t dirty_count;
> diff --git a/monitor.c b/monitor.c
> index 8ec2c5e..88d8228 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -1304,6 +1304,11 @@ struct bdrv_iterate_context {
>     int err;
>  };
>
> +static void iostatus_bdrv_it(void *opaque, BlockDriverState *bs)
> +{
> +    bdrv_iostatus_reset(bs);
> +}
> +
>  /**
>  * do_cont(): Resume emulation.
>  */
> @@ -1320,6 +1325,7 @@ static int do_cont(Monitor *mon, const QDict *qdict, QObject **ret_data)
>         return -1;
>     }
>
> +    bdrv_iterate(iostatus_bdrv_it, NULL);
>     bdrv_iterate(encrypted_bdrv_it, &context);
>     /* only resume the vm if all keys are set and valid */
>     if (!context.err) {
> --
> 1.7.7.rc0.72.g4b5ea
>
>
>
Markus Armbruster Sept. 23, 2011, 7:51 a.m. UTC | #2
Luiz Capitulino <lcapitulino@redhat.com> writes:

> This commit adds support to the BlockDriverState type to keep track
> of devices' I/O status.
>
> There are three possible status: BDRV_IOS_OK (no error), BDRV_IOS_ENOSPC
> (no space error) and BDRV_IOS_FAILED (any other error). The distinction
> between no space and other errors is important because a management
> application may want to watch for no space in order to extend the
> space assigned to the VM and put it to run again.
>
> Qemu devices supporting the I/O status feature have to enable it
> explicitly by calling bdrv_iostatus_enable() _and_ have to be
> configured to stop the VM on errors (ie. werror=stop|enospc or
> rerror=stop).
>
> In case of multiple errors being triggered in sequence only the first
> one is stored. The I/O status is always reset to BDRV_IOS_OK when the
> 'cont' command is issued.
>
> Next commits will add support to some devices and extend the
> query-block/info block commands to return the I/O status information.
>
> Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
> ---
>  block.c     |   32 ++++++++++++++++++++++++++++++++
>  block.h     |    9 +++++++++
>  block_int.h |    1 +
>  monitor.c   |    6 ++++++
>  4 files changed, 48 insertions(+), 0 deletions(-)
>
> diff --git a/block.c b/block.c
> index e3fe97f..fbd90b4 100644
> --- a/block.c
> +++ b/block.c
> @@ -221,6 +221,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>      if (device_name[0] != '\0') {
>          QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
>      }
> +    bs->iostatus = BDRV_IOS_INVAL;
>      return bs;
>  }
>  
> @@ -3181,6 +3182,37 @@ int bdrv_in_use(BlockDriverState *bs)
>      return bs->in_use;
>  }
>  
> +void bdrv_iostatus_enable(BlockDriverState *bs)
> +{
> +    assert(bs->iostatus == BDRV_IOS_INVAL);
> +    bs->iostatus = BDRV_IOS_OK;
> +}

bdrv_new() creates the BDS with I/O status tracking disabled.  Devices
that do tracking declare that by calling this function during
initialization.  Enables tracking if the BDS has suitable on_write_error
and on_read_error.

If a device gets hot unplugged, tracking remains enabled.  If the BDS
then gets reused with a device that doesn't do tracking, I/O status
becomes incorrect.  Can't happen right now, because we automatically
delete the BDS on hot unplug, but it's a trap.

Suggest to disable tracking in bdrv_detach_dev() or bdrv_attach_dev().

> +
> +/* The I/O status is only enabled if the drive explicitly
> + * enables it _and_ the VM is configured to stop on errors */
> +bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
> +{
> +    return (bs->iostatus != BDRV_IOS_INVAL &&
> +           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
> +            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
> +            bs->on_read_error == BLOCK_ERR_STOP_ANY));
> +}
> +
> +void bdrv_iostatus_reset(BlockDriverState *bs)
> +{
> +    if (bdrv_iostatus_is_enabled(bs)) {
> +        bs->iostatus = BDRV_IOS_OK;
> +    }
> +}
> +
> +void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
> +{
> +    if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
> +        bs->iostatus = (abs(error) == ENOSPC) ? BDRV_IOS_ENOSPC :
> +                                                BDRV_IOS_FAILED;
> +    }
> +}
> +

abs(error) feels... unusual.

If you want to guard against callers passing wrongly signed values, why
not simply assert(error >= 0)?

>  void
>  bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
>          enum BlockAcctType type)
> diff --git a/block.h b/block.h
> index 16bfa0a..de74af0 100644
> --- a/block.h
> +++ b/block.h
> @@ -77,6 +77,15 @@ typedef enum {
>      BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
>  } BlockMonEventAction;
>  
> +typedef enum {
> +    BDRV_IOS_INVAL, BDRV_IOS_OK, BDRV_IOS_FAILED, BDRV_IOS_ENOSPC,
> +    BDRV_IOS_MAX
> +} BlockIOStatus;

Why is this in block.h?

> +
> +void bdrv_iostatus_enable(BlockDriverState *bs);
> +void bdrv_iostatus_reset(BlockDriverState *bs);
> +bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
> +void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
>  void bdrv_mon_event(const BlockDriverState *bdrv,
>                      BlockMonEventAction action, int is_read);
>  void bdrv_info_print(Monitor *mon, const QObject *data);
> diff --git a/block_int.h b/block_int.h
> index 8c3b863..f2f4f2d 100644
> --- a/block_int.h
> +++ b/block_int.h
> @@ -199,6 +199,7 @@ struct BlockDriverState {
>         drivers. They are not used by the block driver */
>      int cyls, heads, secs, translation;
>      BlockErrorAction on_read_error, on_write_error;
> +    BlockIOStatus iostatus;
>      char device_name[32];
>      unsigned long *dirty_bitmap;
>      int64_t dirty_count;
> diff --git a/monitor.c b/monitor.c
> index 8ec2c5e..88d8228 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -1304,6 +1304,11 @@ struct bdrv_iterate_context {
>      int err;
>  };
>  
> +static void iostatus_bdrv_it(void *opaque, BlockDriverState *bs)
> +{
> +    bdrv_iostatus_reset(bs);
> +}
> +
>  /**
>   * do_cont(): Resume emulation.
>   */
> @@ -1320,6 +1325,7 @@ static int do_cont(Monitor *mon, const QDict *qdict, QObject **ret_data)
>          return -1;
>      }
>  
> +    bdrv_iterate(iostatus_bdrv_it, NULL);
>      bdrv_iterate(encrypted_bdrv_it, &context);
>      /* only resume the vm if all keys are set and valid */
>      if (!context.err) {
Markus Armbruster Sept. 23, 2011, 7:53 a.m. UTC | #3
Markus Armbruster <armbru@redhat.com> writes:

> Luiz Capitulino <lcapitulino@redhat.com> writes:
>
>> This commit adds support to the BlockDriverState type to keep track
>> of devices' I/O status.
>>
>> There are three possible status: BDRV_IOS_OK (no error), BDRV_IOS_ENOSPC
>> (no space error) and BDRV_IOS_FAILED (any other error). The distinction
>> between no space and other errors is important because a management
>> application may want to watch for no space in order to extend the
>> space assigned to the VM and put it to run again.
>>
>> Qemu devices supporting the I/O status feature have to enable it
>> explicitly by calling bdrv_iostatus_enable() _and_ have to be
>> configured to stop the VM on errors (ie. werror=stop|enospc or
>> rerror=stop).
>>
>> In case of multiple errors being triggered in sequence only the first
>> one is stored. The I/O status is always reset to BDRV_IOS_OK when the
>> 'cont' command is issued.
>>
>> Next commits will add support to some devices and extend the
>> query-block/info block commands to return the I/O status information.
>>
>> Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
>> ---
>>  block.c     |   32 ++++++++++++++++++++++++++++++++
>>  block.h     |    9 +++++++++
>>  block_int.h |    1 +
>>  monitor.c   |    6 ++++++
>>  4 files changed, 48 insertions(+), 0 deletions(-)
>>
>> diff --git a/block.c b/block.c
>> index e3fe97f..fbd90b4 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -221,6 +221,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>>      if (device_name[0] != '\0') {
>>          QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
>>      }
>> +    bs->iostatus = BDRV_IOS_INVAL;
>>      return bs;
>>  }
>>  
>> @@ -3181,6 +3182,37 @@ int bdrv_in_use(BlockDriverState *bs)
>>      return bs->in_use;
>>  }
>>  
>> +void bdrv_iostatus_enable(BlockDriverState *bs)
>> +{
>> +    assert(bs->iostatus == BDRV_IOS_INVAL);
>> +    bs->iostatus = BDRV_IOS_OK;
>> +}
>
> bdrv_new() creates the BDS with I/O status tracking disabled.  Devices
> that do tracking declare that by calling this function during
> initialization.  Enables tracking if the BDS has suitable on_write_error
> and on_read_error.
>
> If a device gets hot unplugged, tracking remains enabled.  If the BDS
> then gets reused with a device that doesn't do tracking, I/O status
> becomes incorrect.  Can't happen right now, because we automatically
> delete the BDS on hot unplug, but it's a trap.

And if the BDS gets reused with a device that does tracking, the
assertion above fails.

> Suggest to disable tracking in bdrv_detach_dev() or bdrv_attach_dev().

[...]
Luiz Capitulino Sept. 23, 2011, 2:01 p.m. UTC | #4
On Fri, 23 Sep 2011 09:51:24 +0200
Markus Armbruster <armbru@redhat.com> wrote:

> Luiz Capitulino <lcapitulino@redhat.com> writes:
> 
> > This commit adds support to the BlockDriverState type to keep track
> > of devices' I/O status.
> >
> > There are three possible status: BDRV_IOS_OK (no error), BDRV_IOS_ENOSPC
> > (no space error) and BDRV_IOS_FAILED (any other error). The distinction
> > between no space and other errors is important because a management
> > application may want to watch for no space in order to extend the
> > space assigned to the VM and put it to run again.
> >
> > Qemu devices supporting the I/O status feature have to enable it
> > explicitly by calling bdrv_iostatus_enable() _and_ have to be
> > configured to stop the VM on errors (ie. werror=stop|enospc or
> > rerror=stop).
> >
> > In case of multiple errors being triggered in sequence only the first
> > one is stored. The I/O status is always reset to BDRV_IOS_OK when the
> > 'cont' command is issued.
> >
> > Next commits will add support to some devices and extend the
> > query-block/info block commands to return the I/O status information.
> >
> > Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
> > ---
> >  block.c     |   32 ++++++++++++++++++++++++++++++++
> >  block.h     |    9 +++++++++
> >  block_int.h |    1 +
> >  monitor.c   |    6 ++++++
> >  4 files changed, 48 insertions(+), 0 deletions(-)
> >
> > diff --git a/block.c b/block.c
> > index e3fe97f..fbd90b4 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -221,6 +221,7 @@ BlockDriverState *bdrv_new(const char *device_name)
> >      if (device_name[0] != '\0') {
> >          QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
> >      }
> > +    bs->iostatus = BDRV_IOS_INVAL;
> >      return bs;
> >  }
> >  
> > @@ -3181,6 +3182,37 @@ int bdrv_in_use(BlockDriverState *bs)
> >      return bs->in_use;
> >  }
> >  
> > +void bdrv_iostatus_enable(BlockDriverState *bs)
> > +{
> > +    assert(bs->iostatus == BDRV_IOS_INVAL);
> > +    bs->iostatus = BDRV_IOS_OK;
> > +}
> 
> bdrv_new() creates the BDS with I/O status tracking disabled.  Devices
> that do tracking declare that by calling this function during
> initialization.  Enables tracking if the BDS has suitable on_write_error
> and on_read_error.
> 
> If a device gets hot unplugged, tracking remains enabled.  If the BDS
> then gets reused with a device that doesn't do tracking, I/O status
> becomes incorrect.  Can't happen right now, because we automatically
> delete the BDS on hot unplug, but it's a trap.
> 
> Suggest to disable tracking in bdrv_detach_dev() or bdrv_attach_dev().

Ok, and in bdrv_close() as Zhi Yong said.

> > +
> > +/* The I/O status is only enabled if the drive explicitly
> > + * enables it _and_ the VM is configured to stop on errors */
> > +bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
> > +{
> > +    return (bs->iostatus != BDRV_IOS_INVAL &&
> > +           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
> > +            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
> > +            bs->on_read_error == BLOCK_ERR_STOP_ANY));
> > +}
> > +
> > +void bdrv_iostatus_reset(BlockDriverState *bs)
> > +{
> > +    if (bdrv_iostatus_is_enabled(bs)) {
> > +        bs->iostatus = BDRV_IOS_OK;
> > +    }
> > +}
> > +
> > +void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
> > +{
> > +    if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
> > +        bs->iostatus = (abs(error) == ENOSPC) ? BDRV_IOS_ENOSPC :
> > +                                                BDRV_IOS_FAILED;
> > +    }
> > +}
> > +
> 
> abs(error) feels... unusual.
> 
> If you want to guard against callers passing wrongly signed values, why
> not simply assert(error >= 0)?

Yes. Actually, I thought that there were existing devices that could pass
a positive value (while others passed a negative one), but by taking a look
at the code now it seems that all supported devices pass a negative value,
so your suggestion makes sense.

> 
> >  void
> >  bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
> >          enum BlockAcctType type)
> > diff --git a/block.h b/block.h
> > index 16bfa0a..de74af0 100644
> > --- a/block.h
> > +++ b/block.h
> > @@ -77,6 +77,15 @@ typedef enum {
> >      BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
> >  } BlockMonEventAction;
> >  
> > +typedef enum {
> > +    BDRV_IOS_INVAL, BDRV_IOS_OK, BDRV_IOS_FAILED, BDRV_IOS_ENOSPC,
> > +    BDRV_IOS_MAX
> > +} BlockIOStatus;
> 
> Why is this in block.h?

I followed BlockErrorAction, you suggest block_int.h?

> 
> > +
> > +void bdrv_iostatus_enable(BlockDriverState *bs);
> > +void bdrv_iostatus_reset(BlockDriverState *bs);
> > +bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
> > +void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
> >  void bdrv_mon_event(const BlockDriverState *bdrv,
> >                      BlockMonEventAction action, int is_read);
> >  void bdrv_info_print(Monitor *mon, const QObject *data);
> > diff --git a/block_int.h b/block_int.h
> > index 8c3b863..f2f4f2d 100644
> > --- a/block_int.h
> > +++ b/block_int.h
> > @@ -199,6 +199,7 @@ struct BlockDriverState {
> >         drivers. They are not used by the block driver */
> >      int cyls, heads, secs, translation;
> >      BlockErrorAction on_read_error, on_write_error;
> > +    BlockIOStatus iostatus;
> >      char device_name[32];
> >      unsigned long *dirty_bitmap;
> >      int64_t dirty_count;
> > diff --git a/monitor.c b/monitor.c
> > index 8ec2c5e..88d8228 100644
> > --- a/monitor.c
> > +++ b/monitor.c
> > @@ -1304,6 +1304,11 @@ struct bdrv_iterate_context {
> >      int err;
> >  };
> >  
> > +static void iostatus_bdrv_it(void *opaque, BlockDriverState *bs)
> > +{
> > +    bdrv_iostatus_reset(bs);
> > +}
> > +
> >  /**
> >   * do_cont(): Resume emulation.
> >   */
> > @@ -1320,6 +1325,7 @@ static int do_cont(Monitor *mon, const QDict *qdict, QObject **ret_data)
> >          return -1;
> >      }
> >  
> > +    bdrv_iterate(iostatus_bdrv_it, NULL);
> >      bdrv_iterate(encrypted_bdrv_it, &context);
> >      /* only resume the vm if all keys are set and valid */
> >      if (!context.err) {
>
Markus Armbruster Sept. 23, 2011, 3:36 p.m. UTC | #5
Luiz Capitulino <lcapitulino@redhat.com> writes:

> On Fri, 23 Sep 2011 09:51:24 +0200
> Markus Armbruster <armbru@redhat.com> wrote:
>
>> Luiz Capitulino <lcapitulino@redhat.com> writes:
>> 
>> > This commit adds support to the BlockDriverState type to keep track
>> > of devices' I/O status.
>> >
>> > There are three possible status: BDRV_IOS_OK (no error), BDRV_IOS_ENOSPC
>> > (no space error) and BDRV_IOS_FAILED (any other error). The distinction
>> > between no space and other errors is important because a management
>> > application may want to watch for no space in order to extend the
>> > space assigned to the VM and put it to run again.
>> >
>> > Qemu devices supporting the I/O status feature have to enable it
>> > explicitly by calling bdrv_iostatus_enable() _and_ have to be
>> > configured to stop the VM on errors (ie. werror=stop|enospc or
>> > rerror=stop).
>> >
>> > In case of multiple errors being triggered in sequence only the first
>> > one is stored. The I/O status is always reset to BDRV_IOS_OK when the
>> > 'cont' command is issued.
>> >
>> > Next commits will add support to some devices and extend the
>> > query-block/info block commands to return the I/O status information.
>> >
>> > Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
>> > ---
>> >  block.c     |   32 ++++++++++++++++++++++++++++++++
>> >  block.h     |    9 +++++++++
>> >  block_int.h |    1 +
>> >  monitor.c   |    6 ++++++
>> >  4 files changed, 48 insertions(+), 0 deletions(-)
>> >
>> > diff --git a/block.c b/block.c
>> > index e3fe97f..fbd90b4 100644
>> > --- a/block.c
>> > +++ b/block.c
>> > @@ -221,6 +221,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>> >      if (device_name[0] != '\0') {
>> >          QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
>> >      }
>> > +    bs->iostatus = BDRV_IOS_INVAL;
>> >      return bs;
>> >  }
>> >  
>> > @@ -3181,6 +3182,37 @@ int bdrv_in_use(BlockDriverState *bs)
>> >      return bs->in_use;
>> >  }
>> >  
>> > +void bdrv_iostatus_enable(BlockDriverState *bs)
>> > +{
>> > +    assert(bs->iostatus == BDRV_IOS_INVAL);
>> > +    bs->iostatus = BDRV_IOS_OK;
>> > +}
>> 
>> bdrv_new() creates the BDS with I/O status tracking disabled.  Devices
>> that do tracking declare that by calling this function during
>> initialization.  Enables tracking if the BDS has suitable on_write_error
>> and on_read_error.
>> 
>> If a device gets hot unplugged, tracking remains enabled.  If the BDS
>> then gets reused with a device that doesn't do tracking, I/O status
>> becomes incorrect.  Can't happen right now, because we automatically
>> delete the BDS on hot unplug, but it's a trap.
>> 
>> Suggest to disable tracking in bdrv_detach_dev() or bdrv_attach_dev().
>
> Ok, and in bdrv_close() as Zhi Yong said.

Disabling in bdrv_close() makes the status go away on eject.  Makes some
sense, in a way.  Also complicates the simple rule "status persists from
stop to cont".  Hmm, consider the following race:

1. Management app sends eject command

2. qemu gets read error on the same drive, VM stops, I/O status set,
QEVENT_BLOCK_IO_ERROR sent to management app.

3. qemu receives eject command, bdrv_close() resets I/O status

4. Management receives QEVENT_BLOCK_IO_ERROR, and would like to find out
which drive caused it.

If 2 happens before 3, I/O status is lost.

>> > +
>> > +/* The I/O status is only enabled if the drive explicitly
>> > + * enables it _and_ the VM is configured to stop on errors */
>> > +bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
>> > +{
>> > +    return (bs->iostatus != BDRV_IOS_INVAL &&
>> > +           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
>> > +            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
>> > +            bs->on_read_error == BLOCK_ERR_STOP_ANY));
>> > +}
>> > +
>> > +void bdrv_iostatus_reset(BlockDriverState *bs)
>> > +{
>> > +    if (bdrv_iostatus_is_enabled(bs)) {
>> > +        bs->iostatus = BDRV_IOS_OK;
>> > +    }
>> > +}
>> > +
>> > +void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
>> > +{
>> > +    if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
>> > +        bs->iostatus = (abs(error) == ENOSPC) ? BDRV_IOS_ENOSPC :
>> > +                                                BDRV_IOS_FAILED;
>> > +    }
>> > +}
>> > +
>> 
>> abs(error) feels... unusual.
>> 
>> If you want to guard against callers passing wrongly signed values, why
>> not simply assert(error >= 0)?
>
> Yes. Actually, I thought that there were existing devices that could pass
> a positive value (while others passed a negative one), but by taking a look
> at the code now it seems that all supported devices pass a negative value,
> so your suggestion makes sense.
>
>> 
>> >  void
>> >  bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
>> >          enum BlockAcctType type)
>> > diff --git a/block.h b/block.h
>> > index 16bfa0a..de74af0 100644
>> > --- a/block.h
>> > +++ b/block.h
>> > @@ -77,6 +77,15 @@ typedef enum {
>> >      BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
>> >  } BlockMonEventAction;
>> >  
>> > +typedef enum {
>> > +    BDRV_IOS_INVAL, BDRV_IOS_OK, BDRV_IOS_FAILED, BDRV_IOS_ENOSPC,
>> > +    BDRV_IOS_MAX
>> > +} BlockIOStatus;
>> 
>> Why is this in block.h?
>
> I followed BlockErrorAction, you suggest block_int.h?

I guess I'd put it in block_int.h, just because I can.  No biggie,
though.

[...]
Luiz Capitulino Sept. 23, 2011, 7:41 p.m. UTC | #6
On Fri, 23 Sep 2011 17:36:53 +0200
Markus Armbruster <armbru@redhat.com> wrote:

> Luiz Capitulino <lcapitulino@redhat.com> writes:
> 
> > On Fri, 23 Sep 2011 09:51:24 +0200
> > Markus Armbruster <armbru@redhat.com> wrote:
> >
> >> Luiz Capitulino <lcapitulino@redhat.com> writes:
> >> 
> >> > This commit adds support to the BlockDriverState type to keep track
> >> > of devices' I/O status.
> >> >
> >> > There are three possible status: BDRV_IOS_OK (no error), BDRV_IOS_ENOSPC
> >> > (no space error) and BDRV_IOS_FAILED (any other error). The distinction
> >> > between no space and other errors is important because a management
> >> > application may want to watch for no space in order to extend the
> >> > space assigned to the VM and put it to run again.
> >> >
> >> > Qemu devices supporting the I/O status feature have to enable it
> >> > explicitly by calling bdrv_iostatus_enable() _and_ have to be
> >> > configured to stop the VM on errors (ie. werror=stop|enospc or
> >> > rerror=stop).
> >> >
> >> > In case of multiple errors being triggered in sequence only the first
> >> > one is stored. The I/O status is always reset to BDRV_IOS_OK when the
> >> > 'cont' command is issued.
> >> >
> >> > Next commits will add support to some devices and extend the
> >> > query-block/info block commands to return the I/O status information.
> >> >
> >> > Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
> >> > ---
> >> >  block.c     |   32 ++++++++++++++++++++++++++++++++
> >> >  block.h     |    9 +++++++++
> >> >  block_int.h |    1 +
> >> >  monitor.c   |    6 ++++++
> >> >  4 files changed, 48 insertions(+), 0 deletions(-)
> >> >
> >> > diff --git a/block.c b/block.c
> >> > index e3fe97f..fbd90b4 100644
> >> > --- a/block.c
> >> > +++ b/block.c
> >> > @@ -221,6 +221,7 @@ BlockDriverState *bdrv_new(const char *device_name)
> >> >      if (device_name[0] != '\0') {
> >> >          QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
> >> >      }
> >> > +    bs->iostatus = BDRV_IOS_INVAL;
> >> >      return bs;
> >> >  }
> >> >  
> >> > @@ -3181,6 +3182,37 @@ int bdrv_in_use(BlockDriverState *bs)
> >> >      return bs->in_use;
> >> >  }
> >> >  
> >> > +void bdrv_iostatus_enable(BlockDriverState *bs)
> >> > +{
> >> > +    assert(bs->iostatus == BDRV_IOS_INVAL);
> >> > +    bs->iostatus = BDRV_IOS_OK;
> >> > +}
> >> 
> >> bdrv_new() creates the BDS with I/O status tracking disabled.  Devices
> >> that do tracking declare that by calling this function during
> >> initialization.  Enables tracking if the BDS has suitable on_write_error
> >> and on_read_error.
> >> 
> >> If a device gets hot unplugged, tracking remains enabled.  If the BDS
> >> then gets reused with a device that doesn't do tracking, I/O status
> >> becomes incorrect.  Can't happen right now, because we automatically
> >> delete the BDS on hot unplug, but it's a trap.
> >> 
> >> Suggest to disable tracking in bdrv_detach_dev() or bdrv_attach_dev().
> >
> > Ok, and in bdrv_close() as Zhi Yong said.
> 
> Disabling in bdrv_close() makes the status go away on eject.  Makes some
> sense, in a way.  Also complicates the simple rule "status persists from
> stop to cont".  Hmm, consider the following race:
> 
> 1. Management app sends eject command
> 
> 2. qemu gets read error on the same drive, VM stops, I/O status set,
> QEVENT_BLOCK_IO_ERROR sent to management app.
> 
> 3. qemu receives eject command, bdrv_close() resets I/O status
> 
> 4. Management receives QEVENT_BLOCK_IO_ERROR, and would like to find out
> which drive caused it.
> 
> If 2 happens before 3, I/O status is lost.

Honestly speaking, I didn't intend to make it persistent on eject. Do we really
want to support this? I find it a bit confusing to have the I/O status on
something that has no media. In that case we should only return the io-status
info if the media is inserted.

This way, if the VM stops due to an I/O error and the media is ejected, a mngt
application would:

 1. Issue the query-block and find no guilt
 2. Issue cont
 3. The VM would execute normally

Of course that we have to be clear about it in the documentation and a
mngt app has to be prepared to deal with it.

> >> > +
> >> > +/* The I/O status is only enabled if the drive explicitly
> >> > + * enables it _and_ the VM is configured to stop on errors */
> >> > +bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
> >> > +{
> >> > +    return (bs->iostatus != BDRV_IOS_INVAL &&
> >> > +           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
> >> > +            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
> >> > +            bs->on_read_error == BLOCK_ERR_STOP_ANY));
> >> > +}
> >> > +
> >> > +void bdrv_iostatus_reset(BlockDriverState *bs)
> >> > +{
> >> > +    if (bdrv_iostatus_is_enabled(bs)) {
> >> > +        bs->iostatus = BDRV_IOS_OK;
> >> > +    }
> >> > +}
> >> > +
> >> > +void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
> >> > +{
> >> > +    if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
> >> > +        bs->iostatus = (abs(error) == ENOSPC) ? BDRV_IOS_ENOSPC :
> >> > +                                                BDRV_IOS_FAILED;
> >> > +    }
> >> > +}
> >> > +
> >> 
> >> abs(error) feels... unusual.
> >> 
> >> If you want to guard against callers passing wrongly signed values, why
> >> not simply assert(error >= 0)?
> >
> > Yes. Actually, I thought that there were existing devices that could pass
> > a positive value (while others passed a negative one), but by taking a look
> > at the code now it seems that all supported devices pass a negative value,
> > so your suggestion makes sense.
> >
> >> 
> >> >  void
> >> >  bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
> >> >          enum BlockAcctType type)
> >> > diff --git a/block.h b/block.h
> >> > index 16bfa0a..de74af0 100644
> >> > --- a/block.h
> >> > +++ b/block.h
> >> > @@ -77,6 +77,15 @@ typedef enum {
> >> >      BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
> >> >  } BlockMonEventAction;
> >> >  
> >> > +typedef enum {
> >> > +    BDRV_IOS_INVAL, BDRV_IOS_OK, BDRV_IOS_FAILED, BDRV_IOS_ENOSPC,
> >> > +    BDRV_IOS_MAX
> >> > +} BlockIOStatus;
> >> 
> >> Why is this in block.h?
> >
> > I followed BlockErrorAction, you suggest block_int.h?
> 
> I guess I'd put it in block_int.h, just because I can.  No biggie,
> though.
> 
> [...]
>
Markus Armbruster Sept. 26, 2011, 9:34 a.m. UTC | #7
Luiz Capitulino <lcapitulino@redhat.com> writes:

> On Fri, 23 Sep 2011 17:36:53 +0200
> Markus Armbruster <armbru@redhat.com> wrote:
>
>> Luiz Capitulino <lcapitulino@redhat.com> writes:
>> 
>> > On Fri, 23 Sep 2011 09:51:24 +0200
>> > Markus Armbruster <armbru@redhat.com> wrote:
>> >
>> >> Luiz Capitulino <lcapitulino@redhat.com> writes:
>> >> 
>> >> > This commit adds support to the BlockDriverState type to keep track
>> >> > of devices' I/O status.
>> >> >
>> >> > There are three possible status: BDRV_IOS_OK (no error), BDRV_IOS_ENOSPC
>> >> > (no space error) and BDRV_IOS_FAILED (any other error). The distinction
>> >> > between no space and other errors is important because a management
>> >> > application may want to watch for no space in order to extend the
>> >> > space assigned to the VM and put it to run again.
>> >> >
>> >> > Qemu devices supporting the I/O status feature have to enable it
>> >> > explicitly by calling bdrv_iostatus_enable() _and_ have to be
>> >> > configured to stop the VM on errors (ie. werror=stop|enospc or
>> >> > rerror=stop).
>> >> >
>> >> > In case of multiple errors being triggered in sequence only the first
>> >> > one is stored. The I/O status is always reset to BDRV_IOS_OK when the
>> >> > 'cont' command is issued.
>> >> >
>> >> > Next commits will add support to some devices and extend the
>> >> > query-block/info block commands to return the I/O status information.
>> >> >
>> >> > Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
>> >> > ---
>> >> >  block.c     |   32 ++++++++++++++++++++++++++++++++
>> >> >  block.h     |    9 +++++++++
>> >> >  block_int.h |    1 +
>> >> >  monitor.c   |    6 ++++++
>> >> >  4 files changed, 48 insertions(+), 0 deletions(-)
>> >> >
>> >> > diff --git a/block.c b/block.c
>> >> > index e3fe97f..fbd90b4 100644
>> >> > --- a/block.c
>> >> > +++ b/block.c
>> >> > @@ -221,6 +221,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>> >> >      if (device_name[0] != '\0') {
>> >> >          QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
>> >> >      }
>> >> > +    bs->iostatus = BDRV_IOS_INVAL;
>> >> >      return bs;
>> >> >  }
>> >> >  
>> >> > @@ -3181,6 +3182,37 @@ int bdrv_in_use(BlockDriverState *bs)
>> >> >      return bs->in_use;
>> >> >  }
>> >> >  
>> >> > +void bdrv_iostatus_enable(BlockDriverState *bs)
>> >> > +{
>> >> > +    assert(bs->iostatus == BDRV_IOS_INVAL);
>> >> > +    bs->iostatus = BDRV_IOS_OK;
>> >> > +}
>> >> 
>> >> bdrv_new() creates the BDS with I/O status tracking disabled.  Devices
>> >> that do tracking declare that by calling this function during
>> >> initialization.  Enables tracking if the BDS has suitable on_write_error
>> >> and on_read_error.
>> >> 
>> >> If a device gets hot unplugged, tracking remains enabled.  If the BDS
>> >> then gets reused with a device that doesn't do tracking, I/O status
>> >> becomes incorrect.  Can't happen right now, because we automatically
>> >> delete the BDS on hot unplug, but it's a trap.
>> >> 
>> >> Suggest to disable tracking in bdrv_detach_dev() or bdrv_attach_dev().
>> >
>> > Ok, and in bdrv_close() as Zhi Yong said.
>> 
>> Disabling in bdrv_close() makes the status go away on eject.  Makes some
>> sense, in a way.  Also complicates the simple rule "status persists from
>> stop to cont".  Hmm, consider the following race:
>> 
>> 1. Management app sends eject command
>> 
>> 2. qemu gets read error on the same drive, VM stops, I/O status set,
>> QEVENT_BLOCK_IO_ERROR sent to management app.
>> 
>> 3. qemu receives eject command, bdrv_close() resets I/O status
>> 
>> 4. Management receives QEVENT_BLOCK_IO_ERROR, and would like to find out
>> which drive caused it.
>> 
>> If 2 happens before 3, I/O status is lost.
>
> Honestly speaking, I didn't intend to make it persistent on eject. Do we really
> want to support this? I find it a bit confusing to have the I/O status on
> something that has no media. In that case we should only return the io-status
> info if the media is inserted.
>
> This way, if the VM stops due to an I/O error and the media is ejected, a mngt
> application would:
>
>  1. Issue the query-block and find no guilt
>  2. Issue cont
>  3. The VM would execute normally
>
> Of course that we have to be clear about it in the documentation and a
> mngt app has to be prepared to deal with it.

The management app may well want to know that we hit errors on the
media, even when the error happens close to an eject.  In particular,
they probably want to alert their users on write errors.  Just because
you eject a medium doesn't mean you're no longer interested in the data
on it ;)

[...]
Luiz Capitulino Sept. 26, 2011, 12:49 p.m. UTC | #8
On Mon, 26 Sep 2011 11:34:34 +0200
Markus Armbruster <armbru@redhat.com> wrote:

> Luiz Capitulino <lcapitulino@redhat.com> writes:
> 
> > On Fri, 23 Sep 2011 17:36:53 +0200
> > Markus Armbruster <armbru@redhat.com> wrote:
> >
> >> Luiz Capitulino <lcapitulino@redhat.com> writes:
> >> 
> >> > On Fri, 23 Sep 2011 09:51:24 +0200
> >> > Markus Armbruster <armbru@redhat.com> wrote:
> >> >
> >> >> Luiz Capitulino <lcapitulino@redhat.com> writes:
> >> >> 
> >> >> > This commit adds support to the BlockDriverState type to keep track
> >> >> > of devices' I/O status.
> >> >> >
> >> >> > There are three possible status: BDRV_IOS_OK (no error), BDRV_IOS_ENOSPC
> >> >> > (no space error) and BDRV_IOS_FAILED (any other error). The distinction
> >> >> > between no space and other errors is important because a management
> >> >> > application may want to watch for no space in order to extend the
> >> >> > space assigned to the VM and put it to run again.
> >> >> >
> >> >> > Qemu devices supporting the I/O status feature have to enable it
> >> >> > explicitly by calling bdrv_iostatus_enable() _and_ have to be
> >> >> > configured to stop the VM on errors (ie. werror=stop|enospc or
> >> >> > rerror=stop).
> >> >> >
> >> >> > In case of multiple errors being triggered in sequence only the first
> >> >> > one is stored. The I/O status is always reset to BDRV_IOS_OK when the
> >> >> > 'cont' command is issued.
> >> >> >
> >> >> > Next commits will add support to some devices and extend the
> >> >> > query-block/info block commands to return the I/O status information.
> >> >> >
> >> >> > Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
> >> >> > ---
> >> >> >  block.c     |   32 ++++++++++++++++++++++++++++++++
> >> >> >  block.h     |    9 +++++++++
> >> >> >  block_int.h |    1 +
> >> >> >  monitor.c   |    6 ++++++
> >> >> >  4 files changed, 48 insertions(+), 0 deletions(-)
> >> >> >
> >> >> > diff --git a/block.c b/block.c
> >> >> > index e3fe97f..fbd90b4 100644
> >> >> > --- a/block.c
> >> >> > +++ b/block.c
> >> >> > @@ -221,6 +221,7 @@ BlockDriverState *bdrv_new(const char *device_name)
> >> >> >      if (device_name[0] != '\0') {
> >> >> >          QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
> >> >> >      }
> >> >> > +    bs->iostatus = BDRV_IOS_INVAL;
> >> >> >      return bs;
> >> >> >  }
> >> >> >  
> >> >> > @@ -3181,6 +3182,37 @@ int bdrv_in_use(BlockDriverState *bs)
> >> >> >      return bs->in_use;
> >> >> >  }
> >> >> >  
> >> >> > +void bdrv_iostatus_enable(BlockDriverState *bs)
> >> >> > +{
> >> >> > +    assert(bs->iostatus == BDRV_IOS_INVAL);
> >> >> > +    bs->iostatus = BDRV_IOS_OK;
> >> >> > +}
> >> >> 
> >> >> bdrv_new() creates the BDS with I/O status tracking disabled.  Devices
> >> >> that do tracking declare that by calling this function during
> >> >> initialization.  Enables tracking if the BDS has suitable on_write_error
> >> >> and on_read_error.
> >> >> 
> >> >> If a device gets hot unplugged, tracking remains enabled.  If the BDS
> >> >> then gets reused with a device that doesn't do tracking, I/O status
> >> >> becomes incorrect.  Can't happen right now, because we automatically
> >> >> delete the BDS on hot unplug, but it's a trap.
> >> >> 
> >> >> Suggest to disable tracking in bdrv_detach_dev() or bdrv_attach_dev().
> >> >
> >> > Ok, and in bdrv_close() as Zhi Yong said.
> >> 
> >> Disabling in bdrv_close() makes the status go away on eject.  Makes some
> >> sense, in a way.  Also complicates the simple rule "status persists from
> >> stop to cont".  Hmm, consider the following race:
> >> 
> >> 1. Management app sends eject command
> >> 
> >> 2. qemu gets read error on the same drive, VM stops, I/O status set,
> >> QEVENT_BLOCK_IO_ERROR sent to management app.
> >> 
> >> 3. qemu receives eject command, bdrv_close() resets I/O status
> >> 
> >> 4. Management receives QEVENT_BLOCK_IO_ERROR, and would like to find out
> >> which drive caused it.
> >> 
> >> If 2 happens before 3, I/O status is lost.
> >
> > Honestly speaking, I didn't intend to make it persistent on eject. Do we really
> > want to support this? I find it a bit confusing to have the I/O status on
> > something that has no media. In that case we should only return the io-status
> > info if the media is inserted.
> >
> > This way, if the VM stops due to an I/O error and the media is ejected, a mngt
> > application would:
> >
> >  1. Issue the query-block and find no guilt
> >  2. Issue cont
> >  3. The VM would execute normally
> >
> > Of course that we have to be clear about it in the documentation and a
> > mngt app has to be prepared to deal with it.
> 
> The management app may well want to know that we hit errors on the
> media, even when the error happens close to an eject.  In particular,
> they probably want to alert their users on write errors.  Just because
> you eject a medium doesn't mean you're no longer interested in the data
> on it ;)

Ok, so I'll have to reset the I/O status on bdrv_open() too, so that
a new inserted media won't contain the previous media status.
Markus Armbruster Sept. 26, 2011, 1:41 p.m. UTC | #9
Luiz Capitulino <lcapitulino@redhat.com> writes:

> On Mon, 26 Sep 2011 11:34:34 +0200
> Markus Armbruster <armbru@redhat.com> wrote:
>
>> Luiz Capitulino <lcapitulino@redhat.com> writes:
>> 
>> > On Fri, 23 Sep 2011 17:36:53 +0200
>> > Markus Armbruster <armbru@redhat.com> wrote:
>> >
>> >> Luiz Capitulino <lcapitulino@redhat.com> writes:
>> >> 
>> >> > On Fri, 23 Sep 2011 09:51:24 +0200
>> >> > Markus Armbruster <armbru@redhat.com> wrote:
>> >> >
>> >> >> Luiz Capitulino <lcapitulino@redhat.com> writes:
>> >> >> 
>> >> >> > This commit adds support to the BlockDriverState type to keep track
>> >> >> > of devices' I/O status.
>> >> >> >
>> >> >> > There are three possible status: BDRV_IOS_OK (no error), BDRV_IOS_ENOSPC
>> >> >> > (no space error) and BDRV_IOS_FAILED (any other error). The distinction
>> >> >> > between no space and other errors is important because a management
>> >> >> > application may want to watch for no space in order to extend the
>> >> >> > space assigned to the VM and put it to run again.
>> >> >> >
>> >> >> > Qemu devices supporting the I/O status feature have to enable it
>> >> >> > explicitly by calling bdrv_iostatus_enable() _and_ have to be
>> >> >> > configured to stop the VM on errors (ie. werror=stop|enospc or
>> >> >> > rerror=stop).
>> >> >> >
>> >> >> > In case of multiple errors being triggered in sequence only the first
>> >> >> > one is stored. The I/O status is always reset to BDRV_IOS_OK when the
>> >> >> > 'cont' command is issued.
>> >> >> >
>> >> >> > Next commits will add support to some devices and extend the
>> >> >> > query-block/info block commands to return the I/O status information.
>> >> >> >
>> >> >> > Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
>> >> >> > ---
>> >> >> >  block.c     |   32 ++++++++++++++++++++++++++++++++
>> >> >> >  block.h     |    9 +++++++++
>> >> >> >  block_int.h |    1 +
>> >> >> >  monitor.c   |    6 ++++++
>> >> >> >  4 files changed, 48 insertions(+), 0 deletions(-)
>> >> >> >
>> >> >> > diff --git a/block.c b/block.c
>> >> >> > index e3fe97f..fbd90b4 100644
>> >> >> > --- a/block.c
>> >> >> > +++ b/block.c
>> >> >> > @@ -221,6 +221,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>> >> >> >      if (device_name[0] != '\0') {
>> >> >> >          QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
>> >> >> >      }
>> >> >> > +    bs->iostatus = BDRV_IOS_INVAL;
>> >> >> >      return bs;
>> >> >> >  }
>> >> >> >  
>> >> >> > @@ -3181,6 +3182,37 @@ int bdrv_in_use(BlockDriverState *bs)
>> >> >> >      return bs->in_use;
>> >> >> >  }
>> >> >> >  
>> >> >> > +void bdrv_iostatus_enable(BlockDriverState *bs)
>> >> >> > +{
>> >> >> > +    assert(bs->iostatus == BDRV_IOS_INVAL);
>> >> >> > +    bs->iostatus = BDRV_IOS_OK;
>> >> >> > +}
>> >> >> 
>> >> >> bdrv_new() creates the BDS with I/O status tracking disabled.  Devices
>> >> >> that do tracking declare that by calling this function during
>> >> >> initialization.  Enables tracking if the BDS has suitable on_write_error
>> >> >> and on_read_error.
>> >> >> 
>> >> >> If a device gets hot unplugged, tracking remains enabled.  If the BDS
>> >> >> then gets reused with a device that doesn't do tracking, I/O status
>> >> >> becomes incorrect.  Can't happen right now, because we automatically
>> >> >> delete the BDS on hot unplug, but it's a trap.
>> >> >> 
>> >> >> Suggest to disable tracking in bdrv_detach_dev() or bdrv_attach_dev().
>> >> >
>> >> > Ok, and in bdrv_close() as Zhi Yong said.
>> >> 
>> >> Disabling in bdrv_close() makes the status go away on eject.  Makes some
>> >> sense, in a way.  Also complicates the simple rule "status persists from
>> >> stop to cont".  Hmm, consider the following race:
>> >> 
>> >> 1. Management app sends eject command
>> >> 
>> >> 2. qemu gets read error on the same drive, VM stops, I/O status set,
>> >> QEVENT_BLOCK_IO_ERROR sent to management app.
>> >> 
>> >> 3. qemu receives eject command, bdrv_close() resets I/O status
>> >> 
>> >> 4. Management receives QEVENT_BLOCK_IO_ERROR, and would like to find out
>> >> which drive caused it.
>> >> 
>> >> If 2 happens before 3, I/O status is lost.
>> >
>> > Honestly speaking, I didn't intend to make it persistent on eject. Do we really
>> > want to support this? I find it a bit confusing to have the I/O status on
>> > something that has no media. In that case we should only return the io-status
>> > info if the media is inserted.
>> >
>> > This way, if the VM stops due to an I/O error and the media is ejected, a mngt
>> > application would:
>> >
>> >  1. Issue the query-block and find no guilt
>> >  2. Issue cont
>> >  3. The VM would execute normally
>> >
>> > Of course that we have to be clear about it in the documentation and a
>> > mngt app has to be prepared to deal with it.
>> 
>> The management app may well want to know that we hit errors on the
>> media, even when the error happens close to an eject.  In particular,
>> they probably want to alert their users on write errors.  Just because
>> you eject a medium doesn't mean you're no longer interested in the data
>> on it ;)
>
> Ok, so I'll have to reset the I/O status on bdrv_open() too, so that
> a new inserted media won't contain the previous media status.

Same race with "change" (which calls bdrv_close() and bdrv_open())
instead of "eject" (which only calls bdrv_close()).

I figure we better define the problem away, like this: I/O status is
what made the VM stop.  It's valid from VM stop to VM continue or drive
delete, whatever comes first.  In particular, media change doesn't
affect it.

The only alternative I can see right now is making media change fail
while I/O status isn't okay, but that "cure" looks far worse than the
disease, and I'm not at all sure it's sufficient.

Two more instances of the race, both caused by destroying the drive (and
thus I/O status) while it's being used by a device (and thus prone to
getting I/O status set):

1. Device hot unplug while drive is not empty

Possible fix: stop auto-destroying drives on device unplug.  Then a
management app can unplug the device, wait until it's gone, then delete
the drive.  Safe, because I/O status can't change anymore once the
device detached from the drive.

2. drive_del of non-empty drive while a device is attached.

Possible fix: split into detach and delete.
diff mbox

Patch

diff --git a/block.c b/block.c
index e3fe97f..fbd90b4 100644
--- a/block.c
+++ b/block.c
@@ -221,6 +221,7 @@  BlockDriverState *bdrv_new(const char *device_name)
     if (device_name[0] != '\0') {
         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
     }
+    bs->iostatus = BDRV_IOS_INVAL;
     return bs;
 }
 
@@ -3181,6 +3182,37 @@  int bdrv_in_use(BlockDriverState *bs)
     return bs->in_use;
 }
 
+void bdrv_iostatus_enable(BlockDriverState *bs)
+{
+    assert(bs->iostatus == BDRV_IOS_INVAL);
+    bs->iostatus = BDRV_IOS_OK;
+}
+
+/* The I/O status is only enabled if the drive explicitly
+ * enables it _and_ the VM is configured to stop on errors */
+bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
+{
+    return (bs->iostatus != BDRV_IOS_INVAL &&
+           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
+            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
+            bs->on_read_error == BLOCK_ERR_STOP_ANY));
+}
+
+void bdrv_iostatus_reset(BlockDriverState *bs)
+{
+    if (bdrv_iostatus_is_enabled(bs)) {
+        bs->iostatus = BDRV_IOS_OK;
+    }
+}
+
+void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
+{
+    if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
+        bs->iostatus = (abs(error) == ENOSPC) ? BDRV_IOS_ENOSPC :
+                                                BDRV_IOS_FAILED;
+    }
+}
+
 void
 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
         enum BlockAcctType type)
diff --git a/block.h b/block.h
index 16bfa0a..de74af0 100644
--- a/block.h
+++ b/block.h
@@ -77,6 +77,15 @@  typedef enum {
     BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
 } BlockMonEventAction;
 
+typedef enum {
+    BDRV_IOS_INVAL, BDRV_IOS_OK, BDRV_IOS_FAILED, BDRV_IOS_ENOSPC,
+    BDRV_IOS_MAX
+} BlockIOStatus;
+
+void bdrv_iostatus_enable(BlockDriverState *bs);
+void bdrv_iostatus_reset(BlockDriverState *bs);
+bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
+void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
 void bdrv_mon_event(const BlockDriverState *bdrv,
                     BlockMonEventAction action, int is_read);
 void bdrv_info_print(Monitor *mon, const QObject *data);
diff --git a/block_int.h b/block_int.h
index 8c3b863..f2f4f2d 100644
--- a/block_int.h
+++ b/block_int.h
@@ -199,6 +199,7 @@  struct BlockDriverState {
        drivers. They are not used by the block driver */
     int cyls, heads, secs, translation;
     BlockErrorAction on_read_error, on_write_error;
+    BlockIOStatus iostatus;
     char device_name[32];
     unsigned long *dirty_bitmap;
     int64_t dirty_count;
diff --git a/monitor.c b/monitor.c
index 8ec2c5e..88d8228 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1304,6 +1304,11 @@  struct bdrv_iterate_context {
     int err;
 };
 
+static void iostatus_bdrv_it(void *opaque, BlockDriverState *bs)
+{
+    bdrv_iostatus_reset(bs);
+}
+
 /**
  * do_cont(): Resume emulation.
  */
@@ -1320,6 +1325,7 @@  static int do_cont(Monitor *mon, const QDict *qdict, QObject **ret_data)
         return -1;
     }
 
+    bdrv_iterate(iostatus_bdrv_it, NULL);
     bdrv_iterate(encrypted_bdrv_it, &context);
     /* only resume the vm if all keys are set and valid */
     if (!context.err) {