diff mbox

[3/7] block: raw-posix image file reopen

Message ID 7ad1da59f3ef71e4c99c083a0675508ca9279c53.1346352124.git.jcody@redhat.com
State New
Headers show

Commit Message

Jeff Cody Aug. 30, 2012, 6:47 p.m. UTC
This is derived from the Supriya Kannery's reopen patches.

This contains the raw-posix driver changes for the bdrv_reopen_*
functions.  All changes are staged into a temporary scratch buffer
during the prepare() stage, and copied over to the live structure
during commit().  Upon abort(), all changes are abandoned, and the
live structures are unmodified.

The _prepare() will create an extra fd - either by means of a dup,
if possible, or opening a new fd if not (for instance, access
control changes).  Upon _commit(), the original fd is closed and
the new fd is used.  Upon _abort(), the duplicate/new fd is closed.

Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/raw-posix.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 139 insertions(+), 14 deletions(-)

Comments

Eric Blake Aug. 30, 2012, 10:15 p.m. UTC | #1
On 08/30/2012 11:47 AM, Jeff Cody wrote:
> This is derived from the Supriya Kannery's reopen patches.
> 
> This contains the raw-posix driver changes for the bdrv_reopen_*
> functions.  All changes are staged into a temporary scratch buffer
> during the prepare() stage, and copied over to the live structure
> during commit().  Upon abort(), all changes are abandoned, and the
> live structures are unmodified.
> 
> The _prepare() will create an extra fd - either by means of a dup,
> if possible, or opening a new fd if not (for instance, access
> control changes).  Upon _commit(), the original fd is closed and
> the new fd is used.  Upon _abort(), the duplicate/new fd is closed.
> 

> +    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
> +        /* dup the original fd */
> +        /* TODO: use qemu fcntl wrapper */
> +        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);

I assume this TODO has to be fixed to allow compilation on systems that
lack F_DUPFD_CLOEXEC.

> +        if (raw_s->fd == -1) {
> +            ret = -1;
> +            goto error;
> +        }
> +        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
> +    } else {
> +        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);

Is raw_s->open_flags every going to contain O_CREAT, or is the 0644 mode
argument spurious?
Jeff Cody Aug. 31, 2012, 2:42 p.m. UTC | #2
On 08/30/2012 06:15 PM, Eric Blake wrote:
> On 08/30/2012 11:47 AM, Jeff Cody wrote:
>> This is derived from the Supriya Kannery's reopen patches.
>>
>> This contains the raw-posix driver changes for the bdrv_reopen_*
>> functions.  All changes are staged into a temporary scratch buffer
>> during the prepare() stage, and copied over to the live structure
>> during commit().  Upon abort(), all changes are abandoned, and the
>> live structures are unmodified.
>>
>> The _prepare() will create an extra fd - either by means of a dup,
>> if possible, or opening a new fd if not (for instance, access
>> control changes).  Upon _commit(), the original fd is closed and
>> the new fd is used.  Upon _abort(), the duplicate/new fd is closed.
>>
> 
>> +    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
>> +        /* dup the original fd */
>> +        /* TODO: use qemu fcntl wrapper */
>> +        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
> 
> I assume this TODO has to be fixed to allow compilation on systems that
> lack F_DUPFD_CLOEXEC.

Yes, either that or add the logic here.

> 
>> +        if (raw_s->fd == -1) {
>> +            ret = -1;
>> +            goto error;
>> +        }
>> +        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
>> +    } else {
>> +        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);
> 
> Is raw_s->open_flags every going to contain O_CREAT, or is the 0644 mode
> argument spurious?
>

Thanks, you are right, it is spurious.  The raw_s->open_flags are
explicitly set via raw_parse_flags(), so we know it will never contain
O_CREAT.
Kevin Wolf Aug. 31, 2012, 2:49 p.m. UTC | #3
Am 31.08.2012 16:42, schrieb Jeff Cody:
> On 08/30/2012 06:15 PM, Eric Blake wrote:
>> On 08/30/2012 11:47 AM, Jeff Cody wrote:
>>> This is derived from the Supriya Kannery's reopen patches.
>>>
>>> This contains the raw-posix driver changes for the bdrv_reopen_*
>>> functions.  All changes are staged into a temporary scratch buffer
>>> during the prepare() stage, and copied over to the live structure
>>> during commit().  Upon abort(), all changes are abandoned, and the
>>> live structures are unmodified.
>>>
>>> The _prepare() will create an extra fd - either by means of a dup,
>>> if possible, or opening a new fd if not (for instance, access
>>> control changes).  Upon _commit(), the original fd is closed and
>>> the new fd is used.  Upon _abort(), the duplicate/new fd is closed.
>>>
>>
>>> +    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
>>> +        /* dup the original fd */
>>> +        /* TODO: use qemu fcntl wrapper */
>>> +        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
>>
>> I assume this TODO has to be fixed to allow compilation on systems that
>> lack F_DUPFD_CLOEXEC.
> 
> Yes, either that or add the logic here.

Would qemu_dup_flags() from osdep.c be the right thing here? It was
introduces with Corey's fd passing series.

>>> +        if (raw_s->fd == -1) {
>>> +            ret = -1;
>>> +            goto error;
>>> +        }
>>> +        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
>>> +    } else {
>>> +        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);
>>
>> Is raw_s->open_flags every going to contain O_CREAT, or is the 0644 mode
>> argument spurious?
> 
> Thanks, you are right, it is spurious.  The raw_s->open_flags are
> explicitly set via raw_parse_flags(), so we know it will never contain
> O_CREAT.

We can probably assert it.

Kevin
Jeff Cody Aug. 31, 2012, 3:10 p.m. UTC | #4
On 08/31/2012 10:49 AM, Kevin Wolf wrote:
> Am 31.08.2012 16:42, schrieb Jeff Cody:
>> On 08/30/2012 06:15 PM, Eric Blake wrote:
>>> On 08/30/2012 11:47 AM, Jeff Cody wrote:
>>>> This is derived from the Supriya Kannery's reopen patches.
>>>>
>>>> This contains the raw-posix driver changes for the bdrv_reopen_*
>>>> functions.  All changes are staged into a temporary scratch buffer
>>>> during the prepare() stage, and copied over to the live structure
>>>> during commit().  Upon abort(), all changes are abandoned, and the
>>>> live structures are unmodified.
>>>>
>>>> The _prepare() will create an extra fd - either by means of a dup,
>>>> if possible, or opening a new fd if not (for instance, access
>>>> control changes).  Upon _commit(), the original fd is closed and
>>>> the new fd is used.  Upon _abort(), the duplicate/new fd is closed.
>>>>
>>>
>>>> +    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
>>>> +        /* dup the original fd */
>>>> +        /* TODO: use qemu fcntl wrapper */
>>>> +        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
>>>
>>> I assume this TODO has to be fixed to allow compilation on systems that
>>> lack F_DUPFD_CLOEXEC.
>>
>> Yes, either that or add the logic here.
> 
> Would qemu_dup_flags() from osdep.c be the right thing here? It was
> introduces with Corey's fd passing series.

I think so - that is the one I was thinking about.  It would just need
to be made non-static.

> 
>>>> +        if (raw_s->fd == -1) {
>>>> +            ret = -1;
>>>> +            goto error;
>>>> +        }
>>>> +        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
>>>> +    } else {
>>>> +        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);
>>>
>>> Is raw_s->open_flags every going to contain O_CREAT, or is the 0644 mode
>>> argument spurious?
>>
>> Thanks, you are right, it is spurious.  The raw_s->open_flags are
>> explicitly set via raw_parse_flags(), so we know it will never contain
>> O_CREAT.
> 
> We can probably assert it.
>

OK
Kevin Wolf Sept. 5, 2012, 3:30 p.m. UTC | #5
Am 30.08.2012 20:47, schrieb Jeff Cody:
> This is derived from the Supriya Kannery's reopen patches.
> 
> This contains the raw-posix driver changes for the bdrv_reopen_*
> functions.  All changes are staged into a temporary scratch buffer
> during the prepare() stage, and copied over to the live structure
> during commit().  Upon abort(), all changes are abandoned, and the
> live structures are unmodified.
> 
> The _prepare() will create an extra fd - either by means of a dup,
> if possible, or opening a new fd if not (for instance, access
> control changes).  Upon _commit(), the original fd is closed and
> the new fd is used.  Upon _abort(), the duplicate/new fd is closed.
> 
> Signed-off-by: Jeff Cody <jcody@redhat.com>
> ---
>  block/raw-posix.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 139 insertions(+), 14 deletions(-)
> 
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index 6be20b1..48086d7 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -140,6 +140,15 @@ typedef struct BDRVRawState {
>  #endif
>  } BDRVRawState;
>  
> +typedef struct BDRVRawReopenState {
> +    BDRVReopenState reopen_state;
> +    int fd;
> +    int open_flags;
> +    uint8_t *aligned_buf;
> +    unsigned aligned_buf_size;
> +    BDRVRawState *stash_s;
> +} BDRVRawReopenState;
> +
>  static int fd_open(BlockDriverState *bs);
>  static int64_t raw_getlength(BlockDriverState *bs);
>  
> @@ -185,6 +194,28 @@ static int raw_normalize_devicepath(const char **filename)
>  }
>  #endif
>  
> +static void raw_parse_flags(int bdrv_flags, int *open_flags)
> +{
> +    assert(open_flags != NULL);
> +
> +    *open_flags |= O_BINARY;
> +    *open_flags &= ~O_ACCMODE;
> +    if (bdrv_flags & BDRV_O_RDWR) {
> +        *open_flags |= O_RDWR;
> +    } else {
> +        *open_flags |= O_RDONLY;
> +    }
> +
> +    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
> +     * and O_DIRECT for no caching. */
> +    if ((bdrv_flags & BDRV_O_NOCACHE)) {
> +        *open_flags |= O_DIRECT;
> +    }
> +    if (!(bdrv_flags & BDRV_O_CACHE_WB)) {
> +        *open_flags |= O_DSYNC;
> +    }
> +}

The code motion would ideally be a separate patch.

> +
>  static int raw_open_common(BlockDriverState *bs, const char *filename,
>                             int bdrv_flags, int open_flags)
>  {
> @@ -196,20 +227,8 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
>          return ret;
>      }
>  
> -    s->open_flags = open_flags | O_BINARY;
> -    s->open_flags &= ~O_ACCMODE;
> -    if (bdrv_flags & BDRV_O_RDWR) {
> -        s->open_flags |= O_RDWR;
> -    } else {
> -        s->open_flags |= O_RDONLY;
> -    }
> -
> -    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
> -     * and O_DIRECT for no caching. */
> -    if ((bdrv_flags & BDRV_O_NOCACHE))
> -        s->open_flags |= O_DIRECT;
> -    if (!(bdrv_flags & BDRV_O_CACHE_WB))
> -        s->open_flags |= O_DSYNC;
> +    s->open_flags = open_flags;
> +    raw_parse_flags(bdrv_flags, &s->open_flags);
>  
>      s->fd = -1;
>      fd = qemu_open(filename, s->open_flags, 0644);
> @@ -283,6 +302,109 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
>      return raw_open_common(bs, filename, flags, 0);
>  }
>  
> +static int raw_reopen_prepare(BDRVReopenState *state, Error **errp)
> +{
> +    BDRVRawState *s;
> +    BDRVRawReopenState *raw_s;
> +    int ret = 0;
> +
> +    assert(state != NULL);
> +    assert(state->bs != NULL);
> +
> +    s = state->bs->opaque;
> +
> +    state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
> +    raw_s = state->opaque;
> +
> +    raw_parse_flags(state->flags, &raw_s->open_flags);
> +
> +    /*
> +     * If we didn't have BDRV_O_NOCACHE set before, we may not have allocated
> +     * aligned_buf
> +     */
> +    if ((state->flags & BDRV_O_NOCACHE)) {
> +        /*
> +         * Allocate a buffer for read/modify/write cycles.  Choose the size
> +         * pessimistically as we don't know the block size yet.
> +         */
> +        raw_s->aligned_buf_size = 32 * MAX_BLOCKSIZE;
> +        raw_s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE,
> +                                           raw_s->aligned_buf_size);
> +
> +        if (raw_s->aligned_buf == NULL) {
> +            ret = -1;
> +            goto error;
> +        }

Even though it's pretty small, I think I would factor this out into a
small static helper to make sure it's kept in sync with raw_open_common().

> +    }
> +
> +    int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
> +#ifdef O_NOATIME
> +    fcntl_flags |= O_NOATIME;
> +#endif
> +    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
> +        /* dup the original fd */
> +        /* TODO: use qemu fcntl wrapper */
> +        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
> +        if (raw_s->fd == -1) {
> +            ret = -1;
> +            goto error;
> +        }
> +        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
> +    } else {
> +        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);
> +        if (raw_s->fd == -1) {
> +            ret = -1;
> +        }

Ignoring this part for now, with qemu_dup_flags() it's going to look a
bit different. In particular, I'm hoping that we don't get a second
fcntl_flags enumeration here, but can just fall back to qemu_open()
whenever qemu_dup_flags() fails.

If we do need to keep fcntl_flags here, we'll probably want to add
O_DIRECT to it.

> +    }
> +error:
> +    return ret;
> +}
> +
> +
> +static void raw_reopen_commit(BDRVReopenState *state)
> +{
> +    BDRVRawReopenState *raw_s = state->opaque;
> +    BDRVRawState *s = state->bs->opaque;
> +
> +    if (raw_s->aligned_buf != NULL) {
> +        if (s->aligned_buf) {
> +            qemu_vfree(s->aligned_buf);
> +        }
> +        s->aligned_buf      = raw_s->aligned_buf;
> +        s->aligned_buf_size = raw_s->aligned_buf_size;
> +    }
> +
> +    s->open_flags = raw_s->open_flags;
> +
> +    close(s->fd);
> +    s->fd = raw_s->fd;
> +
> +    g_free(state->opaque);
> +    state->opaque = NULL;
> +}

I think s->use_aio must be changed as well, it depends on
BDRV_O_NOCACHE. Maybe we even need to do it in prepare, laio_init() may
need to be called.

Kevin
Jeff Cody Sept. 5, 2012, 4:43 p.m. UTC | #6
On 09/05/2012 11:30 AM, Kevin Wolf wrote:
> Am 30.08.2012 20:47, schrieb Jeff Cody:
>> This is derived from the Supriya Kannery's reopen patches.
>>
>> This contains the raw-posix driver changes for the bdrv_reopen_*
>> functions.  All changes are staged into a temporary scratch buffer
>> during the prepare() stage, and copied over to the live structure
>> during commit().  Upon abort(), all changes are abandoned, and the
>> live structures are unmodified.
>>
>> The _prepare() will create an extra fd - either by means of a dup,
>> if possible, or opening a new fd if not (for instance, access
>> control changes).  Upon _commit(), the original fd is closed and
>> the new fd is used.  Upon _abort(), the duplicate/new fd is closed.
>>
>> Signed-off-by: Jeff Cody <jcody@redhat.com>
>> ---
>>  block/raw-posix.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 139 insertions(+), 14 deletions(-)
>>
>> diff --git a/block/raw-posix.c b/block/raw-posix.c
>> index 6be20b1..48086d7 100644
>> --- a/block/raw-posix.c
>> +++ b/block/raw-posix.c
>> @@ -140,6 +140,15 @@ typedef struct BDRVRawState {
>>  #endif
>>  } BDRVRawState;
>>  
>> +typedef struct BDRVRawReopenState {
>> +    BDRVReopenState reopen_state;
>> +    int fd;
>> +    int open_flags;
>> +    uint8_t *aligned_buf;
>> +    unsigned aligned_buf_size;
>> +    BDRVRawState *stash_s;
>> +} BDRVRawReopenState;
>> +
>>  static int fd_open(BlockDriverState *bs);
>>  static int64_t raw_getlength(BlockDriverState *bs);
>>  
>> @@ -185,6 +194,28 @@ static int raw_normalize_devicepath(const char **filename)
>>  }
>>  #endif
>>  
>> +static void raw_parse_flags(int bdrv_flags, int *open_flags)
>> +{
>> +    assert(open_flags != NULL);
>> +
>> +    *open_flags |= O_BINARY;
>> +    *open_flags &= ~O_ACCMODE;
>> +    if (bdrv_flags & BDRV_O_RDWR) {
>> +        *open_flags |= O_RDWR;
>> +    } else {
>> +        *open_flags |= O_RDONLY;
>> +    }
>> +
>> +    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
>> +     * and O_DIRECT for no caching. */
>> +    if ((bdrv_flags & BDRV_O_NOCACHE)) {
>> +        *open_flags |= O_DIRECT;
>> +    }
>> +    if (!(bdrv_flags & BDRV_O_CACHE_WB)) {
>> +        *open_flags |= O_DSYNC;
>> +    }
>> +}
> 
> The code motion would ideally be a separate patch.
>

OK

>> +
>>  static int raw_open_common(BlockDriverState *bs, const char *filename,
>>                             int bdrv_flags, int open_flags)
>>  {
>> @@ -196,20 +227,8 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
>>          return ret;
>>      }
>>  
>> -    s->open_flags = open_flags | O_BINARY;
>> -    s->open_flags &= ~O_ACCMODE;
>> -    if (bdrv_flags & BDRV_O_RDWR) {
>> -        s->open_flags |= O_RDWR;
>> -    } else {
>> -        s->open_flags |= O_RDONLY;
>> -    }
>> -
>> -    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
>> -     * and O_DIRECT for no caching. */
>> -    if ((bdrv_flags & BDRV_O_NOCACHE))
>> -        s->open_flags |= O_DIRECT;
>> -    if (!(bdrv_flags & BDRV_O_CACHE_WB))
>> -        s->open_flags |= O_DSYNC;
>> +    s->open_flags = open_flags;
>> +    raw_parse_flags(bdrv_flags, &s->open_flags);
>>  
>>      s->fd = -1;
>>      fd = qemu_open(filename, s->open_flags, 0644);
>> @@ -283,6 +302,109 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
>>      return raw_open_common(bs, filename, flags, 0);
>>  }
>>  
>> +static int raw_reopen_prepare(BDRVReopenState *state, Error **errp)
>> +{
>> +    BDRVRawState *s;
>> +    BDRVRawReopenState *raw_s;
>> +    int ret = 0;
>> +
>> +    assert(state != NULL);
>> +    assert(state->bs != NULL);
>> +
>> +    s = state->bs->opaque;
>> +
>> +    state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
>> +    raw_s = state->opaque;
>> +
>> +    raw_parse_flags(state->flags, &raw_s->open_flags);
>> +
>> +    /*
>> +     * If we didn't have BDRV_O_NOCACHE set before, we may not have allocated
>> +     * aligned_buf
>> +     */
>> +    if ((state->flags & BDRV_O_NOCACHE)) {
>> +        /*
>> +         * Allocate a buffer for read/modify/write cycles.  Choose the size
>> +         * pessimistically as we don't know the block size yet.
>> +         */
>> +        raw_s->aligned_buf_size = 32 * MAX_BLOCKSIZE;
>> +        raw_s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE,
>> +                                           raw_s->aligned_buf_size);
>> +
>> +        if (raw_s->aligned_buf == NULL) {
>> +            ret = -1;
>> +            goto error;
>> +        }
> 
> Even though it's pretty small, I think I would factor this out into a
> small static helper to make sure it's kept in sync with raw_open_common().
> 

OK, and like your suggestion above, I'll put that in a separate code
motion patch.

>> +    }
>> +
>> +    int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
>> +#ifdef O_NOATIME
>> +    fcntl_flags |= O_NOATIME;
>> +#endif
>> +    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
>> +        /* dup the original fd */
>> +        /* TODO: use qemu fcntl wrapper */
>> +        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
>> +        if (raw_s->fd == -1) {
>> +            ret = -1;
>> +            goto error;
>> +        }
>> +        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
>> +    } else {
>> +        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);
>> +        if (raw_s->fd == -1) {
>> +            ret = -1;
>> +        }
> 
> Ignoring this part for now, with qemu_dup_flags() it's going to look a
> bit different. In particular, I'm hoping that we don't get a second
> fcntl_flags enumeration here, but can just fall back to qemu_open()
> whenever qemu_dup_flags() fails.

That will require modification to qemu_dup_flags()... I believe
qemu_dup_flags() silently filters out fcntl incompatible flags.

Maybe it would be best to create a small helper function in osdep.c, that
fetches the fcntl_flags.  Then qemu_dup_flags() and this function would
use the same helper to fetch fcntl_flags.  The results of that would
determine if we call qemu_dup_flags() or qemu_open().

Although, I do think it makes sense to always try qemu_open() if
qemu_dup_flags() fails for some reason.

> 
> If we do need to keep fcntl_flags here, we'll probably want to add
> O_DIRECT to it.
> 

>> +    }
>> +error:
>> +    return ret;
>> +}
>> +
>> +
>> +static void raw_reopen_commit(BDRVReopenState *state)
>> +{
>> +    BDRVRawReopenState *raw_s = state->opaque;
>> +    BDRVRawState *s = state->bs->opaque;
>> +
>> +    if (raw_s->aligned_buf != NULL) {
>> +        if (s->aligned_buf) {
>> +            qemu_vfree(s->aligned_buf);
>> +        }
>> +        s->aligned_buf      = raw_s->aligned_buf;
>> +        s->aligned_buf_size = raw_s->aligned_buf_size;
>> +    }
>> +
>> +    s->open_flags = raw_s->open_flags;
>> +
>> +    close(s->fd);
>> +    s->fd = raw_s->fd;
>> +
>> +    g_free(state->opaque);
>> +    state->opaque = NULL;
>> +}
> 
> I think s->use_aio must be changed as well, it depends on
> BDRV_O_NOCACHE. Maybe we even need to do it in prepare, laio_init() may
> need to be called.

Yes, good catch, thanks.

> 
> Kevin
>
Kevin Wolf Sept. 6, 2012, 9:23 a.m. UTC | #7
Am 05.09.2012 18:43, schrieb Jeff Cody:
>>> +    }
>>> +
>>> +    int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
>>> +#ifdef O_NOATIME
>>> +    fcntl_flags |= O_NOATIME;
>>> +#endif
>>> +    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
>>> +        /* dup the original fd */
>>> +        /* TODO: use qemu fcntl wrapper */
>>> +        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
>>> +        if (raw_s->fd == -1) {
>>> +            ret = -1;
>>> +            goto error;
>>> +        }
>>> +        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
>>> +    } else {
>>> +        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);
>>> +        if (raw_s->fd == -1) {
>>> +            ret = -1;
>>> +        }
>>
>> Ignoring this part for now, with qemu_dup_flags() it's going to look a
>> bit different. In particular, I'm hoping that we don't get a second
>> fcntl_flags enumeration here, but can just fall back to qemu_open()
>> whenever qemu_dup_flags() fails.
> 
> That will require modification to qemu_dup_flags()... I believe
> qemu_dup_flags() silently filters out fcntl incompatible flags.
> 
> Maybe it would be best to create a small helper function in osdep.c, that
> fetches the fcntl_flags.  Then qemu_dup_flags() and this function would
> use the same helper to fetch fcntl_flags.  The results of that would
> determine if we call qemu_dup_flags() or qemu_open().
> 
> Although, I do think it makes sense to always try qemu_open() if
> qemu_dup_flags() fails for some reason.

If we can modify qemu_dup_flags() to fail if it can't provide the right
set of flags, then I think we should do it - and I think we can. Even
for the existing cases with fd passing it shouldn't break anything, but
only add an additional safety check.

And if touching the function motivates Corey to write some fd passing
test cases so that you can't break it, even better. ;-)

Kevin
Corey Bryant Sept. 6, 2012, 3:34 p.m. UTC | #8
On 09/06/2012 05:23 AM, Kevin Wolf wrote:
> Am 05.09.2012 18:43, schrieb Jeff Cody:
>>>> +    }
>>>> +
>>>> +    int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
>>>> +#ifdef O_NOATIME
>>>> +    fcntl_flags |= O_NOATIME;
>>>> +#endif
>>>> +    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
>>>> +        /* dup the original fd */
>>>> +        /* TODO: use qemu fcntl wrapper */
>>>> +        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
>>>> +        if (raw_s->fd == -1) {
>>>> +            ret = -1;
>>>> +            goto error;
>>>> +        }
>>>> +        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
>>>> +    } else {
>>>> +        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);
>>>> +        if (raw_s->fd == -1) {
>>>> +            ret = -1;
>>>> +        }
>>>
>>> Ignoring this part for now, with qemu_dup_flags() it's going to look a
>>> bit different. In particular, I'm hoping that we don't get a second
>>> fcntl_flags enumeration here, but can just fall back to qemu_open()
>>> whenever qemu_dup_flags() fails.
>>
>> That will require modification to qemu_dup_flags()... I believe
>> qemu_dup_flags() silently filters out fcntl incompatible flags.
>>
>> Maybe it would be best to create a small helper function in osdep.c, that
>> fetches the fcntl_flags.  Then qemu_dup_flags() and this function would
>> use the same helper to fetch fcntl_flags.  The results of that would
>> determine if we call qemu_dup_flags() or qemu_open().
>>
>> Although, I do think it makes sense to always try qemu_open() if
>> qemu_dup_flags() fails for some reason.

I'm curious why you can't always call qemu_open().

Some things to consider so that fd passing doesn't break when a reopen 
occurs.  Mainly all the concerns revolve around how fd passing keeps 
track of references to fd sets (note: adding and removing fd set 
references is all done in qemu_open and qemu_close).

* When reopening, qemu_open needs to be called before qemu_close.  This 
will prevent the reference list for an fdset from becoming empty.  If 
qemu_close is called before qemu_open, the reference list can become 
empty, and the fdset could be cleaned up before the qemu_open.  Then 
qemu_open would fail.

* qemu_open/qemu_close need to be used rather than open/close so that 
the references for fd passing are properly accounted for.

* I don't think you want to call qemu_dup_flags directly since it 
doesn't update the reference list for fd passing.  Only qemu_open and 
qemu_close update the reference list.

>
> If we can modify qemu_dup_flags() to fail if it can't provide the right
> set of flags, then I think we should do it - and I think we can. Even
> for the existing cases with fd passing it shouldn't break anything, but
> only add an additional safety check.
>
> And if touching the function motivates Corey to write some fd passing
> test cases so that you can't break it, even better. ;-)

:) Sorry, I do plan to do this soon.  I've just been side-tracked with 
some other things.
Kevin Wolf Sept. 7, 2012, 10:40 a.m. UTC | #9
Am 06.09.2012 17:34, schrieb Corey Bryant:
> 
> 
> On 09/06/2012 05:23 AM, Kevin Wolf wrote:
>> Am 05.09.2012 18:43, schrieb Jeff Cody:
>>>>> +    }
>>>>> +
>>>>> +    int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
>>>>> +#ifdef O_NOATIME
>>>>> +    fcntl_flags |= O_NOATIME;
>>>>> +#endif
>>>>> +    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
>>>>> +        /* dup the original fd */
>>>>> +        /* TODO: use qemu fcntl wrapper */
>>>>> +        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
>>>>> +        if (raw_s->fd == -1) {
>>>>> +            ret = -1;
>>>>> +            goto error;
>>>>> +        }
>>>>> +        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
>>>>> +    } else {
>>>>> +        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);
>>>>> +        if (raw_s->fd == -1) {
>>>>> +            ret = -1;
>>>>> +        }
>>>>
>>>> Ignoring this part for now, with qemu_dup_flags() it's going to look a
>>>> bit different. In particular, I'm hoping that we don't get a second
>>>> fcntl_flags enumeration here, but can just fall back to qemu_open()
>>>> whenever qemu_dup_flags() fails.
>>>
>>> That will require modification to qemu_dup_flags()... I believe
>>> qemu_dup_flags() silently filters out fcntl incompatible flags.
>>>
>>> Maybe it would be best to create a small helper function in osdep.c, that
>>> fetches the fcntl_flags.  Then qemu_dup_flags() and this function would
>>> use the same helper to fetch fcntl_flags.  The results of that would
>>> determine if we call qemu_dup_flags() or qemu_open().
>>>
>>> Although, I do think it makes sense to always try qemu_open() if
>>> qemu_dup_flags() fails for some reason.
> 
> I'm curious why you can't always call qemu_open().

I believe the original reason was that qemu_open() is more likely to
fail, for example if the image file has been renamed/moved/deleted since
the first open. You could still use fcntl() on an existing file
descriptor, but reopening would fail.

> Some things to consider so that fd passing doesn't break when a reopen 
> occurs.  Mainly all the concerns revolve around how fd passing keeps 
> track of references to fd sets (note: adding and removing fd set 
> references is all done in qemu_open and qemu_close).
> 
> * When reopening, qemu_open needs to be called before qemu_close.  This 
> will prevent the reference list for an fdset from becoming empty.  If 
> qemu_close is called before qemu_open, the reference list can become 
> empty, and the fdset could be cleaned up before the qemu_open.  Then 
> qemu_open would fail.

Will automatically be right when we properly implement transactional
semantics.

> * qemu_open/qemu_close need to be used rather than open/close so that 
> the references for fd passing are properly accounted for.

Congratulations, you've just discovered a bug in Jeff's patches. It was
a good idea to CC you. ;-)

> * I don't think you want to call qemu_dup_flags directly since it 
> doesn't update the reference list for fd passing.  Only qemu_open and 
> qemu_close update the reference list.

That's a good point, too. So probably a small wrapper that just updates
the reference list in addition?

>> If we can modify qemu_dup_flags() to fail if it can't provide the right
>> set of flags, then I think we should do it - and I think we can. Even
>> for the existing cases with fd passing it shouldn't break anything, but
>> only add an additional safety check.
>>
>> And if touching the function motivates Corey to write some fd passing
>> test cases so that you can't break it, even better. ;-)
> 
> :) Sorry, I do plan to do this soon.  I've just been side-tracked with 
> some other things.

No problem, it was just such a great opportunity to remind you. ;-)

Kevin
Corey Bryant Sept. 7, 2012, 2:29 p.m. UTC | #10
On 09/07/2012 06:40 AM, Kevin Wolf wrote:
> Am 06.09.2012 17:34, schrieb Corey Bryant:
>>
>>
>> On 09/06/2012 05:23 AM, Kevin Wolf wrote:
>>> Am 05.09.2012 18:43, schrieb Jeff Cody:
>>>>>> +    }
>>>>>> +
>>>>>> +    int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
>>>>>> +#ifdef O_NOATIME
>>>>>> +    fcntl_flags |= O_NOATIME;
>>>>>> +#endif
>>>>>> +    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
>>>>>> +        /* dup the original fd */
>>>>>> +        /* TODO: use qemu fcntl wrapper */
>>>>>> +        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
>>>>>> +        if (raw_s->fd == -1) {
>>>>>> +            ret = -1;
>>>>>> +            goto error;
>>>>>> +        }
>>>>>> +        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
>>>>>> +    } else {
>>>>>> +        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);
>>>>>> +        if (raw_s->fd == -1) {
>>>>>> +            ret = -1;
>>>>>> +        }
>>>>>
>>>>> Ignoring this part for now, with qemu_dup_flags() it's going to look a
>>>>> bit different. In particular, I'm hoping that we don't get a second
>>>>> fcntl_flags enumeration here, but can just fall back to qemu_open()
>>>>> whenever qemu_dup_flags() fails.
>>>>
>>>> That will require modification to qemu_dup_flags()... I believe
>>>> qemu_dup_flags() silently filters out fcntl incompatible flags.
>>>>
>>>> Maybe it would be best to create a small helper function in osdep.c, that
>>>> fetches the fcntl_flags.  Then qemu_dup_flags() and this function would
>>>> use the same helper to fetch fcntl_flags.  The results of that would
>>>> determine if we call qemu_dup_flags() or qemu_open().
>>>>
>>>> Although, I do think it makes sense to always try qemu_open() if
>>>> qemu_dup_flags() fails for some reason.
>>
>> I'm curious why you can't always call qemu_open().
>
> I believe the original reason was that qemu_open() is more likely to
> fail, for example if the image file has been renamed/moved/deleted since
> the first open. You could still use fcntl() on an existing file
> descriptor, but reopening would fail.
>
>> Some things to consider so that fd passing doesn't break when a reopen
>> occurs.  Mainly all the concerns revolve around how fd passing keeps
>> track of references to fd sets (note: adding and removing fd set
>> references is all done in qemu_open and qemu_close).
>>
>> * When reopening, qemu_open needs to be called before qemu_close.  This
>> will prevent the reference list for an fdset from becoming empty.  If
>> qemu_close is called before qemu_open, the reference list can become
>> empty, and the fdset could be cleaned up before the qemu_open.  Then
>> qemu_open would fail.
>
> Will automatically be right when we properly implement transactional
> semantics.
>
>> * qemu_open/qemu_close need to be used rather than open/close so that
>> the references for fd passing are properly accounted for.
>
> Congratulations, you've just discovered a bug in Jeff's patches. It was
> a good idea to CC you. ;-)
>
>> * I don't think you want to call qemu_dup_flags directly since it
>> doesn't update the reference list for fd passing.  Only qemu_open and
>> qemu_close update the reference list.
>
> That's a good point, too. So probably a small wrapper that just updates
> the reference list in addition?
>

You could do that.  And yes you'd have to add code to add the new dup fd 
to an fdset's dup_fds list if in fact the fd that you dup'd was a member 
of an fdset's dup_fds list (see how qemu_close() and qemu_open() do this).

But wouldn't it be easier to just go through qemu_close() then 
qemu_open() to perform the reopen?  Then you don't have to add this 
extra code to account for fd passing.
diff mbox

Patch

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 6be20b1..48086d7 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -140,6 +140,15 @@  typedef struct BDRVRawState {
 #endif
 } BDRVRawState;
 
+typedef struct BDRVRawReopenState {
+    BDRVReopenState reopen_state;
+    int fd;
+    int open_flags;
+    uint8_t *aligned_buf;
+    unsigned aligned_buf_size;
+    BDRVRawState *stash_s;
+} BDRVRawReopenState;
+
 static int fd_open(BlockDriverState *bs);
 static int64_t raw_getlength(BlockDriverState *bs);
 
@@ -185,6 +194,28 @@  static int raw_normalize_devicepath(const char **filename)
 }
 #endif
 
+static void raw_parse_flags(int bdrv_flags, int *open_flags)
+{
+    assert(open_flags != NULL);
+
+    *open_flags |= O_BINARY;
+    *open_flags &= ~O_ACCMODE;
+    if (bdrv_flags & BDRV_O_RDWR) {
+        *open_flags |= O_RDWR;
+    } else {
+        *open_flags |= O_RDONLY;
+    }
+
+    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+     * and O_DIRECT for no caching. */
+    if ((bdrv_flags & BDRV_O_NOCACHE)) {
+        *open_flags |= O_DIRECT;
+    }
+    if (!(bdrv_flags & BDRV_O_CACHE_WB)) {
+        *open_flags |= O_DSYNC;
+    }
+}
+
 static int raw_open_common(BlockDriverState *bs, const char *filename,
                            int bdrv_flags, int open_flags)
 {
@@ -196,20 +227,8 @@  static int raw_open_common(BlockDriverState *bs, const char *filename,
         return ret;
     }
 
-    s->open_flags = open_flags | O_BINARY;
-    s->open_flags &= ~O_ACCMODE;
-    if (bdrv_flags & BDRV_O_RDWR) {
-        s->open_flags |= O_RDWR;
-    } else {
-        s->open_flags |= O_RDONLY;
-    }
-
-    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
-     * and O_DIRECT for no caching. */
-    if ((bdrv_flags & BDRV_O_NOCACHE))
-        s->open_flags |= O_DIRECT;
-    if (!(bdrv_flags & BDRV_O_CACHE_WB))
-        s->open_flags |= O_DSYNC;
+    s->open_flags = open_flags;
+    raw_parse_flags(bdrv_flags, &s->open_flags);
 
     s->fd = -1;
     fd = qemu_open(filename, s->open_flags, 0644);
@@ -283,6 +302,109 @@  static int raw_open(BlockDriverState *bs, const char *filename, int flags)
     return raw_open_common(bs, filename, flags, 0);
 }
 
+static int raw_reopen_prepare(BDRVReopenState *state, Error **errp)
+{
+    BDRVRawState *s;
+    BDRVRawReopenState *raw_s;
+    int ret = 0;
+
+    assert(state != NULL);
+    assert(state->bs != NULL);
+
+    s = state->bs->opaque;
+
+    state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
+    raw_s = state->opaque;
+
+    raw_parse_flags(state->flags, &raw_s->open_flags);
+
+    /*
+     * If we didn't have BDRV_O_NOCACHE set before, we may not have allocated
+     * aligned_buf
+     */
+    if ((state->flags & BDRV_O_NOCACHE)) {
+        /*
+         * Allocate a buffer for read/modify/write cycles.  Choose the size
+         * pessimistically as we don't know the block size yet.
+         */
+        raw_s->aligned_buf_size = 32 * MAX_BLOCKSIZE;
+        raw_s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE,
+                                           raw_s->aligned_buf_size);
+
+        if (raw_s->aligned_buf == NULL) {
+            ret = -1;
+            goto error;
+        }
+    }
+
+    int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
+#ifdef O_NOATIME
+    fcntl_flags |= O_NOATIME;
+#endif
+    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
+        /* dup the original fd */
+        /* TODO: use qemu fcntl wrapper */
+        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
+        if (raw_s->fd == -1) {
+            ret = -1;
+            goto error;
+        }
+        ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
+    } else {
+        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644);
+        if (raw_s->fd == -1) {
+            ret = -1;
+        }
+    }
+error:
+    return ret;
+}
+
+
+static void raw_reopen_commit(BDRVReopenState *state)
+{
+    BDRVRawReopenState *raw_s = state->opaque;
+    BDRVRawState *s = state->bs->opaque;
+
+    if (raw_s->aligned_buf != NULL) {
+        if (s->aligned_buf) {
+            qemu_vfree(s->aligned_buf);
+        }
+        s->aligned_buf      = raw_s->aligned_buf;
+        s->aligned_buf_size = raw_s->aligned_buf_size;
+    }
+
+    s->open_flags = raw_s->open_flags;
+
+    close(s->fd);
+    s->fd = raw_s->fd;
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+
+static void raw_reopen_abort(BDRVReopenState *state)
+{
+    BDRVRawReopenState *raw_s = state->opaque;
+
+     /* nothing to do if NULL, we didn't get far enough */
+    if (raw_s == NULL) {
+        return;
+    }
+
+    if (raw_s->fd >= 0) {
+        close(raw_s->fd);
+        raw_s->fd = -1;
+        if (raw_s->aligned_buf != NULL) {
+            qemu_vfree(raw_s->aligned_buf);
+        }
+    }
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+
 /* XXX: use host sector size if necessary with:
 #ifdef DIOCGSECTORSIZE
         {
@@ -735,6 +857,9 @@  static BlockDriver bdrv_file = {
     .instance_size = sizeof(BDRVRawState),
     .bdrv_probe = NULL, /* no probe for protocols */
     .bdrv_file_open = raw_open,
+    .bdrv_reopen_prepare = raw_reopen_prepare,
+    .bdrv_reopen_commit = raw_reopen_commit,
+    .bdrv_reopen_abort = raw_reopen_abort,
     .bdrv_close = raw_close,
     .bdrv_create = raw_create,
     .bdrv_co_discard = raw_co_discard,