diff mbox series

[v3,2/2] quorum: Implement bdrv_co_pwrite_zeroes()

Message ID 2faad461e6bffc4a50547547b8c20c39e0f544e8.1605111801.git.berto@igalia.com
State New
Headers show
Series quorum: Implement bdrv_co_block_status() | expand

Commit Message

Alberto Garcia Nov. 11, 2020, 4:53 p.m. UTC
This simply calls bdrv_co_pwrite_zeroes() in all children

Signed-off-by: Alberto Garcia <berto@igalia.com>
---
 block/quorum.c             | 18 ++++++++++++++++--
 tests/qemu-iotests/312     |  7 +++++++
 tests/qemu-iotests/312.out |  4 ++++
 3 files changed, 27 insertions(+), 2 deletions(-)

Comments

Max Reitz Nov. 13, 2020, 11:49 a.m. UTC | #1
On 11.11.20 17:53, Alberto Garcia wrote:
> This simply calls bdrv_co_pwrite_zeroes() in all children
> 
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> ---
>   block/quorum.c             | 18 ++++++++++++++++--
>   tests/qemu-iotests/312     |  7 +++++++
>   tests/qemu-iotests/312.out |  4 ++++
>   3 files changed, 27 insertions(+), 2 deletions(-)

Should we set supported_zero_flags to something?  I think we can at 
least set BDRV_REQ_NO_FALLBACK.  We could also try BDRV_REQ_FUA.

> diff --git a/block/quorum.c b/block/quorum.c
> index 9691a9bee9..c81572f513 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -692,8 +692,13 @@ static void write_quorum_entry(void *opaque)
>       QuorumChildRequest *sacb = &acb->qcrs[i];
>   
>       sacb->bs = s->children[i]->bs;
> -    sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
> -                                acb->qiov, acb->flags);
> +    if (acb->flags & BDRV_REQ_ZERO_WRITE) {
> +        sacb->ret = bdrv_co_pwrite_zeroes(s->children[i], acb->offset,
> +                                          acb->bytes, acb->flags);
> +    } else {
> +        sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
> +                                    acb->qiov, acb->flags);
> +    }

Seems unnecessary (bdrv_co_pwritev() can handle BDRV_REQ_ZERO_WRITE), 
but perhaps it’s good to be explicit.

>       if (sacb->ret == 0) {
>           acb->success_count++;
>       } else {

[...]

> diff --git a/tests/qemu-iotests/312 b/tests/qemu-iotests/312
> index 1b08f1552f..93046393e7 100755
> --- a/tests/qemu-iotests/312
> +++ b/tests/qemu-iotests/312
> @@ -114,6 +114,13 @@ $QEMU_IO -c "write -P 0 $((0x200000)) $((0x10000))" "$TEST_IMG.0" | _filter_qemu
>   $QEMU_IO -c "write -z   $((0x200000)) $((0x30000))" "$TEST_IMG.1" | _filter_qemu_io
>   $QEMU_IO -c "write -P 0 $((0x200000)) $((0x20000))" "$TEST_IMG.2" | _filter_qemu_io
>   
> +# Test 5: write data to a region and then zeroize it, doing it
> +# directly on the quorum device instead of the individual images.
> +# This has no effect on the end result but proves that the quorum driver
> +# supports 'write -z'.
> +$QEMU_IO -c "open -o $quorum" -c "write $((0x250000)) $((0x10000))" | _filter_qemu_io
> +$QEMU_IO -c "open -o $quorum" -c "write -z $((0x250000)) $((0x10000))" | _filter_qemu_io
> +

My gut would have preferred a test where the data region is larger than 
the zeroed region (so we can see that the first write has done 
something), but who cares about my gut.

I don’t mind not setting supported_zero_flags enough to warrant 
withholding a

Reviewed-by: Max Reitz <mreitz@redhat.com>

But I’ll give you some time to reply before I’d take this patch to 
block-next.  (That is, unless Kevin takes it during my two-week PTO...)

>   echo
>   echo '### Launch the drive-mirror job'
>   echo
Alberto Garcia Nov. 13, 2020, 4:07 p.m. UTC | #2
On Fri 13 Nov 2020 12:49:04 PM CET, Max Reitz wrote:
> On 11.11.20 17:53, Alberto Garcia wrote:
>> This simply calls bdrv_co_pwrite_zeroes() in all children
>> 
>> Signed-off-by: Alberto Garcia <berto@igalia.com>
>> ---
>>   block/quorum.c             | 18 ++++++++++++++++--
>>   tests/qemu-iotests/312     |  7 +++++++
>>   tests/qemu-iotests/312.out |  4 ++++
>>   3 files changed, 27 insertions(+), 2 deletions(-)
>
> Should we set supported_zero_flags to something?  I think we can at 
> least set BDRV_REQ_NO_FALLBACK.  We could also try BDRV_REQ_FUA.

We could set all supported_zero_flags as long as all children support
them, right? 

>> +    if (acb->flags & BDRV_REQ_ZERO_WRITE) {
>> +        sacb->ret = bdrv_co_pwrite_zeroes(s->children[i], acb->offset,
>> +                                          acb->bytes, acb->flags);
>> +    } else {
>> +        sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
>> +                                    acb->qiov, acb->flags);
>> +    }
>
> Seems unnecessary (bdrv_co_pwritev() can handle BDRV_REQ_ZERO_WRITE), 
> but perhaps it’s good to be explicit.

pwrite_zeroes() does this additionaly:

    if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
        flags &= ~BDRV_REQ_MAY_UNMAP;
    }

Berto
Max Reitz Nov. 13, 2020, 4:11 p.m. UTC | #3
On 13.11.20 17:07, Alberto Garcia wrote:
> On Fri 13 Nov 2020 12:49:04 PM CET, Max Reitz wrote:
>> On 11.11.20 17:53, Alberto Garcia wrote:
>>> This simply calls bdrv_co_pwrite_zeroes() in all children
>>>
>>> Signed-off-by: Alberto Garcia <berto@igalia.com>
>>> ---
>>>    block/quorum.c             | 18 ++++++++++++++++--
>>>    tests/qemu-iotests/312     |  7 +++++++
>>>    tests/qemu-iotests/312.out |  4 ++++
>>>    3 files changed, 27 insertions(+), 2 deletions(-)
>>
>> Should we set supported_zero_flags to something?  I think we can at
>> least set BDRV_REQ_NO_FALLBACK.  We could also try BDRV_REQ_FUA.
> 
> We could set all supported_zero_flags as long as all children support
> them, right?

Sure, I was just thinking that we could set these regardless of whether 
the children support them, because (on zero-writes) the block layer will 
figure out for us whether the child nodes support them. O:)

>>> +    if (acb->flags & BDRV_REQ_ZERO_WRITE) {
>>> +        sacb->ret = bdrv_co_pwrite_zeroes(s->children[i], acb->offset,
>>> +                                          acb->bytes, acb->flags);
>>> +    } else {
>>> +        sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
>>> +                                    acb->qiov, acb->flags);
>>> +    }
>>
>> Seems unnecessary (bdrv_co_pwritev() can handle BDRV_REQ_ZERO_WRITE),
>> but perhaps it’s good to be explicit.
> 
> pwrite_zeroes() does this additionaly:
> 
>      if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
>          flags &= ~BDRV_REQ_MAY_UNMAP;
>      }

Interesting.  Technically, Quorum doesn’t support that flag (in 
supported_zero_flags O:))), so it shouldn’t appear, but, er, well then.

Max
Alberto Garcia Nov. 13, 2020, 4:26 p.m. UTC | #4
On Fri 13 Nov 2020 05:11:20 PM CET, Max Reitz wrote:

>> We could set all supported_zero_flags as long as all children support
>> them, right?
>
> Sure, I was just thinking that we could set these regardless of
> whether the children support them, because (on zero-writes) the block
> layer will figure out for us whether the child nodes support them. O:)

But it can happen that one child supports e.g. BDRV_REQ_NO_FALLBACK but
the rest don't. In this case I think the block layer should return
-ENOTSUP earlier without writing to the child(ren) that do support that
flag.

So Quorum's supported_zero_flags would be the logical and of all of its
children's flags, right?

I'm unsure about BDRV_REQ_WRITE_UNCHANGED, many filters set that on top
of the other flags, but when would a BDS not support this flag?

>> pwrite_zeroes() does this additionaly:
>> 
>>      if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
>>          flags &= ~BDRV_REQ_MAY_UNMAP;
>>      }
>
> Interesting.  Technically, Quorum doesn’t support that flag (in
> supported_zero_flags O:))), so it shouldn’t appear, but, er, well
> then.

It would with the change that I'm proposing above.

Berto
Max Reitz Nov. 13, 2020, 4:35 p.m. UTC | #5
On 13.11.20 17:26, Alberto Garcia wrote:
> On Fri 13 Nov 2020 05:11:20 PM CET, Max Reitz wrote:
> 
>>> We could set all supported_zero_flags as long as all children support
>>> them, right?
>>
>> Sure, I was just thinking that we could set these regardless of
>> whether the children support them, because (on zero-writes) the block
>> layer will figure out for us whether the child nodes support them. O:)
> 
> But it can happen that one child supports e.g. BDRV_REQ_NO_FALLBACK but
> the rest don't. In this case I think the block layer should return
> -ENOTSUP earlier without writing to the child(ren) that do support that
> flag.

That’s true.

> So Quorum's supported_zero_flags would be the logical and of all of its
> children's flags, right?

Yes.  (And so it would need to be recalculated whenever a child is added 
or removed.)

> I'm unsure about BDRV_REQ_WRITE_UNCHANGED, many filters set that on top
> of the other flags, but when would a BDS not support this flag?

The WRITE_UNCHANGED flag is basically just a hint for the permission 
system that for such a write, the WRITE_UNCHANGED permission is 
sufficient.  So drivers that can pass through *all* WRITE_UNCHANGED 
requests as they are (i.e., filter drivers) can support this write/zero 
flag and then pass that flag on.  This way, they are safe not to take 
the WRITE permission on their child unless their parent has taken the 
WRITE permission on them.

(Otherwise, they’d also have to take the WRITE permission if the parent 
only holds the WRITE_UNCHANGED permission.)

Supporting this flag doesn’t make sense for drivers that won’t be able 
to pass it on all the time.  For example, qcow2 generally cannot pass it 
on, because it still needs to perform WRITE_UNCHANGED requests as normal 
writes.  (WRITE_UNCHANGED comes from copy-on-read; the data will read 
the same, hence the _UNCHANGED, but it still needs to be allocated on 
the receiving format layer.)

(Technically, qcow2 could figure out whether the requests it generates 
from a WRITE_UNCHANGED request still result in unchanging writes in the 
protocol layer, but there is no reason to.  It needs the WRITE 
permission anyway, because there are going to be some non-unchanging 
writes it has to perform.  And if it holds the WRITE permission, there 
is no need to bother with the WRITE_UNCHANGED flag.)

>>> pwrite_zeroes() does this additionaly:
>>>
>>>       if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
>>>           flags &= ~BDRV_REQ_MAY_UNMAP;
>>>       }
>>
>> Interesting.  Technically, Quorum doesn’t support that flag (in
>> supported_zero_flags O:))), so it shouldn’t appear, but, er, well
>> then.
> 
> It would with the change that I'm proposing above.

Yes, I know.  Hence the “O:)”. O:)

Max
diff mbox series

Patch

diff --git a/block/quorum.c b/block/quorum.c
index 9691a9bee9..c81572f513 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -692,8 +692,13 @@  static void write_quorum_entry(void *opaque)
     QuorumChildRequest *sacb = &acb->qcrs[i];
 
     sacb->bs = s->children[i]->bs;
-    sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
-                                acb->qiov, acb->flags);
+    if (acb->flags & BDRV_REQ_ZERO_WRITE) {
+        sacb->ret = bdrv_co_pwrite_zeroes(s->children[i], acb->offset,
+                                          acb->bytes, acb->flags);
+    } else {
+        sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
+                                    acb->qiov, acb->flags);
+    }
     if (sacb->ret == 0) {
         acb->success_count++;
     } else {
@@ -739,6 +744,14 @@  static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
     return ret;
 }
 
+static int quorum_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                                   int bytes, BdrvRequestFlags flags)
+
+{
+    return quorum_co_pwritev(bs, offset, bytes, NULL,
+                             flags | BDRV_REQ_ZERO_WRITE);
+}
+
 static int64_t quorum_getlength(BlockDriverState *bs)
 {
     BDRVQuorumState *s = bs->opaque;
@@ -1251,6 +1264,7 @@  static BlockDriver bdrv_quorum = {
 
     .bdrv_co_preadv                     = quorum_co_preadv,
     .bdrv_co_pwritev                    = quorum_co_pwritev,
+    .bdrv_co_pwrite_zeroes              = quorum_co_pwrite_zeroes,
 
     .bdrv_add_child                     = quorum_add_child,
     .bdrv_del_child                     = quorum_del_child,
diff --git a/tests/qemu-iotests/312 b/tests/qemu-iotests/312
index 1b08f1552f..93046393e7 100755
--- a/tests/qemu-iotests/312
+++ b/tests/qemu-iotests/312
@@ -114,6 +114,13 @@  $QEMU_IO -c "write -P 0 $((0x200000)) $((0x10000))" "$TEST_IMG.0" | _filter_qemu
 $QEMU_IO -c "write -z   $((0x200000)) $((0x30000))" "$TEST_IMG.1" | _filter_qemu_io
 $QEMU_IO -c "write -P 0 $((0x200000)) $((0x20000))" "$TEST_IMG.2" | _filter_qemu_io
 
+# Test 5: write data to a region and then zeroize it, doing it
+# directly on the quorum device instead of the individual images.
+# This has no effect on the end result but proves that the quorum driver
+# supports 'write -z'.
+$QEMU_IO -c "open -o $quorum" -c "write $((0x250000)) $((0x10000))" | _filter_qemu_io
+$QEMU_IO -c "open -o $quorum" -c "write -z $((0x250000)) $((0x10000))" | _filter_qemu_io
+
 echo
 echo '### Launch the drive-mirror job'
 echo
diff --git a/tests/qemu-iotests/312.out b/tests/qemu-iotests/312.out
index 4ae749175b..778dda95c7 100644
--- a/tests/qemu-iotests/312.out
+++ b/tests/qemu-iotests/312.out
@@ -39,6 +39,10 @@  wrote 196608/196608 bytes at offset 2097152
 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 131072/131072 bytes at offset 2097152
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 2424832
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 2424832
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 ### Launch the drive-mirror job