diff mbox

[for-2.4,05/10] block/iscsi: optimize WRITE10/16 if cache.writeback is not set

Message ID 1429186730-3866-6-git-send-email-pl@kamp.de
State New
Headers show

Commit Message

Peter Lieven April 16, 2015, 12:18 p.m. UTC
SCSI allowes to tell the target to not return from a write command
if the date is not written to the disk. Use this so called FUA
bit if it is supported to optimize WRITE commands if writeback is
not allowed.

In this case qemu always issues a WRITE followed by a FLUSH. This
is 2 round trip times. If we set the FUA bit we can ignore the
following FLUSH.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 block/iscsi.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini April 16, 2015, 12:42 p.m. UTC | #1
On 16/04/2015 14:18, Peter Lieven wrote:
> SCSI allowes to tell the target to not return from a write command
> if the date is not written to the disk. Use this so called FUA
> bit if it is supported to optimize WRITE commands if writeback is
> not allowed.
> 
> In this case qemu always issues a WRITE followed by a FLUSH. This
> is 2 round trip times. If we set the FUA bit we can ignore the
> following FLUSH.
> 
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  block/iscsi.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/block/iscsi.c b/block/iscsi.c
> index 237faa1..7fb04d7 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -68,6 +68,7 @@ typedef struct IscsiLun {
>      bool lbprz;
>      bool dpofua;
>      bool has_write_same;
> +    bool force_next_flush;
>  } IscsiLun;
>  
>  typedef struct IscsiTask {
> @@ -370,6 +371,7 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
>      struct IscsiTask iTask;
>      uint64_t lba;
>      uint32_t num_sectors;
> +    int fua = iscsilun->dpofua && !bs->enable_write_cache;
>  
>      if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
>          return -EINVAL;
> @@ -388,12 +390,12 @@ retry:
>      if (iscsilun->use_16_for_rw) {
>          iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
>                                          NULL, num_sectors * iscsilun->block_size,
> -                                        iscsilun->block_size, 0, 0, 0, 0, 0,
> +                                        iscsilun->block_size, 0, 0, fua, 0, 0,
>                                          iscsi_co_generic_cb, &iTask);
>      } else {
>          iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
>                                          NULL, num_sectors * iscsilun->block_size,
> -                                        iscsilun->block_size, 0, 0, 0, 0, 0,
> +                                        iscsilun->block_size, 0, 0, fua, 0, 0,
>                                          iscsi_co_generic_cb, &iTask);
>      }
>      if (iTask.task == NULL) {
> @@ -621,6 +623,11 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
>          return 0;
>      }
>  
> +    if (iscsilun->dpofua && !bs->enable_write_cache &&
> +        !iscsilun->force_next_flush) {
> +        return 0;
> +    }
> +
>      iscsi_co_init_iscsitask(iscsilun, &iTask);
>  
>  retry:
> @@ -648,6 +655,8 @@ retry:
>          return -EIO;
>      }
>  
> +    iscsilun->force_next_flush = false;

You still need a flush if you do WRITE SAME, WRITE+FUA, WRITE+FUA.
Also, since bs->enable_write_cache can be toggled arbitrarily, I would
prefer to set force_next_flush on all non-FUA writes, including those
done with bs->enable_write_cache.

>      return 0;
>  }
>  
> @@ -969,6 +978,8 @@ retry:
>          iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
>      }
>  
> +    iscsilun->force_next_flush = true;

Also, I think it is iscsi_co_generic_cb that should set
force_next_flush, so that it is only set on failure.  Not really for the
optimization value, but because it's clearer.

I think that if you do these changes, iscsi_co_flush can just check "if
(!iscsilun->force_next_flush)".

But still---nice approach. :)

>      return 0;
>  }
>  
>
Peter Lieven April 16, 2015, 1:02 p.m. UTC | #2
Am 16.04.2015 um 14:42 schrieb Paolo Bonzini:
>
> On 16/04/2015 14:18, Peter Lieven wrote:
>> SCSI allowes to tell the target to not return from a write command
>> if the date is not written to the disk. Use this so called FUA
>> bit if it is supported to optimize WRITE commands if writeback is
>> not allowed.
>>
>> In this case qemu always issues a WRITE followed by a FLUSH. This
>> is 2 round trip times. If we set the FUA bit we can ignore the
>> following FLUSH.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   block/iscsi.c | 15 +++++++++++++--
>>   1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/block/iscsi.c b/block/iscsi.c
>> index 237faa1..7fb04d7 100644
>> --- a/block/iscsi.c
>> +++ b/block/iscsi.c
>> @@ -68,6 +68,7 @@ typedef struct IscsiLun {
>>       bool lbprz;
>>       bool dpofua;
>>       bool has_write_same;
>> +    bool force_next_flush;
>>   } IscsiLun;
>>   
>>   typedef struct IscsiTask {
>> @@ -370,6 +371,7 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
>>       struct IscsiTask iTask;
>>       uint64_t lba;
>>       uint32_t num_sectors;
>> +    int fua = iscsilun->dpofua && !bs->enable_write_cache;
>>   
>>       if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
>>           return -EINVAL;
>> @@ -388,12 +390,12 @@ retry:
>>       if (iscsilun->use_16_for_rw) {
>>           iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
>>                                           NULL, num_sectors * iscsilun->block_size,
>> -                                        iscsilun->block_size, 0, 0, 0, 0, 0,
>> +                                        iscsilun->block_size, 0, 0, fua, 0, 0,
>>                                           iscsi_co_generic_cb, &iTask);
>>       } else {
>>           iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
>>                                           NULL, num_sectors * iscsilun->block_size,
>> -                                        iscsilun->block_size, 0, 0, 0, 0, 0,
>> +                                        iscsilun->block_size, 0, 0, fua, 0, 0,
>>                                           iscsi_co_generic_cb, &iTask);
>>       }
>>       if (iTask.task == NULL) {
>> @@ -621,6 +623,11 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
>>           return 0;
>>       }
>>   
>> +    if (iscsilun->dpofua && !bs->enable_write_cache &&
>> +        !iscsilun->force_next_flush) {
>> +        return 0;
>> +    }
>> +
>>       iscsi_co_init_iscsitask(iscsilun, &iTask);
>>   
>>   retry:
>> @@ -648,6 +655,8 @@ retry:
>>           return -EIO;
>>       }
>>   
>> +    iscsilun->force_next_flush = false;
> You still need a flush if you do WRITE SAME, WRITE+FUA, WRITE+FUA.
> Also, since bs->enable_write_cache can be toggled arbitrarily, I would
> prefer to set force_next_flush on all non-FUA writes, including those
> done with bs->enable_write_cache.

Good idea. So we can avoid flushes if there was never a write before.

>
>>       return 0;
>>   }
>>   
>> @@ -969,6 +978,8 @@ retry:
>>           iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
>>       }
>>   
>> +    iscsilun->force_next_flush = true;
> Also, I think it is iscsi_co_generic_cb that should set
> force_next_flush, so that it is only set on failure.  Not really for the
> optimization value, but because it's clearer.

I don't get what you mean with it should only "set on failure".
My approach would be to add a flag to the iTask that the command
requires to set force_flush after successful completion. Is it that
what you mean?

>
> I think that if you do these changes, iscsi_co_flush can just check "if
> (!iscsilun->force_next_flush)".
>
> But still---nice approach. :)

Thanks :-)

Peter
Paolo Bonzini April 16, 2015, 1:17 p.m. UTC | #3
On 16/04/2015 15:02, Peter Lieven wrote:
>>>
>> Also, I think it is iscsi_co_generic_cb that should set
>> force_next_flush, so that it is only set on failure.  Not really for the
>> optimization value, but because it's clearer.
> 
> I don't get what you mean with it should only "set on failure".
> My approach would be to add a flag to the iTask that the command
> requires to set force_flush after successful completion. Is it that
> what you mean?

Set on success.  Lack of sleep.

Paolo
Peter Lieven April 17, 2015, 7:14 a.m. UTC | #4
Am 16.04.2015 um 15:17 schrieb Paolo Bonzini:
>
> On 16/04/2015 15:02, Peter Lieven wrote:
>>> Also, I think it is iscsi_co_generic_cb that should set
>>> force_next_flush, so that it is only set on failure.  Not really for the
>>> optimization value, but because it's clearer.
>> I don't get what you mean with it should only "set on failure".
>> My approach would be to add a flag to the iTask that the command
>> requires to set force_flush after successful completion. Is it that
>> what you mean?
> Set on success.  Lack of sleep.

I have send a v2 following your suggestions.

Peter
diff mbox

Patch

diff --git a/block/iscsi.c b/block/iscsi.c
index 237faa1..7fb04d7 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -68,6 +68,7 @@  typedef struct IscsiLun {
     bool lbprz;
     bool dpofua;
     bool has_write_same;
+    bool force_next_flush;
 } IscsiLun;
 
 typedef struct IscsiTask {
@@ -370,6 +371,7 @@  static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
     struct IscsiTask iTask;
     uint64_t lba;
     uint32_t num_sectors;
+    int fua = iscsilun->dpofua && !bs->enable_write_cache;
 
     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
         return -EINVAL;
@@ -388,12 +390,12 @@  retry:
     if (iscsilun->use_16_for_rw) {
         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                         NULL, num_sectors * iscsilun->block_size,
-                                        iscsilun->block_size, 0, 0, 0, 0, 0,
+                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                         iscsi_co_generic_cb, &iTask);
     } else {
         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                         NULL, num_sectors * iscsilun->block_size,
-                                        iscsilun->block_size, 0, 0, 0, 0, 0,
+                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                         iscsi_co_generic_cb, &iTask);
     }
     if (iTask.task == NULL) {
@@ -621,6 +623,11 @@  static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
         return 0;
     }
 
+    if (iscsilun->dpofua && !bs->enable_write_cache &&
+        !iscsilun->force_next_flush) {
+        return 0;
+    }
+
     iscsi_co_init_iscsitask(iscsilun, &iTask);
 
 retry:
@@ -648,6 +655,8 @@  retry:
         return -EIO;
     }
 
+    iscsilun->force_next_flush = false;
+
     return 0;
 }
 
@@ -969,6 +978,8 @@  retry:
         iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
     }
 
+    iscsilun->force_next_flush = true;
+
     return 0;
 }