diff mbox

[1/4] ide/atapi: make PIO read requests async

Message ID 1444652845-20642-2-git-send-email-pl@kamp.de
State New
Headers show

Commit Message

Peter Lieven Oct. 12, 2015, 12:27 p.m. UTC
PIO read requests on the ATAPI interface used to be sync blk requests.
This has two significant drawbacks. First the main loop hangs util an
I/O request is completed and secondly if the I/O request does not
complete (e.g. due to an unresponsive storage) Qemu hangs completely.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 hw/ide/atapi.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 84 insertions(+), 9 deletions(-)

Comments

Stefan Hajnoczi Oct. 22, 2015, 4:17 p.m. UTC | #1
On Mon, Oct 12, 2015 at 02:27:22PM +0200, Peter Lieven wrote:
> @@ -129,9 +134,71 @@ static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
>          ret = -EIO;
>          break;
>      }
> +
> +    if (!ret) {
> +        s->lba++;

This function probably shouldn't take the lba argument if it modifies
s->lba.  You dropped the sector_size argument and I think the lba
argument should be dropped for the same reason.

> +static int cd_read_sector(IDEState *s, int lba, void *buf)
> +{
> +    if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) {
> +        return -EINVAL;
> +    }
> +
> +    s->iov.iov_base = buf;
> +    if (s->cd_sector_size == 2352) {
> +        buf += 16;
> +    }
> +
> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> +
> +#ifdef DEBUG_IDE_ATAPI
> +    printf("cd_read_sector: lba=%d\n", lba);
> +#endif
> +
> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
> +                      cd_read_sector_cb, s) == NULL) {

This function never returns NULL, the if statement can be removed.

Doesn't the aiocb need to be stored for cancellation (e.g. device reset)?

> +        return -EIO;
> +    }
> +
> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);

Why does accounting start *after* the read request has been submitted?
Peter Lieven Oct. 23, 2015, 3:17 p.m. UTC | #2
Am 22.10.2015 um 18:17 schrieb Stefan Hajnoczi:
> On Mon, Oct 12, 2015 at 02:27:22PM +0200, Peter Lieven wrote:
>> @@ -129,9 +134,71 @@ static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
>>          ret = -EIO;
>>          break;
>>      }
>> +
>> +    if (!ret) {
>> +        s->lba++;
> This function probably shouldn't take the lba argument if it modifies
> s->lba.  You dropped the sector_size argument and I think the lba
> argument should be dropped for the same reason.
>
>> +static int cd_read_sector(IDEState *s, int lba, void *buf)
>> +{
>> +    if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    s->iov.iov_base = buf;
>> +    if (s->cd_sector_size == 2352) {
>> +        buf += 16;
>> +    }
>> +
>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>> +
>> +#ifdef DEBUG_IDE_ATAPI
>> +    printf("cd_read_sector: lba=%d\n", lba);
>> +#endif
>> +
>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>> +                      cd_read_sector_cb, s) == NULL) {
> This function never returns NULL, the if statement can be removed.

Okay, that was my fault. I was believing it could return NULL.

>
> Doesn't the aiocb need to be stored for cancellation (e.g. device reset)?

The ide_readv_cancelable function introduced in Patch 3 will store
the aiocb. At this point there is blk_drain_all called when the device is reset.

>
>> +        return -EIO;
>> +    }
>> +
>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> Why does accounting start *after* the read request has been submitted?

You are right it has to start before the request.

Peter
John Snow Nov. 3, 2015, 12:48 a.m. UTC | #3
On 10/12/2015 08:27 AM, Peter Lieven wrote:
> PIO read requests on the ATAPI interface used to be sync blk requests.
> This has two significant drawbacks. First the main loop hangs util an
> I/O request is completed and secondly if the I/O request does not
> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
> 
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  hw/ide/atapi.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 84 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index 747f466..2271ea2 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -105,11 +105,16 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>      memset(buf, 0, 288);
>  }
>  
> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
> +static int
> +cd_read_sector_sync(IDEState *s, int lba, uint8_t *buf)
>  {
>      int ret;
>  
> -    switch(sector_size) {
> +#ifdef DEBUG_IDE_ATAPI
> +    printf("cd_read_sector_sync: lba=%d\n", lba);
> +#endif
> +
> +    switch (s->cd_sector_size) {
>      case 2048:
>          block_acct_start(blk_get_stats(s->blk), &s->acct,
>                           4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> @@ -129,9 +134,71 @@ static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
>          ret = -EIO;
>          break;
>      }
> +
> +    if (!ret) {
> +        s->lba++;
> +        s->io_buffer_index = 0;
> +    }
> +
>      return ret;
>  }
>  
> +static void cd_read_sector_cb(void *opaque, int ret)
> +{
> +    IDEState *s = opaque;
> +
> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
> +
> +#ifdef DEBUG_IDE_ATAPI
> +    printf("cd_read_sector_cb: lba=%d ret=%d\n", s->lba, ret);
> +#endif
> +
> +    if (ret < 0) {
> +        ide_atapi_io_error(s, ret);
> +        return;
> +    }
> +
> +    if (s->cd_sector_size == 2352) {
> +        cd_data_to_raw(s->io_buffer, s->lba);
> +    }
> +
> +    s->lba++;
> +    s->io_buffer_index = 0;
> +    s->status &= ~BUSY_STAT;
> +
> +    ide_atapi_cmd_reply_end(s);
> +}
> +
> +static int cd_read_sector(IDEState *s, int lba, void *buf)
> +{
> +    if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) {
> +        return -EINVAL;
> +    }
> +
> +    s->iov.iov_base = buf;
> +    if (s->cd_sector_size == 2352) {
> +        buf += 16;
> +    }
> +
> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> +
> +#ifdef DEBUG_IDE_ATAPI
> +    printf("cd_read_sector: lba=%d\n", lba);
> +#endif
> +
> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
> +                      cd_read_sector_cb, s) == NULL) {
> +        return -EIO;
> +    }
> +
> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> +
> +    s->status |= BUSY_STAT;
> +    return 0;
> +}
> +
>  void ide_atapi_cmd_ok(IDEState *s)
>  {
>      s->error = 0;
> @@ -182,18 +249,27 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>          ide_atapi_cmd_ok(s);
>          ide_set_irq(s->bus);
>  #ifdef DEBUG_IDE_ATAPI
> -        printf("status=0x%x\n", s->status);
> +        printf("end of transfer, status=0x%x\n", s->status);
>  #endif
>      } else {
>          /* see if a new sector must be read */
>          if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
> -            ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
> -            if (ret < 0) {
> -                ide_atapi_io_error(s, ret);
> +            if (!s->elementary_transfer_size) {
> +                ret = cd_read_sector(s, s->lba, s->io_buffer);
> +                if (ret < 0) {
> +                    ide_atapi_io_error(s, ret);
> +                }
>                  return;
> +            } else {
> +                /* rebuffering within an elementary transfer is
> +                 * only possible with a sync request because we
> +                 * end up with a race condition otherwise */
> +                ret = cd_read_sector_sync(s, s->lba, s->io_buffer);
> +                if (ret < 0) {
> +                    ide_atapi_io_error(s, ret);
> +                    return;
> +                }
>              }
> -            s->lba++;
> -            s->io_buffer_index = 0;
>          }
>          if (s->elementary_transfer_size > 0) {
>              /* there are some data left to transmit in this elementary
> @@ -275,7 +351,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>      s->io_buffer_index = sector_size;
>      s->cd_sector_size = sector_size;
>  
> -    s->status = READY_STAT | SEEK_STAT;
>      ide_atapi_cmd_reply_end(s);
>  }
>  
> 

This patch looks good to me, apart from Stefan's other comments. Will
you be sending a V3? I can try to merge it for this week before the hard
freeze hits.

--js
Peter Lieven Nov. 3, 2015, 7:03 a.m. UTC | #4
Am 03.11.2015 um 01:48 schrieb John Snow:
>
> On 10/12/2015 08:27 AM, Peter Lieven wrote:
>> PIO read requests on the ATAPI interface used to be sync blk requests.
>> This has two significant drawbacks. First the main loop hangs util an
>> I/O request is completed and secondly if the I/O request does not
>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   hw/ide/atapi.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
>>   1 file changed, 84 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>> index 747f466..2271ea2 100644
>> --- a/hw/ide/atapi.c
>> +++ b/hw/ide/atapi.c
>> @@ -105,11 +105,16 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>       memset(buf, 0, 288);
>>   }
>>   
>> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
>> +static int
>> +cd_read_sector_sync(IDEState *s, int lba, uint8_t *buf)
>>   {
>>       int ret;
>>   
>> -    switch(sector_size) {
>> +#ifdef DEBUG_IDE_ATAPI
>> +    printf("cd_read_sector_sync: lba=%d\n", lba);
>> +#endif
>> +
>> +    switch (s->cd_sector_size) {
>>       case 2048:
>>           block_acct_start(blk_get_stats(s->blk), &s->acct,
>>                            4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>> @@ -129,9 +134,71 @@ static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
>>           ret = -EIO;
>>           break;
>>       }
>> +
>> +    if (!ret) {
>> +        s->lba++;
>> +        s->io_buffer_index = 0;
>> +    }
>> +
>>       return ret;
>>   }
>>   
>> +static void cd_read_sector_cb(void *opaque, int ret)
>> +{
>> +    IDEState *s = opaque;
>> +
>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>> +
>> +#ifdef DEBUG_IDE_ATAPI
>> +    printf("cd_read_sector_cb: lba=%d ret=%d\n", s->lba, ret);
>> +#endif
>> +
>> +    if (ret < 0) {
>> +        ide_atapi_io_error(s, ret);
>> +        return;
>> +    }
>> +
>> +    if (s->cd_sector_size == 2352) {
>> +        cd_data_to_raw(s->io_buffer, s->lba);
>> +    }
>> +
>> +    s->lba++;
>> +    s->io_buffer_index = 0;
>> +    s->status &= ~BUSY_STAT;
>> +
>> +    ide_atapi_cmd_reply_end(s);
>> +}
>> +
>> +static int cd_read_sector(IDEState *s, int lba, void *buf)
>> +{
>> +    if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    s->iov.iov_base = buf;
>> +    if (s->cd_sector_size == 2352) {
>> +        buf += 16;
>> +    }
>> +
>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>> +
>> +#ifdef DEBUG_IDE_ATAPI
>> +    printf("cd_read_sector: lba=%d\n", lba);
>> +#endif
>> +
>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>> +                      cd_read_sector_cb, s) == NULL) {
>> +        return -EIO;
>> +    }
>> +
>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>> +
>> +    s->status |= BUSY_STAT;
>> +    return 0;
>> +}
>> +
>>   void ide_atapi_cmd_ok(IDEState *s)
>>   {
>>       s->error = 0;
>> @@ -182,18 +249,27 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>>           ide_atapi_cmd_ok(s);
>>           ide_set_irq(s->bus);
>>   #ifdef DEBUG_IDE_ATAPI
>> -        printf("status=0x%x\n", s->status);
>> +        printf("end of transfer, status=0x%x\n", s->status);
>>   #endif
>>       } else {
>>           /* see if a new sector must be read */
>>           if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
>> -            ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
>> -            if (ret < 0) {
>> -                ide_atapi_io_error(s, ret);
>> +            if (!s->elementary_transfer_size) {
>> +                ret = cd_read_sector(s, s->lba, s->io_buffer);
>> +                if (ret < 0) {
>> +                    ide_atapi_io_error(s, ret);
>> +                }
>>                   return;
>> +            } else {
>> +                /* rebuffering within an elementary transfer is
>> +                 * only possible with a sync request because we
>> +                 * end up with a race condition otherwise */
>> +                ret = cd_read_sector_sync(s, s->lba, s->io_buffer);
>> +                if (ret < 0) {
>> +                    ide_atapi_io_error(s, ret);
>> +                    return;
>> +                }
>>               }
>> -            s->lba++;
>> -            s->io_buffer_index = 0;
>>           }
>>           if (s->elementary_transfer_size > 0) {
>>               /* there are some data left to transmit in this elementary
>> @@ -275,7 +351,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>>       s->io_buffer_index = sector_size;
>>       s->cd_sector_size = sector_size;
>>   
>> -    s->status = READY_STAT | SEEK_STAT;
>>       ide_atapi_cmd_reply_end(s);
>>   }
>>   
>>
> This patch looks good to me, apart from Stefan's other comments. Will
> you be sending a V3? I can try to merge it for this week before the hard
> freeze hits.

I will look at this today.

Peter
diff mbox

Patch

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 747f466..2271ea2 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -105,11 +105,16 @@  static void cd_data_to_raw(uint8_t *buf, int lba)
     memset(buf, 0, 288);
 }
 
-static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
+static int
+cd_read_sector_sync(IDEState *s, int lba, uint8_t *buf)
 {
     int ret;
 
-    switch(sector_size) {
+#ifdef DEBUG_IDE_ATAPI
+    printf("cd_read_sector_sync: lba=%d\n", lba);
+#endif
+
+    switch (s->cd_sector_size) {
     case 2048:
         block_acct_start(blk_get_stats(s->blk), &s->acct,
                          4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
@@ -129,9 +134,71 @@  static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
         ret = -EIO;
         break;
     }
+
+    if (!ret) {
+        s->lba++;
+        s->io_buffer_index = 0;
+    }
+
     return ret;
 }
 
+static void cd_read_sector_cb(void *opaque, int ret)
+{
+    IDEState *s = opaque;
+
+    block_acct_done(blk_get_stats(s->blk), &s->acct);
+
+#ifdef DEBUG_IDE_ATAPI
+    printf("cd_read_sector_cb: lba=%d ret=%d\n", s->lba, ret);
+#endif
+
+    if (ret < 0) {
+        ide_atapi_io_error(s, ret);
+        return;
+    }
+
+    if (s->cd_sector_size == 2352) {
+        cd_data_to_raw(s->io_buffer, s->lba);
+    }
+
+    s->lba++;
+    s->io_buffer_index = 0;
+    s->status &= ~BUSY_STAT;
+
+    ide_atapi_cmd_reply_end(s);
+}
+
+static int cd_read_sector(IDEState *s, int lba, void *buf)
+{
+    if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) {
+        return -EINVAL;
+    }
+
+    s->iov.iov_base = buf;
+    if (s->cd_sector_size == 2352) {
+        buf += 16;
+    }
+
+    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
+    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+
+#ifdef DEBUG_IDE_ATAPI
+    printf("cd_read_sector: lba=%d\n", lba);
+#endif
+
+    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
+                      cd_read_sector_cb, s) == NULL) {
+        return -EIO;
+    }
+
+    block_acct_start(blk_get_stats(s->blk), &s->acct,
+                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
+
+    s->status |= BUSY_STAT;
+    return 0;
+}
+
 void ide_atapi_cmd_ok(IDEState *s)
 {
     s->error = 0;
@@ -182,18 +249,27 @@  void ide_atapi_cmd_reply_end(IDEState *s)
         ide_atapi_cmd_ok(s);
         ide_set_irq(s->bus);
 #ifdef DEBUG_IDE_ATAPI
-        printf("status=0x%x\n", s->status);
+        printf("end of transfer, status=0x%x\n", s->status);
 #endif
     } else {
         /* see if a new sector must be read */
         if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
-            ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
-            if (ret < 0) {
-                ide_atapi_io_error(s, ret);
+            if (!s->elementary_transfer_size) {
+                ret = cd_read_sector(s, s->lba, s->io_buffer);
+                if (ret < 0) {
+                    ide_atapi_io_error(s, ret);
+                }
                 return;
+            } else {
+                /* rebuffering within an elementary transfer is
+                 * only possible with a sync request because we
+                 * end up with a race condition otherwise */
+                ret = cd_read_sector_sync(s, s->lba, s->io_buffer);
+                if (ret < 0) {
+                    ide_atapi_io_error(s, ret);
+                    return;
+                }
             }
-            s->lba++;
-            s->io_buffer_index = 0;
         }
         if (s->elementary_transfer_size > 0) {
             /* there are some data left to transmit in this elementary
@@ -275,7 +351,6 @@  static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
     s->io_buffer_index = sector_size;
     s->cd_sector_size = sector_size;
 
-    s->status = READY_STAT | SEEK_STAT;
     ide_atapi_cmd_reply_end(s);
 }