diff mbox

[1/5] ide/atapi: make PIO read requests async

Message ID 1442838328-23117-2-git-send-email-pl@kamp.de
State New
Headers show

Commit Message

Peter Lieven Sept. 21, 2015, 12:25 p.m. UTC
PIO read requests on the ATAPI interface used to be sync blk requests.
This has to siginificant drawbacks. First the main loop hangs util an
I/O request is completed and secondly if the I/O request does not
complete (e.g. due to an unresponsive storage) Qemu hangs completely.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 43 insertions(+), 26 deletions(-)

Comments

John Snow Oct. 2, 2015, 9:02 p.m. UTC | #1
On 09/21/2015 08:25 AM, Peter Lieven wrote:
> PIO read requests on the ATAPI interface used to be sync blk requests.
> This has to siginificant drawbacks. First the main loop hangs util an
> I/O request is completed and secondly if the I/O request does not
> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
> 
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 43 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index 747f466..9257e1c 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>      memset(buf, 0, 288);
>  }
>  
> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
> +static void cd_read_sector_cb(void *opaque, int ret)
>  {
> -    int ret;
> +    IDEState *s = opaque;
>  
> -    switch(sector_size) {
> -    case 2048:
> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
> -        break;
> -    case 2352:
> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
> -        if (ret < 0)
> -            return ret;
> -        cd_data_to_raw(buf, lba);
> -        break;
> -    default:
> -        ret = -EIO;
> -        break;
> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
> +
> +    if (ret < 0) {
> +        ide_atapi_io_error(s, ret);
> +        return;
> +    }
> +
> +    if (s->cd_sector_size == 2352) {
> +        cd_data_to_raw(s->io_buffer, s->lba);
>      }
> -    return ret;
> +
> +    s->lba++;
> +    s->io_buffer_index = 0;
> +    s->status &= ~BUSY_STAT;
> +
> +    ide_atapi_cmd_reply_end(s);
> +}
> +
> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
> +{
> +    if (sector_size != 2048 && sector_size != 2352) {
> +        return -EINVAL;
> +    }
> +
> +    s->iov.iov_base = buf;
> +    if (sector_size == 2352) {
> +        buf += 4;
> +    }
> +
> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> +
> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
> +                      cd_read_sector_cb, s) == NULL) {
> +        return -EIO;
> +    }
> +
> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> +    s->status |= BUSY_STAT;
> +    return 0;
>  }
>  
>  void ide_atapi_cmd_ok(IDEState *s)
> @@ -190,10 +210,8 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>              ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
>              if (ret < 0) {
>                  ide_atapi_io_error(s, ret);
> -                return;
>              }
> -            s->lba++;
> -            s->io_buffer_index = 0;
> +            return;
>          }
>          if (s->elementary_transfer_size > 0) {
>              /* there are some data left to transmit in this elementary
> @@ -275,7 +293,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>      s->io_buffer_index = sector_size;
>      s->cd_sector_size = sector_size;
>  
> -    s->status = READY_STAT | SEEK_STAT;
>      ide_atapi_cmd_reply_end(s);
>  }
>  
> 

For me, this hangs the pio_large test in tests/ide-test, path:
/x86_64/ide/cdrom/pio_large

...I'll debug this over the weekend.

I think I checked this test in after you wrote this patch series, with a
mind of being able to test ATAPI for Alexander's ATAPI-SCSI bridge, but
it seems useful here :)
John Snow Oct. 5, 2015, 9:15 p.m. UTC | #2
On 09/21/2015 08:25 AM, Peter Lieven wrote:
> PIO read requests on the ATAPI interface used to be sync blk requests.
> This has to siginificant drawbacks. First the main loop hangs util an
> I/O request is completed and secondly if the I/O request does not
> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
> 
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 43 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index 747f466..9257e1c 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>      memset(buf, 0, 288);
>  }
>  
> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
> +static void cd_read_sector_cb(void *opaque, int ret)
>  {
> -    int ret;
> +    IDEState *s = opaque;
>  
> -    switch(sector_size) {
> -    case 2048:
> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
> -        break;
> -    case 2352:
> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
> -        if (ret < 0)
> -            return ret;
> -        cd_data_to_raw(buf, lba);
> -        break;
> -    default:
> -        ret = -EIO;
> -        break;
> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
> +
> +    if (ret < 0) {
> +        ide_atapi_io_error(s, ret);
> +        return;
> +    }
> +
> +    if (s->cd_sector_size == 2352) {
> +        cd_data_to_raw(s->io_buffer, s->lba);
>      }
> -    return ret;
> +
> +    s->lba++;
> +    s->io_buffer_index = 0;
> +    s->status &= ~BUSY_STAT;
> +
> +    ide_atapi_cmd_reply_end(s);
> +}
> +
> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
> +{
> +    if (sector_size != 2048 && sector_size != 2352) {
> +        return -EINVAL;
> +    }
> +
> +    s->iov.iov_base = buf;
> +    if (sector_size == 2352) {
> +        buf += 4;
> +    }
> +
> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> +
> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
> +                      cd_read_sector_cb, s) == NULL) {
> +        return -EIO;
> +    }
> +
> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> +    s->status |= BUSY_STAT;
> +    return 0;
>  }
>  

We discussed this off-list a bit, but for upstream synchronization:

Unfortunately, I believe making cd_read_sector here non-blocking makes
ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
s->end_transfer_func() nonblocking, which functions like ide_data_readw
are not prepared to cope with.

My suggestion is to buffer an entire DRQ block of data at once
(byte_count_limit) to avoid the problem.

Thanks,
--js

>  void ide_atapi_cmd_ok(IDEState *s)
> @@ -190,10 +210,8 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>              ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
>              if (ret < 0) {
>                  ide_atapi_io_error(s, ret);
> -                return;
>              }
> -            s->lba++;
> -            s->io_buffer_index = 0;
> +            return;
>          }
>          if (s->elementary_transfer_size > 0) {
>              /* there are some data left to transmit in this elementary
> @@ -275,7 +293,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>      s->io_buffer_index = sector_size;
>      s->cd_sector_size = sector_size;
>  
> -    s->status = READY_STAT | SEEK_STAT;
>      ide_atapi_cmd_reply_end(s);
>  }
>  
>
Kevin Wolf Oct. 6, 2015, 8:57 a.m. UTC | #3
Am 05.10.2015 um 23:15 hat John Snow geschrieben:
> 
> 
> On 09/21/2015 08:25 AM, Peter Lieven wrote:
> > PIO read requests on the ATAPI interface used to be sync blk requests.
> > This has to siginificant drawbacks. First the main loop hangs util an
> > I/O request is completed and secondly if the I/O request does not
> > complete (e.g. due to an unresponsive storage) Qemu hangs completely.
> > 
> > Signed-off-by: Peter Lieven <pl@kamp.de>
> > ---
> >  hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++----------------------
> >  1 file changed, 43 insertions(+), 26 deletions(-)
> > 
> > diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> > index 747f466..9257e1c 100644
> > --- a/hw/ide/atapi.c
> > +++ b/hw/ide/atapi.c
> > @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
> >      memset(buf, 0, 288);
> >  }
> >  
> > -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
> > +static void cd_read_sector_cb(void *opaque, int ret)
> >  {
> > -    int ret;
> > +    IDEState *s = opaque;
> >  
> > -    switch(sector_size) {
> > -    case 2048:
> > -        block_acct_start(blk_get_stats(s->blk), &s->acct,
> > -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> > -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
> > -        block_acct_done(blk_get_stats(s->blk), &s->acct);
> > -        break;
> > -    case 2352:
> > -        block_acct_start(blk_get_stats(s->blk), &s->acct,
> > -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> > -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
> > -        block_acct_done(blk_get_stats(s->blk), &s->acct);
> > -        if (ret < 0)
> > -            return ret;
> > -        cd_data_to_raw(buf, lba);
> > -        break;
> > -    default:
> > -        ret = -EIO;
> > -        break;
> > +    block_acct_done(blk_get_stats(s->blk), &s->acct);
> > +
> > +    if (ret < 0) {
> > +        ide_atapi_io_error(s, ret);
> > +        return;
> > +    }
> > +
> > +    if (s->cd_sector_size == 2352) {
> > +        cd_data_to_raw(s->io_buffer, s->lba);
> >      }
> > -    return ret;
> > +
> > +    s->lba++;
> > +    s->io_buffer_index = 0;
> > +    s->status &= ~BUSY_STAT;
> > +
> > +    ide_atapi_cmd_reply_end(s);
> > +}
> > +
> > +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
> > +{
> > +    if (sector_size != 2048 && sector_size != 2352) {
> > +        return -EINVAL;
> > +    }
> > +
> > +    s->iov.iov_base = buf;
> > +    if (sector_size == 2352) {
> > +        buf += 4;
> > +    }

This doesn't look quite right, buf is never read after this.

Also, why +=4 when it was originally buf + 16?

> > +
> > +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> > +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> > +
> > +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
> > +                      cd_read_sector_cb, s) == NULL) {
> > +        return -EIO;
> > +    }
> > +
> > +    block_acct_start(blk_get_stats(s->blk), &s->acct,
> > +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> > +    s->status |= BUSY_STAT;
> > +    return 0;
> >  }
> >  
> 
> We discussed this off-list a bit, but for upstream synchronization:
> 
> Unfortunately, I believe making cd_read_sector here non-blocking makes
> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
> s->end_transfer_func() nonblocking, which functions like ide_data_readw
> are not prepared to cope with.

I don't think that's a problem as long as BSY is set while the
asynchronous command is running and DRQ is cleared. The latter will
protect ide_data_readw(). ide_sector_read() does essentially the same
thing.

Or maybe I'm just missing what you're trying to say.

> My suggestion is to buffer an entire DRQ block of data at once
> (byte_count_limit) to avoid the problem.

No matter whether there is a problem or not, buffering more data at once
(and therefore doing less requests) is better for performance anyway.

Kevin

> >  void ide_atapi_cmd_ok(IDEState *s)
> > @@ -190,10 +210,8 @@ void ide_atapi_cmd_reply_end(IDEState *s)
> >              ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
> >              if (ret < 0) {
> >                  ide_atapi_io_error(s, ret);
> > -                return;
> >              }
> > -            s->lba++;
> > -            s->io_buffer_index = 0;
> > +            return;
> >          }
> >          if (s->elementary_transfer_size > 0) {
> >              /* there are some data left to transmit in this elementary
> > @@ -275,7 +293,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
> >      s->io_buffer_index = sector_size;
> >      s->cd_sector_size = sector_size;
> >  
> > -    s->status = READY_STAT | SEEK_STAT;
> >      ide_atapi_cmd_reply_end(s);
> >  }
> >  
> >
Peter Lieven Oct. 6, 2015, 9:20 a.m. UTC | #4
Am 06.10.2015 um 10:57 schrieb Kevin Wolf:
> Am 05.10.2015 um 23:15 hat John Snow geschrieben:
>>
>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>> This has to siginificant drawbacks. First the main loop hangs util an
>>> I/O request is completed and secondly if the I/O request does not
>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>
>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>> ---
>>>   hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++----------------------
>>>   1 file changed, 43 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>> index 747f466..9257e1c 100644
>>> --- a/hw/ide/atapi.c
>>> +++ b/hw/ide/atapi.c
>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>       memset(buf, 0, 288);
>>>   }
>>>   
>>> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>   {
>>> -    int ret;
>>> +    IDEState *s = opaque;
>>>   
>>> -    switch(sector_size) {
>>> -    case 2048:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        break;
>>> -    case 2352:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        if (ret < 0)
>>> -            return ret;
>>> -        cd_data_to_raw(buf, lba);
>>> -        break;
>>> -    default:
>>> -        ret = -EIO;
>>> -        break;
>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> +
>>> +    if (ret < 0) {
>>> +        ide_atapi_io_error(s, ret);
>>> +        return;
>>> +    }
>>> +
>>> +    if (s->cd_sector_size == 2352) {
>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>       }
>>> -    return ret;
>>> +
>>> +    s->lba++;
>>> +    s->io_buffer_index = 0;
>>> +    s->status &= ~BUSY_STAT;
>>> +
>>> +    ide_atapi_cmd_reply_end(s);
>>> +}
>>> +
>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
>>> +{
>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    s->iov.iov_base = buf;
>>> +    if (sector_size == 2352) {
>>> +        buf += 4;
>>> +    }
> This doesn't look quite right, buf is never read after this.
>
> Also, why +=4 when it was originally buf + 16?

You are right. I mixed that up.

>
>>> +
>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>> +
>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>> +                      cd_read_sector_cb, s) == NULL) {
>>> +        return -EIO;
>>> +    }
>>> +
>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> +    s->status |= BUSY_STAT;
>>> +    return 0;
>>>   }
>>>   
>> We discussed this off-list a bit, but for upstream synchronization:
>>
>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>> are not prepared to cope with.
> I don't think that's a problem as long as BSY is set while the
> asynchronous command is running and DRQ is cleared. The latter will
> protect ide_data_readw(). ide_sector_read() does essentially the same
> thing.

  I was thinking the same. Without the BSY its not working at all.

>
> Or maybe I'm just missing what you're trying to say.
>
>> My suggestion is to buffer an entire DRQ block of data at once
>> (byte_count_limit) to avoid the problem.
> No matter whether there is a problem or not, buffering more data at once
> (and therefore doing less requests) is better for performance anyway.

Its possible to do only one read in the backend and read the whole
request into the IO buffer. I send a follow-up.

Maybe do you have a pointer to the test tool that John mentioned?

Peter
Laszlo Ersek Oct. 6, 2015, 1:05 p.m. UTC | #5
On 09/21/15 14:25, Peter Lieven wrote:
> PIO read requests on the ATAPI interface used to be sync blk requests.
> This has to siginificant drawbacks. First the main loop hangs util an

Trivial comments:
- s/to/two/
- s/siginificant/significant/

Thanks
Laszlo

> I/O request is completed and secondly if the I/O request does not
> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
> 
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 43 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index 747f466..9257e1c 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>      memset(buf, 0, 288);
>  }
>  
> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
> +static void cd_read_sector_cb(void *opaque, int ret)
>  {
> -    int ret;
> +    IDEState *s = opaque;
>  
> -    switch(sector_size) {
> -    case 2048:
> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
> -        break;
> -    case 2352:
> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
> -        if (ret < 0)
> -            return ret;
> -        cd_data_to_raw(buf, lba);
> -        break;
> -    default:
> -        ret = -EIO;
> -        break;
> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
> +
> +    if (ret < 0) {
> +        ide_atapi_io_error(s, ret);
> +        return;
> +    }
> +
> +    if (s->cd_sector_size == 2352) {
> +        cd_data_to_raw(s->io_buffer, s->lba);
>      }
> -    return ret;
> +
> +    s->lba++;
> +    s->io_buffer_index = 0;
> +    s->status &= ~BUSY_STAT;
> +
> +    ide_atapi_cmd_reply_end(s);
> +}
> +
> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
> +{
> +    if (sector_size != 2048 && sector_size != 2352) {
> +        return -EINVAL;
> +    }
> +
> +    s->iov.iov_base = buf;
> +    if (sector_size == 2352) {
> +        buf += 4;
> +    }
> +
> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> +
> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
> +                      cd_read_sector_cb, s) == NULL) {
> +        return -EIO;
> +    }
> +
> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> +    s->status |= BUSY_STAT;
> +    return 0;
>  }
>  
>  void ide_atapi_cmd_ok(IDEState *s)
> @@ -190,10 +210,8 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>              ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
>              if (ret < 0) {
>                  ide_atapi_io_error(s, ret);
> -                return;
>              }
> -            s->lba++;
> -            s->io_buffer_index = 0;
> +            return;
>          }
>          if (s->elementary_transfer_size > 0) {
>              /* there are some data left to transmit in this elementary
> @@ -275,7 +293,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>      s->io_buffer_index = sector_size;
>      s->cd_sector_size = sector_size;
>  
> -    s->status = READY_STAT | SEEK_STAT;
>      ide_atapi_cmd_reply_end(s);
>  }
>  
>
John Snow Oct. 6, 2015, 3:54 p.m. UTC | #6
On 10/06/2015 04:57 AM, Kevin Wolf wrote:
> Am 05.10.2015 um 23:15 hat John Snow geschrieben:
>>
>>
>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>> This has to siginificant drawbacks. First the main loop hangs util an
>>> I/O request is completed and secondly if the I/O request does not
>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>
>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>> ---
>>>  hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++----------------------
>>>  1 file changed, 43 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>> index 747f466..9257e1c 100644
>>> --- a/hw/ide/atapi.c
>>> +++ b/hw/ide/atapi.c
>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>      memset(buf, 0, 288);
>>>  }
>>>  
>>> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>  {
>>> -    int ret;
>>> +    IDEState *s = opaque;
>>>  
>>> -    switch(sector_size) {
>>> -    case 2048:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        break;
>>> -    case 2352:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        if (ret < 0)
>>> -            return ret;
>>> -        cd_data_to_raw(buf, lba);
>>> -        break;
>>> -    default:
>>> -        ret = -EIO;
>>> -        break;
>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> +
>>> +    if (ret < 0) {
>>> +        ide_atapi_io_error(s, ret);
>>> +        return;
>>> +    }
>>> +
>>> +    if (s->cd_sector_size == 2352) {
>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>      }
>>> -    return ret;
>>> +
>>> +    s->lba++;
>>> +    s->io_buffer_index = 0;
>>> +    s->status &= ~BUSY_STAT;
>>> +
>>> +    ide_atapi_cmd_reply_end(s);
>>> +}
>>> +
>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
>>> +{
>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    s->iov.iov_base = buf;
>>> +    if (sector_size == 2352) {
>>> +        buf += 4;
>>> +    }
> 
> This doesn't look quite right, buf is never read after this.
> 
> Also, why +=4 when it was originally buf + 16?
> 
>>> +
>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>> +
>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>> +                      cd_read_sector_cb, s) == NULL) {
>>> +        return -EIO;
>>> +    }
>>> +
>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> +    s->status |= BUSY_STAT;
>>> +    return 0;
>>>  }
>>>  
>>
>> We discussed this off-list a bit, but for upstream synchronization:
>>
>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>> are not prepared to cope with.
> 
> I don't think that's a problem as long as BSY is set while the
> asynchronous command is running and DRQ is cleared. The latter will
> protect ide_data_readw(). ide_sector_read() does essentially the same
> thing.
> 
> Or maybe I'm just missing what you're trying to say.
> 

It will be correct from a code standpoint, but I don't think the guest
*expects* DRQ to become re-set before byte_count_limit is exhausted.

In the synchronous version of the code, DRQ flickers while we rebuffer
s->io_buffer, but since it's synchronous, the guest *never sees this*.

The guest does not necessarily have any reason or motivation to check if
DRQ is still set after 2048 bytes -- is that recommended in the spec?

("Warning! The drive may decide to rebuffer arbitrarily while you read,
so check if DRQ is still set before each read to the data register!" ??)

>> My suggestion is to buffer an entire DRQ block of data at once
>> (byte_count_limit) to avoid the problem.
> 
> No matter whether there is a problem or not, buffering more data at once
> (and therefore doing less requests) is better for performance anyway.
> 
> Kevin
> 
>>>  void ide_atapi_cmd_ok(IDEState *s)
>>> @@ -190,10 +210,8 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>>>              ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
>>>              if (ret < 0) {
>>>                  ide_atapi_io_error(s, ret);
>>> -                return;
>>>              }
>>> -            s->lba++;
>>> -            s->io_buffer_index = 0;
>>> +            return;
>>>          }
>>>          if (s->elementary_transfer_size > 0) {
>>>              /* there are some data left to transmit in this elementary
>>> @@ -275,7 +293,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>>>      s->io_buffer_index = sector_size;
>>>      s->cd_sector_size = sector_size;
>>>  
>>> -    s->status = READY_STAT | SEEK_STAT;
>>>      ide_atapi_cmd_reply_end(s);
>>>  }
>>>  
>>>
John Snow Oct. 6, 2015, 5:07 p.m. UTC | #7
On 10/06/2015 05:20 AM, Peter Lieven wrote:
> Am 06.10.2015 um 10:57 schrieb Kevin Wolf:
>> Am 05.10.2015 um 23:15 hat John Snow geschrieben:
>>>
>>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>>> This has to siginificant drawbacks. First the main loop hangs util an
>>>> I/O request is completed and secondly if the I/O request does not
>>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>>
>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>> ---
>>>>   hw/ide/atapi.c | 69
>>>> ++++++++++++++++++++++++++++++++++++----------------------
>>>>   1 file changed, 43 insertions(+), 26 deletions(-)
>>>>
>>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>>> index 747f466..9257e1c 100644
>>>> --- a/hw/ide/atapi.c
>>>> +++ b/hw/ide/atapi.c
>>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>>       memset(buf, 0, 288);
>>>>   }
>>>>   -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>>>> sector_size)
>>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>>   {
>>>> -    int ret;
>>>> +    IDEState *s = opaque;
>>>>   -    switch(sector_size) {
>>>> -    case 2048:
>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>> -        break;
>>>> -    case 2352:
>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>> -        if (ret < 0)
>>>> -            return ret;
>>>> -        cd_data_to_raw(buf, lba);
>>>> -        break;
>>>> -    default:
>>>> -        ret = -EIO;
>>>> -        break;
>>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>> +
>>>> +    if (ret < 0) {
>>>> +        ide_atapi_io_error(s, ret);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (s->cd_sector_size == 2352) {
>>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>>       }
>>>> -    return ret;
>>>> +
>>>> +    s->lba++;
>>>> +    s->io_buffer_index = 0;
>>>> +    s->status &= ~BUSY_STAT;
>>>> +
>>>> +    ide_atapi_cmd_reply_end(s);
>>>> +}
>>>> +
>>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>>>> sector_size)
>>>> +{
>>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    s->iov.iov_base = buf;
>>>> +    if (sector_size == 2352) {
>>>> +        buf += 4;
>>>> +    }
>> This doesn't look quite right, buf is never read after this.
>>
>> Also, why +=4 when it was originally buf + 16?
> 
> You are right. I mixed that up.
> 
>>
>>>> +
>>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>>> +
>>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>>> +                      cd_read_sector_cb, s) == NULL) {
>>>> +        return -EIO;
>>>> +    }
>>>> +
>>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>> +    s->status |= BUSY_STAT;
>>>> +    return 0;
>>>>   }
>>>>   
>>> We discussed this off-list a bit, but for upstream synchronization:
>>>
>>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>>> are not prepared to cope with.
>> I don't think that's a problem as long as BSY is set while the
>> asynchronous command is running and DRQ is cleared. The latter will
>> protect ide_data_readw(). ide_sector_read() does essentially the same
>> thing.
> 
>  I was thinking the same. Without the BSY its not working at all.
> 
>>
>> Or maybe I'm just missing what you're trying to say.
>>
>>> My suggestion is to buffer an entire DRQ block of data at once
>>> (byte_count_limit) to avoid the problem.
>> No matter whether there is a problem or not, buffering more data at once
>> (and therefore doing less requests) is better for performance anyway.
> 
> Its possible to do only one read in the backend and read the whole
> request into the IO buffer. I send a follow-up.
> 

Be cautious: we only have 128K (+4 bytes) to play with in the io_buffer
and the READ10 cdb can request up to 128MiB! For performance, it might
be nice to always buffer something like:

MIN(128K, nb_sectors * sector_size)

and then as the guest drains the DRQ block of size byte_count_limit
which can only be at largest 0xFFFE (we can fit in at least two of these
per io_buffer refill) we can just shift the data_ptr and data_end
pointers to utilize io_buffer like a ring buffer.

Because the guest can at most fetch 0xfffe bytes at a time, it will tend
to leave at least 4 bytes left over from a 64 block read. Luckily, we've
got 4 extra bytes in s->io_buffer, so with a ring buffer we can always
rebuffer *at least* two full DRQ blocks of data at a time.

The routine would basically look like this:

- No DRQ blocks buffered, so read up to 64 blocks or however many are
left for our transfer
- If we have at least one full DRQ block allocated, start the transfer
and send an interrupt
- If we ran out of DRQ blocks, go back to the top and buffer them.

This would eliminate the need for code stanza #3 in
ide_atapi_cmd_reply_end, which re-starts a transfer without signaling to
the guest. We'd only have:

ide_atapi_cmd_reply_end(...) {
  if (packet_transfer_size == 0) { end(...); return; }
  if (blocks_buffered < 1) { async_buffer_blocks(...); return; }
  ide_transfer_start(...)
  ide_set_irq(s->bus);
}


which is a good deal simpler than what we have now, though I need to
look into the formatting of raw CD data a little more to make sure my
numbers make sense... it may not be quite so easy to buffer multiple DRQ
blocks in some cases, but so it goes -- we should always be able to
buffer at least one.

> Maybe do you have a pointer to the test tool that John mentioned?
> 
> Peter
>
Peter Lieven Oct. 6, 2015, 5:12 p.m. UTC | #8
> Am 06.10.2015 um 19:07 schrieb John Snow <jsnow@redhat.com>:
> 
> 
> 
>> On 10/06/2015 05:20 AM, Peter Lieven wrote:
>>> Am 06.10.2015 um 10:57 schrieb Kevin Wolf:
>>> Am 05.10.2015 um 23:15 hat John Snow geschrieben:
>>>> 
>>>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>>>> This has to siginificant drawbacks. First the main loop hangs util an
>>>>> I/O request is completed and secondly if the I/O request does not
>>>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>>> 
>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>> ---
>>>>>  hw/ide/atapi.c | 69
>>>>> ++++++++++++++++++++++++++++++++++++----------------------
>>>>>  1 file changed, 43 insertions(+), 26 deletions(-)
>>>>> 
>>>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>>>> index 747f466..9257e1c 100644
>>>>> --- a/hw/ide/atapi.c
>>>>> +++ b/hw/ide/atapi.c
>>>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>>>      memset(buf, 0, 288);
>>>>>  }
>>>>>  -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>>>>> sector_size)
>>>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>>>  {
>>>>> -    int ret;
>>>>> +    IDEState *s = opaque;
>>>>>  -    switch(sector_size) {
>>>>> -    case 2048:
>>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>> -        break;
>>>>> -    case 2352:
>>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>> -        if (ret < 0)
>>>>> -            return ret;
>>>>> -        cd_data_to_raw(buf, lba);
>>>>> -        break;
>>>>> -    default:
>>>>> -        ret = -EIO;
>>>>> -        break;
>>>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>> +
>>>>> +    if (ret < 0) {
>>>>> +        ide_atapi_io_error(s, ret);
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    if (s->cd_sector_size == 2352) {
>>>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>>>      }
>>>>> -    return ret;
>>>>> +
>>>>> +    s->lba++;
>>>>> +    s->io_buffer_index = 0;
>>>>> +    s->status &= ~BUSY_STAT;
>>>>> +
>>>>> +    ide_atapi_cmd_reply_end(s);
>>>>> +}
>>>>> +
>>>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>>>>> sector_size)
>>>>> +{
>>>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>>>> +        return -EINVAL;
>>>>> +    }
>>>>> +
>>>>> +    s->iov.iov_base = buf;
>>>>> +    if (sector_size == 2352) {
>>>>> +        buf += 4;
>>>>> +    }
>>> This doesn't look quite right, buf is never read after this.
>>> 
>>> Also, why +=4 when it was originally buf + 16?
>> 
>> You are right. I mixed that up.
>> 
>>> 
>>>>> +
>>>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>>>> +
>>>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>>>> +                      cd_read_sector_cb, s) == NULL) {
>>>>> +        return -EIO;
>>>>> +    }
>>>>> +
>>>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>> +    s->status |= BUSY_STAT;
>>>>> +    return 0;
>>>>>  }
>>>> We discussed this off-list a bit, but for upstream synchronization:
>>>> 
>>>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>>>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>>>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>>>> are not prepared to cope with.
>>> I don't think that's a problem as long as BSY is set while the
>>> asynchronous command is running and DRQ is cleared. The latter will
>>> protect ide_data_readw(). ide_sector_read() does essentially the same
>>> thing.
>> 
>> I was thinking the same. Without the BSY its not working at all.
>> 
>>> 
>>> Or maybe I'm just missing what you're trying to say.
>>> 
>>>> My suggestion is to buffer an entire DRQ block of data at once
>>>> (byte_count_limit) to avoid the problem.
>>> No matter whether there is a problem or not, buffering more data at once
>>> (and therefore doing less requests) is better for performance anyway.
>> 
>> Its possible to do only one read in the backend and read the whole
>> request into the IO buffer. I send a follow-up.
> 
> Be cautious: we only have 128K (+4 bytes) to play with in the io_buffer
> and the READ10 cdb can request up to 128MiB! For performance, it might
> be nice to always buffer something like:
> 
> MIN(128K, nb_sectors * sector_size)

isnt nb_sectors limited to CD_MAX_SECTORS (32)?

Peter


> 
> and then as the guest drains the DRQ block of size byte_count_limit
> which can only be at largest 0xFFFE (we can fit in at least two of these
> per io_buffer refill) we can just shift the data_ptr and data_end
> pointers to utilize io_buffer like a ring buffer.
> 
> Because the guest can at most fetch 0xfffe bytes at a time, it will tend
> to leave at least 4 bytes left over from a 64 block read. Luckily, we've
> got 4 extra bytes in s->io_buffer, so with a ring buffer we can always
> rebuffer *at least* two full DRQ blocks of data at a time.
> 
> The routine would basically look like this:
> 
> - No DRQ blocks buffered, so read up to 64 blocks or however many are
> left for our transfer
> - If we have at least one full DRQ block allocated, start the transfer
> and send an interrupt
> - If we ran out of DRQ blocks, go back to the top and buffer them.
> 
> This would eliminate the need for code stanza #3 in
> ide_atapi_cmd_reply_end, which re-starts a transfer without signaling to
> the guest. We'd only have:
> 
> ide_atapi_cmd_reply_end(...) {
>  if (packet_transfer_size == 0) { end(...); return; }
>  if (blocks_buffered < 1) { async_buffer_blocks(...); return; }
>  ide_transfer_start(...)
>  ide_set_irq(s->bus);
> }
> 
> 
> which is a good deal simpler than what we have now, though I need to
> look into the formatting of raw CD data a little more to make sure my
> numbers make sense... it may not be quite so easy to buffer multiple DRQ
> blocks in some cases, but so it goes -- we should always be able to
> buffer at least one.
> 
>> Maybe do you have a pointer to the test tool that John mentioned?
>> 
>> Peter
>>
John Snow Oct. 6, 2015, 5:56 p.m. UTC | #9
On 10/06/2015 01:12 PM, Peter Lieven wrote:
> 
>> Am 06.10.2015 um 19:07 schrieb John Snow <jsnow@redhat.com>:
>>
>>
>>
>>> On 10/06/2015 05:20 AM, Peter Lieven wrote:
>>>> Am 06.10.2015 um 10:57 schrieb Kevin Wolf:
>>>> Am 05.10.2015 um 23:15 hat John Snow geschrieben:
>>>>>
>>>>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>>>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>>>>> This has to siginificant drawbacks. First the main loop hangs util an
>>>>>> I/O request is completed and secondly if the I/O request does not
>>>>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>>>>
>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>> ---
>>>>>>  hw/ide/atapi.c | 69
>>>>>> ++++++++++++++++++++++++++++++++++++----------------------
>>>>>>  1 file changed, 43 insertions(+), 26 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>>>>> index 747f466..9257e1c 100644
>>>>>> --- a/hw/ide/atapi.c
>>>>>> +++ b/hw/ide/atapi.c
>>>>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>>>>      memset(buf, 0, 288);
>>>>>>  }
>>>>>>  -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>>>>>> sector_size)
>>>>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>>>>  {
>>>>>> -    int ret;
>>>>>> +    IDEState *s = opaque;
>>>>>>  -    switch(sector_size) {
>>>>>> -    case 2048:
>>>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>>> -        break;
>>>>>> -    case 2352:
>>>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>>> -        if (ret < 0)
>>>>>> -            return ret;
>>>>>> -        cd_data_to_raw(buf, lba);
>>>>>> -        break;
>>>>>> -    default:
>>>>>> -        ret = -EIO;
>>>>>> -        break;
>>>>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>>> +
>>>>>> +    if (ret < 0) {
>>>>>> +        ide_atapi_io_error(s, ret);
>>>>>> +        return;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (s->cd_sector_size == 2352) {
>>>>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>>>>      }
>>>>>> -    return ret;
>>>>>> +
>>>>>> +    s->lba++;
>>>>>> +    s->io_buffer_index = 0;
>>>>>> +    s->status &= ~BUSY_STAT;
>>>>>> +
>>>>>> +    ide_atapi_cmd_reply_end(s);
>>>>>> +}
>>>>>> +
>>>>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>>>>>> sector_size)
>>>>>> +{
>>>>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>>>>> +        return -EINVAL;
>>>>>> +    }
>>>>>> +
>>>>>> +    s->iov.iov_base = buf;
>>>>>> +    if (sector_size == 2352) {
>>>>>> +        buf += 4;
>>>>>> +    }
>>>> This doesn't look quite right, buf is never read after this.
>>>>
>>>> Also, why +=4 when it was originally buf + 16?
>>>
>>> You are right. I mixed that up.
>>>
>>>>
>>>>>> +
>>>>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>>>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>>>>> +
>>>>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>>>>> +                      cd_read_sector_cb, s) == NULL) {
>>>>>> +        return -EIO;
>>>>>> +    }
>>>>>> +
>>>>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>>> +    s->status |= BUSY_STAT;
>>>>>> +    return 0;
>>>>>>  }
>>>>> We discussed this off-list a bit, but for upstream synchronization:
>>>>>
>>>>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>>>>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>>>>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>>>>> are not prepared to cope with.
>>>> I don't think that's a problem as long as BSY is set while the
>>>> asynchronous command is running and DRQ is cleared. The latter will
>>>> protect ide_data_readw(). ide_sector_read() does essentially the same
>>>> thing.
>>>
>>> I was thinking the same. Without the BSY its not working at all.
>>>
>>>>
>>>> Or maybe I'm just missing what you're trying to say.
>>>>
>>>>> My suggestion is to buffer an entire DRQ block of data at once
>>>>> (byte_count_limit) to avoid the problem.
>>>> No matter whether there is a problem or not, buffering more data at once
>>>> (and therefore doing less requests) is better for performance anyway.
>>>
>>> Its possible to do only one read in the backend and read the whole
>>> request into the IO buffer. I send a follow-up.
>>
>> Be cautious: we only have 128K (+4 bytes) to play with in the io_buffer
>> and the READ10 cdb can request up to 128MiB! For performance, it might
>> be nice to always buffer something like:
>>
>> MIN(128K, nb_sectors * sector_size)
> 
> isnt nb_sectors limited to CD_MAX_SECTORS (32)?
> 
> Peter
> 

CD_MAX_SECTORS is... (80 * 60 * 75 * 2048) / 512 --> 1440000, and
describes the maximum sector size for a CD medium, not the request size.

Where'd you get the 32 number?

> 
>>
>> and then as the guest drains the DRQ block of size byte_count_limit
>> which can only be at largest 0xFFFE (we can fit in at least two of these
>> per io_buffer refill) we can just shift the data_ptr and data_end
>> pointers to utilize io_buffer like a ring buffer.
>>
>> Because the guest can at most fetch 0xfffe bytes at a time, it will tend
>> to leave at least 4 bytes left over from a 64 block read. Luckily, we've
>> got 4 extra bytes in s->io_buffer, so with a ring buffer we can always
>> rebuffer *at least* two full DRQ blocks of data at a time.
>>
>> The routine would basically look like this:
>>
>> - No DRQ blocks buffered, so read up to 64 blocks or however many are
>> left for our transfer
>> - If we have at least one full DRQ block allocated, start the transfer
>> and send an interrupt
>> - If we ran out of DRQ blocks, go back to the top and buffer them.
>>
>> This would eliminate the need for code stanza #3 in
>> ide_atapi_cmd_reply_end, which re-starts a transfer without signaling to
>> the guest. We'd only have:
>>
>> ide_atapi_cmd_reply_end(...) {
>>  if (packet_transfer_size == 0) { end(...); return; }
>>  if (blocks_buffered < 1) { async_buffer_blocks(...); return; }
>>  ide_transfer_start(...)
>>  ide_set_irq(s->bus);
>> }
>>
>>
>> which is a good deal simpler than what we have now, though I need to
>> look into the formatting of raw CD data a little more to make sure my
>> numbers make sense... it may not be quite so easy to buffer multiple DRQ
>> blocks in some cases, but so it goes -- we should always be able to
>> buffer at least one.
>>
>>> Maybe do you have a pointer to the test tool that John mentioned?
>>>
>>> Peter
>>>
Peter Lieven Oct. 6, 2015, 6:31 p.m. UTC | #10
Am 06.10.2015 um 19:56 schrieb John Snow:
>
> On 10/06/2015 01:12 PM, Peter Lieven wrote:
>>> Am 06.10.2015 um 19:07 schrieb John Snow <jsnow@redhat.com>:
>>>
>>>
>>>
>>>> On 10/06/2015 05:20 AM, Peter Lieven wrote:
>>>>> Am 06.10.2015 um 10:57 schrieb Kevin Wolf:
>>>>> Am 05.10.2015 um 23:15 hat John Snow geschrieben:
>>>>>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>>>>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>>>>>> This has to siginificant drawbacks. First the main loop hangs util an
>>>>>>> I/O request is completed and secondly if the I/O request does not
>>>>>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>>>>>
>>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>>> ---
>>>>>>>  hw/ide/atapi.c | 69
>>>>>>> ++++++++++++++++++++++++++++++++++++----------------------
>>>>>>>  1 file changed, 43 insertions(+), 26 deletions(-)
>>>>>>>
>>>>>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>>>>>> index 747f466..9257e1c 100644
>>>>>>> --- a/hw/ide/atapi.c
>>>>>>> +++ b/hw/ide/atapi.c
>>>>>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>>>>>      memset(buf, 0, 288);
>>>>>>>  }
>>>>>>>  -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>>>>>>> sector_size)
>>>>>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>>>>>  {
>>>>>>> -    int ret;
>>>>>>> +    IDEState *s = opaque;
>>>>>>>  -    switch(sector_size) {
>>>>>>> -    case 2048:
>>>>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>>>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>>>> -        break;
>>>>>>> -    case 2352:
>>>>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>>>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>>>> -        if (ret < 0)
>>>>>>> -            return ret;
>>>>>>> -        cd_data_to_raw(buf, lba);
>>>>>>> -        break;
>>>>>>> -    default:
>>>>>>> -        ret = -EIO;
>>>>>>> -        break;
>>>>>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>>>> +
>>>>>>> +    if (ret < 0) {
>>>>>>> +        ide_atapi_io_error(s, ret);
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (s->cd_sector_size == 2352) {
>>>>>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>>>>>      }
>>>>>>> -    return ret;
>>>>>>> +
>>>>>>> +    s->lba++;
>>>>>>> +    s->io_buffer_index = 0;
>>>>>>> +    s->status &= ~BUSY_STAT;
>>>>>>> +
>>>>>>> +    ide_atapi_cmd_reply_end(s);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>>>>>>> sector_size)
>>>>>>> +{
>>>>>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>>>>>> +        return -EINVAL;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    s->iov.iov_base = buf;
>>>>>>> +    if (sector_size == 2352) {
>>>>>>> +        buf += 4;
>>>>>>> +    }
>>>>> This doesn't look quite right, buf is never read after this.
>>>>>
>>>>> Also, why +=4 when it was originally buf + 16?
>>>> You are right. I mixed that up.
>>>>
>>>>>>> +
>>>>>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>>>>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>>>>>> +
>>>>>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>>>>>> +                      cd_read_sector_cb, s) == NULL) {
>>>>>>> +        return -EIO;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>>>> +    s->status |= BUSY_STAT;
>>>>>>> +    return 0;
>>>>>>>  }
>>>>>> We discussed this off-list a bit, but for upstream synchronization:
>>>>>>
>>>>>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>>>>>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>>>>>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>>>>>> are not prepared to cope with.
>>>>> I don't think that's a problem as long as BSY is set while the
>>>>> asynchronous command is running and DRQ is cleared. The latter will
>>>>> protect ide_data_readw(). ide_sector_read() does essentially the same
>>>>> thing.
>>>> I was thinking the same. Without the BSY its not working at all.
>>>>
>>>>> Or maybe I'm just missing what you're trying to say.
>>>>>
>>>>>> My suggestion is to buffer an entire DRQ block of data at once
>>>>>> (byte_count_limit) to avoid the problem.
>>>>> No matter whether there is a problem or not, buffering more data at once
>>>>> (and therefore doing less requests) is better for performance anyway.
>>>> Its possible to do only one read in the backend and read the whole
>>>> request into the IO buffer. I send a follow-up.
>>> Be cautious: we only have 128K (+4 bytes) to play with in the io_buffer
>>> and the READ10 cdb can request up to 128MiB! For performance, it might
>>> be nice to always buffer something like:
>>>
>>> MIN(128K, nb_sectors * sector_size)
>> isnt nb_sectors limited to CD_MAX_SECTORS (32)?
>>
>> Peter
>>
> CD_MAX_SECTORS is... (80 * 60 * 75 * 2048) / 512 --> 1440000, and
> describes the maximum sector size for a CD medium, not the request size.
>
> Where'd you get the 32 number?

You are right. I mixed this up. You where talking of a maximum transfer
size of close to 32 sectors. But you where referring to an ide transfer not
the maximum request size in terms of SCSI block limits.

I will rework that patch on Thursday.

Maybe you can have a look at the other patches of this series as well? Then I can
respin the whole series.

Thanks for your help,
Peter
John Snow Oct. 6, 2015, 6:34 p.m. UTC | #11
On 10/06/2015 02:31 PM, Peter Lieven wrote:
> Am 06.10.2015 um 19:56 schrieb John Snow:
>>
>> On 10/06/2015 01:12 PM, Peter Lieven wrote:
>>>> Am 06.10.2015 um 19:07 schrieb John Snow <jsnow@redhat.com>:
>>>>
>>>>
>>>>
>>>>> On 10/06/2015 05:20 AM, Peter Lieven wrote:
>>>>>> Am 06.10.2015 um 10:57 schrieb Kevin Wolf:
>>>>>> Am 05.10.2015 um 23:15 hat John Snow geschrieben:
>>>>>>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>>>>>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>>>>>>> This has to siginificant drawbacks. First the main loop hangs util an
>>>>>>>> I/O request is completed and secondly if the I/O request does not
>>>>>>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
Maybe you can have a look at the other patches of this series as well?
Then I can
respin the whole series.


>>>>>>>>
>>>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>>>> ---
>>>>>>>>  hw/ide/atapi.c | 69
>>>>>>>> ++++++++++++++++++++++++++++++++++++----------------------
>>>>>>>>  1 file changed, 43 insertions(+), 26 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>>>>>>> index 747f466..9257e1c 100644
>>>>>>>> --- a/hw/ide/atapi.c
>>>>>>>> +++ b/hw/ide/atapi.c
>>>>>>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>>>>>>      memset(buf, 0, 288);
>>>>>>>>  }
>>>>>>>>  -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>>>>>>>> sector_size)
>>>>>>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>>>>>>  {
>>>>>>>> -    int ret;
>>>>>>>> +    IDEState *s = opaque;
>>>>>>>>  -    switch(sector_size) {
>>>>>>>> -    case 2048:
>>>>>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>>>>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>>>>> -        break;
>>>>>>>> -    case 2352:
>>>>>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>>>>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>>>>> -        if (ret < 0)
>>>>>>>> -            return ret;
>>>>>>>> -        cd_data_to_raw(buf, lba);
>>>>>>>> -        break;
>>>>>>>> -    default:
>>>>>>>> -        ret = -EIO;
>>>>>>>> -        break;
>>>>>>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>>>>>> +
>>>>>>>> +    if (ret < 0) {
>>>>>>>> +        ide_atapi_io_error(s, ret);
>>>>>>>> +        return;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    if (s->cd_sector_size == 2352) {
>>>>>>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>>>>>>      }
>>>>>>>> -    return ret;
>>>>>>>> +
>>>>>>>> +    s->lba++;
>>>>>>>> +    s->io_buffer_index = 0;
>>>>>>>> +    s->status &= ~BUSY_STAT;
>>>>>>>> +
>>>>>>>> +    ide_atapi_cmd_reply_end(s);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>>>>>>>> sector_size)
>>>>>>>> +{
>>>>>>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>>>>>>> +        return -EINVAL;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    s->iov.iov_base = buf;
>>>>>>>> +    if (sector_size == 2352) {
>>>>>>>> +        buf += 4;
>>>>>>>> +    }
>>>>>> This doesn't look quite right, buf is never read after this.
>>>>>>
>>>>>> Also, why +=4 when it was originally buf + 16?
>>>>> You are right. I mixed that up.
>>>>>
>>>>>>>> +
>>>>>>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>>>>>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>>>>>>> +
>>>>>>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>>>>>>> +                      cd_read_sector_cb, s) == NULL) {
>>>>>>>> +        return -EIO;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>>>>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>>>>>> +    s->status |= BUSY_STAT;
>>>>>>>> +    return 0;
>>>>>>>>  }
>>>>>>> We discussed this off-list a bit, but for upstream synchronization:
>>>>>>>
>>>>>>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>>>>>>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>>>>>>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>>>>>>> are not prepared to cope with.
>>>>>> I don't think that's a problem as long as BSY is set while the
>>>>>> asynchronous command is running and DRQ is cleared. The latter will
>>>>>> protect ide_data_readw(). ide_sector_read() does essentially the same
>>>>>> thing.
>>>>> I was thinking the same. Without the BSY its not working at all.
>>>>>
>>>>>> Or maybe I'm just missing what you're trying to say.
>>>>>>
>>>>>>> My suggestion is to buffer an entire DRQ block of data at once
>>>>>>> (byte_count_limit) to avoid the problem.
>>>>>> No matter whether there is a problem or not, buffering more data at once
>>>>>> (and therefore doing less requests) is better for performance anyway.
>>>>> Its possible to do only one read in the backend and read the whole
>>>>> request into the IO buffer. I send a follow-up.
>>>> Be cautious: we only have 128K (+4 bytes) to play with in the io_buffer
>>>> and the READ10 cdb can request up to 128MiB! For performance, it might
>>>> be nice to always buffer something like:
>>>>
>>>> MIN(128K, nb_sectors * sector_size)
>>> isnt nb_sectors limited to CD_MAX_SECTORS (32)?
>>>
>>> Peter
>>>
>> CD_MAX_SECTORS is... (80 * 60 * 75 * 2048) / 512 --> 1440000, and
>> describes the maximum sector size for a CD medium, not the request size.
>>
>> Where'd you get the 32 number?
> 
> You are right. I mixed this up. You where talking of a maximum transfer
> size of close to 32 sectors. But you where referring to an ide transfer not
> the maximum request size in terms of SCSI block limits.
> 

Ah, yes. You can request up to 0xfffe bytes per DRQ cycle, which is a
hair shy of 32 sectors, but the overall transaction can be quite large.

> I will rework that patch on Thursday.
> 
> Maybe you can have a look at the other patches of this series as well? Then I can
> respin the whole series.
> 
> Thanks for your help,
> Peter
> 

Sure thing. Sorry to drag you into one of the darkest corners of hw/ide/*.

--js
Kevin Wolf Oct. 7, 2015, 7:28 a.m. UTC | #12
Am 06.10.2015 um 17:54 hat John Snow geschrieben:
> 
> 
> On 10/06/2015 04:57 AM, Kevin Wolf wrote:
> > Am 05.10.2015 um 23:15 hat John Snow geschrieben:
> >>
> >>
> >> On 09/21/2015 08:25 AM, Peter Lieven wrote:
> >>> PIO read requests on the ATAPI interface used to be sync blk requests.
> >>> This has to siginificant drawbacks. First the main loop hangs util an
> >>> I/O request is completed and secondly if the I/O request does not
> >>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
> >>>
> >>> Signed-off-by: Peter Lieven <pl@kamp.de>
> >>> ---
> >>>  hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++----------------------
> >>>  1 file changed, 43 insertions(+), 26 deletions(-)
> >>>
> >>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> >>> index 747f466..9257e1c 100644
> >>> --- a/hw/ide/atapi.c
> >>> +++ b/hw/ide/atapi.c
> >>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
> >>>      memset(buf, 0, 288);
> >>>  }
> >>>  
> >>> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
> >>> +static void cd_read_sector_cb(void *opaque, int ret)
> >>>  {
> >>> -    int ret;
> >>> +    IDEState *s = opaque;
> >>>  
> >>> -    switch(sector_size) {
> >>> -    case 2048:
> >>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
> >>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> >>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
> >>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
> >>> -        break;
> >>> -    case 2352:
> >>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
> >>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> >>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
> >>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
> >>> -        if (ret < 0)
> >>> -            return ret;
> >>> -        cd_data_to_raw(buf, lba);
> >>> -        break;
> >>> -    default:
> >>> -        ret = -EIO;
> >>> -        break;
> >>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
> >>> +
> >>> +    if (ret < 0) {
> >>> +        ide_atapi_io_error(s, ret);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    if (s->cd_sector_size == 2352) {
> >>> +        cd_data_to_raw(s->io_buffer, s->lba);
> >>>      }
> >>> -    return ret;
> >>> +
> >>> +    s->lba++;
> >>> +    s->io_buffer_index = 0;
> >>> +    s->status &= ~BUSY_STAT;
> >>> +
> >>> +    ide_atapi_cmd_reply_end(s);
> >>> +}
> >>> +
> >>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
> >>> +{
> >>> +    if (sector_size != 2048 && sector_size != 2352) {
> >>> +        return -EINVAL;
> >>> +    }
> >>> +
> >>> +    s->iov.iov_base = buf;
> >>> +    if (sector_size == 2352) {
> >>> +        buf += 4;
> >>> +    }
> > 
> > This doesn't look quite right, buf is never read after this.
> > 
> > Also, why +=4 when it was originally buf + 16?
> > 
> >>> +
> >>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> >>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> >>> +
> >>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
> >>> +                      cd_read_sector_cb, s) == NULL) {
> >>> +        return -EIO;
> >>> +    }
> >>> +
> >>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
> >>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> >>> +    s->status |= BUSY_STAT;
> >>> +    return 0;
> >>>  }
> >>>  
> >>
> >> We discussed this off-list a bit, but for upstream synchronization:
> >>
> >> Unfortunately, I believe making cd_read_sector here non-blocking makes
> >> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
> >> s->end_transfer_func() nonblocking, which functions like ide_data_readw
> >> are not prepared to cope with.
> > 
> > I don't think that's a problem as long as BSY is set while the
> > asynchronous command is running and DRQ is cleared. The latter will
> > protect ide_data_readw(). ide_sector_read() does essentially the same
> > thing.
> > 
> > Or maybe I'm just missing what you're trying to say.
> > 
> 
> It will be correct from a code standpoint, but I don't think the guest
> *expects* DRQ to become re-set before byte_count_limit is exhausted.

Oh, I misunderstood what you're after. Yes, I think you're right. The
guest most probably uses string I/O instructions like 'rep insw' in
order to transfer the whole block, i.e. it doesn't even check the status
register in between and will simply transfer invalid data (zeros) while
DRQ isn't set.

> In the synchronous version of the code, DRQ flickers while we rebuffer
> s->io_buffer, but since it's synchronous, the guest *never sees this*.

Thanks, that the current code would be wrong if it weren't synchronous
is the part I missed.

> The guest does not necessarily have any reason or motivation to check if
> DRQ is still set after 2048 bytes -- is that recommended in the spec?
> 
> ("Warning! The drive may decide to rebuffer arbitrarily while you read,
> so check if DRQ is still set before each read to the data register!" ??)

No, it's not recommended. PIO performance is already bad enough without
it. :-)

If you want to check this, have a look at the state machines described in
APT. In my (probably outdated) version it's chapter 9.8 "PACKET command
protocol".

Kevin
diff mbox

Patch

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 747f466..9257e1c 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -105,31 +105,51 @@  static void cd_data_to_raw(uint8_t *buf, int lba)
     memset(buf, 0, 288);
 }
 
-static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
+static void cd_read_sector_cb(void *opaque, int ret)
 {
-    int ret;
+    IDEState *s = opaque;
 
-    switch(sector_size) {
-    case 2048:
-        block_acct_start(blk_get_stats(s->blk), &s->acct,
-                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
-        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
-        block_acct_done(blk_get_stats(s->blk), &s->acct);
-        break;
-    case 2352:
-        block_acct_start(blk_get_stats(s->blk), &s->acct,
-                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
-        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
-        block_acct_done(blk_get_stats(s->blk), &s->acct);
-        if (ret < 0)
-            return ret;
-        cd_data_to_raw(buf, lba);
-        break;
-    default:
-        ret = -EIO;
-        break;
+    block_acct_done(blk_get_stats(s->blk), &s->acct);
+
+    if (ret < 0) {
+        ide_atapi_io_error(s, ret);
+        return;
+    }
+
+    if (s->cd_sector_size == 2352) {
+        cd_data_to_raw(s->io_buffer, s->lba);
     }
-    return ret;
+
+    s->lba++;
+    s->io_buffer_index = 0;
+    s->status &= ~BUSY_STAT;
+
+    ide_atapi_cmd_reply_end(s);
+}
+
+static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
+{
+    if (sector_size != 2048 && sector_size != 2352) {
+        return -EINVAL;
+    }
+
+    s->iov.iov_base = buf;
+    if (sector_size == 2352) {
+        buf += 4;
+    }
+
+    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
+    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+
+    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
+                      cd_read_sector_cb, s) == NULL) {
+        return -EIO;
+    }
+
+    block_acct_start(blk_get_stats(s->blk), &s->acct,
+                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
+    s->status |= BUSY_STAT;
+    return 0;
 }
 
 void ide_atapi_cmd_ok(IDEState *s)
@@ -190,10 +210,8 @@  void ide_atapi_cmd_reply_end(IDEState *s)
             ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
             if (ret < 0) {
                 ide_atapi_io_error(s, ret);
-                return;
             }
-            s->lba++;
-            s->io_buffer_index = 0;
+            return;
         }
         if (s->elementary_transfer_size > 0) {
             /* there are some data left to transmit in this elementary
@@ -275,7 +293,6 @@  static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
     s->io_buffer_index = sector_size;
     s->cd_sector_size = sector_size;
 
-    s->status = READY_STAT | SEEK_STAT;
     ide_atapi_cmd_reply_end(s);
 }