Message ID | 56138A76.1030804@kamp.de |
---|---|
State | New |
Headers | show |
Am 06.10.2015 um 10:46 schrieb Peter Lieven: > Am 05.10.2015 um 23:15 schrieb John Snow: >> >> On 09/21/2015 08:25 AM, Peter Lieven wrote: >>> PIO read requests on the ATAPI interface used to be sync blk requests. >>> This has to siginificant drawbacks. First the main loop hangs util an >>> I/O request is completed and secondly if the I/O request does not >>> complete (e.g. due to an unresponsive storage) Qemu hangs completely. >>> >>> Signed-off-by: Peter Lieven <pl@kamp.de> >>> --- >>> hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++---------------------- >>> 1 file changed, 43 insertions(+), 26 deletions(-) >>> >>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c >>> index 747f466..9257e1c 100644 >>> --- a/hw/ide/atapi.c >>> +++ b/hw/ide/atapi.c >>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba) >>> memset(buf, 0, 288); >>> } >>> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size) >>> +static void cd_read_sector_cb(void *opaque, int ret) >>> { >>> - int ret; >>> + IDEState *s = opaque; >>> - switch(sector_size) { >>> - case 2048: >>> - block_acct_start(blk_get_stats(s->blk), &s->acct, >>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>> - ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4); >>> - block_acct_done(blk_get_stats(s->blk), &s->acct); >>> - break; >>> - case 2352: >>> - block_acct_start(blk_get_stats(s->blk), &s->acct, >>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>> - ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4); >>> - block_acct_done(blk_get_stats(s->blk), &s->acct); >>> - if (ret < 0) >>> - return ret; >>> - cd_data_to_raw(buf, lba); >>> - break; >>> - default: >>> - ret = -EIO; >>> - break; >>> + block_acct_done(blk_get_stats(s->blk), &s->acct); >>> + >>> + if (ret < 0) { >>> + ide_atapi_io_error(s, ret); >>> + return; >>> + } >>> + >>> + if (s->cd_sector_size == 2352) { >>> + cd_data_to_raw(s->io_buffer, s->lba); >>> } >>> - return ret; >>> + >>> + s->lba++; >>> + s->io_buffer_index = 
0; >>> + s->status &= ~BUSY_STAT; >>> + >>> + ide_atapi_cmd_reply_end(s); >>> +} >>> + >>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size) >>> +{ >>> + if (sector_size != 2048 && sector_size != 2352) { >>> + return -EINVAL; >>> + } >>> + >>> + s->iov.iov_base = buf; >>> + if (sector_size == 2352) { >>> + buf += 4; >>> + } >>> + >>> + s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; >>> + qemu_iovec_init_external(&s->qiov, &s->iov, 1); >>> + >>> + if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4, >>> + cd_read_sector_cb, s) == NULL) { >>> + return -EIO; >>> + } >>> + >>> + block_acct_start(blk_get_stats(s->blk), &s->acct, >>> + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>> + s->status |= BUSY_STAT; >>> + return 0; >>> } >> We discussed this off-list a bit, but for upstream synchronization: >> >> Unfortunately, I believe making cd_read_sector here non-blocking makes >> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to >> s->end_transfer_func() nonblocking, which functions like ide_data_readw >> are not prepared to cope with. >> >> My suggestion is to buffer an entire DRQ block of data at once >> (byte_count_limit) to avoid the problem. > > Hi John, > > first of all thank you for the detailed analysis. > > Is the following what you have i mind. 
For PIO reads > 1 sector > it is a great improvement for the NFS backend: > > diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c > index ab45495..ec2ba89 100644 > --- a/hw/ide/atapi.c > +++ b/hw/ide/atapi.c > @@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret) > } > > if (s->cd_sector_size == 2352) { > - cd_data_to_raw(s->io_buffer, s->lba); > + int nb_sectors = s->packet_transfer_size / 2352; > + while (nb_sectors--) { > + memmove(s->io_buffer + nb_sectors * 2352 + 4, > + s->io_buffer + nb_sectors * 2048, 2048); > + cd_data_to_raw(s->io_buffer + nb_sectors * 2352, > + s->lba + nb_sectors); > + } > } > > - s->lba++; > + s->lba = -1; > s->io_buffer_index = 0; > s->status &= ~BUSY_STAT; > > ide_atapi_cmd_reply_end(s); > } > > -static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size) > +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size, int nb_sectors) > { > if (sector_size != 2048 && sector_size != 2352) { > return -EINVAL; > } > > s->iov.iov_base = buf; > - if (sector_size == 2352) { > - buf += 4; > - } > - > - s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; > + s->iov.iov_len = nb_sectors * 2048; > qemu_iovec_init_external(&s->qiov, &s->iov, 1); > > - if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4, > - cd_read_sector_cb, s) == NULL) { > + if (ide_readv_cancelable(s, (int64_t)lba << 2, > + &s->qiov, nb_sectors * 4, > + cd_read_sector_cb, s) == NULL) { > return -EIO; > } > > block_acct_start(blk_get_stats(s->blk), &s->acct, > - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); > + nb_sectors * 2048, BLOCK_ACCT_READ); > s->status |= BUSY_STAT; > return 0; > } > @@ -190,7 +193,7 @@ void ide_atapi_io_error(IDEState *s, int ret) > /* The whole ATAPI transfer logic is handled in this function */ > void ide_atapi_cmd_reply_end(IDEState *s) > { > - int byte_count_limit, size, ret; > + int byte_count_limit, size; > #ifdef DEBUG_IDE_ATAPI > printf("reply: tx_size=%d elem_tx_size=%d index=%d\n", > s->packet_transfer_size, > 
@@ -205,14 +208,6 @@ void ide_atapi_cmd_reply_end(IDEState *s) > printf("status=0x%x\n", s->status); > #endif > } else { > - /* see if a new sector must be read */ > - if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) { > - ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size); > - if (ret < 0) { > - ide_atapi_io_error(s, ret); > - } > - return; > - } > if (s->elementary_transfer_size > 0) { > /* there are some data left to transmit in this elementary > transfer */ > @@ -287,13 +282,18 @@ static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size) > static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors, > int sector_size) > { > + int ret; > s->lba = lba; > s->packet_transfer_size = nb_sectors * sector_size; > + assert(s->packet_transfer_size <= > + IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4); > s->elementary_transfer_size = 0; > - s->io_buffer_index = sector_size; > s->cd_sector_size = sector_size; > - > - ide_atapi_cmd_reply_end(s); > + ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size, > + nb_sectors); > + if (ret < 0) { > + ide_atapi_io_error(s, ret); > + } > } > > static void ide_atapi_cmd_check_status(IDEState *s) > > Forgot to mention with this patch I pass tests/ide-test now. Peter
On 10/06/2015 04:46 AM, Peter Lieven wrote: > Am 05.10.2015 um 23:15 schrieb John Snow: >> >> On 09/21/2015 08:25 AM, Peter Lieven wrote: >>> PIO read requests on the ATAPI interface used to be sync blk requests. >>> This has to siginificant drawbacks. First the main loop hangs util an >>> I/O request is completed and secondly if the I/O request does not >>> complete (e.g. due to an unresponsive storage) Qemu hangs completely. >>> >>> Signed-off-by: Peter Lieven <pl@kamp.de> >>> --- >>> hw/ide/atapi.c | 69 >>> ++++++++++++++++++++++++++++++++++++---------------------- >>> 1 file changed, 43 insertions(+), 26 deletions(-) >>> >>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c >>> index 747f466..9257e1c 100644 >>> --- a/hw/ide/atapi.c >>> +++ b/hw/ide/atapi.c >>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba) >>> memset(buf, 0, 288); >>> } >>> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int >>> sector_size) >>> +static void cd_read_sector_cb(void *opaque, int ret) >>> { >>> - int ret; >>> + IDEState *s = opaque; >>> - switch(sector_size) { >>> - case 2048: >>> - block_acct_start(blk_get_stats(s->blk), &s->acct, >>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>> - ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4); >>> - block_acct_done(blk_get_stats(s->blk), &s->acct); >>> - break; >>> - case 2352: >>> - block_acct_start(blk_get_stats(s->blk), &s->acct, >>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>> - ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4); >>> - block_acct_done(blk_get_stats(s->blk), &s->acct); >>> - if (ret < 0) >>> - return ret; >>> - cd_data_to_raw(buf, lba); >>> - break; >>> - default: >>> - ret = -EIO; >>> - break; >>> + block_acct_done(blk_get_stats(s->blk), &s->acct); >>> + >>> + if (ret < 0) { >>> + ide_atapi_io_error(s, ret); >>> + return; >>> + } >>> + >>> + if (s->cd_sector_size == 2352) { >>> + cd_data_to_raw(s->io_buffer, s->lba); >>> } >>> - return ret; >>> + >>> + s->lba++; >>> + 
s->io_buffer_index = 0; >>> + s->status &= ~BUSY_STAT; >>> + >>> + ide_atapi_cmd_reply_end(s); >>> +} >>> + >>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int >>> sector_size) >>> +{ >>> + if (sector_size != 2048 && sector_size != 2352) { >>> + return -EINVAL; >>> + } >>> + >>> + s->iov.iov_base = buf; >>> + if (sector_size == 2352) { >>> + buf += 4; >>> + } >>> + >>> + s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; >>> + qemu_iovec_init_external(&s->qiov, &s->iov, 1); >>> + >>> + if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4, >>> + cd_read_sector_cb, s) == NULL) { >>> + return -EIO; >>> + } >>> + >>> + block_acct_start(blk_get_stats(s->blk), &s->acct, >>> + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>> + s->status |= BUSY_STAT; >>> + return 0; >>> } >>> >> We discussed this off-list a bit, but for upstream synchronization: >> >> Unfortunately, I believe making cd_read_sector here non-blocking makes >> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to >> s->end_transfer_func() nonblocking, which functions like ide_data_readw >> are not prepared to cope with. >> >> My suggestion is to buffer an entire DRQ block of data at once >> (byte_count_limit) to avoid the problem. > > Hi John, > > first of all thank you for the detailed analysis. > > Is the following what you have i mind. 
For PIO reads > 1 sector > it is a great improvement for the NFS backend: > > diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c > index ab45495..ec2ba89 100644 > --- a/hw/ide/atapi.c > +++ b/hw/ide/atapi.c > @@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret) > } > > if (s->cd_sector_size == 2352) { > - cd_data_to_raw(s->io_buffer, s->lba); > + int nb_sectors = s->packet_transfer_size / 2352; > + while (nb_sectors--) { > + memmove(s->io_buffer + nb_sectors * 2352 + 4, > + s->io_buffer + nb_sectors * 2048, 2048); > + cd_data_to_raw(s->io_buffer + nb_sectors * 2352, > + s->lba + nb_sectors); > + } > } Is this going to be correct in cases where the number of sectors we are copying is less than the total request size? We might need to bookmark how many sectors/bytes we're copying this go-around. Perhaps by looking at lcyl/hcyl. > > - s->lba++; > + s->lba = -1; > s->io_buffer_index = 0; > s->status &= ~BUSY_STAT; > > ide_atapi_cmd_reply_end(s); > } > Well, I might not name it cd_read_sector and cd_read_sector_cb anymore. Perhaps cd_read_sectors[_cb]. 
> -static int cd_read_sector(IDEState *s, int lba, void *buf, int > sector_size) > +static int cd_read_sector(IDEState *s, int lba, void *buf, int > sector_size, int nb_sectors) > { > if (sector_size != 2048 && sector_size != 2352) { > return -EINVAL; > } > > s->iov.iov_base = buf; > - if (sector_size == 2352) { > - buf += 4; > - } > - > - s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; > + s->iov.iov_len = nb_sectors * 2048; > qemu_iovec_init_external(&s->qiov, &s->iov, 1); > > - if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4, > - cd_read_sector_cb, s) == NULL) { > + if (ide_readv_cancelable(s, (int64_t)lba << 2, > + &s->qiov, nb_sectors * 4, > + cd_read_sector_cb, s) == NULL) { > return -EIO; > } > > block_acct_start(blk_get_stats(s->blk), &s->acct, > - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); > + nb_sectors * 2048, BLOCK_ACCT_READ); > s->status |= BUSY_STAT; > return 0; > } > @@ -190,7 +193,7 @@ void ide_atapi_io_error(IDEState *s, int ret) > /* The whole ATAPI transfer logic is handled in this function */ > void ide_atapi_cmd_reply_end(IDEState *s) > { > - int byte_count_limit, size, ret; > + int byte_count_limit, size; > #ifdef DEBUG_IDE_ATAPI > printf("reply: tx_size=%d elem_tx_size=%d index=%d\n", > s->packet_transfer_size, > @@ -205,14 +208,6 @@ void ide_atapi_cmd_reply_end(IDEState *s) > printf("status=0x%x\n", s->status); > #endif > } else { > - /* see if a new sector must be read */ > - if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) { > - ret = cd_read_sector(s, s->lba, s->io_buffer, > s->cd_sector_size); > - if (ret < 0) { > - ide_atapi_io_error(s, ret); > - } > - return; > - } > if (s->elementary_transfer_size > 0) { > /* there are some data left to transmit in this elementary > transfer */ > @@ -287,13 +282,18 @@ static void ide_atapi_cmd_reply(IDEState *s, int > size, int max_size) > static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors, > int sector_size) > { > + int ret; > s->lba = lba; > s->packet_transfer_size = 
nb_sectors * sector_size; > + assert(s->packet_transfer_size <= > + IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4); > s->elementary_transfer_size = 0; > - s->io_buffer_index = sector_size; > s->cd_sector_size = sector_size; > - > - ide_atapi_cmd_reply_end(s); > + ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size, > + nb_sectors); > + if (ret < 0) { > + ide_atapi_io_error(s, ret); > + } > } > > static void ide_atapi_cmd_check_status(IDEState *s) > > > Did you also have a look at the other patches? > > Thanks, > Peter On my queue; hopefully Stefan can take a peek too, but I'll try to review the IDE-specific bits. I imagine you want to wait to respin until we've looked at all the patches, that's fine -- I'll try not to keep you waiting for much longer. --js
Am 07.10.2015 um 18:42 schrieb John Snow: > > On 10/06/2015 04:46 AM, Peter Lieven wrote: >> Am 05.10.2015 um 23:15 schrieb John Snow: >>> On 09/21/2015 08:25 AM, Peter Lieven wrote: >>>> PIO read requests on the ATAPI interface used to be sync blk requests. >>>> This has to siginificant drawbacks. First the main loop hangs util an >>>> I/O request is completed and secondly if the I/O request does not >>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely. >>>> >>>> Signed-off-by: Peter Lieven <pl@kamp.de> >>>> --- >>>> hw/ide/atapi.c | 69 >>>> ++++++++++++++++++++++++++++++++++++---------------------- >>>> 1 file changed, 43 insertions(+), 26 deletions(-) >>>> >>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c >>>> index 747f466..9257e1c 100644 >>>> --- a/hw/ide/atapi.c >>>> +++ b/hw/ide/atapi.c >>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba) >>>> memset(buf, 0, 288); >>>> } >>>> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int >>>> sector_size) >>>> +static void cd_read_sector_cb(void *opaque, int ret) >>>> { >>>> - int ret; >>>> + IDEState *s = opaque; >>>> - switch(sector_size) { >>>> - case 2048: >>>> - block_acct_start(blk_get_stats(s->blk), &s->acct, >>>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>>> - ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4); >>>> - block_acct_done(blk_get_stats(s->blk), &s->acct); >>>> - break; >>>> - case 2352: >>>> - block_acct_start(blk_get_stats(s->blk), &s->acct, >>>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>>> - ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4); >>>> - block_acct_done(blk_get_stats(s->blk), &s->acct); >>>> - if (ret < 0) >>>> - return ret; >>>> - cd_data_to_raw(buf, lba); >>>> - break; >>>> - default: >>>> - ret = -EIO; >>>> - break; >>>> + block_acct_done(blk_get_stats(s->blk), &s->acct); >>>> + >>>> + if (ret < 0) { >>>> + ide_atapi_io_error(s, ret); >>>> + return; >>>> + } >>>> + >>>> + if (s->cd_sector_size == 2352) { >>>> + 
cd_data_to_raw(s->io_buffer, s->lba); >>>> } >>>> - return ret; >>>> + >>>> + s->lba++; >>>> + s->io_buffer_index = 0; >>>> + s->status &= ~BUSY_STAT; >>>> + >>>> + ide_atapi_cmd_reply_end(s); >>>> +} >>>> + >>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int >>>> sector_size) >>>> +{ >>>> + if (sector_size != 2048 && sector_size != 2352) { >>>> + return -EINVAL; >>>> + } >>>> + >>>> + s->iov.iov_base = buf; >>>> + if (sector_size == 2352) { >>>> + buf += 4; >>>> + } >>>> + >>>> + s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; >>>> + qemu_iovec_init_external(&s->qiov, &s->iov, 1); >>>> + >>>> + if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4, >>>> + cd_read_sector_cb, s) == NULL) { >>>> + return -EIO; >>>> + } >>>> + >>>> + block_acct_start(blk_get_stats(s->blk), &s->acct, >>>> + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>>> + s->status |= BUSY_STAT; >>>> + return 0; >>>> } >>>> >>> We discussed this off-list a bit, but for upstream synchronization: >>> >>> Unfortunately, I believe making cd_read_sector here non-blocking makes >>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to >>> s->end_transfer_func() nonblocking, which functions like ide_data_readw >>> are not prepared to cope with. >>> >>> My suggestion is to buffer an entire DRQ block of data at once >>> (byte_count_limit) to avoid the problem. >> Hi John, >> >> first of all thank you for the detailed analysis. >> >> Is the following what you have i mind. 
For PIO reads > 1 sector >> it is a great improvement for the NFS backend: >> >> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c >> index ab45495..ec2ba89 100644 >> --- a/hw/ide/atapi.c >> +++ b/hw/ide/atapi.c >> @@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret) >> } >> >> if (s->cd_sector_size == 2352) { >> - cd_data_to_raw(s->io_buffer, s->lba); >> + int nb_sectors = s->packet_transfer_size / 2352; >> + while (nb_sectors--) { >> + memmove(s->io_buffer + nb_sectors * 2352 + 4, >> + s->io_buffer + nb_sectors * 2048, 2048); >> + cd_data_to_raw(s->io_buffer + nb_sectors * 2352, >> + s->lba + nb_sectors); >> + } >> } > Is this going to be correct in cases where the number of sectors we are > copying is less than the total request size? We might need to bookmark > how many sectors/bytes we're copying this go-around. Perhaps by looking > at lcyl/hcyl. It needs some adjustments, like the whole copying logic. We need a read and a write offset. And, of course, it should read +16 and not +4 in the memmove line as Kevin pointed out. My current plan is to rebuffer if not all data has been read from the block layer and we have less than 0xfffe unsend bytes in the buffer. I would then move the unsend bytes to the front of the io_buffer and append more data. In any case it would be good to have a test for such a big transfer in ide_test. With byte limits that devide the sector size and also not. > >> - s->lba++; >> + s->lba = -1; >> s->io_buffer_index = 0; >> s->status &= ~BUSY_STAT; >> >> ide_atapi_cmd_reply_end(s); >> } >> > Well, I might not name it cd_read_sector and cd_read_sector_cb anymore. > Perhaps cd_read_sectors[_cb]. Sure, we could also add a pio_ präfix. Since DMA uses its own functions. 
> >> -static int cd_read_sector(IDEState *s, int lba, void *buf, int >> sector_size) >> +static int cd_read_sector(IDEState *s, int lba, void *buf, int >> sector_size, int nb_sectors) >> { >> if (sector_size != 2048 && sector_size != 2352) { >> return -EINVAL; >> } >> >> s->iov.iov_base = buf; >> - if (sector_size == 2352) { >> - buf += 4; >> - } >> - >> - s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; >> + s->iov.iov_len = nb_sectors * 2048; >> qemu_iovec_init_external(&s->qiov, &s->iov, 1); >> >> - if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4, >> - cd_read_sector_cb, s) == NULL) { >> + if (ide_readv_cancelable(s, (int64_t)lba << 2, >> + &s->qiov, nb_sectors * 4, >> + cd_read_sector_cb, s) == NULL) { >> return -EIO; >> } >> >> block_acct_start(blk_get_stats(s->blk), &s->acct, >> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >> + nb_sectors * 2048, BLOCK_ACCT_READ); >> s->status |= BUSY_STAT; >> return 0; >> } >> @@ -190,7 +193,7 @@ void ide_atapi_io_error(IDEState *s, int ret) >> /* The whole ATAPI transfer logic is handled in this function */ >> void ide_atapi_cmd_reply_end(IDEState *s) >> { >> - int byte_count_limit, size, ret; >> + int byte_count_limit, size; >> #ifdef DEBUG_IDE_ATAPI >> printf("reply: tx_size=%d elem_tx_size=%d index=%d\n", >> s->packet_transfer_size, >> @@ -205,14 +208,6 @@ void ide_atapi_cmd_reply_end(IDEState *s) >> printf("status=0x%x\n", s->status); >> #endif >> } else { >> - /* see if a new sector must be read */ >> - if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) { >> - ret = cd_read_sector(s, s->lba, s->io_buffer, >> s->cd_sector_size); >> - if (ret < 0) { >> - ide_atapi_io_error(s, ret); >> - } >> - return; >> - } >> if (s->elementary_transfer_size > 0) { >> /* there are some data left to transmit in this elementary >> transfer */ >> @@ -287,13 +282,18 @@ static void ide_atapi_cmd_reply(IDEState *s, int >> size, int max_size) >> static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors, >> int 
sector_size) >> { >> + int ret; >> s->lba = lba; >> s->packet_transfer_size = nb_sectors * sector_size; >> + assert(s->packet_transfer_size <= >> + IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4); >> s->elementary_transfer_size = 0; >> - s->io_buffer_index = sector_size; >> s->cd_sector_size = sector_size; >> - >> - ide_atapi_cmd_reply_end(s); >> + ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size, >> + nb_sectors); >> + if (ret < 0) { >> + ide_atapi_io_error(s, ret); >> + } >> } >> >> static void ide_atapi_cmd_check_status(IDEState *s) >> >> >> Did you also have a look at the other patches? >> >> Thanks, >> Peter > On my queue; hopefully Stefan can take a peek too, but I'll try to > review the IDE-specific bits. I imagine you want to wait to respin until > we've looked at all the patches, that's fine -- I'll try not to keep you > waiting for much longer. Thanks, Peter
Hi all, short summary from my side. The whole thing seems to get complicated, let me explain why: 1) During review I found that the code in ide_atapi_cmd_reply_end can't work correctly if the byte_count_limit is not a divisor or a multiple of cd_sector_size. The reason is that as soon as we load the next sector we start at io_buffer offset 0 overwriting whatever is left in there for transfer. We also reset the io_buffer_index to 0 which means if we continue with the elementary transfer we always transfer a whole sector (of corrupt data) regardless if we are allowed to transfer that much data. Before we consider fixing this I wonder if it is legal at all to have an unaligned byte_count_limit. It obviously has never caused trouble in practice so maybe it's not happening in real life. 2) I found that whatever cool optimization I put in to buffer multiple sectors at once I end up with code that breaks migration because older versions would either not fill the io_buffer as expected or we introduce variables that older versions do not understand. This will lead to problems if we migrate in the middle of a transfer. 3) My current plan to get this patch to a useful state would be to use my initial patch and just change the code to use a sync request if we need to buffer additional sectors in an elementary transfer. I found that in real-world operating systems the byte_count_limit seems to be equal to the cd_sector_size. After all, it's just a PIO transfer; an operating system will likely switch to DMA as soon as the kernel is loaded. Thanks, Peter
On 10/08/2015 08:06 AM, Peter Lieven wrote: > Hi all, > > short summary from my side. The whole thing seems to get complicated, > let me explain why: > > 1) During review I found that the code in ide_atapi_cmd_reply_end can't > work correctly if the > byte_count_limit is not a divider or a multiple of cd_sector_size. The > reason is that as soon > as we load the next sector we start at io_buffer offset 0 overwriting > whatever is left in there > for transfer. We also reset the io_buffer_index to 0 which means if we > continue with the > elementary transfer we always transfer a whole sector (of corrupt data) > regardless if we > are allowed to transfer that much data. Before we consider fixing this I > wonder if it > is legal at all to have an unaligned byte_count_limit. It obviously has > never caused trouble in > practice so maybe its not happening in real life. > I had overlooked that part. Good catch. I do suspect that in practice nobody will be asking for bizarre values. There's no rule against an unaligned byte_count_limit as far as I have read, but suspect nobody would have a reason to use it in practice. > 2) I found that whatever cool optimization I put in to buffer multiple > sectors at once I end > up with code that breaks migration because older versions would either > not fill the io_buffer > as expected or we introduce variables that older versions do not > understand. This will > lead to problems if we migrate in the middle of a transfer. > Ech. This sounds like a bit of a problem. I'll need to think about this one... > 3) My current plan to get this patch to a useful state would be to use > my initial patch and just > change the code to use a sync request if we need to buffer additional > sectors in an elementary > transfer. I found that in real world operating systems the > byte_count_limit seems to be equal to > the cd_sector_size. After all its just a PIO transfer an operating > system will likely switch to DMA > as soon as the kernel ist loaded. 
> > Thanks, > Peter > It sounds like that might be "good enough" for now, and won't make behavior *worse* than it currently is. You can adjust the test I had checked in to not use a "tricky" value and we can amend support for this later if desired.
Am 08.10.2015 um 18:44 hat John Snow geschrieben: > On 10/08/2015 08:06 AM, Peter Lieven wrote: > > Hi all, > > > > short summary from my side. The whole thing seems to get complicated, > > let me explain why: > > > > 1) During review I found that the code in ide_atapi_cmd_reply_end can't > > work correctly if the > > byte_count_limit is not a divider or a multiple of cd_sector_size. The > > reason is that as soon > > as we load the next sector we start at io_buffer offset 0 overwriting > > whatever is left in there > > for transfer. We also reset the io_buffer_index to 0 which means if we > > continue with the > > elementary transfer we always transfer a whole sector (of corrupt data) > > regardless if we > > are allowed to transfer that much data. Before we consider fixing this I > > wonder if it > > is legal at all to have an unaligned byte_count_limit. It obviously has > > never caused trouble in > > practice so maybe its not happening in real life. > > > > I had overlooked that part. Good catch. I do suspect that in practice > nobody will be asking for bizarre values. > > There's no rule against an unaligned byte_count_limit as far as I have > read, but suspect nobody would have a reason to use it in practice. If we know that our code is technically wrong, "nobody uses it" is not a good reason not to fix it. Because sooner or later someone will use it. > > 2) I found that whatever cool optimization I put in to buffer multiple > > sectors at once I end > > up with code that breaks migration because older versions would either > > not fill the io_buffer > > as expected or we introduce variables that older versions do not > > understand. This will > > lead to problems if we migrate in the middle of a transfer. > > > > Ech. This sounds like a bit of a problem. I'll need to think about this > one... Sounds like a textbook example for subsections to me? Kevin
Am 09.10.2015 um 10:21 schrieb Kevin Wolf: > Am 08.10.2015 um 18:44 hat John Snow geschrieben: >> On 10/08/2015 08:06 AM, Peter Lieven wrote: >>> Hi all, >>> >>> short summary from my side. The whole thing seems to get complicated, >>> let me explain why: >>> >>> 1) During review I found that the code in ide_atapi_cmd_reply_end can't >>> work correctly if the >>> byte_count_limit is not a divider or a multiple of cd_sector_size. The >>> reason is that as soon >>> as we load the next sector we start at io_buffer offset 0 overwriting >>> whatever is left in there >>> for transfer. We also reset the io_buffer_index to 0 which means if we >>> continue with the >>> elementary transfer we always transfer a whole sector (of corrupt data) >>> regardless if we >>> are allowed to transfer that much data. Before we consider fixing this I >>> wonder if it >>> is legal at all to have an unaligned byte_count_limit. It obviously has >>> never caused trouble in >>> practice so maybe its not happening in real life. >>> >> I had overlooked that part. Good catch. I do suspect that in practice >> nobody will be asking for bizarre values. >> >> There's no rule against an unaligned byte_count_limit as far as I have >> read, but suspect nobody would have a reason to use it in practice. > If we know that our code is technically wrong, "nobody uses it" is not a > good reason not to fix it. Because sooner or later someone will use it. I found that I just misinterpreted the code. I think its correct altough its not nice. > >>> 2) I found that whatever cool optimization I put in to buffer multiple >>> sectors at once I end >>> up with code that breaks migration because older versions would either >>> not fill the io_buffer >>> as expected or we introduce variables that older versions do not >>> understand. This will >>> lead to problems if we migrate in the middle of a transfer. >>> >> Ech. This sounds like a bit of a problem. I'll need to think about this >> one... 
> Sounds like a textbook example for subsections to me? I wonder if we need this at all. It's just PIO. However, Windows and Linux fall back to PIO if I disconnect the NFS server for some time. With the latest version of my patch series this works fine now and even works fine after restoring NFS connectivity. This was not properly working because of the glitch that John found. I have an optimization in mind that might work and will remove the need for the sync requests while keeping migration possible. I hope to be able to complete this by Monday and send out a V2 then. If someone wants to have a look at what I currently have (especially the cancelable requests part) it's here: https://github.com/plieven/qemu/commits/atapi_async_V2 Thanks, Peter
On 10/09/2015 04:21 AM, Kevin Wolf wrote: > Am 08.10.2015 um 18:44 hat John Snow geschrieben: >> On 10/08/2015 08:06 AM, Peter Lieven wrote: >>> Hi all, >>> >>> short summary from my side. The whole thing seems to get complicated, >>> let me explain why: >>> >>> 1) During review I found that the code in ide_atapi_cmd_reply_end can't >>> work correctly if the >>> byte_count_limit is not a divider or a multiple of cd_sector_size. The >>> reason is that as soon >>> as we load the next sector we start at io_buffer offset 0 overwriting >>> whatever is left in there >>> for transfer. We also reset the io_buffer_index to 0 which means if we >>> continue with the >>> elementary transfer we always transfer a whole sector (of corrupt data) >>> regardless if we >>> are allowed to transfer that much data. Before we consider fixing this I >>> wonder if it >>> is legal at all to have an unaligned byte_count_limit. It obviously has >>> never caused trouble in >>> practice so maybe its not happening in real life. >>> >> >> I had overlooked that part. Good catch. I do suspect that in practice >> nobody will be asking for bizarre values. >> >> There's no rule against an unaligned byte_count_limit as far as I have >> read, but suspect nobody would have a reason to use it in practice. > > If we know that our code is technically wrong, "nobody uses it" is not a > good reason not to fix it. Because sooner or later someone will use it. > Not arguing against fixing it, just speaking to priorities and not making it worse. If it were up to me I'd spent the next three months obsessively making the AHCI emulation spec-perfect, but suspect I'd fix close to zero bugs actually being observed in the real world. You can always trust that I want to fix every bug no matter how trivial or untriggerable in the field. 
:) >>> 2) I found that whatever cool optimization I put in to buffer multiple >>> sectors at once I end >>> up with code that breaks migration because older versions would either >>> not fill the io_buffer >>> as expected or we introduce variables that older versions do not >>> understand. This will >>> lead to problems if we migrate in the middle of a transfer. >>> >> >> Ech. This sounds like a bit of a problem. I'll need to think about this >> one... > > Sounds like a textbook example for subsections to me? > > Kevin > Haven't used them before, personally -- I'll take a look.
Am 08.10.2015 um 18:44 schrieb John Snow: > > On 10/08/2015 08:06 AM, Peter Lieven wrote: >> Hi all, >> >> short summary from my side. The whole thing seems to get complicated, >> let me explain why: >> >> 1) During review I found that the code in ide_atapi_cmd_reply_end can't >> work correctly if the >> byte_count_limit is not a divider or a multiple of cd_sector_size. The >> reason is that as soon >> as we load the next sector we start at io_buffer offset 0 overwriting >> whatever is left in there >> for transfer. We also reset the io_buffer_index to 0 which means if we >> continue with the >> elementary transfer we always transfer a whole sector (of corrupt data) >> regardless if we >> are allowed to transfer that much data. Before we consider fixing this I >> wonder if it >> is legal at all to have an unaligned byte_count_limit. It obviously has >> never caused trouble in >> practice so maybe its not happening in real life. >> > I had overlooked that part. Good catch. I do suspect that in practice > nobody will be asking for bizarre values. > > There's no rule against an unaligned byte_count_limit as far as I have > read, but suspect nobody would have a reason to use it in practice. > >> 2) I found that whatever cool optimization I put in to buffer multiple >> sectors at once I end >> up with code that breaks migration because older versions would either >> not fill the io_buffer >> as expected or we introduce variables that older versions do not >> understand. This will >> lead to problems if we migrate in the middle of a transfer. >> > Ech. This sounds like a bit of a problem. I'll need to think about this > one... > >> 3) My current plan to get this patch to a useful state would be to use >> my initial patch and just >> change the code to use a sync request if we need to buffer additional >> sectors in an elementary >> transfer. I found that in real world operating systems the >> byte_count_limit seems to be equal to >> the cd_sector_size. 
After all its just a PIO transfer an operating >> system will likely switch to DMA >> as soon as the kernel ist loaded. >> >> Thanks, >> Peter >> > It sounds like that might be "good enough" for now, and won't make > behavior *worse* than it currently is. You can adjust the test I had > checked in to not use a "tricky" value and we can amend support for this > later if desired. Have you had a chance to look at the series with the "good enough" fix? Thanks, Peter
On 10/14/2015 02:19 PM, Peter Lieven wrote: > Am 08.10.2015 um 18:44 schrieb John Snow: >> >> On 10/08/2015 08:06 AM, Peter Lieven wrote: >>> Hi all, >>> >>> short summary from my side. The whole thing seems to get complicated, >>> let me explain why: >>> >>> 1) During review I found that the code in ide_atapi_cmd_reply_end can't >>> work correctly if the >>> byte_count_limit is not a divider or a multiple of cd_sector_size. The >>> reason is that as soon >>> as we load the next sector we start at io_buffer offset 0 overwriting >>> whatever is left in there >>> for transfer. We also reset the io_buffer_index to 0 which means if we >>> continue with the >>> elementary transfer we always transfer a whole sector (of corrupt data) >>> regardless if we >>> are allowed to transfer that much data. Before we consider fixing this I >>> wonder if it >>> is legal at all to have an unaligned byte_count_limit. It obviously has >>> never caused trouble in >>> practice so maybe its not happening in real life. >>> >> I had overlooked that part. Good catch. I do suspect that in practice >> nobody will be asking for bizarre values. >> >> There's no rule against an unaligned byte_count_limit as far as I have >> read, but suspect nobody would have a reason to use it in practice. >> >>> 2) I found that whatever cool optimization I put in to buffer multiple >>> sectors at once I end >>> up with code that breaks migration because older versions would either >>> not fill the io_buffer >>> as expected or we introduce variables that older versions do not >>> understand. This will >>> lead to problems if we migrate in the middle of a transfer. >>> >> Ech. This sounds like a bit of a problem. I'll need to think about this >> one... >> >>> 3) My current plan to get this patch to a useful state would be to use >>> my initial patch and just >>> change the code to use a sync request if we need to buffer additional >>> sectors in an elementary >>> transfer. 
I found that in real world operating systems the >>> byte_count_limit seems to be equal to >>> the cd_sector_size. After all its just a PIO transfer an operating >>> system will likely switch to DMA >>> as soon as the kernel ist loaded. >>> >>> Thanks, >>> Peter >>> >> It sounds like that might be "good enough" for now, and won't make >> behavior *worse* than it currently is. You can adjust the test I had >> checked in to not use a "tricky" value and we can amend support for this >> later if desired. > > Have you had a chance to look at the series with the "good enough" fix? > > Thanks, > Peter > Will do so Friday, thanks! --js
Am 14.10.2015 um 20:21 schrieb John Snow: > > On 10/14/2015 02:19 PM, Peter Lieven wrote: >> Am 08.10.2015 um 18:44 schrieb John Snow: >>> On 10/08/2015 08:06 AM, Peter Lieven wrote: >>>> Hi all, >>>> >>>> short summary from my side. The whole thing seems to get complicated, >>>> let me explain why: >>>> >>>> 1) During review I found that the code in ide_atapi_cmd_reply_end can't >>>> work correctly if the >>>> byte_count_limit is not a divider or a multiple of cd_sector_size. The >>>> reason is that as soon >>>> as we load the next sector we start at io_buffer offset 0 overwriting >>>> whatever is left in there >>>> for transfer. We also reset the io_buffer_index to 0 which means if we >>>> continue with the >>>> elementary transfer we always transfer a whole sector (of corrupt data) >>>> regardless if we >>>> are allowed to transfer that much data. Before we consider fixing this I >>>> wonder if it >>>> is legal at all to have an unaligned byte_count_limit. It obviously has >>>> never caused trouble in >>>> practice so maybe its not happening in real life. >>>> >>> I had overlooked that part. Good catch. I do suspect that in practice >>> nobody will be asking for bizarre values. >>> >>> There's no rule against an unaligned byte_count_limit as far as I have >>> read, but suspect nobody would have a reason to use it in practice. >>> >>>> 2) I found that whatever cool optimization I put in to buffer multiple >>>> sectors at once I end >>>> up with code that breaks migration because older versions would either >>>> not fill the io_buffer >>>> as expected or we introduce variables that older versions do not >>>> understand. This will >>>> lead to problems if we migrate in the middle of a transfer. >>>> >>> Ech. This sounds like a bit of a problem. I'll need to think about this >>> one... 
>>> >>>> 3) My current plan to get this patch to a useful state would be to use >>>> my initial patch and just >>>> change the code to use a sync request if we need to buffer additional >>>> sectors in an elementary >>>> transfer. I found that in real world operating systems the >>>> byte_count_limit seems to be equal to >>>> the cd_sector_size. After all its just a PIO transfer an operating >>>> system will likely switch to DMA >>>> as soon as the kernel ist loaded. >>>> >>>> Thanks, >>>> Peter >>>> >>> It sounds like that might be "good enough" for now, and won't make >>> behavior *worse* than it currently is. You can adjust the test I had >>> checked in to not use a "tricky" value and we can amend support for this >>> later if desired. >> Have you had a chance to look at the series with the "good enough" fix? >> >> Thanks, >> Peter >> > Will do so Friday, thanks! Thank you, Peter
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index ab45495..ec2ba89 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret) } if (s->cd_sector_size == 2352) { - cd_data_to_raw(s->io_buffer, s->lba); + int nb_sectors = s->packet_transfer_size / 2352; + while (nb_sectors--) { + memmove(s->io_buffer + nb_sectors * 2352 + 4, + s->io_buffer + nb_sectors * 2048, 2048); + cd_data_to_raw(s->io_buffer + nb_sectors * 2352, + s->lba + nb_sectors); + } } - s->lba++; + s->lba = -1; s->io_buffer_index = 0; s->status &= ~BUSY_STAT; ide_atapi_cmd_reply_end(s); } -static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size) +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size, int nb_sectors) { if (sector_size != 2048 && sector_size != 2352) { return -EINVAL; } s->iov.iov_base = buf; - if (sector_size == 2352) { - buf += 4; - } - - s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; + s->iov.iov_len = nb_sectors * 2048; qemu_iovec_init_external(&s->qiov, &s->iov, 1); - if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4, - cd_read_sector_cb, s) == NULL) { + if (ide_readv_cancelable(s, (int64_t)lba << 2, + &s->qiov, nb_sectors * 4, + cd_read_sector_cb, s) == NULL) { return -EIO; } block_acct_start(blk_get_stats(s->blk), &s->acct, - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); + nb_sectors * 2048, BLOCK_ACCT_READ); s->status |= BUSY_STAT; return 0; } @@ -190,7 +193,7 @@ void ide_atapi_io_error(IDEState *s, int ret) /* The whole ATAPI transfer logic is handled in this function */ void ide_atapi_cmd_reply_end(IDEState *s) { - int byte_count_limit, size, ret; + int byte_count_limit, size; #ifdef DEBUG_IDE_ATAPI printf("reply: tx_size=%d elem_tx_size=%d index=%d\n", s->packet_transfer_size, @@ -205,14 +208,6 @@ void ide_atapi_cmd_reply_end(IDEState *s) printf("status=0x%x\n", s->status); #endif } else { - /* see if a new sector must be read */ - if (s->lba != -1 && s->io_buffer_index >= 
s->cd_sector_size) { - ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size); - if (ret < 0) { - ide_atapi_io_error(s, ret); - } - return; - } if (s->elementary_transfer_size > 0) { /* there are some data left to transmit in this elementary transfer */ @@ -287,13 +282,18 @@ static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size) static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors, int sector_size) { + int ret; s->lba = lba; s->packet_transfer_size = nb_sectors * sector_size; + assert(s->packet_transfer_size <= + IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4); s->elementary_transfer_size = 0; - s->io_buffer_index = sector_size; s->cd_sector_size = sector_size; - - ide_atapi_cmd_reply_end(s); + ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size, + nb_sectors); + if (ret < 0) { + ide_atapi_io_error(s, ret); + } } static void ide_atapi_cmd_check_status(IDEState *s)