Message ID | 20200316142928.153431-24-its@irrelevant.dk
---|---
State | New
Series | nvme: support NVMe v1.3d, SGLs and multiple namespaces
On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
> use them in nvme_map_prp.
> 
> This fixes a bug where in the case of a CMB transfer, the device would
> map to the buffer with a wrong length.
> 
> Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c       | 97 +++++++++++++++++++++++++++++++++++--------
>  hw/block/trace-events |  1 +
>  2 files changed, 81 insertions(+), 17 deletions(-)
> 
> [...]
> 
> +static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
> +                                  size_t len)
> +{
> +    if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len)) {
> +        return NVME_DATA_TRAS_ERROR;
> +    }

I just noticed that, in theory (not that it really matters), addr + len refers
to the byte which is already not part of the transfer.

> +
> +    qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);

Also interesting: we can add a 0-sized iovec here.

> +
> +    return NVME_SUCCESS;
> +}
> +
> +static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
> +                              hwaddr addr, size_t len)
> +{
> [...]
> +    return NVME_SUCCESS;
> +}

Looks very good.

> [...]

Looks very good overall.

Best regards,
	Maxim Levitsky
On Mar 25 12:45, Maxim Levitsky wrote:
> On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
> > use them in nvme_map_prp.
> > 
> > This fixes a bug where in the case of a CMB transfer, the device would
> > map to the buffer with a wrong length.
> > 
> > Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > ---
> [...]
> > +static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
> > +                                  size_t len)
> > +{
> > +    if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len)) {
> > +        return NVME_DATA_TRAS_ERROR;
> > +    }
> 
> I just noticed that, in theory (not that it really matters), addr + len
> refers to the byte which is already not part of the transfer.
> 

Oh. Good catch - and I think that it does matter? Can't we end up
rejecting a valid access? Anyway, I fixed it with a '- 1'.

> > +
> > +    qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
> 
> Also interesting: we can add a 0-sized iovec here.
> 

I added a check on len. This also makes sure the above '- 1' fix doesn't
cause an 'addr + 0 - 1' to be done.
On Tue, 2020-03-31 at 07:44 +0200, Klaus Birkelund Jensen wrote:
> On Mar 25 12:45, Maxim Levitsky wrote:
> > On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote:
> > > From: Klaus Jensen <k.jensen@samsung.com>
> > > 
> > > Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
> > > use them in nvme_map_prp.
> > > 
> > > This fixes a bug where in the case of a CMB transfer, the device would
> > > map to the buffer with a wrong length.
> > > 
> > [...]
> > > +static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
> > > +                                  size_t len)
> > > +{
> > > +    if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len)) {
> > > +        return NVME_DATA_TRAS_ERROR;
> > > +    }
> > 
> > I just noticed that, in theory (not that it really matters), addr + len
> > refers to the byte which is already not part of the transfer.
> > 
> 
> Oh. Good catch - and I think that it does matter? Can't we end up
> rejecting a valid access? Anyway, I fixed it with a '- 1'.

Actually, thinking about it again, we can indeed reject the access if the
data happens to include the last byte of the CMB. That can absolutely
happen. When I wrote this I was thinking the other way around, that we
might reject data that is in regular RAM and 'touches' the CMB, which
indeed won't happen since RAM usually doesn't come close to MMIO ranges.
Anyway, there is no reason not to fix such issues.

> 
> > > +
> > > +    qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
> > 
> > Also interesting: we can add a 0-sized iovec here.
> > 
> 
> I added a check on len. This also makes sure the above '- 1' fix doesn't
> cause an 'addr + 0 - 1' to be done.

Yes, that is what I was thinking; len = 0 needs a special case here.

Best regards,
	Maxim Levitsky
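[Editor's note] Putting the two fixes discussed above together, a minimal
sketch of how the adjusted helper could look, with the '- 1' on the upper
bound plus an explicit len == 0 guard. This illustrates the discussion only;
the exact code in the respun patch may differ:

static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
                                  size_t len)
{
    /* Nothing to map; also avoids evaluating 'addr + 0 - 1' below. */
    if (!len) {
        return NVME_SUCCESS;
    }

    /*
     * addr + len - 1 is the last byte actually transferred, so a transfer
     * that ends exactly at the top of the CMB is no longer rejected
     * (addr + len would be one past the end of the window).
     */
    if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
        return NVME_DATA_TRAS_ERROR;
    }

    qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);

    return NVME_SUCCESS;
}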
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 08267e847671..187c816eb6ad 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -59,6 +59,11 @@
 
 static void nvme_process_sq(void *opaque);
 
+static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
+{
+    return &n->cmbuf[addr - n->ctrl_mem.addr];
+}
+
 static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 {
     hwaddr low = n->ctrl_mem.addr;
@@ -70,7 +75,7 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
     if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) {
-        memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size);
+        memcpy(buf, nvme_addr_to_cmb(n, addr), size);
         return;
     }
 
@@ -153,29 +158,79 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
     }
 }
 
+static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
+                                  size_t len)
+{
+    if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len)) {
+        return NVME_DATA_TRAS_ERROR;
+    }
+
+    qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
+
+    return NVME_SUCCESS;
+}
+
+static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
+                              hwaddr addr, size_t len)
+{
+    if (nvme_addr_is_cmb(n, addr)) {
+        if (qsg && qsg->sg) {
+            return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+        }
+
+        assert(iov);
+
+        if (!iov->iov) {
+            qemu_iovec_init(iov, 1);
+        }
+
+        return nvme_map_addr_cmb(n, iov, addr, len);
+    }
+
+    if (iov && iov->iov) {
+        return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+    }
+
+    assert(qsg);
+
+    if (!qsg->sg) {
+        pci_dma_sglist_init(qsg, &n->parent_obj, 1);
+    }
+
+    qemu_sglist_add(qsg, addr, len);
+
+    return NVME_SUCCESS;
+}
+
 static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
                              uint64_t prp2, uint32_t len, NvmeCtrl *n)
 {
     hwaddr trans_len = n->page_size - (prp1 % n->page_size);
     trans_len = MIN(len, trans_len);
     int num_prps = (len >> n->page_bits) + 1;
+    uint16_t status;
 
     if (unlikely(!prp1)) {
         trace_nvme_dev_err_invalid_prp();
         return NVME_INVALID_FIELD | NVME_DNR;
-    } else if (n->bar.cmbsz && prp1 >= n->ctrl_mem.addr &&
-               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
-        qsg->nsg = 0;
+    }
+
+    if (nvme_addr_is_cmb(n, prp1)) {
         qemu_iovec_init(iov, num_prps);
-        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
     } else {
         pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
-        qemu_sglist_add(qsg, prp1, trans_len);
     }
+
+    status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
+    if (status) {
+        goto unmap;
+    }
+
     len -= trans_len;
     if (len) {
         if (unlikely(!prp2)) {
             trace_nvme_dev_err_invalid_prp2_missing();
+            status = NVME_INVALID_FIELD | NVME_DNR;
             goto unmap;
         }
         if (len > n->page_size) {
@@ -192,6 +247,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
             if (i == n->max_prp_ents - 1 && len > n->page_size) {
                 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
                     trace_nvme_dev_err_invalid_prplist_ent(prp_ent);
+                    status = NVME_INVALID_FIELD | NVME_DNR;
                     goto unmap;
                 }
 
@@ -205,14 +261,14 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
 
                 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
                     trace_nvme_dev_err_invalid_prplist_ent(prp_ent);
+                    status = NVME_INVALID_FIELD | NVME_DNR;
                     goto unmap;
                 }
 
                 trans_len = MIN(len, n->page_size);
-                if (qsg->nsg){
-                    qemu_sglist_add(qsg, prp_ent, trans_len);
-                } else {
-                    qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len);
+                status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
+                if (status) {
+                    goto unmap;
                 }
                 len -= trans_len;
                 i++;
@@ -220,20 +276,27 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
         } else {
             if (unlikely(prp2 & (n->page_size - 1))) {
                 trace_nvme_dev_err_invalid_prp2_align(prp2);
+                status = NVME_INVALID_FIELD | NVME_DNR;
                 goto unmap;
             }
-            if (qsg->nsg) {
-                qemu_sglist_add(qsg, prp2, len);
-            } else {
-                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len);
+            status = nvme_map_addr(n, qsg, iov, prp2, len);
+            if (status) {
+                goto unmap;
             }
         }
     }
     return NVME_SUCCESS;
 
- unmap:
-    qemu_sglist_destroy(qsg);
-    return NVME_INVALID_FIELD | NVME_DNR;
+unmap:
+    if (iov && iov->iov) {
+        qemu_iovec_destroy(iov);
+    }
+
+    if (qsg && qsg->sg) {
+        qemu_sglist_destroy(qsg);
+    }
+
+    return status;
 }
 
 static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 4cde0844ef64..adf11313f956 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -33,6 +33,7 @@ nvme_dev_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
 nvme_dev_irq_pin(void) "pulsing IRQ pin"
 nvme_dev_irq_masked(void) "IRQ is masked"
 nvme_dev_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
+nvme_dev_map_prp(uint16_t cid, uint8_t opc, uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "cid %"PRIu16" opc 0x%"PRIx8" trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d"
 nvme_dev_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
 nvme_dev_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
 nvme_dev_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
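[Editor's note] The boundary discussion in the thread hinges on
nvme_addr_is_cmb(), which is only partially visible in the hunk context
above. A sketch of the pre-existing helper, reconstructed from the context
lines and from the old range check removed from nvme_map_prp (not part of
this patch, and the local variable names are an assumption):

static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
{
    hwaddr low = n->ctrl_mem.addr;
    hwaddr hi  = n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size);

    /* The upper bound is exclusive: the last valid CMB byte is hi - 1. */
    return addr >= low && addr < hi;
}

With an exclusive upper bound, checking addr + len (one past the last
transferred byte) spuriously fails for a transfer that ends at hi - 1, which
is the off-by-one Maxim pointed out and the reason for the '- 1' fix.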