Message ID | 20230718103511.53767-2-its@irrelevant.dk |
---|---|
State | New |
Headers | show |
Series | hw/nvme: fix endianness issue for shadow doorbells | expand |
On 7/18/23 12:35, Klaus Jensen wrote: > From: Klaus Jensen <k.jensen@samsung.com> > > In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for > doorbell buffers"), we fixed shadow doorbells for big-endian guests > running on little endian hosts. But I did not fix little-endian guests > on big-endian hosts. Fix this. > > Solves issue #1765. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1765 > Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") > Cc: qemu-stable@nongnu.org > Reported-by: Thomas Huth <thuth@redhat.com> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com> Tested-by: Cédric Le Goater <clg@redhat.com> with a PowerNV QEMU machine running on a s390x LPAR. Thanks, C. > --- > hw/nvme/ctrl.c | 18 +++++++++++++----- > 1 file changed, 13 insertions(+), 5 deletions(-) > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > index 8e8e870b9a80..dadc2dc7da10 100644 > --- a/hw/nvme/ctrl.c > +++ b/hw/nvme/ctrl.c > @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > PCIDevice *pci = PCI_DEVICE(n); > uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1); > uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2); > + uint32_t v; > int i; > > /* Address should be page aligned */ > @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > NvmeCQueue *cq = n->cq[i]; > > if (sq) { > + v = cpu_to_le32(sq->tail); > + > /* > * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3) > * nvme_process_db() uses this hard-coded way to calculate > @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > */ > sq->db_addr = dbs_addr + (i << 3); > sq->ei_addr = eis_addr + (i << 3); > - pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); > + pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); > > if (n->params.ioeventfd && sq->sqid != 0) { > if (!nvme_init_sq_ioeventfd(sq)) { > @@ -6835,10 +6838,12 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > } > > if (cq) { > + v = cpu_to_le32(cq->head); > + > /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */ > cq->db_addr = dbs_addr + (i << 3) + (1 << 2); > cq->ei_addr = eis_addr + (i << 3) + (1 << 2); > - pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); > + pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); > > if (n->params.ioeventfd && cq->cqid != 0) { > if (!nvme_init_cq_ioeventfd(cq)) { > @@ -7587,7 +7592,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) > static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) > { > PCIDevice *pci = PCI_DEVICE(n); > - uint32_t qid; > + uint32_t qid, v; > > if (unlikely(addr & ((1 << 2) - 1))) { > NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, > @@ -7654,7 +7659,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) > start_sqs = nvme_cq_full(cq) ? 1 : 0; > cq->head = new_head; > if (!qid && n->dbbuf_enabled) { > - pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); > + v = cpu_to_le32(cq->head); > + pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); > } > if (start_sqs) { > NvmeSQueue *sq; > @@ -7714,6 +7720,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) > > sq->tail = new_tail; > if (!qid && n->dbbuf_enabled) { > + v = cpu_to_le32(sq->tail); > + > /* > * The spec states "the host shall also update the controller's > * corresponding doorbell property to match the value of that entry > @@ -7727,7 +7735,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) > * including ones that run on Linux, are not updating Admin Queues, > * so we can't trust reading it for an appropriate sq tail. > */ > - pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); > + pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); > } > > qemu_bh_schedule(sq->bh);
On 18/07/2023 12.35, Klaus Jensen wrote: > From: Klaus Jensen <k.jensen@samsung.com> > > In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for > doorbell buffers"), we fixed shadow doorbells for big-endian guests > running on little endian hosts. But I did not fix little-endian guests > on big-endian hosts. Fix this. > > Solves issue #1765. > > Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") > Cc: qemu-stable@nongnu.org > Reported-by: Thomas Huth <thuth@redhat.com> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com> > --- > hw/nvme/ctrl.c | 18 +++++++++++++----- > 1 file changed, 13 insertions(+), 5 deletions(-) Thank you very much, this fixes tests/avocado/boot_linux_console.py:BootLinuxConsole.test_ppc_powernv8 on my s390x host: Tested-by: Thomas Huth <thuth@redhat.com>
On Tue, Jul 18, 2023 at 12:35:12PM +0200, Klaus Jensen wrote: > From: Klaus Jensen <k.jensen@samsung.com> > > In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for > doorbell buffers"), we fixed shadow doorbells for big-endian guests > running on little endian hosts. But I did not fix little-endian guests > on big-endian hosts. Fix this. > > Solves issue #1765. > > Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") > Cc: qemu-stable@nongnu.org > Reported-by: Thomas Huth <thuth@redhat.com> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com> Looks good! Reviewed-by: Keith Busch <kbusch@kernel.org>
On 18/7/23 12:35, Klaus Jensen wrote: > From: Klaus Jensen <k.jensen@samsung.com> > > In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for > doorbell buffers"), we fixed shadow doorbells for big-endian guests > running on little endian hosts. But I did not fix little-endian guests > on big-endian hosts. Fix this. > > Solves issue #1765. > > Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") > Cc: qemu-stable@nongnu.org > Reported-by: Thomas Huth <thuth@redhat.com> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com> > --- > hw/nvme/ctrl.c | 18 +++++++++++++----- > 1 file changed, 13 insertions(+), 5 deletions(-) > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > index 8e8e870b9a80..dadc2dc7da10 100644 > --- a/hw/nvme/ctrl.c > +++ b/hw/nvme/ctrl.c > @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > PCIDevice *pci = PCI_DEVICE(n); > uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1); > uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2); > + uint32_t v; > int i; > > /* Address should be page aligned */ > @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > NvmeCQueue *cq = n->cq[i]; > > if (sq) { > + v = cpu_to_le32(sq->tail); > + > /* > * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3) > * nvme_process_db() uses this hard-coded way to calculate > @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > */ > sq->db_addr = dbs_addr + (i << 3); > sq->ei_addr = eis_addr + (i << 3); > - pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); > + pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); Or use the PCI DMA ldst API which does the swapping for you: stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED); > > if (n->params.ioeventfd && sq->sqid != 0) { > if (!nvme_init_sq_ioeventfd(sq)) { > @@ -6835,10 +6838,12 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > } > > if (cq) { > + v = cpu_to_le32(cq->head); > + > /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */ > cq->db_addr = dbs_addr + (i << 3) + (1 << 2); > cq->ei_addr = eis_addr + (i << 3) + (1 << 2); > - pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); > + pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED); > > if (n->params.ioeventfd && cq->cqid != 0) { > if (!nvme_init_cq_ioeventfd(cq)) { > @@ -7587,7 +7592,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) > static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) > { > PCIDevice *pci = PCI_DEVICE(n); > - uint32_t qid; > + uint32_t qid, v; > > if (unlikely(addr & ((1 << 2) - 1))) { > NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, > @@ -7654,7 +7659,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) > start_sqs = nvme_cq_full(cq) ? 1 : 0; > cq->head = new_head; > if (!qid && n->dbbuf_enabled) { > - pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); > + v = cpu_to_le32(cq->head); > + pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED); > } > if (start_sqs) { > NvmeSQueue *sq; > @@ -7714,6 +7720,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) > > sq->tail = new_tail; > if (!qid && n->dbbuf_enabled) { > + v = cpu_to_le32(sq->tail); > + > /* > * The spec states "the host shall also update the controller's > * corresponding doorbell property to match the value of that entry > @@ -7727,7 +7735,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) > * including ones that run on Linux, are not updating Admin Queues, > * so we can't trust reading it for an appropriate sq tail. > */ > - pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); > + pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED); > } > > qemu_bh_schedule(sq->bh);
On Jul 18 13:18, Philippe Mathieu-Daudé wrote: > On 18/7/23 12:35, Klaus Jensen wrote: > > From: Klaus Jensen <k.jensen@samsung.com> > > > > In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for > > doorbell buffers"), we fixed shadow doorbells for big-endian guests > > running on little endian hosts. But I did not fix little-endian guests > > on big-endian hosts. Fix this. > > > > Solves issue #1765. > > > > Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") > > Cc: qemu-stable@nongnu.org > > Reported-by: Thomas Huth <thuth@redhat.com> > > Signed-off-by: Klaus Jensen <k.jensen@samsung.com> > > --- > > hw/nvme/ctrl.c | 18 +++++++++++++----- > > 1 file changed, 13 insertions(+), 5 deletions(-) > > > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > > index 8e8e870b9a80..dadc2dc7da10 100644 > > --- a/hw/nvme/ctrl.c > > +++ b/hw/nvme/ctrl.c > > @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > > PCIDevice *pci = PCI_DEVICE(n); > > uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1); > > uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2); > > + uint32_t v; > > int i; > > /* Address should be page aligned */ > > @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > > NvmeCQueue *cq = n->cq[i]; > > if (sq) { > > + v = cpu_to_le32(sq->tail); > > + > > /* > > * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3) > > * nvme_process_db() uses this hard-coded way to calculate > > @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) > > */ > > sq->db_addr = dbs_addr + (i << 3); > > sq->ei_addr = eis_addr + (i << 3); > > - pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); > > + pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); > > Or use the PCI DMA ldst API which does the swapping for you: > > stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED); > Nice! That's definitely preferable. I'll queue that up for next, but leave this patch as-is since it's been tested on a real big-endian host and gotten its tags ;)
On 18/7/23 13:34, Klaus Jensen wrote: > On Jul 18 13:18, Philippe Mathieu-Daudé wrote: >> On 18/7/23 12:35, Klaus Jensen wrote: >>> From: Klaus Jensen <k.jensen@samsung.com> >>> >>> In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for >>> doorbell buffers"), we fixed shadow doorbells for big-endian guests >>> running on little endian hosts. But I did not fix little-endian guests >>> on big-endian hosts. Fix this. >>> >>> Solves issue #1765. >>> >>> Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support") >>> Cc: qemu-stable@nongnu.org >>> Reported-by: Thomas Huth <thuth@redhat.com> >>> Signed-off-by: Klaus Jensen <k.jensen@samsung.com> >>> --- >>> hw/nvme/ctrl.c | 18 +++++++++++++----- >>> 1 file changed, 13 insertions(+), 5 deletions(-) >>> >>> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c >>> index 8e8e870b9a80..dadc2dc7da10 100644 >>> --- a/hw/nvme/ctrl.c >>> +++ b/hw/nvme/ctrl.c >>> @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) >>> PCIDevice *pci = PCI_DEVICE(n); >>> uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1); >>> uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2); >>> + uint32_t v; >>> int i; >>> /* Address should be page aligned */ >>> @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) >>> NvmeCQueue *cq = n->cq[i]; >>> if (sq) { >>> + v = cpu_to_le32(sq->tail); >>> + >>> /* >>> * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3) >>> * nvme_process_db() uses this hard-coded way to calculate >>> @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) >>> */ >>> sq->db_addr = dbs_addr + (i << 3); >>> sq->ei_addr = eis_addr + (i << 3); >>> - pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); >>> + pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); >> >> Or use the PCI DMA ldst API which does the swapping for you: >> >> stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED); >> > > Nice! That's definitely preferable. I'll queue that up for next, but > leave this patch as-is since it's been tested on a real big-endian host > and gotten its tags ;) OK, then: Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8e8e870b9a80..dadc2dc7da10 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) PCIDevice *pci = PCI_DEVICE(n); uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1); uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2); + uint32_t v; int i; /* Address should be page aligned */ @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) NvmeCQueue *cq = n->cq[i]; if (sq) { + v = cpu_to_le32(sq->tail); + /* * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3) * nvme_process_db() uses this hard-coded way to calculate @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) */ sq->db_addr = dbs_addr + (i << 3); sq->ei_addr = eis_addr + (i << 3); - pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); + pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); if (n->params.ioeventfd && sq->sqid != 0) { if (!nvme_init_sq_ioeventfd(sq)) { @@ -6835,10 +6838,12 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) } if (cq) { + v = cpu_to_le32(cq->head); + /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */ cq->db_addr = dbs_addr + (i << 3) + (1 << 2); cq->ei_addr = eis_addr + (i << 3) + (1 << 2); - pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); + pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); if (n->params.ioeventfd && cq->cqid != 0) { if (!nvme_init_cq_ioeventfd(cq)) { @@ -7587,7 +7592,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) { PCIDevice *pci = PCI_DEVICE(n); - uint32_t qid; + uint32_t qid, v; if (unlikely(addr & ((1 << 2) - 1))) { NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, @@ -7654,7 +7659,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) start_sqs = nvme_cq_full(cq) ? 1 : 0; cq->head = new_head; if (!qid && n->dbbuf_enabled) { - pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head)); + v = cpu_to_le32(cq->head); + pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head)); } if (start_sqs) { NvmeSQueue *sq; @@ -7714,6 +7720,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) sq->tail = new_tail; if (!qid && n->dbbuf_enabled) { + v = cpu_to_le32(sq->tail); + /* * The spec states "the host shall also update the controller's * corresponding doorbell property to match the value of that entry @@ -7727,7 +7735,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) * including ones that run on Linux, are not updating Admin Queues, * so we can't trust reading it for an appropriate sq tail. */ - pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail)); + pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail)); } qemu_bh_schedule(sq->bh);