Message ID | 20210617100820.75510-1-its@irrelevant.dk |
---|---|
State | New |
Headers | show |
Series | hw/nvme: fix pin-based interrupt behavior (again) | expand |
On Jun 17 12:08, Klaus Jensen wrote: >From: Klaus Jensen <k.jensen@samsung.com> > >Jakub noticed[1] that, when using pin-based interrupts, the device will >unconditionally deasssert when any CQEs are acknowledged. However, the >pin should not be deasserted if other completion queues still holds >unacknowledged CQEs. > >The bug is an artifact of commit ca247d35098d ("hw/block/nvme: fix >pin-based interrupt behavior") which fixed one bug but introduced >another. This is the third time someone tries to fix pin-based >interrupts (see commit 5e9aa92eb1a5 ("hw/block: Fix pin-based interrupt >behaviour of NVMe"))... > >Third time's the charm, so fix it, again, by keeping track of how many >CQs have unacknowledged CQEs and only deassert when all are cleared. > > [1]: <20210610114624.304681-1-jakub.jermar@kernkonzept.com> > >Fixes: ca247d35098d ("hw/block/nvme: fix pin-based interrupt behavior") >Reported-by: Jakub Jermář <jakub.jermar@kernkonzept.com> >Signed-off-by: Klaus Jensen <k.jensen@samsung.com> >--- > hw/nvme/nvme.h | 1 + > hw/nvme/ctrl.c | 15 ++++++++++++++- > 2 files changed, 15 insertions(+), 1 deletion(-) > >diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h >index 93a7e0e5380e..60250b579464 100644 >--- a/hw/nvme/nvme.h >+++ b/hw/nvme/nvme.h >@@ -405,6 +405,7 @@ typedef struct NvmeCtrl { > uint32_t max_q_ents; > uint8_t outstanding_aers; > uint32_t irq_status; >+ int cq_pending; > uint64_t host_timestamp; /* Timestamp sent by the host */ > uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ > uint64_t starttime_ms; >diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c >index 7dea64b72e6a..9419f67c4e88 100644 >--- a/hw/nvme/ctrl.c >+++ b/hw/nvme/ctrl.c >@@ -473,7 +473,9 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) > return; > } else { > assert(cq->vector < 32); >- n->irq_status &= ~(1 << cq->vector); >+ if (!n->cq_pending) { >+ n->irq_status &= ~(1 << cq->vector); >+ } > nvme_irq_check(n); > } > } >@@ -1258,6 +1260,7 @@ static void nvme_post_cqes(void *opaque) > NvmeCQueue *cq = opaque; > NvmeCtrl *n = cq->ctrl; > NvmeRequest *req, *next; >+ bool pending = cq->head != cq->tail; > int ret; > > QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { >@@ -1287,6 +1290,10 @@ static void nvme_post_cqes(void *opaque) > QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); > } > if (cq->tail != cq->head) { >+ if (!pending) { >+ n->cq_pending++; >+ } >+ > nvme_irq_assert(n, cq); > } > } >@@ -4099,6 +4106,11 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req) > trace_pci_nvme_err_invalid_del_cq_notempty(qid); > return NVME_INVALID_QUEUE_DEL; > } >+ >+ if (cq->tail != cq->head) { >+ n->cq_pending--; >+ } >+ > nvme_irq_deassert(n, cq); > trace_pci_nvme_del_cq(qid); > nvme_free_cq(cq, n); >@@ -5758,6 +5770,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) > } > > if (cq->tail == cq->head) { >+ n->cq_pending--; > nvme_irq_deassert(n, cq); > } > } else { >-- >2.32.0 > Jakub, can you test this in your environment?
On 6/17/21 12:09 PM, Klaus Jensen wrote: > On Jun 17 12:08, Klaus Jensen wrote: >> From: Klaus Jensen <k.jensen@samsung.com> >> >> Jakub noticed[1] that, when using pin-based interrupts, the device will >> unconditionally deasssert when any CQEs are acknowledged. However, the >> pin should not be deasserted if other completion queues still holds >> unacknowledged CQEs. >> >> The bug is an artifact of commit ca247d35098d ("hw/block/nvme: fix >> pin-based interrupt behavior") which fixed one bug but introduced >> another. This is the third time someone tries to fix pin-based >> interrupts (see commit 5e9aa92eb1a5 ("hw/block: Fix pin-based interrupt >> behaviour of NVMe"))... >> >> Third time's the charm, so fix it, again, by keeping track of how many >> CQs have unacknowledged CQEs and only deassert when all are cleared. >> >> [1]: <20210610114624.304681-1-jakub.jermar@kernkonzept.com> >> >> Fixes: ca247d35098d ("hw/block/nvme: fix pin-based interrupt behavior") >> Reported-by: Jakub Jermář <jakub.jermar@kernkonzept.com> >> Signed-off-by: Klaus Jensen <k.jensen@samsung.com> >> --- >> hw/nvme/nvme.h | 1 + >> hw/nvme/ctrl.c | 15 ++++++++++++++- >> 2 files changed, 15 insertions(+), 1 deletion(-) >> >> diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h >> index 93a7e0e5380e..60250b579464 100644 >> --- a/hw/nvme/nvme.h >> +++ b/hw/nvme/nvme.h >> @@ -405,6 +405,7 @@ typedef struct NvmeCtrl { >> uint32_t max_q_ents; >> uint8_t outstanding_aers; >> uint32_t irq_status; >> + int cq_pending; >> uint64_t host_timestamp; /* Timestamp sent by >> the host */ >> uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ >> uint64_t starttime_ms; >> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c >> index 7dea64b72e6a..9419f67c4e88 100644 >> --- a/hw/nvme/ctrl.c >> +++ b/hw/nvme/ctrl.c >> @@ -473,7 +473,9 @@ static void nvme_irq_deassert(NvmeCtrl *n, >> NvmeCQueue *cq) >> return; >> } else { >> assert(cq->vector < 32); >> - n->irq_status &= ~(1 << cq->vector); >> + if (!n->cq_pending) { >> + n->irq_status &= ~(1 << cq->vector); >> + } >> nvme_irq_check(n); >> } >> } >> @@ -1258,6 +1260,7 @@ static void nvme_post_cqes(void *opaque) >> NvmeCQueue *cq = opaque; >> NvmeCtrl *n = cq->ctrl; >> NvmeRequest *req, *next; >> + bool pending = cq->head != cq->tail; >> int ret; >> >> QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { >> @@ -1287,6 +1290,10 @@ static void nvme_post_cqes(void *opaque) >> QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); >> } >> if (cq->tail != cq->head) { >> + if (!pending) { >> + n->cq_pending++; >> + } >> + >> nvme_irq_assert(n, cq); >> } >> } >> @@ -4099,6 +4106,11 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, >> NvmeRequest *req) >> trace_pci_nvme_err_invalid_del_cq_notempty(qid); >> return NVME_INVALID_QUEUE_DEL; >> } >> + >> + if (cq->tail != cq->head) { >> + n->cq_pending--; >> + } >> + >> nvme_irq_deassert(n, cq); >> trace_pci_nvme_del_cq(qid); >> nvme_free_cq(cq, n); >> @@ -5758,6 +5770,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr >> addr, int val) >> } >> >> if (cq->tail == cq->head) { >> + n->cq_pending--; >> nvme_irq_deassert(n, cq); >> } >> } else { >> -- >> 2.32.0 >> > > Jakub, can you test this in your environment? Yep, still tests fine. Jakub
On Thu, Jun 17, 2021 at 12:08:20PM +0200, Klaus Jensen wrote: > if (cq->tail != cq->head) { > + if (!pending) { > + n->cq_pending++; > + } You should check cq->irq_enabled before incrementing cq_pending. You don't want to leave the irq asserted when polled queues have outsanding CQEs.
On Jun 17 07:50, Keith Busch wrote: >On Thu, Jun 17, 2021 at 12:08:20PM +0200, Klaus Jensen wrote: >> if (cq->tail != cq->head) { >> + if (!pending) { >> + n->cq_pending++; >> + } > >You should check cq->irq_enabled before incrementing cq_pending. You >don't want to leave the irq asserted when polled queues have outsanding >CQEs. Good catch. Thanks!
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 93a7e0e5380e..60250b579464 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -405,6 +405,7 @@ typedef struct NvmeCtrl { uint32_t max_q_ents; uint8_t outstanding_aers; uint32_t irq_status; + int cq_pending; uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ uint64_t starttime_ms; diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 7dea64b72e6a..9419f67c4e88 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -473,7 +473,9 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) return; } else { assert(cq->vector < 32); - n->irq_status &= ~(1 << cq->vector); + if (!n->cq_pending) { + n->irq_status &= ~(1 << cq->vector); + } nvme_irq_check(n); } } @@ -1258,6 +1260,7 @@ static void nvme_post_cqes(void *opaque) NvmeCQueue *cq = opaque; NvmeCtrl *n = cq->ctrl; NvmeRequest *req, *next; + bool pending = cq->head != cq->tail; int ret; QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { @@ -1287,6 +1290,10 @@ static void nvme_post_cqes(void *opaque) QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); } if (cq->tail != cq->head) { + if (!pending) { + n->cq_pending++; + } + nvme_irq_assert(n, cq); } } @@ -4099,6 +4106,11 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_del_cq_notempty(qid); return NVME_INVALID_QUEUE_DEL; } + + if (cq->tail != cq->head) { + n->cq_pending--; + } + nvme_irq_deassert(n, cq); trace_pci_nvme_del_cq(qid); nvme_free_cq(cq, n); @@ -5758,6 +5770,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) } if (cq->tail == cq->head) { + n->cq_pending--; nvme_irq_deassert(n, cq); } } else {