Message ID | 20221020113538.36526-1-its@irrelevant.dk |
---|---|
State | New |
Headers | show |
Series | hw/nvme: reenable cqe batching | expand |
On Thu, Oct 20, 2022 at 01:35:38PM +0200, Klaus Jensen wrote: > From: Klaus Jensen <k.jensen@samsung.com> > > Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell > updates") had the unintended effect of disabling batching of CQEs. > > This patch changes the sq/cq timers to bottom halfs and instead of > calling nvme_post_cqes() immediately (causing an interrupt per cqe), we > defer the call. Nice change, looks good! Timers never did seem to be the best fit for this. Reviewed-by: Keith Busch <kbusch@kernel.org>
at 7:35 PM, Klaus Jensen <its@irrelevant.dk> wrote: > Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell > updates") had the unintended effect of disabling batching of CQEs. > > This patch changes the sq/cq timers to bottom halfs and instead of > calling nvme_post_cqes() immediately (causing an interrupt per cqe), we > defer the call. This change is definitely desired. > > | iops > -----------------+------ > baseline | 138k > +cqe batching | 233k What is the queue depth config for this test?
On Okt 21 10:37, Jinhao Fan wrote: > at 7:35 PM, Klaus Jensen <its@irrelevant.dk> wrote: > > > Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell > > updates") had the unintended effect of disabling batching of CQEs. > > > > This patch changes the sq/cq timers to bottom halfs and instead of > > calling nvme_post_cqes() immediately (causing an interrupt per cqe), we > > defer the call. > > This change is definitely desired. > > > > > | iops > > -----------------+------ > > baseline | 138k > > +cqe batching | 233k > > What is the queue depth config for this test? > That's 64. My box isnt nearly as beefy as yours ;)
at 1:37 PM, Klaus Jensen <its@irrelevant.dk> wrote: > On Okt 21 10:37, Jinhao Fan wrote: >> at 7:35 PM, Klaus Jensen <its@irrelevant.dk> wrote: >> >>> Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell >>> updates") had the unintended effect of disabling batching of CQEs. >>> >>> This patch changes the sq/cq timers to bottom halfs and instead of >>> calling nvme_post_cqes() immediately (causing an interrupt per cqe), we >>> defer the call. >> >> This change is definitely desired. >> >>> | iops >>> -----------------+------ >>> baseline | 138k >>> +cqe batching | 233k >> >> What is the queue depth config for this test? > > That's 64. My box isnt nearly as beefy as yours ;) :). The improvement looks great. Reviewed-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
On Oct 20 13:35, Klaus Jensen wrote: > From: Klaus Jensen <k.jensen@samsung.com> > > Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell > updates") had the unintended effect of disabling batching of CQEs. > > This patch changes the sq/cq timers to bottom halfs and instead of > calling nvme_post_cqes() immediately (causing an interrupt per cqe), we > defer the call. > > | iops > -----------------+------ > baseline | 138k > +cqe batching | 233k > > Fixes: 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell updates") > Signed-off-by: Klaus Jensen <k.jensen@samsung.com> Thanks for the reviews, applied to nvme-next!
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 87aeba056499..73c870a42996 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1401,13 +1401,7 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); - if (req->sq->ioeventfd_enabled) { - /* Post CQE directly since we are in main loop thread */ - nvme_post_cqes(cq); - } else { - /* Schedule the timer to post CQE later since we are in vcpu thread */ - timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); - } + qemu_bh_schedule(cq->bh); } static void nvme_process_aers(void *opaque) @@ -4252,7 +4246,7 @@ static void nvme_cq_notifier(EventNotifier *e) nvme_irq_deassert(n, cq); } - nvme_post_cqes(cq); + qemu_bh_schedule(cq->bh); } static int nvme_init_cq_ioeventfd(NvmeCQueue *cq) @@ -4307,7 +4301,7 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) uint16_t offset = sq->sqid << 3; n->sq[sq->sqid] = NULL; - timer_free(sq->timer); + qemu_bh_delete(sq->bh); if (sq->ioeventfd_enabled) { memory_region_del_eventfd(&n->iomem, 0x1000 + offset, 4, false, 0, &sq->notifier); @@ -4381,7 +4375,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, sq->io_req[i].sq = sq; QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); } - sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq); + + sq->bh = qemu_bh_new(nvme_process_sq, sq); if (n->dbbuf_enabled) { sq->db_addr = n->dbbuf_dbs + (sqid << 3); @@ -4698,7 +4693,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) uint16_t offset = (cq->cqid << 3) + (1 << 2); n->cq[cq->cqid] = NULL; - timer_free(cq->timer); + qemu_bh_delete(cq->bh); if (cq->ioeventfd_enabled) { memory_region_del_eventfd(&n->iomem, 0x1000 + offset, 4, false, 0, &cq->notifier); @@ -4771,7 +4766,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, } } n->cq[cqid] = cq; - cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); + cq->bh = qemu_bh_new(nvme_post_cqes, cq); } static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) @@ -6913,9 +6908,9 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) if (start_sqs) { NvmeSQueue *sq; QTAILQ_FOREACH(sq, &cq->sq_list, entry) { - timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + qemu_bh_schedule(sq->bh); } - timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + qemu_bh_schedule(cq->bh); } if (cq->tail == cq->head) { @@ -6984,7 +6979,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) pci_dma_write(&n->parent_obj, sq->db_addr, &sq->tail, sizeof(sq->tail)); } - timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + + qemu_bh_schedule(sq->bh); } } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 79f5c281c223..7adf042ec3e4 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -375,7 +375,7 @@ typedef struct NvmeSQueue { uint64_t dma_addr; uint64_t db_addr; uint64_t ei_addr; - QEMUTimer *timer; + QEMUBH *bh; EventNotifier notifier; bool ioeventfd_enabled; NvmeRequest *io_req; @@ -396,7 +396,7 @@ typedef struct NvmeCQueue { uint64_t dma_addr; uint64_t db_addr; uint64_t ei_addr; - QEMUTimer *timer; + QEMUBH *bh; EventNotifier notifier; bool ioeventfd_enabled; QTAILQ_HEAD(, NvmeSQueue) sq_list;