diff mbox series

[2/2] hw/block/nvme: drain namespaces on sq deletion

Message ID 20210408193709.435939-2-its@irrelevant.dk
State New
Headers show
Series [1/2] hw/block/nvme: store aiocb in compare | expand

Commit Message

Klaus Jensen April 8, 2021, 7:37 p.m. UTC
From: Klaus Jensen <k.jensen@samsung.com>

For most commands, when issuing an AIO, the BlockAIOCB is stored in the
NvmeRequest aiocb pointer when the AIO is issued. The main use of this
is cancelling AIOs when deleting submission queues (it is currently not
used for Abort).

However, some commands like Dataset Management Zone Management Send
(zone reset) may involve more than one AIO and here the AIOs are issued
without saving a reference to the BlockAIOCB. This is a problem since
nvme_del_sq() will attempt to cancel outstanding AIOs, potentially with
an invalid BlockAIOCB since the aiocb pointer is not NULL'ed when the
request structure is recycled.

Fix this by

  1. making sure the aiocb pointer is NULL'ed when requests are recycled
  2. only attempt to cancel the AIO if the aiocb is non-NULL
  3. if any AIOs could not be cancelled, drain all aio as a last resort.

Fixes: dc04d25e2f3f ("hw/block/nvme: add support for the format nvm command")
Fixes: c94973288cd9 ("hw/block/nvme: add broadcast nsid support flush command")
Fixes: e4e430b3d6ba ("hw/block/nvme: add simple copy command")
Fixes: 5f5dc4c6a942 ("hw/block/nvme: zero out zones on reset")
Fixes: 2605257a26b8 ("hw/block/nvme: add the dataset management command")
Cc: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Cc: Minwoo Im <minwoo.im@samsung.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

Comments

Minwoo Im April 9, 2021, 11:09 a.m. UTC | #1
On 21-04-08 21:37:09, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> For most commands, when issuing an AIO, the BlockAIOCB is stored in the
> NvmeRequest aiocb pointer when the AIO is issued. The main use of this
> is cancelling AIOs when deleting submission queues (it is currently not
> used for Abort).
> 
> However, some commands like Dataset Management Zone Management Send
> (zone reset) may involve more than one AIO and here the AIOs are issued
> without saving a reference to the BlockAIOCB. This is a problem since
> nvme_del_sq() will attempt to cancel outstanding AIOs, potentially with
> an invalid BlockAIOCB since the aiocb pointer is not NULL'ed when the
> request structure is recycled.
> 
> Fix this by
> 
>   1. making sure the aiocb pointer is NULL'ed when requests are recycled
>   2. only attempt to cancel the AIO if the aiocb is non-NULL
>   3. if any AIOs could not be cancelled, drain all aio as a last resort.
> 
> Fixes: dc04d25e2f3f ("hw/block/nvme: add support for the format nvm command")
> Fixes: c94973288cd9 ("hw/block/nvme: add broadcast nsid support flush command")
> Fixes: e4e430b3d6ba ("hw/block/nvme: add simple copy command")
> Fixes: 5f5dc4c6a942 ("hw/block/nvme: zero out zones on reset")
> Fixes: 2605257a26b8 ("hw/block/nvme: add the dataset management command")
> Cc: Gollu Appalanaidu <anaidu.gollu@samsung.com>
> Cc: Minwoo Im <minwoo.im@samsung.com>
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c | 23 +++++++++++++++++++++--
>  1 file changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 94bc373260be..3c4297e38a52 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -470,6 +470,7 @@ static void nvme_req_clear(NvmeRequest *req)
>  {
>      req->ns = NULL;
>      req->opaque = NULL;
> +    req->aiocb = NULL;
>      memset(&req->cqe, 0x0, sizeof(req->cqe));
>      req->status = NVME_SUCCESS;
>  }
> @@ -3681,6 +3682,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
>      NvmeSQueue *sq;
>      NvmeCQueue *cq;
>      uint16_t qid = le16_to_cpu(c->qid);
> +    int nsid;

Even we don't have fully supported number of namespaces in this device
(0xFFFFFFFF), can we have this one with `uint32_t` ?

Otherwise, looks good to me.

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>
Klaus Jensen April 9, 2021, 11:45 a.m. UTC | #2
On Apr  9 20:09, Minwoo Im wrote:
>On 21-04-08 21:37:09, Klaus Jensen wrote:
>> From: Klaus Jensen <k.jensen@samsung.com>
>>
>> For most commands, when issuing an AIO, the BlockAIOCB is stored in the
>> NvmeRequest aiocb pointer when the AIO is issued. The main use of this
>> is cancelling AIOs when deleting submission queues (it is currently not
>> used for Abort).
>>
>> However, some commands like Dataset Management Zone Management Send
>> (zone reset) may involve more than one AIO and here the AIOs are issued
>> without saving a reference to the BlockAIOCB. This is a problem since
>> nvme_del_sq() will attempt to cancel outstanding AIOs, potentially with
>> an invalid BlockAIOCB since the aiocb pointer is not NULL'ed when the
>> request structure is recycled.
>>
>> Fix this by
>>
>>   1. making sure the aiocb pointer is NULL'ed when requests are recycled
>>   2. only attempt to cancel the AIO if the aiocb is non-NULL
>>   3. if any AIOs could not be cancelled, drain all aio as a last resort.
>>
>> Fixes: dc04d25e2f3f ("hw/block/nvme: add support for the format nvm command")
>> Fixes: c94973288cd9 ("hw/block/nvme: add broadcast nsid support flush command")
>> Fixes: e4e430b3d6ba ("hw/block/nvme: add simple copy command")
>> Fixes: 5f5dc4c6a942 ("hw/block/nvme: zero out zones on reset")
>> Fixes: 2605257a26b8 ("hw/block/nvme: add the dataset management command")
>> Cc: Gollu Appalanaidu <anaidu.gollu@samsung.com>
>> Cc: Minwoo Im <minwoo.im@samsung.com>
>> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
>> ---
>>  hw/block/nvme.c | 23 +++++++++++++++++++++--
>>  1 file changed, 21 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
>> index 94bc373260be..3c4297e38a52 100644
>> --- a/hw/block/nvme.c
>> +++ b/hw/block/nvme.c
>> @@ -470,6 +470,7 @@ static void nvme_req_clear(NvmeRequest *req)
>>  {
>>      req->ns = NULL;
>>      req->opaque = NULL;
>> +    req->aiocb = NULL;
>>      memset(&req->cqe, 0x0, sizeof(req->cqe));
>>      req->status = NVME_SUCCESS;
>>  }
>> @@ -3681,6 +3682,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
>>      NvmeSQueue *sq;
>>      NvmeCQueue *cq;
>>      uint16_t qid = le16_to_cpu(c->qid);
>> +    int nsid;
>
>Even we don't have fully supported number of namespaces in this device
>(0xFFFFFFFF), can we have this one with `uint32_t` ?
>

Sure!

>Otherwise, looks good to me.
>
>Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>
>
Gollu Appalanaidu April 9, 2021, 11:52 a.m. UTC | #3
On Thu, Apr 08, 2021 at 09:37:09PM +0200, Klaus Jensen wrote:
>From: Klaus Jensen <k.jensen@samsung.com>
>
>For most commands, when issuing an AIO, the BlockAIOCB is stored in the
>NvmeRequest aiocb pointer when the AIO is issued. The main use of this
>is cancelling AIOs when deleting submission queues (it is currently not
>used for Abort).
>
>However, some commands like Dataset Management Zone Management Send
>(zone reset) may involve more than one AIO and here the AIOs are issued
>without saving a reference to the BlockAIOCB. This is a problem since
>nvme_del_sq() will attempt to cancel outstanding AIOs, potentially with
>an invalid BlockAIOCB since the aiocb pointer is not NULL'ed when the
>request structure is recycled.
>
>Fix this by
>
>  1. making sure the aiocb pointer is NULL'ed when requests are recycled
>  2. only attempt to cancel the AIO if the aiocb is non-NULL
>  3. if any AIOs could not be cancelled, drain all aio as a last resort.
>
>Fixes: dc04d25e2f3f ("hw/block/nvme: add support for the format nvm command")
>Fixes: c94973288cd9 ("hw/block/nvme: add broadcast nsid support flush command")
>Fixes: e4e430b3d6ba ("hw/block/nvme: add simple copy command")
>Fixes: 5f5dc4c6a942 ("hw/block/nvme: zero out zones on reset")
>Fixes: 2605257a26b8 ("hw/block/nvme: add the dataset management command")
>Cc: Gollu Appalanaidu <anaidu.gollu@samsung.com>
>Cc: Minwoo Im <minwoo.im@samsung.com>
>Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
>---
> hw/block/nvme.c | 23 +++++++++++++++++++++--
> 1 file changed, 21 insertions(+), 2 deletions(-)
>
>diff --git a/hw/block/nvme.c b/hw/block/nvme.c
>index 94bc373260be..3c4297e38a52 100644
>--- a/hw/block/nvme.c
>+++ b/hw/block/nvme.c
>@@ -470,6 +470,7 @@ static void nvme_req_clear(NvmeRequest *req)
> {
>     req->ns = NULL;
>     req->opaque = NULL;
>+    req->aiocb = NULL;
>     memset(&req->cqe, 0x0, sizeof(req->cqe));
>     req->status = NVME_SUCCESS;
> }
>@@ -3681,6 +3682,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
>     NvmeSQueue *sq;
>     NvmeCQueue *cq;
>     uint16_t qid = le16_to_cpu(c->qid);
>+    int nsid;
>
>     if (unlikely(!qid || nvme_check_sqid(n, qid))) {
>         trace_pci_nvme_err_invalid_del_sq(qid);
>@@ -3692,9 +3694,26 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
>     sq = n->sq[qid];
>     while (!QTAILQ_EMPTY(&sq->out_req_list)) {
>         r = QTAILQ_FIRST(&sq->out_req_list);
>-        assert(r->aiocb);
>-        blk_aio_cancel(r->aiocb);
>+        if (r->aiocb) {
>+            blk_aio_cancel(r->aiocb);
>+        }
>     }
>+
>+    /*
>+     * Drain all namespaces if there are still outstanding requests that we
>+     * could not cancel explicitly.
>+     */
>+    if (!QTAILQ_EMPTY(&sq->out_req_list)) {
>+        for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
>+            NvmeNamespace *ns = nvme_ns(n, nsid);
>+            if (ns) {
>+                nvme_ns_drain(ns);
>+            }
>+        }
>+    }
>+
>+    assert(QTAILQ_EMPTY(&sq->out_req_list));
>+
>     if (!nvme_check_cqid(n, sq->cqid)) {
>         cq = n->cq[sq->cqid];
>         QTAILQ_REMOVE(&cq->sq_list, sq, entry);
>--

LTM.
Reviewed-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>

>2.31.1
>
>
diff mbox series

Patch

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 94bc373260be..3c4297e38a52 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -470,6 +470,7 @@  static void nvme_req_clear(NvmeRequest *req)
 {
     req->ns = NULL;
     req->opaque = NULL;
+    req->aiocb = NULL;
     memset(&req->cqe, 0x0, sizeof(req->cqe));
     req->status = NVME_SUCCESS;
 }
@@ -3681,6 +3682,7 @@  static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
     NvmeSQueue *sq;
     NvmeCQueue *cq;
     uint16_t qid = le16_to_cpu(c->qid);
+    int nsid;
 
     if (unlikely(!qid || nvme_check_sqid(n, qid))) {
         trace_pci_nvme_err_invalid_del_sq(qid);
@@ -3692,9 +3694,26 @@  static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
     sq = n->sq[qid];
     while (!QTAILQ_EMPTY(&sq->out_req_list)) {
         r = QTAILQ_FIRST(&sq->out_req_list);
-        assert(r->aiocb);
-        blk_aio_cancel(r->aiocb);
+        if (r->aiocb) {
+            blk_aio_cancel(r->aiocb);
+        }
     }
+
+    /*
+     * Drain all namespaces if there are still outstanding requests that we
+     * could not cancel explicitly.
+     */
+    if (!QTAILQ_EMPTY(&sq->out_req_list)) {
+        for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
+            NvmeNamespace *ns = nvme_ns(n, nsid);
+            if (ns) {
+                nvme_ns_drain(ns);
+            }
+        }
+    }
+
+    assert(QTAILQ_EMPTY(&sq->out_req_list));
+
     if (!nvme_check_cqid(n, sq->cqid)) {
         cq = n->cq[sq->cqid];
         QTAILQ_REMOVE(&cq->sq_list, sq, entry);