@@ -9,20 +9,22 @@
*/
/**
- * Reference Specification: NVM Express 1.2.1
+ * Reference Specification: NVM Express 1.3d
*
* https://nvmexpress.org/resources/specifications/
*/
/**
* Usage: add options:
- * -drive file=<file>,if=none,id=<drive_id>
- * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
- * cmb_size_mb=<cmb_size_mb[optional]>, \
- * num_queues=<N[optional]>
+ * -drive file=<file>,if=none,id=<drive_id>
+ * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>
*
- * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
- * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
+ * Advanced optional options:
+ *
+ *   num_queues=<uint32>      : Maximum number of queues, including the
+ *                              admin queue (N - 1 IO queues). Default: 64
+ * cmb_size_mb=<uint32> : Size of Controller Memory Buffer in MBs.
+ * Default: 0 (disabled)
*/
#include "qemu/osdep.h"
@@ -345,6 +347,8 @@ static void nvme_post_cqes(void *opaque)
static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
{
assert(cq->cqid == req->sq->cqid);
+
+ trace_nvme_enqueue_req_completion(req->cid, cq->cqid, req->status);
QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
@@ -530,6 +534,7 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
if (sq->sqid) {
g_free(sq);
}
+ n->qs_created--;
}
static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -596,6 +601,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
cq = n->cq[cqid];
QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
n->sq[sqid] = sq;
+ n->qs_created++;
}
static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -741,7 +747,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
uint32_t dw11 = le32_to_cpu(cmd->cdw11);
uint32_t dw12 = le32_to_cpu(cmd->cdw12);
uint32_t dw13 = le32_to_cpu(cmd->cdw13);
- uint16_t lid = dw10 & 0xff;
+ uint8_t lid = dw10 & 0xff;
+ uint8_t lsp = (dw10 >> 8) & 0xf;
uint8_t rae = (dw10 >> 15) & 0x1;
uint32_t numdl, numdu;
uint64_t off, lpol, lpou;
@@ -759,7 +766,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
return NVME_INVALID_FIELD | NVME_DNR;
}
- trace_nvme_get_log(req->cid, lid, rae, len, off);
+ trace_nvme_get_log(req->cid, lid, lsp, rae, len, off);
switch (lid) {
case NVME_LOG_ERROR_INFO:
@@ -783,6 +790,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
if (cq->cqid) {
g_free(cq);
}
+ n->qs_created--;
}
static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -823,6 +831,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
msix_vector_use(&n->parent_obj, cq->vector);
n->cq[cqid] = cq;
cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
+ n->qs_created++;
}
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -896,7 +905,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
prp1, prp2);
}
-static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c)
{
static const int data_len = 4 * KiB;
uint32_t min_nsid = le32_to_cpu(c->nsid);
@@ -906,7 +915,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
uint16_t ret;
int i, j = 0;
- trace_nvme_identify_nslist(min_nsid);
+ trace_nvme_identify_ns_list(min_nsid);
list = g_malloc0(data_len);
for (i = 0; i < n->num_namespaces; i++) {
@@ -923,6 +932,41 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
return ret;
}
+/*
+ * Identify, CNS 03h: return the Namespace Identification Descriptor list
+ * for the namespace selected by c->nsid.
+ *
+ * A zero-filled 4096-byte buffer is built whose first (and only) descriptor
+ * has NIDT 0x3 (Namespace UUID in NVMe 1.3) and NIDL 0x10, i.e. a 16-byte
+ * identifier.  The identifier is all zeroes except for its last four bytes,
+ * which carry the NSID in big-endian order.  The buffer is then handed to
+ * nvme_dma_read_prp() together with the PRP entries of the command.
+ *
+ * Returns NVME_INVALID_NSID | NVME_DNR when the NSID is 0 or greater than
+ * n->num_namespaces; otherwise the status of nvme_dma_read_prp().
+ */
+static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *c)
+{
+    static const int len = 4096;
+
+    struct ns_descr {
+        uint8_t nidt;
+        uint8_t nidl;
+        uint8_t rsvd2[2];
+        uint8_t nid[16];
+    };
+
+    uint32_t nsid = le32_to_cpu(c->nsid);
+    uint64_t prp1 = le64_to_cpu(c->prp1);
+    uint64_t prp2 = le64_to_cpu(c->prp2);
+
+    struct ns_descr *list;
+    uint16_t ret;
+
+    trace_nvme_identify_ns_descr_list(nsid);
+
+    if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
+        trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
+        return NVME_INVALID_NSID | NVME_DNR;
+    }
+
+    list = g_malloc0(len);
+    list->nidt = 0x3;
+    list->nidl = 0x10;
+
+    /*
+     * Store the NSID big-endian in the last four identifier bytes, one byte
+     * at a time.  Writing through a (uint32_t *) cast of the uint8_t array
+     * would access it via an incompatible pointer type; the explicit byte
+     * stores yield the same buffer contents on any host endianness.
+     */
+    list->nid[12] = (uint8_t)(nsid >> 24);
+    list->nid[13] = (uint8_t)(nsid >> 16);
+    list->nid[14] = (uint8_t)(nsid >> 8);
+    list->nid[15] = (uint8_t)nsid;
+
+    ret = nvme_dma_read_prp(n, (uint8_t *) list, len, prp1, prp2);
+    g_free(list);
+    return ret;
+}
+
static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
{
NvmeIdentify *c = (NvmeIdentify *)cmd;
@@ -933,7 +977,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
case 0x01:
return nvme_identify_ctrl(n, c);
case 0x02:
- return nvme_identify_nslist(n, c);
+ return nvme_identify_ns_list(n, c);
+ case 0x03:
+ return nvme_identify_ns_descr_list(n, cmd);
default:
trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
return NVME_INVALID_FIELD | NVME_DNR;
@@ -1100,6 +1146,14 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
break;
case NVME_NUMBER_OF_QUEUES:
+ if (n->qs_created > 2) {
+ return NVME_CMD_SEQ_ERROR | NVME_DNR;
+ }
+
+ if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
((dw11 >> 16) & 0xFFFF) + 1,
n->params.num_queues - 1,
@@ -1815,7 +1869,7 @@ static void nvme_init_ctrl(NvmeCtrl *n)
id->ieee[0] = 0x00;
id->ieee[1] = 0x02;
id->ieee[2] = 0xb3;
- id->ver = cpu_to_le32(0x00010201);
+ id->ver = cpu_to_le32(0x00010300);
id->oacs = cpu_to_le16(0);
id->acl = 3;
id->aerl = n->params.aerl;
@@ -1846,7 +1900,7 @@ static void nvme_init_ctrl(NvmeCtrl *n)
NVME_CAP_SET_CSS(n->bar.cap, 1);
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
- n->bar.vs = 0x00010201;
+ n->bar.vs = 0x00010300;
n->bar.intmc = n->bar.intms = 0;
}
@@ -102,6 +102,7 @@ typedef struct NvmeCtrl {
uint16_t temperature;
uint8_t elp_index;
uint64_t error_count;
+ uint32_t qs_created;
QEMUTimer *aer_timer;
uint8_t aer_mask;
@@ -39,8 +39,9 @@ nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uin
nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
nvme_identify_ctrl(void) "identify controller"
-nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
-nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
+nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
+nvme_identify_ns_list(uint32_t ns) "nsid %"PRIu32""
+nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32""
nvme_getfeat(uint32_t fid) "fid 0x%"PRIx32""
nvme_setfeat(uint32_t fid, uint32_t val) "fid 0x%"PRIx32" val 0x%"PRIx32""
nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
@@ -48,12 +49,13 @@ nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64""
nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64""
-nvme_get_log(uint16_t cid, uint16_t lid, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx16" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
+nvme_get_log(uint16_t cid, uint16_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx16" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
nvme_process_aers(void) "processing aers"
nvme_aer(uint16_t cid) "cid %"PRIu16""
nvme_aer_aerl_exceeded(void) "aerl exceeded"
nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
@@ -550,7 +550,9 @@ typedef struct NvmeIdCtrl {
uint32_t rtd3e;
uint32_t oaes;
uint32_t ctratt;
- uint8_t rsvd255[156];
+ uint8_t rsvd100[12];
+ uint8_t fguid[16];
+ uint8_t rsvd128[128];
uint16_t oacs;
uint8_t acl;
uint8_t aerl;
@@ -568,9 +570,15 @@ typedef struct NvmeIdCtrl {
uint8_t tnvmcap[16];
uint8_t unvmcap[16];
uint32_t rpmbs;
- uint8_t rsvd319[4];
+ uint16_t edstt;
+ uint8_t dsto;
+ uint8_t fwug;
uint16_t kas;
- uint8_t rsvd511[190];
+ uint16_t hctma;
+ uint16_t mntmt;
+ uint16_t mxtmt;
+ uint32_t sanicap;
+ uint8_t rsvd332[180];
uint8_t sqes;
uint8_t cqes;
uint16_t maxcmd;
@@ -678,13 +686,21 @@ typedef struct NvmeIdNs {
uint8_t mc;
uint8_t dpc;
uint8_t dps;
-
uint8_t nmic;
uint8_t rescap;
uint8_t fpi;
uint8_t dlfeat;
-
- uint8_t res34[94];
+ uint16_t nawun;
+ uint16_t nawupf;
+ uint16_t nacwu;
+ uint16_t nabsn;
+ uint16_t nabo;
+ uint16_t nabspf;
+ uint16_t noiob;
+ uint8_t nvmcap[16];
+ uint8_t resv103[40];
+ uint8_t nguid[16];
+ uint64_t eui64;
NvmeLBAF lbaf[16];
uint8_t res192[192];
uint8_t vs[3712];
Add the new Namespace Identification Descriptor List (CNS 03h) and track creation of queues to enable the controller to return Command Sequence Error if Set Features is called for Number of Queues after any queues have been created. Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com> --- hw/block/nvme.c | 82 +++++++++++++++++++++++++++++++++++-------- hw/block/nvme.h | 1 + hw/block/trace-events | 8 +++-- include/block/nvme.h | 28 +++++++++++---- 4 files changed, 96 insertions(+), 23 deletions(-)