@@ -334,6 +334,46 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
}
+static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
+ uint8_t event_info, uint8_t log_page)
+{
+ NvmeAsyncEvent *event;
+
+ trace_nvme_enqueue_event(event_type, event_info, log_page);
+
+ /*
+ * Do not enqueue the event if something of this type is already queued.
+ * This bounds the size of the event queue and makes sure it does not grow
+ * indefinitely when events are not processed by the host (i.e. does not
+ * issue any AERs).
+ */
+ if (n->aer_mask_queued & (1 << event_type)) {
+ trace_nvme_enqueue_event_masked(event_type);
+ return;
+ }
+ n->aer_mask_queued |= (1 << event_type);
+
+ event = g_new(NvmeAsyncEvent, 1);
+ event->result = (NvmeAerResult) {
+ .event_type = event_type,
+ .event_info = event_info,
+ .log_page = log_page,
+ };
+
+ QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry);
+
+ timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+}
+
+static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
+{
+ n->aer_mask &= ~(1 << event_type);
+ if (!QTAILQ_EMPTY(&n->aer_queue)) {
+ timer_mod(n->aer_timer,
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+ }
+}
+
static void nvme_rw_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
@@ -578,7 +618,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
return NVME_SUCCESS;
}
-static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd,
+static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
uint32_t buf_len, uint64_t off, NvmeRequest *req)
{
uint32_t trans_len;
@@ -591,12 +631,16 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd,
trans_len = MIN(sizeof(*n->elpes) * (n->params.elpe + 1) - off, buf_len);
+ if (!rae) {
+ nvme_clear_events(n, NVME_AER_TYPE_ERROR);
+ }
+
return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1,
prp2);
}
-static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
- uint64_t off, NvmeRequest *req)
+static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
+ uint32_t buf_len, uint64_t off, NvmeRequest *req)
{
uint64_t prp1 = le64_to_cpu(cmd->prp1);
uint64_t prp2 = le64_to_cpu(cmd->prp2);
@@ -645,6 +689,10 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
smart.power_on_hours[0] = cpu_to_le64(
(((current_ms - n->starttime_ms) / 1000) / 60) / 60);
+ if (!rae) {
+ nvme_clear_events(n, NVME_AER_TYPE_SMART);
+ }
+
return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
prp2);
}
@@ -697,9 +745,9 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
switch (lid) {
case NVME_LOG_ERROR_INFO:
- return nvme_error_info(n, cmd, len, off, req);
+ return nvme_error_info(n, cmd, rae, len, off, req);
case NVME_LOG_SMART_INFO:
- return nvme_smart_info(n, cmd, len, off, req);
+ return nvme_smart_info(n, cmd, rae, len, off, req);
case NVME_LOG_FW_SLOT_INFO:
return nvme_fw_log_info(n, cmd, len, off, req);
default:
@@ -957,6 +1005,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
break;
case NVME_TIMESTAMP:
return nvme_get_feature_timestamp(n, cmd);
+ case NVME_ASYNCHRONOUS_EVENT_CONF:
+ result = cpu_to_le32(n->features.async_config);
+ break;
default:
trace_nvme_err_invalid_getfeat(dw10);
return NVME_INVALID_FIELD | NVME_DNR;
@@ -992,6 +1043,12 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
switch (dw10) {
case NVME_TEMPERATURE_THRESHOLD:
n->features.temp_thresh = dw11;
+
+ if (n->features.temp_thresh <= n->temperature) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_SMART,
+ NVME_AER_INFO_SMART_TEMP_THRESH, NVME_LOG_SMART_INFO);
+ }
+
break;
case NVME_VOLATILE_WRITE_CACHE:
@@ -1007,6 +1064,9 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
break;
case NVME_TIMESTAMP:
return nvme_set_feature_timestamp(n, cmd);
+ case NVME_ASYNCHRONOUS_EVENT_CONF:
+ n->features.async_config = dw11;
+ break;
default:
trace_nvme_err_invalid_setfeat(dw10);
return NVME_INVALID_FIELD | NVME_DNR;
@@ -1015,6 +1075,22 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
return NVME_SUCCESS;
}
+static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+ trace_nvme_aer(req->cid);
+
+ if (n->outstanding_aers > n->params.aerl) {
+ trace_nvme_aer_aerl_exceeded();
+ return NVME_AER_LIMIT_EXCEEDED;
+ }
+
+ n->aer_reqs[n->outstanding_aers] = req;
+ timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+ n->outstanding_aers++;
+
+ return NVME_NO_COMPLETE;
+}
+
static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
switch (cmd->opcode) {
@@ -1036,12 +1112,60 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
return nvme_set_feature(n, cmd, req);
case NVME_ADM_CMD_GET_FEATURES:
return nvme_get_feature(n, cmd, req);
+ case NVME_ADM_CMD_ASYNC_EV_REQ:
+ return nvme_aer(n, cmd, req);
default:
trace_nvme_err_invalid_admin_opc(cmd->opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
}
}
+static void nvme_process_aers(void *opaque)
+{
+ NvmeCtrl *n = opaque;
+ NvmeAsyncEvent *event, *next;
+
+ trace_nvme_process_aers();
+
+ QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
+ NvmeRequest *req;
+ NvmeAerResult *result;
+
+ /* can't post cqe if there is nothing to complete */
+ if (!n->outstanding_aers) {
+ trace_nvme_no_outstanding_aers();
+ break;
+ }
+
+ /* ignore if masked (cqe posted, but event not cleared) */
+ if (n->aer_mask & (1 << event->result.event_type)) {
+ trace_nvme_aer_masked(event->result.event_type, n->aer_mask);
+ continue;
+ }
+
+ QTAILQ_REMOVE(&n->aer_queue, event, entry);
+
+ n->aer_mask |= 1 << event->result.event_type;
+ n->aer_mask_queued &= ~(1 << event->result.event_type);
+ n->outstanding_aers--;
+
+ req = n->aer_reqs[n->outstanding_aers];
+
+ result = (NvmeAerResult *) &req->cqe.result;
+ result->event_type = event->result.event_type;
+ result->event_info = event->result.event_info;
+ result->log_page = event->result.log_page;
+ g_free(event);
+
+ req->status = NVME_SUCCESS;
+
+ trace_nvme_aer_post_cqe(result->event_type, result->event_info,
+ result->log_page);
+
+ nvme_enqueue_req_completion(&n->admin_cq, req);
+ }
+}
+
static void nvme_process_sq(void *opaque)
{
NvmeSQueue *sq = opaque;
@@ -1091,6 +1215,20 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
}
}
+ if (n->aer_timer) {
+ timer_del(n->aer_timer);
+ timer_free(n->aer_timer);
+ n->aer_timer = NULL;
+ }
+
+ while (!QTAILQ_EMPTY(&n->aer_queue)) {
+ NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue);
+ QTAILQ_REMOVE(&n->aer_queue, event, entry);
+ g_free(event);
+ }
+
+ n->outstanding_aers = 0;
+
blk_flush(n->conf.blk);
n->bar.cc = 0;
}
@@ -1187,6 +1325,9 @@ static int nvme_start_ctrl(NvmeCtrl *n)
nvme_set_timestamp(n, 0ULL);
+ n->aer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_aers, n);
+ QTAILQ_INIT(&n->aer_queue);
+
return 0;
}
@@ -1379,6 +1520,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
"completion queue doorbell write"
" for nonexistent queue,"
" sqid=%"PRIu32", ignoring", qid);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
+ NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1389,6 +1537,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
" beyond queue size, sqid=%"PRIu32","
" new_head=%"PRIu16", ignoring",
qid, new_head);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1417,6 +1571,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
"submission queue doorbell write"
" for nonexistent queue,"
" sqid=%"PRIu32", ignoring", qid);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
+ NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1427,6 +1588,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
" beyond queue size, sqid=%"PRIu32","
" new_tail=%"PRIu16", ignoring",
qid, new_tail);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1523,6 +1690,7 @@ static void nvme_init_state(NvmeCtrl *n)
n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
n->temperature = NVME_TEMPERATURE;
n->features.temp_thresh = 0x14d;
+ n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
}
static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
@@ -1590,6 +1758,7 @@ static void nvme_init_ctrl(NvmeCtrl *n)
id->ver = cpu_to_le32(0x00010201);
id->oacs = cpu_to_le16(0);
id->acl = 3;
+ id->aerl = n->params.aerl;
id->frmw = 7 << 1;
id->lpa = 1 << 2;
id->elpe = n->params.elpe;
@@ -1677,6 +1846,7 @@ static void nvme_exit(PCIDevice *pci_dev)
g_free(n->cq);
g_free(n->sq);
g_free(n->elpes);
+ g_free(n->aer_reqs);
if (n->params.cmb_size_mb) {
g_free(n->cmbuf);
@@ -7,17 +7,19 @@
DEFINE_PROP_STRING("serial", _state, _props.serial), \
DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \
DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \
- DEFINE_PROP_UINT8("elpe", _state, _props.elpe, 24)
+ DEFINE_PROP_UINT8("elpe", _state, _props.elpe, 24), \
+ DEFINE_PROP_UINT8("aerl", _state, _props.aerl, 3)
typedef struct NvmeParams {
char *serial;
uint32_t num_queues;
uint32_t cmb_size_mb;
uint8_t elpe;
+ uint8_t aerl;
} NvmeParams;
typedef struct NvmeAsyncEvent {
- QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry;
+ QTAILQ_ENTRY(NvmeAsyncEvent) entry;
NvmeAerResult result;
} NvmeAsyncEvent;
@@ -89,6 +91,7 @@ typedef struct NvmeCtrl {
uint32_t num_namespaces;
uint32_t max_q_ents;
uint64_t ns_size;
+ uint8_t outstanding_aers;
uint32_t cmbsz;
uint32_t cmbloc;
uint8_t *cmbuf;
@@ -98,6 +101,12 @@ typedef struct NvmeCtrl {
uint64_t starttime_ms;
uint16_t temperature;
+ QEMUTimer *aer_timer;
+ uint8_t aer_mask;
+ uint8_t aer_mask_queued;
+ NvmeRequest **aer_reqs;
+ QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
+
NvmeNamespace *namespaces;
NvmeSQueue **sq;
NvmeCQueue **cq;
@@ -47,6 +47,14 @@ nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_coun
nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64""
nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64""
nvme_get_log(uint16_t cid, uint16_t lid, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx16" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
+nvme_process_aers(void) "processing aers"
+nvme_aer(uint16_t cid) "cid %"PRIu16""
+nvme_aer_aerl_exceeded(void) "aerl exceeded"
+nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
+nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
+nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
@@ -386,8 +386,8 @@ enum NvmeAsyncEventRequest {
NVME_AER_TYPE_SMART = 1,
NVME_AER_TYPE_IO_SPECIFIC = 6,
NVME_AER_TYPE_VENDOR_SPECIFIC = 7,
- NVME_AER_INFO_ERR_INVALID_SQ = 0,
- NVME_AER_INFO_ERR_INVALID_DB = 1,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER = 0,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE = 1,
NVME_AER_INFO_ERR_DIAG_FAIL = 2,
NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3,
NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR = 4,
Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1, Section 5.2 ("Asynchronous Event Request command"). Mostly imported from Keith's qemu-nvme tree. Modified to not enqueue events if something of the same type is already queued (but not cleared by the host). Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com> --- hw/block/nvme.c | 180 ++++++++++++++++++++++++++++++++++++++++-- hw/block/nvme.h | 13 ++- hw/block/trace-events | 8 ++ include/block/nvme.h | 4 +- 4 files changed, 196 insertions(+), 9 deletions(-)