@@ -23,7 +23,7 @@
* max_ioqpairs=<N[optional]>, \
* aerl=<N[optional]>, aer_max_queued=<N[optional]>, \
* mdts=<N[optional]>,zoned.append_size_limit=<N[optional]>, \
- * subsys=<subsys_id> \
+ * subsys=<subsys_id>,cmd-retry-delay=<N[optional]> \
* -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
* zoned=<true|false[optional]>, \
* subsys=<subsys_id>
@@ -71,6 +71,14 @@
* data size being in effect. By setting this property to 0, users can make
* ZASL to be equal to MDTS. This property only affects zoned namespaces.
*
+ * - `cmd-retry-delay`
+ * Command Retry Delay value in unit of millisecond. This value will be
+ * reported to the CRDT1(Command Retry Delay Time 1) in Identify Controller
+ * data structure in 100 milliseconds unit. If this is not given, DNR(Do Not
+ * Retry) bit field in the Status field of CQ entry. If it's given to 0,
+ * CRD(Command Retry Delay) will be set to 0 which is for retry without
+ * delay. Otherwise, it will set to 1 to delay for CRDT1 value.
+ *
* nvme namespace device parameters
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* - `subsys`
@@ -154,6 +162,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
[NVME_WRITE_ATOMICITY] = true,
[NVME_ASYNCHRONOUS_EVENT_CONF] = true,
[NVME_TIMESTAMP] = true,
+ [NVME_HOST_BEHAVIOR_SUPPORT] = true,
};
static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
@@ -163,6 +172,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
[NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
[NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE,
[NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE,
+ [NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE,
};
static const uint32_t nvme_cse_acs[256] = {
@@ -904,6 +914,16 @@ static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
return status;
}
+static inline bool nvme_should_retry(NvmeRequest *req)
+{
+ switch (req->status) {
+ case NVME_COMMAND_INTERRUPTED:
+ return true;
+ default:
+ return false;
+ }
+}
+
static void nvme_post_cqes(void *opaque)
{
NvmeCQueue *cq = opaque;
@@ -947,7 +967,13 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
assert(cq->cqid == req->sq->cqid);
if (req->status != NVME_SUCCESS) {
- req->status |= NVME_DNR;
+ if (cq->ctrl->features.acre && nvme_should_retry(req)) {
+ if (cq->ctrl->params.cmd_retry_delay > 0) {
+ req->status |= NVME_CRD_CRDT1;
+ }
+ } else {
+ req->status |= NVME_DNR;
+ }
}
trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid,
@@ -3401,6 +3427,16 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
DMA_DIRECTION_FROM_DEVICE, req);
}
+static uint16_t nvme_get_feature_host_behavior(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeFeatureHostBehavior data = {};
+
+ data.acre = n->features.acre;
+
+ return nvme_dma(n, (uint8_t *)&data, sizeof(data),
+ DMA_DIRECTION_FROM_DEVICE, req);
+}
+
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
{
NvmeCmd *cmd = &req->cmd;
@@ -3506,6 +3542,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
goto out;
case NVME_TIMESTAMP:
return nvme_get_feature_timestamp(n, req);
+ case NVME_HOST_BEHAVIOR_SUPPORT:
+ return nvme_get_feature_host_behavior(n, req);
default:
break;
}
@@ -3569,6 +3607,22 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
return NVME_SUCCESS;
}
+static uint16_t nvme_set_feature_host_behavior(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeFeatureHostBehavior data;
+ int ret;
+
+ ret = nvme_dma(n, (uint8_t *)&data, sizeof(data),
+ DMA_DIRECTION_TO_DEVICE, req);
+ if (ret) {
+ return ret;
+ }
+
+ n->features.acre = data.acre;
+
+ return NVME_SUCCESS;
+}
+
static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
{
NvmeNamespace *ns = NULL;
@@ -3700,6 +3754,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
break;
case NVME_TIMESTAMP:
return nvme_set_feature_timestamp(n, req);
+ case NVME_HOST_BEHAVIOR_SUPPORT:
+ return nvme_set_feature_host_behavior(n, req);
case NVME_COMMAND_SET_PROFILE:
if (dw11 & 0x1ff) {
trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
@@ -4699,6 +4755,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->oacs = cpu_to_le16(0);
id->cntrltype = 0x1;
+ if (n->params.cmd_retry_delay > 0) {
+ id->crdt1 = cpu_to_le16(DIV_ROUND_UP(n->params.cmd_retry_delay, 100));
+ }
+
/*
* Because the controller always completes the Abort command immediately,
* there can never be more than one concurrently executing Abort command,
@@ -4859,6 +4919,7 @@ static Property nvme_props[] = {
DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
DEFINE_PROP_SIZE32("zoned.append_size_limit", NvmeCtrl, params.zasl_bs,
NVME_DEFAULT_MAX_ZA_SIZE),
+ DEFINE_PROP_INT32("cmd-retry-delay", NvmeCtrl, params.cmd_retry_delay, -1),
DEFINE_PROP_END_OF_LIST(),
};
@@ -22,6 +22,7 @@ typedef struct NvmeParams {
bool use_intel_id;
uint32_t zasl_bs;
bool legacy_cmb;
+ int32_t cmd_retry_delay;
} NvmeParams;
typedef struct NvmeAsyncEvent {
@@ -124,6 +125,7 @@ typedef struct NvmeFeatureVal {
uint16_t temp_thresh_low;
};
uint32_t async_config;
+ uint8_t acre;
} NvmeFeatureVal;
typedef struct NvmeCtrl {
@@ -813,6 +813,7 @@ enum NvmeStatusCodes {
NVME_SGL_DESCR_TYPE_INVALID = 0x0011,
NVME_INVALID_USE_OF_CMB = 0x0012,
NVME_INVALID_PRP_OFFSET = 0x0013,
+ NVME_COMMAND_INTERRUPTED = 0x0021,
NVME_CMD_SET_CMB_REJECTED = 0x002b,
NVME_INVALID_CMD_SET = 0x002c,
NVME_LBA_RANGE = 0x0080,
@@ -858,6 +859,7 @@ enum NvmeStatusCodes {
NVME_DULB = 0x0287,
NVME_MORE = 0x2000,
NVME_DNR = 0x4000,
+ NVME_CRD_CRDT1 = 0x0800,
NVME_NO_COMPLETE = 0xffff,
};
@@ -987,7 +989,10 @@ typedef struct QEMU_PACKED NvmeIdCtrl {
uint8_t rsvd100[11];
uint8_t cntrltype;
uint8_t fguid[16];
- uint8_t rsvd128[128];
+ uint16_t crdt1;
+ uint16_t crdt2;
+ uint16_t crdt3;
+ uint8_t rsvd134[122];
uint16_t oacs;
uint8_t acl;
uint8_t aerl;
@@ -1131,6 +1136,7 @@ enum NvmeFeatureIds {
NVME_WRITE_ATOMICITY = 0xa,
NVME_ASYNCHRONOUS_EVENT_CONF = 0xb,
NVME_TIMESTAMP = 0xe,
+ NVME_HOST_BEHAVIOR_SUPPORT = 0x16,
NVME_COMMAND_SET_PROFILE = 0x19,
NVME_SOFTWARE_PROGRESS_MARKER = 0x80,
NVME_FID_MAX = 0x100,
@@ -1162,6 +1168,11 @@ typedef enum NvmeGetFeatureSelect {
#define NVME_SETFEAT_SAVE(dw10) \
((dw10 >> NVME_SETFEAT_SAVE_SHIFT) & NVME_SETFEAT_SAVE_MASK)
+typedef struct QEMU_PACKED NvmeFeatureHostBehavior {
+ uint8_t acre;
+ uint8_t rsvd1[511];
+} NvmeFeatureHostBehavior;
+
typedef struct QEMU_PACKED NvmeRangeType {
uint8_t type;
uint8_t attributes;
Set CRDT1(Command Retry Delay Time 1) in the Identify controller data structure to milliseconds units of 100ms by the given value of 'cmd-retry-delay' parameter which is newly added. If cmd-retry-delay=1000, it will be set CRDT1 to 10. This patch only considers the CRDT1 without CRDT2 and 3 for the simplicity. This patch also introduced set/get feature command handler for Host Behavior feature (16h). In this feature, ACRE(Advanced Command Retry Enable) will be set by the host based on the Identify controller data structure, especially by CRDTs. If 'cmd-retry-delay' is not given, the default value will be -1 which is CRDT will not be configured at all and ACRE will not be supported. In this case, we just set NVME_DNR to the error CQ entry just like we used to. If it's given to positive value, then ACRE will be supported by the device. Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com> --- hw/block/nvme.c | 65 ++++++++++++++++++++++++++++++++++++++++++-- hw/block/nvme.h | 2 ++ include/block/nvme.h | 13 ++++++++- 3 files changed, 77 insertions(+), 3 deletions(-)