@@ -63,6 +63,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY";
+ case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_ZONE_MGMT_SEND";
case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_ZONE_MGMT_RECV";
default: return "NVME_NVM_CMD_UNKNOWN";
}
@@ -485,6 +485,7 @@ enum NvmeIoCommands {
NVME_CMD_WRITE_ZEROES = 0x08,
NVME_CMD_DSM = 0x09,
NVME_CMD_COPY = 0x19,
+ NVME_CMD_ZONE_MGMT_SEND = 0x79,
NVME_CMD_ZONE_MGMT_RECV = 0x7a,
};
@@ -598,6 +599,34 @@ enum {
NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
};
+/*
+ * Submission queue entry layout for the Zone Management Send command. The
+ * structure is packed and its size is asserted to be exactly 64 bytes in
+ * _nvme_check_size().
+ */
+typedef struct QEMU_PACKED NvmeZoneMgmtSendCmd {
+ uint8_t opcode;
+ uint8_t flags;
+ uint16_t cid;
+ uint32_t nsid;
+ uint32_t rsvd8[4];
+ NvmeCmdDptr dptr;
+ /* read with le64_to_cpu() and required to equal a zone's start LBA */
+ uint64_t slba;
+ uint32_t rsvd48;
+ /* zone send action; dispatched against the NVME_ZMS_* action values */
+ uint8_t zsa;
+ /* only bit 0 (NVME_ZMS_SELECT_ALL) is examined by nvme_zone_mgmt_send() */
+ uint8_t select_all;
+ uint8_t rsvd54[2];
+ uint32_t rsvd56[2];
+} NvmeZoneMgmtSendCmd;
+
+/*
+ * Flag bit of NvmeZoneMgmtSendCmd.select_all. When it is set,
+ * nvme_zone_mgmt_send() hands the action to nvme_zone_mgmt_send_all() instead
+ * of looking up a single zone from the start LBA.
+ */
+enum {
+ NVME_ZMS_SELECT_ALL = 0x1,
+};
+
+/*
+ * Zone send action codes, as carried in NvmeZoneMgmtSendCmd.zsa and dispatched
+ * on by nvme_zone_mgmt_send() / nvme_zone_mgmt_send_all().
+ */
+enum {
+ NVME_ZMS_CLOSE = 0x1,
+ NVME_ZMS_FINISH = 0x2,
+ NVME_ZMS_OPEN = 0x3,
+ NVME_ZMS_RESET = 0x4,
+ NVME_ZMS_OFFLINE = 0x5,
+ /* set zone descriptor extension; rejected when combined with select all */
+ NVME_ZMS_SET_ZDE = 0x10,
+};
+
typedef struct QEMU_PACKED NvmeZoneMgmtRecvCmd {
uint8_t opcode;
uint8_t flags;
@@ -1257,6 +1286,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeRwCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeCopyCmd) != 64);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeZoneMgmtSendCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeZoneMgmtRecvCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
@@ -1008,6 +1008,12 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
return NVME_SUCCESS;
}
+/*
+ * Rewind the write pointer state of @zone to the start of the zone: both the
+ * zone descriptor's wp field and the zone's wp_staging value are set to the
+ * zone start LBA.
+ */
+static inline void nvme_zone_reset_wp(NvmeZone *zone)
+{
+    zone->wp_staging = nvme_zslba(zone);
+    zone->zd->wp = zone->zd->zslba;
+}
+
static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState to)
{
@@ -1030,6 +1036,10 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
case NVME_ZS_ZSEO:
switch (to) {
case NVME_ZS_ZSE:
+ nvme_zone_reset_wp(zone);
+
+ /* fallthrough */
+
case NVME_ZS_ZSO:
NVME_ZA_CLEAR_ALL(zd->za);
@@ -1050,6 +1060,10 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
case NVME_ZS_ZSC:
switch (to) {
case NVME_ZS_ZSE:
+ nvme_zone_reset_wp(zone);
+
+ /* fallthrough */
+
case NVME_ZS_ZSO:
NVME_ZA_CLEAR_ALL(zd->za);
@@ -1082,6 +1096,10 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
case NVME_ZS_ZSF:
switch (to) {
case NVME_ZS_ZSE:
+ nvme_zone_reset_wp(zone);
+
+ /* fallthrough */
+
case NVME_ZS_ZSO:
NVME_ZA_CLEAR_ALL(zd->za);
@@ -1460,6 +1478,367 @@ out:
nvme_enqueue_req_completion(nvme_cq(req), req);
}
+/*
+ * Per-zone context passed as the opaque argument to nvme_aio_zone_reset_cb().
+ * It is allocated in __nvme_zone_mgmt_send_reset() and freed by the callback.
+ */
+struct nvme_zone_reset_ctx {
+ /* request on whose behalf the zone is being reset */
+ NvmeRequest *req;
+ /* zone to transition once the write-zeroes operation has completed */
+ NvmeZone *zone;
+};
+
+/*
+ * Completion callback for the write-zeroes AIO issued for one zone of a reset.
+ *
+ * @opaque is the struct nvme_zone_reset_ctx allocated by the submitter; it is
+ * released here. When @ret is zero the zone is transitioned to NVME_ZS_ZSE,
+ * otherwise the error is handed to nvme_aio_err(). req->opaque is used as a
+ * counter of outstanding resets; the request is queued for completion only
+ * once that counter drops to zero.
+ */
+static void nvme_aio_zone_reset_cb(void *opaque, int ret)
+{
+    struct nvme_zone_reset_ctx *reset_ctx = opaque;
+    NvmeRequest *req = reset_ctx->req;
+    NvmeZone *zone = reset_ctx->zone;
+    uintptr_t *pending = (uintptr_t *)&req->opaque;
+
+    /* req and zone have been copied out, so the context can go away now */
+    g_free(reset_ctx);
+
+    trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), nvme_zslba(zone));
+
+    if (ret) {
+        nvme_aio_err(req, ret, zone);
+    } else {
+        nvme_zrm_transition(req->ns, zone, NVME_ZS_ZSE);
+    }
+
+    if (--(*pending) != 0) {
+        /* other zone resets belonging to this request are still in flight */
+        return;
+    }
+
+    nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+/*
+ * Handle the Close Zone action for a single @zone.
+ *
+ * Returns NVME_SUCCESS when the zone is already in NVME_ZS_ZSC or when the
+ * transition to NVME_ZS_ZSC succeeds, NVME_INVALID_ZONE_STATE_TRANSITION (with
+ * DNR) for a zone in NVME_ZS_ZSE, and otherwise whatever status
+ * nvme_zrm_transition() reports.
+ */
+static uint16_t nvme_zone_mgmt_send_close(NvmeCtrl *n, NvmeRequest *req,
+                                          NvmeZone *zone)
+{
+    NvmeNamespace *ns = req->ns;
+    uint16_t status;
+
+    trace_pci_nvme_zone_mgmt_send_close(nvme_cid(req), nvme_nsid(ns),
+                                        nvme_zslba(zone), nvme_zs_str(zone));
+
+    switch (nvme_zs(zone)) {
+    case NVME_ZS_ZSC:
+        /* already closed; nothing to do */
+        return NVME_SUCCESS;
+
+    case NVME_ZS_ZSE:
+        /*
+         * nvme_zrm_transition() accepts ZSE -> ZSC, but that transition is
+         * only valid as part of Set Zone Descriptor Extension, so reject it
+         * here before the state machine gets a chance to allow it.
+         */
+        return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR;
+
+    default:
+        status = nvme_zrm_transition(ns, zone, NVME_ZS_ZSC);
+        return status ? status : NVME_SUCCESS;
+    }
+}
+
+/*
+ * Handle the Finish Zone action for a single @zone.
+ *
+ * A zone already in NVME_ZS_ZSF is left untouched. Any other zone is handed to
+ * nvme_zrm_transition() with NVME_ZS_ZSF as the target; a non-zero status from
+ * the transition is returned to the caller, otherwise NVME_SUCCESS.
+ */
+static uint16_t nvme_zone_mgmt_send_finish(NvmeCtrl *n, NvmeRequest *req,
+                                           NvmeZone *zone)
+{
+    NvmeNamespace *ns = req->ns;
+    uint16_t status;
+
+    trace_pci_nvme_zone_mgmt_send_finish(nvme_cid(req), nvme_nsid(ns),
+                                         nvme_zslba(zone), nvme_zs_str(zone));
+
+    if (nvme_zs(zone) != NVME_ZS_ZSF) {
+        status = nvme_zrm_transition(ns, zone, NVME_ZS_ZSF);
+        if (status) {
+            return status;
+        }
+    }
+
+    return NVME_SUCCESS;
+}
+
+/*
+ * Handle the Open Zone action for a single @zone.
+ *
+ * A zone already in NVME_ZS_ZSEO is left untouched. Any other zone is handed
+ * to nvme_zrm_transition() with NVME_ZS_ZSEO as the target; a non-zero status
+ * from the transition is returned to the caller, otherwise NVME_SUCCESS.
+ */
+static uint16_t nvme_zone_mgmt_send_open(NvmeCtrl *n, NvmeRequest *req,
+                                         NvmeZone *zone)
+{
+    NvmeNamespace *ns = req->ns;
+    uint16_t status;
+
+    trace_pci_nvme_zone_mgmt_send_open(nvme_cid(req), nvme_nsid(ns),
+                                       nvme_zslba(zone), nvme_zs_str(zone));
+
+    if (nvme_zs(zone) != NVME_ZS_ZSEO) {
+        status = nvme_zrm_transition(ns, zone, NVME_ZS_ZSEO);
+        if (status) {
+            return status;
+        }
+    }
+
+    return NVME_SUCCESS;
+}
+
+/*
+ * Submit the asynchronous part of a zone reset: a write-zeroes covering the
+ * whole of @zone, with nvme_aio_zone_reset_cb() as its completion callback.
+ *
+ * The counter stored in req->opaque is incremented for the submitted AIO; the
+ * caller is responsible for having initialized it. The callback takes
+ * ownership of the context allocated here.
+ */
+static void __nvme_zone_mgmt_send_reset(NvmeCtrl *n, NvmeRequest *req,
+                                        NvmeZone *zone)
+{
+    NvmeNamespace *ns = req->ns;
+    uintptr_t *pending = (uintptr_t *)&req->opaque;
+    uint64_t start_lba = nvme_zslba(zone);
+    uint64_t zone_size = nvme_ns_zsze(ns);
+    struct nvme_zone_reset_ctx *reset_ctx;
+
+    /*
+     * The callback has to know which zone was reset in order to transition
+     * it, so pair the request with the zone in a small heap context.
+     */
+    reset_ctx = g_new(struct nvme_zone_reset_ctx, 1);
+    reset_ctx->req = req;
+    reset_ctx->zone = zone;
+
+    trace_pci_nvme_zone_mgmt_send_reset(nvme_cid(req), nvme_nsid(ns),
+                                        start_lba, nvme_zs_str(zone));
+
+    *pending += 1;
+
+    blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_l2b(ns, start_lba),
+                          nvme_l2b(ns, zone_size), BDRV_REQ_MAY_UNMAP,
+                          nvme_aio_zone_reset_cb, reset_ctx);
+}
+
+/*
+ * Handle the Reset Zone action for a single @zone.
+ *
+ * Zones in NVME_ZS_ZSRO or NVME_ZS_ZSO cannot be reset; they are rejected
+ * with NVME_INVALID_ZONE_STATE_TRANSITION (with DNR) before any I/O is issued,
+ * so that their contents are not zeroed for a transition that cannot happen.
+ * This matches the select-all path, which never resets such zones either.
+ *
+ * For every other state a write-zeroes is submitted for the zone. The counter
+ * in req->opaque is primed with one extra reference held by this function, so
+ * the callback cannot complete the request while submission is still in
+ * progress. Returns NVME_NO_COMPLETE while the AIO is outstanding, otherwise
+ * req->status.
+ */
+static uint16_t nvme_zone_mgmt_send_reset(NvmeCtrl *n, NvmeRequest *req,
+                                          NvmeZone *zone)
+{
+    uintptr_t *resets = (uintptr_t *)&req->opaque;
+
+    switch (nvme_zs(zone)) {
+    case NVME_ZS_ZSRO:
+    case NVME_ZS_ZSO:
+        trace_pci_nvme_err_invalid_zone_state(nvme_zslba(zone),
+                                              nvme_zs_str(zone),
+                                              nvme_zs(zone));
+        return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR;
+
+    default:
+        break;
+    }
+
+    /* submitter's own reference; dropped once the AIO has been issued */
+    *resets = 1;
+
+    __nvme_zone_mgmt_send_reset(n, req, zone);
+
+    (*resets)--;
+
+    return *resets ? NVME_NO_COMPLETE : req->status;
+}
+
+/*
+ * Handle the Offline Zone action for a single @zone.
+ *
+ * A zone in NVME_ZS_ZSRO is transitioned to NVME_ZS_ZSO (the status of that
+ * transition is not inspected) and a zone already in NVME_ZS_ZSO is left
+ * alone; both cases return NVME_SUCCESS. Every other state yields
+ * NVME_INVALID_ZONE_STATE_TRANSITION with DNR set.
+ */
+static uint16_t nvme_zone_mgmt_send_offline(NvmeCtrl *n, NvmeRequest *req,
+                                            NvmeZone *zone)
+{
+    NvmeNamespace *ns = req->ns;
+
+    trace_pci_nvme_zone_mgmt_send_offline(nvme_cid(req), nvme_nsid(ns),
+                                          nvme_zslba(zone), nvme_zs_str(zone));
+
+    switch (nvme_zs(zone)) {
+    case NVME_ZS_ZSO:
+        return NVME_SUCCESS;
+
+    case NVME_ZS_ZSRO:
+        nvme_zrm_transition(ns, zone, NVME_ZS_ZSO);
+        return NVME_SUCCESS;
+
+    default:
+        return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR;
+    }
+}
+
+/*
+ * Handle the Set Zone Descriptor Extension action for a single @zone.
+ *
+ * The command is rejected with NVME_INVALID_FIELD when the namespace has no
+ * zone descriptor extension space (nvme_ns_zdes_bytes() is zero), since there
+ * would be nothing to transfer and the zone must not be marked as holding
+ * valid extension data. The zone must be in NVME_ZS_ZSE, otherwise
+ * NVME_INVALID_ZONE_STATE_TRANSITION is returned.
+ *
+ * On the success path the extension data is transferred from the host into
+ * zone->zde, the zone is transitioned to NVME_ZS_ZSC and the NVME_ZA_ZDEV
+ * attribute is set in the zone descriptor. Errors from nvme_check_mdts(),
+ * nvme_dma() and nvme_zrm_transition() are passed through unchanged.
+ */
+static uint16_t nvme_zone_mgmt_send_set_zde(NvmeCtrl *n, NvmeRequest *req,
+                                            NvmeZone *zone)
+{
+    NvmeNamespace *ns = req->ns;
+    uint16_t status;
+
+    trace_pci_nvme_zone_mgmt_send_set_zde(nvme_cid(req), nvme_nsid(ns),
+                                          nvme_zslba(zone), nvme_zs_str(zone));
+
+    /* no extension space configured: the action cannot be honoured */
+    if (!nvme_ns_zdes_bytes(ns)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    if (nvme_zs(zone) != NVME_ZS_ZSE) {
+        trace_pci_nvme_err_invalid_zone_state(nvme_zslba(zone),
+                                              nvme_zs_str(zone),
+                                              nvme_zs(zone));
+        return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR;
+    }
+
+    status = nvme_check_mdts(n, nvme_ns_zdes_bytes(ns));
+    if (status) {
+        return status;
+    }
+
+    status = nvme_dma(n, zone->zde, nvme_ns_zdes_bytes(ns),
+                      DMA_DIRECTION_TO_DEVICE, req);
+    if (status) {
+        return status;
+    }
+
+    /* ZSE -> ZSC is the transition reserved for this action */
+    status = nvme_zrm_transition(ns, zone, NVME_ZS_ZSC);
+    if (status) {
+        return status;
+    }
+
+    NVME_ZA_SET(zone->zd->za, NVME_ZA_ZDEV);
+
+    return NVME_SUCCESS;
+}
+
+/*
+ * Apply zone send action @zsa to every zone of @ns that is in a state the
+ * action applies to (the Select All form of Zone Management Send).
+ *
+ * Close, Finish, Open and Offline are carried out synchronously; processing
+ * stops at the first zone that reports an error and that status is returned.
+ * Reset submits one asynchronous write-zeroes per eligible zone, counted in
+ * req->opaque, and returns NVME_NO_COMPLETE while any of them is outstanding
+ * (req->status otherwise). Set Zone Descriptor Extension cannot be combined
+ * with Select All, and unrecognized action codes are rejected as well; both
+ * yield NVME_INVALID_FIELD with DNR set, consistent with the single-zone path
+ * in nvme_zone_mgmt_send().
+ */
+static uint16_t nvme_zone_mgmt_send_all(NvmeCtrl *n, NvmeNamespace *ns,
+                                        uint8_t zsa, NvmeRequest *req)
+{
+    NvmeZone *zone;
+    uintptr_t *resets = (uintptr_t *)&req->opaque;
+    uint16_t status = NVME_SUCCESS;
+
+    trace_pci_nvme_zone_mgmt_send_all(nvme_cid(req), nvme_nsid(ns), zsa);
+
+    switch (zsa) {
+    case NVME_ZMS_SET_ZDE:
+        return NVME_INVALID_FIELD | NVME_DNR;
+
+    case NVME_ZMS_CLOSE:
+        for (int i = 0; i < ns->zns.num_zones; i++) {
+            zone = &ns->zns.zones[i];
+
+            switch (nvme_zs(zone)) {
+            case NVME_ZS_ZSIO:
+            case NVME_ZS_ZSEO:
+                status = nvme_zone_mgmt_send_close(n, req, zone);
+                if (status) {
+                    return status;
+                }
+
+                /* fallthrough */
+
+            default:
+                continue;
+            }
+        }
+
+        break;
+
+    case NVME_ZMS_FINISH:
+        for (int i = 0; i < ns->zns.num_zones; i++) {
+            zone = &ns->zns.zones[i];
+
+            switch (nvme_zs(zone)) {
+            case NVME_ZS_ZSIO:
+            case NVME_ZS_ZSEO:
+            case NVME_ZS_ZSC:
+                status = nvme_zone_mgmt_send_finish(n, req, zone);
+                if (status) {
+                    return status;
+                }
+
+                /* fallthrough */
+
+            default:
+                continue;
+            }
+        }
+
+        break;
+
+    case NVME_ZMS_OPEN:
+        for (int i = 0; i < ns->zns.num_zones; i++) {
+            zone = &ns->zns.zones[i];
+
+            if (nvme_zs(zone) == NVME_ZS_ZSC) {
+                status = nvme_zone_mgmt_send_open(n, req, zone);
+                if (status) {
+                    return status;
+                }
+            }
+        }
+
+        break;
+
+    case NVME_ZMS_RESET:
+        /*
+         * Hold one reference for the submission loop itself so that a
+         * callback firing early cannot complete the request before all
+         * zones have been submitted.
+         */
+        *resets = 1;
+
+        for (int i = 0; i < ns->zns.num_zones; i++) {
+            zone = &ns->zns.zones[i];
+
+            switch (nvme_zs(zone)) {
+            case NVME_ZS_ZSIO:
+            case NVME_ZS_ZSEO:
+            case NVME_ZS_ZSC:
+            case NVME_ZS_ZSF:
+                __nvme_zone_mgmt_send_reset(n, req, zone);
+
+                /* fallthrough */
+
+            default:
+                continue;
+            }
+        }
+
+        (*resets)--;
+
+        return *resets ? NVME_NO_COMPLETE : req->status;
+
+    case NVME_ZMS_OFFLINE:
+        for (int i = 0; i < ns->zns.num_zones; i++) {
+            zone = &ns->zns.zones[i];
+
+            if (nvme_zs(zone) == NVME_ZS_ZSRO) {
+                status = nvme_zone_mgmt_send_offline(n, req, zone);
+                if (status) {
+                    return status;
+                }
+            }
+        }
+
+        break;
+
+    default:
+        /* unknown action: do not report success for a command we ignored */
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    return status;
+}
+
+/*
+ * Entry point for the Zone Management Send I/O command.
+ *
+ * Returns NVME_INVALID_OPCODE for a namespace that is not zoned. With the
+ * select-all bit set, the action is delegated to nvme_zone_mgmt_send_all().
+ * Otherwise the start LBA from the command must resolve to a zone and be equal
+ * to that zone's start LBA (NVME_INVALID_FIELD if not), and the action code is
+ * dispatched to the matching per-zone handler. Unknown action codes yield
+ * NVME_INVALID_FIELD with DNR set.
+ */
+static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
+{
+    NvmeZoneMgmtSendCmd *cmd = (NvmeZoneMgmtSendCmd *)&req->cmd;
+    NvmeNamespace *ns = req->ns;
+    uint64_t slba = le64_to_cpu(cmd->slba);
+    uint8_t action = cmd->zsa;
+    uint8_t all = cmd->select_all & NVME_ZMS_SELECT_ALL;
+    NvmeZone *zone;
+
+    if (!nvme_ns_zoned(ns)) {
+        return NVME_INVALID_OPCODE | NVME_DNR;
+    }
+
+    trace_pci_nvme_zone_mgmt_send(nvme_cid(req), ns->params.nsid, slba, action,
+                                  all);
+
+    if (all) {
+        return nvme_zone_mgmt_send_all(n, ns, action, req);
+    }
+
+    zone = nvme_ns_zone(ns, slba);
+    if (!zone) {
+        trace_pci_nvme_err_invalid_zone(slba);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    /* the command must address the zone by its first LBA */
+    if (slba != nvme_zslba(zone)) {
+        trace_pci_nvme_err_invalid_zslba(slba);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    switch (action) {
+    case NVME_ZMS_CLOSE:
+        return nvme_zone_mgmt_send_close(n, req, zone);
+
+    case NVME_ZMS_FINISH:
+        return nvme_zone_mgmt_send_finish(n, req, zone);
+
+    case NVME_ZMS_OPEN:
+        return nvme_zone_mgmt_send_open(n, req, zone);
+
+    case NVME_ZMS_RESET:
+        return nvme_zone_mgmt_send_reset(n, req, zone);
+
+    case NVME_ZMS_OFFLINE:
+        return nvme_zone_mgmt_send_offline(n, req, zone);
+
+    case NVME_ZMS_SET_ZDE:
+        return nvme_zone_mgmt_send_set_zde(n, req, zone);
+
+    default:
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+}
+
static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
{
NvmeZoneMgmtRecvCmd *recv = (NvmeZoneMgmtRecvCmd *)&req->cmd;
@@ -2038,6 +2417,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_dsm(n, req);
case NVME_CMD_COPY:
return nvme_copy(n, req);
+ case NVME_CMD_ZONE_MGMT_SEND:
+ return nvme_zone_mgmt_send(n, req);
case NVME_CMD_ZONE_MGMT_RECV:
return nvme_zone_mgmt_recv(n, req);
default:
@@ -2294,6 +2675,8 @@ static void nvme_effects_nvm(NvmeEffectsLog *effects)
+/* Set the iocs[] entries of @effects for the zoned I/O commands. */
static void nvme_effects_zoned(NvmeEffectsLog *effects)
{
effects->iocs[NVME_CMD_ZONE_MGMT_RECV] = NVME_EFFECTS_CSUPP;
+ /* Zone Management Send is flagged NVME_EFFECTS_LBCC in addition to CSUPP */
+ effects->iocs[NVME_CMD_ZONE_MGMT_SEND] = NVME_EFFECTS_CSUPP |
+ NVME_EFFECTS_LBCC;
}
static uint16_t nvme_effects_log(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
@@ -54,6 +54,16 @@ pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid
pci_nvme_compare_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
+pci_nvme_zone_mgmt_send(uint16_t cid, uint32_t nsid, uint64_t zslba, uint8_t zsa, uint8_t select_all) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" zsa 0x%"PRIx8" select_all 0x%"PRIx8""
+pci_nvme_zone_mgmt_send_all(uint16_t cid, uint32_t nsid, uint8_t za) "cid %"PRIu16" nsid %"PRIu32" za 0x%"PRIx8""
+pci_nvme_zone_mgmt_send_close(uint16_t cid, uint32_t nsid, uint64_t zslba, const char *zc) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
+pci_nvme_zone_mgmt_send_finish(uint16_t cid, uint32_t nsid, uint64_t zslba, const char *zc) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
+pci_nvme_zone_mgmt_send_open(uint16_t cid, uint32_t nsid, uint64_t zslba, const char *zc) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
+pci_nvme_zone_mgmt_send_reset(uint16_t cid, uint32_t nsid, uint64_t zslba, const char *zc) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
+pci_nvme_zone_mgmt_send_reset_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
+pci_nvme_zone_mgmt_send_offline(uint16_t cid, uint32_t nsid, uint64_t zslba, const char *zc) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
+pci_nvme_zone_mgmt_send_set_zde(uint16_t cid, uint32_t nsid, uint64_t zslba, const char *zc) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
pci_nvme_zone_mgmt_recv(uint16_t cid, uint32_t nsid, uint64_t slba, uint64_t len, uint8_t zra, uint8_t zfeat, uint8_t zflags) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" len %"PRIu64" zra 0x%"PRIx8" zrasf 0x%"PRIx8" pr 0x%"PRIx8""
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
@@ -142,6 +152,8 @@ pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
pci_nvme_err_invalid_zone_state(uint64_t zslba, const char *zs_str, uint8_t zs) "zslba 0x%"PRIx64" zs '%s' (%"PRIu8")"
+pci_nvme_err_invalid_zone(uint64_t lba) "lba 0x%"PRIx64""
+pci_nvme_err_invalid_zslba(uint64_t lba) "lba 0x%"PRIx64""
pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"