Message ID | 20240531033455.3774806-10-luchangqi.123@bytedance.com |
---|---|
State | New |
Headers | show |
Series | Support persistent reservation operations | expand |
On 24-05-31 11:34:54, Changqi Lu wrote: > Add reservation acquire, reservation register, > reservation release and reservation report commands > in the nvme device layer. > > By introducing these commands, this enables the nvme > device to perform reservation-related tasks, including > querying keys, querying reservation status, registering > reservation keys, initiating and releasing reservations, > as well as clearing and preempting reservations held by > other keys. > > These commands are crucial for management and control of > shared storage resources in a persistent manner. > Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com> > Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> > --- > hw/nvme/ctrl.c | 321 ++++++++++++++++++++++++++++++++++++++++++- > hw/nvme/nvme.h | 4 + > include/block/nvme.h | 37 +++++ > 3 files changed, 361 insertions(+), 1 deletion(-) > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > index 182307a48b..033abd0afe 100644 > --- a/hw/nvme/ctrl.c > +++ b/hw/nvme/ctrl.c > @@ -294,6 +294,10 @@ static const uint32_t nvme_cse_iocs_nvm[256] = { > [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, > [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP, > [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, > + [NVME_CMD_RESV_REGISTER] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_REPORT] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_ACQUIRE] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_RELEASE] = NVME_CMD_EFF_CSUPP, > }; > > static const uint32_t nvme_cse_iocs_zoned[256] = { > @@ -308,6 +312,10 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { > [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, > [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, > [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_REGISTER] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_REPORT] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_ACQUIRE] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_RELEASE] = NVME_CMD_EFF_CSUPP, > }; > > static void nvme_process_sq(void *opaque); > @@ -1745,6 +1753,7 @@ static void nvme_aio_err(NvmeRequest *req, int ret) > > switch (req->cmd.opcode) { > case NVME_CMD_READ: > + case NVME_CMD_RESV_REPORT: > status = NVME_UNRECOVERED_READ; > break; > case NVME_CMD_FLUSH: > @@ -1752,6 +1761,9 @@ static void nvme_aio_err(NvmeRequest *req, int ret) > case NVME_CMD_WRITE_ZEROES: > case NVME_CMD_ZONE_APPEND: > case NVME_CMD_COPY: > + case NVME_CMD_RESV_REGISTER: > + case NVME_CMD_RESV_ACQUIRE: > + case NVME_CMD_RESV_RELEASE: > status = NVME_WRITE_FAULT; > break; > default: > @@ -2127,7 +2139,10 @@ static inline bool nvme_is_write(NvmeRequest *req) > > return rw->opcode == NVME_CMD_WRITE || > rw->opcode == NVME_CMD_ZONE_APPEND || > - rw->opcode == NVME_CMD_WRITE_ZEROES; > + rw->opcode == NVME_CMD_WRITE_ZEROES || > + rw->opcode == NVME_CMD_RESV_REGISTER || > + rw->opcode == NVME_CMD_RESV_ACQUIRE || > + rw->opcode == NVME_CMD_RESV_RELEASE; > } > > static void nvme_misc_cb(void *opaque, int ret) > @@ -2692,6 +2707,302 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) > return NVME_NO_COMPLETE; > } > > +typedef struct NvmeKeyInfo { > + uint64_t cr_key; > + uint64_t nr_key; > +} NvmeKeyInfo; > + > +static uint16_t nvme_resv_register(NvmeCtrl *n, NvmeRequest *req) > +{ > + int ret; > + NvmeKeyInfo key_info; > + NvmeNamespace *ns = req->ns; > + uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10); > + bool ignore_key = cdw10 >> 3 & 0x1; > + uint8_t action = cdw10 & 0x7; > + uint8_t ptpl = cdw10 >> 30 & 0x3; > + bool aptpl; > + > + switch (ptpl) { > + case NVME_RESV_PTPL_NO_CHANGE: > + aptpl = (ns->id_ns.rescap & NVME_PR_CAP_PTPL) ? true : false; > + break; > + case NVME_RESV_PTPL_DISABLE: > + aptpl = false; > + break; > + case NVME_RESV_PTPL_ENABLE: > + aptpl = true; > + break; > + default: > + return NVME_INVALID_FIELD; > + } > + > + ret = nvme_h2c(n, (uint8_t *)&key_info, sizeof(NvmeKeyInfo), req); > + if (ret) { > + return ret; > + } > + > + switch (action) { > + case NVME_RESV_REGISTER_ACTION_REGISTER: > + req->aiocb = blk_aio_pr_register(ns->blkconf.blk, 0, > + key_info.nr_key, 0, aptpl, > + ignore_key, nvme_misc_cb, > + req); > + break; > + case NVME_RESV_REGISTER_ACTION_UNREGISTER: > + req->aiocb = blk_aio_pr_register(ns->blkconf.blk, key_info.cr_key, 0, > + 0, aptpl, ignore_key, > + nvme_misc_cb, req); > + break; > + case NVME_RESV_REGISTER_ACTION_REPLACE: > + req->aiocb = blk_aio_pr_register(ns->blkconf.blk, key_info.cr_key, > + key_info.nr_key, 0, aptpl, ignore_key, > + nvme_misc_cb, req); > + break; > + default: > + return NVME_INVALID_FIELD; > + } > + > + return NVME_NO_COMPLETE; > +} > + > +static uint16_t nvme_resv_release(NvmeCtrl *n, NvmeRequest *req) > +{ > + int ret; > + uint64_t cr_key; > + NvmeNamespace *ns = req->ns; > + uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10); > + uint8_t action = cdw10 & 0x7; > + NvmeResvType type = cdw10 >> 8 & 0xff; > + > + ret = nvme_h2c(n, (uint8_t *)&cr_key, sizeof(cr_key), req); > + if (ret) { > + return ret; > + } > + > + switch (action) { > + case NVME_RESV_RELEASE_ACTION_RELEASE: > + req->aiocb = blk_aio_pr_release(ns->blkconf.blk, cr_key, > + nvme_pr_type_to_block(type), > + nvme_misc_cb, req); > + break; > + case NVME_RESV_RELEASE_ACTION_CLEAR: > + req->aiocb = blk_aio_pr_clear(ns->blkconf.blk, cr_key, > + nvme_misc_cb, req); > + break; > + default: > + return NVME_INVALID_FIELD; > + } > + > + return NVME_NO_COMPLETE; > +} > + > +static uint16_t nvme_resv_acquire(NvmeCtrl *n, NvmeRequest *req) > +{ > + int ret; > + NvmeKeyInfo key_info; > + NvmeNamespace *ns = req->ns; > + uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10); > + uint8_t action = cdw10 & 0x7; > + NvmeResvType type = cdw10 >> 8 & 0xff; > + > + ret = nvme_h2c(n, (uint8_t *)&key_info, sizeof(NvmeKeyInfo), req); > + if (ret) { > + return ret; > + } > + > + switch (action) { > + case NVME_RESV_ACQUIRE_ACTION_ACQUIRE: > + req->aiocb = blk_aio_pr_reserve(ns->blkconf.blk, key_info.cr_key, > + nvme_pr_type_to_block(type), > + nvme_misc_cb, req); > + break; > + case NVME_RESV_ACQUIRE_ACTION_PREEMPT: > + req->aiocb = blk_aio_pr_preempt(ns->blkconf.blk, > + key_info.cr_key, key_info.nr_key, > + nvme_pr_type_to_block(type), > + false, nvme_misc_cb, req); > + break; > + case NVME_RESV_ACQUIRE_ACTION_PREEMPT_AND_ABORT: > + req->aiocb = blk_aio_pr_preempt(ns->blkconf.blk, key_info.cr_key, > + key_info.nr_key, type, true, > + nvme_misc_cb, req); > + break; > + default: > + return NVME_INVALID_FIELD; > + } > + > + return NVME_NO_COMPLETE; > +} > + > +typedef struct NvmeResvKeys { > + uint32_t generation; > + uint32_t num_keys; > + uint64_t *keys; > + NvmeRequest *req; > +} NvmeResvKeys; > + > +typedef struct NvmeReadReservation { > + uint32_t generation; > + uint64_t key; > + BlockPrType type; > + NvmeRequest *req; > + NvmeResvKeys *keys_info; > +} NvmeReadReservation; > + > +static int nvme_read_reservation_cb(NvmeReadReservation *reservation) > +{ > + int rc; > + NvmeReservationStatus *nvme_status; > + NvmeRequest *req = reservation->req; > + NvmeCtrl *n = req->sq->ctrl; > + NvmeResvKeys *keys_info = reservation->keys_info; > + int len = sizeof(NvmeReservationStatusHeader) + > + sizeof(NvmeRegisteredCtrl) * keys_info->num_keys; > + > + nvme_status = g_malloc(len); > + nvme_status->header.gen = reservation->generation; > + nvme_status->header.rtype = block_pr_type_to_nvme(reservation->type); > + nvme_status->header.regctl = keys_info->num_keys; > + for (int i = 0; i < keys_info->num_keys; i++) { > + nvme_status->regctl_ds[i].cntlid = nvme_ctrl(req)->cntlid; > + nvme_status->regctl_ds[i].rkey = keys_info->keys[i]; > + nvme_status->regctl_ds[i].rcsts = keys_info->keys[i] == > + reservation->key ? 1 : 0; > + /* hostid is not supported currently */ > + memset(&nvme_status->regctl_ds[i].hostid, 0, 8); > + } > + > + rc = nvme_c2h(n, (uint8_t *)nvme_status, len, req); > + g_free(nvme_status); > + return rc; > +} > + > +static int nvme_read_reservation_ext_cb(NvmeReadReservation *reservation) > +{ > + int rc; > + NvmeReservationStatusExt *nvme_status_ext; > + NvmeRequest *req = reservation->req; > + NvmeCtrl *n = req->sq->ctrl; > + NvmeResvKeys *keys_info = reservation->keys_info; > + int len = sizeof(NvmeReservationStatusHeader) + > + sizeof(uint8_t) * 40 + > + sizeof(NvmeRegisteredCtrlExt) * keys_info->num_keys; > + > + nvme_status_ext = g_malloc(len); > + nvme_status_ext->header.gen = cpu_to_be32(reservation->generation); > + nvme_status_ext->header.rtype = block_pr_type_to_nvme(reservation->type); > + nvme_status_ext->header.regctl = cpu_to_be16(keys_info->num_keys); > + > + for (int i = 0; i < keys_info->num_keys; i++) { > + uint16_t ctnlid = nvme_ctrl(req)->cntlid; > + nvme_status_ext->regctl_eds[i].cntlid = cpu_to_be16(ctnlid); > + nvme_status_ext->regctl_eds[i].rkey = cpu_to_be64(keys_info->keys[i]); > + nvme_status_ext->regctl_eds[i].rcsts = keys_info->keys[i] == > + reservation->key ? 1 : 0; > + /* hostid is not supported currently */ > + memset(&nvme_status_ext->regctl_eds[i].hostid, 0, 16); > + } > + > + rc = nvme_c2h(n, (uint8_t *)nvme_status_ext, len, req); > + g_free(nvme_status_ext); > + return rc; > +} > + > +static void nvme_resv_read_reservation_cb(void *opaque, int ret) > +{ > + NvmeReadReservation *reservation = opaque; > + NvmeRequest *req = reservation->req; > + bool eds = req->cmd.cdw11 & 0x1; > + NvmeResvKeys *keys_info = reservation->keys_info; > + > + if (ret < 0) { > + goto out; > + } > + > + if (eds) { > + ret = nvme_read_reservation_ext_cb(reservation); > + } else { > + ret = nvme_read_reservation_cb(reservation); > + } > + > +out: > + g_free(keys_info); > + g_free(reservation); > + nvme_misc_cb(req, ret); > +} > + > +static void nvme_resv_read_keys_cb(void *opaque, int ret) > +{ > + NvmeResvKeys *keys_info = opaque; > + NvmeRequest *req = keys_info->req; > + NvmeNamespace *ns = req->ns; > + NvmeReadReservation *reservation; > + > + if (ret < 0) { > + goto out; > + } > + > + keys_info->num_keys = MIN(ret, keys_info->num_keys); > + reservation = g_new0(NvmeReadReservation, 1); > + memset(reservation, 0, sizeof(*reservation)); > + reservation->req = req; > + reservation->keys_info = keys_info; > + > + req->aiocb = blk_aio_pr_read_reservation(ns->blkconf.blk, > + &reservation->generation, &reservation->key, > + &reservation->type, nvme_resv_read_reservation_cb, > + reservation); > + return; > + > +out: > + g_free(keys_info); > + nvme_misc_cb(req, ret); > +} > + > + > +static uint16_t nvme_resv_report(NvmeCtrl *n, NvmeRequest *req) > +{ > + int num_keys; > + uint32_t cdw10 = req->cmd.cdw10; > + uint32_t cdw11 = req->cmd.cdw11; > + int buflen = (cdw10 + 1) * sizeof(uint32_t); > + bool eds = cdw11 & 0x1; > + NvmeNamespace *ns = req->ns; > + NvmeResvKeys *keys_info; > + > + if (eds) { > + if (buflen < sizeof(NvmeReservationStatusHeader) + > + sizeof(uint8_t) * 40) { > + return NVME_INVALID_FIELD; > + } > + > + num_keys = (buflen - sizeof(NvmeReservationStatusHeader) - > + sizeof(uint8_t) * 40) / > + sizeof(struct NvmeRegisteredCtrlExt); > + } else { > + if (buflen < sizeof(NvmeReservationStatusHeader)) { > + return NVME_INVALID_FIELD; > + } > + > + num_keys = (buflen - sizeof(NvmeReservationStatusHeader)) / > + sizeof(struct NvmeRegisteredCtrl); > + } > + > + keys_info = g_new0(NvmeResvKeys, 1); > + keys_info->generation = 0; > + /* num_keys is the maximum number of keys that can be transmitted */ > + keys_info->num_keys = num_keys; > + keys_info->keys = g_malloc(sizeof(uint64_t) * num_keys); Where will `keys_info->keys` it be freed? Callbacks do not look like freeing `keys` itself, but freeing `keys_info`. Am I missing something? > + keys_info->req = req; > + > + req->aiocb = blk_aio_pr_read_keys(ns->blkconf.blk, &keys_info->generation, > + keys_info->num_keys, keys_info->keys, > + nvme_resv_read_keys_cb, keys_info); > + > + return NVME_NO_COMPLETE; > +} > + > typedef struct NvmeCopyAIOCB { > BlockAIOCB common; > BlockAIOCB *aiocb; > @@ -4469,6 +4780,14 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) > return nvme_dsm(n, req); > case NVME_CMD_VERIFY: > return nvme_verify(n, req); > + case NVME_CMD_RESV_REGISTER: > + return nvme_resv_register(n, req); > + case NVME_CMD_RESV_REPORT: > + return nvme_resv_report(n, req); > + case NVME_CMD_RESV_ACQUIRE: > + return nvme_resv_acquire(n, req); > + case NVME_CMD_RESV_RELEASE: > + return nvme_resv_release(n, req); > case NVME_CMD_COPY: > return nvme_copy(n, req); > case NVME_CMD_ZONE_MGMT_SEND: > diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h > index b1ad27c8f2..808b26f086 100644 > --- a/hw/nvme/nvme.h > +++ b/hw/nvme/nvme.h > @@ -470,6 +470,10 @@ static inline const char *nvme_io_opc_str(uint8_t opc) > case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; > case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; > case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; > + case NVME_CMD_RESV_REGISTER: return "NVME_CMD_RESV_REGISTER"; > + case NVME_CMD_RESV_REPORT: return "NVME_CMD_RESV_REPORT"; > + case NVME_CMD_RESV_ACQUIRE: return "NVME_CMD_RESV_ACQUIRE"; > + case NVME_CMD_RESV_RELEASE: return "NVME_CMD_RESV_RELEASE"; > default: return "NVME_NVM_CMD_UNKNOWN"; > } > } > diff --git a/include/block/nvme.h b/include/block/nvme.h > index da6ccb0f3b..3e88b8be69 100644 > --- a/include/block/nvme.h > +++ b/include/block/nvme.h > @@ -702,6 +702,43 @@ typedef enum NVMEPrCap { > NVME_PR_CAP_EX_AC_AR), > } NvmePrCap; > > +typedef struct QEMU_PACKED NvmeRegisteredCtrl { > + uint16_t cntlid; > + uint8_t rcsts; > + uint8_t rsvd3[5]; > + uint8_t hostid[8]; > + uint64_t rkey; > +} NvmeRegisteredCtrl; > + > +typedef struct QEMU_PACKED NvmeRegisteredCtrlExt { > + uint16_t cntlid; > + uint8_t rcsts; > + uint8_t rsvd3[5]; > + uint64_t rkey; > + uint8_t hostid[16]; > + uint8_t rsvd32[32]; > +} NvmeRegisteredCtrlExt; > + > +typedef struct QEMU_PACKED NvmeReservationStatusHeader { > + uint32_t gen; > + uint8_t rtype; > + uint16_t regctl; > + uint16_t resv5; > + uint8_t ptpls; > + uint8_t resv10[14]; > +} NvmeReservationStatusHeader; > + > +typedef struct QEMU_PACKED NvmeReservationStatus { > + struct NvmeReservationStatusHeader header; > + struct NvmeRegisteredCtrl regctl_ds[]; > +} NvmeReservationStatus; > + > +typedef struct QEMU_PACKED NvmeReservationStatusExt { > + struct NvmeReservationStatusHeader header; > + uint8_t rsvd24[40]; > + struct NvmeRegisteredCtrlExt regctl_eds[]; > +} NvmeReservationStatusExt; > + > typedef struct QEMU_PACKED NvmeDeleteQ { > uint8_t opcode; > uint8_t flags; > -- > 2.20.1 > > >
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 182307a48b..033abd0afe 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -294,6 +294,10 @@ static const uint32_t nvme_cse_iocs_nvm[256] = { [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP, [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_RESV_REGISTER] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_RESV_REPORT] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_RESV_ACQUIRE] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_RESV_RELEASE] = NVME_CMD_EFF_CSUPP, }; static const uint32_t nvme_cse_iocs_zoned[256] = { @@ -308,6 +312,10 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_RESV_REGISTER] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_RESV_REPORT] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_RESV_ACQUIRE] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_RESV_RELEASE] = NVME_CMD_EFF_CSUPP, }; static void nvme_process_sq(void *opaque); @@ -1745,6 +1753,7 @@ static void nvme_aio_err(NvmeRequest *req, int ret) switch (req->cmd.opcode) { case NVME_CMD_READ: + case NVME_CMD_RESV_REPORT: status = NVME_UNRECOVERED_READ; break; case NVME_CMD_FLUSH: @@ -1752,6 +1761,9 @@ static void nvme_aio_err(NvmeRequest *req, int ret) case NVME_CMD_WRITE_ZEROES: case NVME_CMD_ZONE_APPEND: case NVME_CMD_COPY: + case NVME_CMD_RESV_REGISTER: + case NVME_CMD_RESV_ACQUIRE: + case NVME_CMD_RESV_RELEASE: status = NVME_WRITE_FAULT; break; default: @@ -2127,7 +2139,10 @@ static inline bool nvme_is_write(NvmeRequest *req) return rw->opcode == NVME_CMD_WRITE || rw->opcode == NVME_CMD_ZONE_APPEND || - rw->opcode == NVME_CMD_WRITE_ZEROES; + rw->opcode == NVME_CMD_WRITE_ZEROES || + rw->opcode == NVME_CMD_RESV_REGISTER || + rw->opcode == NVME_CMD_RESV_ACQUIRE || + rw->opcode == NVME_CMD_RESV_RELEASE; } static void nvme_misc_cb(void *opaque, int ret) @@ -2692,6 +2707,302 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } +typedef struct NvmeKeyInfo { + uint64_t cr_key; + uint64_t nr_key; +} NvmeKeyInfo; + +static uint16_t nvme_resv_register(NvmeCtrl *n, NvmeRequest *req) +{ + int ret; + NvmeKeyInfo key_info; + NvmeNamespace *ns = req->ns; + uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10); + bool ignore_key = cdw10 >> 3 & 0x1; + uint8_t action = cdw10 & 0x7; + uint8_t ptpl = cdw10 >> 30 & 0x3; + bool aptpl; + + switch (ptpl) { + case NVME_RESV_PTPL_NO_CHANGE: + aptpl = (ns->id_ns.rescap & NVME_PR_CAP_PTPL) ? true : false; + break; + case NVME_RESV_PTPL_DISABLE: + aptpl = false; + break; + case NVME_RESV_PTPL_ENABLE: + aptpl = true; + break; + default: + return NVME_INVALID_FIELD; + } + + ret = nvme_h2c(n, (uint8_t *)&key_info, sizeof(NvmeKeyInfo), req); + if (ret) { + return ret; + } + + switch (action) { + case NVME_RESV_REGISTER_ACTION_REGISTER: + req->aiocb = blk_aio_pr_register(ns->blkconf.blk, 0, + key_info.nr_key, 0, aptpl, + ignore_key, nvme_misc_cb, + req); + break; + case NVME_RESV_REGISTER_ACTION_UNREGISTER: + req->aiocb = blk_aio_pr_register(ns->blkconf.blk, key_info.cr_key, 0, + 0, aptpl, ignore_key, + nvme_misc_cb, req); + break; + case NVME_RESV_REGISTER_ACTION_REPLACE: + req->aiocb = blk_aio_pr_register(ns->blkconf.blk, key_info.cr_key, + key_info.nr_key, 0, aptpl, ignore_key, + nvme_misc_cb, req); + break; + default: + return NVME_INVALID_FIELD; + } + + return NVME_NO_COMPLETE; +} + +static uint16_t nvme_resv_release(NvmeCtrl *n, NvmeRequest *req) +{ + int ret; + uint64_t cr_key; + NvmeNamespace *ns = req->ns; + uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10); + uint8_t action = cdw10 & 0x7; + NvmeResvType type = cdw10 >> 8 & 0xff; + + ret = nvme_h2c(n, (uint8_t *)&cr_key, sizeof(cr_key), req); + if (ret) { + return ret; + } + + switch (action) { + case NVME_RESV_RELEASE_ACTION_RELEASE: + req->aiocb = blk_aio_pr_release(ns->blkconf.blk, cr_key, + nvme_pr_type_to_block(type), + nvme_misc_cb, req); + break; + case NVME_RESV_RELEASE_ACTION_CLEAR: + req->aiocb = blk_aio_pr_clear(ns->blkconf.blk, cr_key, + nvme_misc_cb, req); + break; + default: + return NVME_INVALID_FIELD; + } + + return NVME_NO_COMPLETE; +} + +static uint16_t nvme_resv_acquire(NvmeCtrl *n, NvmeRequest *req) +{ + int ret; + NvmeKeyInfo key_info; + NvmeNamespace *ns = req->ns; + uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10); + uint8_t action = cdw10 & 0x7; + NvmeResvType type = cdw10 >> 8 & 0xff; + + ret = nvme_h2c(n, (uint8_t *)&key_info, sizeof(NvmeKeyInfo), req); + if (ret) { + return ret; + } + + switch (action) { + case NVME_RESV_ACQUIRE_ACTION_ACQUIRE: + req->aiocb = blk_aio_pr_reserve(ns->blkconf.blk, key_info.cr_key, + nvme_pr_type_to_block(type), + nvme_misc_cb, req); + break; + case NVME_RESV_ACQUIRE_ACTION_PREEMPT: + req->aiocb = blk_aio_pr_preempt(ns->blkconf.blk, + key_info.cr_key, key_info.nr_key, + nvme_pr_type_to_block(type), + false, nvme_misc_cb, req); + break; + case NVME_RESV_ACQUIRE_ACTION_PREEMPT_AND_ABORT: + req->aiocb = blk_aio_pr_preempt(ns->blkconf.blk, key_info.cr_key, + key_info.nr_key, type, true, + nvme_misc_cb, req); + break; + default: + return NVME_INVALID_FIELD; + } + + return NVME_NO_COMPLETE; +} + +typedef struct NvmeResvKeys { + uint32_t generation; + uint32_t num_keys; + uint64_t *keys; + NvmeRequest *req; +} NvmeResvKeys; + +typedef struct NvmeReadReservation { + uint32_t generation; + uint64_t key; + BlockPrType type; + NvmeRequest *req; + NvmeResvKeys *keys_info; +} NvmeReadReservation; + +static int nvme_read_reservation_cb(NvmeReadReservation *reservation) +{ + int rc; + NvmeReservationStatus *nvme_status; + NvmeRequest *req = reservation->req; + NvmeCtrl *n = req->sq->ctrl; + NvmeResvKeys *keys_info = reservation->keys_info; + int len = sizeof(NvmeReservationStatusHeader) + + sizeof(NvmeRegisteredCtrl) * keys_info->num_keys; + + nvme_status = g_malloc(len); + nvme_status->header.gen = reservation->generation; + nvme_status->header.rtype = block_pr_type_to_nvme(reservation->type); + nvme_status->header.regctl = keys_info->num_keys; + for (int i = 0; i < keys_info->num_keys; i++) { + nvme_status->regctl_ds[i].cntlid = nvme_ctrl(req)->cntlid; + nvme_status->regctl_ds[i].rkey = keys_info->keys[i]; + nvme_status->regctl_ds[i].rcsts = keys_info->keys[i] == + reservation->key ? 1 : 0; + /* hostid is not supported currently */ + memset(&nvme_status->regctl_ds[i].hostid, 0, 8); + } + + rc = nvme_c2h(n, (uint8_t *)nvme_status, len, req); + g_free(nvme_status); + return rc; +} + +static int nvme_read_reservation_ext_cb(NvmeReadReservation *reservation) +{ + int rc; + NvmeReservationStatusExt *nvme_status_ext; + NvmeRequest *req = reservation->req; + NvmeCtrl *n = req->sq->ctrl; + NvmeResvKeys *keys_info = reservation->keys_info; + int len = sizeof(NvmeReservationStatusHeader) + + sizeof(uint8_t) * 40 + + sizeof(NvmeRegisteredCtrlExt) * keys_info->num_keys; + + nvme_status_ext = g_malloc(len); + nvme_status_ext->header.gen = cpu_to_be32(reservation->generation); + nvme_status_ext->header.rtype = block_pr_type_to_nvme(reservation->type); + nvme_status_ext->header.regctl = cpu_to_be16(keys_info->num_keys); + + for (int i = 0; i < keys_info->num_keys; i++) { + uint16_t ctnlid = nvme_ctrl(req)->cntlid; + nvme_status_ext->regctl_eds[i].cntlid = cpu_to_be16(ctnlid); + nvme_status_ext->regctl_eds[i].rkey = cpu_to_be64(keys_info->keys[i]); + nvme_status_ext->regctl_eds[i].rcsts = keys_info->keys[i] == + reservation->key ? 1 : 0; + /* hostid is not supported currently */ + memset(&nvme_status_ext->regctl_eds[i].hostid, 0, 16); + } + + rc = nvme_c2h(n, (uint8_t *)nvme_status_ext, len, req); + g_free(nvme_status_ext); + return rc; +} + +static void nvme_resv_read_reservation_cb(void *opaque, int ret) +{ + NvmeReadReservation *reservation = opaque; + NvmeRequest *req = reservation->req; + bool eds = req->cmd.cdw11 & 0x1; + NvmeResvKeys *keys_info = reservation->keys_info; + + if (ret < 0) { + goto out; + } + + if (eds) { + ret = nvme_read_reservation_ext_cb(reservation); + } else { + ret = nvme_read_reservation_cb(reservation); + } + +out: + g_free(keys_info); + g_free(reservation); + nvme_misc_cb(req, ret); +} + +static void nvme_resv_read_keys_cb(void *opaque, int ret) +{ + NvmeResvKeys *keys_info = opaque; + NvmeRequest *req = keys_info->req; + NvmeNamespace *ns = req->ns; + NvmeReadReservation *reservation; + + if (ret < 0) { + goto out; + } + + keys_info->num_keys = MIN(ret, keys_info->num_keys); + reservation = g_new0(NvmeReadReservation, 1); + memset(reservation, 0, sizeof(*reservation)); + reservation->req = req; + reservation->keys_info = keys_info; + + req->aiocb = blk_aio_pr_read_reservation(ns->blkconf.blk, + &reservation->generation, &reservation->key, + &reservation->type, nvme_resv_read_reservation_cb, + reservation); + return; + +out: + g_free(keys_info); + nvme_misc_cb(req, ret); +} + + +static uint16_t nvme_resv_report(NvmeCtrl *n, NvmeRequest *req) +{ + int num_keys; + uint32_t cdw10 = req->cmd.cdw10; + uint32_t cdw11 = req->cmd.cdw11; + int buflen = (cdw10 + 1) * sizeof(uint32_t); + bool eds = cdw11 & 0x1; + NvmeNamespace *ns = req->ns; + NvmeResvKeys *keys_info; + + if (eds) { + if (buflen < sizeof(NvmeReservationStatusHeader) + + sizeof(uint8_t) * 40) { + return NVME_INVALID_FIELD; + } + + num_keys = (buflen - sizeof(NvmeReservationStatusHeader) - + sizeof(uint8_t) * 40) / + sizeof(struct NvmeRegisteredCtrlExt); + } else { + if (buflen < sizeof(NvmeReservationStatusHeader)) { + return NVME_INVALID_FIELD; + } + + num_keys = (buflen - sizeof(NvmeReservationStatusHeader)) / + sizeof(struct NvmeRegisteredCtrl); + } + + keys_info = g_new0(NvmeResvKeys, 1); + keys_info->generation = 0; + /* num_keys is the maximum number of keys that can be transmitted */ + keys_info->num_keys = num_keys; + keys_info->keys = g_malloc(sizeof(uint64_t) * num_keys); + keys_info->req = req; + + req->aiocb = blk_aio_pr_read_keys(ns->blkconf.blk, &keys_info->generation, + keys_info->num_keys, keys_info->keys, + nvme_resv_read_keys_cb, keys_info); + + return NVME_NO_COMPLETE; +} + typedef struct NvmeCopyAIOCB { BlockAIOCB common; BlockAIOCB *aiocb; @@ -4469,6 +4780,14 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_dsm(n, req); case NVME_CMD_VERIFY: return nvme_verify(n, req); + case NVME_CMD_RESV_REGISTER: + return nvme_resv_register(n, req); + case NVME_CMD_RESV_REPORT: + return nvme_resv_report(n, req); + case NVME_CMD_RESV_ACQUIRE: + return nvme_resv_acquire(n, req); + case NVME_CMD_RESV_RELEASE: + return nvme_resv_release(n, req); case NVME_CMD_COPY: return nvme_copy(n, req); case NVME_CMD_ZONE_MGMT_SEND: diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index b1ad27c8f2..808b26f086 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -470,6 +470,10 @@ static inline const char *nvme_io_opc_str(uint8_t opc) case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; + case NVME_CMD_RESV_REGISTER: return "NVME_CMD_RESV_REGISTER"; + case NVME_CMD_RESV_REPORT: return "NVME_CMD_RESV_REPORT"; + case NVME_CMD_RESV_ACQUIRE: return "NVME_CMD_RESV_ACQUIRE"; + case NVME_CMD_RESV_RELEASE: return "NVME_CMD_RESV_RELEASE"; default: return "NVME_NVM_CMD_UNKNOWN"; } } diff --git a/include/block/nvme.h b/include/block/nvme.h index da6ccb0f3b..3e88b8be69 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -702,6 +702,43 @@ typedef enum NVMEPrCap { NVME_PR_CAP_EX_AC_AR), } NvmePrCap; +typedef struct QEMU_PACKED NvmeRegisteredCtrl { + uint16_t cntlid; + uint8_t rcsts; + uint8_t rsvd3[5]; + uint8_t hostid[8]; + uint64_t rkey; +} NvmeRegisteredCtrl; + +typedef struct QEMU_PACKED NvmeRegisteredCtrlExt { + uint16_t cntlid; + uint8_t rcsts; + uint8_t rsvd3[5]; + uint64_t rkey; + uint8_t hostid[16]; + uint8_t rsvd32[32]; +} NvmeRegisteredCtrlExt; + +typedef struct QEMU_PACKED NvmeReservationStatusHeader { + uint32_t gen; + uint8_t rtype; + uint16_t regctl; + uint16_t resv5; + uint8_t ptpls; + uint8_t resv10[14]; +} NvmeReservationStatusHeader; + +typedef struct QEMU_PACKED NvmeReservationStatus { + struct NvmeReservationStatusHeader header; + struct NvmeRegisteredCtrl regctl_ds[]; +} NvmeReservationStatus; + +typedef struct QEMU_PACKED NvmeReservationStatusExt { + struct NvmeReservationStatusHeader header; + uint8_t rsvd24[40]; + struct NvmeRegisteredCtrlExt regctl_eds[]; +} NvmeReservationStatusExt; + typedef struct QEMU_PACKED NvmeDeleteQ { uint8_t opcode; uint8_t flags;