Message ID | 20240613071327.2498953-6-luchangqi.123@bytedance.com |
---|---|
State | New |
Headers | show |
Series | Support persistent reservation operations | expand |
On Thu, Jun 13, 2024 at 03:13:22PM +0800, Changqi Lu wrote: > Add persistent reservation in/out operations in the > SCSI device layer. By introducing the persistent > reservation in/out api, this enables the SCSI device > to perform reservation-related tasks, including querying > keys, querying reservation status, registering reservation > keys, initiating and releasing reservations, as well as > clearing and preempting reservations held by other keys. > > These operations are crucial for management and control of > shared storage resources in a persistent manner. > > Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com> > Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> As mentioned in my reply to a previous version, I don't understand the buffer allocation/sizing in hw/scsi/ so I haven't been able to fully review this code for buffer overflows and input validation. cmd.xfer isn't consistently used for size checks in the new functions. Maybe some checks are missing? > --- > hw/scsi/scsi-disk.c | 352 ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 352 insertions(+) > > diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c > index 4bd7af9d0c..0e964dbd87 100644 > --- a/hw/scsi/scsi-disk.c > +++ b/hw/scsi/scsi-disk.c > @@ -32,6 +32,7 @@ > #include "migration/vmstate.h" > #include "hw/scsi/emulation.h" > #include "scsi/constants.h" > +#include "scsi/utils.h" > #include "sysemu/block-backend.h" > #include "sysemu/blockdev.h" > #include "hw/block/block.h" > @@ -42,6 +43,7 @@ > #include "qemu/cutils.h" > #include "trace.h" > #include "qom/object.h" > +#include "block/block_int.h" > > #ifdef __linux > #include <scsi/sg.h> > @@ -1474,6 +1476,346 @@ static void scsi_disk_emulate_read_data(SCSIRequest *req) > scsi_req_complete(&r->req, GOOD); > } > > +typedef struct SCSIPrReadKeys { > + uint32_t generation; > + uint32_t num_keys; > + uint64_t *keys; > + void *req; Why is this field void * instead of SCSIDiskReq *? > +} SCSIPrReadKeys; > + > +typedef struct SCSIPrReadReservation { > + uint32_t generation; > + uint64_t key; > + BlockPrType type; > + void *req; Same here. > +} SCSIPrReadReservation; > + > +static void scsi_pr_read_keys_complete(void *opaque, int ret) > +{ > + int num_keys; > + uint8_t *buf; > + SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque; > + SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); > + > + assert(blk_get_aio_context(s->qdev.conf.blk) == > + qemu_get_current_aio_context()); > + > + assert(r->req.aiocb != NULL); > + r->req.aiocb = NULL; > + > + if (scsi_disk_req_check_error(r, ret, true)) { > + goto done; > + } > + > + buf = scsi_req_get_buf(&r->req); > + num_keys = MIN(blk_keys->num_keys, ret); > + blk_keys->generation = cpu_to_be32(blk_keys->generation); > + memcpy(&buf[0], &blk_keys->generation, 4); > + for (int i = 0; i < num_keys; i++) { > + blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]); > + memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8); > + } > + num_keys = cpu_to_be32(num_keys * 8); > + memcpy(&buf[4], &num_keys, 4); > + > + scsi_req_data(&r->req, r->buflen); > +done: > + scsi_req_unref(&r->req); > + g_free(blk_keys->keys); > + g_free(blk_keys); > +} > + > +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req) > +{ > + SCSIPrReadKeys *blk_keys; > + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + int buflen = MIN(r->req.cmd.xfer, r->buflen); > + int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t); If buflen is an untrusted input then num_keys < 0 and maybe num_keys == 0 need to be rejected with an error. > + > + blk_keys = g_new0(SCSIPrReadKeys, 1); > + blk_keys->generation = 0; > + /* num_keys is the maximum number of keys that can be transmitted */ > + blk_keys->num_keys = num_keys; > + blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys); > + blk_keys->req = r; > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, &blk_keys->generation, > + blk_keys->num_keys, blk_keys->keys, > + scsi_pr_read_keys_complete, blk_keys); > + return 0; > +} > + > +static void scsi_pr_read_reservation_complete(void *opaque, int ret) > +{ > + uint8_t *buf; > + uint32_t additional_len = 0; > + SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque; > + SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); > + > + assert(blk_get_aio_context(s->qdev.conf.blk) == > + qemu_get_current_aio_context()); > + > + assert(r->req.aiocb != NULL); > + r->req.aiocb = NULL; > + > + if (scsi_disk_req_check_error(r, ret, true)) { > + goto done; > + } > + > + buf = scsi_req_get_buf(&r->req); > + blk_rsv->generation = cpu_to_be32(blk_rsv->generation); > + memcpy(&buf[0], &blk_rsv->generation, 4); > + if (ret) { > + additional_len = cpu_to_be32(16); > + blk_rsv->key = cpu_to_be64(blk_rsv->key); > + memcpy(&buf[8], &blk_rsv->key, 8); > + buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf; > + } else { > + additional_len = cpu_to_be32(0); > + } > + > + memcpy(&buf[4], &additional_len, 4); > + scsi_req_data(&r->req, r->buflen); > + > +done: > + scsi_req_unref(&r->req); > + g_free(blk_rsv); > +} > + > +static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req) > +{ > + SCSIPrReadReservation *blk_rsv; > + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); There is a buflen check in scsi_disk_emulate_pr_report_capabilities() but not here. Is a check needed here to make sure up to 22 bytes are available? > + > + blk_rsv = g_malloc(sizeof(*blk_rsv)); > + blk_rsv->generation = 0; > + blk_rsv->key = 0; > + blk_rsv->type = 0; > + blk_rsv->req = r; > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk, > + &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type, > + scsi_pr_read_reservation_complete, blk_rsv); > + return 0; > +} > + > +static int scsi_disk_emulate_pr_report_capabilities(SCSIRequest *req) > +{ > + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + BlockDriverState *bs = blk_bs(s->qdev.conf.blk); > + uint8_t blk_pr_cap = bs->file->bs->bl.pr_cap; > + uint8_t *buf = scsi_req_get_buf(req); > + uint16_t pr_cap; > + /* > + * according to response of report_capabilities > + * command of chapter 6.13 of spc4 > + */ > + int len = sizeof(uint16_t) * 2 + sizeof(uint8_t) * 4; > + > + if (len > r->buflen) { > + return -EINVAL; > + } > + > + len = cpu_to_be16(len); > + memcpy(&buf[0], &len, 2); > + /* enable PTPL_C */ > + buf[2] = 1; > + buf[3] = (blk_pr_cap & BLK_PR_CAP_PTPL) ? 1 : 0; > + /* enable TMV */ > + buf[3] |= 1 << 7; > + > + /* enable persistent reservation type */ > + pr_cap = block_pr_cap_to_scsi(blk_pr_cap); > + buf[4] = pr_cap >> 8 & 0xff; > + buf[5] = pr_cap & 0xff; > + > + scsi_req_data(&r->req, len); > + return 0; > +} > + > +static void scsi_aio_pr_complete(void *opaque, int ret) > +{ > + SCSIDiskReq *r = (SCSIDiskReq *)opaque; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); > + > + /* The request must only run in the BlockBackend's AioContext */ > + assert(blk_get_aio_context(s->qdev.conf.blk) == > + qemu_get_current_aio_context()); > + > + assert(r->req.aiocb != NULL); > + r->req.aiocb = NULL; > + > + if (scsi_disk_req_check_error(r, ret, true)) { > + goto done; > + } > + > + scsi_req_complete(&r->req, GOOD); > + > +done: > + scsi_req_unref(&r->req); > +} > + > +static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t r_key, > + uint64_t sa_key, SCSIPrType type, > + uint8_t aptpl, bool ignore_key) > +{ > + SCSIRequest *req = &r->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, r_key, sa_key, > + scsi_pr_type_to_block(type), > + aptpl ? true : false, > + ignore_key, scsi_aio_pr_complete, r); > + > + return 0; > +} > + > +static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t r_key, > + SCSIPrType type) > +{ > + SCSIRequest *req = &r->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, r_key, > + scsi_pr_type_to_block(type), > + scsi_aio_pr_complete, r); > + > + return 0; > +} > + > +static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t r_key, > + SCSIPrType type) > +{ > + SCSIRequest *req = &r->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, r_key, > + scsi_pr_type_to_block(type), > + scsi_aio_pr_complete, r); > + > + return 0; > +} > + > +static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t r_key) > +{ > + SCSIRequest *req = &r->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, r_key, > + scsi_aio_pr_complete, r); > + > + return 0; > +} > + > +static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t r_key, > + uint64_t sa_key, SCSIPrType type, > + bool abort) > +{ > + SCSIRequest *req = &r->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, r_key, sa_key, > + scsi_pr_type_to_block(type), abort, > + scsi_aio_pr_complete, r); > + > + return 0; > +} > + > +static int scsi_disk_emulate_pr_in(SCSIRequest *req) > +{ > + int rc; > + SCSIPrInAction action = req->cmd.buf[1] & 0x1f; > + > + switch (action) { > + case SCSI_PR_IN_READ_KEYS: > + rc = scsi_disk_emulate_pr_read_keys(req); > + break; > + case SCSI_PR_IN_READ_RESERVATION: > + rc = scsi_disk_emulate_pr_read_reservation(req); > + break; > + case SCSI_PR_IN_REPORT_CAPABILITIES: > + rc = scsi_disk_emulate_pr_report_capabilities(req); > + break; > + default: > + return -ENOTSUP; > + } > + > + return rc; > +} > + > +static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf) > +{ > + int rc; > + uint8_t aptpl; > + uint64_t r_key, sa_key; > + SCSIPrOutAction action; > + SCSIPrScope scope; > + SCSIPrType type; > + SCSIRequest *req = &r->req; > + > + memcpy(&r_key, &inbuf[0], 8); > + r_key = be64_to_cpu(r_key); > + memcpy(&sa_key, &inbuf[8], 8); > + sa_key = be64_to_cpu(sa_key); > + memcpy(&aptpl, &inbuf[20], 1); > + aptpl = aptpl & 0x01; > + action = req->cmd.buf[1] & 0x1f; > + scope = (req->cmd.buf[2] >> 4) & 0x0f; > + type = req->cmd.buf[2] & 0x0f; > + > + if (scope != SCSI_PR_LU_SCOPE) { > + return -ENOTSUP; > + } > + > + switch (action) { > + case SCSI_PR_OUT_REGISTER: > + rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, > + aptpl, false); > + break; > + case SCSI_PR_OUT_REG_AND_IGNORE_KEY: > + rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, aptpl, true); > + break; > + case SCSI_PR_OUT_RESERVE: > + rc = scsi_disk_emulate_pr_reserve(r, r_key, type); > + break; > + case SCSI_PR_OUT_RELEASE: > + rc = scsi_disk_emulate_pr_release(r, r_key, type); > + break; > + case SCSI_PR_OUT_CLEAR: > + rc = scsi_disk_emulate_pr_clear(r, r_key); > + break; > + case SCSI_PR_OUT_PREEMPT: > + rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, false); > + break; > + case SCSI_PR_OUT_PREEMPT_AND_ABORT: > + rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, true); > + break; > + default: > + return -ENOTSUP; > + } > + > + return rc; > +} > + > static int scsi_disk_check_mode_select(SCSIDiskState *s, int page, > uint8_t *inbuf, int inlen) > { > @@ -1957,6 +2299,9 @@ static void scsi_disk_emulate_write_data(SCSIRequest *req) > scsi_req_complete(&r->req, GOOD); > break; > > + case PERSISTENT_RESERVE_OUT: > + scsi_disk_emulate_pr_out(r, r->iov.iov_base); > + break; > default: > abort(); > } > @@ -2213,6 +2558,11 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) > case FORMAT_UNIT: > trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer); > break; > + case PERSISTENT_RESERVE_OUT: > + break; > + case PERSISTENT_RESERVE_IN: > + scsi_disk_emulate_pr_in(req); > + return 0; > default: > trace_scsi_disk_emulate_command_UNKNOWN(buf[0], > scsi_command_name(buf[0])); > @@ -2632,6 +2982,8 @@ static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = { > [VERIFY_12] = &scsi_disk_emulate_reqops, > [VERIFY_16] = &scsi_disk_emulate_reqops, > [FORMAT_UNIT] = &scsi_disk_emulate_reqops, > + [PERSISTENT_RESERVE_IN] = &scsi_disk_emulate_reqops, > + [PERSISTENT_RESERVE_OUT] = &scsi_disk_emulate_reqops, > > [READ_6] = &scsi_disk_dma_reqops, > [READ_10] = &scsi_disk_dma_reqops, > -- > 2.20.1 >
On Thu, Jun 13, 2024 at 03:13:22PM +0800, Changqi Lu wrote: > Add persistent reservation in/out operations in the > SCSI device layer. By introducing the persistent > reservation in/out api, this enables the SCSI device > to perform reservation-related tasks, including querying > keys, querying reservation status, registering reservation > keys, initiating and releasing reservations, as well as > clearing and preempting reservations held by other keys. > > These operations are crucial for management and control of > shared storage resources in a persistent manner. > > Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com> > Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> > --- > hw/scsi/scsi-disk.c | 352 ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 352 insertions(+) > > diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c > index 4bd7af9d0c..0e964dbd87 100644 > --- a/hw/scsi/scsi-disk.c > +++ b/hw/scsi/scsi-disk.c > @@ -32,6 +32,7 @@ > #include "migration/vmstate.h" > #include "hw/scsi/emulation.h" > #include "scsi/constants.h" > +#include "scsi/utils.h" > #include "sysemu/block-backend.h" > #include "sysemu/blockdev.h" > #include "hw/block/block.h" > @@ -42,6 +43,7 @@ > #include "qemu/cutils.h" > #include "trace.h" > #include "qom/object.h" > +#include "block/block_int.h" > > #ifdef __linux > #include <scsi/sg.h> > @@ -1474,6 +1476,346 @@ static void scsi_disk_emulate_read_data(SCSIRequest *req) > scsi_req_complete(&r->req, GOOD); > } > > +typedef struct SCSIPrReadKeys { > + uint32_t generation; > + uint32_t num_keys; > + uint64_t *keys; > + void *req; > +} SCSIPrReadKeys; > + > +typedef struct SCSIPrReadReservation { > + uint32_t generation; > + uint64_t key; > + BlockPrType type; > + void *req; > +} SCSIPrReadReservation; > + > +static void scsi_pr_read_keys_complete(void *opaque, int ret) > +{ > + int num_keys; > + uint8_t *buf; > + SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque; > + SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); > + > + assert(blk_get_aio_context(s->qdev.conf.blk) == > + qemu_get_current_aio_context()); > + > + assert(r->req.aiocb != NULL); > + r->req.aiocb = NULL; > + > + if (scsi_disk_req_check_error(r, ret, true)) { > + goto done; > + } > + > + buf = scsi_req_get_buf(&r->req); > + num_keys = MIN(blk_keys->num_keys, ret); > + blk_keys->generation = cpu_to_be32(blk_keys->generation); > + memcpy(&buf[0], &blk_keys->generation, 4); > + for (int i = 0; i < num_keys; i++) { > + blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]); > + memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8); > + } > + num_keys = cpu_to_be32(num_keys * 8); > + memcpy(&buf[4], &num_keys, 4); > + > + scsi_req_data(&r->req, r->buflen); > +done: > + scsi_req_unref(&r->req); > + g_free(blk_keys->keys); > + g_free(blk_keys); > +} > + > +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req) > +{ > + SCSIPrReadKeys *blk_keys; > + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + int buflen = MIN(r->req.cmd.xfer, r->buflen); > + int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t); > + > + blk_keys = g_new0(SCSIPrReadKeys, 1); > + blk_keys->generation = 0; > + /* num_keys is the maximum number of keys that can be transmitted */ > + blk_keys->num_keys = num_keys; > + blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys); > + blk_keys->req = r; > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, &blk_keys->generation, > + blk_keys->num_keys, blk_keys->keys, > + scsi_pr_read_keys_complete, blk_keys); > + return 0; > +} > + > +static void scsi_pr_read_reservation_complete(void *opaque, int ret) > +{ > + uint8_t *buf; > + uint32_t additional_len = 0; > + SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque; > + SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); > + > + assert(blk_get_aio_context(s->qdev.conf.blk) == > + qemu_get_current_aio_context()); > + > + assert(r->req.aiocb != NULL); > + r->req.aiocb = NULL; > + > + if (scsi_disk_req_check_error(r, ret, true)) { > + goto done; > + } > + > + buf = scsi_req_get_buf(&r->req); > + blk_rsv->generation = cpu_to_be32(blk_rsv->generation); > + memcpy(&buf[0], &blk_rsv->generation, 4); > + if (ret) { > + additional_len = cpu_to_be32(16); > + blk_rsv->key = cpu_to_be64(blk_rsv->key); > + memcpy(&buf[8], &blk_rsv->key, 8); > + buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf; > + } else { > + additional_len = cpu_to_be32(0); > + } > + > + memcpy(&buf[4], &additional_len, 4); > + scsi_req_data(&r->req, r->buflen); > + > +done: > + scsi_req_unref(&r->req); > + g_free(blk_rsv); > +} > + > +static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req) > +{ > + SCSIPrReadReservation *blk_rsv; > + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + blk_rsv = g_malloc(sizeof(*blk_rsv)); > + blk_rsv->generation = 0; > + blk_rsv->key = 0; > + blk_rsv->type = 0; > + blk_rsv->req = r; > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk, > + &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type, > + scsi_pr_read_reservation_complete, blk_rsv); > + return 0; > +} > + > +static int scsi_disk_emulate_pr_report_capabilities(SCSIRequest *req) > +{ > + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + BlockDriverState *bs = blk_bs(s->qdev.conf.blk); > + uint8_t blk_pr_cap = bs->file->bs->bl.pr_cap; I'm not sure that file->bs is always valid here. Block layer code typically checks for !bs and !bs->drv. Also, ->file could be NULL if the SCSI disk points directly to --blockdev file without a --blockdev raw on top. I think the block layer should propagate pr_cap from the leaves of the block graph to the root node via bdrv_merge_limits() so that traversing the graph (->file) is not necessary. Instead this line should just be bs->bl.pr_cap. > + uint8_t *buf = scsi_req_get_buf(req); > + uint16_t pr_cap; > + /* > + * according to response of report_capabilities > + * command of chapter 6.13 of spc4 > + */ > + int len = sizeof(uint16_t) * 2 + sizeof(uint8_t) * 4; > + > + if (len > r->buflen) { > + return -EINVAL; > + } > + > + len = cpu_to_be16(len); > + memcpy(&buf[0], &len, 2); > + /* enable PTPL_C */ > + buf[2] = 1; > + buf[3] = (blk_pr_cap & BLK_PR_CAP_PTPL) ? 1 : 0; > + /* enable TMV */ > + buf[3] |= 1 << 7; > + > + /* enable persistent reservation type */ > + pr_cap = block_pr_cap_to_scsi(blk_pr_cap); > + buf[4] = pr_cap >> 8 & 0xff; > + buf[5] = pr_cap & 0xff; > + > + scsi_req_data(&r->req, len); > + return 0; > +} > + > +static void scsi_aio_pr_complete(void *opaque, int ret) > +{ > + SCSIDiskReq *r = (SCSIDiskReq *)opaque; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); > + > + /* The request must only run in the BlockBackend's AioContext */ > + assert(blk_get_aio_context(s->qdev.conf.blk) == > + qemu_get_current_aio_context()); > + > + assert(r->req.aiocb != NULL); > + r->req.aiocb = NULL; > + > + if (scsi_disk_req_check_error(r, ret, true)) { > + goto done; > + } > + > + scsi_req_complete(&r->req, GOOD); > + > +done: > + scsi_req_unref(&r->req); > +} > + > +static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t r_key, > + uint64_t sa_key, SCSIPrType type, > + uint8_t aptpl, bool ignore_key) > +{ > + SCSIRequest *req = &r->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, r_key, sa_key, > + scsi_pr_type_to_block(type), > + aptpl ? true : false, > + ignore_key, scsi_aio_pr_complete, r); > + > + return 0; > +} > + > +static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t r_key, > + SCSIPrType type) > +{ > + SCSIRequest *req = &r->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, r_key, > + scsi_pr_type_to_block(type), > + scsi_aio_pr_complete, r); > + > + return 0; > +} > + > +static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t r_key, > + SCSIPrType type) > +{ > + SCSIRequest *req = &r->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, r_key, > + scsi_pr_type_to_block(type), > + scsi_aio_pr_complete, r); > + > + return 0; > +} > + > +static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t r_key) > +{ > + SCSIRequest *req = &r->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, r_key, > + scsi_aio_pr_complete, r); > + > + return 0; > +} > + > +static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t r_key, > + uint64_t sa_key, SCSIPrType type, > + bool abort) > +{ > + SCSIRequest *req = &r->req; > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); > + > + /* The request is used as the AIO opaque value, so add a ref. */ > + scsi_req_ref(&r->req); > + r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, r_key, sa_key, > + scsi_pr_type_to_block(type), abort, > + scsi_aio_pr_complete, r); > + > + return 0; > +} > + > +static int scsi_disk_emulate_pr_in(SCSIRequest *req) > +{ > + int rc; > + SCSIPrInAction action = req->cmd.buf[1] & 0x1f; > + > + switch (action) { > + case SCSI_PR_IN_READ_KEYS: > + rc = scsi_disk_emulate_pr_read_keys(req); > + break; > + case SCSI_PR_IN_READ_RESERVATION: > + rc = scsi_disk_emulate_pr_read_reservation(req); > + break; > + case SCSI_PR_IN_REPORT_CAPABILITIES: > + rc = scsi_disk_emulate_pr_report_capabilities(req); > + break; > + default: > + return -ENOTSUP; > + } > + > + return rc; > +} > + > +static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf) > +{ > + int rc; > + uint8_t aptpl; > + uint64_t r_key, sa_key; > + SCSIPrOutAction action; > + SCSIPrScope scope; > + SCSIPrType type; > + SCSIRequest *req = &r->req; > + > + memcpy(&r_key, &inbuf[0], 8); > + r_key = be64_to_cpu(r_key); > + memcpy(&sa_key, &inbuf[8], 8); > + sa_key = be64_to_cpu(sa_key); > + memcpy(&aptpl, &inbuf[20], 1); > + aptpl = aptpl & 0x01; > + action = req->cmd.buf[1] & 0x1f; > + scope = (req->cmd.buf[2] >> 4) & 0x0f; > + type = req->cmd.buf[2] & 0x0f; > + > + if (scope != SCSI_PR_LU_SCOPE) { > + return -ENOTSUP; > + } > + > + switch (action) { > + case SCSI_PR_OUT_REGISTER: > + rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, > + aptpl, false); > + break; > + case SCSI_PR_OUT_REG_AND_IGNORE_KEY: > + rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, aptpl, true); > + break; > + case SCSI_PR_OUT_RESERVE: > + rc = scsi_disk_emulate_pr_reserve(r, r_key, type); > + break; > + case SCSI_PR_OUT_RELEASE: > + rc = scsi_disk_emulate_pr_release(r, r_key, type); > + break; > + case SCSI_PR_OUT_CLEAR: > + rc = scsi_disk_emulate_pr_clear(r, r_key); > + break; > + case SCSI_PR_OUT_PREEMPT: > + rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, false); > + break; > + case SCSI_PR_OUT_PREEMPT_AND_ABORT: > + rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, true); > + break; > + default: > + return -ENOTSUP; > + } > + > + return rc; > +} > + > static int scsi_disk_check_mode_select(SCSIDiskState *s, int page, > uint8_t *inbuf, int inlen) > { > @@ -1957,6 +2299,9 @@ static void scsi_disk_emulate_write_data(SCSIRequest *req) > scsi_req_complete(&r->req, GOOD); > break; > > + case PERSISTENT_RESERVE_OUT: > + scsi_disk_emulate_pr_out(r, r->iov.iov_base); > + break; > default: > abort(); > } > @@ -2213,6 +2558,11 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) > case FORMAT_UNIT: > trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer); > break; > + case PERSISTENT_RESERVE_OUT: > + break; > + case PERSISTENT_RESERVE_IN: > + scsi_disk_emulate_pr_in(req); > + return 0; > default: > trace_scsi_disk_emulate_command_UNKNOWN(buf[0], > scsi_command_name(buf[0])); > @@ -2632,6 +2982,8 @@ static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = { > [VERIFY_12] = &scsi_disk_emulate_reqops, > [VERIFY_16] = &scsi_disk_emulate_reqops, > [FORMAT_UNIT] = &scsi_disk_emulate_reqops, > + [PERSISTENT_RESERVE_IN] = &scsi_disk_emulate_reqops, > + [PERSISTENT_RESERVE_OUT] = &scsi_disk_emulate_reqops, > > [READ_6] = &scsi_disk_dma_reqops, > [READ_10] = &scsi_disk_dma_reqops, > -- > 2.20.1 >
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 4bd7af9d0c..0e964dbd87 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -32,6 +32,7 @@ #include "migration/vmstate.h" #include "hw/scsi/emulation.h" #include "scsi/constants.h" +#include "scsi/utils.h" #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" #include "hw/block/block.h" @@ -42,6 +43,7 @@ #include "qemu/cutils.h" #include "trace.h" #include "qom/object.h" +#include "block/block_int.h" #ifdef __linux #include <scsi/sg.h> @@ -1474,6 +1476,346 @@ static void scsi_disk_emulate_read_data(SCSIRequest *req) scsi_req_complete(&r->req, GOOD); } +typedef struct SCSIPrReadKeys { + uint32_t generation; + uint32_t num_keys; + uint64_t *keys; + void *req; +} SCSIPrReadKeys; + +typedef struct SCSIPrReadReservation { + uint32_t generation; + uint64_t key; + BlockPrType type; + void *req; +} SCSIPrReadReservation; + +static void scsi_pr_read_keys_complete(void *opaque, int ret) +{ + int num_keys; + uint8_t *buf; + SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque; + SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + assert(blk_get_aio_context(s->qdev.conf.blk) == + qemu_get_current_aio_context()); + + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } + + buf = scsi_req_get_buf(&r->req); + num_keys = MIN(blk_keys->num_keys, ret); + blk_keys->generation = cpu_to_be32(blk_keys->generation); + memcpy(&buf[0], &blk_keys->generation, 4); + for (int i = 0; i < num_keys; i++) { + blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]); + memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8); + } + num_keys = cpu_to_be32(num_keys * 8); + memcpy(&buf[4], &num_keys, 4); + + scsi_req_data(&r->req, r->buflen); +done: + scsi_req_unref(&r->req); + g_free(blk_keys->keys); + g_free(blk_keys); +} + +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req) +{ + SCSIPrReadKeys *blk_keys; + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + int buflen = MIN(r->req.cmd.xfer, r->buflen); + int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t); + + blk_keys = g_new0(SCSIPrReadKeys, 1); + blk_keys->generation = 0; + /* num_keys is the maximum number of keys that can be transmitted */ + blk_keys->num_keys = num_keys; + blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys); + blk_keys->req = r; + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, &blk_keys->generation, + blk_keys->num_keys, blk_keys->keys, + scsi_pr_read_keys_complete, blk_keys); + return 0; +} + +static void scsi_pr_read_reservation_complete(void *opaque, int ret) +{ + uint8_t *buf; + uint32_t additional_len = 0; + SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque; + SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + assert(blk_get_aio_context(s->qdev.conf.blk) == + qemu_get_current_aio_context()); + + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } + + buf = scsi_req_get_buf(&r->req); + blk_rsv->generation = cpu_to_be32(blk_rsv->generation); + memcpy(&buf[0], &blk_rsv->generation, 4); + if (ret) { + additional_len = cpu_to_be32(16); + blk_rsv->key = cpu_to_be64(blk_rsv->key); + memcpy(&buf[8], &blk_rsv->key, 8); + buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf; + } else { + additional_len = cpu_to_be32(0); + } + + memcpy(&buf[4], &additional_len, 4); + scsi_req_data(&r->req, r->buflen); + +done: + scsi_req_unref(&r->req); + g_free(blk_rsv); +} + +static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req) +{ + SCSIPrReadReservation *blk_rsv; + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + + blk_rsv = g_malloc(sizeof(*blk_rsv)); + blk_rsv->generation = 0; + blk_rsv->key = 0; + blk_rsv->type = 0; + blk_rsv->req = r; + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk, + &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type, + scsi_pr_read_reservation_complete, blk_rsv); + return 0; +} + +static int scsi_disk_emulate_pr_report_capabilities(SCSIRequest *req) +{ + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + BlockDriverState *bs = blk_bs(s->qdev.conf.blk); + uint8_t blk_pr_cap = bs->file->bs->bl.pr_cap; + uint8_t *buf = scsi_req_get_buf(req); + uint16_t pr_cap; + /* + * according to response of report_capabilities + * command of chapter 6.13 of spc4 + */ + int len = sizeof(uint16_t) * 2 + sizeof(uint8_t) * 4; + + if (len > r->buflen) { + return -EINVAL; + } + + len = cpu_to_be16(len); + memcpy(&buf[0], &len, 2); + /* enable PTPL_C */ + buf[2] = 1; + buf[3] = (blk_pr_cap & BLK_PR_CAP_PTPL) ? 1 : 0; + /* enable TMV */ + buf[3] |= 1 << 7; + + /* enable persistent reservation type */ + pr_cap = block_pr_cap_to_scsi(blk_pr_cap); + buf[4] = pr_cap >> 8 & 0xff; + buf[5] = pr_cap & 0xff; + + scsi_req_data(&r->req, len); + return 0; +} + +static void scsi_aio_pr_complete(void *opaque, int ret) +{ + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + /* The request must only run in the BlockBackend's AioContext */ + assert(blk_get_aio_context(s->qdev.conf.blk) == + qemu_get_current_aio_context()); + + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } + + scsi_req_complete(&r->req, GOOD); + +done: + scsi_req_unref(&r->req); +} + +static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t r_key, + uint64_t sa_key, SCSIPrType type, + uint8_t aptpl, bool ignore_key) +{ + SCSIRequest *req = &r->req; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, r_key, sa_key, + scsi_pr_type_to_block(type), + aptpl ? true : false, + ignore_key, scsi_aio_pr_complete, r); + + return 0; +} + +static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t r_key, + SCSIPrType type) +{ + SCSIRequest *req = &r->req; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, r_key, + scsi_pr_type_to_block(type), + scsi_aio_pr_complete, r); + + return 0; +} + +static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t r_key, + SCSIPrType type) +{ + SCSIRequest *req = &r->req; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, r_key, + scsi_pr_type_to_block(type), + scsi_aio_pr_complete, r); + + return 0; +} + +static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t r_key) +{ + SCSIRequest *req = &r->req; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, r_key, + scsi_aio_pr_complete, r); + + return 0; +} + +static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t r_key, + uint64_t sa_key, SCSIPrType type, + bool abort) +{ + SCSIRequest *req = &r->req; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + + /* The request is used as the AIO opaque value, so add a ref. */ + scsi_req_ref(&r->req); + r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, r_key, sa_key, + scsi_pr_type_to_block(type), abort, + scsi_aio_pr_complete, r); + + return 0; +} + +static int scsi_disk_emulate_pr_in(SCSIRequest *req) +{ + int rc; + SCSIPrInAction action = req->cmd.buf[1] & 0x1f; + + switch (action) { + case SCSI_PR_IN_READ_KEYS: + rc = scsi_disk_emulate_pr_read_keys(req); + break; + case SCSI_PR_IN_READ_RESERVATION: + rc = scsi_disk_emulate_pr_read_reservation(req); + break; + case SCSI_PR_IN_REPORT_CAPABILITIES: + rc = scsi_disk_emulate_pr_report_capabilities(req); + break; + default: + return -ENOTSUP; + } + + return rc; +} + +static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf) +{ + int rc; + uint8_t aptpl; + uint64_t r_key, sa_key; + SCSIPrOutAction action; + SCSIPrScope scope; + SCSIPrType type; + SCSIRequest *req = &r->req; + + memcpy(&r_key, &inbuf[0], 8); + r_key = be64_to_cpu(r_key); + memcpy(&sa_key, &inbuf[8], 8); + sa_key = be64_to_cpu(sa_key); + memcpy(&aptpl, &inbuf[20], 1); + aptpl = aptpl & 0x01; + action = req->cmd.buf[1] & 0x1f; + scope = (req->cmd.buf[2] >> 4) & 0x0f; + type = req->cmd.buf[2] & 0x0f; + + if (scope != SCSI_PR_LU_SCOPE) { + return -ENOTSUP; + } + + switch (action) { + case SCSI_PR_OUT_REGISTER: + rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, + aptpl, false); + break; + case SCSI_PR_OUT_REG_AND_IGNORE_KEY: + rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, aptpl, true); + break; + case SCSI_PR_OUT_RESERVE: + rc = scsi_disk_emulate_pr_reserve(r, r_key, type); + break; + case SCSI_PR_OUT_RELEASE: + rc = scsi_disk_emulate_pr_release(r, r_key, type); + break; + case SCSI_PR_OUT_CLEAR: + rc = scsi_disk_emulate_pr_clear(r, r_key); + break; + case SCSI_PR_OUT_PREEMPT: + rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, false); + break; + case SCSI_PR_OUT_PREEMPT_AND_ABORT: + rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, true); + break; + default: + return -ENOTSUP; + } + + return rc; +} + static int scsi_disk_check_mode_select(SCSIDiskState *s, int page, uint8_t *inbuf, int inlen) { @@ -1957,6 +2299,9 @@ static void scsi_disk_emulate_write_data(SCSIRequest *req) scsi_req_complete(&r->req, GOOD); break; + case PERSISTENT_RESERVE_OUT: + scsi_disk_emulate_pr_out(r, r->iov.iov_base); + break; default: abort(); } @@ -2213,6 +2558,11 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) case FORMAT_UNIT: trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer); break; + case PERSISTENT_RESERVE_OUT: + break; + case PERSISTENT_RESERVE_IN: + scsi_disk_emulate_pr_in(req); + return 0; default: trace_scsi_disk_emulate_command_UNKNOWN(buf[0], scsi_command_name(buf[0])); @@ -2632,6 +2982,8 @@ static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = { [VERIFY_12] = &scsi_disk_emulate_reqops, [VERIFY_16] = &scsi_disk_emulate_reqops, [FORMAT_UNIT] = &scsi_disk_emulate_reqops, + [PERSISTENT_RESERVE_IN] = &scsi_disk_emulate_reqops, + [PERSISTENT_RESERVE_OUT] = &scsi_disk_emulate_reqops, [READ_6] = &scsi_disk_dma_reqops, [READ_10] = &scsi_disk_dma_reqops,