diff mbox series

[v6,05/10] hw/scsi: add persistent reservation in/out api for scsi device

Message ID 20240613071327.2498953-6-luchangqi.123@bytedance.com
State New
Headers show
Series Support persistent reservation operations | expand

Commit Message

Changqi Lu June 13, 2024, 7:13 a.m. UTC
Add persistent reservation in/out operations in the
SCSI device layer. By introducing the persistent
reservation in/out api, this enables the SCSI device
to perform reservation-related tasks, including querying
keys, querying reservation status, registering reservation
keys, initiating and releasing reservations, as well as
clearing and preempting reservations held by other keys.

These operations are crucial for management and control of
shared storage resources in a persistent manner.

Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com>
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 hw/scsi/scsi-disk.c | 352 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 352 insertions(+)

Comments

Stefan Hajnoczi July 4, 2024, 4:46 p.m. UTC | #1
On Thu, Jun 13, 2024 at 03:13:22PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations in the
> SCSI device layer. By introducing the persistent
> reservation in/out api, this enables the SCSI device
> to perform reservation-related tasks, including querying
> keys, querying reservation status, registering reservation
> keys, initiating and releasing reservations, as well as
> clearing and preempting reservations held by other keys.
> 
> These operations are crucial for management and control of
> shared storage resources in a persistent manner.
> 
> Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com>
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>

As mentioned in my reply to a previous version, I don't understand the
buffer allocation/sizing in hw/scsi/ so I haven't been able to fully
review this code for buffer overflows and input validation. cmd.xfer
isn't consistently used for size checks in the new functions. Maybe some
checks are missing?

> ---
>  hw/scsi/scsi-disk.c | 352 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 352 insertions(+)
> 
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 4bd7af9d0c..0e964dbd87 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -32,6 +32,7 @@
>  #include "migration/vmstate.h"
>  #include "hw/scsi/emulation.h"
>  #include "scsi/constants.h"
> +#include "scsi/utils.h"
>  #include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/block/block.h"
> @@ -42,6 +43,7 @@
>  #include "qemu/cutils.h"
>  #include "trace.h"
>  #include "qom/object.h"
> +#include "block/block_int.h"
>  
>  #ifdef __linux
>  #include <scsi/sg.h>
> @@ -1474,6 +1476,346 @@ static void scsi_disk_emulate_read_data(SCSIRequest *req)
>      scsi_req_complete(&r->req, GOOD);
>  }
>  
> +typedef struct SCSIPrReadKeys {
> +    uint32_t generation;
> +    uint32_t num_keys;
> +    uint64_t *keys;
> +    void     *req;

Why is this field void * instead of SCSIDiskReq *?

> +} SCSIPrReadKeys;
> +
> +typedef struct SCSIPrReadReservation {
> +    uint32_t generation;
> +    uint64_t key;
> +    BlockPrType type;
> +    void *req;

Same here.

> +} SCSIPrReadReservation;
> +
> +static void scsi_pr_read_keys_complete(void *opaque, int ret)
> +{
> +    int num_keys;
> +    uint8_t *buf;
> +    SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque;
> +    SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +            qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    buf = scsi_req_get_buf(&r->req);
> +    num_keys = MIN(blk_keys->num_keys, ret);
> +    blk_keys->generation = cpu_to_be32(blk_keys->generation);
> +    memcpy(&buf[0], &blk_keys->generation, 4);
> +    for (int i = 0; i < num_keys; i++) {
> +        blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]);
> +        memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8);
> +    }
> +    num_keys = cpu_to_be32(num_keys * 8);
> +    memcpy(&buf[4], &num_keys, 4);
> +
> +    scsi_req_data(&r->req, r->buflen);
> +done:
> +    scsi_req_unref(&r->req);
> +    g_free(blk_keys->keys);
> +    g_free(blk_keys);
> +}
> +
> +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req)
> +{
> +    SCSIPrReadKeys *blk_keys;
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +    int buflen = MIN(r->req.cmd.xfer, r->buflen);
> +    int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t);

If buflen is an untrusted input then num_keys < 0 and maybe num_keys ==
0 need to be rejected with an error.

> +
> +    blk_keys = g_new0(SCSIPrReadKeys, 1);
> +    blk_keys->generation = 0;
> +    /* num_keys is the maximum number of keys that can be transmitted */
> +    blk_keys->num_keys = num_keys;
> +    blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys);
> +    blk_keys->req = r;
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, &blk_keys->generation,
> +                                        blk_keys->num_keys, blk_keys->keys,
> +                                        scsi_pr_read_keys_complete, blk_keys);
> +    return 0;
> +}
> +
> +static void scsi_pr_read_reservation_complete(void *opaque, int ret)
> +{
> +    uint8_t *buf;
> +    uint32_t additional_len = 0;
> +    SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque;
> +    SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +            qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    buf = scsi_req_get_buf(&r->req);
> +    blk_rsv->generation = cpu_to_be32(blk_rsv->generation);
> +    memcpy(&buf[0], &blk_rsv->generation, 4);
> +    if (ret) {
> +        additional_len = cpu_to_be32(16);
> +        blk_rsv->key = cpu_to_be64(blk_rsv->key);
> +        memcpy(&buf[8], &blk_rsv->key, 8);
> +        buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf;
> +    } else {
> +        additional_len = cpu_to_be32(0);
> +    }
> +
> +    memcpy(&buf[4], &additional_len, 4);
> +    scsi_req_data(&r->req, r->buflen);
> +
> +done:
> +    scsi_req_unref(&r->req);
> +    g_free(blk_rsv);
> +}
> +
> +static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req)
> +{
> +    SCSIPrReadReservation *blk_rsv;
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);

There is a buflen check in scsi_disk_emulate_pr_report_capabilities()
but not here. Is a check needed here to make sure up to 22 bytes are
available?

> +
> +    blk_rsv = g_malloc(sizeof(*blk_rsv));
> +    blk_rsv->generation = 0;
> +    blk_rsv->key = 0;
> +    blk_rsv->type = 0;
> +    blk_rsv->req = r;
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk,
> +                   &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type,
> +                   scsi_pr_read_reservation_complete, blk_rsv);
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_report_capabilities(SCSIRequest *req)
> +{
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +    BlockDriverState *bs = blk_bs(s->qdev.conf.blk);
> +    uint8_t blk_pr_cap = bs->file->bs->bl.pr_cap;
> +    uint8_t *buf = scsi_req_get_buf(req);
> +    uint16_t pr_cap;
> +    /*
> +     * according to response of report_capabilities
> +     * command of chapter 6.13 of spc4
> +     */
> +    int len = sizeof(uint16_t) * 2 + sizeof(uint8_t) * 4;
> +
> +    if (len > r->buflen) {
> +        return -EINVAL;
> +    }
> +
> +    len = cpu_to_be16(len);
> +    memcpy(&buf[0], &len, 2);
> +    /* enable PTPL_C */
> +    buf[2] = 1;
> +    buf[3] = (blk_pr_cap & BLK_PR_CAP_PTPL) ? 1 : 0;
> +    /* enable TMV */
> +    buf[3] |= 1 << 7;
> +
> +    /* enable persistent reservation type */
> +    pr_cap = block_pr_cap_to_scsi(blk_pr_cap);
> +    buf[4] = pr_cap >> 8 & 0xff;
> +    buf[5] = pr_cap & 0xff;
> +
> +    scsi_req_data(&r->req, len);
> +    return 0;
> +}
> +
> +static void scsi_aio_pr_complete(void *opaque, int ret)
> +{
> +    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    /* The request must only run in the BlockBackend's AioContext */
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +           qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    scsi_req_complete(&r->req, GOOD);
> +
> +done:
> +    scsi_req_unref(&r->req);
> +}
> +
> +static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t r_key,
> +                                         uint64_t sa_key, SCSIPrType type,
> +                                         uint8_t aptpl, bool ignore_key)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, r_key, sa_key,
> +                                       scsi_pr_type_to_block(type),
> +                                       aptpl ? true : false,
> +                                       ignore_key, scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t r_key,
> +                                        SCSIPrType type)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, r_key,
> +                                      scsi_pr_type_to_block(type),
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t r_key,
> +                                        SCSIPrType type)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, r_key,
> +                                      scsi_pr_type_to_block(type),
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t r_key)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, r_key,
> +                                    scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t r_key,
> +                                        uint64_t sa_key, SCSIPrType type,
> +                                        bool abort)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, r_key, sa_key,
> +                                      scsi_pr_type_to_block(type), abort,
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_in(SCSIRequest *req)
> +{
> +    int rc;
> +    SCSIPrInAction action = req->cmd.buf[1] & 0x1f;
> +
> +    switch (action) {
> +    case SCSI_PR_IN_READ_KEYS:
> +        rc = scsi_disk_emulate_pr_read_keys(req);
> +        break;
> +    case SCSI_PR_IN_READ_RESERVATION:
> +        rc = scsi_disk_emulate_pr_read_reservation(req);
> +        break;
> +    case SCSI_PR_IN_REPORT_CAPABILITIES:
> +        rc = scsi_disk_emulate_pr_report_capabilities(req);
> +        break;
> +    default:
> +        return -ENOTSUP;
> +    }
> +
> +    return rc;
> +}
> +
> +static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf)
> +{
> +    int rc;
> +    uint8_t aptpl;
> +    uint64_t r_key, sa_key;
> +    SCSIPrOutAction action;
> +    SCSIPrScope scope;
> +    SCSIPrType type;
> +    SCSIRequest *req = &r->req;
> +
> +    memcpy(&r_key, &inbuf[0], 8);
> +    r_key = be64_to_cpu(r_key);
> +    memcpy(&sa_key, &inbuf[8], 8);
> +    sa_key = be64_to_cpu(sa_key);
> +    memcpy(&aptpl, &inbuf[20], 1);
> +    aptpl =  aptpl & 0x01;
> +    action = req->cmd.buf[1] & 0x1f;
> +    scope = (req->cmd.buf[2] >> 4) & 0x0f;
> +    type = req->cmd.buf[2] & 0x0f;
> +
> +    if (scope != SCSI_PR_LU_SCOPE) {
> +        return -ENOTSUP;
> +    }
> +
> +    switch (action) {
> +    case SCSI_PR_OUT_REGISTER:
> +        rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type,
> +                                           aptpl, false);
> +        break;
> +    case SCSI_PR_OUT_REG_AND_IGNORE_KEY:
> +        rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, aptpl, true);
> +        break;
> +    case SCSI_PR_OUT_RESERVE:
> +        rc = scsi_disk_emulate_pr_reserve(r, r_key, type);
> +        break;
> +    case SCSI_PR_OUT_RELEASE:
> +        rc = scsi_disk_emulate_pr_release(r, r_key, type);
> +        break;
> +    case SCSI_PR_OUT_CLEAR:
> +        rc = scsi_disk_emulate_pr_clear(r, r_key);
> +        break;
> +    case SCSI_PR_OUT_PREEMPT:
> +        rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, false);
> +        break;
> +    case SCSI_PR_OUT_PREEMPT_AND_ABORT:
> +        rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, true);
> +        break;
> +    default:
> +        return -ENOTSUP;
> +    }
> +
> +    return rc;
> +}
> +
>  static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
>                                         uint8_t *inbuf, int inlen)
>  {
> @@ -1957,6 +2299,9 @@ static void scsi_disk_emulate_write_data(SCSIRequest *req)
>          scsi_req_complete(&r->req, GOOD);
>          break;
>  
> +    case PERSISTENT_RESERVE_OUT:
> +        scsi_disk_emulate_pr_out(r, r->iov.iov_base);
> +        break;
>      default:
>          abort();
>      }
> @@ -2213,6 +2558,11 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
>      case FORMAT_UNIT:
>          trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
>          break;
> +    case PERSISTENT_RESERVE_OUT:
> +        break;
> +    case PERSISTENT_RESERVE_IN:
> +        scsi_disk_emulate_pr_in(req);
> +        return 0;
>      default:
>          trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
>                                                  scsi_command_name(buf[0]));
> @@ -2632,6 +2982,8 @@ static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
>      [VERIFY_12]                       = &scsi_disk_emulate_reqops,
>      [VERIFY_16]                       = &scsi_disk_emulate_reqops,
>      [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,
> +    [PERSISTENT_RESERVE_IN]           = &scsi_disk_emulate_reqops,
> +    [PERSISTENT_RESERVE_OUT]          = &scsi_disk_emulate_reqops,
>  
>      [READ_6]                          = &scsi_disk_dma_reqops,
>      [READ_10]                         = &scsi_disk_dma_reqops,
> -- 
> 2.20.1
>
Stefan Hajnoczi July 4, 2024, 6:28 p.m. UTC | #2
On Thu, Jun 13, 2024 at 03:13:22PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations in the
> SCSI device layer. By introducing the persistent
> reservation in/out api, this enables the SCSI device
> to perform reservation-related tasks, including querying
> keys, querying reservation status, registering reservation
> keys, initiating and releasing reservations, as well as
> clearing and preempting reservations held by other keys.
> 
> These operations are crucial for management and control of
> shared storage resources in a persistent manner.
> 
> Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com>
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>  hw/scsi/scsi-disk.c | 352 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 352 insertions(+)
> 
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 4bd7af9d0c..0e964dbd87 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -32,6 +32,7 @@
>  #include "migration/vmstate.h"
>  #include "hw/scsi/emulation.h"
>  #include "scsi/constants.h"
> +#include "scsi/utils.h"
>  #include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/block/block.h"
> @@ -42,6 +43,7 @@
>  #include "qemu/cutils.h"
>  #include "trace.h"
>  #include "qom/object.h"
> +#include "block/block_int.h"
>  
>  #ifdef __linux
>  #include <scsi/sg.h>
> @@ -1474,6 +1476,346 @@ static void scsi_disk_emulate_read_data(SCSIRequest *req)
>      scsi_req_complete(&r->req, GOOD);
>  }
>  
> +typedef struct SCSIPrReadKeys {
> +    uint32_t generation;
> +    uint32_t num_keys;
> +    uint64_t *keys;
> +    void     *req;
> +} SCSIPrReadKeys;
> +
> +typedef struct SCSIPrReadReservation {
> +    uint32_t generation;
> +    uint64_t key;
> +    BlockPrType type;
> +    void *req;
> +} SCSIPrReadReservation;
> +
> +static void scsi_pr_read_keys_complete(void *opaque, int ret)
> +{
> +    int num_keys;
> +    uint8_t *buf;
> +    SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque;
> +    SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +            qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    buf = scsi_req_get_buf(&r->req);
> +    num_keys = MIN(blk_keys->num_keys, ret);
> +    blk_keys->generation = cpu_to_be32(blk_keys->generation);
> +    memcpy(&buf[0], &blk_keys->generation, 4);
> +    for (int i = 0; i < num_keys; i++) {
> +        blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]);
> +        memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8);
> +    }
> +    num_keys = cpu_to_be32(num_keys * 8);
> +    memcpy(&buf[4], &num_keys, 4);
> +
> +    scsi_req_data(&r->req, r->buflen);
> +done:
> +    scsi_req_unref(&r->req);
> +    g_free(blk_keys->keys);
> +    g_free(blk_keys);
> +}
> +
> +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req)
> +{
> +    SCSIPrReadKeys *blk_keys;
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +    int buflen = MIN(r->req.cmd.xfer, r->buflen);
> +    int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t);
> +
> +    blk_keys = g_new0(SCSIPrReadKeys, 1);
> +    blk_keys->generation = 0;
> +    /* num_keys is the maximum number of keys that can be transmitted */
> +    blk_keys->num_keys = num_keys;
> +    blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys);
> +    blk_keys->req = r;
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, &blk_keys->generation,
> +                                        blk_keys->num_keys, blk_keys->keys,
> +                                        scsi_pr_read_keys_complete, blk_keys);
> +    return 0;
> +}
> +
> +static void scsi_pr_read_reservation_complete(void *opaque, int ret)
> +{
> +    uint8_t *buf;
> +    uint32_t additional_len = 0;
> +    SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque;
> +    SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +            qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    buf = scsi_req_get_buf(&r->req);
> +    blk_rsv->generation = cpu_to_be32(blk_rsv->generation);
> +    memcpy(&buf[0], &blk_rsv->generation, 4);
> +    if (ret) {
> +        additional_len = cpu_to_be32(16);
> +        blk_rsv->key = cpu_to_be64(blk_rsv->key);
> +        memcpy(&buf[8], &blk_rsv->key, 8);
> +        buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf;
> +    } else {
> +        additional_len = cpu_to_be32(0);
> +    }
> +
> +    memcpy(&buf[4], &additional_len, 4);
> +    scsi_req_data(&r->req, r->buflen);
> +
> +done:
> +    scsi_req_unref(&r->req);
> +    g_free(blk_rsv);
> +}
> +
> +static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req)
> +{
> +    SCSIPrReadReservation *blk_rsv;
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    blk_rsv = g_malloc(sizeof(*blk_rsv));
> +    blk_rsv->generation = 0;
> +    blk_rsv->key = 0;
> +    blk_rsv->type = 0;
> +    blk_rsv->req = r;
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk,
> +                   &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type,
> +                   scsi_pr_read_reservation_complete, blk_rsv);
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_report_capabilities(SCSIRequest *req)
> +{
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +    BlockDriverState *bs = blk_bs(s->qdev.conf.blk);
> +    uint8_t blk_pr_cap = bs->file->bs->bl.pr_cap;

I'm not sure that file->bs is always valid here. Block layer code
typically checks for !bs and !bs->drv.

Also, ->file could be NULL if the SCSI disk points directly to
--blockdev file without a --blockdev raw on top. I think the block layer
should propagate pr_cap from the leaves of the block graph to the root
node via bdrv_merge_limits() so that traversing the graph (->file) is
not necessary. Instead this line should just be bs->bl.pr_cap.

> +    uint8_t *buf = scsi_req_get_buf(req);
> +    uint16_t pr_cap;
> +    /*
> +     * according to response of report_capabilities
> +     * command of chapter 6.13 of spc4
> +     */
> +    int len = sizeof(uint16_t) * 2 + sizeof(uint8_t) * 4;
> +
> +    if (len > r->buflen) {
> +        return -EINVAL;
> +    }
> +
> +    len = cpu_to_be16(len);
> +    memcpy(&buf[0], &len, 2);
> +    /* enable PTPL_C */
> +    buf[2] = 1;
> +    buf[3] = (blk_pr_cap & BLK_PR_CAP_PTPL) ? 1 : 0;
> +    /* enable TMV */
> +    buf[3] |= 1 << 7;
> +
> +    /* enable persistent reservation type */
> +    pr_cap = block_pr_cap_to_scsi(blk_pr_cap);
> +    buf[4] = pr_cap >> 8 & 0xff;
> +    buf[5] = pr_cap & 0xff;
> +
> +    scsi_req_data(&r->req, len);
> +    return 0;
> +}
> +
> +static void scsi_aio_pr_complete(void *opaque, int ret)
> +{
> +    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    /* The request must only run in the BlockBackend's AioContext */
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +           qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    scsi_req_complete(&r->req, GOOD);
> +
> +done:
> +    scsi_req_unref(&r->req);
> +}
> +
> +static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t r_key,
> +                                         uint64_t sa_key, SCSIPrType type,
> +                                         uint8_t aptpl, bool ignore_key)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, r_key, sa_key,
> +                                       scsi_pr_type_to_block(type),
> +                                       aptpl ? true : false,
> +                                       ignore_key, scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t r_key,
> +                                        SCSIPrType type)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, r_key,
> +                                      scsi_pr_type_to_block(type),
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t r_key,
> +                                        SCSIPrType type)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, r_key,
> +                                      scsi_pr_type_to_block(type),
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t r_key)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, r_key,
> +                                    scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t r_key,
> +                                        uint64_t sa_key, SCSIPrType type,
> +                                        bool abort)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, r_key, sa_key,
> +                                      scsi_pr_type_to_block(type), abort,
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_in(SCSIRequest *req)
> +{
> +    int rc;
> +    SCSIPrInAction action = req->cmd.buf[1] & 0x1f;
> +
> +    switch (action) {
> +    case SCSI_PR_IN_READ_KEYS:
> +        rc = scsi_disk_emulate_pr_read_keys(req);
> +        break;
> +    case SCSI_PR_IN_READ_RESERVATION:
> +        rc = scsi_disk_emulate_pr_read_reservation(req);
> +        break;
> +    case SCSI_PR_IN_REPORT_CAPABILITIES:
> +        rc = scsi_disk_emulate_pr_report_capabilities(req);
> +        break;
> +    default:
> +        return -ENOTSUP;
> +    }
> +
> +    return rc;
> +}
> +
> +static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf)
> +{
> +    int rc;
> +    uint8_t aptpl;
> +    uint64_t r_key, sa_key;
> +    SCSIPrOutAction action;
> +    SCSIPrScope scope;
> +    SCSIPrType type;
> +    SCSIRequest *req = &r->req;
> +
> +    memcpy(&r_key, &inbuf[0], 8);
> +    r_key = be64_to_cpu(r_key);
> +    memcpy(&sa_key, &inbuf[8], 8);
> +    sa_key = be64_to_cpu(sa_key);
> +    memcpy(&aptpl, &inbuf[20], 1);
> +    aptpl =  aptpl & 0x01;
> +    action = req->cmd.buf[1] & 0x1f;
> +    scope = (req->cmd.buf[2] >> 4) & 0x0f;
> +    type = req->cmd.buf[2] & 0x0f;
> +
> +    if (scope != SCSI_PR_LU_SCOPE) {
> +        return -ENOTSUP;
> +    }
> +
> +    switch (action) {
> +    case SCSI_PR_OUT_REGISTER:
> +        rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type,
> +                                           aptpl, false);
> +        break;
> +    case SCSI_PR_OUT_REG_AND_IGNORE_KEY:
> +        rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, aptpl, true);
> +        break;
> +    case SCSI_PR_OUT_RESERVE:
> +        rc = scsi_disk_emulate_pr_reserve(r, r_key, type);
> +        break;
> +    case SCSI_PR_OUT_RELEASE:
> +        rc = scsi_disk_emulate_pr_release(r, r_key, type);
> +        break;
> +    case SCSI_PR_OUT_CLEAR:
> +        rc = scsi_disk_emulate_pr_clear(r, r_key);
> +        break;
> +    case SCSI_PR_OUT_PREEMPT:
> +        rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, false);
> +        break;
> +    case SCSI_PR_OUT_PREEMPT_AND_ABORT:
> +        rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, true);
> +        break;
> +    default:
> +        return -ENOTSUP;
> +    }
> +
> +    return rc;
> +}
> +
>  static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
>                                         uint8_t *inbuf, int inlen)
>  {
> @@ -1957,6 +2299,9 @@ static void scsi_disk_emulate_write_data(SCSIRequest *req)
>          scsi_req_complete(&r->req, GOOD);
>          break;
>  
> +    case PERSISTENT_RESERVE_OUT:
> +        scsi_disk_emulate_pr_out(r, r->iov.iov_base);
> +        break;
>      default:
>          abort();
>      }
> @@ -2213,6 +2558,11 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
>      case FORMAT_UNIT:
>          trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
>          break;
> +    case PERSISTENT_RESERVE_OUT:
> +        break;
> +    case PERSISTENT_RESERVE_IN:
> +        scsi_disk_emulate_pr_in(req);
> +        return 0;
>      default:
>          trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
>                                                  scsi_command_name(buf[0]));
> @@ -2632,6 +2982,8 @@ static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
>      [VERIFY_12]                       = &scsi_disk_emulate_reqops,
>      [VERIFY_16]                       = &scsi_disk_emulate_reqops,
>      [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,
> +    [PERSISTENT_RESERVE_IN]           = &scsi_disk_emulate_reqops,
> +    [PERSISTENT_RESERVE_OUT]          = &scsi_disk_emulate_reqops,
>  
>      [READ_6]                          = &scsi_disk_dma_reqops,
>      [READ_10]                         = &scsi_disk_dma_reqops,
> -- 
> 2.20.1
>
diff mbox series

Patch

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 4bd7af9d0c..0e964dbd87 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -32,6 +32,7 @@ 
 #include "migration/vmstate.h"
 #include "hw/scsi/emulation.h"
 #include "scsi/constants.h"
+#include "scsi/utils.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "hw/block/block.h"
@@ -42,6 +43,7 @@ 
 #include "qemu/cutils.h"
 #include "trace.h"
 #include "qom/object.h"
+#include "block/block_int.h"
 
 #ifdef __linux
 #include <scsi/sg.h>
@@ -1474,6 +1476,346 @@  static void scsi_disk_emulate_read_data(SCSIRequest *req)
     scsi_req_complete(&r->req, GOOD);
 }
 
+typedef struct SCSIPrReadKeys {
+    uint32_t generation;
+    uint32_t num_keys;
+    uint64_t *keys;
+    void     *req;
+} SCSIPrReadKeys;
+
+typedef struct SCSIPrReadReservation {
+    uint32_t generation;
+    uint64_t key;
+    BlockPrType type;
+    void *req;
+} SCSIPrReadReservation;
+
+static void scsi_pr_read_keys_complete(void *opaque, int ret)
+{
+    int num_keys;
+    uint8_t *buf;
+    SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque;
+    SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+    assert(blk_get_aio_context(s->qdev.conf.blk) ==
+            qemu_get_current_aio_context());
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    if (scsi_disk_req_check_error(r, ret, true)) {
+        goto done;
+    }
+
+    buf = scsi_req_get_buf(&r->req);
+    num_keys = MIN(blk_keys->num_keys, ret);
+    blk_keys->generation = cpu_to_be32(blk_keys->generation);
+    memcpy(&buf[0], &blk_keys->generation, 4);
+    for (int i = 0; i < num_keys; i++) {
+        blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]);
+        memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8);
+    }
+    num_keys = cpu_to_be32(num_keys * 8);
+    memcpy(&buf[4], &num_keys, 4);
+
+    scsi_req_data(&r->req, r->buflen);
+done:
+    scsi_req_unref(&r->req);
+    g_free(blk_keys->keys);
+    g_free(blk_keys);
+}
+
+static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req)
+{
+    SCSIPrReadKeys *blk_keys;
+    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+    int buflen = MIN(r->req.cmd.xfer, r->buflen);
+    int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t);
+
+    blk_keys = g_new0(SCSIPrReadKeys, 1);
+    blk_keys->generation = 0;
+    /* num_keys is the maximum number of keys that can be transmitted */
+    blk_keys->num_keys = num_keys;
+    blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys);
+    blk_keys->req = r;
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
+    r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, &blk_keys->generation,
+                                        blk_keys->num_keys, blk_keys->keys,
+                                        scsi_pr_read_keys_complete, blk_keys);
+    return 0;
+}
+
+static void scsi_pr_read_reservation_complete(void *opaque, int ret)
+{
+    uint8_t *buf;
+    uint32_t additional_len = 0;
+    SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque;
+    SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+    assert(blk_get_aio_context(s->qdev.conf.blk) ==
+            qemu_get_current_aio_context());
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    if (scsi_disk_req_check_error(r, ret, true)) {
+        goto done;
+    }
+
+    buf = scsi_req_get_buf(&r->req);
+    blk_rsv->generation = cpu_to_be32(blk_rsv->generation);
+    memcpy(&buf[0], &blk_rsv->generation, 4);
+    if (ret) {
+        additional_len = cpu_to_be32(16);
+        blk_rsv->key = cpu_to_be64(blk_rsv->key);
+        memcpy(&buf[8], &blk_rsv->key, 8);
+        buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf;
+    } else {
+        additional_len = cpu_to_be32(0);
+    }
+
+    memcpy(&buf[4], &additional_len, 4);
+    scsi_req_data(&r->req, r->buflen);
+
+done:
+    scsi_req_unref(&r->req);
+    g_free(blk_rsv);
+}
+
+static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req)
+{
+    SCSIPrReadReservation *blk_rsv;
+    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+
+    blk_rsv = g_malloc(sizeof(*blk_rsv));
+    blk_rsv->generation = 0;
+    blk_rsv->key = 0;
+    blk_rsv->type = 0;
+    blk_rsv->req = r;
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
+    r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk,
+                   &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type,
+                   scsi_pr_read_reservation_complete, blk_rsv);
+    return 0;
+}
+
+static int scsi_disk_emulate_pr_report_capabilities(SCSIRequest *req)
+{
+    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+    BlockDriverState *bs = blk_bs(s->qdev.conf.blk);
+    uint8_t blk_pr_cap = bs->file->bs->bl.pr_cap;
+    uint8_t *buf = scsi_req_get_buf(req);
+    uint16_t pr_cap;
+    /*
+     * according to response of report_capabilities
+     * command of chapter 6.13 of spc4
+     */
+    int len = sizeof(uint16_t) * 2 + sizeof(uint8_t) * 4;
+
+    if (len > r->buflen) {
+        return -EINVAL;
+    }
+
+    len = cpu_to_be16(len);
+    memcpy(&buf[0], &len, 2);
+    /* enable PTPL_C */
+    buf[2] = 1;
+    buf[3] = (blk_pr_cap & BLK_PR_CAP_PTPL) ? 1 : 0;
+    /* enable TMV */
+    buf[3] |= 1 << 7;
+
+    /* enable persistent reservation type */
+    pr_cap = block_pr_cap_to_scsi(blk_pr_cap);
+    buf[4] = pr_cap >> 8 & 0xff;
+    buf[5] = pr_cap & 0xff;
+
+    scsi_req_data(&r->req, len);
+    return 0;
+}
+
+static void scsi_aio_pr_complete(void *opaque, int ret)
+{
+    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+    /* The request must only run in the BlockBackend's AioContext */
+    assert(blk_get_aio_context(s->qdev.conf.blk) ==
+           qemu_get_current_aio_context());
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    if (scsi_disk_req_check_error(r, ret, true)) {
+        goto done;
+    }
+
+    scsi_req_complete(&r->req, GOOD);
+
+done:
+    scsi_req_unref(&r->req);
+}
+
+static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t r_key,
+                                         uint64_t sa_key, SCSIPrType type,
+                                         uint8_t aptpl, bool ignore_key)
+{
+    SCSIRequest *req = &r->req;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
+    r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, r_key, sa_key,
+                                       scsi_pr_type_to_block(type),
+                                       aptpl ? true : false,
+                                       ignore_key, scsi_aio_pr_complete, r);
+
+    return 0;
+}
+
+static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t r_key,
+                                        SCSIPrType type)
+{
+    SCSIRequest *req = &r->req;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
+    r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, r_key,
+                                      scsi_pr_type_to_block(type),
+                                      scsi_aio_pr_complete, r);
+
+    return 0;
+}
+
+static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t r_key,
+                                        SCSIPrType type)
+{
+    SCSIRequest *req = &r->req;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
+    r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, r_key,
+                                      scsi_pr_type_to_block(type),
+                                      scsi_aio_pr_complete, r);
+
+    return 0;
+}
+
+static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t r_key)
+{
+    SCSIRequest *req = &r->req;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
+    r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, r_key,
+                                    scsi_aio_pr_complete, r);
+
+    return 0;
+}
+
+static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t r_key,
+                                        uint64_t sa_key, SCSIPrType type,
+                                        bool abort)
+{
+    SCSIRequest *req = &r->req;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
+    r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, r_key, sa_key,
+                                      scsi_pr_type_to_block(type), abort,
+                                      scsi_aio_pr_complete, r);
+
+    return 0;
+}
+
+static int scsi_disk_emulate_pr_in(SCSIRequest *req)
+{
+    int rc;
+    SCSIPrInAction action = req->cmd.buf[1] & 0x1f;
+
+    switch (action) {
+    case SCSI_PR_IN_READ_KEYS:
+        rc = scsi_disk_emulate_pr_read_keys(req);
+        break;
+    case SCSI_PR_IN_READ_RESERVATION:
+        rc = scsi_disk_emulate_pr_read_reservation(req);
+        break;
+    case SCSI_PR_IN_REPORT_CAPABILITIES:
+        rc = scsi_disk_emulate_pr_report_capabilities(req);
+        break;
+    default:
+        return -ENOTSUP;
+    }
+
+    return rc;
+}
+
+static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf)
+{
+    int rc;
+    uint8_t aptpl;
+    uint64_t r_key, sa_key;
+    SCSIPrOutAction action;
+    SCSIPrScope scope;
+    SCSIPrType type;
+    SCSIRequest *req = &r->req;
+
+    memcpy(&r_key, &inbuf[0], 8);
+    r_key = be64_to_cpu(r_key);
+    memcpy(&sa_key, &inbuf[8], 8);
+    sa_key = be64_to_cpu(sa_key);
+    memcpy(&aptpl, &inbuf[20], 1);
+    aptpl =  aptpl & 0x01;
+    action = req->cmd.buf[1] & 0x1f;
+    scope = (req->cmd.buf[2] >> 4) & 0x0f;
+    type = req->cmd.buf[2] & 0x0f;
+
+    if (scope != SCSI_PR_LU_SCOPE) {
+        return -ENOTSUP;
+    }
+
+    switch (action) {
+    case SCSI_PR_OUT_REGISTER:
+        rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type,
+                                           aptpl, false);
+        break;
+    case SCSI_PR_OUT_REG_AND_IGNORE_KEY:
+        rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, aptpl, true);
+        break;
+    case SCSI_PR_OUT_RESERVE:
+        rc = scsi_disk_emulate_pr_reserve(r, r_key, type);
+        break;
+    case SCSI_PR_OUT_RELEASE:
+        rc = scsi_disk_emulate_pr_release(r, r_key, type);
+        break;
+    case SCSI_PR_OUT_CLEAR:
+        rc = scsi_disk_emulate_pr_clear(r, r_key);
+        break;
+    case SCSI_PR_OUT_PREEMPT:
+        rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, false);
+        break;
+    case SCSI_PR_OUT_PREEMPT_AND_ABORT:
+        rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, true);
+        break;
+    default:
+        return -ENOTSUP;
+    }
+
+    return rc;
+}
+
 static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
                                        uint8_t *inbuf, int inlen)
 {
@@ -1957,6 +2299,9 @@  static void scsi_disk_emulate_write_data(SCSIRequest *req)
         scsi_req_complete(&r->req, GOOD);
         break;
 
+    case PERSISTENT_RESERVE_OUT:
+        scsi_disk_emulate_pr_out(r, r->iov.iov_base);
+        break;
     default:
         abort();
     }
@@ -2213,6 +2558,11 @@  static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
     case FORMAT_UNIT:
         trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
         break;
+    case PERSISTENT_RESERVE_OUT:
+        break;
+    case PERSISTENT_RESERVE_IN:
+        scsi_disk_emulate_pr_in(req);
+        return 0;
     default:
         trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
                                                 scsi_command_name(buf[0]));
@@ -2632,6 +2982,8 @@  static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
     [VERIFY_12]                       = &scsi_disk_emulate_reqops,
     [VERIFY_16]                       = &scsi_disk_emulate_reqops,
     [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,
+    [PERSISTENT_RESERVE_IN]           = &scsi_disk_emulate_reqops,
+    [PERSISTENT_RESERVE_OUT]          = &scsi_disk_emulate_reqops,
 
     [READ_6]                          = &scsi_disk_dma_reqops,
     [READ_10]                         = &scsi_disk_dma_reqops,