@@ -22,6 +22,9 @@
typedef struct NvmeNamespaceParams {
uint32_t nsid;
uint16_t ms;
+ /* protection information type (0-3); written into DPS bits 2:0 */
+ uint8_t pi;
+ /* protection information location; non-zero puts the DIF tuple in the
+ * first eight metadata bytes (DPS bit 3, "first eight") */
+ uint8_t pil;
+ /* pre-initialize the PI tuples on the backing device at namespace setup */
+ bool pi_init;
} NvmeNamespaceParams;
typedef struct NvmeNamespace {
@@ -182,6 +182,42 @@ static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
return sq->ctrl;
}
+/* from Linux kernel (crypto/crct10dif_common.c) */
+/*
+ * Lookup table for the CRC16 used by the T10 DIF guard field; table[1] is
+ * the generator polynomial 0x8bb7. Consumed by crc_t10dif().
+ */
+static const uint16_t t10_dif_crc_table[256] = {
+ 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+ 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+ 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+ 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+ 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+ 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+ 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+ 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+ 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+ 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+ 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+ 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+ 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+ 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+ 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+ 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+ 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+ 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+ 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+ 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+ 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+ 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+ 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+ 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+ 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+ 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+ 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+ 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+ 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+ 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+ 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+ 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
+
int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
#endif /* HW_NVME_H */
@@ -583,8 +583,11 @@ enum {
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
+ NVME_RW_PRINFO_PRCHK_MASK = 7 << 10,
};
+#define NVME_RW_PRINFO(control) ((control >> 10) & 0xf)
+
typedef struct QEMU_PACKED NvmeDsmCmd {
uint8_t opcode;
uint8_t flags;
@@ -1051,14 +1054,22 @@ enum NvmeNsIdentifierType {
#define NVME_ID_NS_DPC_TYPE_MASK 0x7
enum NvmeIdNsDps {
- DPS_TYPE_NONE = 0,
- DPS_TYPE_1 = 1,
- DPS_TYPE_2 = 2,
- DPS_TYPE_3 = 3,
- DPS_TYPE_MASK = 0x7,
- DPS_FIRST_EIGHT = 8,
+ NVME_ID_NS_DPS_TYPE_NONE = 0,
+ NVME_ID_NS_DPS_TYPE_1 = 1,
+ NVME_ID_NS_DPS_TYPE_2 = 2,
+ NVME_ID_NS_DPS_TYPE_3 = 3,
+ NVME_ID_NS_DPS_TYPE_MASK = 0x7,
+ NVME_ID_NS_DPS_FIRST_EIGHT = 8,
};
+#define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK)
+
+/*
+ * 8-byte protection information tuple stored big-endian within each logical
+ * block's metadata (size asserted in _nvme_check_size()).
+ */
+typedef struct NvmeDifTuple {
+ uint16_t guard;
+ uint16_t apptag;
+ uint32_t reftag;
+} NvmeDifTuple;
+
static inline void _nvme_check_size(void)
{
QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096);
@@ -1080,5 +1091,6 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsDescr) != 4);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 8);
}
#endif
@@ -25,11 +25,44 @@
#include "hw/qdev-properties.h"
#include "hw/qdev-core.h"
+#include "trace.h"
+
#include "nvme.h"
#include "nvme-ns.h"
#define MIN_DISCARD_GRANULARITY (4 * KiB)
+/*
+ * Initialize the protection information tuple of every logical block in the
+ * namespace to guard 0, apptag 0xffff and reftag 0xffffffff -- the pattern
+ * that disables per-tuple checking.
+ *
+ * Returns 0 on success; returns -1 and sets @errp on write failure.
+ */
+static int nvme_ns_init_pi(NvmeNamespace *ns, Error **errp)
+{
+ int nlbas = nvme_ns_nlbas(ns);
+ /* pil is the byte offset of the DIF tuple within the metadata area */
+ uint16_t pil = ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT ?
+ 0 : nvme_ns_ms(ns) - sizeof(NvmeDifTuple);
+ int64_t offset = 1 << nvme_ns_lbads(ns), stride = nvme_l2b(ns, 1);
+ int i, ret;
+
+ NvmeDifTuple dif = {
+ .apptag = 0xffff,
+ .reftag = 0xffffffff,
+ };
+
+ for (i = 0; i < nlbas; i++) {
+ /* report progress every 4096 blocks */
+ if (i && i % 0x1000 == 0) {
+ trace_pci_nvme_ns_init_pi(i, nlbas);
+ }
+
+ ret = blk_pwrite(ns->blkconf.blk, i * stride + offset + pil, &dif, sizeof(dif),
+ 0);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "could not write");
+ return -1;
+ }
+ }
+
+ trace_pci_nvme_ns_init_pi(nlbas, nlbas);
+
+ return 0;
+}
+
static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
{
BlockDriverInfo bdi;
@@ -54,6 +87,15 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
} else {
id_ns->mc = 0x1;
id_ns->flbas |= 0x10;
+
+ id_ns->dpc = 0x1f;
+ id_ns->dps = (ns->params.pil << 3) | ns->params.pi;
+
+ if (ns->params.pi_init) {
+ if (nvme_ns_init_pi(ns, errp)) {
+ return -1;
+ }
+ }
}
id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));
@@ -163,6 +205,9 @@ static Property nvme_ns_props[] = {
DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0),
+ DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0),
+ DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0),
+ DEFINE_PROP_BOOL("pi_init", NvmeNamespace, params.pi_init, false),
DEFINE_PROP_END_OF_LIST(),
};
@@ -158,6 +158,22 @@ static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
return pci_dma_read(&n->parent_obj, addr, buf, size);
}
+/*
+ * Write @size bytes from @buf to controller address @addr. Copies directly
+ * into the CMB when the whole range lies inside it; falls back to PCI DMA
+ * otherwise. Returns non-zero when the address range wraps around, 0 or
+ * the DMA result otherwise. Mirrors nvme_addr_read().
+ */
+static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+{
+ hwaddr hi = addr + size - 1;
+ if (hi < addr) {
+ return 1;
+ }
+
+ if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) {
+ memcpy(nvme_addr_to_cmb(n, addr), buf, size);
+ return 0;
+ }
+
+ return pci_dma_write(&n->parent_obj, addr, buf, size);
+}
+
+
static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid)
{
return nsid && (nsid == NVME_NSID_BROADCAST || nsid <= n->num_namespaces);
@@ -725,6 +741,60 @@ static uint16_t nvme_tx_iov(uint8_t *ptr, uint32_t len, QEMUIOVector *iov,
return NVME_SUCCESS;
}
+/*
+ * Transfer @len bytes of the metadata-extended buffer @ptr to or from the
+ * host buffer described by the request's qsg/iov, copying chunks of @bytes
+ * bytes and then advancing @ptr past @skip_bytes untransferred bytes (the
+ * DIF tuple). @len counts the skipped bytes as well, so the host transfer
+ * is shorter than @len by nlb * @skip_bytes.
+ */
+static uint16_t nvme_tx_interleaved(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+ uint32_t bytes, uint16_t skip_bytes,
+ NvmeTxDirection dir, NvmeRequest *req)
+{
+ hwaddr addr;
+ int i = 0;
+ int64_t offset = 0;
+ uint32_t trans_len, count = bytes;
+
+ /* assert that exactly one of qsg and iov carries data */
+ assert((req->qsg.nsg > 0) != (req->iov.niov > 0));
+
+ while (len) {
+ trans_len = MIN(len, count);
+
+ if (req->qsg.nsg > 0) {
+ trans_len = MIN(trans_len, req->qsg.sg[i].len - offset);
+ addr = req->qsg.sg[i].base + offset;
+ } else {
+ /*
+ * NOTE(review): iov_base is a host pointer cast to hwaddr; this
+ * presumably only occurs for CMB-resident buffers where
+ * nvme_addr_read/write can resolve it -- confirm against the
+ * mapping code.
+ */
+ trans_len = MIN(trans_len, req->iov.iov[i].iov_len - offset);
+ addr = (hwaddr)req->iov.iov[i].iov_base + offset;
+ }
+
+ if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
+ if (nvme_addr_read(n, addr, ptr, trans_len)) {
+ return NVME_DATA_TRAS_ERROR;
+ }
+ } else {
+ if (nvme_addr_write(n, addr, ptr, trans_len)) {
+ return NVME_DATA_TRAS_ERROR;
+ }
+ }
+
+ ptr += trans_len;
+ len -= trans_len;
+ count -= trans_len;
+ offset += trans_len;
+
+ /* end of a data chunk: skip the interleaved DIF tuple in the buffer */
+ if (count == 0) {
+ count = bytes;
+ ptr += skip_bytes;
+ len -= skip_bytes;
+ }
+
+ /* current scatter/gather element exhausted; move to the next one */
+ if ((req->qsg.nsg > 0 && offset == req->qsg.sg[i].len) ||
+ (req->iov.niov > 0 && offset == req->iov.iov[i].iov_len)) {
+ offset = 0;
+ i++;
+ }
+ }
+
+ return NVME_SUCCESS;
+}
+
+
static uint16_t nvme_tx(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
NvmeTxDirection dir, NvmeRequest *req)
{
@@ -961,6 +1031,143 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
return NVME_SUCCESS;
}
+/*
+ * For protection information type 1 the initial reference tag must equal
+ * the lower 32 bits of the starting LBA; reject the command with Invalid
+ * Protection Information otherwise. No constraint is imposed here for
+ * types 2 and 3.
+ */
+static uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl,
+ uint64_t slba, uint32_t reftag)
+{
+ if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
+ (slba & 0xffffffff) != reftag) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+
+ return NVME_SUCCESS;
+}
+
+/* from Linux kernel (crypto/crct10dif_common.c) */
+/* table-driven CRC16 over @len bytes of @buffer, initial value 0 */
+static uint16_t crc_t10dif(const unsigned char *buffer, size_t len)
+{
+ uint16_t crc = 0;
+ unsigned int i;
+
+ for (i = 0; i < len; i++) {
+ crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
+ }
+
+ return crc;
+}
+
+/*
+ * PRACT=1: compute and splice a DIF tuple into each metadata-extended
+ * logical block of @buf (@len bytes total). The guard CRC covers the data
+ * and, unless the tuple sits in the first eight metadata bytes, the
+ * metadata preceding the tuple. The reference tag increments per block
+ * except for protection type 3.
+ */
+static void nvme_e2e_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf,
+ size_t len, uint16_t apptag,
+ uint32_t reftag)
+{
+ uint8_t *end = buf + len;
+ size_t lba_size = nvme_l2b(ns, 1);
+ size_t chksum_len = 1 << nvme_ns_lbads(ns);
+
+ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
+ chksum_len += nvme_ns_ms(ns) - sizeof(NvmeDifTuple);
+ }
+
+ trace_pci_nvme_e2e_pract_generate_dif(len, lba_size, chksum_len, apptag,
+ reftag);
+
+ for (; buf < end; buf += lba_size) {
+ NvmeDifTuple *dif = (NvmeDifTuple *)(buf + chksum_len);
+
+ dif->guard = cpu_to_be16(crc_t10dif(buf, chksum_len));
+ /*
+ * apptag is a 16-bit field; using cpu_to_be32 here would byteswap
+ * as 32-bit and truncate the tag to zero on little-endian hosts.
+ */
+ dif->apptag = cpu_to_be16(apptag);
+ dif->reftag = cpu_to_be32(reftag);
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
+ reftag++;
+ }
+ }
+}
+
+/*
+ * Check a single DIF tuple against the PRCHK bits in @ctrl. Checking is
+ * skipped for a block whose apptag is 0xffff (types 1/2), or whose apptag
+ * is 0xffff and reftag 0xffffffff (type 3) -- the "escape" values written
+ * by nvme_ns_init_pi(). @buf/@len cover the bytes protected by the guard.
+ */
+static uint16_t nvme_e2e_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
+ uint8_t *buf, size_t len, uint16_t ctrl,
+ uint16_t apptag, uint16_t appmask,
+ uint32_t reftag)
+{
+ switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ case NVME_ID_NS_DPS_TYPE_3:
+ if (be32_to_cpu(dif->reftag) != 0xffffffff) {
+ break;
+ }
+
+ /* fallthrough */
+ case NVME_ID_NS_DPS_TYPE_1:
+ case NVME_ID_NS_DPS_TYPE_2:
+ if (be16_to_cpu(dif->apptag) != 0xffff) {
+ break;
+ }
+
+ trace_pci_nvme_e2e_prchk_disabled(be16_to_cpu(dif->apptag),
+ be32_to_cpu(dif->reftag));
+
+ return NVME_SUCCESS;
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) {
+ uint16_t crc = crc_t10dif(buf, len);
+ trace_pci_nvme_e2e_prchk_guard(be16_to_cpu(dif->guard), crc);
+
+ if (be16_to_cpu(dif->guard) != crc) {
+ return NVME_E2E_GUARD_ERROR;
+ }
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRCHK_APP) {
+ trace_pci_nvme_e2e_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
+ appmask);
+
+ if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
+ return NVME_E2E_APP_ERROR;
+ }
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRCHK_REF) {
+ trace_pci_nvme_e2e_prchk_reftag(be32_to_cpu(dif->reftag), reftag);
+
+ if (be32_to_cpu(dif->reftag) != reftag) {
+ return NVME_E2E_REF_ERROR;
+ }
+ }
+
+ return NVME_SUCCESS;
+}
+
+/*
+ * Verify the DIF tuple of every metadata-extended logical block in @buf.
+ * @len is the byte length of the buffer including metadata. The expected
+ * reference tag increments per block except for protection type 3.
+ * Returns the first non-success status encountered.
+ */
+static uint16_t nvme_e2e_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
+ uint16_t ctrl, uint16_t apptag,
+ uint16_t appmask, uint32_t reftag)
+{
+ uint8_t *end = buf + len;
+ size_t lba_size = nvme_l2b(ns, 1);
+ size_t chksum_len = 1 << nvme_ns_lbads(ns);
+ uint16_t status;
+
+ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
+ chksum_len += nvme_ns_ms(ns) - sizeof(NvmeDifTuple);
+ }
+
+ trace_pci_nvme_e2e_check(NVME_RW_PRINFO(ctrl), chksum_len);
+
+ for (; buf < end; buf += lba_size) {
+ NvmeDifTuple *dif = (NvmeDifTuple *)(buf + chksum_len);
+
+ status = nvme_e2e_prchk(ns, dif, buf, chksum_len, ctrl, apptag,
+ appmask, reftag);
+ if (status) {
+ return status;
+ }
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
+ reftag++;
+ }
+ }
+
+ return NVME_SUCCESS;
+}
+
static void nvme_aio_err(NvmeRequest *req, int ret)
{
uint16_t status = NVME_SUCCESS;
@@ -980,7 +1187,7 @@ static void nvme_aio_err(NvmeRequest *req, int ret)
break;
}
- trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret), status);
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status);
error_setg_errno(&local_err, -ret, "aio failed");
error_report_err(local_err);
@@ -1017,6 +1224,73 @@ static void nvme_rw_cb(void *opaque, int ret)
nvme_enqueue_req_completion(nvme_cq(req), req);
}
+/* bounce-buffer context carried through protected (end-to-end) I/O */
+struct nvme_e2e_ctx {
+ NvmeRequest *req;
+ QEMUIOVector iov;
+ uint8_t *bounce;
+};
+
+/* common completion: release the bounce buffer and finish the request */
+static void nvme_e2e_rw_cb(void *opaque, int ret)
+{
+ struct nvme_e2e_ctx *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+
+ trace_pci_nvme_e2e_rw_cb(nvme_cid(req));
+
+ qemu_iovec_destroy(&ctx->iov);
+ g_free(ctx->bounce);
+ g_free(ctx);
+
+ nvme_rw_cb(req, ret);
+}
+
+/*
+ * Read completion for protected namespaces: verify the protection
+ * information in the bounce buffer, then copy the data to the host --
+ * stripping the DIF tuples when PRACT is set and the metadata is exactly
+ * eight bytes. Always chains to nvme_e2e_rw_cb() for cleanup.
+ */
+static void nvme_e2e_rw_check_cb(void *opaque, int ret)
+{
+ struct nvme_e2e_ctx *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeCtrl *n = nvme_ctrl(req);
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint16_t apptag = le16_to_cpu(rw->apptag);
+ uint16_t appmask = le16_to_cpu(rw->appmask);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
+ uint16_t status;
+
+ trace_pci_nvme_e2e_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag,
+ appmask, reftag);
+
+ if (ret) {
+ goto out;
+ }
+
+ status = nvme_e2e_check(ns, ctx->bounce, ctx->iov.size, ctrl, apptag,
+ appmask, reftag);
+ if (status) {
+ req->status = status;
+ goto out;
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRACT && nvme_ns_ms(ns) == 8) {
+ /* PRACT with 8-byte metadata: the host buffer holds no PI */
+ size_t lba_size = 1 << nvme_ns_lbads(ns);
+
+ status = nvme_tx_interleaved(n, ctx->bounce, nvme_l2b(ns, nlb),
+ lba_size, sizeof(NvmeDifTuple),
+ NVME_TX_DIRECTION_FROM_DEVICE, req);
+ } else {
+ status = nvme_tx(n, ctx->bounce, nvme_l2b(ns, nlb),
+ NVME_TX_DIRECTION_FROM_DEVICE, req);
+ }
+
+ if (status) {
+ req->status = status;
+ }
+
+out:
+ nvme_e2e_rw_cb(ctx, ret);
+}
+
static void nvme_aio_discard_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
@@ -1047,6 +1321,12 @@ static void nvme_compare_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
NvmeNamespace *ns = req->ns;
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint16_t apptag = le16_to_cpu(rw->apptag);
+ uint16_t appmask = le16_to_cpu(rw->appmask);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
struct nvme_compare_ctx *ctx = req->opaque;
g_autofree uint8_t *buf = NULL;
uint16_t status;
@@ -1061,6 +1341,15 @@ static void nvme_compare_cb(void *opaque, int ret)
goto out;
}
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ status = nvme_e2e_check(ns, ctx->bounce, ctx->iov.size, ctrl,
+ apptag, appmask, reftag);
+ if (status) {
+ req->status = status;
+ goto out;
+ }
+ }
+
buf = g_malloc(ctx->len);
status = nvme_h2c(nvme_ctrl(req), buf, ctx->len, req);
@@ -1069,6 +1358,50 @@ static void nvme_compare_cb(void *opaque, int ret)
goto out;
}
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ size_t stride = nvme_l2b(ns, 1);
+ uint16_t ms = nvme_ns_ms(ns);
+ uint64_t pos = 0;
+ bool first = !!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT);
+ size_t cmp_len;
+
+ /*
+ * nvme_e2e_check() takes a byte length (see the sibling callers that
+ * pass ctx->iov.size); buf holds ctx->len bytes, nlb is only the
+ * block count.
+ */
+ status = nvme_e2e_check(ns, buf, ctx->len, ctrl, apptag, appmask,
+ reftag);
+ if (status) {
+ req->status = status;
+ goto out;
+ }
+
+ /*
+ * When formatted with protection information, do not compare the DIF
+ * tuple.
+ */
+ cmp_len = 1 << nvme_ns_lbads(ns);
+ if (!first) {
+ cmp_len += ms - sizeof(NvmeDifTuple);
+ }
+
+ for (int i = 0; i < nlb; i++) {
+ if (memcmp(buf + pos, ctx->bounce + pos, cmp_len)) {
+ req->status = NVME_CMP_FAILURE;
+ break;
+ }
+
+ if (!first) {
+ pos += stride;
+ continue;
+ }
+
+ /* skip the DIF tuple and compare the remaining metadata bytes */
+ pos += cmp_len + sizeof(NvmeDifTuple);
+ if (memcmp(buf + pos, ctx->bounce + pos,
+ ms - sizeof(NvmeDifTuple))) {
+ req->status = NVME_CMP_FAILURE;
+ break;
+ }
+
+ /*
+ * Advance past the remaining metadata so the next iteration
+ * starts at the next block; without this, pos only moves by
+ * cmp_len + 8 per block while the stride is cmp_len + ms.
+ */
+ pos += ms - sizeof(NvmeDifTuple);
+ }
+
+ goto out;
+ }
+
if (memcmp(buf, ctx->bounce, ctx->len)) {
req->status = NVME_CMP_FAILURE;
}
@@ -1162,12 +1495,24 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
size_t len = nvme_l2b(ns, nlb);
int64_t offset = nvme_l2b(ns, slba);
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
uint8_t *bounce = NULL;
struct nvme_compare_ctx *ctx = NULL;
uint16_t status;
trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb);
+ status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ if (status) {
+ return status;
+ }
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
+ (ctrl & NVME_RW_PRINFO_PRACT)) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+
status = nvme_check_mdts(n, len);
if (status) {
trace_pci_nvme_err_mdts(nvme_cid(req), len);
@@ -1220,10 +1565,23 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
uint64_t offset = nvme_l2b(ns, slba);
uint32_t count = nvme_l2b(ns, nlb);
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint16_t apptag = le16_to_cpu(rw->apptag);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
+ struct nvme_e2e_ctx *ctx;
uint16_t status;
trace_pci_nvme_write_zeroes(nvme_cid(req), nvme_nsid(ns), slba, nlb);
+ if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+
+ status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ if (status) {
+ return status;
+ }
+
status = nvme_check_bounds(ns, slba, nlb);
if (status) {
trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
@@ -1232,19 +1590,118 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
BLOCK_ACCT_WRITE);
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (ctrl & NVME_RW_PRINFO_PRACT)) {
+ ctx = g_new(struct nvme_e2e_ctx, 1);
+ ctx->req = req;
+ ctx->bounce = g_malloc0(count);
+
+ qemu_iovec_init(&ctx->iov, 1);
+ qemu_iovec_add(&ctx->iov, ctx->bounce, count);
+
+ /* splice generated protection information into the buffer */
+ nvme_e2e_pract_generate_dif(ns, ctx->bounce, count, apptag, reftag);
+
+ req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->iov, 0,
+ nvme_e2e_rw_cb, ctx);
+
+ return NVME_NO_COMPLETE;
+ }
+
req->aiocb = blk_aio_pwrite_zeroes(req->ns->blkconf.blk, offset, count,
BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
return NVME_NO_COMPLETE;
}
+/*
+ * Read/write path for namespaces formatted with protection information.
+ * Data is staged in a bounce buffer: reads verify (and with PRACT strip)
+ * the PI in nvme_e2e_rw_check_cb(); writes either generate the PI (PRACT)
+ * or verify host-supplied PI before submitting the write.
+ */
+static uint16_t nvme_e2e_rw(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ NvmeNamespace *ns = req->ns;
+ uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ size_t len = nvme_l2b(ns, nlb);
+ size_t offset = nvme_l2b(ns, slba);
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint16_t apptag = le16_to_cpu(rw->apptag);
+ uint16_t appmask = le16_to_cpu(rw->appmask);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
+ struct nvme_e2e_ctx *ctx;
+ uint16_t status;
+
+ trace_pci_nvme_e2e_rw(!!(ctrl & NVME_RW_PRINFO_PRACT));
+
+ status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ if (status) {
+ return status;
+ }
+
+ ctx = g_new(struct nvme_e2e_ctx, 1);
+ ctx->req = req;
+ ctx->bounce = g_malloc(len);
+
+ qemu_iovec_init(&ctx->iov, 1);
+ qemu_iovec_add(&ctx->iov, ctx->bounce, len);
+
+ /* reads are checked on completion */
+ if (req->cmd.opcode == NVME_CMD_READ) {
+ req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->iov, 0,
+ nvme_e2e_rw_check_cb, ctx);
+ return NVME_NO_COMPLETE;
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRACT && nvme_ns_ms(ns) == 8) {
+ size_t lba_size = 1 << nvme_ns_lbads(ns);
+
+ /*
+ * For writes, transfer logical block data interleaved into a metadata
+ * extended buffer and splice the generated protection information into
+ * it afterwards.
+ */
+ status = nvme_tx_interleaved(n, ctx->bounce, len, lba_size, 8,
+ NVME_TX_DIRECTION_TO_DEVICE, req);
+ if (status) {
+ goto err;
+ }
+ } else {
+ status = nvme_tx(n, ctx->bounce, len, NVME_TX_DIRECTION_TO_DEVICE,
+ req);
+ if (status) {
+ goto err;
+ }
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRACT) {
+ /* splice generated protection information into the buffer */
+ nvme_e2e_pract_generate_dif(ns, ctx->bounce, len, apptag, reftag);
+ } else {
+ status = nvme_e2e_check(ns, ctx->bounce, len, ctrl, apptag, appmask,
+ reftag);
+ if (status) {
+ goto err;
+ }
+ }
+
+ req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->iov, 0,
+ nvme_e2e_rw_cb, ctx);
+
+ return NVME_NO_COMPLETE;
+
+err:
+ g_free(ctx->bounce);
+ g_free(ctx);
+
+ return status;
+}
+
+
static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
{
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
NvmeNamespace *ns = req->ns;
uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
uint64_t slba = le64_to_cpu(rw->slba);
+ uint16_t ctrl = le16_to_cpu(rw->control);
uint64_t data_size = nvme_l2b(ns, nlb);
+ uint64_t real_data_size = data_size;
uint64_t data_offset = nvme_l2b(ns, slba);
enum BlockAcctType acct = req->cmd.opcode == NVME_CMD_WRITE ?
BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
@@ -1252,6 +1709,17 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
uint16_t status;
uint32_t sector_size;
+ /*
+ * If the namespace is formatted with protecting information, the number of
+ * metadata bytes is exactly 8 and the PRACT field is set, then the
+ * metadata is not resident in the host buffer.
+ */
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
+ (nvme_ns_ms(ns) == sizeof(NvmeDifTuple)) &&
+ (ctrl & NVME_RW_PRINFO_PRACT)) {
+ data_size -= nlb << 3;
+ }
+
trace_pci_nvme_rw(nvme_cid(req), nvme_io_opc_str(rw->opcode),
nvme_nsid(ns), nlb, data_size, slba);
@@ -1281,7 +1749,12 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
goto invalid;
}
- block_acct_start(blk_get_stats(blk), &req->acct, data_size, acct);
+ block_acct_start(blk_get_stats(blk), &req->acct, real_data_size, acct);
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ return nvme_e2e_rw(n, req);
+ }
+
if (req->qsg.sg) {
sector_size = nvme_l2b(ns, 1);
if (acct == BLOCK_ACCT_WRITE) {
@@ -30,6 +30,7 @@ hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int t
# nvme.c
# nvme traces for successful events
pci_nvme_register_namespace(uint32_t nsid) "nsid %"PRIu32""
+pci_nvme_ns_init_pi(int blocks, int total) "blocks %d/%d"
pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
pci_nvme_irq_pin(void) "pulsing IRQ pin"
pci_nvme_irq_masked(void) "IRQ is masked"
@@ -42,6 +43,15 @@ pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, cons
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_rw(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_e2e_rw(uint8_t pract) "pract 0x%"PRIx8""
+pci_nvme_e2e_rw_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_e2e_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_e2e_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_e2e_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16""
+pci_nvme_e2e_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_e2e_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16""
+pci_nvme_e2e_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16""
+pci_nvme_e2e_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32""
pci_nvme_write_zeroes(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d"
pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32""