From patchwork Mon Nov 11 12:25:26 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192902 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVfv6Dsyz9sR7 for ; Mon, 11 Nov 2019 23:33:59 +1100 (AEDT) Received: from localhost ([::1]:51858 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8tJ-0007E2-IT for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:33:57 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:36862) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8lg-0007ZK-1S for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:09 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8le-0003QA-Ah for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:03 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52126) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8la-0003NF-Jw; Mon, 11 Nov 2019 07:25:58 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 6C605BF600; Mon, 11 Nov 2019 12:25:57 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 02/21] nvme: move device parameters to separate struct Date: Mon, 11 Nov 2019 13:25:26 +0100 Message-Id: 
<20191111122545.252478-3-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Move device configuration parameters to separate struct to make it explicit what is configurable and what is set internally. Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 44 ++++++++++++++++++++++---------------------- hw/block/nvme.h | 16 +++++++++++++--- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index c06e3ca31905..277700fdcc58 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -64,12 +64,12 @@ static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) { - return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1; + return sqid < n->params.num_queues && n->sq[sqid] != NULL ? 0 : -1; } static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) { - return cqid < n->num_queues && n->cq[cqid] != NULL ? 0 : -1; + return cqid < n->params.num_queues && n->cq[cqid] != NULL ? 
0 : -1; } static void nvme_inc_cq_tail(NvmeCQueue *cq) @@ -631,7 +631,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) trace_nvme_err_invalid_create_cq_addr(prp1); return NVME_INVALID_FIELD | NVME_DNR; } - if (unlikely(vector > n->num_queues)) { + if (unlikely(vector > n->params.num_queues)) { trace_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } @@ -783,7 +783,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); break; case NVME_NUMBER_OF_QUEUES: - result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); + result = cpu_to_le32((n->params.num_queues - 2) | + ((n->params.num_queues - 2) << 16)); trace_nvme_getfeat_numq(result); break; case NVME_TIMESTAMP: @@ -827,9 +828,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_NUMBER_OF_QUEUES: trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, ((dw11 >> 16) & 0xFFFF) + 1, - n->num_queues - 1, n->num_queues - 1); - req->cqe.result = - cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); + n->params.num_queues - 1, + n->params.num_queues - 1); + req->cqe.result = cpu_to_le32((n->params.num_queues - 2) | + ((n->params.num_queues - 2) << 16)); break; case NVME_TIMESTAMP: return nvme_set_feature_timestamp(n, cmd); @@ -900,12 +902,12 @@ static void nvme_clear_ctrl(NvmeCtrl *n) blk_drain(n->conf.blk); - for (i = 0; i < n->num_queues; i++) { + for (i = 0; i < n->params.num_queues; i++) { if (n->sq[i] != NULL) { nvme_free_sq(n->sq[i], n); } } - for (i = 0; i < n->num_queues; i++) { + for (i = 0; i < n->params.num_queues; i++) { if (n->cq[i] != NULL) { nvme_free_cq(n->cq[i], n); } @@ -1308,7 +1310,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) int64_t bs_size; uint8_t *pci_conf; - if (!n->num_queues) { + if (!n->params.num_queues) { error_setg(errp, "num_queues can't be zero"); return; } @@ -1324,7 +1326,7 @@ 
static void nvme_realize(PCIDevice *pci_dev, Error **errp) return; } - if (!n->serial) { + if (!n->params.serial) { error_setg(errp, "serial property not set"); return; } @@ -1341,25 +1343,25 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) pcie_endpoint_cap_init(pci_dev, 0x80); n->num_namespaces = 1; - n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); + n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); n->ns_size = bs_size / (uint64_t)n->num_namespaces; n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); - n->sq = g_new0(NvmeSQueue *, n->num_queues); - n->cq = g_new0(NvmeCQueue *, n->num_queues); + n->sq = g_new0(NvmeSQueue *, n->params.num_queues); + n->cq = g_new0(NvmeCQueue *, n->params.num_queues); memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", n->reg_size); pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); - msix_init_exclusive_bar(pci_dev, n->num_queues, 4, NULL); + msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL); id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); - strpadcpy((char *)id->sn, sizeof(id->sn), n->serial, ' '); + strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); id->rab = 6; id->ieee[0] = 0x00; id->ieee[1] = 0x02; @@ -1388,7 +1390,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->bar.vs = 0x00010200; n->bar.intmc = n->bar.intms = 0; - if (n->cmb_size_mb) { + if (n->params.cmb_size_mb) { NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); @@ -1399,7 +1401,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ - 
NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb); + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); n->cmbloc = n->bar.cmbloc; n->cmbsz = n->bar.cmbsz; @@ -1438,7 +1440,7 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->cq); g_free(n->sq); - if (n->cmb_size_mb) { + if (n->params.cmb_size_mb) { g_free(n->cmbuf); } msix_uninit_exclusive_bar(pci_dev); @@ -1446,9 +1448,7 @@ static void nvme_exit(PCIDevice *pci_dev) static Property nvme_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), - DEFINE_PROP_STRING("serial", NvmeCtrl, serial), - DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), - DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64), + DEFINE_NVME_PROPERTIES(NvmeCtrl, params), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 557194ee1954..9957c4a200e2 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -1,7 +1,19 @@ #ifndef HW_NVME_H #define HW_NVME_H + #include "block/nvme.h" +#define DEFINE_NVME_PROPERTIES(_state, _props) \ + DEFINE_PROP_STRING("serial", _state, _props.serial), \ + DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \ + DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64) + +typedef struct NvmeParams { + char *serial; + uint32_t num_queues; + uint32_t cmb_size_mb; +} NvmeParams; + typedef struct NvmeAsyncEvent { QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry; NvmeAerResult result; @@ -63,6 +75,7 @@ typedef struct NvmeCtrl { MemoryRegion ctrl_mem; NvmeBar bar; BlockConf conf; + NvmeParams params; uint32_t page_size; uint16_t page_bits; @@ -71,10 +84,8 @@ typedef struct NvmeCtrl { uint16_t sqe_size; uint32_t reg_size; uint32_t num_namespaces; - uint32_t num_queues; uint32_t max_q_ents; uint64_t ns_size; - uint32_t cmb_size_mb; uint32_t cmbsz; uint32_t cmbloc; uint8_t *cmbuf; @@ -82,7 +93,6 @@ typedef struct NvmeCtrl { uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ - char *serial; NvmeNamespace 
*namespaces; NvmeSQueue **sq; NvmeCQueue **cq; From patchwork Mon Nov 11 12:25:27 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192889 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVW34Pqcz9sQw for ; Mon, 11 Nov 2019 23:27:09 +1100 (AEDT) Received: from localhost ([::1]:51768 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8mg-0007vc-Pi for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:27:06 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:36882) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8lg-0007Zt-KF for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:05 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8lf-0003QZ-AQ for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:04 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52134) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8lc-0003NJ-JO; Mon, 11 Nov 2019 07:26:00 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id BD2A6BF624; Mon, 11 Nov 2019 12:25:57 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 03/21] nvme: add missing fields in the identify controller data structure 
Date: Mon, 11 Nov 2019 13:25:27 +0100 Message-Id: <20191111122545.252478-4-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Not used by the device model but added for completeness. See NVM Express 1.2.1, Section 5.11 ("Identify command"), Figure 90. Signed-off-by: Klaus Jensen --- include/block/nvme.h | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/include/block/nvme.h b/include/block/nvme.h index 8fb941c6537c..925f3f22792b 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -543,7 +543,13 @@ typedef struct NvmeIdCtrl { uint8_t ieee[3]; uint8_t cmic; uint8_t mdts; - uint8_t rsvd255[178]; + uint16_t cntlid; + uint32_t ver; + uint32_t rtd3r; + uint32_t rtd3e; + uint32_t oaes; + uint32_t ctratt; + uint8_t rsvd255[156]; uint16_t oacs; uint8_t acl; uint8_t aerl; @@ -551,10 +557,22 @@ typedef struct NvmeIdCtrl { uint8_t lpa; uint8_t elpe; uint8_t npss; - uint8_t rsvd511[248]; + uint8_t avscc; + uint8_t apsta; + uint16_t wctemp; + uint16_t cctemp; + uint16_t mtfa; + uint32_t hmpre; + uint32_t hmmin; + uint8_t tnvmcap[16]; + uint8_t unvmcap[16]; + uint32_t rpmbs; + uint8_t rsvd319[4]; + uint16_t kas; + uint8_t rsvd511[190]; uint8_t sqes; uint8_t cqes; - uint16_t rsvd515; + uint16_t maxcmd; uint32_t nn; uint16_t oncs; uint16_t fuses; @@ -562,8 +580,14 @@ typedef struct NvmeIdCtrl { uint8_t 
vwc; uint16_t awun; uint16_t awupf; - uint8_t rsvd703[174]; - uint8_t rsvd2047[1344]; + uint8_t nvscc; + uint8_t rsvd531; + uint16_t acwu; + uint8_t rsvd534[2]; + uint32_t sgls; + uint8_t rsvd540[228]; + uint8_t subnqn[256]; + uint8_t rsvd1024[1024]; NvmePSD psd[32]; uint8_t vs[1024]; } NvmeIdCtrl; From patchwork Mon Nov 11 12:25:28 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192893 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVZ849gLz9sPj for ; Mon, 11 Nov 2019 23:29:52 +1100 (AEDT) Received: from localhost ([::1]:51806 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8pK-0003QZ-CF for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:29:50 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:36962) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8ll-0007hd-8i for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:10 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8lf-0003RA-V9 for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:08 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52152) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8lc-0003NU-JK; Mon, 11 Nov 2019 07:26:00 -0500 Received: from apples.localdomain 
(unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 3F1F6BF6BD; Mon, 11 Nov 2019 12:25:58 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 04/21] nvme: populate the mandatory subnqn and ver fields Date: Mon, 11 Nov 2019 13:25:28 +0100 Message-Id: <20191111122545.252478-5-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Required for compliance with NVMe revision 1.2.1 or later. See NVM Express 1.2.1, Section 5.11 ("Identify command"), Figure 90 and Section 7.9 ("NVMe Qualified Names"). This also bumps the supported version to 1.2.1. 
Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 277700fdcc58..16f0fba10b08 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -9,9 +9,9 @@ */ /** - * Reference Specs: http://www.nvmexpress.org, 1.2, 1.1, 1.0e + * Reference Specification: NVM Express 1.2.1 * - * http://www.nvmexpress.org/resources/ + * https://nvmexpress.org/resources/specifications/ */ /** @@ -1366,6 +1366,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) id->ieee[0] = 0x00; id->ieee[1] = 0x02; id->ieee[2] = 0xb3; + id->ver = cpu_to_le32(0x00010201); id->oacs = cpu_to_le16(0); id->frmw = 7 << 1; id->lpa = 1 << 0; @@ -1373,6 +1374,10 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) id->cqes = (0x4 << 4) | 0x4; id->nn = cpu_to_le32(n->num_namespaces); id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP); + + strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:"); + pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial); + id->psd[0].mp = cpu_to_le16(0x9c4); id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); @@ -1387,7 +1392,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) NVME_CAP_SET_CSS(n->bar.cap, 1); NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - n->bar.vs = 0x00010200; + n->bar.vs = 0x00010201; n->bar.intmc = n->bar.intms = 0; if (n->params.cmb_size_mb) { From patchwork Mon Nov 11 12:25:29 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192907 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: 
ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVjg6MQjz9sR4 for ; Mon, 11 Nov 2019 23:36:23 +1100 (AEDT) Received: from localhost ([::1]:51882 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8vd-0002Mf-9S for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:36:21 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:36956) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8lk-0007hA-RP for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:09 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8lf-0003Qp-Iz for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:08 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52158) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8lc-0003Nf-IF; Mon, 11 Nov 2019 07:26:00 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 9CD64BF80B; Mon, 11 Nov 2019 12:25:58 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 05/21] nvme: allow completion queues in the cmb Date: Mon, 11 Nov 2019 13:25:29 +0100 Message-Id: <20191111122545.252478-6-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , 
Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Allow completion queues in the controller memory buffer. This also inlines the nvme_addr_{read,write} functions and adds an nvme_addr_is_cmb helper. Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 16f0fba10b08..daa2367b0863 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -52,14 +52,34 @@ static void nvme_process_sq(void *opaque); -static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) +static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) { - if (n->cmbsz && addr >= n->ctrl_mem.addr && - addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) { - memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size); - } else { - pci_dma_read(&n->parent_obj, addr, buf, size); + hwaddr low = n->ctrl_mem.addr; + hwaddr hi = n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size); + + return addr >= low && addr < hi; +} + +static inline void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, + int size) +{ + if (n->cmbsz && nvme_addr_is_cmb(n, addr)) { + memcpy(buf, (void *) &n->cmbuf[addr - n->ctrl_mem.addr], size); + return; } + + pci_dma_read(&n->parent_obj, addr, buf, size); +} + +static inline void nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, + int size) +{ + if (n->cmbsz && nvme_addr_is_cmb(n, addr)) { + memcpy((void *) &n->cmbuf[addr - n->ctrl_mem.addr], buf, size); + return; + } + + pci_dma_write(&n->parent_obj, addr, buf, size); } static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) @@ -281,6 +301,7 @@ static void nvme_post_cqes(void *opaque) QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { NvmeSQueue *sq; + NvmeCqe *cqe = &req->cqe; hwaddr addr; if (nvme_cq_full(cq)) { @@ -294,8 +315,7 @@ static void nvme_post_cqes(void *opaque) 
req->cqe.sq_head = cpu_to_le16(sq->head); addr = cq->dma_addr + cq->tail * n->cqe_size; nvme_inc_cq_tail(cq); - pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe, - sizeof(req->cqe)); + nvme_addr_write(n, addr, (void *) cqe, sizeof(*cqe)); QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); } if (cq->tail != cq->head) { @@ -1401,7 +1421,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); From patchwork Mon Nov 11 12:25:30 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192892 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVWX1l7Jz9sQp for ; Mon, 11 Nov 2019 23:27:36 +1100 (AEDT) Received: from localhost ([::1]:51776 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8n7-0000MN-A9 for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:27:33 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:36877) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8lg-0007Zf-GP for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:05 -0500 Received: from Debian-exim by eggs.gnu.org 
with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8lf-0003Qi-F5 for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:04 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52164) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8lc-0003Ni-JQ; Mon, 11 Nov 2019 07:26:00 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 188D8BF84E; Mon, 11 Nov 2019 12:25:59 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 06/21] nvme: add support for the abort command Date: Mon, 11 Nov 2019 13:25:30 +0100 Message-Id: <20191111122545.252478-7-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1, Section 5.1 ("Abort command"). The Abort command is a best effort command; for now, the device always fails to abort the given command. 
Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index daa2367b0863..84e4f2ea7a15 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -741,6 +741,18 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) } } +static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + uint16_t sqid = le32_to_cpu(cmd->cdw10) & 0xffff; + + req->cqe.result = 1; + if (nvme_check_sqid(n, sqid)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts) { trace_nvme_setfeat_timestamp(ts); @@ -859,6 +871,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) trace_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; } + return NVME_SUCCESS; } @@ -875,6 +888,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return nvme_create_cq(n, cmd); case NVME_ADM_CMD_IDENTIFY: return nvme_identify(n, cmd); + case NVME_ADM_CMD_ABORT: + return nvme_abort(n, cmd, req); case NVME_ADM_CMD_SET_FEATURES: return nvme_set_feature(n, cmd, req); case NVME_ADM_CMD_GET_FEATURES: @@ -1388,6 +1403,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) id->ieee[2] = 0xb3; id->ver = cpu_to_le32(0x00010201); id->oacs = cpu_to_le16(0); + id->acl = 3; id->frmw = 7 << 1; id->lpa = 1 << 0; id->sqes = (0x6 << 4) | 0x6; From patchwork Mon Nov 11 12:25:31 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192901 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) 
Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVft3tvPz9sR4 for ; Mon, 11 Nov 2019 23:33:58 +1100 (AEDT) Received: from localhost ([::1]:51856 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8tI-0007C5-5c for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:33:56 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:36988) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8lm-0007jj-9O for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:11 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8lk-0003TE-Em for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:10 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52172) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8lc-0003Nk-Id; Mon, 11 Nov 2019 07:26:00 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 798C2BF866; Mon, 11 Nov 2019 12:25:59 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 07/21] nvme: refactor device realization Date: Mon, 11 Nov 2019 13:25:31 +0100 Message-Id: <20191111122545.252478-8-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , 
Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" This patch splits up nvme_realize into multiple individual functions, each initializing a different subset of the device. Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 176 +++++++++++++++++++++++++++++++----------------- hw/block/nvme.h | 22 ++++++ 2 files changed, 135 insertions(+), 63 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 84e4f2ea7a15..1fdb3b8655ed 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -43,6 +43,8 @@ #include "trace.h" #include "nvme.h" +#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE + #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ (trace_##trace)(__VA_ARGS__); \ @@ -1336,67 +1338,106 @@ static const MemoryRegionOps nvme_cmb_ops = { }, }; -static void nvme_realize(PCIDevice *pci_dev, Error **errp) +static int nvme_check_constraints(NvmeCtrl *n, Error **errp) { - NvmeCtrl *n = NVME(pci_dev); - NvmeIdCtrl *id = &n->id_ctrl; - - int i; - int64_t bs_size; - uint8_t *pci_conf; - - if (!n->params.num_queues) { - error_setg(errp, "num_queues can't be zero"); - return; - } + NvmeParams *params = &n->params; if (!n->conf.blk) { - error_setg(errp, "drive property not set"); - return; + error_setg(errp, "nvme: block backend not configured"); + return 1; } - bs_size = blk_getlength(n->conf.blk); - if (bs_size < 0) { - error_setg(errp, "could not get backing file size"); - return; + if (!params->serial) { + error_setg(errp, "nvme: serial not configured"); + return 1; } - if (!n->params.serial) { - error_setg(errp, "serial property not set"); - return; + if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) { + error_setg(errp, "nvme: invalid queue configuration"); + return 1; } + + return 0; +} + +static int nvme_init_blk(NvmeCtrl *n, Error **errp) +{ blkconf_blocksizes(&n->conf); if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), - false, errp)) 
{ - return; + false, errp)) { + return 1; } - pci_conf = pci_dev->config; - pci_conf[PCI_INTERRUPT_PIN] = 1; - pci_config_set_prog_interface(pci_dev->config, 0x2); - pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); - pcie_endpoint_cap_init(pci_dev, 0x80); + return 0; +} +static void nvme_init_state(NvmeCtrl *n) +{ n->num_namespaces = 1; n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); - n->ns_size = bs_size / (uint64_t)n->num_namespaces; - n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); n->sq = g_new0(NvmeSQueue *, n->params.num_queues); n->cq = g_new0(NvmeCQueue *, n->params.num_queues); +} + +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) +{ + NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); + NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); + + NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); + + n->cmbloc = n->bar.cmbloc; + n->cmbsz = n->bar.cmbsz; + + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, + "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); +} + +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) +{ + uint8_t *pci_conf = pci_dev->config; - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, - "nvme", n->reg_size); + pci_conf[PCI_INTERRUPT_PIN] = 1; + pci_config_set_prog_interface(pci_conf, 0x2); + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); + pci_config_set_device_id(pci_conf, 0x5845); + pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); + pcie_endpoint_cap_init(pci_dev, 0x80); + + 
memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", + n->reg_size); pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL); + if (n->params.cmb_size_mb) { + nvme_init_cmb(n, pci_dev); + } +} + +static void nvme_init_ctrl(NvmeCtrl *n) +{ + NvmeIdCtrl *id = &n->id_ctrl; + NvmeParams *params = &n->params; + uint8_t *pci_conf = n->parent_obj.config; + id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); - strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); + strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' '); id->rab = 6; id->ieee[0] = 0x00; id->ieee[1] = 0x02; @@ -1430,46 +1471,55 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->bar.vs = 0x00010201; n->bar.intmc = n->bar.intms = 0; +} - if (n->params.cmb_size_mb) { +static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +{ + int64_t bs_size; + NvmeIdNs *id_ns = &ns->id_ns; + + bs_size = blk_getlength(n->conf.blk); + if (bs_size < 0) { + error_setg_errno(errp, -bs_size, "blk_getlength"); + return 1; + } + + id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; + n->ns_size = bs_size; + + id_ns->ncap = id_ns->nuse = id_ns->nsze = + cpu_to_le64(nvme_ns_nlbas(n, ns)); - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); - NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); + return 0; +} - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); +static void nvme_realize(PCIDevice *pci_dev, Error **errp) +{ + NvmeCtrl *n = 
NVME(pci_dev); + Error *local_err = NULL; + int i; - n->cmbloc = n->bar.cmbloc; - n->cmbsz = n->bar.cmbsz; + if (nvme_check_constraints(n, &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_check_constraints: "); + return; + } - n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); - memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, - "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); - pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); + nvme_init_state(n); + if (nvme_init_blk(n, &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_init_blk: "); + return; } for (i = 0; i < n->num_namespaces; i++) { - NvmeNamespace *ns = &n->namespaces[i]; - NvmeIdNs *id_ns = &ns->id_ns; - id_ns->nsfeat = 0; - id_ns->nlbaf = 0; - id_ns->flbas = 0; - id_ns->mc = 0; - id_ns->dpc = 0; - id_ns->dps = 0; - id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; - id_ns->ncap = id_ns->nuse = id_ns->nsze = - cpu_to_le64(n->ns_size >> - id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds); + if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_init_namespace: "); + return; + } } + + nvme_init_pci(n, pci_dev); + nvme_init_ctrl(n); } static void nvme_exit(PCIDevice *pci_dev) diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 9957c4a200e2..7c3c07bde887 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -101,4 +101,26 @@ typedef struct NvmeCtrl { NvmeIdCtrl id_ctrl; } NvmeCtrl; +static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns) +{ + NvmeIdNs *id_ns = &ns->id_ns; + return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; +} + +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) +{ + return nvme_ns_lbaf(ns).ds; +} + +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns) +{ + return 1 << nvme_ns_lbads(ns); +} + +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns) 
+{ + return n->ns_size >> nvme_ns_lbads(ns); +} + + #endif /* HW_NVME_H */ From patchwork Mon Nov 11 12:25:32 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192895 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVby6lvXz9sR4 for ; Mon, 11 Nov 2019 23:31:26 +1100 (AEDT) Received: from localhost ([::1]:51822 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8qq-0004C0-2E for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:31:24 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:36940) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8lj-0007f9-Ky for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:09 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8lh-0003SA-Ne for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:07 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52184) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8lc-0003Nn-TJ; Mon, 11 Nov 2019 07:26:01 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id C75AEBF91A; Mon, 11 Nov 2019 12:25:59 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 08/21] nvme: add support for the get log page 
command Date: Mon, 11 Nov 2019 13:25:32 +0100 Message-Id: <20191111122545.252478-9-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Add support for the Get Log Page command and basic implementations of the mandatory Error Information, SMART/Health Information and Firmware Slot Information log pages. In violation of the specification, the SMART/Health Information log page does not persist information over the lifetime of the controller because the device has no place to store such persistent state. Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1, Section 5.10 ("Get Log Page command"). Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 149 +++++++++++++++++++++++++++++++++++++++++- hw/block/nvme.h | 9 ++- hw/block/trace-events | 2 + include/block/nvme.h | 2 +- 4 files changed, 159 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 1fdb3b8655ed..c50683e0d171 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -44,6 +44,7 @@ #include "nvme.h" #define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE +#define NVME_TEMPERATURE 0x143 #define NVME_GUEST_ERR(trace, fmt, ...) 
\ do { \ @@ -577,6 +578,136 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) return NVME_SUCCESS; } +static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, + uint32_t buf_len, uint64_t off, NvmeRequest *req) +{ + uint32_t trans_len; + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + if (off > sizeof(*n->elpes) * (n->params.elpe + 1)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len = MIN(sizeof(*n->elpes) * (n->params.elpe + 1) - off, buf_len); + + return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1, + prp2); +} + +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + uint32_t nsid = le32_to_cpu(cmd->nsid); + + uint32_t trans_len; + time_t current_ms; + uint64_t units_read = 0, units_written = 0, read_commands = 0, + write_commands = 0; + NvmeSmartLog smart; + BlockAcctStats *s; + + if (nsid && nsid != 0xffffffff) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + s = blk_get_stats(n->conf.blk); + + units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS; + units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS; + read_commands = s->nr_ops[BLOCK_ACCT_READ]; + write_commands = s->nr_ops[BLOCK_ACCT_WRITE]; + + if (off > sizeof(smart)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len = MIN(sizeof(smart) - off, buf_len); + + memset(&smart, 0x0, sizeof(smart)); + + smart.data_units_read[0] = cpu_to_le64(units_read / 1000); + smart.data_units_written[0] = cpu_to_le64(units_written / 1000); + smart.host_read_commands[0] = cpu_to_le64(read_commands); + smart.host_write_commands[0] = cpu_to_le64(write_commands); + + smart.number_of_error_log_entries[0] = cpu_to_le64(0); + smart.temperature[0] = n->temperature & 0xff; + smart.temperature[1] = (n->temperature >> 8) & 0xff; + + if (n->features.temp_thresh <= n->temperature) 
{ + smart.critical_warning |= NVME_SMART_TEMPERATURE; + } + + current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + smart.power_on_hours[0] = cpu_to_le64( + (((current_ms - n->starttime_ms) / 1000) / 60) / 60); + + return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1, + prp2); +} + +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t trans_len; + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + NvmeFwSlotInfoLog fw_log; + + if (off > sizeof(fw_log)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + memset(&fw_log, 0, sizeof(NvmeFwSlotInfoLog)); + + trans_len = MIN(sizeof(fw_log) - off, buf_len); + + return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, + prp2); +} + +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t dw12 = le32_to_cpu(cmd->cdw12); + uint32_t dw13 = le32_to_cpu(cmd->cdw13); + uint16_t lid = dw10 & 0xff; + uint8_t rae = (dw10 >> 15) & 0x1; + uint32_t numdl, numdu; + uint64_t off, lpol, lpou; + size_t len; + + numdl = (dw10 >> 16); + numdu = (dw11 & 0xffff); + lpol = dw12; + lpou = dw13; + + len = (((numdu << 16) | numdl) + 1) << 2; + off = (lpou << 32ULL) | lpol; + + if (off & 0x3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trace_nvme_get_log(req->cid, lid, rae, len, off); + + switch (lid) { + case NVME_LOG_ERROR_INFO: + return nvme_error_info(n, cmd, len, off, req); + case NVME_LOG_SMART_INFO: + return nvme_smart_info(n, cmd, len, off, req); + case NVME_LOG_FW_SLOT_INFO: + return nvme_fw_log_info(n, cmd, len, off, req); + default: + trace_nvme_err_invalid_log_page(req->cid, lid); + return NVME_INVALID_LOG_ID | NVME_DNR; + } +} + static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) { n->cq[cq->cqid] = NULL; @@ -812,6 +943,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
*cmd, NvmeRequest *req) uint32_t result; switch (dw10) { + case NVME_TEMPERATURE_THRESHOLD: + result = cpu_to_le32(n->features.temp_thresh); + break; case NVME_VOLATILE_WRITE_CACHE: result = blk_enable_write_cache(n->conf.blk); trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); @@ -856,6 +990,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t dw11 = le32_to_cpu(cmd->cdw11); switch (dw10) { + case NVME_TEMPERATURE_THRESHOLD: + n->features.temp_thresh = dw11; + break; + case NVME_VOLATILE_WRITE_CACHE: blk_set_enable_write_cache(n->conf.blk, dw11 & 1); break; @@ -884,6 +1022,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return nvme_del_sq(n, cmd); case NVME_ADM_CMD_CREATE_SQ: return nvme_create_sq(n, cmd); + case NVME_ADM_CMD_GET_LOG_PAGE: + return nvme_get_log(n, cmd, req); case NVME_ADM_CMD_DELETE_CQ: return nvme_del_cq(n, cmd); case NVME_ADM_CMD_CREATE_CQ: @@ -923,6 +1063,7 @@ static void nvme_process_sq(void *opaque) QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry); memset(&req->cqe, 0, sizeof(req->cqe)); req->cqe.cid = cmd.cid; + req->cid = le16_to_cpu(cmd.cid); status = sq->sqid ? 
nvme_io_cmd(n, &cmd, req) : nvme_admin_cmd(n, &cmd, req); @@ -1378,6 +1519,10 @@ static void nvme_init_state(NvmeCtrl *n) n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); n->sq = g_new0(NvmeSQueue *, n->params.num_queues); n->cq = g_new0(NvmeCQueue *, n->params.num_queues); + n->elpes = g_new0(NvmeErrorLog, n->params.elpe + 1); + n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + n->temperature = NVME_TEMPERATURE; + n->features.temp_thresh = 0x14d; } static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) @@ -1446,7 +1591,8 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->oacs = cpu_to_le16(0); id->acl = 3; id->frmw = 7 << 1; - id->lpa = 1 << 0; + id->lpa = 1 << 2; + id->elpe = n->params.elpe; id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; id->nn = cpu_to_le32(n->num_namespaces); @@ -1530,6 +1676,7 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->namespaces); g_free(n->cq); g_free(n->sq); + g_free(n->elpes); if (n->params.cmb_size_mb) { g_free(n->cmbuf); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 7c3c07bde887..fcfd504e3f9f 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -6,12 +6,14 @@ #define DEFINE_NVME_PROPERTIES(_state, _props) \ DEFINE_PROP_STRING("serial", _state, _props.serial), \ DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \ - DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64) + DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \ + DEFINE_PROP_UINT8("elpe", _state, _props.elpe, 24) typedef struct NvmeParams { char *serial; uint32_t num_queues; uint32_t cmb_size_mb; + uint8_t elpe; } NvmeParams; typedef struct NvmeAsyncEvent { @@ -23,6 +25,7 @@ typedef struct NvmeRequest { struct NvmeSQueue *sq; BlockAIOCB *aiocb; uint16_t status; + uint16_t cid; bool has_sg; NvmeCqe cqe; BlockAcctCookie acct; @@ -92,12 +95,16 @@ typedef struct NvmeCtrl { uint64_t irq_status; uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* 
QEMU clock time */ + uint64_t starttime_ms; + uint16_t temperature; NvmeNamespace *namespaces; NvmeSQueue **sq; NvmeCQueue **cq; NvmeSQueue admin_sq; NvmeCQueue admin_cq; + NvmeFeatureVal features; + NvmeErrorLog *elpes; NvmeIdCtrl id_ctrl; } NvmeCtrl; diff --git a/hw/block/trace-events b/hw/block/trace-events index 13d1b21dd4e4..a2c7919c861e 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -46,6 +46,7 @@ nvme_getfeat_numq(int result) "get feature number of queues, result=%d" nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +nvme_get_log(uint16_t cid, uint16_t lid, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx16" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" @@ -85,6 +86,7 @@ nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion q nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" +nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is 
null" diff --git a/include/block/nvme.h b/include/block/nvme.h index 925f3f22792b..52abc3297b62 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -515,7 +515,7 @@ enum NvmeSmartWarn { NVME_SMART_FAILED_VOLATILE_MEDIA = 1 << 4, }; -enum LogIdentifier { +enum NvmeLogIdentifier { NVME_LOG_ERROR_INFO = 0x01, NVME_LOG_SMART_INFO = 0x02, NVME_LOG_FW_SLOT_INFO = 0x03, From patchwork Mon Nov 11 12:25:33 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192891 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVWB1qYmz9sPn for ; Mon, 11 Nov 2019 23:27:18 +1100 (AEDT) Received: from localhost ([::1]:51772 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8mp-0008C8-MN for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:27:15 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:36957) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8lk-0007hC-Uz for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:15 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8li-0003SP-K6 for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:08 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52196) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 
1iU8ld-0003OT-Gf; Mon, 11 Nov 2019 07:26:01 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 23C2BBF9A8; Mon, 11 Nov 2019 12:26:00 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 09/21] nvme: add support for the asynchronous event request command Date: Mon, 11 Nov 2019 13:25:33 +0100 Message-Id: <20191111122545.252478-10-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1, Section 5.2 ("Asynchronous Event Request command"). Mostly imported from Keith's qemu-nvme tree. Modified to not enqueue events if something of the same type is already queued (but not cleared by the host). 
Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 180 ++++++++++++++++++++++++++++++++++++++++-- hw/block/nvme.h | 13 ++- hw/block/trace-events | 8 ++ include/block/nvme.h | 4 +- 4 files changed, 196 insertions(+), 9 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index c50683e0d171..f1a9b2cf5cbd 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -334,6 +334,46 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); } +static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, + uint8_t event_info, uint8_t log_page) +{ + NvmeAsyncEvent *event; + + trace_nvme_enqueue_event(event_type, event_info, log_page); + + /* + * Do not enqueue the event if something of this type is already queued. + * This bounds the size of the event queue and makes sure it does not grow + * indefinitely when events are not processed by the host (i.e. does not + * issue any AERs). + */ + if (n->aer_mask_queued & (1 << event_type)) { + trace_nvme_enqueue_event_masked(event_type); + return; + } + n->aer_mask_queued |= (1 << event_type); + + event = g_new(NvmeAsyncEvent, 1); + event->result = (NvmeAerResult) { + .event_type = event_type, + .event_info = event_info, + .log_page = log_page, + }; + + QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry); + + timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); +} + +static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) +{ + n->aer_mask &= ~(1 << event_type); + if (!QTAILQ_EMPTY(&n->aer_queue)) { + timer_mod(n->aer_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } +} + static void nvme_rw_cb(void *opaque, int ret) { NvmeRequest *req = opaque; @@ -578,7 +618,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) return NVME_SUCCESS; } -static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, +static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest 
*req) { uint32_t trans_len; @@ -591,12 +631,16 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, trans_len = MIN(sizeof(*n->elpes) * (n->params.elpe + 1) - off, buf_len); + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_ERROR); + } + return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1, prp2); } -static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, - uint64_t off, NvmeRequest *req) +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, + uint32_t buf_len, uint64_t off, NvmeRequest *req) { uint64_t prp1 = le64_to_cpu(cmd->prp1); uint64_t prp2 = le64_to_cpu(cmd->prp2); @@ -645,6 +689,10 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, smart.power_on_hours[0] = cpu_to_le64( (((current_ms - n->starttime_ms) / 1000) / 60) / 60); + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_SMART); + } + return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1, prp2); } @@ -697,9 +745,9 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (lid) { case NVME_LOG_ERROR_INFO: - return nvme_error_info(n, cmd, len, off, req); + return nvme_error_info(n, cmd, rae, len, off, req); case NVME_LOG_SMART_INFO: - return nvme_smart_info(n, cmd, len, off, req); + return nvme_smart_info(n, cmd, rae, len, off, req); case NVME_LOG_FW_SLOT_INFO: return nvme_fw_log_info(n, cmd, len, off, req); default: @@ -957,6 +1005,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) break; case NVME_TIMESTAMP: return nvme_get_feature_timestamp(n, cmd); + case NVME_ASYNCHRONOUS_EVENT_CONF: + result = cpu_to_le32(n->features.async_config); + break; default: trace_nvme_err_invalid_getfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; @@ -992,6 +1043,12 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (dw10) { case NVME_TEMPERATURE_THRESHOLD: n->features.temp_thresh = dw11; + + if 
(n->features.temp_thresh <= n->temperature) { + nvme_enqueue_event(n, NVME_AER_TYPE_SMART, + NVME_AER_INFO_SMART_TEMP_THRESH, NVME_LOG_SMART_INFO); + } + break; case NVME_VOLATILE_WRITE_CACHE: @@ -1007,6 +1064,9 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) break; case NVME_TIMESTAMP: return nvme_set_feature_timestamp(n, cmd); + case NVME_ASYNCHRONOUS_EVENT_CONF: + n->features.async_config = dw11; + break; default: trace_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; @@ -1015,6 +1075,22 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_SUCCESS; } +static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + trace_nvme_aer(req->cid); + + if (n->outstanding_aers > n->params.aerl) { + trace_nvme_aer_aerl_exceeded(); + return NVME_AER_LIMIT_EXCEEDED; + } + + n->aer_reqs[n->outstanding_aers] = req; + timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + n->outstanding_aers++; + + return NVME_NO_COMPLETE; +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { switch (cmd->opcode) { @@ -1036,12 +1112,60 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return nvme_set_feature(n, cmd, req); case NVME_ADM_CMD_GET_FEATURES: return nvme_get_feature(n, cmd, req); + case NVME_ADM_CMD_ASYNC_EV_REQ: + return nvme_aer(n, cmd, req); default: trace_nvme_err_invalid_admin_opc(cmd->opcode); return NVME_INVALID_OPCODE | NVME_DNR; } } +static void nvme_process_aers(void *opaque) +{ + NvmeCtrl *n = opaque; + NvmeAsyncEvent *event, *next; + + trace_nvme_process_aers(); + + QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) { + NvmeRequest *req; + NvmeAerResult *result; + + /* can't post cqe if there is nothing to complete */ + if (!n->outstanding_aers) { + trace_nvme_no_outstanding_aers(); + break; + } + + /* ignore if masked (cqe posted, but event not cleared) */ + if (n->aer_mask & (1 << 
event->result.event_type)) { + trace_nvme_aer_masked(event->result.event_type, n->aer_mask); + continue; + } + + QTAILQ_REMOVE(&n->aer_queue, event, entry); + + n->aer_mask |= 1 << event->result.event_type; + n->aer_mask_queued &= ~(1 << event->result.event_type); + n->outstanding_aers--; + + req = n->aer_reqs[n->outstanding_aers]; + + result = (NvmeAerResult *) &req->cqe.result; + result->event_type = event->result.event_type; + result->event_info = event->result.event_info; + result->log_page = event->result.log_page; + g_free(event); + + req->status = NVME_SUCCESS; + + trace_nvme_aer_post_cqe(result->event_type, result->event_info, + result->log_page); + + nvme_enqueue_req_completion(&n->admin_cq, req); + } +} + static void nvme_process_sq(void *opaque) { NvmeSQueue *sq = opaque; @@ -1091,6 +1215,20 @@ static void nvme_clear_ctrl(NvmeCtrl *n) } } + if (n->aer_timer) { + timer_del(n->aer_timer); + timer_free(n->aer_timer); + n->aer_timer = NULL; + } + + while (!QTAILQ_EMPTY(&n->aer_queue)) { + NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue); + QTAILQ_REMOVE(&n->aer_queue, event, entry); + g_free(event); + } + + n->outstanding_aers = 0; + blk_flush(n->conf.blk); n->bar.cc = 0; } @@ -1187,6 +1325,9 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_set_timestamp(n, 0ULL); + n->aer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_aers, n); + QTAILQ_INIT(&n->aer_queue); + return 0; } @@ -1379,6 +1520,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) "completion queue doorbell write" " for nonexistent queue," " sqid=%"PRIu32", ignoring", qid); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER, + NVME_LOG_ERROR_INFO); + } + return; } @@ -1389,6 +1537,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) " beyond queue size, sqid=%"PRIu32"," " new_head=%"PRIu16", ignoring", qid, new_head); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + 
NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO); + } + return; } @@ -1417,6 +1571,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) "submission queue doorbell write" " for nonexistent queue," " sqid=%"PRIu32", ignoring", qid); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER, + NVME_LOG_ERROR_INFO); + } + return; } @@ -1427,6 +1588,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) " beyond queue size, sqid=%"PRIu32"," " new_tail=%"PRIu16", ignoring", qid, new_tail); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO); + } + return; } @@ -1523,6 +1690,7 @@ static void nvme_init_state(NvmeCtrl *n) n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); n->temperature = NVME_TEMPERATURE; n->features.temp_thresh = 0x14d; + n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); } static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) @@ -1590,6 +1758,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->ver = cpu_to_le32(0x00010201); id->oacs = cpu_to_le16(0); id->acl = 3; + id->aerl = n->params.aerl; id->frmw = 7 << 1; id->lpa = 1 << 2; id->elpe = n->params.elpe; @@ -1677,6 +1846,7 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->cq); g_free(n->sq); g_free(n->elpes); + g_free(n->aer_reqs); if (n->params.cmb_size_mb) { g_free(n->cmbuf); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index fcfd504e3f9f..3fc36f577b46 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -7,17 +7,19 @@ DEFINE_PROP_STRING("serial", _state, _props.serial), \ DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \ DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \ - DEFINE_PROP_UINT8("elpe", _state, _props.elpe, 24) + DEFINE_PROP_UINT8("elpe", _state, _props.elpe, 24), \ + DEFINE_PROP_UINT8("aerl", _state, _props.aerl, 3) typedef struct NvmeParams { char *serial; 
uint32_t num_queues; uint32_t cmb_size_mb; uint8_t elpe; + uint8_t aerl; } NvmeParams; typedef struct NvmeAsyncEvent { - QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry; + QTAILQ_ENTRY(NvmeAsyncEvent) entry; NvmeAerResult result; } NvmeAsyncEvent; @@ -89,6 +91,7 @@ typedef struct NvmeCtrl { uint32_t num_namespaces; uint32_t max_q_ents; uint64_t ns_size; + uint8_t outstanding_aers; uint32_t cmbsz; uint32_t cmbloc; uint8_t *cmbuf; @@ -98,6 +101,12 @@ typedef struct NvmeCtrl { uint64_t starttime_ms; uint16_t temperature; + QEMUTimer *aer_timer; + uint8_t aer_mask; + uint8_t aer_mask_queued; + NvmeRequest **aer_reqs; + QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; + NvmeNamespace *namespaces; NvmeSQueue **sq; NvmeCQueue **cq; diff --git a/hw/block/trace-events b/hw/block/trace-events index a2c7919c861e..6ddb13d34061 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -47,6 +47,14 @@ nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_coun nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" nvme_get_log(uint16_t cid, uint16_t lid, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx16" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" +nvme_process_aers(void) "processing aers" +nvme_aer(uint16_t cid) "cid %"PRIu16"" +nvme_aer_aerl_exceeded(void) "aerl exceeded" +nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" +nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" +nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_intm_clr(uint64_t data, 
uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 52abc3297b62..ded2ade1b462 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -386,8 +386,8 @@ enum NvmeAsyncEventRequest { NVME_AER_TYPE_SMART = 1, NVME_AER_TYPE_IO_SPECIFIC = 6, NVME_AER_TYPE_VENDOR_SPECIFIC = 7, - NVME_AER_INFO_ERR_INVALID_SQ = 0, - NVME_AER_INFO_ERR_INVALID_DB = 1, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER = 0, + NVME_AER_INFO_ERR_INVALID_DB_VALUE = 1, NVME_AER_INFO_ERR_DIAG_FAIL = 2, NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3, NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR = 4, From patchwork Mon Nov 11 12:25:34 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192894 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVZH45Bwz9sPj for ; Mon, 11 Nov 2019 23:29:59 +1100 (AEDT) Received: from localhost ([::1]:51810 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8pQ-0003So-Or for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:29:56 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:36920) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8li-0007ca-84 for qemu-devel@nongnu.org; Mon, 11 
Nov 2019 07:26:07 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8lh-0003Rv-0r for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:06 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52200) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8ld-0003Oa-GW; Mon, 11 Nov 2019 07:26:01 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 714AABFAE9; Mon, 11 Nov 2019 12:26:00 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 10/21] nvme: add logging to error information log page Date: Mon, 11 Nov 2019 13:25:34 +0100 Message-Id: <20191111122545.252478-11-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" This adds the nvme_set_error_page function which allows errors to be written to the error information log page. The functionality is largely unused in the device, but with this in place we can at least try to push new contributions to use it. NOTE: In violation of the specification the Error Count field is *not* retained across power off conditions because the device currently has no place to store this kind of persistent state. Cribbed from Keith's qemu-nvme tree. 
Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 22 ++++++++++++++++++++-- hw/block/nvme.h | 2 ++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index f1a9b2cf5cbd..47f7c5cfcff9 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -161,6 +161,22 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) } } +static void nvme_set_error_page(NvmeCtrl *n, uint16_t sqid, uint16_t cid, + uint16_t status, uint16_t location, uint64_t lba, uint32_t nsid) +{ + NvmeErrorLog *elp; + + elp = &n->elpes[n->elp_index]; + elp->error_count = n->error_count++; + elp->sqid = sqid; + elp->cid = cid; + elp->status_field = status; + elp->param_error_location = location; + elp->lba = lba; + elp->nsid = nsid; + n->elp_index = (n->elp_index + 1) % n->params.elpe; +} + static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, uint64_t prp2, uint32_t len, NvmeCtrl *n) { @@ -386,7 +402,9 @@ static void nvme_rw_cb(void *opaque, int ret) req->status = NVME_SUCCESS; } else { block_acct_failed(blk_get_stats(n->conf.blk), &req->acct); - req->status = NVME_INTERNAL_DEV_ERROR; + nvme_set_error_page(n, sq->sqid, cpu_to_le16(req->cid), + NVME_INTERNAL_DEV_ERROR, 0, 0, 1); + req->status = NVME_INTERNAL_DEV_ERROR | NVME_MORE; } if (req->has_sg) { qemu_sglist_destroy(&req->qsg); @@ -677,7 +695,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, smart.host_read_commands[0] = cpu_to_le64(read_commands); smart.host_write_commands[0] = cpu_to_le64(write_commands); - smart.number_of_error_log_entries[0] = cpu_to_le64(0); + smart.number_of_error_log_entries[0] = cpu_to_le64(n->error_count); smart.temperature[0] = n->temperature & 0xff; smart.temperature[1] = (n->temperature >> 8) & 0xff; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 3fc36f577b46..d74b0e0f9b2c 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -100,6 +100,8 @@ typedef struct NvmeCtrl { uint64_t timestamp_set_qemu_clock_ms; /* QEMU 
clock time */ uint64_t starttime_ms; uint16_t temperature; + uint8_t elp_index; + uint64_t error_count; QEMUTimer *aer_timer; uint8_t aer_mask; From patchwork Mon Nov 11 12:25:35 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192903 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVgm5xk1z9sRM for ; Mon, 11 Nov 2019 23:34:44 +1100 (AEDT) Received: from localhost ([::1]:51862 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8u2-00081M-5Z for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:34:42 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37119) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8m3-0008Cy-EN for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:29 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8m1-0003YH-4l for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:27 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52214) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8lx-0003Oi-Qt; Mon, 11 Nov 2019 07:26:22 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id BB086BFAF3; Mon, 11 Nov 2019 12:26:00 +0000 (UTC) From: Klaus Jensen To: 
qemu-block@nongnu.org Subject: [PATCH v3 11/21] nvme: add missing mandatory features Date: Mon, 11 Nov 2019 13:25:35 +0100 Message-Id: <20191111122545.252478-12-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Add support for returning a reasonable response to Get/Set Features of mandatory features. Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 51 ++++++++++++++++++++++++++++++++++++++++--- hw/block/trace-events | 2 ++ include/block/nvme.h | 3 ++- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 47f7c5cfcff9..5c3dc49416ec 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1006,12 +1006,24 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); uint32_t result; + trace_nvme_getfeat(dw10); + switch (dw10) { + case NVME_ARBITRATION: + result = cpu_to_le32(n->features.arbitration); + break; + case NVME_POWER_MANAGEMENT: + result = cpu_to_le32(n->features.power_mgmt); + break; case NVME_TEMPERATURE_THRESHOLD: result = cpu_to_le32(n->features.temp_thresh); break; + case NVME_ERROR_RECOVERY: + result = cpu_to_le32(n->features.err_rec); + break; case NVME_VOLATILE_WRITE_CACHE: result = 
blk_enable_write_cache(n->conf.blk); trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); @@ -1023,6 +1035,19 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) break; case NVME_TIMESTAMP: return nvme_get_feature_timestamp(n, cmd); + case NVME_INTERRUPT_COALESCING: + result = cpu_to_le32(n->features.int_coalescing); + break; + case NVME_INTERRUPT_VECTOR_CONF: + if ((dw11 & 0xffff) > n->params.num_queues) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + result = cpu_to_le32(n->features.int_vector_config[dw11 & 0xffff]); + break; + case NVME_WRITE_ATOMICITY: + result = cpu_to_le32(n->features.write_atomicity); + break; case NVME_ASYNCHRONOUS_EVENT_CONF: result = cpu_to_le32(n->features.async_config); break; @@ -1058,6 +1083,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t dw10 = le32_to_cpu(cmd->cdw10); uint32_t dw11 = le32_to_cpu(cmd->cdw11); + trace_nvme_setfeat(dw10, dw11); + switch (dw10) { case NVME_TEMPERATURE_THRESHOLD: n->features.temp_thresh = dw11; @@ -1085,6 +1112,13 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_ASYNCHRONOUS_EVENT_CONF: n->features.async_config = dw11; break; + case NVME_ARBITRATION: + case NVME_POWER_MANAGEMENT: + case NVME_ERROR_RECOVERY: + case NVME_INTERRUPT_COALESCING: + case NVME_INTERRUPT_VECTOR_CONF: + case NVME_WRITE_ATOMICITY: + return NVME_FEAT_NOT_CHANGABLE | NVME_DNR; default: trace_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; @@ -1708,6 +1742,14 @@ static void nvme_init_state(NvmeCtrl *n) n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); n->temperature = NVME_TEMPERATURE; n->features.temp_thresh = 0x14d; + n->features.int_vector_config = g_malloc0_n(n->params.num_queues, + sizeof(*n->features.int_vector_config)); + + /* disable coalescing (not supported) */ + for (int i = 0; i < n->params.num_queues; i++) { + n->features.int_vector_config[i] = i | (1 << 16); + } + 
n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); } @@ -1785,15 +1827,17 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->nn = cpu_to_le32(n->num_namespaces); id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP); + + if (blk_enable_write_cache(n->conf.blk)) { + id->vwc = 1; + } + strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:"); pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial); id->psd[0].mp = cpu_to_le16(0x9c4); id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); - if (blk_enable_write_cache(n->conf.blk)) { - id->vwc = 1; - } n->bar.cap = 0; NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); @@ -1865,6 +1909,7 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->sq); g_free(n->elpes); g_free(n->aer_reqs); + g_free(n->features.int_vector_config); if (n->params.cmb_size_mb) { g_free(n->cmbuf); diff --git a/hw/block/trace-events b/hw/block/trace-events index 6ddb13d34061..a20a68d85d5a 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -41,6 +41,8 @@ nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" nvme_identify_ctrl(void) "identify controller" nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" +nvme_getfeat(uint32_t fid) "fid 0x%"PRIx32"" +nvme_setfeat(uint32_t fid, uint32_t val) "fid 0x%"PRIx32" val 0x%"PRIx32"" nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" nvme_getfeat_numq(int result) "get feature number of queues, result=%d" nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" diff --git a/include/block/nvme.h b/include/block/nvme.h index ded2ade1b462..6940b71e0e3e 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -445,7 +445,8 @@ enum NvmeStatusCodes { NVME_FW_REQ_RESET = 0x010b, NVME_INVALID_QUEUE_DEL = 0x010c, NVME_FID_NOT_SAVEABLE = 0x010d, - 
NVME_FID_NOT_NSID_SPEC = 0x010f, + NVME_FEAT_NOT_CHANGABLE = 0x010e, + NVME_FEAT_NOT_NSID_SPEC = 0x010f, NVME_FW_REQ_SUSYSTEM_RESET = 0x0110, NVME_CONFLICTING_ATTRS = 0x0180, NVME_INVALID_PROT_INFO = 0x0181, From patchwork Mon Nov 11 12:25:36 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192908 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVkN11YNz9sPn for ; Mon, 11 Nov 2019 23:37:00 +1100 (AEDT) Received: from localhost ([::1]:51890 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8wD-0003Aa-Kj for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:36:57 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37218) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8m8-0008GS-K6 for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:36 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8m5-0003a5-S9 for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:32 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52228) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8ly-0003Om-3e; Mon, 11 Nov 2019 07:26:22 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 0EB41BFAF8; Mon, 11 Nov 
2019 12:26:01 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 12/21] nvme: bump supported specification version to 1.3 Date: Mon, 11 Nov 2019 13:25:36 +0100 Message-Id: <20191111122545.252478-13-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Add the new Namespace Identification Descriptor List (CNS 03h) and track creation of queues to enable the controller to return Command Sequence Error if Set Features is called for Number of Queues after any queues have been created. Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 82 +++++++++++++++++++++++++++++++++++-------- hw/block/nvme.h | 1 + hw/block/trace-events | 8 +++-- include/block/nvme.h | 28 +++++++++++---- 4 files changed, 96 insertions(+), 23 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 5c3dc49416ec..7c10d0745e4e 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -9,20 +9,22 @@ */ /** - * Reference Specification: NVM Express 1.2.1 + * Reference Specification: NVM Express 1.3d * * https://nvmexpress.org/resources/specifications/ */ /** * Usage: add options: - * -drive file=,if=none,id= - * -device nvme,drive=,serial=,id=, \ - * cmb_size_mb=, \ - * num_queues= + * -drive file=,if=none,id= + * -device nvme,drive=,serial=,id= * - * Note cmb_size_mb denotes size of CMB in MB. 
CMB is assumed to be at - * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. + * Advanced optional options: + * + * num_queues= : Maximum number of IO Queues. + * Default: 64 + * cmb_size_mb= : Size of Controller Memory Buffer in MBs. + * Default: 0 (disabled) */ #include "qemu/osdep.h" @@ -345,6 +347,8 @@ static void nvme_post_cqes(void *opaque) static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) { assert(cq->cqid == req->sq->cqid); + + trace_nvme_enqueue_req_completion(req->cid, cq->cqid, req->status); QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); @@ -530,6 +534,7 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) if (sq->sqid) { g_free(sq); } + n->qs_created--; } static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) @@ -596,6 +601,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, cq = n->cq[cqid]; QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry); n->sq[sqid] = sq; + n->qs_created++; } static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) @@ -741,7 +747,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t dw11 = le32_to_cpu(cmd->cdw11); uint32_t dw12 = le32_to_cpu(cmd->cdw12); uint32_t dw13 = le32_to_cpu(cmd->cdw13); - uint16_t lid = dw10 & 0xff; + uint8_t lid = dw10 & 0xff; + uint8_t lsp = (dw10 >> 8) & 0xf; uint8_t rae = (dw10 >> 15) & 0x1; uint32_t numdl, numdu; uint64_t off, lpol, lpou; @@ -759,7 +766,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_INVALID_FIELD | NVME_DNR; } - trace_nvme_get_log(req->cid, lid, rae, len, off); + trace_nvme_get_log(req->cid, lid, lsp, rae, len, off); switch (lid) { case NVME_LOG_ERROR_INFO: @@ -783,6 +790,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) if (cq->cqid) { g_free(cq); } + n->qs_created--; } static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd 
*cmd) @@ -823,6 +831,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, msix_vector_use(&n->parent_obj, cq->vector); n->cq[cqid] = cq; cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); + n->qs_created++; } static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) @@ -896,7 +905,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) prp1, prp2); } -static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) +static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c) { static const int data_len = 4 * KiB; uint32_t min_nsid = le32_to_cpu(c->nsid); @@ -906,7 +915,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) uint16_t ret; int i, j = 0; - trace_nvme_identify_nslist(min_nsid); + trace_nvme_identify_ns_list(min_nsid); list = g_malloc0(data_len); for (i = 0; i < n->num_namespaces; i++) { @@ -923,6 +932,41 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) return ret; } +static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *c) +{ + static const int len = 4096; + + struct ns_descr { + uint8_t nidt; + uint8_t nidl; + uint8_t rsvd2[2]; + uint8_t nid[16]; + }; + + uint32_t nsid = le32_to_cpu(c->nsid); + uint64_t prp1 = le64_to_cpu(c->prp1); + uint64_t prp2 = le64_to_cpu(c->prp2); + + struct ns_descr *list; + uint16_t ret; + + trace_nvme_identify_ns_descr_list(nsid); + + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); + return NVME_INVALID_NSID | NVME_DNR; + } + + list = g_malloc0(len); + list->nidt = 0x3; + list->nidl = 0x10; + *(uint32_t *) &list->nid[12] = cpu_to_be32(nsid); + + ret = nvme_dma_read_prp(n, (uint8_t *) list, len, prp1, prp2); + g_free(list); + return ret; +} + static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) { NvmeIdentify *c = (NvmeIdentify *)cmd; @@ -933,7 +977,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) case 0x01: return nvme_identify_ctrl(n, c); 
case 0x02: - return nvme_identify_nslist(n, c); + return nvme_identify_ns_list(n, c); + case 0x03: + return nvme_identify_ns_descr_list(n, cmd); default: trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); return NVME_INVALID_FIELD | NVME_DNR; @@ -1100,6 +1146,14 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) blk_set_enable_write_cache(n->conf.blk, dw11 & 1); break; case NVME_NUMBER_OF_QUEUES: + if (n->qs_created > 2) { + return NVME_CMD_SEQ_ERROR | NVME_DNR; + } + + if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) { + return NVME_INVALID_FIELD | NVME_DNR; + } + trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, ((dw11 >> 16) & 0xFFFF) + 1, n->params.num_queues - 1, @@ -1815,7 +1869,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->ieee[0] = 0x00; id->ieee[1] = 0x02; id->ieee[2] = 0xb3; - id->ver = cpu_to_le32(0x00010201); + id->ver = cpu_to_le32(0x00010300); id->oacs = cpu_to_le16(0); id->acl = 3; id->aerl = n->params.aerl; @@ -1846,7 +1900,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) NVME_CAP_SET_CSS(n->bar.cap, 1); NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - n->bar.vs = 0x00010201; + n->bar.vs = 0x00010300; n->bar.intmc = n->bar.intms = 0; } diff --git a/hw/block/nvme.h b/hw/block/nvme.h index d74b0e0f9b2c..3f7bd627e824 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -102,6 +102,7 @@ typedef struct NvmeCtrl { uint16_t temperature; uint8_t elp_index; uint64_t error_count; + uint32_t qs_created; QEMUTimer *aer_timer; uint8_t aer_mask; diff --git a/hw/block/trace-events b/hw/block/trace-events index a20a68d85d5a..f62fa99dc2cd 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -39,8 +39,9 @@ nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uin nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" nvme_identify_ctrl(void) "identify controller" -nvme_identify_ns(uint16_t ns) "identify 
namespace, nsid=%"PRIu16"" -nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" +nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" +nvme_identify_ns_list(uint32_t ns) "nsid %"PRIu32"" +nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32"" nvme_getfeat(uint32_t fid) "fid 0x%"PRIx32"" nvme_setfeat(uint32_t fid, uint32_t val) "fid 0x%"PRIx32" val 0x%"PRIx32"" nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" @@ -48,12 +49,13 @@ nvme_getfeat_numq(int result) "get feature number of queues, result=%d" nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" -nvme_get_log(uint16_t cid, uint16_t lid, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx16" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" +nvme_get_log(uint16_t cid, uint16_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx16" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" nvme_process_aers(void) "processing aers" nvme_aer(uint16_t cid) "cid %"PRIu16"" nvme_aer_aerl_exceeded(void) "aerl exceeded" nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16"" nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" diff --git a/include/block/nvme.h b/include/block/nvme.h index 6940b71e0e3e..98ba45fc09e6 100644 --- a/include/block/nvme.h 
+++ b/include/block/nvme.h @@ -550,7 +550,9 @@ typedef struct NvmeIdCtrl { uint32_t rtd3e; uint32_t oaes; uint32_t ctratt; - uint8_t rsvd255[156]; + uint8_t rsvd100[12]; + uint8_t fguid[16]; + uint8_t rsvd128[128]; uint16_t oacs; uint8_t acl; uint8_t aerl; @@ -568,9 +570,15 @@ typedef struct NvmeIdCtrl { uint8_t tnvmcap[16]; uint8_t unvmcap[16]; uint32_t rpmbs; - uint8_t rsvd319[4]; + uint16_t edstt; + uint8_t dsto; + uint8_t fwug; uint16_t kas; - uint8_t rsvd511[190]; + uint16_t hctma; + uint16_t mntmt; + uint16_t mxtmt; + uint32_t sanicap; + uint8_t rsvd332[180]; uint8_t sqes; uint8_t cqes; uint16_t maxcmd; @@ -678,13 +686,21 @@ typedef struct NvmeIdNs { uint8_t mc; uint8_t dpc; uint8_t dps; - uint8_t nmic; uint8_t rescap; uint8_t fpi; uint8_t dlfeat; - - uint8_t res34[94]; + uint16_t nawun; + uint16_t nawupf; + uint16_t nacwu; + uint16_t nabsn; + uint16_t nabo; + uint16_t nabspf; + uint16_t noiob; + uint8_t nvmcap[16]; + uint8_t resv103[40]; + uint8_t nguid[16]; + uint64_t eui64; NvmeLBAF lbaf[16]; uint8_t res192[192]; uint8_t vs[3712]; From patchwork Mon Nov 11 12:25:37 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192904 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVgz37kJz9sQp for ; Mon, 11 Nov 2019 23:34:55 +1100 (AEDT) Received: from localhost ([::1]:51866 helo=lists1p.gnu.org) 
by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8uC-0008KD-RE for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:34:52 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37245) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8mA-0008HW-My for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:37 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8m5-0003a8-SH for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:34 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52236) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8ly-0003PJ-HP; Mon, 11 Nov 2019 07:26:22 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 5E32CBFAFF; Mon, 11 Nov 2019 12:26:01 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 13/21] nvme: refactor prp mapping Date: Mon, 11 Nov 2019 13:25:37 +0100 Message-Id: <20191111122545.252478-14-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Instead of handling both QSGs and IOVs in multiple places, simply use QSGs everywhere by assuming that the request does not involve the controller memory buffer (CMB). If the request is found to involve the CMB, convert the QSG to an IOV and issue the I/O. 
The QSG is converted to an IOV by the dma helpers anyway, so the CMB path is not unfairly affected by this simplifying change. As a side-effect, this patch also allows PRPs to be located in the CMB. The logic ensures that if some of the PRP is in the CMB, all of it must be located there, as per the specification. Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 255 ++++++++++++++++++++++++++++-------------- hw/block/nvme.h | 4 +- hw/block/trace-events | 1 + include/block/nvme.h | 1 + 4 files changed, 174 insertions(+), 87 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 7c10d0745e4e..43c139e1644c 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -179,138 +179,200 @@ static void nvme_set_error_page(NvmeCtrl *n, uint16_t sqid, uint16_t cid, n->elp_index = (n->elp_index + 1) % n->params.elpe; } -static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, - uint64_t prp2, uint32_t len, NvmeCtrl *n) +static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1, + uint64_t prp2, uint32_t len, NvmeRequest *req) { hwaddr trans_len = n->page_size - (prp1 % n->page_size); trans_len = MIN(len, trans_len); int num_prps = (len >> n->page_bits) + 1; + uint16_t status = NVME_SUCCESS; + bool prp_list_in_cmb = false; + + trace_nvme_map_prp(req->cid, req->cmd.opcode, trans_len, len, prp1, prp2, + num_prps); if (unlikely(!prp1)) { trace_nvme_err_invalid_prp(); return NVME_INVALID_FIELD | NVME_DNR; - } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && - prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { - qsg->nsg = 0; - qemu_iovec_init(iov, num_prps); - qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len); - } else { - pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); - qemu_sglist_add(qsg, prp1, trans_len); } + + if (nvme_addr_is_cmb(n, prp1)) { + req->is_cmb = true; + } + + pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); + qemu_sglist_add(qsg, prp1, trans_len); + len -= trans_len; if (len) { 
if (unlikely(!prp2)) { trace_nvme_err_invalid_prp2_missing(); + status = NVME_INVALID_FIELD | NVME_DNR; goto unmap; } + if (len > n->page_size) { uint64_t prp_list[n->max_prp_ents]; uint32_t nents, prp_trans; int i = 0; + if (nvme_addr_is_cmb(n, prp2)) { + prp_list_in_cmb = true; + } + nents = (len + n->page_size - 1) >> n->page_bits; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); + nvme_addr_read(n, prp2, (void *) prp_list, prp_trans); while (len != 0) { + bool addr_is_cmb; uint64_t prp_ent = le64_to_cpu(prp_list[i]); if (i == n->max_prp_ents - 1 && len > n->page_size) { if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { trace_nvme_err_invalid_prplist_ent(prp_ent); + status = NVME_INVALID_FIELD | NVME_DNR; + goto unmap; + } + + addr_is_cmb = nvme_addr_is_cmb(n, prp_ent); + if ((prp_list_in_cmb && !addr_is_cmb) || + (!prp_list_in_cmb && addr_is_cmb)) { + status = NVME_INVALID_USE_OF_CMB | NVME_DNR; goto unmap; } i = 0; nents = (len + n->page_size - 1) >> n->page_bits; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - nvme_addr_read(n, prp_ent, (void *)prp_list, - prp_trans); + nvme_addr_read(n, prp_ent, (void *) prp_list, prp_trans); prp_ent = le64_to_cpu(prp_list[i]); } if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { trace_nvme_err_invalid_prplist_ent(prp_ent); + status = NVME_INVALID_FIELD | NVME_DNR; goto unmap; } - trans_len = MIN(len, n->page_size); - if (qsg->nsg){ - qemu_sglist_add(qsg, prp_ent, trans_len); - } else { - qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len); + addr_is_cmb = nvme_addr_is_cmb(n, prp_ent); + if ((req->is_cmb && !addr_is_cmb) || + (!req->is_cmb && addr_is_cmb)) { + status = NVME_INVALID_USE_OF_CMB | NVME_DNR; + goto unmap; } + + trans_len = MIN(len, n->page_size); + qemu_sglist_add(qsg, prp_ent, trans_len); + len -= trans_len; i++; } } else { + bool addr_is_cmb = nvme_addr_is_cmb(n, prp2); + if ((req->is_cmb 
&& !addr_is_cmb) || + (!req->is_cmb && addr_is_cmb)) { + status = NVME_INVALID_USE_OF_CMB | NVME_DNR; + goto unmap; + } + if (unlikely(prp2 & (n->page_size - 1))) { trace_nvme_err_invalid_prp2_align(prp2); + status = NVME_INVALID_FIELD | NVME_DNR; goto unmap; } - if (qsg->nsg) { - qemu_sglist_add(qsg, prp2, len); - } else { - qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len); - } + + qemu_sglist_add(qsg, prp2, len); } } + return NVME_SUCCESS; - unmap: +unmap: qemu_sglist_destroy(qsg); - return NVME_INVALID_FIELD | NVME_DNR; + + return status; +} + +static void dma_to_cmb(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov) +{ + for (int i = 0; i < qsg->nsg; i++) { + void *addr = &n->cmbuf[qsg->sg[i].base - n->ctrl_mem.addr]; + qemu_iovec_add(iov, addr, qsg->sg[i].len); + } } static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - uint64_t prp1, uint64_t prp2) + uint64_t prp1, uint64_t prp2, NvmeRequest *req) { QEMUSGList qsg; - QEMUIOVector iov; uint16_t status = NVME_SUCCESS; - if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { - return NVME_INVALID_FIELD | NVME_DNR; + status = nvme_map_prp(n, &qsg, prp1, prp2, len, req); + if (status) { + return status; } - if (qsg.nsg > 0) { - if (dma_buf_write(ptr, len, &qsg)) { - status = NVME_INVALID_FIELD | NVME_DNR; - } - qemu_sglist_destroy(&qsg); - } else { - if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { + + if (req->is_cmb) { + QEMUIOVector iov; + + qemu_iovec_init(&iov, qsg.nsg); + dma_to_cmb(n, &qsg, &iov); + + if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { + trace_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } + qemu_iovec_destroy(&iov); + + return status; + } + + if (unlikely(dma_buf_write(ptr, len, &qsg))) { + trace_nvme_err_invalid_dma(); + status = NVME_INVALID_FIELD | NVME_DNR; } + + qemu_sglist_destroy(&qsg); + return status; } static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - uint64_t prp1, uint64_t 
prp2) + uint64_t prp1, uint64_t prp2, NvmeRequest *req) { QEMUSGList qsg; - QEMUIOVector iov; uint16_t status = NVME_SUCCESS; - trace_nvme_dma_read(prp1, prp2); - - if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { - return NVME_INVALID_FIELD | NVME_DNR; + status = nvme_map_prp(n, &qsg, prp1, prp2, len, req); + if (status) { + return status; } - if (qsg.nsg > 0) { - if (unlikely(dma_buf_read(ptr, len, &qsg))) { - trace_nvme_err_invalid_dma(); - status = NVME_INVALID_FIELD | NVME_DNR; - } - qemu_sglist_destroy(&qsg); - } else { + + if (req->is_cmb) { + QEMUIOVector iov; + + qemu_iovec_init(&iov, qsg.nsg); + dma_to_cmb(n, &qsg, &iov); + if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) { trace_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } + qemu_iovec_destroy(&iov); + + goto out; } + + if (unlikely(dma_buf_read(ptr, len, &qsg))) { + trace_nvme_err_invalid_dma(); + status = NVME_INVALID_FIELD | NVME_DNR; + } + +out: + qemu_sglist_destroy(&qsg); + return status; } @@ -400,6 +462,7 @@ static void nvme_rw_cb(void *opaque, int ret) NvmeSQueue *sq = req->sq; NvmeCtrl *n = sq->ctrl; NvmeCQueue *cq = n->cq[sq->cqid]; + NvmeRwCmd *rw = (NvmeRwCmd *) &req->cmd; if (!ret) { block_acct_done(blk_get_stats(n->conf.blk), &req->acct); @@ -407,19 +470,23 @@ static void nvme_rw_cb(void *opaque, int ret) } else { block_acct_failed(blk_get_stats(n->conf.blk), &req->acct); nvme_set_error_page(n, sq->sqid, cpu_to_le16(req->cid), - NVME_INTERNAL_DEV_ERROR, 0, 0, 1); + NVME_INTERNAL_DEV_ERROR, offsetof(NvmeRwCmd, slba), rw->slba, 1); req->status = NVME_INTERNAL_DEV_ERROR | NVME_MORE; } - if (req->has_sg) { + + if (req->qsg.nalloc) { qemu_sglist_destroy(&req->qsg); } + if (req->iov.nalloc) { + qemu_iovec_destroy(&req->iov); + } + nvme_enqueue_req_completion(cq, req); } static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, NvmeRequest *req) { - req->has_sg = false; block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, 
BLOCK_ACCT_FLUSH); req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req); @@ -443,7 +510,6 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, return NVME_LBA_RANGE | NVME_DNR; } - req->has_sg = false; block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, BLOCK_ACCT_WRITE); req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count, @@ -475,21 +541,21 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, return NVME_LBA_RANGE | NVME_DNR; } - if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) { + if (nvme_map_prp(n, &req->qsg, prp1, prp2, data_size, req)) { block_acct_invalid(blk_get_stats(n->conf.blk), acct); return NVME_INVALID_FIELD | NVME_DNR; } dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct); - if (req->qsg.nsg > 0) { - req->has_sg = true; + if (!req->is_cmb) { req->aiocb = is_write ? dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, nvme_rw_cb, req) : dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, nvme_rw_cb, req); } else { - req->has_sg = false; + qemu_iovec_init(&req->iov, req->qsg.nsg); + dma_to_cmb(n, &req->qsg, &req->iov); req->aiocb = is_write ? 
blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, req) : @@ -587,7 +653,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, sq->size = size; sq->cqid = cqid; sq->head = sq->tail = 0; - sq->io_req = g_new(NvmeRequest, sq->size); + sq->io_req = g_new0(NvmeRequest, sq->size); QTAILQ_INIT(&sq->req_list); QTAILQ_INIT(&sq->out_req_list); @@ -660,7 +726,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, } return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1, - prp2); + prp2, req); } static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, @@ -718,7 +784,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, } return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1, - prp2); + prp2, req); } static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, @@ -738,7 +804,7 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, trans_len = MIN(sizeof(fw_log) - off, buf_len); return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, - prp2); + prp2, req); } static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) @@ -874,7 +940,8 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) return NVME_SUCCESS; } -static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) +static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c, + NvmeRequest *req) { uint64_t prp1 = le64_to_cpu(c->prp1); uint64_t prp2 = le64_to_cpu(c->prp2); @@ -882,10 +949,11 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) trace_nvme_identify_ctrl(); return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), - prp1, prp2); + prp1, prp2, req); } -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c, + NvmeRequest *req) { NvmeNamespace *ns; uint32_t nsid = le32_to_cpu(c->nsid); @@ -902,10 
+970,11 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) ns = &n->namespaces[nsid - 1]; return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), - prp1, prp2); + prp1, prp2, req); } -static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c) +static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c, + NvmeRequest *req) { static const int data_len = 4 * KiB; uint32_t min_nsid = le32_to_cpu(c->nsid); @@ -927,12 +996,13 @@ static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c) break; } } - ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2); + ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2, req); g_free(list); return ret; } -static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *c) +static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c, + NvmeRequest *req) { static const int len = 4096; @@ -962,24 +1032,24 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *c) list->nidl = 0x10; *(uint32_t *) &list->nid[12] = cpu_to_be32(nsid); - ret = nvme_dma_read_prp(n, (uint8_t *) list, len, prp1, prp2); + ret = nvme_dma_read_prp(n, (uint8_t *) list, len, prp1, prp2, req); g_free(list); return ret; } -static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) +static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { NvmeIdentify *c = (NvmeIdentify *)cmd; switch (le32_to_cpu(c->cns)) { case 0x00: - return nvme_identify_ns(n, c); + return nvme_identify_ns(n, c, req); case 0x01: - return nvme_identify_ctrl(n, c); + return nvme_identify_ctrl(n, c, req); case 0x02: - return nvme_identify_ns_list(n, c); + return nvme_identify_ns_list(n, c, req); case 0x03: - return nvme_identify_ns_descr_list(n, cmd); + return nvme_identify_ns_descr_list(n, c, req); default: trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); return NVME_INVALID_FIELD | NVME_DNR; @@ -1038,15 +1108,16 @@ static inline uint64_t nvme_get_timestamp(const 
NvmeCtrl *n) return cpu_to_le64(ts.all); } -static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) +static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, + NvmeRequest *req) { uint64_t prp1 = le64_to_cpu(cmd->prp1); uint64_t prp2 = le64_to_cpu(cmd->prp2); uint64_t timestamp = nvme_get_timestamp(n); - return nvme_dma_read_prp(n, (uint8_t *)&timestamp, - sizeof(timestamp), prp1, prp2); + return nvme_dma_read_prp(n, (uint8_t *)&timestamp, sizeof(timestamp), + prp1, prp2, req); } static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) @@ -1080,7 +1151,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) trace_nvme_getfeat_numq(result); break; case NVME_TIMESTAMP: - return nvme_get_feature_timestamp(n, cmd); + return nvme_get_feature_timestamp(n, cmd, req); case NVME_INTERRUPT_COALESCING: result = cpu_to_le32(n->features.int_coalescing); break; @@ -1106,7 +1177,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_SUCCESS; } -static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) +static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, + NvmeRequest *req) { uint16_t ret; uint64_t timestamp; @@ -1114,7 +1186,7 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) uint64_t prp2 = le64_to_cpu(cmd->prp2); ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp, - sizeof(timestamp), prp1, prp2); + sizeof(timestamp), prp1, prp2, req); if (ret != NVME_SUCCESS) { return ret; } @@ -1162,7 +1234,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) ((n->params.num_queues - 2) << 16)); break; case NVME_TIMESTAMP: - return nvme_set_feature_timestamp(n, cmd); + return nvme_set_feature_timestamp(n, cmd, req); case NVME_ASYNCHRONOUS_EVENT_CONF: n->features.async_config = dw11; break; @@ -1211,7 +1283,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_ADM_CMD_CREATE_CQ:
return nvme_create_cq(n, cmd); case NVME_ADM_CMD_IDENTIFY: - return nvme_identify(n, cmd); + return nvme_identify(n, cmd, req); case NVME_ADM_CMD_ABORT: return nvme_abort(n, cmd, req); case NVME_ADM_CMD_SET_FEATURES: @@ -1272,6 +1344,18 @@ static void nvme_process_aers(void *opaque) } } +static void nvme_init_req(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + memset(&req->cqe, 0, sizeof(req->cqe)); + req->cqe.cid = cmd->cid; + req->cid = le16_to_cpu(cmd->cid); + + memcpy(&req->cmd, cmd, sizeof(NvmeCmd)); + req->status = NVME_SUCCESS; + req->is_cmb = false; + req->is_write = false; +} + static void nvme_process_sq(void *opaque) { NvmeSQueue *sq = opaque; @@ -1291,9 +1375,8 @@ static void nvme_process_sq(void *opaque) req = QTAILQ_FIRST(&sq->req_list); QTAILQ_REMOVE(&sq->req_list, req, entry); QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry); - memset(&req->cqe, 0, sizeof(req->cqe)); - req->cqe.cid = cmd.cid; - req->cid = le16_to_cpu(cmd.cid); + + nvme_init_req(n, &cmd, req); status = sq->sqid ? 
nvme_io_cmd(n, &cmd, req) : nvme_admin_cmd(n, &cmd, req); @@ -1814,7 +1897,7 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 3f7bd627e824..add9ff335aa5 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -28,11 +28,13 @@ typedef struct NvmeRequest { BlockAIOCB *aiocb; uint16_t status; uint16_t cid; - bool has_sg; + bool is_cmb; + bool is_write; NvmeCqe cqe; BlockAcctCookie acct; QEMUSGList qsg; QEMUIOVector iov; + NvmeCmd cmd; QTAILQ_ENTRY(NvmeRequest)entry; } NvmeRequest; diff --git a/hw/block/trace-events b/hw/block/trace-events index f62fa99dc2cd..e81bb3a64ed7 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -33,6 +33,7 @@ nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" nvme_irq_pin(void) "pulsing IRQ pin" nvme_irq_masked(void) "IRQ is masked" nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" +nvme_map_prp(uint16_t cid, uint8_t opc, uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "cid %"PRIu16" opc 0x%"PRIx8" trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", 
qflags=%"PRIu16", ien=%d" diff --git a/include/block/nvme.h b/include/block/nvme.h index 98ba45fc09e6..2aaa45e20566 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -427,6 +427,7 @@ enum NvmeStatusCodes { NVME_CMD_ABORT_MISSING_FUSE = 0x000a, NVME_INVALID_NSID = 0x000b, NVME_CMD_SEQ_ERROR = 0x000c, + NVME_INVALID_USE_OF_CMB = 0x0012, NVME_LBA_RANGE = 0x0080, NVME_CAP_EXCEEDED = 0x0081, NVME_NS_NOT_READY = 0x0082, From patchwork Mon Nov 11 12:25:38 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192914 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVsT42RMz9sPj for ; Mon, 11 Nov 2019 23:43:09 +1100 (AEDT) Received: from localhost ([::1]:51982 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU92A-0001hX-7B for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:43:06 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37269) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8mC-0008IH-Nb for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:39 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8m8-0003bN-HB for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:36 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52246) by eggs.gnu.org with esmtps 
(TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8ly-0003Q5-Rv; Mon, 11 Nov 2019 07:26:23 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id B1864BFB01; Mon, 11 Nov 2019 12:26:01 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 14/21] nvme: allow multiple aios per command Date: Mon, 11 Nov 2019 13:25:38 +0100 Message-Id: <20191111122545.252478-15-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" This refactors how the device issues asynchronous block backend requests. The NvmeRequest now holds a queue of NvmeAIOs that are associated with the command. This allows multiple aios to be issued for a command. Only when all requests have been completed will the device post a completion queue entry. Because the device is currently guaranteed to only issue a single aio request per command, the benefit is not immediately obvious. But this functionality is required to support metadata. 
Signed-off-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 455 +++++++++++++++++++++++++++++++++--------- hw/block/nvme.h | 165 ++++++++++++--- hw/block/trace-events | 8 + 3 files changed, 511 insertions(+), 117 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 43c139e1644c..230f4277f6e5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -25,6 +25,8 @@ * Default: 64 * cmb_size_mb= : Size of Controller Memory Buffer in MBs. * Default: 0 (disabled) + * mdts= : Maximum Data Transfer Size (power of two) + * Default: 7 */ #include "qemu/osdep.h" @@ -56,6 +58,7 @@ } while (0) static void nvme_process_sq(void *opaque); +static void nvme_aio_cb(void *opaque, int ret); static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) { @@ -197,7 +200,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1, } if (nvme_addr_is_cmb(n, prp1)) { - req->is_cmb = true; + nvme_req_set_cmb(req); } pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); @@ -255,8 +258,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1, } addr_is_cmb = nvme_addr_is_cmb(n, prp_ent); - if ((req->is_cmb && !addr_is_cmb) || - (!req->is_cmb && addr_is_cmb)) { + if ((nvme_req_is_cmb(req) && !addr_is_cmb) || + (!nvme_req_is_cmb(req) && addr_is_cmb)) { status = NVME_INVALID_USE_OF_CMB | NVME_DNR; goto unmap; } @@ -269,8 +272,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1, } } else { bool addr_is_cmb = nvme_addr_is_cmb(n, prp2); - if ((req->is_cmb && !addr_is_cmb) || - (!req->is_cmb && addr_is_cmb)) { + if ((nvme_req_is_cmb(req) && !addr_is_cmb) || + (!nvme_req_is_cmb(req) && addr_is_cmb)) { status = NVME_INVALID_USE_OF_CMB | NVME_DNR; goto unmap; } @@ -312,7 +315,7 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, return status; } - if (req->is_cmb) { + if (nvme_req_is_cmb(req)) { QEMUIOVector iov; qemu_iovec_init(&iov, qsg.nsg); @@ -341,19 +344,18 @@ static uint16_t 
nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint64_t prp1, uint64_t prp2, NvmeRequest *req) { - QEMUSGList qsg; uint16_t status = NVME_SUCCESS; - status = nvme_map_prp(n, &qsg, prp1, prp2, len, req); + status = nvme_map_prp(n, &req->qsg, prp1, prp2, len, req); if (status) { return status; } - if (req->is_cmb) { + if (nvme_req_is_cmb(req)) { QEMUIOVector iov; - qemu_iovec_init(&iov, qsg.nsg); - dma_to_cmb(n, &qsg, &iov); + qemu_iovec_init(&iov, req->qsg.nsg); + dma_to_cmb(n, &req->qsg, &iov); if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) { trace_nvme_err_invalid_dma(); @@ -365,17 +367,137 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, goto out; } - if (unlikely(dma_buf_read(ptr, len, &qsg))) { + if (unlikely(dma_buf_read(ptr, len, &req->qsg))) { trace_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } out: - qemu_sglist_destroy(&qsg); + qemu_sglist_destroy(&req->qsg); return status; } +static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + + uint32_t len = req->nlb << nvme_ns_lbads(ns); + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + return nvme_map_prp(n, &req->qsg, prp1, prp2, len, req); +} + +static void nvme_aio_destroy(NvmeAIO *aio) +{ + if (aio->iov.nalloc) { + qemu_iovec_destroy(&aio->iov); + } + + g_free(aio); +} + +static NvmeAIO *nvme_aio_new(BlockBackend *blk, int64_t offset, + QEMUSGList *qsg, NvmeRequest *req, NvmeAIOCompletionFunc *cb) +{ + NvmeAIO *aio = g_malloc0(sizeof(*aio)); + + *aio = (NvmeAIO) { + .blk = blk, + .offset = offset, + .req = req, + .qsg = qsg, + .cb = cb, + }; + + if (qsg && nvme_req_is_cmb(req)) { + NvmeCtrl *n = nvme_ctrl(req); + + qemu_iovec_init(&aio->iov, qsg->nsg); + dma_to_cmb(n, qsg, &aio->iov); + + aio->qsg = NULL; + } + + return aio; +} + +static inline void 
nvme_req_register_aio(NvmeRequest *req, NvmeAIO *aio, + NvmeAIOOp opc) +{ + aio->opc = opc; + + trace_nvme_req_register_aio(nvme_cid(req), aio, blk_name(aio->blk), + aio->offset, aio->qsg ? aio->qsg->size : aio->iov.size, + nvme_aio_opc_str(aio), req); + + if (req) { + QTAILQ_INSERT_TAIL(&req->aio_tailq, aio, tailq_entry); + } +} + +static void nvme_aio(NvmeAIO *aio) +{ + BlockBackend *blk = aio->blk; + BlockAcctCookie *acct = &aio->acct; + BlockAcctStats *stats = blk_get_stats(blk); + + bool is_write, dma; + + switch (aio->opc) { + case NVME_AIO_OPC_NONE: + break; + + case NVME_AIO_OPC_FLUSH: + block_acct_start(stats, acct, 0, BLOCK_ACCT_FLUSH); + aio->aiocb = blk_aio_flush(blk, nvme_aio_cb, aio); + break; + + case NVME_AIO_OPC_WRITE_ZEROES: + block_acct_start(stats, acct, aio->iov.size, BLOCK_ACCT_WRITE); + aio->aiocb = blk_aio_pwrite_zeroes(aio->blk, aio->offset, + aio->iov.size, BDRV_REQ_MAY_UNMAP, nvme_aio_cb, aio); + break; + + case NVME_AIO_OPC_READ: + case NVME_AIO_OPC_WRITE: + dma = aio->qsg != NULL; + is_write = (aio->opc == NVME_AIO_OPC_WRITE); + + block_acct_start(stats, acct, + dma ? aio->qsg->size : aio->iov.size, + is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); + + if (dma) { + aio->aiocb = is_write ? + dma_blk_write(blk, aio->qsg, aio->offset, + BDRV_SECTOR_SIZE, nvme_aio_cb, aio) : + dma_blk_read(blk, aio->qsg, aio->offset, + BDRV_SECTOR_SIZE, nvme_aio_cb, aio); + + return; + } + + aio->aiocb = is_write ? + blk_aio_pwritev(blk, aio->offset, &aio->iov, 0, + nvme_aio_cb, aio) : + blk_aio_preadv(blk, aio->offset, &aio->iov, 0, + nvme_aio_cb, aio); + + break; + } +} + +static void nvme_rw_aio(BlockBackend *blk, uint64_t offset, QEMUSGList *qsg, + NvmeRequest *req) +{ + NvmeAIO *aio = nvme_aio_new(blk, offset, qsg, req, NULL); + nvme_req_register_aio(req, aio, nvme_req_is_write(req) ? 
+ NVME_AIO_OPC_WRITE : NVME_AIO_OPC_READ); + nvme_aio(aio); +} + static void nvme_post_cqes(void *opaque) { NvmeCQueue *cq = opaque; @@ -411,6 +533,11 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) assert(cq->cqid == req->sq->cqid); trace_nvme_enqueue_req_completion(req->cid, cq->cqid, req->status); + + if (req->qsg.nalloc) { + qemu_sglist_destroy(&req->qsg); + } + QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); @@ -456,135 +583,266 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) } } -static void nvme_rw_cb(void *opaque, int ret) +static void nvme_rw_cb(NvmeRequest *req, void *opaque) { - NvmeRequest *req = opaque; NvmeSQueue *sq = req->sq; NvmeCtrl *n = sq->ctrl; NvmeCQueue *cq = n->cq[sq->cqid]; - NvmeRwCmd *rw = (NvmeRwCmd *) &req->cmd; + + trace_nvme_rw_cb(req->cid, req->cmd.nsid); + + nvme_enqueue_req_completion(cq, req); +} + +static void nvme_aio_cb(void *opaque, int ret) +{ + NvmeAIO *aio = opaque; + NvmeRequest *req = aio->req; + + BlockBackend *blk = aio->blk; + BlockAcctCookie *acct = &aio->acct; + BlockAcctStats *stats = blk_get_stats(blk); + + Error *local_err = NULL; + + trace_nvme_aio_cb(nvme_cid(req), aio, blk_name(aio->blk), aio->offset, + nvme_aio_opc_str(aio), req); + + if (req) { + QTAILQ_REMOVE(&req->aio_tailq, aio, tailq_entry); + } if (!ret) { - block_acct_done(blk_get_stats(n->conf.blk), &req->acct); - req->status = NVME_SUCCESS; + block_acct_done(stats, acct); + + if (aio->cb) { + aio->cb(aio, aio->cb_arg); + } } else { - block_acct_failed(blk_get_stats(n->conf.blk), &req->acct); - nvme_set_error_page(n, sq->sqid, cpu_to_le16(req->cid), - NVME_INTERNAL_DEV_ERROR, offsetof(NvmeRwCmd, slba), rw->slba, 1); - req->status = NVME_INTERNAL_DEV_ERROR | NVME_MORE; - } + block_acct_failed(stats, acct); + + if (req) { + NvmeRwCmd *rw = (NvmeRwCmd *) &req->cmd; + NvmeSQueue *sq = req->sq; + 
NvmeCtrl *n = sq->ctrl; + uint16_t status; + + switch (aio->opc) { + case NVME_AIO_OPC_READ: + status = NVME_UNRECOVERED_READ; + break; + case NVME_AIO_OPC_WRITE: + case NVME_AIO_OPC_WRITE_ZEROES: + status = NVME_WRITE_FAULT; + break; + default: + status = NVME_INTERNAL_DEV_ERROR; + break; + } - if (req->qsg.nalloc) { - qemu_sglist_destroy(&req->qsg); + trace_nvme_err_aio(nvme_cid(req), aio, blk_name(aio->blk), + aio->offset, nvme_aio_opc_str(aio), req, status); + + nvme_set_error_page(n, sq->sqid, cpu_to_le16(req->cid), status, + offsetof(NvmeRwCmd, slba), rw->slba, 1); + + error_setg_errno(&local_err, -ret, "aio failed"); + error_report_err(local_err); + + /* + * An Internal Error trumps all other errors. For other errors, + * only set the first error encountered. Any additional errors will + * be recorded in the error information log page. + */ + if (!req->status || + nvme_is_error(status, NVME_INTERNAL_DEV_ERROR)) { + req->status = status; + } + } } - if (req->iov.nalloc) { - qemu_iovec_destroy(&req->iov); + + if (req && QTAILQ_EMPTY(&req->aio_tailq)) { + if (req->cb) { + req->cb(req, req->cb_arg); + } else { + NvmeSQueue *sq = req->sq; + NvmeCtrl *n = sq->ctrl; + NvmeCQueue *cq = n->cq[sq->cqid]; + + nvme_enqueue_req_completion(cq, req); + } } - nvme_enqueue_req_completion(cq, req); + nvme_aio_destroy(aio); } -static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, +static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len, NvmeRequest *req) { - block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, - BLOCK_ACCT_FLUSH); - req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req); + uint8_t mdts = n->params.mdts; - return NVME_NO_COMPLETE; + if (mdts && len > n->page_size << mdts) { + trace_nvme_err_mdts(nvme_cid(req), n->page_size << mdts, len); + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; } -static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, - NvmeRequest *req) +static inline 
uint16_t nvme_check_prinfo(NvmeCtrl *n, NvmeRequest *req) { - NvmeRwCmd *rw = (NvmeRwCmd *)cmd; - const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); - const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - uint64_t offset = slba << data_shift; - uint32_t count = nlb << data_shift; + NvmeRwCmd *rw = (NvmeRwCmd *) &req->cmd; + NvmeNamespace *ns = req->ns; + + uint16_t ctrl = le16_to_cpu(rw->control); + + if ((ctrl & NVME_RW_PRINFO_PRACT) && !(ns->id_ns.dps & DPS_TYPE_MASK)) { + trace_nvme_err_prinfo(nvme_cid(req), ctrl); + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} - if (unlikely(slba + nlb > ns->id_ns.nsze)) { - trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); +static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + uint64_t nsze = le64_to_cpu(ns->id_ns.nsze); + + if (unlikely((req->slba + req->nlb) > nsze)) { + block_acct_invalid(blk_get_stats(n->conf.blk), + nvme_req_is_write(req) ? 
BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); + trace_nvme_err_invalid_lba_range(req->slba, req->nlb, nsze); return NVME_LBA_RANGE | NVME_DNR; } - block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, - BLOCK_ACCT_WRITE); - req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count, - BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req); + return NVME_SUCCESS; +} + +static uint16_t nvme_check_rw(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + size_t len = req->nlb << nvme_ns_lbads(ns); + uint16_t status; + + status = nvme_check_mdts(n, len, req); + if (status) { + return status; + } + + status = nvme_check_prinfo(n, req); + if (status) { + return status; + } + + status = nvme_check_bounds(n, req); + if (status) { + return status; + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + NvmeAIO *aio = nvme_aio_new(n->conf.blk, 0x0, NULL, req, NULL); + + nvme_req_register_aio(req, aio, NVME_AIO_OPC_FLUSH); + nvme_aio(aio); + return NVME_NO_COMPLETE; } -static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, - NvmeRequest *req) +static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - NvmeRwCmd *rw = (NvmeRwCmd *)cmd; - uint32_t nlb = le32_to_cpu(rw->nlb) + 1; - uint64_t slba = le64_to_cpu(rw->slba); - uint64_t prp1 = le64_to_cpu(rw->prp1); - uint64_t prp2 = le64_to_cpu(rw->prp2); + NvmeAIO *aio; - uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); - uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds; - uint64_t data_size = (uint64_t)nlb << data_shift; - uint64_t data_offset = slba << data_shift; - int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; - enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *) cmd; - trace_nvme_rw(is_write ? 
"write" : "read", nlb, data_size, slba); + int64_t offset; + size_t count; + uint16_t status; - if (unlikely((slba + nlb) > ns->id_ns.nsze)) { - block_acct_invalid(blk_get_stats(n->conf.blk), acct); - trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); - return NVME_LBA_RANGE | NVME_DNR; + req->slba = le64_to_cpu(rw->slba); + req->nlb = le16_to_cpu(rw->nlb) + 1; + + trace_nvme_write_zeros(req->cid, le32_to_cpu(cmd->nsid), req->slba, + req->nlb); + + status = nvme_check_bounds(n, req); + if (unlikely(status)) { + block_acct_invalid(blk_get_stats(n->conf.blk), BLOCK_ACCT_WRITE); + return status; } - if (nvme_map_prp(n, &req->qsg, prp1, prp2, data_size, req)) { + offset = req->slba << nvme_ns_lbads(ns); + count = req->nlb << nvme_ns_lbads(ns); + + aio = nvme_aio_new(n->conf.blk, offset, NULL, req, NULL); + + aio->iov.size = count; + + nvme_req_register_aio(req, aio, NVME_AIO_OPC_WRITE_ZEROES); + nvme_aio(aio); + + return NVME_NO_COMPLETE; +} + +static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *) cmd; + NvmeNamespace *ns = req->ns; + int status; + + enum BlockAcctType acct = + nvme_req_is_write(req) ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; + + req->nlb = le16_to_cpu(rw->nlb) + 1; + req->slba = le64_to_cpu(rw->slba); + + trace_nvme_rw(nvme_req_is_write(req) ? "write" : "read", req->nlb, + req->nlb << nvme_ns_lbads(req->ns), req->slba); + + status = nvme_check_rw(n, req); + if (status) { block_acct_invalid(blk_get_stats(n->conf.blk), acct); - return NVME_INVALID_FIELD | NVME_DNR; + return status; } - dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct); - if (!req->is_cmb) { - req->aiocb = is_write ? - dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, - nvme_rw_cb, req) : - dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, - nvme_rw_cb, req); - } else { - qemu_iovec_init(&req->iov, req->qsg.nsg); - dma_to_cmb(n, &req->qsg, &req->iov); - req->aiocb = is_write ? 
- blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, - req) : - blk_aio_preadv(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, - req); + status = nvme_map(n, cmd, req); + if (status) { + block_acct_invalid(blk_get_stats(n->conf.blk), acct); + return status; } + nvme_rw_aio(n->conf.blk, req->slba << nvme_ns_lbads(ns), &req->qsg, req); + nvme_req_set_cb(req, nvme_rw_cb, NULL); + return NVME_NO_COMPLETE; } static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - NvmeNamespace *ns; uint32_t nsid = le32_to_cpu(cmd->nsid); + trace_nvme_io_cmd(req->cid, nsid, le16_to_cpu(req->sq->sqid), cmd->opcode); + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { trace_nvme_err_invalid_ns(nsid, n->num_namespaces); return NVME_INVALID_NSID | NVME_DNR; } - ns = &n->namespaces[nsid - 1]; + req->ns = &n->namespaces[nsid - 1]; + switch (cmd->opcode) { case NVME_CMD_FLUSH: - return nvme_flush(n, ns, cmd, req); + return nvme_flush(n, cmd, req); case NVME_CMD_WRITE_ZEROS: - return nvme_write_zeros(n, ns, cmd, req); + return nvme_write_zeros(n, cmd, req); case NVME_CMD_WRITE: case NVME_CMD_READ: - return nvme_rw(n, ns, cmd, req); + return nvme_rw(n, cmd, req); default: trace_nvme_err_invalid_opc(cmd->opcode); return NVME_INVALID_OPCODE | NVME_DNR; @@ -609,6 +867,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) NvmeRequest *req, *next; NvmeSQueue *sq; NvmeCQueue *cq; + NvmeAIO *aio; uint16_t qid = le16_to_cpu(c->qid); if (unlikely(!qid || nvme_check_sqid(n, qid))) { @@ -621,8 +880,11 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) sq = n->sq[qid]; while (!QTAILQ_EMPTY(&sq->out_req_list)) { req = QTAILQ_FIRST(&sq->out_req_list); - assert(req->aiocb); - blk_aio_cancel(req->aiocb); + while (!QTAILQ_EMPTY(&req->aio_tailq)) { + aio = QTAILQ_FIRST(&req->aio_tailq); + assert(aio->aiocb); + blk_aio_cancel(aio->aiocb); + } } if (!nvme_check_cqid(n, sq->cqid)) { cq = n->cq[sq->cqid]; @@ -659,6 +921,7 @@ static void 
nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, QTAILQ_INIT(&sq->out_req_list); for (i = 0; i < sq->size; i++) { sq->io_req[i].sq = sq; + QTAILQ_INIT(&(sq->io_req[i].aio_tailq)); QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); } sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq); @@ -819,6 +1082,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t numdl, numdu; uint64_t off, lpol, lpou; size_t len; + uint16_t status; numdl = (dw10 >> 16); numdu = (dw11 & 0xffff); @@ -834,6 +1098,11 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) trace_nvme_get_log(req->cid, lid, lsp, rae, len, off); + status = nvme_check_mdts(n, len, req); + if (status) { + return status; + } + switch (lid) { case NVME_LOG_ERROR_INFO: return nvme_error_info(n, cmd, rae, len, off, req); @@ -1352,8 +1621,9 @@ static void nvme_init_req(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) memcpy(&req->cmd, cmd, sizeof(NvmeCmd)); req->status = NVME_SUCCESS; - req->is_cmb = false; - req->is_write = false; + req->flags = 0x0; + req->cb = NULL; + req->cb_arg = NULL; } static void nvme_process_sq(void *opaque) @@ -1952,6 +2222,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->ieee[0] = 0x00; id->ieee[1] = 0x02; id->ieee[2] = 0xb3; + id->mdts = params->mdts; id->ver = cpu_to_le32(0x00010300); id->oacs = cpu_to_le16(0); id->acl = 3; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index add9ff335aa5..2f6706899de1 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -8,7 +8,8 @@ DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \ DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \ DEFINE_PROP_UINT8("elpe", _state, _props.elpe, 24), \ - DEFINE_PROP_UINT8("aerl", _state, _props.aerl, 3) + DEFINE_PROP_UINT8("aerl", _state, _props.aerl, 3), \ + DEFINE_PROP_UINT8("mdts", _state, _props.mdts, 7) typedef struct NvmeParams { char *serial; @@ -16,6 +17,7 @@ typedef struct NvmeParams { uint32_t 
cmb_size_mb; uint8_t elpe; uint8_t aerl; + uint8_t mdts; } NvmeParams; typedef struct NvmeAsyncEvent { @@ -23,23 +25,124 @@ typedef struct NvmeAsyncEvent { NvmeAerResult result; } NvmeAsyncEvent; -typedef struct NvmeRequest { - struct NvmeSQueue *sq; - BlockAIOCB *aiocb; - uint16_t status; - uint16_t cid; - bool is_cmb; - bool is_write; - NvmeCqe cqe; - BlockAcctCookie acct; - QEMUSGList qsg; - QEMUIOVector iov; - NvmeCmd cmd; - QTAILQ_ENTRY(NvmeRequest)entry; -} NvmeRequest; - -typedef struct NvmeSQueue { - struct NvmeCtrl *ctrl; +typedef enum NvmeAIOOp { + NVME_AIO_OPC_NONE = 0x0, + NVME_AIO_OPC_FLUSH = 0x1, + NVME_AIO_OPC_READ = 0x2, + NVME_AIO_OPC_WRITE = 0x3, + NVME_AIO_OPC_WRITE_ZEROES = 0x4, +} NvmeAIOOp; + +typedef struct NvmeRequest NvmeRequest; +typedef struct NvmeAIO NvmeAIO; +typedef void NvmeAIOCompletionFunc(NvmeAIO *aio, void *opaque); + +struct NvmeAIO { + NvmeRequest *req; + + NvmeAIOOp opc; + int64_t offset; + BlockBackend *blk; + BlockAIOCB *aiocb; + BlockAcctCookie acct; + + NvmeAIOCompletionFunc *cb; + void *cb_arg; + + QEMUSGList *qsg; + QEMUIOVector iov; + + QTAILQ_ENTRY(NvmeAIO) tailq_entry; +}; + +static inline const char *nvme_aio_opc_str(NvmeAIO *aio) +{ + switch (aio->opc) { + case NVME_AIO_OPC_NONE: return "NVME_AIO_OP_NONE"; + case NVME_AIO_OPC_FLUSH: return "NVME_AIO_OP_FLUSH"; + case NVME_AIO_OPC_READ: return "NVME_AIO_OP_READ"; + case NVME_AIO_OPC_WRITE: return "NVME_AIO_OP_WRITE"; + case NVME_AIO_OPC_WRITE_ZEROES: return "NVME_AIO_OP_WRITE_ZEROES"; + default: return "NVME_AIO_OP_UNKNOWN"; + } +} + +#define NVME_REQ_TRANSFER_DMA 0x1 +#define NVME_REQ_TRANSFER_CMB 0x2 +#define NVME_REQ_TRANSFER_MASK 0x3 + +typedef struct NvmeSQueue NvmeSQueue; +typedef struct NvmeNamespace NvmeNamespace; +typedef void NvmeRequestCompletionFunc(NvmeRequest *req, void *opaque); + +struct NvmeRequest { + NvmeSQueue *sq; + NvmeNamespace *ns; + NvmeCqe cqe; + NvmeCmd cmd; + + uint64_t slba; + uint32_t nlb; + uint16_t status; + uint16_t cid; + int flags; + 
+ NvmeRequestCompletionFunc *cb; + void *cb_arg; + + QEMUSGList qsg; + + QTAILQ_HEAD(, NvmeAIO) aio_tailq; + QTAILQ_ENTRY(NvmeRequest) entry; +}; + +static inline void nvme_req_set_cb(NvmeRequest *req, + NvmeRequestCompletionFunc *cb, void *cb_arg) +{ + req->cb = cb; + req->cb_arg = cb_arg; +} + +static inline void nvme_req_clear_cb(NvmeRequest *req) +{ + req->cb = req->cb_arg = NULL; +} + +static inline uint16_t nvme_cid(NvmeRequest *req) +{ + if (req) { + return req->cid; + } + + return 0xffff; +} + +static inline bool nvme_req_is_cmb(NvmeRequest *req) +{ + return (req->flags & NVME_REQ_TRANSFER_MASK) == NVME_REQ_TRANSFER_CMB; +} + +static void nvme_req_set_cmb(NvmeRequest *req) +{ + req->flags = NVME_REQ_TRANSFER_CMB; +} + +static inline bool nvme_req_is_write(NvmeRequest *req) +{ + switch (req->cmd.opcode) { + case NVME_CMD_WRITE: + case NVME_CMD_WRITE_UNCOR: + case NVME_CMD_WRITE_ZEROS: + return true; + default: + return false; + } +} + +typedef struct NvmeCtrl NvmeCtrl; + +struct NvmeSQueue { + NvmeCtrl *ctrl; uint16_t sqid; uint16_t cqid; uint32_t head; @@ -51,10 +154,12 @@ typedef struct NvmeSQueue { QTAILQ_HEAD(, NvmeRequest) req_list; QTAILQ_HEAD(, NvmeRequest) out_req_list; QTAILQ_ENTRY(NvmeSQueue) entry; -} NvmeSQueue; +}; -typedef struct NvmeCQueue { - struct NvmeCtrl *ctrl; +typedef struct NvmeCQueue NvmeCQueue; + +struct NvmeCQueue { + NvmeCtrl *ctrl; uint8_t phase; uint16_t cqid; uint16_t irq_enabled; @@ -66,11 +171,11 @@ typedef struct NvmeCQueue { QEMUTimer *timer; QTAILQ_HEAD(, NvmeSQueue) sq_list; QTAILQ_HEAD(, NvmeRequest) req_list; -} NvmeCQueue; +}; -typedef struct NvmeNamespace { +struct NvmeNamespace { NvmeIdNs id_ns; -} NvmeNamespace; +}; #define TYPE_NVME "nvme" #define NVME(obj) \ @@ -122,6 +227,17 @@ typedef struct NvmeCtrl { NvmeIdCtrl id_ctrl; } NvmeCtrl; +static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) +{ + return req->sq->ctrl; +} + +static inline bool nvme_is_error(uint16_t status, uint16_t err) +{ + /* strip DNR and MORE */ + 
return (status & 0xfff) == err; +} + static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns) { NvmeIdNs *id_ns = &ns->id_ns; @@ -143,5 +259,4 @@ static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns) return n->ns_size >> nvme_ns_lbads(ns); } - #endif /* HW_NVME_H */ diff --git a/hw/block/trace-events b/hw/block/trace-events index e81bb3a64ed7..c7e0a1849043 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -34,7 +34,12 @@ nvme_irq_pin(void) "pulsing IRQ pin" nvme_irq_masked(void) "IRQ is masked" nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" nvme_map_prp(uint16_t cid, uint8_t opc, uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "cid %"PRIu16" opc 0x%"PRIx8" trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" +nvme_req_register_aio(uint16_t cid, void *aio, const char *blkname, uint64_t offset, uint64_t count, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" count %"PRIu64" opc \"%s\" req %p" +nvme_aio_cb(uint16_t cid, void *aio, const char *blkname, uint64_t offset, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" opc \"%s\" req %p" +nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8"" nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" +nvme_rw_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" +nvme_write_zeros(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, 
uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" @@ -74,6 +79,9 @@ nvme_mmio_shutdown_set(void) "shutdown bit set" nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" # nvme traces for error conditions +nvme_err_mdts(uint16_t cid, size_t mdts, size_t len) "cid %"PRIu16" mdts %"PRIu64" len %"PRIu64"" +nvme_err_prinfo(uint16_t cid, uint16_t ctrl) "cid %"PRIu16" ctrl %"PRIu16"" +nvme_err_aio(uint16_t cid, void *aio, const char *blkname, uint64_t offset, const char *opc, void *req, uint16_t status) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" opc \"%s\" req %p status 0x%"PRIx16"" nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" From patchwork Mon Nov 11 12:25:39 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192912 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVqf5YBtz9sPj for ; Mon, 11 Nov 2019 23:41:34 +1100 (AEDT) Received: from localhost ([::1]:51968 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) 
(envelope-from ) id 1iU90e-0008I4-Id for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:41:32 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37246) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8mA-0008HY-OT for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:37 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8m6-0003aq-FL for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:34 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52254) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8lz-0003QE-57; Mon, 11 Nov 2019 07:26:23 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 197F3BFB10; Mon, 11 Nov 2019 12:26:02 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 15/21] nvme: add support for scatter gather lists Date: Mon, 11 Nov 2019 13:25:39 +0100 Message-Id: <20191111122545.252478-16-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" For now, support the Data Block, Segment and Last Segment descriptor types. See NVM Express 1.3d, Section 4.4 ("Scatter Gather List (SGL)"). 
Signed-off-by: Klaus Jensen Acked-by: Fam Zheng --- block/nvme.c | 18 +- hw/block/nvme.c | 380 ++++++++++++++++++++++++++++++++++++------ hw/block/trace-events | 3 + include/block/nvme.h | 62 ++++++- 4 files changed, 398 insertions(+), 65 deletions(-) diff --git a/block/nvme.c b/block/nvme.c index d41c4bda6e39..521f521054d5 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -446,7 +446,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) error_setg(errp, "Cannot map buffer for DMA"); goto out; } - cmd.prp1 = cpu_to_le64(iova); + cmd.dptr.prp.prp1 = cpu_to_le64(iova); if (nvme_cmd_sync(bs, s->queues[0], &cmd)) { error_setg(errp, "Failed to identify controller"); @@ -545,7 +545,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) } cmd = (NvmeCmd) { .opcode = NVME_ADM_CMD_CREATE_CQ, - .prp1 = cpu_to_le64(q->cq.iova), + .dptr.prp.prp1 = cpu_to_le64(q->cq.iova), .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)), .cdw11 = cpu_to_le32(0x3), }; @@ -556,7 +556,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) } cmd = (NvmeCmd) { .opcode = NVME_ADM_CMD_CREATE_SQ, - .prp1 = cpu_to_le64(q->sq.iova), + .dptr.prp.prp1 = cpu_to_le64(q->sq.iova), .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)), .cdw11 = cpu_to_le32(0x1 | (n << 16)), }; @@ -906,16 +906,16 @@ try_map: case 0: abort(); case 1: - cmd->prp1 = pagelist[0]; - cmd->prp2 = 0; + cmd->dptr.prp.prp1 = pagelist[0]; + cmd->dptr.prp.prp2 = 0; break; case 2: - cmd->prp1 = pagelist[0]; - cmd->prp2 = pagelist[1]; + cmd->dptr.prp.prp1 = pagelist[0]; + cmd->dptr.prp.prp2 = pagelist[1]; break; default: - cmd->prp1 = pagelist[0]; - cmd->prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t)); + cmd->dptr.prp.prp1 = pagelist[0]; + cmd->dptr.prp.prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t)); break; } trace_nvme_cmd_map_qiov(s, cmd, req, qiov, entries); diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 230f4277f6e5..06b2530aa06c 
100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -296,6 +296,198 @@ unmap: return status; } +static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg, + NvmeSglDescriptor *segment, uint64_t nsgld, uint32_t *len, + NvmeRequest *req) +{ + dma_addr_t addr, trans_len; + + for (int i = 0; i < nsgld; i++) { + if (NVME_SGL_TYPE(segment[i].type) != SGL_DESCR_TYPE_DATA_BLOCK) { + trace_nvme_err_invalid_sgl_descriptor(req->cid, + NVME_SGL_TYPE(segment[i].type)); + return NVME_SGL_DESCRIPTOR_TYPE_INVALID | NVME_DNR; + } + + if (*len == 0) { + if (!NVME_CTRL_SGLS_EXCESS_LENGTH(n->id_ctrl.sgls)) { + trace_nvme_err_invalid_sgl_excess_length(req->cid); + return NVME_DATA_SGL_LENGTH_INVALID | NVME_DNR; + } + + break; + } + + addr = le64_to_cpu(segment[i].addr); + trans_len = MIN(*len, le64_to_cpu(segment[i].len)); + + if (nvme_addr_is_cmb(n, addr)) { + /* + * All data and metadata, if any, associated with a particular + * command shall be located in either the CMB or host memory. Thus, + * if an address is found to be in the CMB and we have already + * mapped data that is in host memory, the use is invalid. + */ + if (!nvme_req_is_cmb(req) && qsg->size) { + return NVME_INVALID_USE_OF_CMB | NVME_DNR; + } + + nvme_req_set_cmb(req); + } else { + /* + * Similarly, if the address does not reference the CMB, but we + * have already established that the request has data or metadata + * in the CMB, the use is invalid. 
+ */ + if (nvme_req_is_cmb(req)) { + return NVME_INVALID_USE_OF_CMB | NVME_DNR; + } + } + + qemu_sglist_add(qsg, addr, trans_len); + + *len -= trans_len; + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, + NvmeSglDescriptor sgl, uint32_t len, NvmeRequest *req) +{ + const int MAX_NSGLD = 256; + + NvmeSglDescriptor segment[MAX_NSGLD]; + uint64_t nsgld; + uint16_t status; + bool sgl_in_cmb = false; + hwaddr addr = le64_to_cpu(sgl.addr); + + trace_nvme_map_sgl(req->cid, NVME_SGL_TYPE(sgl.type), req->nlb, len); + + pci_dma_sglist_init(qsg, &n->parent_obj, 1); + + /* + * If the entire transfer can be described with a single data block it can + * be mapped directly. + */ + if (NVME_SGL_TYPE(sgl.type) == SGL_DESCR_TYPE_DATA_BLOCK) { + status = nvme_map_sgl_data(n, qsg, &sgl, 1, &len, req); + if (status) { + goto unmap; + } + + goto out; + } + + /* + * If the segment is located in the CMB, the submission queue of the + * request must also reside there. 
+ */ + if (nvme_addr_is_cmb(n, addr)) { + if (!nvme_addr_is_cmb(n, req->sq->dma_addr)) { + return NVME_INVALID_USE_OF_CMB | NVME_DNR; + } + + sgl_in_cmb = true; + } + + while (NVME_SGL_TYPE(sgl.type) == SGL_DESCR_TYPE_SEGMENT) { + bool addr_is_cmb; + + nsgld = le64_to_cpu(sgl.len) / sizeof(NvmeSglDescriptor); + + /* read the segment in chunks of 256 descriptors (4k) */ + while (nsgld > MAX_NSGLD) { + nvme_addr_read(n, addr, segment, sizeof(segment)); + + status = nvme_map_sgl_data(n, qsg, segment, MAX_NSGLD, &len, req); + if (status) { + goto unmap; + } + + nsgld -= MAX_NSGLD; + addr += MAX_NSGLD * sizeof(NvmeSglDescriptor); + } + + nvme_addr_read(n, addr, segment, nsgld * sizeof(NvmeSglDescriptor)); + + sgl = segment[nsgld - 1]; + addr = le64_to_cpu(sgl.addr); + + /* an SGL is allowed to end with a Data Block in a regular Segment */ + if (NVME_SGL_TYPE(sgl.type) == SGL_DESCR_TYPE_DATA_BLOCK) { + status = nvme_map_sgl_data(n, qsg, segment, nsgld, &len, req); + if (status) { + goto unmap; + } + + goto out; + } + + /* do not map last descriptor */ + status = nvme_map_sgl_data(n, qsg, segment, nsgld - 1, &len, req); + if (status) { + goto unmap; + } + + /* + * If the next segment is in the CMB, make sure that the sgl was + * already located there. + */ + addr_is_cmb = nvme_addr_is_cmb(n, addr); + if ((sgl_in_cmb && !addr_is_cmb) || (!sgl_in_cmb && addr_is_cmb)) { + status = NVME_INVALID_USE_OF_CMB | NVME_DNR; + goto unmap; + } + } + + /* + * If the segment did not end with a Data Block or a Segment descriptor, it + * must be a Last Segment descriptor. 
+ */ + if (NVME_SGL_TYPE(sgl.type) != SGL_DESCR_TYPE_LAST_SEGMENT) { + trace_nvme_err_invalid_sgl_descriptor(req->cid, + NVME_SGL_TYPE(sgl.type)); + return NVME_SGL_DESCRIPTOR_TYPE_INVALID | NVME_DNR; + } + + nsgld = le64_to_cpu(sgl.len) / sizeof(NvmeSglDescriptor); + + while (nsgld > MAX_NSGLD) { + nvme_addr_read(n, addr, segment, sizeof(segment)); + + status = nvme_map_sgl_data(n, qsg, segment, MAX_NSGLD, &len, req); + if (status) { + goto unmap; + } + + nsgld -= MAX_NSGLD; + addr += MAX_NSGLD * sizeof(NvmeSglDescriptor); + } + + nvme_addr_read(n, addr, segment, nsgld * sizeof(NvmeSglDescriptor)); + + status = nvme_map_sgl_data(n, qsg, segment, nsgld, &len, req); + if (status) { + goto unmap; + } + +out: + /* if there is any residual left in len, the SGL was too short */ + if (len) { + status = NVME_DATA_SGL_LENGTH_INVALID | NVME_DNR; + goto unmap; + } + + return NVME_SUCCESS; + +unmap: + qemu_sglist_destroy(qsg); + + return status; +} + static void dma_to_cmb(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov) { for (int i = 0; i < qsg->nsg; i++) { @@ -341,6 +533,56 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, return status; } +static uint16_t nvme_dma_write_sgl(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeSglDescriptor sgl, NvmeRequest *req) +{ + QEMUSGList qsg; + uint16_t err = NVME_SUCCESS; + + err = nvme_map_sgl(n, &qsg, sgl, len, req); + if (err) { + return err; + } + + if (nvme_req_is_cmb(req)) { + QEMUIOVector iov; + + qemu_iovec_init(&iov, qsg.nsg); + dma_to_cmb(n, &qsg, &iov); + + if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_iovec_destroy(&iov); + + return err; + } + + if (unlikely(dma_buf_write(ptr, len, &qsg))) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_sglist_destroy(&qsg); + + return err; +} + +static uint16_t nvme_dma_write(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeCmd 
*cmd, NvmeRequest *req) +{ + if (NVME_CMD_FLAGS_PSDT(cmd->flags)) { + return nvme_dma_write_sgl(n, ptr, len, cmd->dptr.sgl, req); + } + + uint64_t prp1 = le64_to_cpu(cmd->dptr.prp.prp1); + uint64_t prp2 = le64_to_cpu(cmd->dptr.prp.prp2); + + return nvme_dma_write_prp(n, ptr, len, prp1, prp2, req); +} + static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint64_t prp1, uint64_t prp2, NvmeRequest *req) { @@ -378,13 +620,68 @@ out: return status; } +static uint16_t nvme_dma_read_sgl(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeSglDescriptor sgl, NvmeCmd *cmd, NvmeRequest *req) +{ + QEMUSGList qsg; + uint16_t err = NVME_SUCCESS; + + err = nvme_map_sgl(n, &qsg, sgl, len, req); + if (err) { + return err; + } + + if (nvme_req_is_cmb(req)) { + QEMUIOVector iov; + + qemu_iovec_init(&iov, qsg.nsg); + dma_to_cmb(n, &qsg, &iov); + + if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_iovec_destroy(&iov); + + goto out; + } + + if (unlikely(dma_buf_read(ptr, len, &qsg))) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; + } + +out: + qemu_sglist_destroy(&qsg); + + return err; +} + +static uint16_t nvme_dma_read(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeCmd *cmd, NvmeRequest *req) +{ + if (NVME_CMD_FLAGS_PSDT(cmd->flags)) { + return nvme_dma_read_sgl(n, ptr, len, cmd->dptr.sgl, cmd, req); + } + + uint64_t prp1 = le64_to_cpu(cmd->dptr.prp.prp1); + uint64_t prp2 = le64_to_cpu(cmd->dptr.prp.prp2); + + return nvme_dma_read_prp(n, ptr, len, prp1, prp2, req); +} + static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - NvmeNamespace *ns = req->ns; + uint32_t len = req->nlb << nvme_ns_lbads(req->ns); + uint64_t prp1, prp2; + + if (NVME_CMD_FLAGS_PSDT(cmd->flags)) { + return nvme_map_sgl(n, &req->qsg, cmd->dptr.sgl, len, req); + } - uint32_t len = req->nlb << nvme_ns_lbads(ns); - uint64_t prp1 = le64_to_cpu(cmd->prp1); - 
uint64_t prp2 = le64_to_cpu(cmd->prp2); + prp1 = le64_to_cpu(cmd->dptr.prp.prp1); + prp2 = le64_to_cpu(cmd->dptr.prp.prp2); return nvme_map_prp(n, &req->qsg, prp1, prp2, len, req); } @@ -975,8 +1272,6 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { uint32_t trans_len; - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); if (off > sizeof(*n->elpes) * (n->params.elpe + 1)) { return NVME_INVALID_FIELD | NVME_DNR; @@ -988,15 +1283,12 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, nvme_clear_events(n, NVME_AER_TYPE_ERROR); } - return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1, - prp2, req); + return nvme_dma_read(n, (uint8_t *) n->elpes + off, trans_len, cmd, req); } static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); uint32_t nsid = le32_to_cpu(cmd->nsid); uint32_t trans_len; @@ -1046,16 +1338,13 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, nvme_clear_events(n, NVME_AER_TYPE_SMART); } - return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1, - prp2, req); + return nvme_dma_read(n, (uint8_t *) &smart + off, trans_len, cmd, req); } static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, uint64_t off, NvmeRequest *req) { uint32_t trans_len; - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); NvmeFwSlotInfoLog fw_log; if (off > sizeof(fw_log)) { @@ -1066,8 +1355,7 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, trans_len = MIN(sizeof(fw_log) - off, buf_len); - return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, - prp2, req); + return nvme_dma_read(n, (uint8_t *) &fw_log + off, trans_len, cmd, req); } static 
uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) @@ -1209,25 +1497,18 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) return NVME_SUCCESS; } -static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c, - NvmeRequest *req) +static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - uint64_t prp1 = le64_to_cpu(c->prp1); - uint64_t prp2 = le64_to_cpu(c->prp2); - trace_nvme_identify_ctrl(); - return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), - prp1, prp2, req); + return nvme_dma_read(n, (uint8_t *) &n->id_ctrl, sizeof(n->id_ctrl), cmd, + req); } -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c, - NvmeRequest *req) +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { NvmeNamespace *ns; - uint32_t nsid = le32_to_cpu(c->nsid); - uint64_t prp1 = le64_to_cpu(c->prp1); - uint64_t prp2 = le64_to_cpu(c->prp2); + uint32_t nsid = le32_to_cpu(cmd->nsid); trace_nvme_identify_ns(nsid); @@ -1238,17 +1519,15 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c, ns = &n->namespaces[nsid - 1]; - return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), - prp1, prp2, req); + return nvme_dma_read(n, (uint8_t *) &ns->id_ns, sizeof(ns->id_ns), cmd, + req); } -static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c, +static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { static const int data_len = 4 * KiB; - uint32_t min_nsid = le32_to_cpu(c->nsid); - uint64_t prp1 = le64_to_cpu(c->prp1); - uint64_t prp2 = le64_to_cpu(c->prp2); + uint32_t min_nsid = le32_to_cpu(cmd->nsid); uint32_t *list; uint16_t ret; int i, j = 0; @@ -1265,12 +1544,12 @@ static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c, break; } } - ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2, req); + ret = nvme_dma_read(n, (uint8_t *) list, data_len, cmd, req); g_free(list); return ret; } -static uint16_t 
nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c, +static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { static const int len = 4096; @@ -1282,9 +1561,7 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c, uint8_t nid[16]; }; - uint32_t nsid = le32_to_cpu(c->nsid); - uint64_t prp1 = le64_to_cpu(c->prp1); - uint64_t prp2 = le64_to_cpu(c->prp2); + uint32_t nsid = le32_to_cpu(cmd->nsid); struct ns_descr *list; uint16_t ret; @@ -1301,7 +1578,7 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c, list->nidl = 0x10; *(uint32_t *) &list->nid[12] = cpu_to_be32(nsid); - ret = nvme_dma_read_prp(n, (uint8_t *) list, len, prp1, prp2, req); + ret = nvme_dma_read(n, (uint8_t *) list, len, cmd, req); g_free(list); return ret; } @@ -1312,13 +1589,13 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (le32_to_cpu(c->cns)) { case 0x00: - return nvme_identify_ns(n, c, req); + return nvme_identify_ns(n, cmd, req); case 0x01: - return nvme_identify_ctrl(n, c, req); + return nvme_identify_ctrl(n, cmd, req); case 0x02: - return nvme_identify_ns_list(n, c, req); + return nvme_identify_ns_list(n, cmd, req); case 0x03: - return nvme_identify_ns_descr_list(n, c, req); + return nvme_identify_ns_descr_list(n, cmd, req); default: trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); return NVME_INVALID_FIELD | NVME_DNR; @@ -1380,13 +1657,10 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); - uint64_t timestamp = nvme_get_timestamp(n); - return nvme_dma_read_prp(n, (uint8_t *)&timestamp, sizeof(timestamp), - prp1, prp2, req); + return nvme_dma_read(n, (uint8_t *)&timestamp, sizeof(timestamp), cmd, + req); } static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) @@ 
-1451,11 +1725,9 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, { uint16_t ret; uint64_t timestamp; - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); - ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp, - sizeof(timestamp), prp1, prp2, req); + ret = nvme_dma_write(n, (uint8_t *)&timestamp, sizeof(timestamp), cmd, + req); if (ret != NVME_SUCCESS) { return ret; } @@ -2240,6 +2512,8 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->vwc = 1; } + id->sgls = cpu_to_le32(0x1); + strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:"); pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial); diff --git a/hw/block/trace-events b/hw/block/trace-events index c7e0a1849043..59d42a3b8c39 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -34,6 +34,7 @@ nvme_irq_pin(void) "pulsing IRQ pin" nvme_irq_masked(void) "IRQ is masked" nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" nvme_map_prp(uint16_t cid, uint8_t opc, uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "cid %"PRIu16" opc 0x%"PRIx8" trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" +nvme_map_sgl(uint16_t cid, uint8_t typ, uint32_t nlb, uint64_t len) "cid %"PRIu16" type 0x%"PRIx8" nlb %"PRIu32" len %"PRIu64"" nvme_req_register_aio(uint16_t cid, void *aio, const char *blkname, uint64_t offset, uint64_t count, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" count %"PRIu64" opc \"%s\" req %p" nvme_aio_cb(uint16_t cid, void *aio, const char *blkname, uint64_t offset, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" opc \"%s\" req %p" nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8"" @@ -82,6 +83,8 @@ nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" nvme_err_mdts(uint16_t cid, size_t mdts, size_t len) "cid 
%"PRIu16" mdts %"PRIu64" len %"PRIu64"" nvme_err_prinfo(uint16_t cid, uint16_t ctrl) "cid %"PRIu16" ctrl %"PRIu16"" nvme_err_aio(uint16_t cid, void *aio, const char *blkname, uint64_t offset, const char *opc, void *req, uint16_t status) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" opc \"%s\" req %p status 0x%"PRIx16"" +nvme_err_invalid_sgl_descriptor(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" +nvme_err_invalid_sgl_excess_length(uint16_t cid) "cid %"PRIu16"" nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 2aaa45e20566..418dadf3c5c6 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -205,15 +205,53 @@ enum NvmeCmbszMask { #define NVME_CMBSZ_GETSIZE(cmbsz) \ (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz)))) +enum NvmeSglDescriptorType { + SGL_DESCR_TYPE_DATA_BLOCK = 0x0, + SGL_DESCR_TYPE_BIT_BUCKET = 0x1, + SGL_DESCR_TYPE_SEGMENT = 0x2, + SGL_DESCR_TYPE_LAST_SEGMENT = 0x3, + SGL_DESCR_TYPE_KEYED_DATA_BLOCK = 0x4, + + SGL_DESCR_TYPE_VENDOR_SPECIFIC = 0xf, +}; + +enum NvmeSglDescriptorSubtype { + SGL_DESCR_SUBTYPE_ADDRESS = 0x0, + SGL_DESCR_SUBTYPE_OFFSET = 0x1, +}; + +typedef struct NvmeSglDescriptor { + uint64_t addr; + uint32_t len; + uint8_t rsvd[3]; + uint8_t type; +} NvmeSglDescriptor; + +#define NVME_SGL_TYPE(type) (type >> 4) + +typedef union NvmeCmdDptr { + struct { + uint64_t prp1; + uint64_t prp2; + } prp; + + NvmeSglDescriptor sgl; +} NvmeCmdDptr; + +enum NvmePsdt { + PSDT_PRP = 0x0, + PSDT_SGL_MPTR_CONTIGUOUS = 0x1, + PSDT_SGL_MPTR_SGL = 0x2, +}; + typedef struct NvmeCmd { uint8_t opcode; - uint8_t fuse; + uint8_t flags; uint16_t cid; uint32_t nsid; uint64_t res1; uint64_t mptr; - uint64_t prp1; - uint64_t prp2; + NvmeCmdDptr dptr; uint32_t 
cdw10; uint32_t cdw11; uint32_t cdw12; @@ -222,6 +260,9 @@ typedef struct NvmeCmd { uint32_t cdw15; } NvmeCmd; +#define NVME_CMD_FLAGS_FUSE(flags) (flags & 0x3) +#define NVME_CMD_FLAGS_PSDT(flags) ((flags >> 6) & 0x3) + enum NvmeAdminCommands { NVME_ADM_CMD_DELETE_SQ = 0x00, NVME_ADM_CMD_CREATE_SQ = 0x01, @@ -427,6 +468,11 @@ enum NvmeStatusCodes { NVME_CMD_ABORT_MISSING_FUSE = 0x000a, NVME_INVALID_NSID = 0x000b, NVME_CMD_SEQ_ERROR = 0x000c, + NVME_INVALID_SGL_SEG_DESCRIPTOR = 0x000d, + NVME_INVALID_NUM_SGL_DESCRIPTORS = 0x000e, + NVME_DATA_SGL_LENGTH_INVALID = 0x000f, + NVME_METADATA_SGL_LENGTH_INVALID = 0x0010, + NVME_SGL_DESCRIPTOR_TYPE_INVALID = 0x0011, NVME_INVALID_USE_OF_CMB = 0x0012, NVME_LBA_RANGE = 0x0080, NVME_CAP_EXCEEDED = 0x0081, @@ -623,6 +669,16 @@ enum NvmeIdCtrlOncs { #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf) #define NVME_CTRL_CQES_MAX(cqes) (((cqes) >> 4) & 0xf) +#define NVME_CTRL_SGLS_SUPPORTED(sgls) ((sgls) & 0x3) +#define NVME_CTRL_SGLS_SUPPORTED_NO_ALIGNMENT(sgls) ((sgls) & (0x1 << 0)) +#define NVME_CTRL_SGLS_SUPPORTED_DWORD_ALIGNMENT(sgls) ((sgls) & (0x1 << 1)) +#define NVME_CTRL_SGLS_KEYED(sgls) ((sgls) & (0x1 << 2)) +#define NVME_CTRL_SGLS_BITBUCKET(sgls) ((sgls) & (0x1 << 16)) +#define NVME_CTRL_SGLS_MPTR_CONTIGUOUS(sgls) ((sgls) & (0x1 << 17)) +#define NVME_CTRL_SGLS_EXCESS_LENGTH(sgls) ((sgls) & (0x1 << 18)) +#define NVME_CTRL_SGLS_MPTR_SGL(sgls) ((sgls) & (0x1 << 19)) +#define NVME_CTRL_SGLS_ADDR_OFFSET(sgls) ((sgls) & (0x1 << 20)) + typedef struct NvmeFeatureVal { uint32_t arbitration; uint32_t power_mgmt; From patchwork Mon Nov 11 12:25:40 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192915 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; 
helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVsV2Yc3z9sPn for ; Mon, 11 Nov 2019 23:43:10 +1100 (AEDT) Received: from localhost ([::1]:51986 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU92C-0001jg-61 for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:43:08 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37290) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8mE-0008Km-Sl for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:42 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8mA-0003c7-N6 for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:38 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52272) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8lz-0003QK-Lq; Mon, 11 Nov 2019 07:26:23 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 6C035BFB11; Mon, 11 Nov 2019 12:26:02 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 16/21] nvme: support multiple namespaces Date: Mon, 11 Nov 2019 13:25:40 +0100 Message-Id: <20191111122545.252478-17-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: 
List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" This adds support for multiple namespaces by introducing a new 'nvme-ns' device model. The nvme device creates a bus named from the device name ('id'). The nvme-ns devices then connect to this bus and register themselves with the nvme device. This changes how an nvme device is created. Example with two namespaces: -drive file=nvme0n1.img,if=none,id=disk1 -drive file=nvme0n2.img,if=none,id=disk2 -device nvme,serial=deadbeef,id=nvme0 -device nvme-ns,drive=disk1,bus=nvme0,nsid=1 -device nvme-ns,drive=disk2,bus=nvme0,nsid=2 The drive property is kept on the nvme device to keep the change backward compatible, but the property is now optional. Specifying a drive for the nvme device will always create the namespace with nsid 1. Signed-off-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/block/Makefile.objs | 2 +- hw/block/nvme-ns.c | 158 +++++++++++++++++++++++++++++ hw/block/nvme-ns.h | 60 +++++++++++ hw/block/nvme.c | 222 ++++++++++++++++++++++++----------------- hw/block/nvme.h | 60 ++++++----- hw/block/trace-events | 6 +- 6 files changed, 389 insertions(+), 119 deletions(-) create mode 100644 hw/block/nvme-ns.c create mode 100644 hw/block/nvme-ns.h diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs index 28c2495a00dc..45f463462f1e 100644 --- a/hw/block/Makefile.objs +++ b/hw/block/Makefile.objs @@ -7,7 +7,7 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o common-obj-$(CONFIG_XEN) += xen-block.o common-obj-$(CONFIG_ECC) += ecc.o common-obj-$(CONFIG_ONENAND) += onenand.o -common-obj-$(CONFIG_NVME_PCI) += nvme.o +common-obj-$(CONFIG_NVME_PCI) += nvme.o nvme-ns.o common-obj-$(CONFIG_SWIM) += swim.o obj-$(CONFIG_SH4) += tc58128.o diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c new file mode 
100644 index 000000000000..36deedee07a6 --- /dev/null +++ b/hw/block/nvme-ns.c @@ -0,0 +1,158 @@ +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/cutils.h" +#include "qemu/log.h" +#include "hw/block/block.h" +#include "hw/pci/msix.h" +#include "sysemu/sysemu.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" + +#include "hw/qdev-properties.h" +#include "hw/qdev-core.h" + +#include "nvme.h" +#include "nvme-ns.h" + +static int nvme_ns_init(NvmeNamespace *ns) +{ + NvmeIdNs *id_ns = &ns->id_ns; + + id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; + id_ns->nuse = id_ns->ncap = id_ns->nsze = + cpu_to_le64(nvme_ns_nlbas(ns)); + + return 0; +} + +static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, NvmeIdCtrl *id, + Error **errp) +{ + uint64_t perm, shared_perm; + + Error *local_err = NULL; + int ret; + + perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD; + + ret = blk_set_perm(ns->blk, perm, shared_perm, &local_err); + if (ret) { + error_propagate_prepend(errp, local_err, "blk_set_perm: "); + return ret; + } + + ns->size = blk_getlength(ns->blk); + if (ns->size < 0) { + error_setg_errno(errp, -ns->size, "blk_getlength"); + return 1; + } + + switch (n->conf.wce) { + case ON_OFF_AUTO_ON: + n->features.volatile_wc = 1; + break; + case ON_OFF_AUTO_OFF: + n->features.volatile_wc = 0; + case ON_OFF_AUTO_AUTO: + n->features.volatile_wc = blk_enable_write_cache(ns->blk); + break; + default: + abort(); + } + + blk_set_enable_write_cache(ns->blk, n->features.volatile_wc); + + return 0; +} + +static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) +{ + if (!ns->blk) { + error_setg(errp, "block backend not configured"); + return 1; + } + + return 0; +} + +int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +{ + Error *local_err = NULL; + + if (nvme_ns_check_constraints(ns, &local_err)) { + error_propagate_prepend(errp, local_err, + 
"nvme_ns_check_constraints: "); + return 1; + } + + if (nvme_ns_init_blk(n, ns, &n->id_ctrl, &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_ns_init_blk: "); + return 1; + } + + nvme_ns_init(ns); + if (nvme_register_namespace(n, ns, &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_register_namespace: "); + return 1; + } + + return 0; +} + +static void nvme_ns_realize(DeviceState *dev, Error **errp) +{ + NvmeNamespace *ns = NVME_NS(dev); + BusState *s = qdev_get_parent_bus(dev); + NvmeCtrl *n = NVME(s->parent); + Error *local_err = NULL; + + if (nvme_ns_setup(n, ns, &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_ns_setup: "); + return; + } +} + +static Property nvme_ns_props[] = { + DEFINE_NVME_NS_PROPERTIES(NvmeNamespace, params), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nvme_ns_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + + dc->bus_type = TYPE_NVME_BUS; + dc->realize = nvme_ns_realize; + dc->props = nvme_ns_props; + dc->desc = "virtual nvme namespace"; +} + +static void nvme_ns_instance_init(Object *obj) +{ + NvmeNamespace *ns = NVME_NS(obj); + char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid); + + device_add_bootindex_property(obj, &ns->bootindex, "bootindex", + bootindex, DEVICE(obj), &error_abort); + + g_free(bootindex); +} + +static const TypeInfo nvme_ns_info = { + .name = TYPE_NVME_NS, + .parent = TYPE_DEVICE, + .class_init = nvme_ns_class_init, + .instance_size = sizeof(NvmeNamespace), + .instance_init = nvme_ns_instance_init, +}; + +static void nvme_ns_register_types(void) +{ + type_register_static(&nvme_ns_info); +} + +type_init(nvme_ns_register_types) diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h new file mode 100644 index 000000000000..b564bac25f6d --- /dev/null +++ b/hw/block/nvme-ns.h @@ -0,0 +1,60 @@ +#ifndef NVME_NS_H +#define NVME_NS_H + +#define TYPE_NVME_NS "nvme-ns" +#define 
NVME_NS(obj) \ + OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) + +#define DEFINE_NVME_NS_PROPERTIES(_state, _props) \ + DEFINE_PROP_DRIVE("drive", _state, blk), \ + DEFINE_PROP_UINT32("nsid", _state, _props.nsid, 0) + +typedef struct NvmeNamespaceParams { + uint32_t nsid; +} NvmeNamespaceParams; + +typedef struct NvmeNamespace { + DeviceState parent_obj; + BlockBackend *blk; + int32_t bootindex; + int64_t size; + + NvmeIdNs id_ns; + NvmeNamespaceParams params; +} NvmeNamespace; + +static inline uint32_t nvme_nsid(NvmeNamespace *ns) +{ + if (ns) { + return ns->params.nsid; + } + + return -1; +} + +static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns) +{ + NvmeIdNs *id_ns = &ns->id_ns; + return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; +} + +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) +{ + return nvme_ns_lbaf(ns).ds; +} + +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns) +{ + return 1 << nvme_ns_lbads(ns); +} + +static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns) +{ + return ns->size >> nvme_ns_lbads(ns); +} + +typedef struct NvmeCtrl NvmeCtrl; + +int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); + +#endif /* NVME_NS_H */ diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 06b2530aa06c..7bd5c1bb2f55 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -17,7 +17,8 @@ /** * Usage: add options: * -drive file=,if=none,id= - * -device nvme,drive=,serial=,id= + * -device nvme,serial=,id=nvme0 + * -device nvme-ns,drive=,bus=nvme0,nsid=1 * * Advanced optional options: * @@ -31,6 +32,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qemu/error-report.h" #include "hw/block/block.h" #include "hw/pci/msix.h" #include "hw/pci/pci.h" @@ -46,6 +48,7 @@ #include "qemu/cutils.h" #include "trace.h" #include "nvme.h" +#include "nvme-ns.h" #define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE #define NVME_TEMPERATURE 0x143 @@ -882,11 +885,12 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) static void 
nvme_rw_cb(NvmeRequest *req, void *opaque) { + NvmeNamespace *ns = req->ns; NvmeSQueue *sq = req->sq; NvmeCtrl *n = sq->ctrl; NvmeCQueue *cq = n->cq[sq->cqid]; - trace_nvme_rw_cb(req->cid, req->cmd.nsid); + trace_nvme_rw_cb(req->cid, nvme_nsid(ns)); nvme_enqueue_req_completion(cq, req); } @@ -919,6 +923,7 @@ static void nvme_aio_cb(void *opaque, int ret) block_acct_failed(stats, acct); if (req) { + NvmeNamespace *ns = req->ns; NvmeRwCmd *rw = (NvmeRwCmd *) &req->cmd; NvmeSQueue *sq = req->sq; NvmeCtrl *n = sq->ctrl; @@ -941,7 +946,7 @@ static void nvme_aio_cb(void *opaque, int ret) aio->offset, nvme_aio_opc_str(aio), req, status); nvme_set_error_page(n, sq->sqid, cpu_to_le16(req->cid), status, - offsetof(NvmeRwCmd, slba), rw->slba, 1); + offsetof(NvmeRwCmd, slba), rw->slba, nvme_nsid(ns)); error_setg_errno(&local_err, -ret, "aio failed"); error_report_err(local_err); @@ -1007,7 +1012,7 @@ static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeRequest *req) uint64_t nsze = le64_to_cpu(ns->id_ns.nsze); if (unlikely((req->slba + req->nlb) > nsze)) { - block_acct_invalid(blk_get_stats(n->conf.blk), + block_acct_invalid(blk_get_stats(ns->blk), nvme_req_is_write(req) ? 
BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); trace_nvme_err_invalid_lba_range(req->slba, req->nlb, nsze); return NVME_LBA_RANGE | NVME_DNR; @@ -1042,7 +1047,9 @@ static uint16_t nvme_check_rw(NvmeCtrl *n, NvmeRequest *req) static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - NvmeAIO *aio = nvme_aio_new(n->conf.blk, 0x0, NULL, req, NULL); + NvmeNamespace *ns = req->ns; + + NvmeAIO *aio = nvme_aio_new(ns->blk, 0x0, NULL, req, NULL); nvme_req_register_aio(req, aio, NVME_AIO_OPC_FLUSH); nvme_aio(aio); @@ -1064,19 +1071,18 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) req->slba = le64_to_cpu(rw->slba); req->nlb = le16_to_cpu(rw->nlb) + 1; - trace_nvme_write_zeros(req->cid, le32_to_cpu(cmd->nsid), req->slba, - req->nlb); + trace_nvme_write_zeros(req->cid, nvme_nsid(ns), req->slba, req->nlb); status = nvme_check_bounds(n, req); if (unlikely(status)) { - block_acct_invalid(blk_get_stats(n->conf.blk), BLOCK_ACCT_WRITE); + block_acct_invalid(blk_get_stats(ns->blk), BLOCK_ACCT_WRITE); return status; } offset = req->slba << nvme_ns_lbads(ns); count = req->nlb << nvme_ns_lbads(ns); - aio = nvme_aio_new(n->conf.blk, offset, NULL, req, NULL); + aio = nvme_aio_new(ns->blk, offset, NULL, req, NULL); aio->iov.size = count; @@ -1098,22 +1104,23 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) req->nlb = le16_to_cpu(rw->nlb) + 1; req->slba = le64_to_cpu(rw->slba); - trace_nvme_rw(nvme_req_is_write(req) ? "write" : "read", req->nlb, - req->nlb << nvme_ns_lbads(req->ns), req->slba); + trace_nvme_rw(req->cid, nvme_req_is_write(req) ? 
"write" : "read", + nvme_nsid(ns), req->nlb, req->nlb << nvme_ns_lbads(ns), + req->slba); status = nvme_check_rw(n, req); if (status) { - block_acct_invalid(blk_get_stats(n->conf.blk), acct); + block_acct_invalid(blk_get_stats(ns->blk), acct); return status; } status = nvme_map(n, cmd, req); if (status) { - block_acct_invalid(blk_get_stats(n->conf.blk), acct); + block_acct_invalid(blk_get_stats(ns->blk), acct); return status; } - nvme_rw_aio(n->conf.blk, req->slba << nvme_ns_lbads(ns), &req->qsg, req); + nvme_rw_aio(ns->blk, req->slba << nvme_ns_lbads(ns), &req->qsg, req); nvme_req_set_cb(req, nvme_rw_cb, NULL); return NVME_NO_COMPLETE; @@ -1125,12 +1132,11 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) trace_nvme_io_cmd(req->cid, nsid, le16_to_cpu(req->sq->sqid), cmd->opcode); - if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { - trace_nvme_err_invalid_ns(nsid, n->num_namespaces); - return NVME_INVALID_NSID | NVME_DNR; - } + req->ns = nvme_ns(n, nsid); - req->ns = &n->namespaces[nsid - 1]; + if (unlikely(!req->ns)) { + return nvme_nsid_err(n, nsid); + } switch (cmd->opcode) { case NVME_CMD_FLUSH: @@ -1296,18 +1302,24 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, uint64_t units_read = 0, units_written = 0, read_commands = 0, write_commands = 0; NvmeSmartLog smart; - BlockAcctStats *s; if (nsid && nsid != 0xffffffff) { return NVME_INVALID_FIELD | NVME_DNR; } - s = blk_get_stats(n->conf.blk); + for (int i = 1; i <= n->num_namespaces; i++) { + NvmeNamespace *ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + BlockAcctStats *s = blk_get_stats(ns->blk); - units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS; - units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS; - read_commands = s->nr_ops[BLOCK_ACCT_READ]; - write_commands = s->nr_ops[BLOCK_ACCT_WRITE]; + units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS; + units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> 
BDRV_SECTOR_BITS; + read_commands += s->nr_ops[BLOCK_ACCT_READ]; + write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; + } if (off > sizeof(smart)) { return NVME_INVALID_FIELD | NVME_DNR; @@ -1507,20 +1519,25 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - NvmeNamespace *ns; + NvmeIdNs *id_ns, inactive = { 0 }; uint32_t nsid = le32_to_cpu(cmd->nsid); + NvmeNamespace *ns = nvme_ns(n, nsid); trace_nvme_identify_ns(nsid); - if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { - trace_nvme_err_invalid_ns(nsid, n->num_namespaces); - return NVME_INVALID_NSID | NVME_DNR; - } + if (unlikely(!ns)) { + uint16_t status = nvme_nsid_err(n, nsid); + + if (!nvme_is_error(status, NVME_INVALID_FIELD)) { + return status; + } - ns = &n->namespaces[nsid - 1]; + id_ns = &inactive; + } else { + id_ns = &ns->id_ns; + } - return nvme_dma_read(n, (uint8_t *) &ns->id_ns, sizeof(ns->id_ns), cmd, - req); + return nvme_dma_read(n, (uint8_t *) id_ns, sizeof(NvmeIdNs), cmd, req); } static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeCmd *cmd, @@ -1535,11 +1552,11 @@ static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeCmd *cmd, trace_nvme_identify_ns_list(min_nsid); list = g_malloc0(data_len); - for (i = 0; i < n->num_namespaces; i++) { - if (i < min_nsid) { + for (i = 1; i <= n->num_namespaces; i++) { + if (i <= min_nsid || !nvme_ns(n, i)) { continue; } - list[j++] = cpu_to_le32(i + 1); + list[j++] = cpu_to_le32(i); if (j == data_len / sizeof(uint32_t)) { break; } @@ -1568,9 +1585,8 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *cmd, trace_nvme_identify_ns_descr_list(nsid); - if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { - trace_nvme_err_invalid_ns(nsid, n->num_namespaces); - return NVME_INVALID_NSID | NVME_DNR; + if (unlikely(!nvme_ns(n, nsid))) { + return nvme_nsid_err(n, nsid); } list = g_malloc0(len); @@ -1685,7 +1701,7 @@ static uint16_t 
nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) result = cpu_to_le32(n->features.err_rec); break; case NVME_VOLATILE_WRITE_CACHE: - result = blk_enable_write_cache(n->conf.blk); + result = cpu_to_le32(n->features.volatile_wc); trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); break; case NVME_NUMBER_OF_QUEUES: @@ -1739,6 +1755,8 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { + NvmeNamespace *ns; + uint32_t dw10 = le32_to_cpu(cmd->cdw10); uint32_t dw11 = le32_to_cpu(cmd->cdw11); @@ -1756,8 +1774,19 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) break; case NVME_VOLATILE_WRITE_CACHE: - blk_set_enable_write_cache(n->conf.blk, dw11 & 1); + n->features.volatile_wc = dw11; + + for (int i = 1; i <= n->num_namespaces; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + blk_set_enable_write_cache(ns->blk, dw11 & 1); + } + break; + case NVME_NUMBER_OF_QUEUES: if (n->qs_created > 2) { return NVME_CMD_SEQ_ERROR | NVME_DNR; @@ -1931,9 +1960,17 @@ static void nvme_process_sq(void *opaque) static void nvme_clear_ctrl(NvmeCtrl *n) { + NvmeNamespace *ns; int i; - blk_drain(n->conf.blk); + for (i = 1; i <= n->num_namespaces; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + blk_drain(ns->blk); + } for (i = 0; i < n->params.num_queues; i++) { if (n->sq[i] != NULL) { @@ -1960,7 +1997,15 @@ static void nvme_clear_ctrl(NvmeCtrl *n) n->outstanding_aers = 0; - blk_flush(n->conf.blk); + for (i = 1; i <= n->num_namespaces; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + blk_flush(ns->blk); + } + n->bar.cc = 0; } @@ -2381,8 +2426,8 @@ static int nvme_check_constraints(NvmeCtrl *n, Error **errp) { NvmeParams *params = &n->params; - if (!n->conf.blk) { - error_setg(errp, "nvme: block backend not configured"); + if (!n->namespace.blk && !n->parent_obj.qdev.id) { + error_setg(errp, "nvme: invalid 
'id' parameter"); return 1; } @@ -2399,22 +2444,10 @@ static int nvme_check_constraints(NvmeCtrl *n, Error **errp) return 0; } -static int nvme_init_blk(NvmeCtrl *n, Error **errp) -{ - blkconf_blocksizes(&n->conf); - if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), - false, errp)) { - return 1; - } - - return 0; -} - static void nvme_init_state(NvmeCtrl *n) { - n->num_namespaces = 1; + n->num_namespaces = 0; n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); - n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); n->sq = g_new0(NvmeSQueue *, n->params.num_queues); n->cq = g_new0(NvmeCQueue *, n->params.num_queues); n->elpes = g_new0(NvmeErrorLog, n->params.elpe + 1); @@ -2506,12 +2539,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->cqes = (0x4 << 4) | 0x4; id->nn = cpu_to_le32(n->num_namespaces); id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP); - - - if (blk_enable_write_cache(n->conf.blk)) { - id->vwc = 1; - } - + id->vwc = 1; id->sgls = cpu_to_le32(0x1); strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:"); @@ -2532,22 +2560,25 @@ static void nvme_init_ctrl(NvmeCtrl *n) n->bar.intmc = n->bar.intms = 0; } -static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) { - int64_t bs_size; - NvmeIdNs *id_ns = &ns->id_ns; + uint32_t nsid = nvme_nsid(ns); + + if (nsid == 0 || nsid > NVME_MAX_NAMESPACES) { + error_setg(errp, "invalid nsid"); + return 1; + } - bs_size = blk_getlength(n->conf.blk); - if (bs_size < 0) { - error_setg_errno(errp, -bs_size, "blk_getlength"); + if (n->namespaces[nsid - 1]) { + error_setg(errp, "nsid must be unique"); return 1; } - id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; - n->ns_size = bs_size; + trace_nvme_register_namespace(nsid); - id_ns->ncap = id_ns->nuse = id_ns->nsze = - cpu_to_le64(nvme_ns_nlbas(n, ns)); + n->namespaces[nsid - 1] = ns; + n->num_namespaces = MAX(n->num_namespaces, 
nsid); + n->id_ctrl.nn = cpu_to_le32(n->num_namespaces); return 0; } @@ -2555,30 +2586,31 @@ static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) static void nvme_realize(PCIDevice *pci_dev, Error **errp) { NvmeCtrl *n = NVME(pci_dev); + NvmeNamespace *ns; Error *local_err = NULL; - int i; if (nvme_check_constraints(n, &local_err)) { error_propagate_prepend(errp, local_err, "nvme_check_constraints: "); return; } + qbus_create_inplace(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, + &pci_dev->qdev, n->parent_obj.qdev.id); + nvme_init_state(n); + nvme_init_pci(n, pci_dev); + nvme_init_ctrl(n); - if (nvme_init_blk(n, &local_err)) { - error_propagate_prepend(errp, local_err, "nvme_init_blk: "); - return; - } + /* setup a namespace if the controller drive property was given */ + if (n->namespace.blk) { + ns = &n->namespace; + ns->params.nsid = 1; - for (i = 0; i < n->num_namespaces; i++) { - if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) { - error_propagate_prepend(errp, local_err, "nvme_init_namespace: "); + if (nvme_ns_setup(n, ns, &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_ns_setup: "); return; } } - - nvme_init_pci(n, pci_dev); - nvme_init_ctrl(n); } static void nvme_exit(PCIDevice *pci_dev) @@ -2600,7 +2632,8 @@ static void nvme_exit(PCIDevice *pci_dev) } static Property nvme_props[] = { - DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), + DEFINE_BLOCK_PROPERTIES_BASE(NvmeCtrl, conf), \ + DEFINE_PROP_DRIVE("drive", NvmeCtrl, namespace.blk), \ DEFINE_NVME_PROPERTIES(NvmeCtrl, params), DEFINE_PROP_END_OF_LIST(), }; @@ -2632,26 +2665,35 @@ static void nvme_instance_init(Object *obj) { NvmeCtrl *s = NVME(obj); - device_add_bootindex_property(obj, &s->conf.bootindex, - "bootindex", "/namespace@1,0", - DEVICE(obj), &error_abort); + if (s->namespace.blk) { + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", "/namespace@1,0", + DEVICE(obj), &error_abort); + } } static const TypeInfo nvme_info = { .name = 
TYPE_NVME, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(NvmeCtrl), - .class_init = nvme_class_init, .instance_init = nvme_instance_init, + .class_init = nvme_class_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_PCIE_DEVICE }, { } }, }; +static const TypeInfo nvme_bus_info = { + .name = TYPE_NVME_BUS, + .parent = TYPE_BUS, + .instance_size = sizeof(NvmeBus), +}; + static void nvme_register_types(void) { type_register_static(&nvme_info); + type_register_static(&nvme_bus_info); } type_init(nvme_register_types) diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 2f6706899de1..175a988bd85f 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -2,6 +2,10 @@ #define HW_NVME_H #include "block/nvme.h" +#include "nvme-ns.h" +#include "trace.h" + +#define NVME_MAX_NAMESPACES 256 #define DEFINE_NVME_PROPERTIES(_state, _props) \ DEFINE_PROP_STRING("serial", _state, _props.serial), \ @@ -72,7 +76,6 @@ static inline const char *nvme_aio_opc_str(NvmeAIO *aio) #define NVME_REQ_TRANSFER_MASK 0x3 typedef struct NvmeSQueue NvmeSQueue; -typedef struct NvmeNamespace NvmeNamespace; typedef void NvmeRequestCompletionFunc(NvmeRequest *req, void *opaque); struct NvmeRequest { @@ -122,7 +125,7 @@ static inline bool nvme_req_is_cmb(NvmeRequest *req) return (req->flags & NVME_REQ_TRANSFER_MASK) == NVME_REQ_TRANSFER_CMB; } -static void nvme_req_set_cmb(NvmeRequest *req) +static inline void nvme_req_set_cmb(NvmeRequest *req) { req->flags = NVME_REQ_TRANSFER_CMB; } @@ -173,9 +176,12 @@ struct NvmeCQueue { QTAILQ_HEAD(, NvmeRequest) req_list; }; -struct NvmeNamespace { - NvmeIdNs id_ns; -}; +#define TYPE_NVME_BUS "nvme-bus" +#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) + +typedef struct NvmeBus { + BusState parent_bus; +} NvmeBus; #define TYPE_NVME "nvme" #define NVME(obj) \ @@ -186,8 +192,9 @@ typedef struct NvmeCtrl { MemoryRegion iomem; MemoryRegion ctrl_mem; NvmeBar bar; - BlockConf conf; NvmeParams params; + NvmeBus bus; + BlockConf conf; uint32_t page_size; 
uint16_t page_bits; @@ -197,7 +204,6 @@ typedef struct NvmeCtrl { uint32_t reg_size; uint32_t num_namespaces; uint32_t max_q_ents; - uint64_t ns_size; uint8_t outstanding_aers; uint32_t cmbsz; uint32_t cmbloc; @@ -217,7 +223,8 @@ typedef struct NvmeCtrl { NvmeRequest **aer_reqs; QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; - NvmeNamespace *namespaces; + NvmeNamespace namespace; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES]; NvmeSQueue **sq; NvmeCQueue **cq; NvmeSQueue admin_sq; @@ -227,36 +234,37 @@ typedef struct NvmeCtrl { NvmeIdCtrl id_ctrl; } NvmeCtrl; -static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) +static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) { - return req->sq->ctrl; -} + if (!nsid || nsid > n->num_namespaces) { + return NULL; + } -static inline bool nvme_is_error(uint16_t status, uint16_t err) -{ - /* strip DNR and MORE */ - return (status & 0xfff) == err; + return n->namespaces[nsid - 1]; } -static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns) +static inline uint16_t nvme_nsid_err(NvmeCtrl *n, uint32_t nsid) { - NvmeIdNs *id_ns = &ns->id_ns; - return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; -} + if (nsid && nsid < n->num_namespaces) { + trace_nvme_err_inactive_ns(nsid, n->num_namespaces); + return NVME_INVALID_FIELD | NVME_DNR; + } -static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) -{ - return nvme_ns_lbaf(ns).ds; + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); + return NVME_INVALID_NSID | NVME_DNR; } -static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns) +static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) { - return 1 << nvme_ns_lbads(ns); + return req->sq->ctrl; } -static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns) +static inline bool nvme_is_error(uint16_t status, uint16_t err) { - return n->ns_size >> nvme_ns_lbads(ns); + /* strip DNR and MORE */ + return (status & 0xfff) == err; } +int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); + #endif /* HW_NVME_H */ 
diff --git a/hw/block/trace-events b/hw/block/trace-events index 59d42a3b8c39..5df48cca55e4 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -29,6 +29,7 @@ hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int t # nvme.c # nvme traces for successful events +nvme_register_namespace(uint32_t nsid) "nsid %"PRIu32"" nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" nvme_irq_pin(void) "pulsing IRQ pin" nvme_irq_masked(void) "IRQ is masked" @@ -38,7 +39,7 @@ nvme_map_sgl(uint16_t cid, uint8_t typ, uint32_t nlb, uint64_t len) "cid %"PRIu1 nvme_req_register_aio(uint16_t cid, void *aio, const char *blkname, uint64_t offset, uint64_t count, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" count %"PRIu64" opc \"%s\" req %p" nvme_aio_cb(uint16_t cid, void *aio, const char *blkname, uint64_t offset, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" opc \"%s\" req %p" nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8"" -nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" +nvme_rw(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" %s nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" nvme_rw_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" nvme_write_zeros(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" @@ -90,7 +91,8 @@ nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not pa nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not 
page aligned: 0x%"PRIx64"" nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" nvme_err_invalid_prp(void) "invalid PRP" -nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" +nvme_err_invalid_ns(uint32_t nsid, uint32_t nn) "nsid %"PRIu32" nn %"PRIu32"" +nvme_err_inactive_ns(uint32_t nsid, uint32_t nn) "nsid %"PRIu32" nn %"PRIu32"" nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" From patchwork Mon Nov 11 12:25:41 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192913 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVqs3cq6z9sPj for ; Mon, 11 Nov 2019 23:41:45 +1100 (AEDT) Received: from localhost ([::1]:51970 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU90p-0008QC-Ae for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:41:43 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37165) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8m5-0008ED-TZ for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:32 -0500 Received: from Debian-exim by eggs.gnu.org with 
spam-scanned (Exim 4.71) (envelope-from ) id 1iU8m3-0003ZH-K0 for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:29 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52274) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8lz-0003QT-Rd; Mon, 11 Nov 2019 07:26:24 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id C4A26BFB12; Mon, 11 Nov 2019 12:26:02 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 17/21] nvme: bump controller pci device id Date: Mon, 11 Nov 2019 13:25:41 +0100 Message-Id: <20191111122545.252478-18-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Since commits 9d6459d21a6e ("nvme: fix write zeroes offset and count") and c7fe50bcf1f1 ("nvme: support multiple namespaces") the controller device no longer has the quirks that the Linux kernel thinks it has. As the quirks are applied based on pci vendor and device id, bump the device id to get rid of them.
Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 7bd5c1bb2f55..57e3a465c688 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -2496,7 +2496,7 @@ static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) pci_conf[PCI_INTERRUPT_PIN] = 1; pci_config_set_prog_interface(pci_conf, 0x2); pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); - pci_config_set_device_id(pci_conf, 0x5845); + pci_config_set_device_id(pci_conf, 0x5846); pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); pcie_endpoint_cap_init(pci_dev, 0x80); @@ -2652,7 +2652,7 @@ static void nvme_class_init(ObjectClass *oc, void *data) pc->exit = nvme_exit; pc->class_id = PCI_CLASS_STORAGE_EXPRESS; pc->vendor_id = PCI_VENDOR_ID_INTEL; - pc->device_id = 0x5845; + pc->device_id = 0x5846; pc->revision = 2; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); From patchwork Mon Nov 11 12:25:42 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192917 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVvJ24Jcz9sRH for ; Mon, 11 Nov 2019 23:44:44 +1100 (AEDT) Received: from localhost ([::1]:51996 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU93h-0003UJ-Vv for incoming@patchwork.ozlabs.org; 
Mon, 11 Nov 2019 07:44:42 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37289) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8mE-0008Kk-SO for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:41 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8mA-0003cC-NG for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:38 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52288) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8m0-0003Qs-8J; Mon, 11 Nov 2019 07:26:24 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 1AF8ABFB13; Mon, 11 Nov 2019 12:26:03 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 18/21] nvme: remove redundant NvmeCmd pointer parameter Date: Mon, 11 Nov 2019 13:25:42 +0100 Message-Id: <20191111122545.252478-19-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" The command struct is available in the NvmeRequest that we generally pass around anyway. 
Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 218 +++++++++++++++++++++++------------------------- 1 file changed, 106 insertions(+), 112 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 57e3a465c688..81322bb79e4c 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -574,14 +574,14 @@ static uint16_t nvme_dma_write_sgl(NvmeCtrl *n, uint8_t *ptr, uint32_t len, } static uint16_t nvme_dma_write(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeCmd *cmd, NvmeRequest *req) + NvmeRequest *req) { - if (NVME_CMD_FLAGS_PSDT(cmd->flags)) { - return nvme_dma_write_sgl(n, ptr, len, cmd->dptr.sgl, req); + if (NVME_CMD_FLAGS_PSDT(req->cmd.flags)) { + return nvme_dma_write_sgl(n, ptr, len, req->cmd.dptr.sgl, req); } - uint64_t prp1 = le64_to_cpu(cmd->dptr.prp.prp1); - uint64_t prp2 = le64_to_cpu(cmd->dptr.prp.prp2); + uint64_t prp1 = le64_to_cpu(req->cmd.dptr.prp.prp1); + uint64_t prp2 = le64_to_cpu(req->cmd.dptr.prp.prp2); return nvme_dma_write_prp(n, ptr, len, prp1, prp2, req); } @@ -624,7 +624,7 @@ out: } static uint16_t nvme_dma_read_sgl(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeSglDescriptor sgl, NvmeCmd *cmd, NvmeRequest *req) + NvmeSglDescriptor sgl, NvmeRequest *req) { QEMUSGList qsg; uint16_t err = NVME_SUCCESS; @@ -662,29 +662,29 @@ out: } static uint16_t nvme_dma_read(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeCmd *cmd, NvmeRequest *req) + NvmeRequest *req) { - if (NVME_CMD_FLAGS_PSDT(cmd->flags)) { - return nvme_dma_read_sgl(n, ptr, len, cmd->dptr.sgl, cmd, req); + if (NVME_CMD_FLAGS_PSDT(req->cmd.flags)) { + return nvme_dma_read_sgl(n, ptr, len, req->cmd.dptr.sgl, req); } - uint64_t prp1 = le64_to_cpu(cmd->dptr.prp.prp1); - uint64_t prp2 = le64_to_cpu(cmd->dptr.prp.prp2); + uint64_t prp1 = le64_to_cpu(req->cmd.dptr.prp.prp1); + uint64_t prp2 = le64_to_cpu(req->cmd.dptr.prp.prp2); return nvme_dma_read_prp(n, ptr, len, prp1, prp2, req); } -static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t 
nvme_map(NvmeCtrl *n, NvmeRequest *req) { uint32_t len = req->nlb << nvme_ns_lbads(req->ns); uint64_t prp1, prp2; - if (NVME_CMD_FLAGS_PSDT(cmd->flags)) { - return nvme_map_sgl(n, &req->qsg, cmd->dptr.sgl, len, req); + if (NVME_CMD_FLAGS_PSDT(req->cmd.flags)) { + return nvme_map_sgl(n, &req->qsg, req->cmd.dptr.sgl, len, req); } - prp1 = le64_to_cpu(cmd->dptr.prp.prp1); - prp2 = le64_to_cpu(cmd->dptr.prp.prp2); + prp1 = le64_to_cpu(req->cmd.dptr.prp.prp1); + prp2 = le64_to_cpu(req->cmd.dptr.prp.prp2); return nvme_map_prp(n, &req->qsg, prp1, prp2, len, req); } @@ -1045,7 +1045,7 @@ static uint16_t nvme_check_rw(NvmeCtrl *n, NvmeRequest *req) return NVME_SUCCESS; } -static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns = req->ns; @@ -1057,12 +1057,12 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_NO_COMPLETE; } -static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeRequest *req) { NvmeAIO *aio; NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *) cmd; + NvmeRwCmd *rw = (NvmeRwCmd *) &req->cmd; int64_t offset; size_t count; @@ -1092,9 +1092,9 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_NO_COMPLETE; } -static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req) { - NvmeRwCmd *rw = (NvmeRwCmd *) cmd; + NvmeRwCmd *rw = (NvmeRwCmd *) &req->cmd; NvmeNamespace *ns = req->ns; int status; @@ -1114,7 +1114,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return status; } - status = nvme_map(n, cmd, req); + status = nvme_map(n, req); if (status) { block_acct_invalid(blk_get_stats(ns->blk), acct); return status; @@ -1126,11 +1126,12 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_NO_COMPLETE; 
} -static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) { - uint32_t nsid = le32_to_cpu(cmd->nsid); + uint32_t nsid = le32_to_cpu(req->cmd.nsid); - trace_nvme_io_cmd(req->cid, nsid, le16_to_cpu(req->sq->sqid), cmd->opcode); + trace_nvme_io_cmd(req->cid, nsid, le16_to_cpu(req->sq->sqid), + req->cmd.opcode); req->ns = nvme_ns(n, nsid); @@ -1138,16 +1139,16 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return nvme_nsid_err(n, nsid); } - switch (cmd->opcode) { + switch (req->cmd.opcode) { case NVME_CMD_FLUSH: - return nvme_flush(n, cmd, req); + return nvme_flush(n, req); case NVME_CMD_WRITE_ZEROS: - return nvme_write_zeros(n, cmd, req); + return nvme_write_zeros(n, req); case NVME_CMD_WRITE: case NVME_CMD_READ: - return nvme_rw(n, cmd, req); + return nvme_rw(n, req); default: - trace_nvme_err_invalid_opc(cmd->opcode); + trace_nvme_err_invalid_opc(req->cmd.opcode); return NVME_INVALID_OPCODE | NVME_DNR; } } @@ -1164,10 +1165,10 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) n->qs_created--; } -static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) +static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) { - NvmeDeleteQ *c = (NvmeDeleteQ *)cmd; - NvmeRequest *req, *next; + NvmeDeleteQ *c = (NvmeDeleteQ *) &req->cmd; + NvmeRequest *next; NvmeSQueue *sq; NvmeCQueue *cq; NvmeAIO *aio; @@ -1236,10 +1237,10 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, n->qs_created++; } -static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) +static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req) { NvmeSQueue *sq; - NvmeCreateSq *c = (NvmeCreateSq *)cmd; + NvmeCreateSq *c = (NvmeCreateSq *) &req->cmd; uint16_t cqid = le16_to_cpu(c->cqid); uint16_t sqid = le16_to_cpu(c->sqid); @@ -1274,8 +1275,8 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) return NVME_SUCCESS; } -static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd 
*cmd, uint8_t rae, - uint32_t buf_len, uint64_t off, NvmeRequest *req) +static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) { uint32_t trans_len; @@ -1289,13 +1290,13 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, nvme_clear_events(n, NVME_AER_TYPE_ERROR); } - return nvme_dma_read(n, (uint8_t *) n->elpes + off, trans_len, cmd, req); + return nvme_dma_read(n, (uint8_t *) n->elpes + off, trans_len, req); } -static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, - uint32_t buf_len, uint64_t off, NvmeRequest *req) +static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) { - uint32_t nsid = le32_to_cpu(cmd->nsid); + uint32_t nsid = le32_to_cpu(req->cmd.nsid); uint32_t trans_len; time_t current_ms; @@ -1350,11 +1351,11 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, nvme_clear_events(n, NVME_AER_TYPE_SMART); } - return nvme_dma_read(n, (uint8_t *) &smart + off, trans_len, cmd, req); + return nvme_dma_read(n, (uint8_t *) &smart + off, trans_len, req); } -static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, - uint64_t off, NvmeRequest *req) +static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off, + NvmeRequest *req) { uint32_t trans_len; NvmeFwSlotInfoLog fw_log; @@ -1367,15 +1368,15 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, trans_len = MIN(sizeof(fw_log) - off, buf_len); - return nvme_dma_read(n, (uint8_t *) &fw_log + off, trans_len, cmd, req); + return nvme_dma_read(n, (uint8_t *) &fw_log + off, trans_len, req); } -static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) { - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint32_t dw11 = le32_to_cpu(cmd->cdw11); - uint32_t dw12 = le32_to_cpu(cmd->cdw12); - uint32_t dw13 = 
le32_to_cpu(cmd->cdw13); + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint32_t dw11 = le32_to_cpu(req->cmd.cdw11); + uint32_t dw12 = le32_to_cpu(req->cmd.cdw12); + uint32_t dw13 = le32_to_cpu(req->cmd.cdw13); uint8_t lid = dw10 & 0xff; uint8_t lsp = (dw10 >> 8) & 0xf; uint8_t rae = (dw10 >> 15) & 0x1; @@ -1405,11 +1406,11 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (lid) { case NVME_LOG_ERROR_INFO: - return nvme_error_info(n, cmd, rae, len, off, req); + return nvme_error_info(n, rae, len, off, req); case NVME_LOG_SMART_INFO: - return nvme_smart_info(n, cmd, rae, len, off, req); + return nvme_smart_info(n, rae, len, off, req); case NVME_LOG_FW_SLOT_INFO: - return nvme_fw_log_info(n, cmd, len, off, req); + return nvme_fw_log_info(n, len, off, req); default: trace_nvme_err_invalid_log_page(req->cid, lid); return NVME_INVALID_LOG_ID | NVME_DNR; @@ -1428,9 +1429,9 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) n->qs_created--; } -static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) +static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req) { - NvmeDeleteQ *c = (NvmeDeleteQ *)cmd; + NvmeDeleteQ *c = (NvmeDeleteQ *) &req->cmd; NvmeCQueue *cq; uint16_t qid = le16_to_cpu(c->qid); @@ -1469,10 +1470,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, n->qs_created++; } -static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) +static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) { NvmeCQueue *cq; - NvmeCreateCq *c = (NvmeCreateCq *)cmd; + NvmeCreateCq *c = (NvmeCreateCq *) &req->cmd; uint16_t cqid = le16_to_cpu(c->cqid); uint16_t vector = le16_to_cpu(c->irq_vector); uint16_t qsize = le16_to_cpu(c->qsize); @@ -1509,18 +1510,17 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) return NVME_SUCCESS; } -static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req) { trace_nvme_identify_ctrl(); 
- return nvme_dma_read(n, (uint8_t *) &n->id_ctrl, sizeof(n->id_ctrl), cmd, - req); + return nvme_dma_read(n, (uint8_t *) &n->id_ctrl, sizeof(n->id_ctrl), req); } -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req) { NvmeIdNs *id_ns, inactive = { 0 }; - uint32_t nsid = le32_to_cpu(cmd->nsid); + uint32_t nsid = le32_to_cpu(req->cmd.nsid); NvmeNamespace *ns = nvme_ns(n, nsid); trace_nvme_identify_ns(nsid); @@ -1537,14 +1537,13 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) id_ns = &ns->id_ns; } - return nvme_dma_read(n, (uint8_t *) id_ns, sizeof(NvmeIdNs), cmd, req); + return nvme_dma_read(n, (uint8_t *) id_ns, sizeof(NvmeIdNs), req); } -static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeCmd *cmd, - NvmeRequest *req) +static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeRequest *req) { static const int data_len = 4 * KiB; - uint32_t min_nsid = le32_to_cpu(cmd->nsid); + uint32_t min_nsid = le32_to_cpu(req->cmd.nsid); uint32_t *list; uint16_t ret; int i, j = 0; @@ -1561,13 +1560,12 @@ static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeCmd *cmd, break; } } - ret = nvme_dma_read(n, (uint8_t *) list, data_len, cmd, req); + ret = nvme_dma_read(n, (uint8_t *) list, data_len, req); g_free(list); return ret; } -static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *cmd, - NvmeRequest *req) +static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) { static const int len = 4096; @@ -1578,7 +1576,7 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *cmd, uint8_t nid[16]; }; - uint32_t nsid = le32_to_cpu(cmd->nsid); + uint32_t nsid = le32_to_cpu(req->cmd.nsid); struct ns_descr *list; uint16_t ret; @@ -1594,33 +1592,33 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *cmd, list->nidl = 0x10; *(uint32_t *) &list->nid[12] = cpu_to_be32(nsid); - ret = nvme_dma_read(n, (uint8_t *) list, 
len, cmd, req); + ret = nvme_dma_read(n, (uint8_t *) list, len, req); g_free(list); return ret; } -static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) { - NvmeIdentify *c = (NvmeIdentify *)cmd; + NvmeIdentify *c = (NvmeIdentify *) &req->cmd; switch (le32_to_cpu(c->cns)) { case 0x00: - return nvme_identify_ns(n, cmd, req); + return nvme_identify_ns(n, req); case 0x01: - return nvme_identify_ctrl(n, cmd, req); + return nvme_identify_ctrl(n, req); case 0x02: - return nvme_identify_ns_list(n, cmd, req); + return nvme_identify_ns_list(n, req); case 0x03: - return nvme_identify_ns_descr_list(n, cmd, req); + return nvme_identify_ns_descr_list(n, req); default: trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); return NVME_INVALID_FIELD | NVME_DNR; } } -static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req) { - uint16_t sqid = le32_to_cpu(cmd->cdw10) & 0xffff; + uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff; req->cqe.result = 1; if (nvme_check_sqid(n, sqid)) { @@ -1670,19 +1668,17 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) return cpu_to_le64(ts.all); } -static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, - NvmeRequest *req) +static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) { uint64_t timestamp = nvme_get_timestamp(n); - return nvme_dma_read(n, (uint8_t *)&timestamp, sizeof(timestamp), cmd, - req); + return nvme_dma_read(n, (uint8_t *)&timestamp, sizeof(timestamp), req); } -static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) { - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint32_t dw11 = le32_to_cpu(req->cmd.cdw11); uint32_t result; trace_nvme_getfeat(dw10); @@ -1710,7
+1706,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) trace_nvme_getfeat_numq(result); break; case NVME_TIMESTAMP: - return nvme_get_feature_timestamp(n, cmd, req); + return nvme_get_feature_timestamp(n, req); case NVME_INTERRUPT_COALESCING: result = cpu_to_le32(n->features.int_coalescing); break; @@ -1736,14 +1732,12 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_SUCCESS; } -static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, - NvmeRequest *req) +static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) { uint16_t ret; uint64_t timestamp; - ret = nvme_dma_write(n, (uint8_t *)&timestamp, sizeof(timestamp), cmd, - req); + ret = nvme_dma_write(n, (uint8_t *)&timestamp, sizeof(timestamp), req); if (ret != NVME_SUCCESS) { return ret; } @@ -1753,12 +1747,12 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, return NVME_SUCCESS; } -static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns; - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint32_t dw11 = le32_to_cpu(req->cmd.cdw11); trace_nvme_setfeat(dw10, dw11); @@ -1804,7 +1798,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) ((n->params.num_queues - 2) << 16)); break; case NVME_TIMESTAMP: - return nvme_set_feature_timestamp(n, cmd, req); + return nvme_set_feature_timestamp(n, req); case NVME_ASYNCHRONOUS_EVENT_CONF: n->features.async_config = dw11; break; @@ -1823,7 +1817,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_SUCCESS; } -static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req) { trace_nvme_aer(req->cid); @@ -1839,31 +1833,31 @@ static uint16_t
nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_NO_COMPLETE; } -static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { - switch (cmd->opcode) { + switch (req->cmd.opcode) { case NVME_ADM_CMD_DELETE_SQ: - return nvme_del_sq(n, cmd); + return nvme_del_sq(n, req); case NVME_ADM_CMD_CREATE_SQ: - return nvme_create_sq(n, cmd); + return nvme_create_sq(n, req); case NVME_ADM_CMD_GET_LOG_PAGE: - return nvme_get_log(n, cmd, req); + return nvme_get_log(n, req); case NVME_ADM_CMD_DELETE_CQ: - return nvme_del_cq(n, cmd); + return nvme_del_cq(n, req); case NVME_ADM_CMD_CREATE_CQ: - return nvme_create_cq(n, cmd); + return nvme_create_cq(n, req); case NVME_ADM_CMD_IDENTIFY: - return nvme_identify(n, cmd, req); + return nvme_identify(n, req); case NVME_ADM_CMD_ABORT: - return nvme_abort(n, cmd, req); + return nvme_abort(n, req); case NVME_ADM_CMD_SET_FEATURES: - return nvme_set_feature(n, cmd, req); + return nvme_set_feature(n, req); case NVME_ADM_CMD_GET_FEATURES: - return nvme_get_feature(n, cmd, req); + return nvme_get_feature(n, req); case NVME_ADM_CMD_ASYNC_EV_REQ: - return nvme_aer(n, cmd, req); + return nvme_aer(n, req); default: - trace_nvme_err_invalid_admin_opc(cmd->opcode); + trace_nvme_err_invalid_admin_opc(req->cmd.opcode); return NVME_INVALID_OPCODE | NVME_DNR; } } @@ -1940,7 +1934,7 @@ static void nvme_process_sq(void *opaque) while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { addr = sq->dma_addr + sq->head * n->sqe_size; - nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd)); + nvme_addr_read(n, addr, (void *)&cmd, sizeof(NvmeCmd)); nvme_inc_sq_head(sq); req = QTAILQ_FIRST(&sq->req_list); @@ -1949,8 +1943,8 @@ static void nvme_process_sq(void *opaque) nvme_init_req(n, &cmd, req); - status = sq->sqid ? nvme_io_cmd(n, &cmd, req) : - nvme_admin_cmd(n, &cmd, req); + status = sq->sqid ? 
nvme_io_cmd(n, req) : + nvme_admin_cmd(n, req); if (status != NVME_NO_COMPLETE) { req->status = status; nvme_enqueue_req_completion(cq, req); From patchwork Mon Nov 11 12:25:43 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192909 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVkQ1FRcz9sPn for ; Mon, 11 Nov 2019 23:37:02 +1100 (AEDT) Received: from localhost ([::1]:51894 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8wF-0003FO-Nw for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:36:59 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37166) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8m5-0008EI-Tj for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:32 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8m3-0003ZU-Re for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:29 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52298) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8m0-0003R5-I2; Mon, 11 Nov 2019 07:26:24 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 72ED1BFB17; Mon, 11 Nov 2019 12:26:03 +0000 (UTC) From: Klaus Jensen To: 
qemu-block@nongnu.org Subject: [PATCH v3 19/21] nvme: make lba data size configurable Date: Mon, 11 Nov 2019 13:25:43 +0100 Message-Id: <20191111122545.252478-20-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Signed-off-by: Klaus Jensen --- hw/block/nvme-ns.c | 2 +- hw/block/nvme-ns.h | 4 +++- hw/block/nvme.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 36deedee07a6..dc7e63b01037 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -18,7 +18,7 @@ static int nvme_ns_init(NvmeNamespace *ns) { NvmeIdNs *id_ns = &ns->id_ns; - id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; + id_ns->lbaf[0].ds = ns->params.lbads; id_ns->nuse = id_ns->ncap = id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns)); diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h index b564bac25f6d..9d519182c3a5 100644 --- a/hw/block/nvme-ns.h +++ b/hw/block/nvme-ns.h @@ -7,10 +7,12 @@ #define DEFINE_NVME_NS_PROPERTIES(_state, _props) \ DEFINE_PROP_DRIVE("drive", _state, blk), \ - DEFINE_PROP_UINT32("nsid", _state, _props.nsid, 0) + DEFINE_PROP_UINT32("nsid", _state, _props.nsid, 0), \ + DEFINE_PROP_UINT8("lbads", _state, _props.lbads, 9) typedef struct NvmeNamespaceParams { uint32_t nsid; + uint8_t lbads; } NvmeNamespaceParams; typedef struct NvmeNamespace { diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 
81322bb79e4c..3acbaaa79008 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -2599,6 +2599,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) if (n->namespace.blk) { ns = &n->namespace; ns->params.nsid = 1; + ns->params.lbads = 9; if (nvme_ns_setup(n, ns, &local_err)) { error_propagate_prepend(errp, local_err, "nvme_ns_setup: "); From patchwork Mon Nov 11 12:25:44 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192911 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVn53jb2z9sPj for ; Mon, 11 Nov 2019 23:39:18 +1100 (AEDT) Received: from localhost ([::1]:51940 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8yS-0005yw-Au for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:39:16 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37217) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8m8-0008GR-K7 for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:34 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1iU8m5-0003aF-SU for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:32 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52324) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8m0-0003RQ-Rv; Mon, 11 Nov 2019 
07:26:24 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id B9789BF6BD; Mon, 11 Nov 2019 12:26:03 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 20/21] pci: pass along the return value of dma_memory_rw Date: Mon, 11 Nov 2019 13:25:44 +0100 Message-Id: <20191111122545.252478-21-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Some might actually care about the return value of dma_memory_rw. So let us pass it along instead of ignoring it. There are no existing users of the return value, so this patch should be safe. Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. 
Tsirkin --- include/hw/pci/pci.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index db75c6dfd05e..4d6f2b48a7f7 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -783,8 +783,7 @@ static inline AddressSpace *pci_get_address_space(PCIDevice *dev) static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr, void *buf, dma_addr_t len, DMADirection dir) { - dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir); - return 0; + return dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir); } static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr, From patchwork Mon Nov 11 12:25:45 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 1192897 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org (client-ip=209.51.188.17; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=irrelevant.dk Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47BVcW4Z6rz9sPn for ; Mon, 11 Nov 2019 23:31:55 +1100 (AEDT) Received: from localhost ([::1]:51828 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8rI-0004XF-N9 for incoming@patchwork.ozlabs.org; Mon, 11 Nov 2019 07:31:52 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]:37220) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1iU8m8-0008GW-KZ for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:36 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned 
(Exim 4.71) (envelope-from ) id 1iU8m5-0003aK-T7 for qemu-devel@nongnu.org; Mon, 11 Nov 2019 07:26:32 -0500 Received: from charlie.dont.surf ([128.199.63.193]:52338) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1iU8m1-0003Rb-4I; Mon, 11 Nov 2019 07:26:25 -0500 Received: from apples.localdomain (unknown [194.62.217.57]) by charlie.dont.surf (Postfix) with ESMTPSA id 100A6BF5D9; Mon, 11 Nov 2019 12:26:04 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH v3 21/21] nvme: handle dma errors Date: Mon, 11 Nov 2019 13:25:45 +0100 Message-Id: <20191111122545.252478-22-its@irrelevant.dk> X-Mailer: git-send-email 2.24.0 In-Reply-To: <20191111122545.252478-1-its@irrelevant.dk> References: <20191111122545.252478-1-its@irrelevant.dk> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 128.199.63.193 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Javier Gonzalez , qemu-devel@nongnu.org, Max Reitz , Ross Lagerwall , Paul Durrant , Keith Busch , Stephen Bates Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" Handling DMA errors gracefully is required for the device to pass the block/011 test ("disable PCI device while doing I/O") in the blktests suite. With this patch, the device passes the test by retrying "critical" transfers (posting of completion entries and processing of submission queue entries) after a 100 ms delay. If a DMA error occurs at any other point in the execution of a command (say, while mapping the PRPs or SGLs), the command is aborted with a Data Transfer Error status code. 
Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 63 +++++++++++++++++++++++++++++++++---------- hw/block/trace-events | 2 ++ include/block/nvme.h | 2 +- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 3acbaaa79008..d9c06a5cef9b 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -71,26 +71,26 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) return addr >= low && addr < hi; } -static inline void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, +static inline int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) { if (n->cmbsz && nvme_addr_is_cmb(n, addr)) { memcpy(buf, (void *) &n->cmbuf[addr - n->ctrl_mem.addr], size); - return; + return 0; } - pci_dma_read(&n->parent_obj, addr, buf, size); + return pci_dma_read(&n->parent_obj, addr, buf, size); } -static inline void nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, +static inline int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size) { if (n->cmbsz && nvme_addr_is_cmb(n, addr)) { memcpy((void *) &n->cmbuf[addr - n->ctrl_mem.addr], buf, size); - return; + return 0; } - pci_dma_write(&n->parent_obj, addr, buf, size); + return pci_dma_write(&n->parent_obj, addr, buf, size); } static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) @@ -228,7 +228,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1, nents = (len + n->page_size - 1) >> n->page_bits; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - nvme_addr_read(n, prp2, (void *) prp_list, prp_trans); + if (nvme_addr_read(n, prp2, (void *) prp_list, prp_trans)) { + trace_nvme_err_addr_read(prp2); + status = NVME_DATA_TRANSFER_ERROR; + goto unmap; + } while (len != 0) { bool addr_is_cmb; uint64_t prp_ent = le64_to_cpu(prp_list[i]); @@ -250,7 +254,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1, i = 0; nents = (len + n->page_size - 1) >> n->page_bits; prp_trans = MIN(n->max_prp_ents, nents) * 
sizeof(uint64_t); - nvme_addr_read(n, prp_ent, (void *) prp_list, prp_trans); + if (nvme_addr_read(n, prp_ent, (void *) prp_list, prp_trans)) { + trace_nvme_err_addr_read(prp_ent); + status = NVME_DATA_TRANSFER_ERROR; + goto unmap; + } prp_ent = le64_to_cpu(prp_list[i]); } @@ -402,7 +410,11 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, /* read the segment in chunks of 256 descriptors (4k) */ while (nsgld > MAX_NSGLD) { - nvme_addr_read(n, addr, segment, sizeof(segment)); + if (nvme_addr_read(n, addr, segment, sizeof(segment))) { + trace_nvme_err_addr_read(addr); + status = NVME_DATA_TRANSFER_ERROR; + goto unmap; + } status = nvme_map_sgl_data(n, qsg, segment, MAX_NSGLD, &len, req); if (status) { @@ -413,7 +425,11 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, addr += MAX_NSGLD * sizeof(NvmeSglDescriptor); } - nvme_addr_read(n, addr, segment, nsgld * sizeof(NvmeSglDescriptor)); + if (nvme_addr_read(n, addr, segment, nsgld * sizeof(NvmeSglDescriptor))) { + trace_nvme_err_addr_read(addr); + status = NVME_DATA_TRANSFER_ERROR; + goto unmap; + } sgl = segment[nsgld - 1]; addr = le64_to_cpu(sgl.addr); @@ -458,7 +474,11 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, nsgld = le64_to_cpu(sgl.len) / sizeof(NvmeSglDescriptor); while (nsgld > MAX_NSGLD) { - nvme_addr_read(n, addr, segment, sizeof(segment)); + if (nvme_addr_read(n, addr, segment, sizeof(segment))) { + trace_nvme_err_addr_read(addr); + status = NVME_DATA_TRANSFER_ERROR; + goto unmap; + } status = nvme_map_sgl_data(n, qsg, segment, MAX_NSGLD, &len, req); if (status) { @@ -469,7 +489,11 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, addr += MAX_NSGLD * sizeof(NvmeSglDescriptor); } - nvme_addr_read(n, addr, segment, nsgld * sizeof(NvmeSglDescriptor)); + if (nvme_addr_read(n, addr, segment, nsgld * sizeof(NvmeSglDescriptor))) { + trace_nvme_err_addr_read(addr); + status = NVME_DATA_TRANSFER_ERROR; + goto unmap; + } status = nvme_map_sgl_data(n, qsg, 
segment, nsgld, &len, req); if (status) { @@ -819,8 +843,14 @@ static void nvme_post_cqes(void *opaque) req->cqe.sq_id = cpu_to_le16(sq->sqid); req->cqe.sq_head = cpu_to_le16(sq->head); addr = cq->dma_addr + cq->tail * n->cqe_size; + if (nvme_addr_write(n, addr, (void *) cqe, sizeof(*cqe))) { + trace_nvme_err_addr_write(addr); + QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); + timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + 100 * SCALE_MS); + break; + } nvme_inc_cq_tail(cq); - nvme_addr_write(n, addr, (void *) cqe, sizeof(*cqe)); QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); } if (cq->tail != cq->head) { @@ -1934,7 +1964,12 @@ static void nvme_process_sq(void *opaque) while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { addr = sq->dma_addr + sq->head * n->sqe_size; - nvme_addr_read(n, addr, (void *)&cmd, sizeof(NvmeCmd)); + if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(NvmeCmd))) { + trace_nvme_err_addr_read(addr); + timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + 100 * SCALE_MS); + break; + } nvme_inc_sq_head(sq); req = QTAILQ_FIRST(&sq->req_list); diff --git a/hw/block/trace-events b/hw/block/trace-events index 5df48cca55e4..140210938f3a 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -84,6 +84,8 @@ nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" nvme_err_mdts(uint16_t cid, size_t mdts, size_t len) "cid %"PRIu16" mdts %"PRIu64" len %"PRIu64"" nvme_err_prinfo(uint16_t cid, uint16_t ctrl) "cid %"PRIu16" ctrl %"PRIu16"" nvme_err_aio(uint16_t cid, void *aio, const char *blkname, uint64_t offset, const char *opc, void *req, uint16_t status) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" opc \"%s\" req %p status 0x%"PRIx16"" +nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64"" +nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64"" nvme_err_invalid_sgl_descriptor(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" nvme_err_invalid_sgl_excess_length(uint16_t cid) "cid %"PRIu16"" 
nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" diff --git a/include/block/nvme.h b/include/block/nvme.h index 418dadf3c5c6..ea75c2a9bbf9 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -459,7 +459,7 @@ enum NvmeStatusCodes { NVME_INVALID_OPCODE = 0x0001, NVME_INVALID_FIELD = 0x0002, NVME_CID_CONFLICT = 0x0003, - NVME_DATA_TRAS_ERROR = 0x0004, + NVME_DATA_TRANSFER_ERROR = 0x0004, NVME_POWER_LOSS_ABORT = 0x0005, NVME_INTERNAL_DEV_ERROR = 0x0006, NVME_CMD_ABORT_REQ = 0x0007,