@@ -36,6 +36,10 @@
* zoned.zasl=<N[optional]>, \
* zoned.auto_transition=<on|off[optional]>, \
* sriov_max_vfs=<N[optional]> \
+ * sriov_vq_flexible=<N[optional]>, \
+ * sriov_vi_flexible=<N[optional]>, \
+ * sriov_max_vi_per_vf=<N[optional]>, \
+ * sriov_max_vq_per_vf=<N[optional]>, \
* subsys=<subsys_id>
* -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
* zoned=<true|false[optional]>, \
@@ -113,6 +117,29 @@
* enables reporting of both SR-IOV and ARI capabilities by the NVMe device.
* Virtual function controllers will not report SR-IOV capability.
*
+ * NOTE: Single Root I/O Virtualization support is experimental.
+ * All the related parameters may be subject to change.
+ *
+ * - `sriov_vq_flexible`
+ * Indicates the total number of flexible queue resources assignable to all
+ * the secondary controllers. Implicitly sets the number of the primary
+ * controller's private resources to `(max_ioqpairs - sriov_vq_flexible)`.
+ *
+ * - `sriov_vi_flexible`
+ * Indicates the total number of flexible interrupt resources assignable to
+ * all the secondary controllers. Implicitly sets the number of the primary
+ * controller's private resources to `(msix_qsize - sriov_vi_flexible)`.
+ *
+ * - `sriov_max_vi_per_vf`
+ * Indicates the maximum number of virtual interrupt resources assignable
+ * to a secondary controller. The default 0 resolves to
+ * `(sriov_vi_flexible / sriov_max_vfs)`.
+ *
+ * - `sriov_max_vq_per_vf`
+ * Indicates the maximum number of virtual queue resources assignable to
+ * a secondary controller. The default 0 resolves to
+ * `(sriov_vq_flexible / sriov_max_vfs)`.
+ *
* nvme namespace device parameters
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* - `shared`
@@ -185,6 +212,7 @@
#define NVME_NUM_FW_SLOTS 1
#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
#define NVME_MAX_VFS 127
+#define NVME_VF_RES_GRANULARITY 1 /* value reported in vqgran and vigran */
#define NVME_VF_OFFSET 0x1
#define NVME_VF_STRIDE 1
@@ -6656,6 +6684,53 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
error_setg(errp, "PMR is not supported with SR-IOV");
return;
}
+
+ if (!params->sriov_vq_flexible || !params->sriov_vi_flexible) {
+ error_setg(errp, "both sriov_vq_flexible and sriov_vi_flexible"
+ " must be set for the use of SR-IOV");
+ return;
+ }
+
+ if (params->sriov_vq_flexible < params->sriov_max_vfs * 2) {
+ error_setg(errp, "sriov_vq_flexible must be greater than or equal"
+ " to %d (sriov_max_vfs * 2)", params->sriov_max_vfs * 2);
+ return;
+ }
+
+ if (params->max_ioqpairs < params->sriov_vq_flexible + 2) {
+ error_setg(errp, "(max_ioqpairs - sriov_vq_flexible) must be"
+ " greater than or equal to 2");
+ return;
+ }
+
+ if (params->sriov_vi_flexible < params->sriov_max_vfs) {
+ error_setg(errp, "sriov_vi_flexible must be greater than or equal"
+ " to %d (sriov_max_vfs)", params->sriov_max_vfs);
+ return;
+ }
+
+ if (params->msix_qsize < params->sriov_vi_flexible + 1) {
+ error_setg(errp, "(msix_qsize - sriov_vi_flexible) must be"
+ " greater than or equal to 1");
+ return;
+ }
+
+ if (params->sriov_max_vi_per_vf &&
+ (params->sriov_max_vi_per_vf - 1) % NVME_VF_RES_GRANULARITY) {
+ error_setg(errp, "sriov_max_vi_per_vf must meet:"
+ " (sriov_max_vi_per_vf - 1) %% %d == 0 and"
+ " sriov_max_vi_per_vf >= 1", NVME_VF_RES_GRANULARITY);
+ return;
+ }
+
+ if (params->sriov_max_vq_per_vf &&
+ (params->sriov_max_vq_per_vf < 2 ||
+ (params->sriov_max_vq_per_vf - 1) % NVME_VF_RES_GRANULARITY)) {
+ error_setg(errp, "sriov_max_vq_per_vf must meet:"
+ " (sriov_max_vq_per_vf - 1) %% %d == 0 and"
+ " sriov_max_vq_per_vf >= 2", NVME_VF_RES_GRANULARITY);
+ return;
+ }
}
}
@@ -6664,10 +6739,19 @@ static void nvme_init_state(NvmeCtrl *n)
NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
NvmeSecCtrlList *list = &n->sec_ctrl_list;
NvmeSecCtrlEntry *sctrl;
+ uint8_t max_vfs;
int i;
- n->conf_ioqpairs = n->params.max_ioqpairs;
- n->conf_msix_qsize = n->params.msix_qsize;
+ if (pci_is_vf(&n->parent_obj)) {
+ sctrl = nvme_sctrl(n);
+ max_vfs = 0;
+ n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
+ n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
+ } else {
+ max_vfs = n->params.sriov_max_vfs;
+ n->conf_ioqpairs = n->params.max_ioqpairs;
+ n->conf_msix_qsize = n->params.msix_qsize;
+ }
n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
@@ -6676,14 +6760,41 @@ static void nvme_init_state(NvmeCtrl *n)
n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
- list->numcntl = cpu_to_le16(n->params.sriov_max_vfs);
- for (i = 0; i < n->params.sriov_max_vfs; i++) {
+ list->numcntl = cpu_to_le16(max_vfs);
+ for (i = 0; i < max_vfs; i++) {
sctrl = &list->sec[i];
sctrl->pcid = cpu_to_le16(n->cntlid);
sctrl->vfn = cpu_to_le16(i + 1);
}
cap->cntlid = cpu_to_le16(n->cntlid);
+ cap->crt = NVME_CRT_VQ | NVME_CRT_VI;
+
+ if (pci_is_vf(&n->parent_obj)) { /* VF: only private queue resources */
+ cap->vqprt = cpu_to_le16(1 + n->conf_ioqpairs);
+ } else {
+ cap->vqprt = cpu_to_le16(1 + n->params.max_ioqpairs -
+ n->params.sriov_vq_flexible);
+ cap->vqfrt = cpu_to_le32(n->params.sriov_vq_flexible);
+ cap->vqrfap = cap->vqfrt;
+ cap->vqgran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
+ cap->vqfrsm = n->params.sriov_max_vq_per_vf ? /* 0: even split */
+ cpu_to_le16(n->params.sriov_max_vq_per_vf) :
+ cpu_to_le16(n->params.sriov_vq_flexible / MAX(max_vfs, 1)); /* divide in host order; vqfrt is LE */
+ }
+
+ if (pci_is_vf(&n->parent_obj)) { /* VF: only private interrupt resources */
+ cap->viprt = cpu_to_le16(n->conf_msix_qsize);
+ } else {
+ cap->viprt = cpu_to_le16(n->params.msix_qsize -
+ n->params.sriov_vi_flexible);
+ cap->vifrt = cpu_to_le32(n->params.sriov_vi_flexible);
+ cap->virfap = cap->vifrt;
+ cap->vigran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
+ cap->vifrsm = n->params.sriov_max_vi_per_vf ? /* 0: even split */
+ cpu_to_le16(n->params.sriov_max_vi_per_vf) :
+ cpu_to_le16(n->params.sriov_vi_flexible / MAX(max_vfs, 1)); /* divide in host order; vifrt is LE */
+ }
}
static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
@@ -6756,11 +6867,14 @@ static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
return bar_size;
}
-static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
- uint64_t bar_size)
+static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
{
uint16_t vf_dev_id = n->params.use_intel_id ?
PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
+ NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
+ uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm),
+ le16_to_cpu(cap->vifrsm),
+ NULL, NULL);
pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
n->params.sriov_max_vfs, n->params.sriov_max_vfs,
@@ -6858,7 +6972,7 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
}
if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
- nvme_init_sriov(n, pci_dev, 0x120, bar_size);
+ nvme_init_sriov(n, pci_dev, 0x120);
}
return 0;
@@ -6882,6 +6996,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
NvmeIdCtrl *id = &n->id_ctrl;
uint8_t *pci_conf = pci_dev->config;
uint64_t cap = ldq_le_p(&n->bar.cap);
+ NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -6974,6 +7089,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
stl_le_p(&n->bar.vs, NVME_SPEC_VER);
n->bar.intmc = n->bar.intms = 0;
+
+ if (pci_is_vf(&n->parent_obj) && !sctrl->scs) {
+ stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
+ }
}
static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
@@ -7114,6 +7233,14 @@ static Property nvme_props[] = {
DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
params.auto_transition_zones, true),
DEFINE_PROP_UINT8("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
+ DEFINE_PROP_UINT16("sriov_vq_flexible", NvmeCtrl,
+ params.sriov_vq_flexible, 0),
+ DEFINE_PROP_UINT16("sriov_vi_flexible", NvmeCtrl,
+ params.sriov_vi_flexible, 0),
+ DEFINE_PROP_UINT8("sriov_max_vi_per_vf", NvmeCtrl,
+ params.sriov_max_vi_per_vf, 0),
+ DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
+ params.sriov_max_vq_per_vf, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -411,6 +411,10 @@ typedef struct NvmeParams {
bool auto_transition_zones;
bool legacy_cmb;
uint8_t sriov_max_vfs;
+ uint16_t sriov_vq_flexible;
+ uint16_t sriov_vi_flexible;
+ uint8_t sriov_max_vq_per_vf;
+ uint8_t sriov_max_vi_per_vf;
} NvmeParams;
typedef struct NvmeCtrl {
@@ -1576,6 +1576,11 @@ typedef struct QEMU_PACKED NvmePriCtrlCap {
uint8_t rsvd80[4016];
} NvmePriCtrlCap;
+typedef enum NvmePriCtrlCapCrt { /* bit flags OR-ed into NvmePriCtrlCap.crt */
+ NVME_CRT_VQ = 1 << 0, /* bit 0; presumably "virtual queue" resources - confirm */
+ NVME_CRT_VI = 1 << 1, /* bit 1; presumably "virtual interrupt" resources - confirm */
+} NvmePriCtrlCapCrt;
+
typedef struct QEMU_PACKED NvmeSecCtrlEntry {
uint16_t scid;
uint16_t pcid;