Message ID | 20240627-reuse-v10-6-7ca0b8ed3d9f@daynix.com |
---|---|
State | New |
Headers | show |
Series | hw/pci: SR-IOV related fixes and improvements | expand |
Hello, This change introduced a regression on s390x. I could have spotted it earlier. Sorry about that. Here is the scenario, QEMU now creates automatically the PCI device objects representing the VFs when the PF device is realized in pcie_sriov_pf_init(). This is good to report errors early but it has an important drawback. On s390x, PCI devices have a dual S390PCIBusDevice object. This device model has 'uid' and 'fid' properties which can be either set by the VMM or, if not, auto-generated by the S390PCIBusDevice realize handler. In the VF case, these ids are auto-generated by QEMU and they can possibly conflict with the uid number space of libvirt. The conflict is detected when the machine is created and the start is aborted with a message : 2024-07-08T12:51:42.876883Z qemu-system-s390x: -device {"driver":"zpci","uid":17,"fid":16,"target":"hostdev0","id":"zpci17"}: uid 17 already in use This problem can occur today with a s390x VM using an IGB device. It worked fine when the VFs were created at OS runtime because the initial topology of the machine was in place. Adding VFs was more or less like hotplug. AIUI, libvirt should have full control on the machine topology and so, creating VFs in QEMU at init time in the back of libvirt seems like a violation of this rule. That said, the s390x case is specific and could perhaps be handled in a special way. Thanks, C. On 6/27/24 8:07 AM, Akihiko Odaki wrote: > Disable SR-IOV VF devices by reusing code to power down PCI devices > instead of removing them when the guest requests to disable VFs. This > allows to realize devices and report VF realization errors at PF > realization time. > > Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> > --- > include/hw/pci/pci.h | 5 --- > include/hw/pci/pci_device.h | 15 +++++++ > include/hw/pci/pcie_sriov.h | 1 - > hw/pci/pci.c | 2 +- > hw/pci/pcie_sriov.c | 95 +++++++++++++++++++-------------------------- > 5 files changed, 56 insertions(+), 62 deletions(-) > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h > index 6c92b2f70008..442017b4865d 100644 > --- a/include/hw/pci/pci.h > +++ b/include/hw/pci/pci.h > @@ -644,9 +644,4 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev) > MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); > void pci_set_enabled(PCIDevice *pci_dev, bool state); > > -static inline void pci_set_power(PCIDevice *pci_dev, bool state) > -{ > - pci_set_enabled(pci_dev, state); > -} > - > #endif > diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h > index d57f9ce83884..ca151325085d 100644 > --- a/include/hw/pci/pci_device.h > +++ b/include/hw/pci/pci_device.h > @@ -205,6 +205,21 @@ static inline uint16_t pci_get_bdf(PCIDevice *dev) > return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn); > } > > +static inline void pci_set_power(PCIDevice *pci_dev, bool state) > +{ > + /* > + * Don't change the enabled state of VFs when powering on/off the device. > + * > + * When powering on, VFs must not be enabled immediately but they must > + * wait until the guest configures SR-IOV. > + * When powering off, their corresponding PFs will be reset and disable > + * VFs. > + */ > + if (!pci_is_vf(pci_dev)) { > + pci_set_enabled(pci_dev, state); > + } > +} > + > uint16_t pci_requester_id(PCIDevice *dev); > > /* DMA access functions */ > diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h > index aa704e8f9d9f..70649236c18a 100644 > --- a/include/hw/pci/pcie_sriov.h > +++ b/include/hw/pci/pcie_sriov.h > @@ -18,7 +18,6 @@ > typedef struct PCIESriovPF { > uint16_t num_vfs; /* Number of virtual functions created */ > uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ > - const char *vfname; /* Reference to the device type used for the VFs */ > PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ > } PCIESriovPF; > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c > index 295a32714a4a..c682c3dcb68e 100644 > --- a/hw/pci/pci.c > +++ b/hw/pci/pci.c > @@ -2822,7 +2822,7 @@ void pci_set_enabled(PCIDevice *d, bool state) > memory_region_set_enabled(&d->bus_master_enable_region, > (pci_get_word(d->config + PCI_COMMAND) > & PCI_COMMAND_MASTER) && d->enabled); > - if (!d->enabled) { > + if (d->qdev.realized) { > pci_device_reset(d); > } > } > diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c > index f0bde0d3fc79..faadb0d2ea85 100644 > --- a/hw/pci/pcie_sriov.c > +++ b/hw/pci/pcie_sriov.c > @@ -20,9 +20,16 @@ > #include "qapi/error.h" > #include "trace.h" > > -static PCIDevice *register_vf(PCIDevice *pf, int devfn, > - const char *name, uint16_t vf_num); > -static void unregister_vfs(PCIDevice *dev); > +static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) > +{ > + for (uint16_t i = 0; i < total_vfs; i++) { > + PCIDevice *vf = dev->exp.sriov_pf.vf[i]; > + object_unparent(OBJECT(vf)); > + object_unref(OBJECT(vf)); > + } > + g_free(dev->exp.sriov_pf.vf); > + dev->exp.sriov_pf.vf = NULL; > +} > > bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, > const char *vfname, uint16_t vf_dev_id, > @@ -30,6 +37,8 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, > uint16_t vf_offset, uint16_t vf_stride, > Error **errp) > { > + BusState *bus = qdev_get_parent_bus(&dev->qdev); > + int32_t devfn = dev->devfn + vf_offset; > uint8_t *cfg = dev->config + offset; > uint8_t *wmask; > > @@ -49,7 +58,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, > offset, PCI_EXT_CAP_SRIOV_SIZEOF); > dev->exp.sriov_cap = offset; > dev->exp.sriov_pf.num_vfs = 0; > - dev->exp.sriov_pf.vfname = g_strdup(vfname); > dev->exp.sriov_pf.vf = NULL; > > pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); > @@ -83,14 +91,34 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, > > qdev_prop_set_bit(&dev->qdev, "multifunction", true); > > + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); > + > + for (uint16_t i = 0; i < total_vfs; i++) { > + PCIDevice *vf = pci_new(devfn, vfname); > + vf->exp.sriov_vf.pf = dev; > + vf->exp.sriov_vf.vf_number = i; > + > + if (!qdev_realize(&vf->qdev, bus, errp)) { > + unparent_vfs(dev, i); > + return false; > + } > + > + /* set vid/did according to sr/iov spec - they are not used */ > + pci_config_set_vendor_id(vf->config, 0xffff); > + pci_config_set_device_id(vf->config, 0xffff); > + > + dev->exp.sriov_pf.vf[i] = vf; > + devfn += vf_stride; > + } > + > return true; > } > > void pcie_sriov_pf_exit(PCIDevice *dev) > { > - unregister_vfs(dev); > - g_free((char *)dev->exp.sriov_pf.vfname); > - dev->exp.sriov_pf.vfname = NULL; > + uint8_t *cfg = dev->config + dev->exp.sriov_cap; > + > + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); > } > > void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, > @@ -156,38 +184,11 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, > } > } > > -static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, > - uint16_t vf_num) > -{ > - PCIDevice *dev = pci_new(devfn, name); > - dev->exp.sriov_vf.pf = pf; > - dev->exp.sriov_vf.vf_number = vf_num; > - PCIBus *bus = pci_get_bus(pf); > - Error *local_err = NULL; > - > - qdev_realize(&dev->qdev, &bus->qbus, &local_err); > - if (local_err) { > - error_report_err(local_err); > - return NULL; > - } > - > - /* set vid/did according to sr/iov spec - they are not used */ > - pci_config_set_vendor_id(dev->config, 0xffff); > - pci_config_set_device_id(dev->config, 0xffff); > - > - return dev; > -} > - > static void register_vfs(PCIDevice *dev) > { > uint16_t num_vfs; > uint16_t i; > uint16_t sriov_cap = dev->exp.sriov_cap; > - uint16_t vf_offset = > - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET); > - uint16_t vf_stride = > - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE); > - int32_t devfn = dev->devfn + vf_offset; > > assert(sriov_cap > 0); > num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); > @@ -195,18 +196,10 @@ static void register_vfs(PCIDevice *dev) > return; > } > > - dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); > - > trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), > PCI_FUNC(dev->devfn), num_vfs); > for (i = 0; i < num_vfs; i++) { > - dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, > - dev->exp.sriov_pf.vfname, i); > - if (!dev->exp.sriov_pf.vf[i]) { > - num_vfs = i; > - break; > - } > - devfn += vf_stride; > + pci_set_enabled(dev->exp.sriov_pf.vf[i], true); > } > dev->exp.sriov_pf.num_vfs = num_vfs; > } > @@ -219,12 +212,8 @@ static void unregister_vfs(PCIDevice *dev) > trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), > PCI_FUNC(dev->devfn), num_vfs); > for (i = 0; i < num_vfs; i++) { > - PCIDevice *vf = dev->exp.sriov_pf.vf[i]; > - object_unparent(OBJECT(vf)); > - object_unref(OBJECT(vf)); > + pci_set_enabled(dev->exp.sriov_pf.vf[i], false); > } > - g_free(dev->exp.sriov_pf.vf); > - dev->exp.sriov_pf.vf = NULL; > dev->exp.sriov_pf.num_vfs = 0; > } > > @@ -246,14 +235,10 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, > PCI_FUNC(dev->devfn), off, val, len); > > if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { > - if (dev->exp.sriov_pf.num_vfs) { > - if (!(val & PCI_SRIOV_CTRL_VFE)) { > - unregister_vfs(dev); > - } > + if (val & PCI_SRIOV_CTRL_VFE) { > + register_vfs(dev); > } else { > - if (val & PCI_SRIOV_CTRL_VFE) { > - register_vfs(dev); > - } > + unregister_vfs(dev); > } > } > } >
On Wed, Jul 10, 2024 at 08:37:27AM +0200, Cédric Le Goater wrote: > Hello, > > This change introduced a regression on s390x. I could have spotted it > earlier. Sorry about that. Here is the scenario, > > QEMU now creates automatically the PCI device objects representing the > VFs when the PF device is realized in pcie_sriov_pf_init(). This is > good to report errors early but it has an important drawback. > > On s390x, PCI devices have a dual S390PCIBusDevice object. This device > model has 'uid' and 'fid' properties which can be either set by the VMM > or, if not, auto-generated by the S390PCIBusDevice realize handler. In > the VF case, these ids are auto-generated by QEMU and they can possibly > conflict with the uid number space of libvirt. The conflict is detected > when the machine is created and the start is aborted with a message : > > 2024-07-08T12:51:42.876883Z qemu-system-s390x: -device {"driver":"zpci","uid":17,"fid":16,"target":"hostdev0","id":"zpci17"}: uid 17 already in use > > This problem can occur today with a s390x VM using an IGB device. > > It worked fine when the VFs were created at OS runtime because the initial > topology of the machine was in place. Adding VFs was more or less like > hotplug. AIUI, libvirt should have full control on the machine topology > and so, creating VFs in QEMU at init time in the back of libvirt seems > like a violation of this rule. > > That said, the s390x case is specific and could perhaps be handled in a > special way. > > Thanks, > > C. Thanks for reporting this Cédric. Akihiko what's your plan to handle this? Do you have the time to address this issue?
On 2024/07/10 19:52, Michael S. Tsirkin wrote: > On Wed, Jul 10, 2024 at 08:37:27AM +0200, Cédric Le Goater wrote: >> Hello, >> >> This change introduced a regression on s390x. I could have spotted it >> earlier. Sorry about that. Here is the scenario, >> >> QEMU now creates automatically the PCI device objects representing the >> VFs when the PF device is realized in pcie_sriov_pf_init(). This is >> good to report errors early but it has an important drawback. >> >> On s390x, PCI devices have a dual S390PCIBusDevice object. This device >> model has 'uid' and 'fid' properties which can be either set by the VMM >> or, if not, auto-generated by the S390PCIBusDevice realize handler. In >> the VF case, these ids are auto-generated by QEMU and they can possibly >> conflict with the uid number space of libvirt. The conflict is detected >> when the machine is created and the start is aborted with a message : >> >> 2024-07-08T12:51:42.876883Z qemu-system-s390x: -device {"driver":"zpci","uid":17,"fid":16,"target":"hostdev0","id":"zpci17"}: uid 17 already in use >> >> This problem can occur today with a s390x VM using an IGB device. >> >> It worked fine when the VFs were created at OS runtime because the initial >> topology of the machine was in place. Adding VFs was more or less like >> hotplug. AIUI, libvirt should have full control on the machine topology >> and so, creating VFs in QEMU at init time in the back of libvirt seems >> like a violation of this rule. >> >> That said, the s390x case is specific and could perhaps be handled in a >> special way. >> >> Thanks, >> >> C. > > > Thanks for reporting this Cédric. Akihiko what's your > plan to handle this? Do you have the time to address this issue? Creating VFs at initialization time only makes problems apparent early. Even without this change, hot-plugging another PCI device after realizing a VF results in a similar situation. A proper way to handle this is to add new properties to igb and nvme to let libvirt specify the VF ids. However I wonder if it is a worthwhile addition (i.e., if igb and nvme's SR-IOV emulation will be used with s390x and libvirt). Regards, Akihiko Odaki
On Sat, Jul 13, 2024 at 09:45:07PM +0900, Akihiko Odaki wrote: > On 2024/07/10 19:52, Michael S. Tsirkin wrote: > > On Wed, Jul 10, 2024 at 08:37:27AM +0200, Cédric Le Goater wrote: > > > Hello, > > > > > > This change introduced a regression on s390x. I could have spotted it > > > earlier. Sorry about that. Here is the scenario, > > > > > > QEMU now creates automatically the PCI device objects representing the > > > VFs when the PF device is realized in pcie_sriov_pf_init(). This is > > > good to report errors early but it has an important drawback. > > > > > > On s390x, PCI devices have a dual S390PCIBusDevice object. This device > > > model has 'uid' and 'fid' properties which can be either set by the VMM > > > or, if not, auto-generated by the S390PCIBusDevice realize handler. In > > > the VF case, these ids are auto-generated by QEMU and they can possibly > > > conflict with the uid number space of libvirt. The conflict is detected > > > when the machine is created and the start is aborted with a message : > > > > > > 2024-07-08T12:51:42.876883Z qemu-system-s390x: -device {"driver":"zpci","uid":17,"fid":16,"target":"hostdev0","id":"zpci17"}: uid 17 already in use > > > > > > This problem can occur today with a s390x VM using an IGB device. > > > > > > It worked fine when the VFs were created at OS runtime because the initial > > > topology of the machine was in place. Adding VFs was more or less like > > > hotplug. AIUI, libvirt should have full control on the machine topology > > > and so, creating VFs in QEMU at init time in the back of libvirt seems > > > like a violation of this rule. > > > > > > That said, the s390x case is specific and could perhaps be handled in a > > > special way. > > > > > > Thanks, > > > > > > C. > > > > > > Thanks for reporting this Cédric. Akihiko what's your > > plan to handle this? Do you have the time to address this issue? > > Creating VFs at initialization time only makes problems apparent early. Even > without this change, hot-plugging another PCI device after realizing a VF > results in a similar situation. > > A proper way to handle this is to add new properties to igb and nvme to let > libvirt specify the VF ids. However I wonder if it is a worthwhile addition > (i.e., if igb and nvme's SR-IOV emulation will be used with s390x and > libvirt). > > Regards, > Akihiko Odaki Well okay but libvirt will not update overnight. If we need time to discuss the design, I can revert for the release and reapply after we have a fix.
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 6c92b2f70008..442017b4865d 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -644,9 +644,4 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev) MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); void pci_set_enabled(PCIDevice *pci_dev, bool state); -static inline void pci_set_power(PCIDevice *pci_dev, bool state) -{ - pci_set_enabled(pci_dev, state); -} - #endif diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index d57f9ce83884..ca151325085d 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -205,6 +205,21 @@ static inline uint16_t pci_get_bdf(PCIDevice *dev) return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn); } +static inline void pci_set_power(PCIDevice *pci_dev, bool state) +{ + /* + * Don't change the enabled state of VFs when powering on/off the device. + * + * When powering on, VFs must not be enabled immediately but they must + * wait until the guest configures SR-IOV. + * When powering off, their corresponding PFs will be reset and disable + * VFs. + */ + if (!pci_is_vf(pci_dev)) { + pci_set_enabled(pci_dev, state); + } +} + uint16_t pci_requester_id(PCIDevice *dev); /* DMA access functions */ diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index aa704e8f9d9f..70649236c18a 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -18,7 +18,6 @@ typedef struct PCIESriovPF { uint16_t num_vfs; /* Number of virtual functions created */ uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ - const char *vfname; /* Reference to the device type used for the VFs */ PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ } PCIESriovPF; diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 295a32714a4a..c682c3dcb68e 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2822,7 +2822,7 @@ void pci_set_enabled(PCIDevice *d, bool state) memory_region_set_enabled(&d->bus_master_enable_region, (pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_MASTER) && d->enabled); - if (!d->enabled) { + if (d->qdev.realized) { pci_device_reset(d); } } diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index f0bde0d3fc79..faadb0d2ea85 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -20,9 +20,16 @@ #include "qapi/error.h" #include "trace.h" -static PCIDevice *register_vf(PCIDevice *pf, int devfn, - const char *name, uint16_t vf_num); -static void unregister_vfs(PCIDevice *dev); +static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) +{ + for (uint16_t i = 0; i < total_vfs; i++) { + PCIDevice *vf = dev->exp.sriov_pf.vf[i]; + object_unparent(OBJECT(vf)); + object_unref(OBJECT(vf)); + } + g_free(dev->exp.sriov_pf.vf); + dev->exp.sriov_pf.vf = NULL; +} bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, const char *vfname, uint16_t vf_dev_id, @@ -30,6 +37,8 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, uint16_t vf_offset, uint16_t vf_stride, Error **errp) { + BusState *bus = qdev_get_parent_bus(&dev->qdev); + int32_t devfn = dev->devfn + vf_offset; uint8_t *cfg = dev->config + offset; uint8_t *wmask; @@ -49,7 +58,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, offset, PCI_EXT_CAP_SRIOV_SIZEOF); dev->exp.sriov_cap = offset; dev->exp.sriov_pf.num_vfs = 0; - dev->exp.sriov_pf.vfname = g_strdup(vfname); dev->exp.sriov_pf.vf = NULL; pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); @@ -83,14 +91,34 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, qdev_prop_set_bit(&dev->qdev, "multifunction", true); + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); + + for (uint16_t i = 0; i < total_vfs; i++) { + PCIDevice *vf = pci_new(devfn, vfname); + vf->exp.sriov_vf.pf = dev; + vf->exp.sriov_vf.vf_number = i; + + if (!qdev_realize(&vf->qdev, bus, errp)) { + unparent_vfs(dev, i); + return false; + } + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(vf->config, 0xffff); + pci_config_set_device_id(vf->config, 0xffff); + + dev->exp.sriov_pf.vf[i] = vf; + devfn += vf_stride; + } + return true; } void pcie_sriov_pf_exit(PCIDevice *dev) { - unregister_vfs(dev); - g_free((char *)dev->exp.sriov_pf.vfname); - dev->exp.sriov_pf.vfname = NULL; + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); } void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, @@ -156,38 +184,11 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, } } -static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, - uint16_t vf_num) -{ - PCIDevice *dev = pci_new(devfn, name); - dev->exp.sriov_vf.pf = pf; - dev->exp.sriov_vf.vf_number = vf_num; - PCIBus *bus = pci_get_bus(pf); - Error *local_err = NULL; - - qdev_realize(&dev->qdev, &bus->qbus, &local_err); - if (local_err) { - error_report_err(local_err); - return NULL; - } - - /* set vid/did according to sr/iov spec - they are not used */ - pci_config_set_vendor_id(dev->config, 0xffff); - pci_config_set_device_id(dev->config, 0xffff); - - return dev; -} - static void register_vfs(PCIDevice *dev) { uint16_t num_vfs; uint16_t i; uint16_t sriov_cap = dev->exp.sriov_cap; - uint16_t vf_offset = - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET); - uint16_t vf_stride = - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE); - int32_t devfn = dev->devfn + vf_offset; assert(sriov_cap > 0); num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); @@ -195,18 +196,10 @@ static void register_vfs(PCIDevice *dev) return; } - dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); - trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { - dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, - dev->exp.sriov_pf.vfname, i); - if (!dev->exp.sriov_pf.vf[i]) { - num_vfs = i; - break; - } - devfn += vf_stride; + pci_set_enabled(dev->exp.sriov_pf.vf[i], true); } dev->exp.sriov_pf.num_vfs = num_vfs; } @@ -219,12 +212,8 @@ static void unregister_vfs(PCIDevice *dev) trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { - PCIDevice *vf = dev->exp.sriov_pf.vf[i]; - object_unparent(OBJECT(vf)); - object_unref(OBJECT(vf)); + pci_set_enabled(dev->exp.sriov_pf.vf[i], false); } - g_free(dev->exp.sriov_pf.vf); - dev->exp.sriov_pf.vf = NULL; dev->exp.sriov_pf.num_vfs = 0; } @@ -246,14 +235,10 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, PCI_FUNC(dev->devfn), off, val, len); if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { - if (dev->exp.sriov_pf.num_vfs) { - if (!(val & PCI_SRIOV_CTRL_VFE)) { - unregister_vfs(dev); - } + if (val & PCI_SRIOV_CTRL_VFE) { + register_vfs(dev); } else { - if (val & PCI_SRIOV_CTRL_VFE) { - register_vfs(dev); - } + unregister_vfs(dev); } } }
Disable SR-IOV VF devices by reusing code to power down PCI devices instead of removing them when the guest requests to disable VFs. This allows to realize devices and report VF realization errors at PF realization time. Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> --- include/hw/pci/pci.h | 5 --- include/hw/pci/pci_device.h | 15 +++++++ include/hw/pci/pcie_sriov.h | 1 - hw/pci/pci.c | 2 +- hw/pci/pcie_sriov.c | 95 +++++++++++++++++++-------------------------- 5 files changed, 56 insertions(+), 62 deletions(-)