diff mbox series

[v7,04/13] vfio: Add save and load functions for VFIO PCI devices

Message ID 1562665760-26158-5-git-send-email-kwankhede@nvidia.com
State New
Headers show
Series Add migration support for VFIO device | expand

Commit Message

Kirti Wankhede July 9, 2019, 9:49 a.m. UTC
These functions save and restore PCI device specific data - config
space of PCI device.
Tested save and restore with MSI and MSIX type.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
---
 hw/vfio/pci.c                 | 114 ++++++++++++++++++++++++++++++++++++++++++
 include/hw/vfio/vfio-common.h |   2 +
 2 files changed, 116 insertions(+)

Comments

Dr. David Alan Gilbert July 11, 2019, 12:07 p.m. UTC | #1
* Kirti Wankhede (kwankhede@nvidia.com) wrote:
> These functions save and restore PCI device specific data - config
> space of PCI device.
> Tested save and restore with MSI and MSIX type.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/pci.c                 | 114 ++++++++++++++++++++++++++++++++++++++++++
>  include/hw/vfio/vfio-common.h |   2 +
>  2 files changed, 116 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index de0d286fc9dd..5fe4f8076cac 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev)
>      return OBJECT(vdev);
>  }
>  
> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
> +{
> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +    PCIDevice *pdev = &vdev->pdev;
> +    uint16_t pci_cmd;
> +    int i;
> +
> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> +        uint32_t bar;
> +
> +        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4);
> +        qemu_put_be32(f, bar);
> +    }
> +
> +    qemu_put_be32(f, vdev->interrupt);
> +    if (vdev->interrupt == VFIO_INT_MSI) {
> +        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> +        bool msi_64bit;
> +
> +        msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> +                                            2);
> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> +
> +        msi_addr_lo = pci_default_read_config(pdev,
> +                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
> +        qemu_put_be32(f, msi_addr_lo);
> +
> +        if (msi_64bit) {
> +            msi_addr_hi = pci_default_read_config(pdev,
> +                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> +                                             4);
> +        }
> +        qemu_put_be32(f, msi_addr_hi);
> +
> +        msi_data = pci_default_read_config(pdev,
> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> +                2);
> +        qemu_put_be32(f, msi_data);
> +    } else if (vdev->interrupt == VFIO_INT_MSIX) {
> +        uint16_t offset;
> +
> +        /* save enable bit and maskall bit */
> +        offset = pci_default_read_config(pdev,
> +                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
> +        qemu_put_be16(f, offset);
> +        msix_save(pdev, f);
> +    }
> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> +    qemu_put_be16(f, pci_cmd);
> +}
> +
> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
> +{
> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +    PCIDevice *pdev = &vdev->pdev;
> +    uint32_t interrupt_type;
> +    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> +    uint16_t pci_cmd;
> +    bool msi_64bit;
> +    int i;
> +
> +    /* retore pci bar configuration */
> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> +                        pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> +        uint32_t bar = qemu_get_be32(f);
> +
> +        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
> +    }

Is it possible to validate the bar's at all?  We just had a bug on a
virtual device where one version was asking for a larger bar than the
other; our validation caught this in some cases so we could tell that
the guest had a BAR that was aligned at the wrong alignment.

> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> +                          pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2);

Can you explain what this is for?  You write the command register at the
end of the function with the original value; there's no guarantee that
the device is using IO for example, so ORing it seems odd.

Also, are the other flags in COMMAND safe at this point - e.g. what
about interrupts and stuff?

> +    interrupt_type = qemu_get_be32(f);
> +
> +    if (interrupt_type == VFIO_INT_MSI) {
> +        /* restore msi configuration */
> +        msi_flags = pci_default_read_config(pdev,
> +                                            pdev->msi_cap + PCI_MSI_FLAGS, 2);
> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> +
> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> +                              msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2);
> +
> +        msi_addr_lo = qemu_get_be32(f);
> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO,
> +                              msi_addr_lo, 4);
> +
> +        msi_addr_hi = qemu_get_be32(f);
> +        if (msi_64bit) {
> +            vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> +                                  msi_addr_hi, 4);
> +        }
> +        msi_data = qemu_get_be32(f);
> +        vfio_pci_write_config(pdev,
> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> +                msi_data, 2);
> +
> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> +                              msi_flags | PCI_MSI_FLAGS_ENABLE, 2);
> +    } else if (interrupt_type == VFIO_INT_MSIX) {
> +        uint16_t offset = qemu_get_be16(f);
> +
> +        /* load enable bit and maskall bit */
> +        vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1,
> +                              offset, 2);
> +        msix_load(pdev, f);
> +    }
> +    pci_cmd = qemu_get_be16(f);
> +    vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2);
> +}
> +
>  static VFIODeviceOps vfio_pci_ops = {
>      .vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
>      .vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
>      .vfio_eoi = vfio_intx_eoi,
>      .vfio_get_object = vfio_pci_get_object,
> +    .vfio_save_config = vfio_pci_save_config,
> +    .vfio_load_config = vfio_pci_load_config,
>  };
>  
>  int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 771b6d59a3db..ee72bd984a36 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -120,6 +120,8 @@ struct VFIODeviceOps {
>      int (*vfio_hot_reset_multi)(VFIODevice *vdev);
>      void (*vfio_eoi)(VFIODevice *vdev);
>      Object *(*vfio_get_object)(VFIODevice *vdev);
> +    void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
> +    void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
>  };
>  
>  typedef struct VFIOGroup {
> -- 
> 2.7.0
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Alex Williamson July 16, 2019, 9:14 p.m. UTC | #2
On Tue, 9 Jul 2019 15:19:11 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> These functions save and restore PCI device specific data - config
> space of PCI device.
> Tested save and restore with MSI and MSIX type.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/pci.c                 | 114 ++++++++++++++++++++++++++++++++++++++++++
>  include/hw/vfio/vfio-common.h |   2 +
>  2 files changed, 116 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index de0d286fc9dd..5fe4f8076cac 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev)
>      return OBJECT(vdev);
>  }
>  
> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
> +{
> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +    PCIDevice *pdev = &vdev->pdev;
> +    uint16_t pci_cmd;
> +    int i;
> +
> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> +        uint32_t bar;
> +
> +        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4);
> +        qemu_put_be32(f, bar);
> +    }
> +
> +    qemu_put_be32(f, vdev->interrupt);
> +    if (vdev->interrupt == VFIO_INT_MSI) {
> +        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> +        bool msi_64bit;
> +
> +        msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> +                                            2);
> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> +
> +        msi_addr_lo = pci_default_read_config(pdev,
> +                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
> +        qemu_put_be32(f, msi_addr_lo);
> +
> +        if (msi_64bit) {
> +            msi_addr_hi = pci_default_read_config(pdev,
> +                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> +                                             4);
> +        }
> +        qemu_put_be32(f, msi_addr_hi);
> +
> +        msi_data = pci_default_read_config(pdev,
> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> +                2);
> +        qemu_put_be32(f, msi_data);
> +    } else if (vdev->interrupt == VFIO_INT_MSIX) {
> +        uint16_t offset;
> +
> +        /* save enable bit and maskall bit */
> +        offset = pci_default_read_config(pdev,
> +                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
> +        qemu_put_be16(f, offset);
> +        msix_save(pdev, f);
> +    }
> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> +    qemu_put_be16(f, pci_cmd);
> +}
> +
> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
> +{
> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +    PCIDevice *pdev = &vdev->pdev;
> +    uint32_t interrupt_type;
> +    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> +    uint16_t pci_cmd;
> +    bool msi_64bit;
> +    int i;
> +
> +    /* retore pci bar configuration */
> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> +                        pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> +        uint32_t bar = qemu_get_be32(f);
> +
> +        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
> +    }
> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> +                          pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2);
> +
> +    interrupt_type = qemu_get_be32(f);
> +
> +    if (interrupt_type == VFIO_INT_MSI) {
> +        /* restore msi configuration */
> +        msi_flags = pci_default_read_config(pdev,
> +                                            pdev->msi_cap + PCI_MSI_FLAGS, 2);
> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> +
> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> +                              msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2);
> +
> +        msi_addr_lo = qemu_get_be32(f);
> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO,
> +                              msi_addr_lo, 4);
> +
> +        msi_addr_hi = qemu_get_be32(f);
> +        if (msi_64bit) {
> +            vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> +                                  msi_addr_hi, 4);
> +        }
> +        msi_data = qemu_get_be32(f);
> +        vfio_pci_write_config(pdev,
> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> +                msi_data, 2);
> +
> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> +                              msi_flags | PCI_MSI_FLAGS_ENABLE, 2);
> +    } else if (interrupt_type == VFIO_INT_MSIX) {
> +        uint16_t offset = qemu_get_be16(f);
> +
> +        /* load enable bit and maskall bit */
> +        vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1,
> +                              offset, 2);
> +        msix_load(pdev, f);
> +    }
> +    pci_cmd = qemu_get_be16(f);
> +    vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2);
> +}

Pardon my migration ignorance, but there are bound to be more fields
and capabilities that get migrated over time.  How does this get
extended to support that and maintain backwards compatibility?  Thanks,

Alex

> +
>  static VFIODeviceOps vfio_pci_ops = {
>      .vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
>      .vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
>      .vfio_eoi = vfio_intx_eoi,
>      .vfio_get_object = vfio_pci_get_object,
> +    .vfio_save_config = vfio_pci_save_config,
> +    .vfio_load_config = vfio_pci_load_config,
>  };
>  
>  int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 771b6d59a3db..ee72bd984a36 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -120,6 +120,8 @@ struct VFIODeviceOps {
>      int (*vfio_hot_reset_multi)(VFIODevice *vdev);
>      void (*vfio_eoi)(VFIODevice *vdev);
>      Object *(*vfio_get_object)(VFIODevice *vdev);
> +    void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
> +    void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
>  };
>  
>  typedef struct VFIOGroup {
Dr. David Alan Gilbert July 17, 2019, 9:10 a.m. UTC | #3
* Alex Williamson (alex.williamson@redhat.com) wrote:
> On Tue, 9 Jul 2019 15:19:11 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
> > These functions save and restore PCI device specific data - config
> > space of PCI device.
> > Tested save and restore with MSI and MSIX type.
> > 
> > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> > Reviewed-by: Neo Jia <cjia@nvidia.com>
> > ---
> >  hw/vfio/pci.c                 | 114 ++++++++++++++++++++++++++++++++++++++++++
> >  include/hw/vfio/vfio-common.h |   2 +
> >  2 files changed, 116 insertions(+)
> > 
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index de0d286fc9dd..5fe4f8076cac 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev)
> >      return OBJECT(vdev);
> >  }
> >  
> > +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
> > +{
> > +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> > +    PCIDevice *pdev = &vdev->pdev;
> > +    uint16_t pci_cmd;
> > +    int i;
> > +
> > +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> > +        uint32_t bar;
> > +
> > +        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4);
> > +        qemu_put_be32(f, bar);
> > +    }
> > +
> > +    qemu_put_be32(f, vdev->interrupt);
> > +    if (vdev->interrupt == VFIO_INT_MSI) {
> > +        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> > +        bool msi_64bit;
> > +
> > +        msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> > +                                            2);
> > +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> > +
> > +        msi_addr_lo = pci_default_read_config(pdev,
> > +                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
> > +        qemu_put_be32(f, msi_addr_lo);
> > +
> > +        if (msi_64bit) {
> > +            msi_addr_hi = pci_default_read_config(pdev,
> > +                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> > +                                             4);
> > +        }
> > +        qemu_put_be32(f, msi_addr_hi);
> > +
> > +        msi_data = pci_default_read_config(pdev,
> > +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> > +                2);
> > +        qemu_put_be32(f, msi_data);
> > +    } else if (vdev->interrupt == VFIO_INT_MSIX) {
> > +        uint16_t offset;
> > +
> > +        /* save enable bit and maskall bit */
> > +        offset = pci_default_read_config(pdev,
> > +                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
> > +        qemu_put_be16(f, offset);
> > +        msix_save(pdev, f);
> > +    }
> > +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> > +    qemu_put_be16(f, pci_cmd);
> > +}
> > +
> > +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
> > +{
> > +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> > +    PCIDevice *pdev = &vdev->pdev;
> > +    uint32_t interrupt_type;
> > +    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> > +    uint16_t pci_cmd;
> > +    bool msi_64bit;
> > +    int i;
> > +
> > +    /* retore pci bar configuration */
> > +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> > +    vfio_pci_write_config(pdev, PCI_COMMAND,
> > +                        pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
> > +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> > +        uint32_t bar = qemu_get_be32(f);
> > +
> > +        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
> > +    }
> > +    vfio_pci_write_config(pdev, PCI_COMMAND,
> > +                          pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2);
> > +
> > +    interrupt_type = qemu_get_be32(f);
> > +
> > +    if (interrupt_type == VFIO_INT_MSI) {
> > +        /* restore msi configuration */
> > +        msi_flags = pci_default_read_config(pdev,
> > +                                            pdev->msi_cap + PCI_MSI_FLAGS, 2);
> > +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> > +
> > +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> > +                              msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2);
> > +
> > +        msi_addr_lo = qemu_get_be32(f);
> > +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO,
> > +                              msi_addr_lo, 4);
> > +
> > +        msi_addr_hi = qemu_get_be32(f);
> > +        if (msi_64bit) {
> > +            vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> > +                                  msi_addr_hi, 4);
> > +        }
> > +        msi_data = qemu_get_be32(f);
> > +        vfio_pci_write_config(pdev,
> > +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> > +                msi_data, 2);
> > +
> > +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> > +                              msi_flags | PCI_MSI_FLAGS_ENABLE, 2);
> > +    } else if (interrupt_type == VFIO_INT_MSIX) {
> > +        uint16_t offset = qemu_get_be16(f);
> > +
> > +        /* load enable bit and maskall bit */
> > +        vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1,
> > +                              offset, 2);
> > +        msix_load(pdev, f);
> > +    }
> > +    pci_cmd = qemu_get_be16(f);
> > +    vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2);
> > +}
> 
> Pardon my migration ignorance, but there are bound to be more fields
> and capabilities that get migrated over time.  How does this get
> extended to support that and maintain backwards compatibility?  Thanks,

You normally tie those fields to a property on your device and set the
property in the machine type so that newer machine types send the extra
fields.

Dave

> Alex
> 
> > +
> >  static VFIODeviceOps vfio_pci_ops = {
> >      .vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
> >      .vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
> >      .vfio_eoi = vfio_intx_eoi,
> >      .vfio_get_object = vfio_pci_get_object,
> > +    .vfio_save_config = vfio_pci_save_config,
> > +    .vfio_load_config = vfio_pci_load_config,
> >  };
> >  
> >  int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
> > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> > index 771b6d59a3db..ee72bd984a36 100644
> > --- a/include/hw/vfio/vfio-common.h
> > +++ b/include/hw/vfio/vfio-common.h
> > @@ -120,6 +120,8 @@ struct VFIODeviceOps {
> >      int (*vfio_hot_reset_multi)(VFIODevice *vdev);
> >      void (*vfio_eoi)(VFIODevice *vdev);
> >      Object *(*vfio_get_object)(VFIODevice *vdev);
> > +    void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
> > +    void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
> >  };
> >  
> >  typedef struct VFIOGroup {
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Kirti Wankhede Aug. 22, 2019, 4:50 a.m. UTC | #4
Sorry for delay to respond.

On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote:
> * Kirti Wankhede (kwankhede@nvidia.com) wrote:
>> These functions save and restore PCI device specific data - config
>> space of PCI device.
>> Tested save and restore with MSI and MSIX type.
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>> Reviewed-by: Neo Jia <cjia@nvidia.com>
>> ---
>>  hw/vfio/pci.c                 | 114 ++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/vfio/vfio-common.h |   2 +
>>  2 files changed, 116 insertions(+)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index de0d286fc9dd..5fe4f8076cac 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev)
>>      return OBJECT(vdev);
>>  }
>>  
>> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
>> +{
>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>> +    PCIDevice *pdev = &vdev->pdev;
>> +    uint16_t pci_cmd;
>> +    int i;
>> +
>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
>> +        uint32_t bar;
>> +
>> +        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4);
>> +        qemu_put_be32(f, bar);
>> +    }
>> +
>> +    qemu_put_be32(f, vdev->interrupt);
>> +    if (vdev->interrupt == VFIO_INT_MSI) {
>> +        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
>> +        bool msi_64bit;
>> +
>> +        msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
>> +                                            2);
>> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
>> +
>> +        msi_addr_lo = pci_default_read_config(pdev,
>> +                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
>> +        qemu_put_be32(f, msi_addr_lo);
>> +
>> +        if (msi_64bit) {
>> +            msi_addr_hi = pci_default_read_config(pdev,
>> +                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
>> +                                             4);
>> +        }
>> +        qemu_put_be32(f, msi_addr_hi);
>> +
>> +        msi_data = pci_default_read_config(pdev,
>> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
>> +                2);
>> +        qemu_put_be32(f, msi_data);
>> +    } else if (vdev->interrupt == VFIO_INT_MSIX) {
>> +        uint16_t offset;
>> +
>> +        /* save enable bit and maskall bit */
>> +        offset = pci_default_read_config(pdev,
>> +                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
>> +        qemu_put_be16(f, offset);
>> +        msix_save(pdev, f);
>> +    }
>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
>> +    qemu_put_be16(f, pci_cmd);
>> +}
>> +
>> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
>> +{
>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>> +    PCIDevice *pdev = &vdev->pdev;
>> +    uint32_t interrupt_type;
>> +    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
>> +    uint16_t pci_cmd;
>> +    bool msi_64bit;
>> +    int i;
>> +
>> +    /* retore pci bar configuration */
>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
>> +    vfio_pci_write_config(pdev, PCI_COMMAND,
>> +                        pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
>> +        uint32_t bar = qemu_get_be32(f);
>> +
>> +        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
>> +    }
> 
> Is it possible to validate the bar's at all?  We just had a bug on a
> virtual device where one version was asking for a larger bar than the
> other; our validation caught this in some cases so we could tell that
> the guest had a BAR that was aligned at the wrong alignment.
> 

"Validate the bars" does that means validate size of bars?

>> +    vfio_pci_write_config(pdev, PCI_COMMAND,
>> +                          pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2);
> 
> Can you explain what this is for?  You write the command register at the
> end of the function with the original value; there's no guarantee that
> the device is using IO for example, so ORing it seems odd.
> 

IO space and memory space accesses are disabled before writing BAR
addresses, only those are enabled here.

> Also, are the other flags in COMMAND safe at this point - e.g. what
> about interrupts and stuff?
> 

COMMAND registers is saved from stop-and-copy phase, interrupt should be
disabled, then restoring here when vCPU are not yet running.

>> +    interrupt_type = qemu_get_be32(f);
>> +
>> +    if (interrupt_type == VFIO_INT_MSI) {
>> +        /* restore msi configuration */
>> +        msi_flags = pci_default_read_config(pdev,
>> +                                            pdev->msi_cap + PCI_MSI_FLAGS, 2);
>> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
>> +
>> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
>> +                              msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2);
>> +
>> +        msi_addr_lo = qemu_get_be32(f);
>> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO,
>> +                              msi_addr_lo, 4);
>> +
>> +        msi_addr_hi = qemu_get_be32(f);
>> +        if (msi_64bit) {
>> +            vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
>> +                                  msi_addr_hi, 4);
>> +        }
>> +        msi_data = qemu_get_be32(f);
>> +        vfio_pci_write_config(pdev,
>> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
>> +                msi_data, 2);
>> +
>> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
>> +                              msi_flags | PCI_MSI_FLAGS_ENABLE, 2);
>> +    } else if (interrupt_type == VFIO_INT_MSIX) {
>> +        uint16_t offset = qemu_get_be16(f);
>> +
>> +        /* load enable bit and maskall bit */
>> +        vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1,
>> +                              offset, 2);
>> +        msix_load(pdev, f);
>> +    }
>> +    pci_cmd = qemu_get_be16(f);
>> +    vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2);
>> +}
>> +
>>  static VFIODeviceOps vfio_pci_ops = {
>>      .vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
>>      .vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
>>      .vfio_eoi = vfio_intx_eoi,
>>      .vfio_get_object = vfio_pci_get_object,
>> +    .vfio_save_config = vfio_pci_save_config,
>> +    .vfio_load_config = vfio_pci_load_config,
>>  };
>>  
>>  int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 771b6d59a3db..ee72bd984a36 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -120,6 +120,8 @@ struct VFIODeviceOps {
>>      int (*vfio_hot_reset_multi)(VFIODevice *vdev);
>>      void (*vfio_eoi)(VFIODevice *vdev);
>>      Object *(*vfio_get_object)(VFIODevice *vdev);
>> +    void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
>> +    void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
>>  };
>>  
>>  typedef struct VFIOGroup {
>> -- 
>> 2.7.0
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
Dr. David Alan Gilbert Aug. 22, 2019, 9:32 a.m. UTC | #5
* Kirti Wankhede (kwankhede@nvidia.com) wrote:
> Sorry for delay to respond.
> 
> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote:
> > * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> >> These functions save and restore PCI device specific data - config
> >> space of PCI device.
> >> Tested save and restore with MSI and MSIX type.
> >>
> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> >> Reviewed-by: Neo Jia <cjia@nvidia.com>
> >> ---
> >>  hw/vfio/pci.c                 | 114 ++++++++++++++++++++++++++++++++++++++++++
> >>  include/hw/vfio/vfio-common.h |   2 +
> >>  2 files changed, 116 insertions(+)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index de0d286fc9dd..5fe4f8076cac 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev)
> >>      return OBJECT(vdev);
> >>  }
> >>  
> >> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
> >> +{
> >> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> >> +    PCIDevice *pdev = &vdev->pdev;
> >> +    uint16_t pci_cmd;
> >> +    int i;
> >> +
> >> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> >> +        uint32_t bar;
> >> +
> >> +        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4);
> >> +        qemu_put_be32(f, bar);
> >> +    }
> >> +
> >> +    qemu_put_be32(f, vdev->interrupt);
> >> +    if (vdev->interrupt == VFIO_INT_MSI) {
> >> +        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> >> +        bool msi_64bit;
> >> +
> >> +        msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> >> +                                            2);
> >> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> >> +
> >> +        msi_addr_lo = pci_default_read_config(pdev,
> >> +                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
> >> +        qemu_put_be32(f, msi_addr_lo);
> >> +
> >> +        if (msi_64bit) {
> >> +            msi_addr_hi = pci_default_read_config(pdev,
> >> +                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> >> +                                             4);
> >> +        }
> >> +        qemu_put_be32(f, msi_addr_hi);
> >> +
> >> +        msi_data = pci_default_read_config(pdev,
> >> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> >> +                2);
> >> +        qemu_put_be32(f, msi_data);
> >> +    } else if (vdev->interrupt == VFIO_INT_MSIX) {
> >> +        uint16_t offset;
> >> +
> >> +        /* save enable bit and maskall bit */
> >> +        offset = pci_default_read_config(pdev,
> >> +                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
> >> +        qemu_put_be16(f, offset);
> >> +        msix_save(pdev, f);
> >> +    }
> >> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> >> +    qemu_put_be16(f, pci_cmd);
> >> +}
> >> +
> >> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
> >> +{
> >> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> >> +    PCIDevice *pdev = &vdev->pdev;
> >> +    uint32_t interrupt_type;
> >> +    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> >> +    uint16_t pci_cmd;
> >> +    bool msi_64bit;
> >> +    int i;
> >> +
> >> +    /* retore pci bar configuration */
> >> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> >> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> >> +                        pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
> >> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> >> +        uint32_t bar = qemu_get_be32(f);
> >> +
> >> +        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
> >> +    }
> > 
> > Is it possible to validate the bar's at all?  We just had a bug on a
> > virtual device where one version was asking for a larger bar than the
> > other; our validation caught this in some cases so we could tell that
> > the guest had a BAR that was aligned at the wrong alignment.
> > 
> 
> "Validate the bars" does that means validate size of bars?

I meant validate the address programmed into the BAR against the size,
assuming you know the size; e.g. if it's a 128MB BAR, then make sure the
address programmed in is 128MB aligned.

> >> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> >> +                          pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2);
> > 
> > Can you explain what this is for?  You write the command register at the
> > end of the function with the original value; there's no guarantee that
> > the device is using IO for example, so ORing it seems odd.
> > 
> 
> IO space and memory space accesses are disabled before writing BAR
> addresses, only those are enabled here.

But do you need to enable them here, or can it wait until the pci_cmd
write at the end of the function?

> > Also, are the other flags in COMMAND safe at this point - e.g. what
> > about interrupts and stuff?
> > 
> 
> COMMAND registers is saved from stop-and-copy phase, interrupt should be
> disabled, then restoring here when vCPU are not yet running.

Dave

> >> +    interrupt_type = qemu_get_be32(f);
> >> +
> >> +    if (interrupt_type == VFIO_INT_MSI) {
> >> +        /* restore msi configuration */
> >> +        msi_flags = pci_default_read_config(pdev,
> >> +                                            pdev->msi_cap + PCI_MSI_FLAGS, 2);
> >> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> >> +
> >> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> >> +                              msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2);
> >> +
> >> +        msi_addr_lo = qemu_get_be32(f);
> >> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO,
> >> +                              msi_addr_lo, 4);
> >> +
> >> +        msi_addr_hi = qemu_get_be32(f);
> >> +        if (msi_64bit) {
> >> +            vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> >> +                                  msi_addr_hi, 4);
> >> +        }
> >> +        msi_data = qemu_get_be32(f);
> >> +        vfio_pci_write_config(pdev,
> >> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> >> +                msi_data, 2);
> >> +
> >> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> >> +                              msi_flags | PCI_MSI_FLAGS_ENABLE, 2);
> >> +    } else if (interrupt_type == VFIO_INT_MSIX) {
> >> +        uint16_t offset = qemu_get_be16(f);
> >> +
> >> +        /* load enable bit and maskall bit */
> >> +        vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1,
> >> +                              offset, 2);
> >> +        msix_load(pdev, f);
> >> +    }
> >> +    pci_cmd = qemu_get_be16(f);
> >> +    vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2);
> >> +}
> >> +
> >>  static VFIODeviceOps vfio_pci_ops = {
> >>      .vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
> >>      .vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
> >>      .vfio_eoi = vfio_intx_eoi,
> >>      .vfio_get_object = vfio_pci_get_object,
> >> +    .vfio_save_config = vfio_pci_save_config,
> >> +    .vfio_load_config = vfio_pci_load_config,
> >>  };
> >>  
> >>  int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
> >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> >> index 771b6d59a3db..ee72bd984a36 100644
> >> --- a/include/hw/vfio/vfio-common.h
> >> +++ b/include/hw/vfio/vfio-common.h
> >> @@ -120,6 +120,8 @@ struct VFIODeviceOps {
> >>      int (*vfio_hot_reset_multi)(VFIODevice *vdev);
> >>      void (*vfio_eoi)(VFIODevice *vdev);
> >>      Object *(*vfio_get_object)(VFIODevice *vdev);
> >> +    void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
> >> +    void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
> >>  };
> >>  
> >>  typedef struct VFIOGroup {
> >> -- 
> >> 2.7.0
> >>
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Kirti Wankhede Aug. 22, 2019, 7:10 p.m. UTC | #6
On 8/22/2019 3:02 PM, Dr. David Alan Gilbert wrote:
> * Kirti Wankhede (kwankhede@nvidia.com) wrote:
>> Sorry for delay to respond.
>>
>> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote:
>>> * Kirti Wankhede (kwankhede@nvidia.com) wrote:
>>>> These functions save and restore PCI device specific data - config
>>>> space of PCI device.
>>>> Tested save and restore with MSI and MSIX type.
>>>>
>>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>>>> Reviewed-by: Neo Jia <cjia@nvidia.com>
>>>> ---
>>>>  hw/vfio/pci.c                 | 114 ++++++++++++++++++++++++++++++++++++++++++
>>>>  include/hw/vfio/vfio-common.h |   2 +
>>>>  2 files changed, 116 insertions(+)
>>>>
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index de0d286fc9dd..5fe4f8076cac 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev)
>>>>      return OBJECT(vdev);
>>>>  }
>>>>  
>>>> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
>>>> +{
>>>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>>>> +    PCIDevice *pdev = &vdev->pdev;
>>>> +    uint16_t pci_cmd;
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
>>>> +        uint32_t bar;
>>>> +
>>>> +        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4);
>>>> +        qemu_put_be32(f, bar);
>>>> +    }
>>>> +
>>>> +    qemu_put_be32(f, vdev->interrupt);
>>>> +    if (vdev->interrupt == VFIO_INT_MSI) {
>>>> +        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
>>>> +        bool msi_64bit;
>>>> +
>>>> +        msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
>>>> +                                            2);
>>>> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
>>>> +
>>>> +        msi_addr_lo = pci_default_read_config(pdev,
>>>> +                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
>>>> +        qemu_put_be32(f, msi_addr_lo);
>>>> +
>>>> +        if (msi_64bit) {
>>>> +            msi_addr_hi = pci_default_read_config(pdev,
>>>> +                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
>>>> +                                             4);
>>>> +        }
>>>> +        qemu_put_be32(f, msi_addr_hi);
>>>> +
>>>> +        msi_data = pci_default_read_config(pdev,
>>>> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
>>>> +                2);
>>>> +        qemu_put_be32(f, msi_data);
>>>> +    } else if (vdev->interrupt == VFIO_INT_MSIX) {
>>>> +        uint16_t offset;
>>>> +
>>>> +        /* save enable bit and maskall bit */
>>>> +        offset = pci_default_read_config(pdev,
>>>> +                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
>>>> +        qemu_put_be16(f, offset);
>>>> +        msix_save(pdev, f);
>>>> +    }
>>>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
>>>> +    qemu_put_be16(f, pci_cmd);
>>>> +}
>>>> +
>>>> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
>>>> +{
>>>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>>>> +    PCIDevice *pdev = &vdev->pdev;
>>>> +    uint32_t interrupt_type;
>>>> +    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
>>>> +    uint16_t pci_cmd;
>>>> +    bool msi_64bit;
>>>> +    int i;
>>>> +
>>>> +    /* retore pci bar configuration */
>>>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
>>>> +    vfio_pci_write_config(pdev, PCI_COMMAND,
>>>> +                        pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
>>>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
>>>> +        uint32_t bar = qemu_get_be32(f);
>>>> +
>>>> +        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
>>>> +    }
>>>
>>> Is it possible to validate the bar's at all?  We just had a bug on a
>>> virtual device where one version was asking for a larger bar than the
>>> other; our validation caught this in some cases so we could tell that
>>> the guest had a BAR that was aligned at the wrong alignment.
>>>
>>
>> "Validate the bars" does that means validate size of bars?
> 
> I meant validate the address programmed into the BAR against the size,
> assuming you know the size; e.g. if it's a 128MB BAR, then make sure the
> address programmed in is 128MB aligned.
> 

If this validation fails, migration resume should fail, right?


>>>> +    vfio_pci_write_config(pdev, PCI_COMMAND,
>>>> +                          pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2);
>>>
>>> Can you explain what this is for?  You write the command register at the
>>> end of the function with the original value; there's no guarantee that
>>> the device is using IO for example, so ORing it seems odd.
>>>
>>
>> IO space and memory space accesses are disabled before writing BAR
>> addresses, only those are enabled here.
> 
> But do you need to enable them here, or can it wait until the pci_cmd
> write at the end of the function?
>

Ok, it can wait.

Thanks,
Kirti


>>> Also, are the other flags in COMMAND safe at this point - e.g. what
>>> about interrupts and stuff?
>>>
>>
>> COMMAND registers is saved from stop-and-copy phase, interrupt should be
>> disabled, then restoring here when vCPU are not yet running.
> 
> Dave
> 
>>>> +    interrupt_type = qemu_get_be32(f);
>>>> +
>>>> +    if (interrupt_type == VFIO_INT_MSI) {
>>>> +        /* restore msi configuration */
>>>> +        msi_flags = pci_default_read_config(pdev,
>>>> +                                            pdev->msi_cap + PCI_MSI_FLAGS, 2);
>>>> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
>>>> +
>>>> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
>>>> +                              msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2);
>>>> +
>>>> +        msi_addr_lo = qemu_get_be32(f);
>>>> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO,
>>>> +                              msi_addr_lo, 4);
>>>> +
>>>> +        msi_addr_hi = qemu_get_be32(f);
>>>> +        if (msi_64bit) {
>>>> +            vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
>>>> +                                  msi_addr_hi, 4);
>>>> +        }
>>>> +        msi_data = qemu_get_be32(f);
>>>> +        vfio_pci_write_config(pdev,
>>>> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
>>>> +                msi_data, 2);
>>>> +
>>>> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
>>>> +                              msi_flags | PCI_MSI_FLAGS_ENABLE, 2);
>>>> +    } else if (interrupt_type == VFIO_INT_MSIX) {
>>>> +        uint16_t offset = qemu_get_be16(f);
>>>> +
>>>> +        /* load enable bit and maskall bit */
>>>> +        vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1,
>>>> +                              offset, 2);
>>>> +        msix_load(pdev, f);
>>>> +    }
>>>> +    pci_cmd = qemu_get_be16(f);
>>>> +    vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2);
>>>> +}
>>>> +
>>>>  static VFIODeviceOps vfio_pci_ops = {
>>>>      .vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
>>>>      .vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
>>>>      .vfio_eoi = vfio_intx_eoi,
>>>>      .vfio_get_object = vfio_pci_get_object,
>>>> +    .vfio_save_config = vfio_pci_save_config,
>>>> +    .vfio_load_config = vfio_pci_load_config,
>>>>  };
>>>>  
>>>>  int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>> index 771b6d59a3db..ee72bd984a36 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -120,6 +120,8 @@ struct VFIODeviceOps {
>>>>      int (*vfio_hot_reset_multi)(VFIODevice *vdev);
>>>>      void (*vfio_eoi)(VFIODevice *vdev);
>>>>      Object *(*vfio_get_object)(VFIODevice *vdev);
>>>> +    void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
>>>> +    void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
>>>>  };
>>>>  
>>>>  typedef struct VFIOGroup {
>>>> -- 
>>>> 2.7.0
>>>>
>>> --
>>> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
Dr. David Alan Gilbert Aug. 22, 2019, 7:13 p.m. UTC | #7
* Kirti Wankhede (kwankhede@nvidia.com) wrote:
> 
> 
> On 8/22/2019 3:02 PM, Dr. David Alan Gilbert wrote:
> > * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> >> Sorry for delay to respond.
> >>
> >> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote:
> >>> * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> >>>> These functions save and restore PCI device specific data - config
> >>>> space of PCI device.
> >>>> Tested save and restore with MSI and MSIX type.
> >>>>
> >>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> >>>> Reviewed-by: Neo Jia <cjia@nvidia.com>
> >>>> ---
> >>>>  hw/vfio/pci.c                 | 114 ++++++++++++++++++++++++++++++++++++++++++
> >>>>  include/hw/vfio/vfio-common.h |   2 +
> >>>>  2 files changed, 116 insertions(+)
> >>>>
> >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >>>> index de0d286fc9dd..5fe4f8076cac 100644
> >>>> --- a/hw/vfio/pci.c
> >>>> +++ b/hw/vfio/pci.c
> >>>> @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev)
> >>>>      return OBJECT(vdev);
> >>>>  }
> >>>>  
> >>>> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
> >>>> +{
> >>>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> >>>> +    PCIDevice *pdev = &vdev->pdev;
> >>>> +    uint16_t pci_cmd;
> >>>> +    int i;
> >>>> +
> >>>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> >>>> +        uint32_t bar;
> >>>> +
> >>>> +        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4);
> >>>> +        qemu_put_be32(f, bar);
> >>>> +    }
> >>>> +
> >>>> +    qemu_put_be32(f, vdev->interrupt);
> >>>> +    if (vdev->interrupt == VFIO_INT_MSI) {
> >>>> +        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> >>>> +        bool msi_64bit;
> >>>> +
> >>>> +        msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> >>>> +                                            2);
> >>>> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> >>>> +
> >>>> +        msi_addr_lo = pci_default_read_config(pdev,
> >>>> +                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
> >>>> +        qemu_put_be32(f, msi_addr_lo);
> >>>> +
> >>>> +        if (msi_64bit) {
> >>>> +            msi_addr_hi = pci_default_read_config(pdev,
> >>>> +                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> >>>> +                                             4);
> >>>> +        }
> >>>> +        qemu_put_be32(f, msi_addr_hi);
> >>>> +
> >>>> +        msi_data = pci_default_read_config(pdev,
> >>>> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> >>>> +                2);
> >>>> +        qemu_put_be32(f, msi_data);
> >>>> +    } else if (vdev->interrupt == VFIO_INT_MSIX) {
> >>>> +        uint16_t offset;
> >>>> +
> >>>> +        /* save enable bit and maskall bit */
> >>>> +        offset = pci_default_read_config(pdev,
> >>>> +                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
> >>>> +        qemu_put_be16(f, offset);
> >>>> +        msix_save(pdev, f);
> >>>> +    }
> >>>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> >>>> +    qemu_put_be16(f, pci_cmd);
> >>>> +}
> >>>> +
> >>>> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
> >>>> +{
> >>>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> >>>> +    PCIDevice *pdev = &vdev->pdev;
> >>>> +    uint32_t interrupt_type;
> >>>> +    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> >>>> +    uint16_t pci_cmd;
> >>>> +    bool msi_64bit;
> >>>> +    int i;
> >>>> +
> >>>> +    /* retore pci bar configuration */
> >>>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> >>>> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> >>>> +                        pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
> >>>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> >>>> +        uint32_t bar = qemu_get_be32(f);
> >>>> +
> >>>> +        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
> >>>> +    }
> >>>
> >>> Is it possible to validate the bar's at all?  We just had a bug on a
> >>> virtual device where one version was asking for a larger bar than the
> >>> other; our validation caught this in some cases so we could tell that
> >>> the guest had a BAR that was aligned at the wrong alignment.
> >>>
> >>
> >> "Validate the bars" does that means validate size of bars?
> > 
> > I meant validate the address programmed into the BAR against the size,
> > assuming you know the size; e.g. if it's a 128MB BAR, then make sure the
> > address programmed in is 128MB aligned.
> > 
> 
> If this validation fails, migration resume should fail, right?

Yes I think so; if you've got a device that wants 128MB alignment and
someone gives you a non-aligned address, who knows what will happen.

> 
> >>>> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> >>>> +                          pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2);
> >>>
> >>> Can you explain what this is for?  You write the command register at the
> >>> end of the function with the original value; there's no guarantee that
> >>> the device is using IO for example, so ORing it seems odd.
> >>>
> >>
> >> IO space and memory space accesses are disabled before writing BAR
> >> addresses, only those are enabled here.
> > 
> > But do you need to enable them here, or can it wait until the pci_cmd
> > write at the end of the function?
> >
> 
> Ok, it can wait.

Great.

Dave

> Thanks,
> Kirti
> 
> 
> >>> Also, are the other flags in COMMAND safe at this point - e.g. what
> >>> about interrupts and stuff?
> >>>
> >>
> >> COMMAND registers is saved from stop-and-copy phase, interrupt should be
> >> disabled, then restoring here when vCPU are not yet running.
> > 
> > Dave
> > 
> >>>> +    interrupt_type = qemu_get_be32(f);
> >>>> +
> >>>> +    if (interrupt_type == VFIO_INT_MSI) {
> >>>> +        /* restore msi configuration */
> >>>> +        msi_flags = pci_default_read_config(pdev,
> >>>> +                                            pdev->msi_cap + PCI_MSI_FLAGS, 2);
> >>>> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> >>>> +
> >>>> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> >>>> +                              msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2);
> >>>> +
> >>>> +        msi_addr_lo = qemu_get_be32(f);
> >>>> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO,
> >>>> +                              msi_addr_lo, 4);
> >>>> +
> >>>> +        msi_addr_hi = qemu_get_be32(f);
> >>>> +        if (msi_64bit) {
> >>>> +            vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> >>>> +                                  msi_addr_hi, 4);
> >>>> +        }
> >>>> +        msi_data = qemu_get_be32(f);
> >>>> +        vfio_pci_write_config(pdev,
> >>>> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> >>>> +                msi_data, 2);
> >>>> +
> >>>> +        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> >>>> +                              msi_flags | PCI_MSI_FLAGS_ENABLE, 2);
> >>>> +    } else if (interrupt_type == VFIO_INT_MSIX) {
> >>>> +        uint16_t offset = qemu_get_be16(f);
> >>>> +
> >>>> +        /* load enable bit and maskall bit */
> >>>> +        vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1,
> >>>> +                              offset, 2);
> >>>> +        msix_load(pdev, f);
> >>>> +    }
> >>>> +    pci_cmd = qemu_get_be16(f);
> >>>> +    vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2);
> >>>> +}
> >>>> +
> >>>>  static VFIODeviceOps vfio_pci_ops = {
> >>>>      .vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
> >>>>      .vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
> >>>>      .vfio_eoi = vfio_intx_eoi,
> >>>>      .vfio_get_object = vfio_pci_get_object,
> >>>> +    .vfio_save_config = vfio_pci_save_config,
> >>>> +    .vfio_load_config = vfio_pci_load_config,
> >>>>  };
> >>>>  
> >>>>  int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
> >>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> >>>> index 771b6d59a3db..ee72bd984a36 100644
> >>>> --- a/include/hw/vfio/vfio-common.h
> >>>> +++ b/include/hw/vfio/vfio-common.h
> >>>> @@ -120,6 +120,8 @@ struct VFIODeviceOps {
> >>>>      int (*vfio_hot_reset_multi)(VFIODevice *vdev);
> >>>>      void (*vfio_eoi)(VFIODevice *vdev);
> >>>>      Object *(*vfio_get_object)(VFIODevice *vdev);
> >>>> +    void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
> >>>> +    void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
> >>>>  };
> >>>>  
> >>>>  typedef struct VFIOGroup {
> >>>> -- 
> >>>> 2.7.0
> >>>>
> >>> --
> >>> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> >>>
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Tian, Kevin Aug. 22, 2019, 11:57 p.m. UTC | #8
> From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com]
> Sent: Friday, August 23, 2019 3:13 AM
> 
> * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> >
> >
> > On 8/22/2019 3:02 PM, Dr. David Alan Gilbert wrote:
> > > * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> > >> Sorry for delay to respond.
> > >>
> > >> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote:
> > >>> * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> > >>>> These functions save and restore PCI device specific data - config
> > >>>> space of PCI device.
> > >>>> Tested save and restore with MSI and MSIX type.
> > >>>>
> > >>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> > >>>> Reviewed-by: Neo Jia <cjia@nvidia.com>
> > >>>> ---
> > >>>>  hw/vfio/pci.c                 | 114
> ++++++++++++++++++++++++++++++++++++++++++
> > >>>>  include/hw/vfio/vfio-common.h |   2 +
> > >>>>  2 files changed, 116 insertions(+)
> > >>>>
> > >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > >>>> index de0d286fc9dd..5fe4f8076cac 100644
> > >>>> --- a/hw/vfio/pci.c
> > >>>> +++ b/hw/vfio/pci.c
> > >>>> @@ -2395,11 +2395,125 @@ static Object
> *vfio_pci_get_object(VFIODevice *vbasedev)
> > >>>>      return OBJECT(vdev);
> > >>>>  }
> > >>>>
> > >>>> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
> > >>>> +{
> > >>>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice,
> vbasedev);
> > >>>> +    PCIDevice *pdev = &vdev->pdev;
> > >>>> +    uint16_t pci_cmd;
> > >>>> +    int i;
> > >>>> +
> > >>>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> > >>>> +        uint32_t bar;
> > >>>> +
> > >>>> +        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i *
> 4, 4);
> > >>>> +        qemu_put_be32(f, bar);
> > >>>> +    }
> > >>>> +
> > >>>> +    qemu_put_be32(f, vdev->interrupt);
> > >>>> +    if (vdev->interrupt == VFIO_INT_MSI) {
> > >>>> +        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> > >>>> +        bool msi_64bit;
> > >>>> +
> > >>>> +        msi_flags = pci_default_read_config(pdev, pdev->msi_cap +
> PCI_MSI_FLAGS,
> > >>>> +                                            2);
> > >>>> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> > >>>> +
> > >>>> +        msi_addr_lo = pci_default_read_config(pdev,
> > >>>> +                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
> > >>>> +        qemu_put_be32(f, msi_addr_lo);
> > >>>> +
> > >>>> +        if (msi_64bit) {
> > >>>> +            msi_addr_hi = pci_default_read_config(pdev,
> > >>>> +                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> > >>>> +                                             4);
> > >>>> +        }
> > >>>> +        qemu_put_be32(f, msi_addr_hi);
> > >>>> +
> > >>>> +        msi_data = pci_default_read_config(pdev,
> > >>>> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 :
> PCI_MSI_DATA_32),
> > >>>> +                2);
> > >>>> +        qemu_put_be32(f, msi_data);
> > >>>> +    } else if (vdev->interrupt == VFIO_INT_MSIX) {
> > >>>> +        uint16_t offset;
> > >>>> +
> > >>>> +        /* save enable bit and maskall bit */
> > >>>> +        offset = pci_default_read_config(pdev,
> > >>>> +                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
> > >>>> +        qemu_put_be16(f, offset);
> > >>>> +        msix_save(pdev, f);
> > >>>> +    }
> > >>>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> > >>>> +    qemu_put_be16(f, pci_cmd);
> > >>>> +}
> > >>>> +
> > >>>> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
> > >>>> +{
> > >>>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice,
> vbasedev);
> > >>>> +    PCIDevice *pdev = &vdev->pdev;
> > >>>> +    uint32_t interrupt_type;
> > >>>> +    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> > >>>> +    uint16_t pci_cmd;
> > >>>> +    bool msi_64bit;
> > >>>> +    int i;
> > >>>> +
> > >>>> +    /* retore pci bar configuration */
> > >>>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> > >>>> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> > >>>> +                        pci_cmd & (!(PCI_COMMAND_IO |
> PCI_COMMAND_MEMORY)), 2);
> > >>>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> > >>>> +        uint32_t bar = qemu_get_be32(f);
> > >>>> +
> > >>>> +        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
> > >>>> +    }
> > >>>
> > >>> Is it possible to validate the bar's at all?  We just had a bug on a
> > >>> virtual device where one version was asking for a larger bar than the
> > >>> other; our validation caught this in some cases so we could tell that
> > >>> the guest had a BAR that was aligned at the wrong alignment.

I'm a bit confused here. Did you mean that src and dest include
different versions of the virtual device which implements different
BAR size? If that is the case, shouldn't the migration fail at the start
when doing compatibility check?

> > >>>
> > >>
> > >> "Validate the bars" does that means validate size of bars?
> > >
> > > I meant validate the address programmed into the BAR against the size,
> > > assuming you know the size; e.g. if it's a 128MB BAR, then make sure the
> > > address programmed in is 128MB aligned.
> > >
> >
> > If this validation fails, migration resume should fail, right?
> 
> Yes I think so; if you've got a device that wants 128MB alignment and
> someone gives you a non-aligned address, who knows what will happen.

If misalignment is really caused by the guest, shouldn't we just follow
the hardware behavior, i.e. hard-wiring the lower bits to 0 before
updating the cfg space? 

Thanks
Kevin
Dr. David Alan Gilbert Aug. 23, 2019, 9:26 a.m. UTC | #9
* Tian, Kevin (kevin.tian@intel.com) wrote:
> > From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com]
> > Sent: Friday, August 23, 2019 3:13 AM
> > 
> > * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> > >
> > >
> > > On 8/22/2019 3:02 PM, Dr. David Alan Gilbert wrote:
> > > > * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> > > >> Sorry for delay to respond.
> > > >>
> > > >> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote:
> > > >>> * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> > > >>>> These functions save and restore PCI device specific data - config
> > > >>>> space of PCI device.
> > > >>>> Tested save and restore with MSI and MSIX type.
> > > >>>>
> > > >>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> > > >>>> Reviewed-by: Neo Jia <cjia@nvidia.com>
> > > >>>> ---
> > > >>>>  hw/vfio/pci.c                 | 114
> > ++++++++++++++++++++++++++++++++++++++++++
> > > >>>>  include/hw/vfio/vfio-common.h |   2 +
> > > >>>>  2 files changed, 116 insertions(+)
> > > >>>>
> > > >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > >>>> index de0d286fc9dd..5fe4f8076cac 100644
> > > >>>> --- a/hw/vfio/pci.c
> > > >>>> +++ b/hw/vfio/pci.c
> > > >>>> @@ -2395,11 +2395,125 @@ static Object
> > *vfio_pci_get_object(VFIODevice *vbasedev)
> > > >>>>      return OBJECT(vdev);
> > > >>>>  }
> > > >>>>
> > > >>>> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
> > > >>>> +{
> > > >>>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice,
> > vbasedev);
> > > >>>> +    PCIDevice *pdev = &vdev->pdev;
> > > >>>> +    uint16_t pci_cmd;
> > > >>>> +    int i;
> > > >>>> +
> > > >>>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> > > >>>> +        uint32_t bar;
> > > >>>> +
> > > >>>> +        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i *
> > 4, 4);
> > > >>>> +        qemu_put_be32(f, bar);
> > > >>>> +    }
> > > >>>> +
> > > >>>> +    qemu_put_be32(f, vdev->interrupt);
> > > >>>> +    if (vdev->interrupt == VFIO_INT_MSI) {
> > > >>>> +        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> > > >>>> +        bool msi_64bit;
> > > >>>> +
> > > >>>> +        msi_flags = pci_default_read_config(pdev, pdev->msi_cap +
> > PCI_MSI_FLAGS,
> > > >>>> +                                            2);
> > > >>>> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> > > >>>> +
> > > >>>> +        msi_addr_lo = pci_default_read_config(pdev,
> > > >>>> +                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
> > > >>>> +        qemu_put_be32(f, msi_addr_lo);
> > > >>>> +
> > > >>>> +        if (msi_64bit) {
> > > >>>> +            msi_addr_hi = pci_default_read_config(pdev,
> > > >>>> +                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> > > >>>> +                                             4);
> > > >>>> +        }
> > > >>>> +        qemu_put_be32(f, msi_addr_hi);
> > > >>>> +
> > > >>>> +        msi_data = pci_default_read_config(pdev,
> > > >>>> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 :
> > PCI_MSI_DATA_32),
> > > >>>> +                2);
> > > >>>> +        qemu_put_be32(f, msi_data);
> > > >>>> +    } else if (vdev->interrupt == VFIO_INT_MSIX) {
> > > >>>> +        uint16_t offset;
> > > >>>> +
> > > >>>> +        /* save enable bit and maskall bit */
> > > >>>> +        offset = pci_default_read_config(pdev,
> > > >>>> +                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
> > > >>>> +        qemu_put_be16(f, offset);
> > > >>>> +        msix_save(pdev, f);
> > > >>>> +    }
> > > >>>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> > > >>>> +    qemu_put_be16(f, pci_cmd);
> > > >>>> +}
> > > >>>> +
> > > >>>> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
> > > >>>> +{
> > > >>>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice,
> > vbasedev);
> > > >>>> +    PCIDevice *pdev = &vdev->pdev;
> > > >>>> +    uint32_t interrupt_type;
> > > >>>> +    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> > > >>>> +    uint16_t pci_cmd;
> > > >>>> +    bool msi_64bit;
> > > >>>> +    int i;
> > > >>>> +
> > > >>>> +    /* retore pci bar configuration */
> > > >>>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> > > >>>> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> > > >>>> +                        pci_cmd & (!(PCI_COMMAND_IO |
> > PCI_COMMAND_MEMORY)), 2);
> > > >>>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> > > >>>> +        uint32_t bar = qemu_get_be32(f);
> > > >>>> +
> > > >>>> +        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
> > > >>>> +    }
> > > >>>
> > > >>> Is it possible to validate the bar's at all?  We just had a bug on a
> > > >>> virtual device where one version was asking for a larger bar than the
> > > >>> other; our validation caught this in some cases so we could tell that
> > > >>> the guest had a BAR that was aligned at the wrong alignment.
> 
> I'm a bit confused here. Did you mean that src and dest include
> different versions of the virtual device which implements different
> BAR size? If that is the case, shouldn't the migration fail at the start
> when doing compatibility check?

It was a mistake where the destination had accidentally changed the BAR
size; checking the alignment was the only check that failed.

> > > >>>
> > > >>
> > > >> "Validate the bars" does that means validate size of bars?
> > > >
> > > > I meant validate the address programmed into the BAR against the size,
> > > > assuming you know the size; e.g. if it's a 128MB BAR, then make sure the
> > > > address programmed in is 128MB aligned.
> > > >
> > >
> > > If this validation fails, migration resume should fail, right?
> > 
> > Yes I think so; if you've got a device that wants 128MB alignment and
> > someone gives you a non-aligned address, who knows what will happen.
> 
> If misalignment is really caused by the guest, shouldn't we just follow
> the hardware behavior, i.e. hard-wiring the lower bits to 0 before
> updating the cfg space? 

That should already happen on the source; but when loading a migration
stream I try and be very untrusting; so it's good to check that the
destination devices idea of the BAR matches what the register has.

Dave

> Thanks
> Kevin
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Tian, Kevin Aug. 23, 2019, 9:49 a.m. UTC | #10
> From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com]
> Sent: Friday, August 23, 2019 5:27 PM
> 
> * Tian, Kevin (kevin.tian@intel.com) wrote:
> > > From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com]
> > > Sent: Friday, August 23, 2019 3:13 AM
> > >
> > > * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> > > >
> > > >
> > > > On 8/22/2019 3:02 PM, Dr. David Alan Gilbert wrote:
> > > > > * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> > > > >> Sorry for delay to respond.
> > > > >>
> > > > >> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote:
> > > > >>> * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> > > > >>>> These functions save and restore PCI device specific data - config
> > > > >>>> space of PCI device.
> > > > >>>> Tested save and restore with MSI and MSIX type.
> > > > >>>>
> > > > >>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> > > > >>>> Reviewed-by: Neo Jia <cjia@nvidia.com>
> > > > >>>> ---
> > > > >>>>  hw/vfio/pci.c                 | 114
> > > ++++++++++++++++++++++++++++++++++++++++++
> > > > >>>>  include/hw/vfio/vfio-common.h |   2 +
> > > > >>>>  2 files changed, 116 insertions(+)
> > > > >>>>
> > > > >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > >>>> index de0d286fc9dd..5fe4f8076cac 100644
> > > > >>>> --- a/hw/vfio/pci.c
> > > > >>>> +++ b/hw/vfio/pci.c
> > > > >>>> @@ -2395,11 +2395,125 @@ static Object
> > > *vfio_pci_get_object(VFIODevice *vbasedev)
> > > > >>>>      return OBJECT(vdev);
> > > > >>>>  }
> > > > >>>>
> > > > >>>> +static void vfio_pci_save_config(VFIODevice *vbasedev,
> QEMUFile *f)
> > > > >>>> +{
> > > > >>>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice,
> > > vbasedev);
> > > > >>>> +    PCIDevice *pdev = &vdev->pdev;
> > > > >>>> +    uint16_t pci_cmd;
> > > > >>>> +    int i;
> > > > >>>> +
> > > > >>>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> > > > >>>> +        uint32_t bar;
> > > > >>>> +
> > > > >>>> +        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 +
> i *
> > > 4, 4);
> > > > >>>> +        qemu_put_be32(f, bar);
> > > > >>>> +    }
> > > > >>>> +
> > > > >>>> +    qemu_put_be32(f, vdev->interrupt);
> > > > >>>> +    if (vdev->interrupt == VFIO_INT_MSI) {
> > > > >>>> +        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> > > > >>>> +        bool msi_64bit;
> > > > >>>> +
> > > > >>>> +        msi_flags = pci_default_read_config(pdev, pdev->msi_cap +
> > > PCI_MSI_FLAGS,
> > > > >>>> +                                            2);
> > > > >>>> +        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
> > > > >>>> +
> > > > >>>> +        msi_addr_lo = pci_default_read_config(pdev,
> > > > >>>> +                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
> > > > >>>> +        qemu_put_be32(f, msi_addr_lo);
> > > > >>>> +
> > > > >>>> +        if (msi_64bit) {
> > > > >>>> +            msi_addr_hi = pci_default_read_config(pdev,
> > > > >>>> +                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> > > > >>>> +                                             4);
> > > > >>>> +        }
> > > > >>>> +        qemu_put_be32(f, msi_addr_hi);
> > > > >>>> +
> > > > >>>> +        msi_data = pci_default_read_config(pdev,
> > > > >>>> +                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 :
> > > PCI_MSI_DATA_32),
> > > > >>>> +                2);
> > > > >>>> +        qemu_put_be32(f, msi_data);
> > > > >>>> +    } else if (vdev->interrupt == VFIO_INT_MSIX) {
> > > > >>>> +        uint16_t offset;
> > > > >>>> +
> > > > >>>> +        /* save enable bit and maskall bit */
> > > > >>>> +        offset = pci_default_read_config(pdev,
> > > > >>>> +                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
> > > > >>>> +        qemu_put_be16(f, offset);
> > > > >>>> +        msix_save(pdev, f);
> > > > >>>> +    }
> > > > >>>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> > > > >>>> +    qemu_put_be16(f, pci_cmd);
> > > > >>>> +}
> > > > >>>> +
> > > > >>>> +static void vfio_pci_load_config(VFIODevice *vbasedev,
> QEMUFile *f)
> > > > >>>> +{
> > > > >>>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice,
> > > vbasedev);
> > > > >>>> +    PCIDevice *pdev = &vdev->pdev;
> > > > >>>> +    uint32_t interrupt_type;
> > > > >>>> +    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
> > > > >>>> +    uint16_t pci_cmd;
> > > > >>>> +    bool msi_64bit;
> > > > >>>> +    int i;
> > > > >>>> +
> > > > >>>> +    /* retore pci bar configuration */
> > > > >>>> +    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
> > > > >>>> +    vfio_pci_write_config(pdev, PCI_COMMAND,
> > > > >>>> +                        pci_cmd & (!(PCI_COMMAND_IO |
> > > PCI_COMMAND_MEMORY)), 2);
> > > > >>>> +    for (i = 0; i < PCI_ROM_SLOT; i++) {
> > > > >>>> +        uint32_t bar = qemu_get_be32(f);
> > > > >>>> +
> > > > >>>> +        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar,
> 4);
> > > > >>>> +    }
> > > > >>>
> > > > >>> Is it possible to validate the bar's at all?  We just had a bug on a
> > > > >>> virtual device where one version was asking for a larger bar than the
> > > > >>> other; our validation caught this in some cases so we could tell that
> > > > >>> the guest had a BAR that was aligned at the wrong alignment.
> >
> > I'm a bit confused here. Did you mean that src and dest include
> > different versions of the virtual device which implements different
> > BAR size? If that is the case, shouldn't the migration fail at the start
> > when doing compatibility check?
> 
> It was a mistake where the destination had accidentally changed the BAR
> size; checking the alignment was the only check that failed.
> 
> > > > >>>
> > > > >>
> > > > >> "Validate the bars" does that means validate size of bars?
> > > > >
> > > > > I meant validate the address programmed into the BAR against the size,
> > > > > assuming you know the size; e.g. if it's a 128MB BAR, then make sure
> the
> > > > > address programmed in is 128MB aligned.
> > > > >
> > > >
> > > > If this validation fails, migration resume should fail, right?
> > >
> > > Yes I think so; if you've got a device that wants 128MB alignment and
> > > someone gives you a non-aligned address, who knows what will happen.
> >
> > If misalignment is really caused by the guest, shouldn't we just follow
> > the hardware behavior, i.e. hard-wiring the lower bits to 0 before
> > updating the cfg space?
> 
> That should already happen on the source; but when loading a migration
> stream I try and be very untrusting; so it's good to check that the
> destination devices idea of the BAR matches what the register has.
> 

OK, it makes sense. 

Thanks
Kevin
diff mbox series

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index de0d286fc9dd..5fe4f8076cac 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2395,11 +2395,125 @@  static Object *vfio_pci_get_object(VFIODevice *vbasedev)
     return OBJECT(vdev);
 }
 
+static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
+{
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    PCIDevice *pdev = &vdev->pdev;
+    uint16_t pci_cmd;
+    int i;
+
+    for (i = 0; i < PCI_ROM_SLOT; i++) {
+        uint32_t bar;
+
+        bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4);
+        qemu_put_be32(f, bar);
+    }
+
+    qemu_put_be32(f, vdev->interrupt);
+    if (vdev->interrupt == VFIO_INT_MSI) {
+        uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
+        bool msi_64bit;
+
+        msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
+                                            2);
+        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
+
+        msi_addr_lo = pci_default_read_config(pdev,
+                                         pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
+        qemu_put_be32(f, msi_addr_lo);
+
+        if (msi_64bit) {
+            msi_addr_hi = pci_default_read_config(pdev,
+                                             pdev->msi_cap + PCI_MSI_ADDRESS_HI,
+                                             4);
+        }
+        qemu_put_be32(f, msi_addr_hi);
+
+        msi_data = pci_default_read_config(pdev,
+                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
+                2);
+        qemu_put_be32(f, msi_data);
+    } else if (vdev->interrupt == VFIO_INT_MSIX) {
+        uint16_t offset;
+
+        /* save enable bit and maskall bit */
+        offset = pci_default_read_config(pdev,
+                                       pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2);
+        qemu_put_be16(f, offset);
+        msix_save(pdev, f);
+    }
+    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
+    qemu_put_be16(f, pci_cmd);
+}
+
+static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
+{
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    PCIDevice *pdev = &vdev->pdev;
+    uint32_t interrupt_type;
+    uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data;
+    uint16_t pci_cmd;
+    bool msi_64bit;
+    int i;
+
+    /* retore pci bar configuration */
+    pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2);
+    vfio_pci_write_config(pdev, PCI_COMMAND,
+                        pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
+    for (i = 0; i < PCI_ROM_SLOT; i++) {
+        uint32_t bar = qemu_get_be32(f);
+
+        vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4);
+    }
+    vfio_pci_write_config(pdev, PCI_COMMAND,
+                          pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2);
+
+    interrupt_type = qemu_get_be32(f);
+
+    if (interrupt_type == VFIO_INT_MSI) {
+        /* restore msi configuration */
+        msi_flags = pci_default_read_config(pdev,
+                                            pdev->msi_cap + PCI_MSI_FLAGS, 2);
+        msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT);
+
+        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
+                              msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2);
+
+        msi_addr_lo = qemu_get_be32(f);
+        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO,
+                              msi_addr_lo, 4);
+
+        msi_addr_hi = qemu_get_be32(f);
+        if (msi_64bit) {
+            vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
+                                  msi_addr_hi, 4);
+        }
+        msi_data = qemu_get_be32(f);
+        vfio_pci_write_config(pdev,
+                pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
+                msi_data, 2);
+
+        vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS,
+                              msi_flags | PCI_MSI_FLAGS_ENABLE, 2);
+    } else if (interrupt_type == VFIO_INT_MSIX) {
+        uint16_t offset = qemu_get_be16(f);
+
+        /* load enable bit and maskall bit */
+        vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1,
+                              offset, 2);
+        msix_load(pdev, f);
+    }
+    pci_cmd = qemu_get_be16(f);
+    vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2);
+}
+
 static VFIODeviceOps vfio_pci_ops = {
     .vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
     .vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
     .vfio_eoi = vfio_intx_eoi,
     .vfio_get_object = vfio_pci_get_object,
+    .vfio_save_config = vfio_pci_save_config,
+    .vfio_load_config = vfio_pci_load_config,
 };
 
 int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 771b6d59a3db..ee72bd984a36 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -120,6 +120,8 @@  struct VFIODeviceOps {
     int (*vfio_hot_reset_multi)(VFIODevice *vdev);
     void (*vfio_eoi)(VFIODevice *vdev);
     Object *(*vfio_get_object)(VFIODevice *vdev);
+    void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
+    void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
 };
 
 typedef struct VFIOGroup {