diff mbox series

[v26,07/17] vfio: Register SaveVMHandlers for VFIO device

Message ID 1600817059-26721-8-git-send-email-kwankhede@nvidia.com
State New
Headers show
Series Add migration support for VFIO devices | expand

Commit Message

Kirti Wankhede Sept. 22, 2020, 11:24 p.m. UTC
Define flags to be used as delimeter in migration file stream.
Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
region from these functions at source during saving or pre-copy phase.
Set VFIO device state depending on VM's state. During live migration, VM is
running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
device. During save-restore, VM is paused, _SAVING state is set for VFIO device.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
---
 hw/vfio/migration.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/trace-events |  2 ++
 2 files changed, 93 insertions(+)

Comments

Philippe Mathieu-Daudé Sept. 24, 2020, 3:15 p.m. UTC | #1
On 9/23/20 1:24 AM, Kirti Wankhede wrote:
> Define flags to be used as delimeter in migration file stream.

Typo "delimiter".

> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
> region from these functions at source during saving or pre-copy phase.
> Set VFIO device state depending on VM's state. During live migration, VM is
> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/migration.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/trace-events |  2 ++
>  2 files changed, 93 insertions(+)
> 
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index f650fe9fc3c8..8e8adaa25779 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -8,12 +8,15 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/main-loop.h"
> +#include "qemu/cutils.h"
>  #include <linux/vfio.h>
>  
>  #include "sysemu/runstate.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "cpu.h"
>  #include "migration/migration.h"
> +#include "migration/vmstate.h"
>  #include "migration/qemu-file.h"
>  #include "migration/register.h"
>  #include "migration/blocker.h"
> @@ -25,6 +28,17 @@
>  #include "trace.h"
>  #include "hw/hw.h"
>  
> +/*
> + * Flags used as delimiter:
> + * 0xffffffff => MSB 32-bit all 1s
> + * 0xef10     => emulated (virtual) function IO
> + * 0x0000     => 16-bits reserved for flags
> + */
> +#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
> +#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
> +
>  static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
>                                    off_t off, bool iswrite)
>  {
> @@ -166,6 +180,65 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
>      return 0;
>  }
>  
> +/* ---------------------------------------------------------------------- */
> +
> +static int vfio_save_setup(QEMUFile *f, void *opaque)
> +{
> +    VFIODevice *vbasedev = opaque;
> +    VFIOMigration *migration = vbasedev->migration;
> +    int ret;
> +
> +    trace_vfio_save_setup(vbasedev->name);
> +
> +    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
> +
> +    if (migration->region.mmaps) {
> +        qemu_mutex_lock_iothread();
> +        ret = vfio_region_mmap(&migration->region);
> +        qemu_mutex_unlock_iothread();
> +        if (ret) {
> +            error_report("%s: Failed to mmap VFIO migration region %d: %s",
> +                         vbasedev->name, migration->region.nr,
> +                         strerror(-ret));
> +            error_report("%s: Falling back to slow path", vbasedev->name);
> +        }
> +    }
> +
> +    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
> +                                   VFIO_DEVICE_STATE_SAVING);
> +    if (ret) {
> +        error_report("%s: Failed to set state SAVING", vbasedev->name);
> +        return ret;
> +    }
> +
> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
> +
> +    ret = qemu_file_get_error(f);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +
> +static void vfio_save_cleanup(void *opaque)
> +{
> +    VFIODevice *vbasedev = opaque;
> +    VFIOMigration *migration = vbasedev->migration;
> +
> +    if (migration->region.mmaps) {
> +        vfio_region_unmap(&migration->region);
> +    }
> +    trace_vfio_save_cleanup(vbasedev->name);
> +}
> +
> +static SaveVMHandlers savevm_vfio_handlers = {
> +    .save_setup = vfio_save_setup,
> +    .save_cleanup = vfio_save_cleanup,
> +};
> +
> +/* ---------------------------------------------------------------------- */
> +
>  static void vfio_vmstate_change(void *opaque, int running, RunState state)
>  {
>      VFIODevice *vbasedev = opaque;
> @@ -225,6 +298,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>                                 struct vfio_region_info *info)
>  {
>      int ret = -EINVAL;
> +    char id[256] = "";
> +    Object *obj;
>  
>      if (!vbasedev->ops->vfio_get_object) {
>          return ret;
> @@ -241,6 +316,22 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>          return ret;
>      }
>  
> +    obj = vbasedev->ops->vfio_get_object(vbasedev);
> +
> +    if (obj) {
> +        DeviceState *dev = DEVICE(obj);
> +        char *oid = vmstate_if_get_id(VMSTATE_IF(dev));
> +
> +        if (oid) {
> +            pstrcpy(id, sizeof(id), oid);
> +            pstrcat(id, sizeof(id), "/");
> +            g_free(oid);
> +        }
> +    }
> +    pstrcat(id, sizeof(id), "vfio");

Alternatively (easier to review, matter of taste):

 g_autofree char *path = NULL;

 if (oid) {
   path = g_strdup_printf("%s/vfio",
                          vmstate_if_get_id(VMSTATE_IF(obj)););
 } else {
   path = g_strdup("vfio");
 }
 strpadcpy(id, sizeof(id), path, '\0');

> +
> +    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
> +                         vbasedev);
>      vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
>                                                            vbasedev);
>      vbasedev->migration_state.notify = vfio_migration_state_notifier;
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index bcb3fa7314d7..982d8dccb219 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -152,3 +152,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
>  vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
>  vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
>  vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
> +vfio_save_setup(const char *name) " (%s)"
> +vfio_save_cleanup(const char *name) " (%s)"
>
Cornelia Huck Sept. 25, 2020, 11:53 a.m. UTC | #2
On Wed, 23 Sep 2020 04:54:09 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> Define flags to be used as delimeter in migration file stream.
> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
> region from these functions at source during saving or pre-copy phase.
> Set VFIO device state depending on VM's state. During live migration, VM is
> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/migration.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/trace-events |  2 ++
>  2 files changed, 93 insertions(+)
> 

(...)

> +/*
> + * Flags used as delimiter:
> + * 0xffffffff => MSB 32-bit all 1s
> + * 0xef10     => emulated (virtual) function IO

Where is this value coming from?

> + * 0x0000     => 16-bits reserved for flags
> + */
> +#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
> +#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)

I think we need some more documentation what these values mean and how
they are used. From reading ahead a bit, it seems there is always
supposed to be a pair of DEV_*_STATE and END_OF_STATE framing some kind
of data?

(...)
Alex Williamson Sept. 25, 2020, 8:20 p.m. UTC | #3
On Wed, 23 Sep 2020 04:54:09 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> Define flags to be used as delimeter in migration file stream.
> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
> region from these functions at source during saving or pre-copy phase.
> Set VFIO device state depending on VM's state. During live migration, VM is
> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/migration.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/trace-events |  2 ++
>  2 files changed, 93 insertions(+)
> 
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index f650fe9fc3c8..8e8adaa25779 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -8,12 +8,15 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/main-loop.h"
> +#include "qemu/cutils.h"
>  #include <linux/vfio.h>
>  
>  #include "sysemu/runstate.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "cpu.h"
>  #include "migration/migration.h"
> +#include "migration/vmstate.h"
>  #include "migration/qemu-file.h"
>  #include "migration/register.h"
>  #include "migration/blocker.h"
> @@ -25,6 +28,17 @@
>  #include "trace.h"
>  #include "hw/hw.h"
>  
> +/*
> + * Flags used as delimiter:
> + * 0xffffffff => MSB 32-bit all 1s
> + * 0xef10     => emulated (virtual) function IO
> + * 0x0000     => 16-bits reserved for flags
> + */
> +#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
> +#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
> +
>  static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
>                                    off_t off, bool iswrite)
>  {
> @@ -166,6 +180,65 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
>      return 0;
>  }
>  
> +/* ---------------------------------------------------------------------- */
> +
> +static int vfio_save_setup(QEMUFile *f, void *opaque)
> +{
> +    VFIODevice *vbasedev = opaque;
> +    VFIOMigration *migration = vbasedev->migration;
> +    int ret;
> +
> +    trace_vfio_save_setup(vbasedev->name);
> +
> +    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
> +
> +    if (migration->region.mmaps) {
> +        qemu_mutex_lock_iothread();
> +        ret = vfio_region_mmap(&migration->region);
> +        qemu_mutex_unlock_iothread();

Please add a comment identifying why the iothread mutex lock is
necessary here.

> +        if (ret) {
> +            error_report("%s: Failed to mmap VFIO migration region %d: %s",
> +                         vbasedev->name, migration->region.nr,

We don't support multiple migration regions, is it useful to include
the region index here?

> +                         strerror(-ret));
> +            error_report("%s: Falling back to slow path", vbasedev->name);
> +        }
> +    }
> +
> +    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
> +                                   VFIO_DEVICE_STATE_SAVING);
> +    if (ret) {
> +        error_report("%s: Failed to set state SAVING", vbasedev->name);
> +        return ret;
> +    }

Again, doesn't match the function semantics that success only means the
device is in a non-error state, maybe the one that was asked for.

> +
> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);

What's the overall purpose of writing these markers into the migration
stream?  vfio_load_state() doesn't do anything with this other than
validate that the end-of-state immediately follows.  Is this a
placeholder for something in the future?

> +
> +    ret = qemu_file_get_error(f);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +
> +static void vfio_save_cleanup(void *opaque)
> +{
> +    VFIODevice *vbasedev = opaque;
> +    VFIOMigration *migration = vbasedev->migration;
> +
> +    if (migration->region.mmaps) {
> +        vfio_region_unmap(&migration->region);
> +    }
> +    trace_vfio_save_cleanup(vbasedev->name);
> +}
> +
> +static SaveVMHandlers savevm_vfio_handlers = {
> +    .save_setup = vfio_save_setup,
> +    .save_cleanup = vfio_save_cleanup,
> +};
> +
> +/* ---------------------------------------------------------------------- */
> +
>  static void vfio_vmstate_change(void *opaque, int running, RunState state)
>  {
>      VFIODevice *vbasedev = opaque;
> @@ -225,6 +298,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>                                 struct vfio_region_info *info)
>  {
>      int ret = -EINVAL;
> +    char id[256] = "";
> +    Object *obj;
>  
>      if (!vbasedev->ops->vfio_get_object) {
>          return ret;
> @@ -241,6 +316,22 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>          return ret;
>      }
>  
> +    obj = vbasedev->ops->vfio_get_object(vbasedev);
> +
> +    if (obj) {
> +        DeviceState *dev = DEVICE(obj);
> +        char *oid = vmstate_if_get_id(VMSTATE_IF(dev));
> +
> +        if (oid) {
> +            pstrcpy(id, sizeof(id), oid);
> +            pstrcat(id, sizeof(id), "/");
> +            g_free(oid);
> +        }
> +    }

Here's where vfio_migration_init() starts using vfio_get_object() as I
referenced back on patch 04.  We might as well get the object before
calling vfio_migration_region_init() and then pass the object.  The
conditional branch to handle obj is strange here too, it's fatal if
vfio_migration_region_init() doesn't find an object, why do we handle
it as optional here?  Also, what is this doing?  Comments would be
nice...  Thanks,

Alex

> +    pstrcat(id, sizeof(id), "vfio");
> +
> +    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
> +                         vbasedev);
>      vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
>                                                            vbasedev);
>      vbasedev->migration_state.notify = vfio_migration_state_notifier;
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index bcb3fa7314d7..982d8dccb219 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -152,3 +152,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
>  vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
>  vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
>  vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
> +vfio_save_setup(const char *name) " (%s)"
> +vfio_save_cleanup(const char *name) " (%s)"
Dr. David Alan Gilbert Sept. 29, 2020, 10:19 a.m. UTC | #4
* Philippe Mathieu-Daudé (philmd@redhat.com) wrote:
> On 9/23/20 1:24 AM, Kirti Wankhede wrote:
> > Define flags to be used as delimeter in migration file stream.
> 
> Typo "delimiter".
> 
> > Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
> > region from these functions at source during saving or pre-copy phase.
> > Set VFIO device state depending on VM's state. During live migration, VM is
> > running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
> > device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
> > 
> > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> > Reviewed-by: Neo Jia <cjia@nvidia.com>
> > ---
> >  hw/vfio/migration.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  hw/vfio/trace-events |  2 ++
> >  2 files changed, 93 insertions(+)
> > 
> > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> > index f650fe9fc3c8..8e8adaa25779 100644
> > --- a/hw/vfio/migration.c
> > +++ b/hw/vfio/migration.c
> > @@ -8,12 +8,15 @@
> >   */
> >  
> >  #include "qemu/osdep.h"
> > +#include "qemu/main-loop.h"
> > +#include "qemu/cutils.h"
> >  #include <linux/vfio.h>
> >  
> >  #include "sysemu/runstate.h"
> >  #include "hw/vfio/vfio-common.h"
> >  #include "cpu.h"
> >  #include "migration/migration.h"
> > +#include "migration/vmstate.h"
> >  #include "migration/qemu-file.h"
> >  #include "migration/register.h"
> >  #include "migration/blocker.h"
> > @@ -25,6 +28,17 @@
> >  #include "trace.h"
> >  #include "hw/hw.h"
> >  
> > +/*
> > + * Flags used as delimiter:
> > + * 0xffffffff => MSB 32-bit all 1s
> > + * 0xef10     => emulated (virtual) function IO
> > + * 0x0000     => 16-bits reserved for flags
> > + */
> > +#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
> > +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
> > +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
> > +#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
> > +
> >  static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
> >                                    off_t off, bool iswrite)
> >  {
> > @@ -166,6 +180,65 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
> >      return 0;
> >  }
> >  
> > +/* ---------------------------------------------------------------------- */
> > +
> > +static int vfio_save_setup(QEMUFile *f, void *opaque)
> > +{
> > +    VFIODevice *vbasedev = opaque;
> > +    VFIOMigration *migration = vbasedev->migration;
> > +    int ret;
> > +
> > +    trace_vfio_save_setup(vbasedev->name);
> > +
> > +    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
> > +
> > +    if (migration->region.mmaps) {
> > +        qemu_mutex_lock_iothread();
> > +        ret = vfio_region_mmap(&migration->region);
> > +        qemu_mutex_unlock_iothread();
> > +        if (ret) {
> > +            error_report("%s: Failed to mmap VFIO migration region %d: %s",
> > +                         vbasedev->name, migration->region.nr,
> > +                         strerror(-ret));
> > +            error_report("%s: Falling back to slow path", vbasedev->name);
> > +        }
> > +    }
> > +
> > +    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
> > +                                   VFIO_DEVICE_STATE_SAVING);
> > +    if (ret) {
> > +        error_report("%s: Failed to set state SAVING", vbasedev->name);
> > +        return ret;
> > +    }
> > +
> > +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
> > +
> > +    ret = qemu_file_get_error(f);
> > +    if (ret) {
> > +        return ret;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static void vfio_save_cleanup(void *opaque)
> > +{
> > +    VFIODevice *vbasedev = opaque;
> > +    VFIOMigration *migration = vbasedev->migration;
> > +
> > +    if (migration->region.mmaps) {
> > +        vfio_region_unmap(&migration->region);
> > +    }
> > +    trace_vfio_save_cleanup(vbasedev->name);
> > +}
> > +
> > +static SaveVMHandlers savevm_vfio_handlers = {
> > +    .save_setup = vfio_save_setup,
> > +    .save_cleanup = vfio_save_cleanup,
> > +};
> > +
> > +/* ---------------------------------------------------------------------- */
> > +
> >  static void vfio_vmstate_change(void *opaque, int running, RunState state)
> >  {
> >      VFIODevice *vbasedev = opaque;
> > @@ -225,6 +298,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
> >                                 struct vfio_region_info *info)
> >  {
> >      int ret = -EINVAL;
> > +    char id[256] = "";
> > +    Object *obj;
> >  
> >      if (!vbasedev->ops->vfio_get_object) {
> >          return ret;
> > @@ -241,6 +316,22 @@ static int vfio_migration_init(VFIODevice *vbasedev,
> >          return ret;
> >      }
> >  
> > +    obj = vbasedev->ops->vfio_get_object(vbasedev);
> > +
> > +    if (obj) {
> > +        DeviceState *dev = DEVICE(obj);
> > +        char *oid = vmstate_if_get_id(VMSTATE_IF(dev));
> > +
> > +        if (oid) {
> > +            pstrcpy(id, sizeof(id), oid);
> > +            pstrcat(id, sizeof(id), "/");
> > +            g_free(oid);
> > +        }
> > +    }
> > +    pstrcat(id, sizeof(id), "vfio");
> 
> Alternatively (easier to review, matter of taste):
> 
>  g_autofree char *path = NULL;
> 
>  if (oid) {
>    path = g_strdup_printf("%s/vfio",
>                           vmstate_if_get_id(VMSTATE_IF(obj)););
>  } else {
>    path = g_strdup("vfio");
>  }
>  strpadcpy(id, sizeof(id), path, '\0');

Maybe, although it's a straight copy of the magic in unregister_savevm.

Dave

> > +
> > +    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
> > +                         vbasedev);
> >      vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
> >                                                            vbasedev);
> >      vbasedev->migration_state.notify = vfio_migration_state_notifier;
> > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> > index bcb3fa7314d7..982d8dccb219 100644
> > --- a/hw/vfio/trace-events
> > +++ b/hw/vfio/trace-events
> > @@ -152,3 +152,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
> >  vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
> >  vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
> >  vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
> > +vfio_save_setup(const char *name) " (%s)"
> > +vfio_save_cleanup(const char *name) " (%s)"
> > 
>
Kirti Wankhede Oct. 17, 2020, 8:36 p.m. UTC | #5
On 9/29/2020 3:49 PM, Dr. David Alan Gilbert wrote:
> * Philippe Mathieu-Daudé (philmd@redhat.com) wrote:
>> On 9/23/20 1:24 AM, Kirti Wankhede wrote:
>>> Define flags to be used as delimeter in migration file stream.
>>
>> Typo "delimiter".
>>
>>> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
>>> region from these functions at source during saving or pre-copy phase.
>>> Set VFIO device state depending on VM's state. During live migration, VM is
>>> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
>>> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
>>>
>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>>> Reviewed-by: Neo Jia <cjia@nvidia.com>
>>> ---
>>>   hw/vfio/migration.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   hw/vfio/trace-events |  2 ++
>>>   2 files changed, 93 insertions(+)
>>>
>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>> index f650fe9fc3c8..8e8adaa25779 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
>>> @@ -8,12 +8,15 @@
>>>    */
>>>   
>>>   #include "qemu/osdep.h"
>>> +#include "qemu/main-loop.h"
>>> +#include "qemu/cutils.h"
>>>   #include <linux/vfio.h>
>>>   
>>>   #include "sysemu/runstate.h"
>>>   #include "hw/vfio/vfio-common.h"
>>>   #include "cpu.h"
>>>   #include "migration/migration.h"
>>> +#include "migration/vmstate.h"
>>>   #include "migration/qemu-file.h"
>>>   #include "migration/register.h"
>>>   #include "migration/blocker.h"
>>> @@ -25,6 +28,17 @@
>>>   #include "trace.h"
>>>   #include "hw/hw.h"
>>>   
>>> +/*
>>> + * Flags used as delimiter:
>>> + * 0xffffffff => MSB 32-bit all 1s
>>> + * 0xef10     => emulated (virtual) function IO
>>> + * 0x0000     => 16-bits reserved for flags
>>> + */
>>> +#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
>>> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
>>> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
>>> +#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
>>> +
>>>   static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
>>>                                     off_t off, bool iswrite)
>>>   {
>>> @@ -166,6 +180,65 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
>>>       return 0;
>>>   }
>>>   
>>> +/* ---------------------------------------------------------------------- */
>>> +
>>> +static int vfio_save_setup(QEMUFile *f, void *opaque)
>>> +{
>>> +    VFIODevice *vbasedev = opaque;
>>> +    VFIOMigration *migration = vbasedev->migration;
>>> +    int ret;
>>> +
>>> +    trace_vfio_save_setup(vbasedev->name);
>>> +
>>> +    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>>> +
>>> +    if (migration->region.mmaps) {
>>> +        qemu_mutex_lock_iothread();
>>> +        ret = vfio_region_mmap(&migration->region);
>>> +        qemu_mutex_unlock_iothread();
>>> +        if (ret) {
>>> +            error_report("%s: Failed to mmap VFIO migration region %d: %s",
>>> +                         vbasedev->name, migration->region.nr,
>>> +                         strerror(-ret));
>>> +            error_report("%s: Falling back to slow path", vbasedev->name);
>>> +        }
>>> +    }
>>> +
>>> +    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
>>> +                                   VFIO_DEVICE_STATE_SAVING);
>>> +    if (ret) {
>>> +        error_report("%s: Failed to set state SAVING", vbasedev->name);
>>> +        return ret;
>>> +    }
>>> +
>>> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
>>> +
>>> +    ret = qemu_file_get_error(f);
>>> +    if (ret) {
>>> +        return ret;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void vfio_save_cleanup(void *opaque)
>>> +{
>>> +    VFIODevice *vbasedev = opaque;
>>> +    VFIOMigration *migration = vbasedev->migration;
>>> +
>>> +    if (migration->region.mmaps) {
>>> +        vfio_region_unmap(&migration->region);
>>> +    }
>>> +    trace_vfio_save_cleanup(vbasedev->name);
>>> +}
>>> +
>>> +static SaveVMHandlers savevm_vfio_handlers = {
>>> +    .save_setup = vfio_save_setup,
>>> +    .save_cleanup = vfio_save_cleanup,
>>> +};
>>> +
>>> +/* ---------------------------------------------------------------------- */
>>> +
>>>   static void vfio_vmstate_change(void *opaque, int running, RunState state)
>>>   {
>>>       VFIODevice *vbasedev = opaque;
>>> @@ -225,6 +298,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>>>                                  struct vfio_region_info *info)
>>>   {
>>>       int ret = -EINVAL;
>>> +    char id[256] = "";
>>> +    Object *obj;
>>>   
>>>       if (!vbasedev->ops->vfio_get_object) {
>>>           return ret;
>>> @@ -241,6 +316,22 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>>>           return ret;
>>>       }
>>>   
>>> +    obj = vbasedev->ops->vfio_get_object(vbasedev);
>>> +
>>> +    if (obj) {
>>> +        DeviceState *dev = DEVICE(obj);
>>> +        char *oid = vmstate_if_get_id(VMSTATE_IF(dev));
>>> +
>>> +        if (oid) {
>>> +            pstrcpy(id, sizeof(id), oid);
>>> +            pstrcat(id, sizeof(id), "/");
>>> +            g_free(oid);
>>> +        }
>>> +    }
>>> +    pstrcat(id, sizeof(id), "vfio");
>>
>> Alternatively (easier to review, matter of taste):
>>
>>   g_autofree char *path = NULL;
>>
>>   if (oid) {
>>     path = g_strdup_printf("%s/vfio",
>>                            vmstate_if_get_id(VMSTATE_IF(obj)););
>>   } else {
>>     path = g_strdup("vfio");
>>   }
>>   strpadcpy(id, sizeof(id), path, '\0');
> 
> Maybe, although it's a straight copy of the magic in unregister_savevm.
> 

Ok. Changing it as Philippe suggested.

Thanks,
Kirti

> Dave
> 
>>> +
>>> +    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
>>> +                         vbasedev);
>>>       vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
>>>                                                             vbasedev);
>>>       vbasedev->migration_state.notify = vfio_migration_state_notifier;
>>> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
>>> index bcb3fa7314d7..982d8dccb219 100644
>>> --- a/hw/vfio/trace-events
>>> +++ b/hw/vfio/trace-events
>>> @@ -152,3 +152,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
>>>   vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
>>>   vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
>>>   vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
>>> +vfio_save_setup(const char *name) " (%s)"
>>> +vfio_save_cleanup(const char *name) " (%s)"
>>>
>>
Kirti Wankhede Oct. 18, 2020, 5:40 p.m. UTC | #6
On 9/26/2020 1:50 AM, Alex Williamson wrote:
> On Wed, 23 Sep 2020 04:54:09 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
>> Define flags to be used as delimeter in migration file stream.
>> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
>> region from these functions at source during saving or pre-copy phase.
>> Set VFIO device state depending on VM's state. During live migration, VM is
>> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
>> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>> Reviewed-by: Neo Jia <cjia@nvidia.com>
>> ---
>>   hw/vfio/migration.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   hw/vfio/trace-events |  2 ++
>>   2 files changed, 93 insertions(+)
>>
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index f650fe9fc3c8..8e8adaa25779 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -8,12 +8,15 @@
>>    */
>>   
>>   #include "qemu/osdep.h"
>> +#include "qemu/main-loop.h"
>> +#include "qemu/cutils.h"
>>   #include <linux/vfio.h>
>>   
>>   #include "sysemu/runstate.h"
>>   #include "hw/vfio/vfio-common.h"
>>   #include "cpu.h"
>>   #include "migration/migration.h"
>> +#include "migration/vmstate.h"
>>   #include "migration/qemu-file.h"
>>   #include "migration/register.h"
>>   #include "migration/blocker.h"
>> @@ -25,6 +28,17 @@
>>   #include "trace.h"
>>   #include "hw/hw.h"
>>   
>> +/*
>> + * Flags used as delimiter:
>> + * 0xffffffff => MSB 32-bit all 1s
>> + * 0xef10     => emulated (virtual) function IO
>> + * 0x0000     => 16-bits reserved for flags
>> + */
>> +#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
>> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
>> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
>> +#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
>> +
>>   static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
>>                                     off_t off, bool iswrite)
>>   {
>> @@ -166,6 +180,65 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
>>       return 0;
>>   }
>>   
>> +/* ---------------------------------------------------------------------- */
>> +
>> +static int vfio_save_setup(QEMUFile *f, void *opaque)
>> +{
>> +    VFIODevice *vbasedev = opaque;
>> +    VFIOMigration *migration = vbasedev->migration;
>> +    int ret;
>> +
>> +    trace_vfio_save_setup(vbasedev->name);
>> +
>> +    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>> +
>> +    if (migration->region.mmaps) {
>> +        qemu_mutex_lock_iothread();
>> +        ret = vfio_region_mmap(&migration->region);
>> +        qemu_mutex_unlock_iothread();
> 
> Please add a comment identifying why the iothread mutex lock is
> necessary here.
> 
>> +        if (ret) {
>> +            error_report("%s: Failed to mmap VFIO migration region %d: %s",
>> +                         vbasedev->name, migration->region.nr,
> 
> We don't support multiple migration regions, is it useful to include
> the region index here?
> 

Ok. Removing region.nr


>> +                         strerror(-ret));
>> +            error_report("%s: Falling back to slow path", vbasedev->name);
>> +        }
>> +    }
>> +
>> +    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
>> +                                   VFIO_DEVICE_STATE_SAVING);
>> +    if (ret) {
>> +        error_report("%s: Failed to set state SAVING", vbasedev->name);
>> +        return ret;
>> +    }
> 
> Again, doesn't match the function semantics that success only means the
> device is in a non-error state, maybe the one that was asked for.
> 

Fixed in patch 05.

>> +
>> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
> 
> What's the overall purpose of writing these markers into the migration
> stream?  vfio_load_state() doesn't do anything with this other than
> validate that the end-of-state immediately follows.  Is this a
> placeholder for something in the future?
> 

Its not placeholder, it is used in vfio_load_state() to determine upto 
what point to loop to fetch data for each state, otherwise how would we 
know when to stop reading data from stream for that VFIO device.

>> +
>> +    ret = qemu_file_get_error(f);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void vfio_save_cleanup(void *opaque)
>> +{
>> +    VFIODevice *vbasedev = opaque;
>> +    VFIOMigration *migration = vbasedev->migration;
>> +
>> +    if (migration->region.mmaps) {
>> +        vfio_region_unmap(&migration->region);
>> +    }
>> +    trace_vfio_save_cleanup(vbasedev->name);
>> +}
>> +
>> +static SaveVMHandlers savevm_vfio_handlers = {
>> +    .save_setup = vfio_save_setup,
>> +    .save_cleanup = vfio_save_cleanup,
>> +};
>> +
>> +/* ---------------------------------------------------------------------- */
>> +
>>   static void vfio_vmstate_change(void *opaque, int running, RunState state)
>>   {
>>       VFIODevice *vbasedev = opaque;
>> @@ -225,6 +298,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>>                                  struct vfio_region_info *info)
>>   {
>>       int ret = -EINVAL;
>> +    char id[256] = "";
>> +    Object *obj;
>>   
>>       if (!vbasedev->ops->vfio_get_object) {
>>           return ret;
>> @@ -241,6 +316,22 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>>           return ret;
>>       }
>>   
>> +    obj = vbasedev->ops->vfio_get_object(vbasedev);
>> +
>> +    if (obj) {
>> +        DeviceState *dev = DEVICE(obj);
>> +        char *oid = vmstate_if_get_id(VMSTATE_IF(dev));
>> +
>> +        if (oid) {
>> +            pstrcpy(id, sizeof(id), oid);
>> +            pstrcat(id, sizeof(id), "/");
>> +            g_free(oid);
>> +        }
>> +    }
> 
> Here's where vfio_migration_init() starts using vfio_get_object() as I
> referenced back on patch 04.  We might as well get the object before
> calling vfio_migration_region_init() and then pass the object.  The
> conditional branch to handle obj is strange here too, it's fatal if
> vfio_migration_region_init() doesn't find an object, why do we handle
> it as optional here?  Also, what is this doing?  Comments would be
> nice...  Thanks,
> 

Changing it as I mentioned in other patch reply.

Thanks.

> Alex
> 
>> +    pstrcat(id, sizeof(id), "vfio");
>> +
>> +    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
>> +                         vbasedev);
>>       vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
>>                                                             vbasedev);
>>       vbasedev->migration_state.notify = vfio_migration_state_notifier;
>> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
>> index bcb3fa7314d7..982d8dccb219 100644
>> --- a/hw/vfio/trace-events
>> +++ b/hw/vfio/trace-events
>> @@ -152,3 +152,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
>>   vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
>>   vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
>>   vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
>> +vfio_save_setup(const char *name) " (%s)"
>> +vfio_save_cleanup(const char *name) " (%s)"
>
Kirti Wankhede Oct. 18, 2020, 8:55 p.m. UTC | #7
On 9/25/2020 5:23 PM, Cornelia Huck wrote:
> On Wed, 23 Sep 2020 04:54:09 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
>> Define flags to be used as delimeter in migration file stream.
>> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
>> region from these functions at source during saving or pre-copy phase.
>> Set VFIO device state depending on VM's state. During live migration, VM is
>> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
>> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>> Reviewed-by: Neo Jia <cjia@nvidia.com>
>> ---
>>   hw/vfio/migration.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   hw/vfio/trace-events |  2 ++
>>   2 files changed, 93 insertions(+)
>>
> 
> (...)
> 
>> +/*
>> + * Flags used as delimiter:
>> + * 0xffffffff => MSB 32-bit all 1s
>> + * 0xef10     => emulated (virtual) function IO
> 
> Where is this value coming from?
> 

Delimiter flags should be unique and this is a magic number that 
represents (e)mulated (f)unction (10) representing IO.

>> + * 0x0000     => 16-bits reserved for flags
>> + */
>> +#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
>> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
>> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
>> +#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
> 
> I think we need some more documentation what these values mean and how
> they are used. From reading ahead a bit, it seems there is always
> supposed to be a pair of DEV_*_STATE and END_OF_STATE framing some kind
> of data?
> 

Adding comment as below, hope it helps.

/*
  * Flags used as delimiter for VFIO devices should be unique in 
migration stream
  * These flags are composed as:
  * 0xffffffff => MSB 32-bit all 1s
  * 0xef10     => Magic ID, represents emulated (virtual) function IO
  * 0x0000     => 16-bits reserved for flags
  *
  * Flags _DEV_CONFIG_STATE, _DEV_SETUP_STATE and _DEV_DATA_STATE marks 
start of
  * respective states in migration stream.
  * FLAG _END_OF_STATE indicates end of current state, state could be any
  * of above states.
  */

Thanks,
Kirti
Cornelia Huck Oct. 20, 2020, 3:51 p.m. UTC | #8
On Mon, 19 Oct 2020 02:25:28 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> On 9/25/2020 5:23 PM, Cornelia Huck wrote:
> > On Wed, 23 Sep 2020 04:54:09 +0530
> > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> >   
> >> Define flags to be used as delimeter in migration file stream.
> >> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
> >> region from these functions at source during saving or pre-copy phase.
> >> Set VFIO device state depending on VM's state. During live migration, VM is
> >> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
> >> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
> >>
> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> >> Reviewed-by: Neo Jia <cjia@nvidia.com>
> >> ---
> >>   hw/vfio/migration.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>   hw/vfio/trace-events |  2 ++
> >>   2 files changed, 93 insertions(+)
> >>  
> > 
> > (...)
> >   
> >> +/*
> >> + * Flags used as delimiter:
> >> + * 0xffffffff => MSB 32-bit all 1s
> >> + * 0xef10     => emulated (virtual) function IO  
> > 
> > Where is this value coming from?
> >   
> 
> Delimiter flags should be unique and this is a magic number that 
> represents (e)mulated (f)unction (10) representing IO.
> 
> >> + * 0x0000     => 16-bits reserved for flags
> >> + */
> >> +#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
> >> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
> >> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
> >> +#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)  
> > 
> > I think we need some more documentation what these values mean and how
> > they are used. From reading ahead a bit, it seems there is always
> > supposed to be a pair of DEV_*_STATE and END_OF_STATE framing some kind
> > of data?
> >   
> 
> Adding comment as below, hope it helps.
> 
> /*
>   * Flags used as delimiter for VFIO devices should be unique in 
> migration stream

Maybe

"Flags to be used as unique delimiters for VFIO devices in the
migration stream" ?

>   * These flags are composed as:
>   * 0xffffffff => MSB 32-bit all 1s
>   * 0xef10     => Magic ID, represents emulated (virtual) function IO
>   * 0x0000     => 16-bits reserved for flags
>   *
>   * Flags _DEV_CONFIG_STATE, _DEV_SETUP_STATE and _DEV_DATA_STATE marks 
> start of
>   * respective states in migration stream.
>   * FLAG _END_OF_STATE indicates end of current state, state could be any
>   * of above states.
>   */

"The beginning of state information is marked by _DEV_CONFIG_STATE,
_DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
certain state information is marked by _END_OF_STATE." ?

> 
> Thanks,
> Kirti
>
diff mbox series

Patch

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index f650fe9fc3c8..8e8adaa25779 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -8,12 +8,15 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
 #include <linux/vfio.h>
 
 #include "sysemu/runstate.h"
 #include "hw/vfio/vfio-common.h"
 #include "cpu.h"
 #include "migration/migration.h"
+#include "migration/vmstate.h"
 #include "migration/qemu-file.h"
 #include "migration/register.h"
 #include "migration/blocker.h"
@@ -25,6 +28,17 @@ 
 #include "trace.h"
 #include "hw/hw.h"
 
+/*
+ * Flags used as delimiter:
+ * 0xffffffff => MSB 32-bit all 1s
+ * 0xef10     => emulated (virtual) function IO
+ * 0x0000     => 16-bits reserved for flags
+ */
+#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
+#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
+#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
+#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
+
 static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
                                   off_t off, bool iswrite)
 {
@@ -166,6 +180,65 @@  static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
     return 0;
 }
 
+/* ---------------------------------------------------------------------- */
+
+static int vfio_save_setup(QEMUFile *f, void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    int ret;
+
+    trace_vfio_save_setup(vbasedev->name);
+
+    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
+
+    if (migration->region.mmaps) {
+        qemu_mutex_lock_iothread();
+        ret = vfio_region_mmap(&migration->region);
+        qemu_mutex_unlock_iothread();
+        if (ret) {
+            error_report("%s: Failed to mmap VFIO migration region %d: %s",
+                         vbasedev->name, migration->region.nr,
+                         strerror(-ret));
+            error_report("%s: Falling back to slow path", vbasedev->name);
+        }
+    }
+
+    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
+                                   VFIO_DEVICE_STATE_SAVING);
+    if (ret) {
+        error_report("%s: Failed to set state SAVING", vbasedev->name);
+        return ret;
+    }
+
+    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+
+    ret = qemu_file_get_error(f);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
+static void vfio_save_cleanup(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+
+    if (migration->region.mmaps) {
+        vfio_region_unmap(&migration->region);
+    }
+    trace_vfio_save_cleanup(vbasedev->name);
+}
+
+static SaveVMHandlers savevm_vfio_handlers = {
+    .save_setup = vfio_save_setup,
+    .save_cleanup = vfio_save_cleanup,
+};
+
+/* ---------------------------------------------------------------------- */
+
 static void vfio_vmstate_change(void *opaque, int running, RunState state)
 {
     VFIODevice *vbasedev = opaque;
@@ -225,6 +298,8 @@  static int vfio_migration_init(VFIODevice *vbasedev,
                                struct vfio_region_info *info)
 {
     int ret = -EINVAL;
+    char id[256] = "";
+    Object *obj;
 
     if (!vbasedev->ops->vfio_get_object) {
         return ret;
@@ -241,6 +316,22 @@  static int vfio_migration_init(VFIODevice *vbasedev,
         return ret;
     }
 
+    obj = vbasedev->ops->vfio_get_object(vbasedev);
+
+    if (obj) {
+        DeviceState *dev = DEVICE(obj);
+        char *oid = vmstate_if_get_id(VMSTATE_IF(dev));
+
+        if (oid) {
+            pstrcpy(id, sizeof(id), oid);
+            pstrcat(id, sizeof(id), "/");
+            g_free(oid);
+        }
+    }
+    pstrcat(id, sizeof(id), "vfio");
+
+    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
+                         vbasedev);
     vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
                                                           vbasedev);
     vbasedev->migration_state.notify = vfio_migration_state_notifier;
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index bcb3fa7314d7..982d8dccb219 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -152,3 +152,5 @@  vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
 vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
 vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
 vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
+vfio_save_setup(const char *name) " (%s)"
+vfio_save_cleanup(const char *name) " (%s)"