diff mbox series

[QEMU,v23,08/18] vfio: Register SaveVMHandlers for VFIO device

Message ID 1589999088-31477-9-git-send-email-kwankhede@nvidia.com
State New
Headers show
Series Add migration support for VFIO devices | expand

Commit Message

Kirti Wankhede May 20, 2020, 6:24 p.m. UTC
Define flags to be used as delimiters in the migration file stream.
Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
region from these functions at source during saving or pre-copy phase.
Set VFIO device state depending on VM's state. During live migration, VM is
running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
device. During save-restore, VM is paused, _SAVING state is set for VFIO device.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
---
 hw/vfio/migration.c  | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/trace-events |  2 ++
 2 files changed, 75 insertions(+)

Comments

Dr. David Alan Gilbert May 21, 2020, 2:18 p.m. UTC | #1
* Kirti Wankhede (kwankhede@nvidia.com) wrote:
> Define flags to be used as delimeter in migration file stream.
> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
> region from these functions at source during saving or pre-copy phase.
> Set VFIO device state depending on VM's state. During live migration, VM is
> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/migration.c  | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/trace-events |  2 ++
>  2 files changed, 75 insertions(+)
> 
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index c2f5564b51c3..773c8d16b1c1 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -8,12 +8,14 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/main-loop.h"
>  #include <linux/vfio.h>
>  
>  #include "sysemu/runstate.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "cpu.h"
>  #include "migration/migration.h"
> +#include "migration/vmstate.h"
>  #include "migration/qemu-file.h"
>  #include "migration/register.h"
>  #include "migration/blocker.h"
> @@ -24,6 +26,17 @@
>  #include "pci.h"
>  #include "trace.h"
>  
> +/*
> + * Flags used as delimiter:
> + * 0xffffffff => MSB 32-bit all 1s
> + * 0xef10     => emulated (virtual) function IO
> + * 0x0000     => 16-bits reserved for flags
> + */
> +#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
> +#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
> +
>  static void vfio_migration_region_exit(VFIODevice *vbasedev)
>  {
>      VFIOMigration *migration = vbasedev->migration;
> @@ -126,6 +139,64 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
>      return 0;
>  }
>  
> +/* ---------------------------------------------------------------------- */
> +
> +static int vfio_save_setup(QEMUFile *f, void *opaque)
> +{
> +    VFIODevice *vbasedev = opaque;
> +    VFIOMigration *migration = vbasedev->migration;
> +    int ret;
> +
> +    trace_vfio_save_setup(vbasedev->name);
> +
> +    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
> +
> +    if (migration->region.mmaps) {
> +        qemu_mutex_lock_iothread();
> +        ret = vfio_region_mmap(&migration->region);
> +        qemu_mutex_unlock_iothread();
> +        if (ret) {
> +            error_report("%s: Failed to mmap VFIO migration region %d: %s",
> +                         vbasedev->name, migration->region.index,
> +                         strerror(-ret));
> +            return ret;
> +        }
> +    }
> +
> +    ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING);
> +    if (ret) {
> +        error_report("%s: Failed to set state SAVING", vbasedev->name);
> +        return ret;
> +    }
> +
> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
> +
> +    ret = qemu_file_get_error(f);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +
> +static void vfio_save_cleanup(void *opaque)
> +{
> +    VFIODevice *vbasedev = opaque;
> +    VFIOMigration *migration = vbasedev->migration;
> +
> +    if (migration->region.mmaps) {
> +        vfio_region_unmap(&migration->region);
> +    }
> +    trace_vfio_save_cleanup(vbasedev->name);
> +}
> +
> +static SaveVMHandlers savevm_vfio_handlers = {
> +    .save_setup = vfio_save_setup,
> +    .save_cleanup = vfio_save_cleanup,
> +};
> +
> +/* ---------------------------------------------------------------------- */
> +
>  static void vfio_vmstate_change(void *opaque, int running, RunState state)
>  {
>      VFIODevice *vbasedev = opaque;
> @@ -192,6 +263,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>          return ret;
>      }
>  
> +    register_savevm_live("vfio", VMSTATE_INSTANCE_ID_ANY, 1,
> +                         &savevm_vfio_handlers, vbasedev);

Hi,
  This is still the only bit which worries me, and I saw your note
saying you'd tested it; to calm my nerves, can you run with the
'qemu_loadvm_state_section_startfull' trace enabled with 2 devices
and show me the output and qemu command line?
I'm trying to figure out how they end up represented in the stream.

Dave


>      vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
>                                                            vbasedev);
>  
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index bd3d47b005cb..86c18def016e 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -149,3 +149,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
>  vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
>  vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
>  vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
> +vfio_save_setup(const char *name) " (%s)"
> +vfio_save_cleanup(const char *name) " (%s)"
> -- 
> 2.7.0
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Kirti Wankhede May 21, 2020, 6 p.m. UTC | #2
On 5/21/2020 7:48 PM, Dr. David Alan Gilbert wrote:
> * Kirti Wankhede (kwankhede@nvidia.com) wrote:
>> Define flags to be used as delimeter in migration file stream.
>> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
>> region from these functions at source during saving or pre-copy phase.
>> Set VFIO device state depending on VM's state. During live migration, VM is
>> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
>> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>> Reviewed-by: Neo Jia <cjia@nvidia.com>
>> ---
>>   hw/vfio/migration.c  | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   hw/vfio/trace-events |  2 ++
>>   2 files changed, 75 insertions(+)
>>
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index c2f5564b51c3..773c8d16b1c1 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -8,12 +8,14 @@
>>    */
>>   
>>   #include "qemu/osdep.h"
>> +#include "qemu/main-loop.h"
>>   #include <linux/vfio.h>
>>   
>>   #include "sysemu/runstate.h"
>>   #include "hw/vfio/vfio-common.h"
>>   #include "cpu.h"
>>   #include "migration/migration.h"
>> +#include "migration/vmstate.h"
>>   #include "migration/qemu-file.h"
>>   #include "migration/register.h"
>>   #include "migration/blocker.h"
>> @@ -24,6 +26,17 @@
>>   #include "pci.h"
>>   #include "trace.h"
>>   
>> +/*
>> + * Flags used as delimiter:
>> + * 0xffffffff => MSB 32-bit all 1s
>> + * 0xef10     => emulated (virtual) function IO
>> + * 0x0000     => 16-bits reserved for flags
>> + */
>> +#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
>> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
>> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
>> +#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
>> +
>>   static void vfio_migration_region_exit(VFIODevice *vbasedev)
>>   {
>>       VFIOMigration *migration = vbasedev->migration;
>> @@ -126,6 +139,64 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
>>       return 0;
>>   }
>>   
>> +/* ---------------------------------------------------------------------- */
>> +
>> +static int vfio_save_setup(QEMUFile *f, void *opaque)
>> +{
>> +    VFIODevice *vbasedev = opaque;
>> +    VFIOMigration *migration = vbasedev->migration;
>> +    int ret;
>> +
>> +    trace_vfio_save_setup(vbasedev->name);
>> +
>> +    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>> +
>> +    if (migration->region.mmaps) {
>> +        qemu_mutex_lock_iothread();
>> +        ret = vfio_region_mmap(&migration->region);
>> +        qemu_mutex_unlock_iothread();
>> +        if (ret) {
>> +            error_report("%s: Failed to mmap VFIO migration region %d: %s",
>> +                         vbasedev->name, migration->region.index,
>> +                         strerror(-ret));
>> +            return ret;
>> +        }
>> +    }
>> +
>> +    ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING);
>> +    if (ret) {
>> +        error_report("%s: Failed to set state SAVING", vbasedev->name);
>> +        return ret;
>> +    }
>> +
>> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
>> +
>> +    ret = qemu_file_get_error(f);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void vfio_save_cleanup(void *opaque)
>> +{
>> +    VFIODevice *vbasedev = opaque;
>> +    VFIOMigration *migration = vbasedev->migration;
>> +
>> +    if (migration->region.mmaps) {
>> +        vfio_region_unmap(&migration->region);
>> +    }
>> +    trace_vfio_save_cleanup(vbasedev->name);
>> +}
>> +
>> +static SaveVMHandlers savevm_vfio_handlers = {
>> +    .save_setup = vfio_save_setup,
>> +    .save_cleanup = vfio_save_cleanup,
>> +};
>> +
>> +/* ---------------------------------------------------------------------- */
>> +
>>   static void vfio_vmstate_change(void *opaque, int running, RunState state)
>>   {
>>       VFIODevice *vbasedev = opaque;
>> @@ -192,6 +263,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>>           return ret;
>>       }
>>   
>> +    register_savevm_live("vfio", VMSTATE_INSTANCE_ID_ANY, 1,
>> +                         &savevm_vfio_handlers, vbasedev);
> 
> Hi,
>    This is still the only bit which worries me, and I saw your note
> saying you'd tested it; to calm my nerves, can you run with the
> 'qemu_loadvm_state_section_startfull' trace enabled with 2 devices
> and show me the output and qemu command line?
> I'm trying to figure out how they end up represented in the stream.
> 

Created mtty devices for source VM:
echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1233" > 
/sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create
echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1234" > 
/sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create

for destination VM:
echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1235" > 
/sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create
echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1236" > 
/sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create

Source qemu-cmdline:
/usr/libexec/qemu-kvm \
  -name guest=rhel75-mig,debug-threads=on \
  -machine pc-i440fx-3.1,accel=kvm,usb=off,dump-guest-core=off \
  -cpu SandyBridge,vme=on,hypervisor=on,arat=on,xsaveopt=on \
  -m 2048 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 \
  -uuid eefb718c-137c-d416-e573-dd74ecd3490d \
  -drive 
file=/home/vm/rhel-75.qcow2,format=qcow2,if=none,id=drive-ide0-0-0,cache=none 
\
  -device 
ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1,write-cache=on 
\
  -vnc 127.0.0.1:0 \
  -device rtl8139,netdev=net0,mac=52:54:b2:88:86:2a,bus=pci.0,addr=0x3 
-netdev tap,id=net0,script=/root/qemu-ifup,downscript=no \
  -device 
vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1233 
\
  -device 
vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1234 
\
  --trace events=/root/vfio_events \
  -monitor unix:/tmp/qmp_socket1,server,nowait \
  -serial stdio \
  -msg timestamp=on

Destination qemu-cmdline:
/usr/libexec/qemu-kvm \
  -name guest=rhel75-mig,debug-threads=on \
  -machine pc-i440fx-3.1,accel=kvm,usb=off,dump-guest-core=off \
  -cpu SandyBridge,vme=on,hypervisor=on,arat=on,xsaveopt=on \
  -m 2048 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 \
  -uuid eefb718c-137c-d416-e573-dd74ecd3490d \
  -drive 
file=/home/vm/rhel-75.qcow2,format=qcow2,if=none,id=drive-ide0-0-0,cache=none 
\
  -device 
ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1,write-cache=on 
\
  -vnc 127.0.0.1:1 \
  -device rtl8139,netdev=net0,mac=52:54:b2:88:86:2a,bus=pci.0,addr=0x3 
-netdev tap,id=net0,script=/root/qemu-ifup,downscript=no \
  -device 
vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1235 
\
  -device 
vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1236 
\
  -incoming unix:/tmp/mig_socket \
  --trace events=/root/vfio_events \
  -monitor unix:/tmp/qmp_socket2,server,nowait \
  -serial stdio \
  -msg timestamp=on

Migrate:
echo "migrate_set_speed 0" | sudo nc -U /tmp/qmp_socket1
echo "migrate -d unix:/tmp/mig_socket" | sudo nc -U /tmp/qmp_socket1

After migration, 'qemu_loadvm_state_section_startfull' traces:

qemu_loadvm_state_section_startfull 0.000 pid=1457 section_id=0x2 
idstr=b'ram' instance_id=0x0 version_id=0x4
qemu_loadvm_state_section_startfull 515.606 pid=1457 section_id=0x2e 
idstr=b'vfio' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 10.661 pid=1457 section_id=0x2f 
idstr=b'vfio' instance_id=0x1 version_id=0x1
qemu_loadvm_state_section_startfull 1120000.237 pid=1457 section_id=0x0 
idstr=b'timer' instance_id=0x0 version_id=0x2
qemu_loadvm_state_section_startfull 9.058 pid=1457 section_id=0x4 
idstr=b'cpu_common' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 26.453 pid=1457 section_id=0x5 
idstr=b'cpu' instance_id=0x0 version_id=0xc
qemu_loadvm_state_section_startfull 105.173 pid=1457 section_id=0x6 
idstr=b'kvm-tpr-opt' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 940.028 pid=1457 section_id=0x7 
idstr=b'apic' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 69.939 pid=1457 section_id=0x8 
idstr=b'cpu_common' instance_id=0x1 version_id=0x1
qemu_loadvm_state_section_startfull 14.319 pid=1457 section_id=0x9 
idstr=b'cpu' instance_id=0x1 version_id=0xc
qemu_loadvm_state_section_startfull 102.986 pid=1457 section_id=0xa 
idstr=b'apic' instance_id=0x1 version_id=0x3
qemu_loadvm_state_section_startfull 107.910 pid=1457 section_id=0xb 
idstr=b'kvmclock' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 8.349 pid=1457 section_id=0xc 
idstr=b'fw_cfg' instance_id=0x0 version_id=0x2
qemu_loadvm_state_section_startfull 8.603 pid=1457 section_id=0xd 
idstr=b'PCIBUS' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 6.557 pid=1457 section_id=0xe 
idstr=b'0000:00:00.0/I440FX' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 633.727 pid=1457 section_id=0xf 
idstr=b'0000:00:01.0/PIIX3' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 14.907 pid=1457 section_id=0x10 
idstr=b'i8259' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 11.465 pid=1457 section_id=0x11 
idstr=b'i8259' instance_id=0x1 version_id=0x1
qemu_loadvm_state_section_startfull 5.663 pid=1457 section_id=0x12 
idstr=b'ioapic' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 11.787 pid=1457 section_id=0x13 
idstr=b'0000:00:02.0/vga' instance_id=0x0 version_id=0x2
qemu_loadvm_state_section_startfull 1718.618 pid=1457 section_id=0x14 
idstr=b'hpet' instance_id=0x0 version_id=0x2
qemu_loadvm_state_section_startfull 16.212 pid=1457 section_id=0x15 
idstr=b'mc146818rtc' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 9.946 pid=1457 section_id=0x16 
idstr=b'i8254' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 12.879 pid=1457 section_id=0x17 
idstr=b'pcspk' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 3.115 pid=1457 section_id=0x18 
idstr=b'dma' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 10.432 pid=1457 section_id=0x19 
idstr=b'dma' instance_id=0x1 version_id=0x1
qemu_loadvm_state_section_startfull 12.263 pid=1457 section_id=0x1a 
idstr=b'serial' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 7.299 pid=1457 section_id=0x1b 
idstr=b'parallel_isa' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 3.399 pid=1457 section_id=0x1c 
idstr=b'fdc' instance_id=0x0 version_id=0x2
qemu_loadvm_state_section_startfull 33.307 pid=1457 section_id=0x1d 
idstr=b'ps2kbd' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 6.961 pid=1457 section_id=0x1e 
idstr=b'ps2mouse' instance_id=0x0 version_id=0x2
qemu_loadvm_state_section_startfull 5.485 pid=1457 section_id=0x1f 
idstr=b'pckbd' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 3.984 pid=1457 section_id=0x20 
idstr=b'vmmouse' instance_id=0x0 version_id=0x0
qemu_loadvm_state_section_startfull 105.948 pid=1457 section_id=0x21 
idstr=b'port92' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 2.443 pid=1457 section_id=0x22 
idstr=b'0000:00:01.1/ide' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 1094.861 pid=1457 section_id=0x23 
idstr=b'i2c_bus' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 3.416 pid=1457 section_id=0x24 
idstr=b'0000:00:01.3/piix4_pm' instance_id=0x0 version_id=0x3
qemu_loadvm_state_section_startfull 2266.518 pid=1457 section_id=0x2d 
idstr=b'0000:00:03.0/rtl8139' instance_id=0x0 version_id=0x5
qemu_loadvm_state_section_startfull 1619.840 pid=1457 section_id=0x30 
idstr=b'acpi_build' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 4.200 pid=1457 section_id=0x31 
idstr=b'globalstate' instance_id=0x0 version_id=0x1

Thanks,
Kirti
Dr. David Alan Gilbert May 21, 2020, 7:35 p.m. UTC | #3
* Kirti Wankhede (kwankhede@nvidia.com) wrote:
> 
> 
> On 5/21/2020 7:48 PM, Dr. David Alan Gilbert wrote:
> > * Kirti Wankhede (kwankhede@nvidia.com) wrote:
> > > Define flags to be used as delimeter in migration file stream.
> > > Added .save_setup and .save_cleanup functions. Mapped & unmapped migration
> > > region from these functions at source during saving or pre-copy phase.
> > > Set VFIO device state depending on VM's state. During live migration, VM is
> > > running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
> > > device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
> > > 
> > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> > > Reviewed-by: Neo Jia <cjia@nvidia.com>

<snip>

> > > +    register_savevm_live("vfio", VMSTATE_INSTANCE_ID_ANY, 1,
> > > +                         &savevm_vfio_handlers, vbasedev);
> > 
> > Hi,
> >    This is still the only bit which worries me, and I saw your note
> > saying you'd tested it; to calm my nerves, can you run with the
> > 'qemu_loadvm_state_section_startfull' trace enabled with 2 devices
> > and show me the output and qemu command line?
> > I'm trying to figure out how they end up represented in the stream.
> > 
> 
> Created mtty devices for source VM:
> echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1233" >
> /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create
> echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1234" >
> /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create
> 
> for destination VM:
> echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1235" >
> /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create
> echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1236" >
> /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create
> 
> Source qemu-cmdline:
> /usr/libexec/qemu-kvm \
>  -name guest=rhel75-mig,debug-threads=on \
>  -machine pc-i440fx-3.1,accel=kvm,usb=off,dump-guest-core=off \
>  -cpu SandyBridge,vme=on,hypervisor=on,arat=on,xsaveopt=on \
>  -m 2048 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 \
>  -uuid eefb718c-137c-d416-e573-dd74ecd3490d \
>  -drive
> file=/home/vm/rhel-75.qcow2,format=qcow2,if=none,id=drive-ide0-0-0,cache=none
> \
>  -device ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1,write-cache=on
> \
>  -vnc 127.0.0.1:0 \
>  -device rtl8139,netdev=net0,mac=52:54:b2:88:86:2a,bus=pci.0,addr=0x3
> -netdev tap,id=net0,script=/root/qemu-ifup,downscript=no \
>  -device
> vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1233
> \
>  -device
> vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1234
> \
>  --trace events=/root/vfio_events \
>  -monitor unix:/tmp/qmp_socket1,server,nowait \
>  -serial stdio \
>  -msg timestamp=on
> 
> Destination qemu-cmdline:
> /usr/libexec/qemu-kvm \
>  -name guest=rhel75-mig,debug-threads=on \
>  -machine pc-i440fx-3.1,accel=kvm,usb=off,dump-guest-core=off \
>  -cpu SandyBridge,vme=on,hypervisor=on,arat=on,xsaveopt=on \
>  -m 2048 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 \
>  -uuid eefb718c-137c-d416-e573-dd74ecd3490d \
>  -drive
> file=/home/vm/rhel-75.qcow2,format=qcow2,if=none,id=drive-ide0-0-0,cache=none
> \
>  -device ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1,write-cache=on
> \
>  -vnc 127.0.0.1:1 \
>  -device rtl8139,netdev=net0,mac=52:54:b2:88:86:2a,bus=pci.0,addr=0x3
> -netdev tap,id=net0,script=/root/qemu-ifup,downscript=no \
>  -device
> vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1235
> \
>  -device
> vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1236
> \
>  -incoming unix:/tmp/mig_socket \
>  --trace events=/root/vfio_events \
>  -monitor unix:/tmp/qmp_socket2,server,nowait \
>  -serial stdio \
>  -msg timestamp=on
> 
> Migrate:
> echo "migrate_set_speed 0" | sudo nc -U /tmp/qmp_socket1
> echo "migrate -d unix:/tmp/mig_socket" | sudo nc -U $/tmp/qmp_socket1
> 
> After migration, 'qemu_loadvm_state_section_startfull' traces:
> 
> qemu_loadvm_state_section_startfull 0.000 pid=1457 section_id=0x2
> idstr=b'ram' instance_id=0x0 version_id=0x4
> qemu_loadvm_state_section_startfull 515.606 pid=1457 section_id=0x2e
> idstr=b'vfio' instance_id=0x0 version_id=0x1
> qemu_loadvm_state_section_startfull 10.661 pid=1457 section_id=0x2f
> idstr=b'vfio' instance_id=0x1 version_id=0x1

Right, so this is my worry - we have two devices in the stream called
'vfio' with, I think, sequential IDs - what makes each of your source
vfio devices go to the correct destination vfio device?  If the two
devices were different vfio devices, how would you ensure that they
ended up in the right place?  There's no requirement for
the order of the qemu command line on the source and the destination
to be the same, or for qemu to maintain semantics based on the order -
but I bet that's the ordering we're getting here.

> idstr=b'0000:00:03.0/rtl8139' instance_id=0x0 version_id=0x5

Now you see that PCI NIC has a nice PCI address as its name in the
stream; if you have two NICs defined then they end up getting loaded
into the destination device with the same guest PCI address - so it's
nice and repeatable (especially if you specify the PCI device's address
on the command line).

Dave



> qemu_loadvm_state_section_startfull 1619.840 pid=1457 section_id=0x30
> idstr=b'acpi_build' instance_id=0x0 version_id=0x1
> qemu_loadvm_state_section_startfull 4.200 pid=1457 section_id=0x31
> idstr=b'globalstate' instance_id=0x0 version_id=0x1
> 
> Thanks,
> Kirti
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index c2f5564b51c3..773c8d16b1c1 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -8,12 +8,14 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
 #include <linux/vfio.h>
 
 #include "sysemu/runstate.h"
 #include "hw/vfio/vfio-common.h"
 #include "cpu.h"
 #include "migration/migration.h"
+#include "migration/vmstate.h"
 #include "migration/qemu-file.h"
 #include "migration/register.h"
 #include "migration/blocker.h"
@@ -24,6 +26,17 @@ 
 #include "pci.h"
 #include "trace.h"
 
+/*
+ * Flags used as delimiter:
+ * 0xffffffff => MSB 32-bit all 1s
+ * 0xef10     => emulated (virtual) function IO
+ * 0x0000     => 16-bits reserved for flags
+ */
+#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
+#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
+#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
+#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
+
 static void vfio_migration_region_exit(VFIODevice *vbasedev)
 {
     VFIOMigration *migration = vbasedev->migration;
@@ -126,6 +139,64 @@  static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
     return 0;
 }
 
+/* ---------------------------------------------------------------------- */
+
+static int vfio_save_setup(QEMUFile *f, void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    int ret;
+
+    trace_vfio_save_setup(vbasedev->name);
+
+    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
+
+    if (migration->region.mmaps) {
+        qemu_mutex_lock_iothread();
+        ret = vfio_region_mmap(&migration->region);
+        qemu_mutex_unlock_iothread();
+        if (ret) {
+            error_report("%s: Failed to mmap VFIO migration region %d: %s",
+                         vbasedev->name, migration->region.index,
+                         strerror(-ret));
+            return ret;
+        }
+    }
+
+    ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING);
+    if (ret) {
+        error_report("%s: Failed to set state SAVING", vbasedev->name);
+        return ret;
+    }
+
+    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+
+    ret = qemu_file_get_error(f);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
+static void vfio_save_cleanup(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+
+    if (migration->region.mmaps) {
+        vfio_region_unmap(&migration->region);
+    }
+    trace_vfio_save_cleanup(vbasedev->name);
+}
+
+static SaveVMHandlers savevm_vfio_handlers = {
+    .save_setup = vfio_save_setup,
+    .save_cleanup = vfio_save_cleanup,
+};
+
+/* ---------------------------------------------------------------------- */
+
 static void vfio_vmstate_change(void *opaque, int running, RunState state)
 {
     VFIODevice *vbasedev = opaque;
@@ -192,6 +263,8 @@  static int vfio_migration_init(VFIODevice *vbasedev,
         return ret;
     }
 
+    register_savevm_live("vfio", VMSTATE_INSTANCE_ID_ANY, 1,
+                         &savevm_vfio_handlers, vbasedev);
     vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
                                                           vbasedev);
 
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index bd3d47b005cb..86c18def016e 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -149,3 +149,5 @@  vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
 vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
 vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
 vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
+vfio_save_setup(const char *name) " (%s)"
+vfio_save_cleanup(const char *name) " (%s)"