Message ID | 1589999088-31477-9-git-send-email-kwankhede@nvidia.com |
---|---|
State | New |
Headers | show |
Series | Add migration support for VFIO devices | expand |
* Kirti Wankhede (kwankhede@nvidia.com) wrote: > Define flags to be used as delimeter in migration file stream. > Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > region from these functions at source during saving or pre-copy phase. > Set VFIO device state depending on VM's state. During live migration, VM is > running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > hw/vfio/migration.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 2 ++ > 2 files changed, 75 insertions(+) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index c2f5564b51c3..773c8d16b1c1 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -8,12 +8,14 @@ > */ > > #include "qemu/osdep.h" > +#include "qemu/main-loop.h" > #include <linux/vfio.h> > > #include "sysemu/runstate.h" > #include "hw/vfio/vfio-common.h" > #include "cpu.h" > #include "migration/migration.h" > +#include "migration/vmstate.h" > #include "migration/qemu-file.h" > #include "migration/register.h" > #include "migration/blocker.h" > @@ -24,6 +26,17 @@ > #include "pci.h" > #include "trace.h" > > +/* > + * Flags used as delimiter: > + * 0xffffffff => MSB 32-bit all 1s > + * 0xef10 => emulated (virtual) function IO > + * 0x0000 => 16-bits reserved for flags > + */ > +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) > +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) > +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) > +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) > + > static void vfio_migration_region_exit(VFIODevice *vbasedev) > { > VFIOMigration *migration = vbasedev->migration; > @@ -126,6 +139,64 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > return 0; > 
} > > +/* ---------------------------------------------------------------------- */ > + > +static int vfio_save_setup(QEMUFile *f, void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + int ret; > + > + trace_vfio_save_setup(vbasedev->name); > + > + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); > + > + if (migration->region.mmaps) { > + qemu_mutex_lock_iothread(); > + ret = vfio_region_mmap(&migration->region); > + qemu_mutex_unlock_iothread(); > + if (ret) { > + error_report("%s: Failed to mmap VFIO migration region %d: %s", > + vbasedev->name, migration->region.index, > + strerror(-ret)); > + return ret; > + } > + } > + > + ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING); > + if (ret) { > + error_report("%s: Failed to set state SAVING", vbasedev->name); > + return ret; > + } > + > + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); > + > + ret = qemu_file_get_error(f); > + if (ret) { > + return ret; > + } > + > + return 0; > +} > + > +static void vfio_save_cleanup(void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + > + if (migration->region.mmaps) { > + vfio_region_unmap(&migration->region); > + } > + trace_vfio_save_cleanup(vbasedev->name); > +} > + > +static SaveVMHandlers savevm_vfio_handlers = { > + .save_setup = vfio_save_setup, > + .save_cleanup = vfio_save_cleanup, > +}; > + > +/* ---------------------------------------------------------------------- */ > + > static void vfio_vmstate_change(void *opaque, int running, RunState state) > { > VFIODevice *vbasedev = opaque; > @@ -192,6 +263,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, > return ret; > } > > + register_savevm_live("vfio", VMSTATE_INSTANCE_ID_ANY, 1, > + &savevm_vfio_handlers, vbasedev); Hi, This is still the only bit which worries me, and I saw your note saying you'd tested it; to calm my nerves, can you run with the 
'qemu_loadvm_state_section_startfull' trace enabled with 2 devices and show me the output and qemu command line? I'm trying to figure out how they end up represented in the stream. Dave > vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, > vbasedev); > > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index bd3d47b005cb..86c18def016e 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -149,3 +149,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" > vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" > vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" > +vfio_save_setup(const char *name) " (%s)" > +vfio_save_cleanup(const char *name) " (%s)" > -- > 2.7.0 > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 5/21/2020 7:48 PM, Dr. David Alan Gilbert wrote: > * Kirti Wankhede (kwankhede@nvidia.com) wrote: >> Define flags to be used as delimeter in migration file stream. >> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration >> region from these functions at source during saving or pre-copy phase. >> Set VFIO device state depending on VM's state. During live migration, VM is >> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO >> device. During save-restore, VM is paused, _SAVING state is set for VFIO device. >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Reviewed-by: Neo Jia <cjia@nvidia.com> >> --- >> hw/vfio/migration.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ >> hw/vfio/trace-events | 2 ++ >> 2 files changed, 75 insertions(+) >> >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >> index c2f5564b51c3..773c8d16b1c1 100644 >> --- a/hw/vfio/migration.c >> +++ b/hw/vfio/migration.c >> @@ -8,12 +8,14 @@ >> */ >> >> #include "qemu/osdep.h" >> +#include "qemu/main-loop.h" >> #include <linux/vfio.h> >> >> #include "sysemu/runstate.h" >> #include "hw/vfio/vfio-common.h" >> #include "cpu.h" >> #include "migration/migration.h" >> +#include "migration/vmstate.h" >> #include "migration/qemu-file.h" >> #include "migration/register.h" >> #include "migration/blocker.h" >> @@ -24,6 +26,17 @@ >> #include "pci.h" >> #include "trace.h" >> >> +/* >> + * Flags used as delimiter: >> + * 0xffffffff => MSB 32-bit all 1s >> + * 0xef10 => emulated (virtual) function IO >> + * 0x0000 => 16-bits reserved for flags >> + */ >> +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) >> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) >> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) >> +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) >> + >> static void vfio_migration_region_exit(VFIODevice *vbasedev) >> { >> VFIOMigration *migration = vbasedev->migration; >> 
@@ -126,6 +139,64 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, >> return 0; >> } >> >> +/* ---------------------------------------------------------------------- */ >> + >> +static int vfio_save_setup(QEMUFile *f, void *opaque) >> +{ >> + VFIODevice *vbasedev = opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + int ret; >> + >> + trace_vfio_save_setup(vbasedev->name); >> + >> + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); >> + >> + if (migration->region.mmaps) { >> + qemu_mutex_lock_iothread(); >> + ret = vfio_region_mmap(&migration->region); >> + qemu_mutex_unlock_iothread(); >> + if (ret) { >> + error_report("%s: Failed to mmap VFIO migration region %d: %s", >> + vbasedev->name, migration->region.index, >> + strerror(-ret)); >> + return ret; >> + } >> + } >> + >> + ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING); >> + if (ret) { >> + error_report("%s: Failed to set state SAVING", vbasedev->name); >> + return ret; >> + } >> + >> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); >> + >> + ret = qemu_file_get_error(f); >> + if (ret) { >> + return ret; >> + } >> + >> + return 0; >> +} >> + >> +static void vfio_save_cleanup(void *opaque) >> +{ >> + VFIODevice *vbasedev = opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + >> + if (migration->region.mmaps) { >> + vfio_region_unmap(&migration->region); >> + } >> + trace_vfio_save_cleanup(vbasedev->name); >> +} >> + >> +static SaveVMHandlers savevm_vfio_handlers = { >> + .save_setup = vfio_save_setup, >> + .save_cleanup = vfio_save_cleanup, >> +}; >> + >> +/* ---------------------------------------------------------------------- */ >> + >> static void vfio_vmstate_change(void *opaque, int running, RunState state) >> { >> VFIODevice *vbasedev = opaque; >> @@ -192,6 +263,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, >> return ret; >> } >> >> + register_savevm_live("vfio", VMSTATE_INSTANCE_ID_ANY, 1, >> + &savevm_vfio_handlers, 
vbasedev); > > Hi, > This is still the only bit which worries me, and I saw your note > saying you'd tested it; to calm my nerves, can you run with the > 'qemu_loadvm_state_section_startfull' trace enabled with 2 devices > and show me the output and qemu command line? > I'm trying to figure out how they end up represented in the stream. > Created mtty devices for source VM: echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1233" > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1234" > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create for destination VM: echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1235" > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1236" > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create Source qemu-cmdline: /usr/libexec/qemu-kvm \ -name guest=rhel75-mig,debug-threads=on \ -machine pc-i440fx-3.1,accel=kvm,usb=off,dump-guest-core=off \ -cpu SandyBridge,vme=on,hypervisor=on,arat=on,xsaveopt=on \ -m 2048 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 \ -uuid eefb718c-137c-d416-e573-dd74ecd3490d \ -drive file=/home/vm/rhel-75.qcow2,format=qcow2,if=none,id=drive-ide0-0-0,cache=none \ -device ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1,write-cache=on \ -vnc 127.0.0.1:0 \ -device rtl8139,netdev=net0,mac=52:54:b2:88:86:2a,bus=pci.0,addr=0x3 -netdev tap,id=net0,script=/root/qemu-ifup,downscript=no \ -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1233 \ -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1234 \ --trace events=/root/vfio_events \ -monitor unix:/tmp/qmp_socket1,server,nowait \ -serial stdio \ -msg timestamp=on Destination qemu-cmdline: /usr/libexec/qemu-kvm \ -name guest=rhel75-mig,debug-threads=on \ -machine pc-i440fx-3.1,accel=kvm,usb=off,dump-guest-core=off \ -cpu SandyBridge,vme=on,hypervisor=on,arat=on,xsaveopt=on \ -m 2048 -realtime 
mlock=off -smp 2,sockets=2,cores=1,threads=1 \ -uuid eefb718c-137c-d416-e573-dd74ecd3490d \ -drive file=/home/vm/rhel-75.qcow2,format=qcow2,if=none,id=drive-ide0-0-0,cache=none \ -device ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1,write-cache=on \ -vnc 127.0.0.1:1 \ -device rtl8139,netdev=net0,mac=52:54:b2:88:86:2a,bus=pci.0,addr=0x3 -netdev tap,id=net0,script=/root/qemu-ifup,downscript=no \ -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1235 \ -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1236 \ -incoming unix:/tmp/mig_socket \ --trace events=/root/vfio_events \ -monitor unix:/tmp/qmp_socket2,server,nowait \ -serial stdio \ -msg timestamp=on Migrate: echo "migrate_set_speed 0" | sudo nc -U /tmp/qmp_socket1 echo "migrate -d unix:/tmp/mig_socket" | sudo nc -U $/tmp/qmp_socket1 After migration, 'qemu_loadvm_state_section_startfull' traces: qemu_loadvm_state_section_startfull 0.000 pid=1457 section_id=0x2 idstr=b'ram' instance_id=0x0 version_id=0x4 qemu_loadvm_state_section_startfull 515.606 pid=1457 section_id=0x2e idstr=b'vfio' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 10.661 pid=1457 section_id=0x2f idstr=b'vfio' instance_id=0x1 version_id=0x1 qemu_loadvm_state_section_startfull 1120000.237 pid=1457 section_id=0x0 idstr=b'timer' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 9.058 pid=1457 section_id=0x4 idstr=b'cpu_common' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 26.453 pid=1457 section_id=0x5 idstr=b'cpu' instance_id=0x0 version_id=0xc qemu_loadvm_state_section_startfull 105.173 pid=1457 section_id=0x6 idstr=b'kvm-tpr-opt' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 940.028 pid=1457 section_id=0x7 idstr=b'apic' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 69.939 pid=1457 section_id=0x8 idstr=b'cpu_common' instance_id=0x1 version_id=0x1 
qemu_loadvm_state_section_startfull 14.319 pid=1457 section_id=0x9 idstr=b'cpu' instance_id=0x1 version_id=0xc qemu_loadvm_state_section_startfull 102.986 pid=1457 section_id=0xa idstr=b'apic' instance_id=0x1 version_id=0x3 qemu_loadvm_state_section_startfull 107.910 pid=1457 section_id=0xb idstr=b'kvmclock' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 8.349 pid=1457 section_id=0xc idstr=b'fw_cfg' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 8.603 pid=1457 section_id=0xd idstr=b'PCIBUS' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 6.557 pid=1457 section_id=0xe idstr=b'0000:00:00.0/I440FX' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 633.727 pid=1457 section_id=0xf idstr=b'0000:00:01.0/PIIX3' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 14.907 pid=1457 section_id=0x10 idstr=b'i8259' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 11.465 pid=1457 section_id=0x11 idstr=b'i8259' instance_id=0x1 version_id=0x1 qemu_loadvm_state_section_startfull 5.663 pid=1457 section_id=0x12 idstr=b'ioapic' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 11.787 pid=1457 section_id=0x13 idstr=b'0000:00:02.0/vga' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 1718.618 pid=1457 section_id=0x14 idstr=b'hpet' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 16.212 pid=1457 section_id=0x15 idstr=b'mc146818rtc' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 9.946 pid=1457 section_id=0x16 idstr=b'i8254' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 12.879 pid=1457 section_id=0x17 idstr=b'pcspk' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 3.115 pid=1457 section_id=0x18 idstr=b'dma' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 10.432 pid=1457 section_id=0x19 idstr=b'dma' instance_id=0x1 version_id=0x1 
qemu_loadvm_state_section_startfull 12.263 pid=1457 section_id=0x1a idstr=b'serial' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 7.299 pid=1457 section_id=0x1b idstr=b'parallel_isa' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 3.399 pid=1457 section_id=0x1c idstr=b'fdc' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 33.307 pid=1457 section_id=0x1d idstr=b'ps2kbd' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 6.961 pid=1457 section_id=0x1e idstr=b'ps2mouse' instance_id=0x0 version_id=0x2 qemu_loadvm_state_section_startfull 5.485 pid=1457 section_id=0x1f idstr=b'pckbd' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 3.984 pid=1457 section_id=0x20 idstr=b'vmmouse' instance_id=0x0 version_id=0x0 qemu_loadvm_state_section_startfull 105.948 pid=1457 section_id=0x21 idstr=b'port92' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 2.443 pid=1457 section_id=0x22 idstr=b'0000:00:01.1/ide' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 1094.861 pid=1457 section_id=0x23 idstr=b'i2c_bus' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 3.416 pid=1457 section_id=0x24 idstr=b'0000:00:01.3/piix4_pm' instance_id=0x0 version_id=0x3 qemu_loadvm_state_section_startfull 2266.518 pid=1457 section_id=0x2d idstr=b'0000:00:03.0/rtl8139' instance_id=0x0 version_id=0x5 qemu_loadvm_state_section_startfull 1619.840 pid=1457 section_id=0x30 idstr=b'acpi_build' instance_id=0x0 version_id=0x1 qemu_loadvm_state_section_startfull 4.200 pid=1457 section_id=0x31 idstr=b'globalstate' instance_id=0x0 version_id=0x1 Thanks, Kirti
* Kirti Wankhede (kwankhede@nvidia.com) wrote: > > > On 5/21/2020 7:48 PM, Dr. David Alan Gilbert wrote: > > * Kirti Wankhede (kwankhede@nvidia.com) wrote: > > > Define flags to be used as delimeter in migration file stream. > > > Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > > > region from these functions at source during saving or pre-copy phase. > > > Set VFIO device state depending on VM's state. During live migration, VM is > > > running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > > > device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > > > > > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > > > Reviewed-by: Neo Jia <cjia@nvidia.com> <snip> > > > + register_savevm_live("vfio", VMSTATE_INSTANCE_ID_ANY, 1, > > > + &savevm_vfio_handlers, vbasedev); > > > > Hi, > > This is still the only bit which worries me, and I saw your note > > saying you'd tested it; to calm my nerves, can you run with the > > 'qemu_loadvm_state_section_startfull' trace enabled with 2 devices > > and show me the output and qemu command line? > > I'm trying to figure out how they end up represented in the stream. 
> > > > Created mtty devices for source VM: > echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1233" > > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create > echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1234" > > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create > > for destination VM: > echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1235" > > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create > echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1236" > > /sys/class/mdev_bus/mtty/mdev_supported_types/mtty-2/create > > Source qemu-cmdline: > /usr/libexec/qemu-kvm \ > -name guest=rhel75-mig,debug-threads=on \ > -machine pc-i440fx-3.1,accel=kvm,usb=off,dump-guest-core=off \ > -cpu SandyBridge,vme=on,hypervisor=on,arat=on,xsaveopt=on \ > -m 2048 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 \ > -uuid eefb718c-137c-d416-e573-dd74ecd3490d \ > -drive > file=/home/vm/rhel-75.qcow2,format=qcow2,if=none,id=drive-ide0-0-0,cache=none > \ > -device ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1,write-cache=on > \ > -vnc 127.0.0.1:0 \ > -device rtl8139,netdev=net0,mac=52:54:b2:88:86:2a,bus=pci.0,addr=0x3 > -netdev tap,id=net0,script=/root/qemu-ifup,downscript=no \ > -device > vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1233 > \ > -device > vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1234 > \ > --trace events=/root/vfio_events \ > -monitor unix:/tmp/qmp_socket1,server,nowait \ > -serial stdio \ > -msg timestamp=on > > Destination qemu-cmdline: > /usr/libexec/qemu-kvm \ > -name guest=rhel75-mig,debug-threads=on \ > -machine pc-i440fx-3.1,accel=kvm,usb=off,dump-guest-core=off \ > -cpu SandyBridge,vme=on,hypervisor=on,arat=on,xsaveopt=on \ > -m 2048 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 \ > -uuid eefb718c-137c-d416-e573-dd74ecd3490d \ > -drive > file=/home/vm/rhel-75.qcow2,format=qcow2,if=none,id=drive-ide0-0-0,cache=none > \ > -device 
ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1,write-cache=on > \ > -vnc 127.0.0.1:1 \ > -device rtl8139,netdev=net0,mac=52:54:b2:88:86:2a,bus=pci.0,addr=0x3 > -netdev tap,id=net0,script=/root/qemu-ifup,downscript=no \ > -device > vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1235 > \ > -device > vfio-pci,sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1236 > \ > -incoming unix:/tmp/mig_socket \ > --trace events=/root/vfio_events \ > -monitor unix:/tmp/qmp_socket2,server,nowait \ > -serial stdio \ > -msg timestamp=on > > Migrate: > echo "migrate_set_speed 0" | sudo nc -U /tmp/qmp_socket1 > echo "migrate -d unix:/tmp/mig_socket" | sudo nc -U $/tmp/qmp_socket1 > > After migration, 'qemu_loadvm_state_section_startfull' traces: > > qemu_loadvm_state_section_startfull 0.000 pid=1457 section_id=0x2 > idstr=b'ram' instance_id=0x0 version_id=0x4 > qemu_loadvm_state_section_startfull 515.606 pid=1457 section_id=0x2e > idstr=b'vfio' instance_id=0x0 version_id=0x1 > qemu_loadvm_state_section_startfull 10.661 pid=1457 section_id=0x2f > idstr=b'vfio' instance_id=0x1 version_id=0x1 Right, so this is my worry - we have two devices in the stream called 'vfio' with, I think, sequential IDs - what makes each of your source vfio devices go to the correct destination vfio device? If the two devices were different vfio devices, how would you ensure that they ended up in the right place? There's no requirement for the order of the qemu command line on the source and the destination to be the same, or for qemu to maintain semantics based on the order - but I bet that's the ordering we're getting here. 
> idstr=b'0000:00:03.0/rtl8139' instance_id=0x0 version_id=0x5 Now you see that PCI NIC has a nice PCI address as its name in the stream; if you have two NICs defined then they end up getting loaded into the destination device with the same guest PCI address - so it's nice and repeatable (especially if you specify the PCI device's address on the command line). Dave > qemu_loadvm_state_section_startfull 1619.840 pid=1457 section_id=0x30 > idstr=b'acpi_build' instance_id=0x0 version_id=0x1 > qemu_loadvm_state_section_startfull 4.200 pid=1457 section_id=0x31 > idstr=b'globalstate' instance_id=0x0 version_id=0x1 > > Thanks, > Kirti > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index c2f5564b51c3..773c8d16b1c1 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -8,12 +8,14 @@ */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" #include <linux/vfio.h> #include "sysemu/runstate.h" #include "hw/vfio/vfio-common.h" #include "cpu.h" #include "migration/migration.h" +#include "migration/vmstate.h" #include "migration/qemu-file.h" #include "migration/register.h" #include "migration/blocker.h" @@ -24,6 +26,17 @@ #include "pci.h" #include "trace.h" +/* + * Flags used as delimiter: + * 0xffffffff => MSB 32-bit all 1s + * 0xef10 => emulated (virtual) function IO + * 0x0000 => 16-bits reserved for flags + */ +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) + static void vfio_migration_region_exit(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; @@ -126,6 +139,64 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, return 0; } +/* ---------------------------------------------------------------------- */ + +static int vfio_save_setup(QEMUFile *f, void *opaque) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + int ret; + + trace_vfio_save_setup(vbasedev->name); + + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); + + if (migration->region.mmaps) { + qemu_mutex_lock_iothread(); + ret = vfio_region_mmap(&migration->region); + qemu_mutex_unlock_iothread(); + if (ret) { + error_report("%s: Failed to mmap VFIO migration region %d: %s", + vbasedev->name, migration->region.index, + strerror(-ret)); + return ret; + } + } + + ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING); + if (ret) { + error_report("%s: Failed to set state SAVING", vbasedev->name); + return ret; + } + + qemu_put_be64(f, 
VFIO_MIG_FLAG_END_OF_STATE); + + ret = qemu_file_get_error(f); + if (ret) { + return ret; + } + + return 0; +} + +static void vfio_save_cleanup(void *opaque) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + + if (migration->region.mmaps) { + vfio_region_unmap(&migration->region); + } + trace_vfio_save_cleanup(vbasedev->name); +} + +static SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, +}; + +/* ---------------------------------------------------------------------- */ + static void vfio_vmstate_change(void *opaque, int running, RunState state) { VFIODevice *vbasedev = opaque; @@ -192,6 +263,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, return ret; } + register_savevm_live("vfio", VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_vfio_handlers, vbasedev); vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, vbasedev); diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index bd3d47b005cb..86c18def016e 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -149,3 +149,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" +vfio_save_setup(const char *name) " (%s)" +vfio_save_cleanup(const char *name) " (%s)"