@@ -4388,6 +4388,15 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
}
}
+ /*
+ * This needs to be done before resuming a postcopy. Note: for newer
+ * QEMUs we will delay the channel creation until postcopy_start(), to
+ * avoid misordered creation of the channels.
+ */
+ if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
+ postcopy_preempt_setup(s);
+ }
+
if (resume) {
/* Wakeup the main migration thread to do the recovery */
migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
@@ -384,12 +384,19 @@ struct MigrationState {
* - postcopy preempt src QEMU instance will generate an EOS message at
* the end of migration to shut the preempt channel on dest side.
*
+ * - postcopy preempt channel will be created at the setup phase on src
+ * QEMU.
+ *
* When clear:
*
* - postcopy preempt src QEMU instance will _not_ generate an EOS
* message at the end of migration, the dest qemu will shutdown the
* channel itself.
*
+ * - postcopy preempt channel will be created at the switching phase
+ * from precopy -> postcopy (to avoid race condition of misordered
+ * creation of channels).
+ *
* NOTE: See message-id <ZBoShWArKDPpX/D7@work-vm> on qemu-devel
* mailing list for more information on the possible race. Everyone
* should probably just keep this value untouched after set by the
@@ -1630,8 +1630,14 @@ int postcopy_preempt_establish_channel(MigrationState *s)
return 0;
}
- /* Kick off async task to establish preempt channel */
- postcopy_preempt_setup(s);
+ /*
+ * Kick off async task to establish preempt channel. Only do so with
+ * 8.0+ machines, because 7.1/7.2 require the channel to be created in
+ * setup phase of migration (even if racy in an unreliable network).
+ */
+ if (!s->preempt_pre_7_2) {
+ postcopy_preempt_setup(s);
+ }
/*
* We need the postcopy preempt channel to be established before
In the 8.0 development window we reworked preempt channel creation in commit 5655aab079, so that there is no race condition when the migration channel and the preempt channel get established in the wrong order. However, no one noticed that the change is also incompatible with older QEMUs, mainly the 7.1/7.2 versions where preempt mode started to be supported. Leverage the same pre-7.2 flag introduced in the previous patch to recover the old behavior, hopefully before 8.0 is released, so we don't break migration when we migrate from 8.0 to older QEMU binaries. Fixes: 5655aab079 ("migration: Postpone postcopy preempt channel to be after main") Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/migration.c | 9 +++++++++ migration/migration.h | 7 +++++++ migration/postcopy-ram.c | 10 ++++++++-- 3 files changed, 24 insertions(+), 2 deletions(-)