@@ -142,6 +142,9 @@
#
# @postcopy-paused: during postcopy but paused. (since 3.0)
#
+# @postcopy-recover-setup: setup phase for a postcopy recovery process,
+# preparing for a recovery phase to start. (since 9.1)
+#
# @postcopy-recover: trying to recover from a paused postcopy. (since
# 3.0)
#
@@ -166,6 +169,7 @@
{ 'enum': 'MigrationStatus',
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
'active', 'postcopy-active', 'postcopy-paused',
+ 'postcopy-recover-setup',
'postcopy-recover', 'completed', 'failed', 'colo',
'pre-switchover', 'device', 'wait-unplug' ] }
##
@@ -13,6 +13,8 @@
#ifndef QEMU_POSTCOPY_RAM_H
#define QEMU_POSTCOPY_RAM_H
+#include "qapi/qapi-types-migration.h"
+
/* Return true if the host supports everything we need to do postcopy-ram */
bool postcopy_ram_supported_by_host(MigrationIncomingState *mis,
Error **errp);
@@ -193,5 +195,6 @@ enum PostcopyChannels {
void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
void postcopy_preempt_setup(MigrationState *s);
int postcopy_preempt_establish_channel(MigrationState *s);
+bool postcopy_is_paused(MigrationStatus status);
#endif
@@ -1117,6 +1117,7 @@ bool migration_is_setup_or_active(void)
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
case MIGRATION_STATUS_POSTCOPY_RECOVER:
case MIGRATION_STATUS_SETUP:
case MIGRATION_STATUS_PRE_SWITCHOVER:
@@ -1139,6 +1140,7 @@ bool migration_is_running(void)
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
case MIGRATION_STATUS_POSTCOPY_RECOVER:
case MIGRATION_STATUS_SETUP:
case MIGRATION_STATUS_PRE_SWITCHOVER:
@@ -1276,6 +1278,7 @@ static void fill_source_migration_info(MigrationInfo *info)
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
case MIGRATION_STATUS_POSTCOPY_RECOVER:
/* TODO add some postcopy stats */
populate_time_info(info, s);
@@ -1482,9 +1485,30 @@ static void migrate_error_free(MigrationState *s)
static void migrate_fd_error(MigrationState *s, const Error *error)
{
+ MigrationStatus current = s->state;
+ MigrationStatus next;
+
assert(s->to_dst_file == NULL);
- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
- MIGRATION_STATUS_FAILED);
+
+ switch (current) {
+ case MIGRATION_STATUS_SETUP:
+ next = MIGRATION_STATUS_FAILED;
+ break;
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
+ /* Recovery setup failed: fall back to PAUSED rather than FAILED */
+ next = MIGRATION_STATUS_POSTCOPY_PAUSED;
+ break;
+ default:
+ /*
+ * Not expected to happen. Rather than crash the VM over it, report
+ * the unexpected state and return without changing state or error.
+ */
+ error_report("%s: Illegal migration status (%s) detected",
+ __func__, MigrationStatus_str(current));
+ return;
+ }
+
+ migrate_set_state(&s->state, current, next);
migrate_set_error(s, error);
}
@@ -1585,6 +1609,7 @@ bool migration_in_postcopy(void)
switch (s->state) {
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
+ case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
case MIGRATION_STATUS_POSTCOPY_RECOVER:
return true;
default:
@@ -1972,6 +1997,9 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
return false;
}
+ migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
+ MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
+
/* This is a resume, skip init status */
return true;
}
@@ -3004,9 +3032,9 @@ static MigThrError postcopy_pause(MigrationState *s)
* We wait until things fixed up. Then someone will setup the
* status back for us.
*/
- while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+ do {
qemu_sem_wait(&s->postcopy_pause_sem);
- }
+ } while (postcopy_is_paused(s->state));
if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
/* Woken up by a recover procedure. Give it a shot */
@@ -3702,7 +3730,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
{
Error *local_err = NULL;
uint64_t rate_limit;
- bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
+ bool resume = (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
int ret;
/*
@@ -3769,7 +3797,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
if (resume) {
/* Wakeup the main migration thread to do the recovery */
- migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
+ migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP,
MIGRATION_STATUS_POSTCOPY_RECOVER);
qemu_sem_post(&s->postcopy_pause_sem);
return;
@@ -1770,3 +1770,19 @@ void *postcopy_preempt_thread(void *opaque)
return NULL;
}
+
+/*
+ * Report whether @status is one of the postcopy "paused" states:
+ * POSTCOPY_PAUSED itself, or POSTCOPY_RECOVER_SETUP, the setup phase
+ * that precedes POSTCOPY_RECOVER.
+ */
+bool postcopy_is_paused(MigrationStatus status)
+{
+    switch (status) {
+    case MIGRATION_STATUS_POSTCOPY_PAUSED:
+    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
+        return true;
+    default:
+        return false;
+    }
+}
@@ -2864,9 +2864,9 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
error_report("Detected IO failure for postcopy. "
"Migration paused.");
- while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+ do {
qemu_sem_wait(&mis->postcopy_pause_sem_dst);
- }
+ } while (postcopy_is_paused(mis->state));
trace_postcopy_pause_incoming_continued();