diff mbox series

[v4,10/25] migration: Add Error** argument to qemu_savevm_state_setup()

Message ID 20240306133441.2351700-11-clg@redhat.com
State New
Headers show
Series migration: Improve error reporting | expand

Commit Message

Cédric Le Goater March 6, 2024, 1:34 p.m. UTC
This prepares ground for the changes coming next which add an Error**
argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
now handle the error and fail earlier setting the migration state from
MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.

In qemu_savevm_state(), move the cleanup to preserve the error
reported by .save_setup() handlers.

Since the previous behavior was to ignore errors at this step of
migration, this change should be examined closely to check that
cleanups are still correctly done.

Signed-off-by: Cédric Le Goater <clg@redhat.com>
---

 Changes in v4:
 
 - Merged cleanup change in qemu_savevm_state()
   
 Changes in v3:
 
 - Set migration state to MIGRATION_STATUS_FAILED 
 - Fixed error handling to be done under lock in bg_migration_thread()
 - Made sure an error is always set in case of failure in
   qemu_savevm_state_setup()
   
 migration/savevm.h    |  2 +-
 migration/migration.c | 27 ++++++++++++++++++++++++---
 migration/savevm.c    | 26 +++++++++++++++-----------
 3 files changed, 40 insertions(+), 15 deletions(-)

Comments

Fabiano Rosas March 7, 2024, 12:45 p.m. UTC | #1
Cédric Le Goater <clg@redhat.com> writes:

> This prepares ground for the changes coming next which add an Error**
> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
> now handle the error and fail earlier setting the migration state from
> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>
> In qemu_savevm_state(), move the cleanup to preserve the error
> reported by .save_setup() handlers.
>
> Since the previous behavior was to ignore errors at this step of
> migration, this change should be examined closely to check that
> cleanups are still correctly done.
>
> Signed-off-by: Cédric Le Goater <clg@redhat.com>

Reviewed-by: Fabiano Rosas <farosas@suse.de>
Peter Xu March 8, 2024, 12:56 p.m. UTC | #2
On Wed, Mar 06, 2024 at 02:34:25PM +0100, Cédric Le Goater wrote:
> This prepares ground for the changes coming next which add an Error**
> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
> now handle the error and fail earlier setting the migration state from
> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
> 
> In qemu_savevm_state(), move the cleanup to preserve the error
> reported by .save_setup() handlers.
> 
> Since the previous behavior was to ignore errors at this step of
> migration, this change should be examined closely to check that
> cleanups are still correctly done.
> 
> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> ---
> 
>  Changes in v4:
>  
>  - Merged cleanup change in qemu_savevm_state()
>    
>  Changes in v3:
>  
>  - Set migration state to MIGRATION_STATUS_FAILED 
>  - Fixed error handling to be done under lock in bg_migration_thread()
>  - Made sure an error is always set in case of failure in
>    qemu_savevm_state_setup()
>    
>  migration/savevm.h    |  2 +-
>  migration/migration.c | 27 ++++++++++++++++++++++++---
>  migration/savevm.c    | 26 +++++++++++++++-----------
>  3 files changed, 40 insertions(+), 15 deletions(-)
> 
> diff --git a/migration/savevm.h b/migration/savevm.h
> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
> --- a/migration/savevm.h
> +++ b/migration/savevm.h
> @@ -32,7 +32,7 @@
>  bool qemu_savevm_state_blocked(Error **errp);
>  void qemu_savevm_non_migratable_list(strList **reasons);
>  int qemu_savevm_state_prepare(Error **errp);
> -void qemu_savevm_state_setup(QEMUFile *f);
> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>  bool qemu_savevm_state_guest_unplug_pending(void);
>  int qemu_savevm_state_resume_prepare(MigrationState *s);
>  void qemu_savevm_state_header(QEMUFile *f);
> diff --git a/migration/migration.c b/migration/migration.c
> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>      int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>      MigThrError thr_error;
>      bool urgent = false;
> +    Error *local_err = NULL;
> +    int ret;
>  
>      thread = migration_threads_add("live_migration", qemu_get_thread_id());
>  
> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>      }
>  
>      bql_lock();
> -    qemu_savevm_state_setup(s->to_dst_file);
> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>      bql_unlock();
>  
> +    if (ret) {
> +        migrate_set_error(s, local_err);
> +        error_free(local_err);
> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> +                          MIGRATION_STATUS_FAILED);
> +        goto out;
> +     }

There's a small indent issue, I can fix it.

The bigger problem is I _think_ this will trigger a ci failure in the
virtio-net-failover test:

▶ 121/464 ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling") ERROR         
121/464 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover    ERROR            4.77s   killed by signal 6 SIGABRT
>>> PYTHON=/builds/peterx/qemu/build/pyvenv/bin/python3.8 G_TEST_DBUS_DAEMON=/builds/peterx/qemu/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=161 QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon QTEST_QEMU_BINARY=./qemu-system-x86_64 /builds/peterx/qemu/build/tests/qtest/virtio-net-failover --tap -k
――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――
stderr:
qemu-system-x86_64: ram_save_setup failed: Input/output error
**
ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
(test program exited with status code -6)
――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――

I am not familiar enough with the failover code, and may not have time
today to follow this up, copy Laurent.  Cedric, if you have time, please
have a look.  I'll give it a shot on Monday to find a solution, otherwise
we may need to postpone some of the patches to 9.1.

Thanks,

> +
>      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>                                 MIGRATION_STATUS_ACTIVE);
>  
> @@ -3530,6 +3540,9 @@ static void *bg_migration_thread(void *opaque)
>      MigThrError thr_error;
>      QEMUFile *fb;
>      bool early_fail = true;
> +    bool setup_fail = true;
> +    Error *local_err = NULL;
> +    int ret;
>  
>      rcu_register_thread();
>      object_ref(OBJECT(s));
> @@ -3563,9 +3576,16 @@ static void *bg_migration_thread(void *opaque)
>  
>      bql_lock();
>      qemu_savevm_state_header(s->to_dst_file);
> -    qemu_savevm_state_setup(s->to_dst_file);
> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
> +    if (ret) {
> +        migrate_set_error(s, local_err);
> +        error_free(local_err);
> +        goto fail;
> +    }
>      bql_unlock();
>  
> +    setup_fail = false;
> +
>      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>                                 MIGRATION_STATUS_ACTIVE);
>  
> @@ -3632,7 +3652,8 @@ static void *bg_migration_thread(void *opaque)
>  
>  fail:
>      if (early_fail) {
> -        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> +        migrate_set_state(&s->state,
> +                setup_fail ? MIGRATION_STATUS_SETUP : MIGRATION_STATUS_ACTIVE,
>                  MIGRATION_STATUS_FAILED);
>          bql_unlock();
>      }
> diff --git a/migration/savevm.c b/migration/savevm.c
> index ee31ffb5e88cea723039c754c30ce2c8a0ef35f3..63fdbb5ad7d4dbfaef1d2094350bf302cc677602 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1310,11 +1310,11 @@ int qemu_savevm_state_prepare(Error **errp)
>      return 0;
>  }
>  
> -void qemu_savevm_state_setup(QEMUFile *f)
> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
>  {
> +    ERRP_GUARD();
>      MigrationState *ms = migrate_get_current();
>      SaveStateEntry *se;
> -    Error *local_err = NULL;
>      int ret = 0;
>  
>      json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
> @@ -1323,10 +1323,9 @@ void qemu_savevm_state_setup(QEMUFile *f)
>      trace_savevm_state_setup();
>      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>          if (se->vmsd && se->vmsd->early_setup) {
> -            ret = vmstate_save(f, se, ms->vmdesc, &local_err);
> +            ret = vmstate_save(f, se, ms->vmdesc, errp);
>              if (ret) {
> -                migrate_set_error(ms, local_err);
> -                error_report_err(local_err);
> +                migrate_set_error(ms, *errp);
>                  qemu_file_set_error(f, ret);
>                  break;
>              }
> @@ -1346,18 +1345,19 @@ void qemu_savevm_state_setup(QEMUFile *f)
>          ret = se->ops->save_setup(f, se->opaque);
>          save_section_footer(f, se);
>          if (ret < 0) {
> +            error_setg(errp, "failed to setup SaveStateEntry with id(name): "
> +                       "%d(%s): %d", se->section_id, se->idstr, ret);
>              qemu_file_set_error(f, ret);
>              break;
>          }
>      }
>  
>      if (ret) {
> -        return;
> +        return ret;
>      }
>  
> -    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
> -        error_report_err(local_err);
> -    }
> +    /* TODO: Should we check that errp is set in case of failure ? */
> +    return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
>  }
>  
>  int qemu_savevm_state_resume_prepare(MigrationState *s)
> @@ -1728,7 +1728,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>      ms->to_dst_file = f;
>  
>      qemu_savevm_state_header(f);
> -    qemu_savevm_state_setup(f);
> +    ret = qemu_savevm_state_setup(f, errp);
> +    if (ret) {
> +        goto cleanup;
> +    }
>  
>      while (qemu_file_get_error(f) == 0) {
>          if (qemu_savevm_state_iterate(f, false) > 0) {
> @@ -1741,10 +1744,11 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>          qemu_savevm_state_complete_precopy(f, false, false);
>          ret = qemu_file_get_error(f);
>      }
> -    qemu_savevm_state_cleanup();
>      if (ret != 0) {
>          error_setg_errno(errp, -ret, "Error while writing VM state");
>      }
> +cleanup:
> +    qemu_savevm_state_cleanup();
>  
>      if (ret != 0) {
>          status = MIGRATION_STATUS_FAILED;
> -- 
> 2.44.0
>
Cédric Le Goater March 8, 2024, 1:14 p.m. UTC | #3
On 3/8/24 13:56, Peter Xu wrote:
> On Wed, Mar 06, 2024 at 02:34:25PM +0100, Cédric Le Goater wrote:
>> This prepares ground for the changes coming next which add an Error**
>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>> now handle the error and fail earlier setting the migration state from
>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>
>> In qemu_savevm_state(), move the cleanup to preserve the error
>> reported by .save_setup() handlers.
>>
>> Since the previous behavior was to ignore errors at this step of
>> migration, this change should be examined closely to check that
>> cleanups are still correctly done.
>>
>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>> ---
>>
>>   Changes in v4:
>>   
>>   - Merged cleanup change in qemu_savevm_state()
>>     
>>   Changes in v3:
>>   
>>   - Set migration state to MIGRATION_STATUS_FAILED
>>   - Fixed error handling to be done under lock in bg_migration_thread()
>>   - Made sure an error is always set in case of failure in
>>     qemu_savevm_state_setup()
>>     
>>   migration/savevm.h    |  2 +-
>>   migration/migration.c | 27 ++++++++++++++++++++++++---
>>   migration/savevm.c    | 26 +++++++++++++++-----------
>>   3 files changed, 40 insertions(+), 15 deletions(-)
>>
>> diff --git a/migration/savevm.h b/migration/savevm.h
>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>> --- a/migration/savevm.h
>> +++ b/migration/savevm.h
>> @@ -32,7 +32,7 @@
>>   bool qemu_savevm_state_blocked(Error **errp);
>>   void qemu_savevm_non_migratable_list(strList **reasons);
>>   int qemu_savevm_state_prepare(Error **errp);
>> -void qemu_savevm_state_setup(QEMUFile *f);
>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>   bool qemu_savevm_state_guest_unplug_pending(void);
>>   int qemu_savevm_state_resume_prepare(MigrationState *s);
>>   void qemu_savevm_state_header(QEMUFile *f);
>> diff --git a/migration/migration.c b/migration/migration.c
>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>       int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>       MigThrError thr_error;
>>       bool urgent = false;
>> +    Error *local_err = NULL;
>> +    int ret;
>>   
>>       thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>   
>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>       }
>>   
>>       bql_lock();
>> -    qemu_savevm_state_setup(s->to_dst_file);
>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>       bql_unlock();
>>   
>> +    if (ret) {
>> +        migrate_set_error(s, local_err);
>> +        error_free(local_err);
>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>> +                          MIGRATION_STATUS_FAILED);
>> +        goto out;
>> +     }
> 
> There's a small indent issue, I can fix it.

checkpatch did not report anything.

> 
> The bigger problem is I _think_ this will trigger a ci failure in the
> virtio-net-failover test:
> 
> ▶ 121/464 ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling") ERROR
> 121/464 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover    ERROR            4.77s   killed by signal 6 SIGABRT
>>>> PYTHON=/builds/peterx/qemu/build/pyvenv/bin/python3.8 G_TEST_DBUS_DAEMON=/builds/peterx/qemu/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=161 QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon QTEST_QEMU_BINARY=./qemu-system-x86_64 /builds/peterx/qemu/build/tests/qtest/virtio-net-failover --tap -k
> ――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――
> stderr:
> qemu-system-x86_64: ram_save_setup failed: Input/output error
> **
> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
> (test program exited with status code -6)
> ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
> 
> I am not familiar enough with the failover code, and may not have time
> today to follow this up, copy Laurent.  Cedric, if you have time, please
> have a look.  


Sure. Weird because I usually run make check on x86_64, s390x, ppc64 and
aarch64. Let me check again.


Thanks,

C.



> I'll give it a shot on Monday to find a solution, otherwise
> we may need to postpone some of the patches to 9.1.
> 
> Thanks,
> 
>> +
>>       qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>                                  MIGRATION_STATUS_ACTIVE);
>>   
>> @@ -3530,6 +3540,9 @@ static void *bg_migration_thread(void *opaque)
>>       MigThrError thr_error;
>>       QEMUFile *fb;
>>       bool early_fail = true;
>> +    bool setup_fail = true;
>> +    Error *local_err = NULL;
>> +    int ret;
>>   
>>       rcu_register_thread();
>>       object_ref(OBJECT(s));
>> @@ -3563,9 +3576,16 @@ static void *bg_migration_thread(void *opaque)
>>   
>>       bql_lock();
>>       qemu_savevm_state_header(s->to_dst_file);
>> -    qemu_savevm_state_setup(s->to_dst_file);
>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>> +    if (ret) {
>> +        migrate_set_error(s, local_err);
>> +        error_free(local_err);
>> +        goto fail;
>> +    }
>>       bql_unlock();
>>   
>> +    setup_fail = false;
>> +
>>       qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>                                  MIGRATION_STATUS_ACTIVE);
>>   
>> @@ -3632,7 +3652,8 @@ static void *bg_migration_thread(void *opaque)
>>   
>>   fail:
>>       if (early_fail) {
>> -        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>> +        migrate_set_state(&s->state,
>> +                setup_fail ? MIGRATION_STATUS_SETUP : MIGRATION_STATUS_ACTIVE,
>>                   MIGRATION_STATUS_FAILED);
>>           bql_unlock();
>>       }
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index ee31ffb5e88cea723039c754c30ce2c8a0ef35f3..63fdbb5ad7d4dbfaef1d2094350bf302cc677602 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -1310,11 +1310,11 @@ int qemu_savevm_state_prepare(Error **errp)
>>       return 0;
>>   }
>>   
>> -void qemu_savevm_state_setup(QEMUFile *f)
>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
>>   {
>> +    ERRP_GUARD();
>>       MigrationState *ms = migrate_get_current();
>>       SaveStateEntry *se;
>> -    Error *local_err = NULL;
>>       int ret = 0;
>>   
>>       json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
>> @@ -1323,10 +1323,9 @@ void qemu_savevm_state_setup(QEMUFile *f)
>>       trace_savevm_state_setup();
>>       QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>>           if (se->vmsd && se->vmsd->early_setup) {
>> -            ret = vmstate_save(f, se, ms->vmdesc, &local_err);
>> +            ret = vmstate_save(f, se, ms->vmdesc, errp);
>>               if (ret) {
>> -                migrate_set_error(ms, local_err);
>> -                error_report_err(local_err);
>> +                migrate_set_error(ms, *errp);
>>                   qemu_file_set_error(f, ret);
>>                   break;
>>               }
>> @@ -1346,18 +1345,19 @@ void qemu_savevm_state_setup(QEMUFile *f)
>>           ret = se->ops->save_setup(f, se->opaque);
>>           save_section_footer(f, se);
>>           if (ret < 0) {
>> +            error_setg(errp, "failed to setup SaveStateEntry with id(name): "
>> +                       "%d(%s): %d", se->section_id, se->idstr, ret);
>>               qemu_file_set_error(f, ret);
>>               break;
>>           }
>>       }
>>   
>>       if (ret) {
>> -        return;
>> +        return ret;
>>       }
>>   
>> -    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
>> -        error_report_err(local_err);
>> -    }
>> +    /* TODO: Should we check that errp is set in case of failure ? */
>> +    return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
>>   }
>>   
>>   int qemu_savevm_state_resume_prepare(MigrationState *s)
>> @@ -1728,7 +1728,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>>       ms->to_dst_file = f;
>>   
>>       qemu_savevm_state_header(f);
>> -    qemu_savevm_state_setup(f);
>> +    ret = qemu_savevm_state_setup(f, errp);
>> +    if (ret) {
>> +        goto cleanup;
>> +    }
>>   
>>       while (qemu_file_get_error(f) == 0) {
>>           if (qemu_savevm_state_iterate(f, false) > 0) {
>> @@ -1741,10 +1744,11 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>>           qemu_savevm_state_complete_precopy(f, false, false);
>>           ret = qemu_file_get_error(f);
>>       }
>> -    qemu_savevm_state_cleanup();
>>       if (ret != 0) {
>>           error_setg_errno(errp, -ret, "Error while writing VM state");
>>       }
>> +cleanup:
>> +    qemu_savevm_state_cleanup();
>>   
>>       if (ret != 0) {
>>           status = MIGRATION_STATUS_FAILED;
>> -- 
>> 2.44.0
>>
>
Cédric Le Goater March 8, 2024, 1:39 p.m. UTC | #4
On 3/8/24 14:14, Cédric Le Goater wrote:
> On 3/8/24 13:56, Peter Xu wrote:
>> On Wed, Mar 06, 2024 at 02:34:25PM +0100, Cédric Le Goater wrote:
>>> This prepares ground for the changes coming next which add an Error**
>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>>> now handle the error and fail earlier setting the migration state from
>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>>
>>> In qemu_savevm_state(), move the cleanup to preserve the error
>>> reported by .save_setup() handlers.
>>>
>>> Since the previous behavior was to ignore errors at this step of
>>> migration, this change should be examined closely to check that
>>> cleanups are still correctly done.
>>>
>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>> ---
>>>
>>>   Changes in v4:
>>>   - Merged cleanup change in qemu_savevm_state()
>>>   Changes in v3:
>>>   - Set migration state to MIGRATION_STATUS_FAILED
>>>   - Fixed error handling to be done under lock in bg_migration_thread()
>>>   - Made sure an error is always set in case of failure in
>>>     qemu_savevm_state_setup()
>>>   migration/savevm.h    |  2 +-
>>>   migration/migration.c | 27 ++++++++++++++++++++++++---
>>>   migration/savevm.c    | 26 +++++++++++++++-----------
>>>   3 files changed, 40 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/migration/savevm.h b/migration/savevm.h
>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>>> --- a/migration/savevm.h
>>> +++ b/migration/savevm.h
>>> @@ -32,7 +32,7 @@
>>>   bool qemu_savevm_state_blocked(Error **errp);
>>>   void qemu_savevm_non_migratable_list(strList **reasons);
>>>   int qemu_savevm_state_prepare(Error **errp);
>>> -void qemu_savevm_state_setup(QEMUFile *f);
>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>>   bool qemu_savevm_state_guest_unplug_pending(void);
>>>   int qemu_savevm_state_resume_prepare(MigrationState *s);
>>>   void qemu_savevm_state_header(QEMUFile *f);
>>> diff --git a/migration/migration.c b/migration/migration.c
>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>>> --- a/migration/migration.c
>>> +++ b/migration/migration.c
>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>>       int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>       MigThrError thr_error;
>>>       bool urgent = false;
>>> +    Error *local_err = NULL;
>>> +    int ret;
>>>       thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>>       }
>>>       bql_lock();
>>> -    qemu_savevm_state_setup(s->to_dst_file);
>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>       bql_unlock();
>>> +    if (ret) {
>>> +        migrate_set_error(s, local_err);
>>> +        error_free(local_err);
>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>> +                          MIGRATION_STATUS_FAILED);
>>> +        goto out;
>>> +     }
>>
>> There's a small indent issue, I can fix it.
> 
> checkpatch did not report anything.
> 
>>
>> The bigger problem is I _think_ this will trigger a ci failure in the
>> virtio-net-failover test:
>>
>> ▶ 121/464 ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling") ERROR
>> 121/464 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover    ERROR            4.77s   killed by signal 6 SIGABRT
>>>>> PYTHON=/builds/peterx/qemu/build/pyvenv/bin/python3.8 G_TEST_DBUS_DAEMON=/builds/peterx/qemu/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=161 QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon QTEST_QEMU_BINARY=./qemu-system-x86_64 /builds/peterx/qemu/build/tests/qtest/virtio-net-failover --tap -k
>> ――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――
>> stderr:
>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>> **
>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>> (test program exited with status code -6)
>> ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
>>
>> I am not familiar enough with the failover code, and may not have time
>> today to follow this up, copy Laurent.  Cedric, if you have time, please
>> have a look. 
> 
> 
> Sure. Weird because I usually run make check on x86_64, s390x, ppc64 and
> aarch64. Let me check again.

I see one timeout error on s390x but not always. See below. It occurs with
or without this patchset. The other x86_64, ppc64 arches run fine (apart
from one io test failing from time to time)

Thanks,

C.






# Start of compress tests
# Running /s390x/migration/postcopy/recovery/compress/plain
# Using machine type: s390-ccw-virtio-9.0
# starting QEMU: exec ./qemu-system-s390x -qtest unix:/tmp/qtest-3064311.sock -qtest-log /dev/null -chardev socket,path=/tmp/qtest-3064311.qmp,id=char0 -mon chardev=char0,mode=control -display none -audio none -accel kvm -accel tcg -machine s390-ccw-virtio-9.0, -name source,debug-threads=on -m 128M -serial file:/tmp/migration-test-TO8BK2/src_serial -bios /tmp/migration-test-TO8BK2/bootsect    2>/dev/null -accel qtest
# starting QEMU: exec ./qemu-system-s390x -qtest unix:/tmp/qtest-3064311.sock -qtest-log /dev/null -chardev socket,path=/tmp/qtest-3064311.qmp,id=char0 -mon chardev=char0,mode=control -display none -audio none -accel kvm -accel tcg -machine s390-ccw-virtio-9.0, -name target,debug-threads=on -m 128M -serial file:/tmp/migration-test-TO8BK2/dest_serial -incoming defer -bios /tmp/migration-test-TO8BK2/bootsect    2>/dev/null -accel qtest

**
ERROR:../tests/qtest/migration-helpers.c:205:wait_for_migration_status: assertion failed: (g_test_timer_elapsed() < MIGRATION_STATUS_WAIT_TIMEOUT)
Bail out! ERROR:../tests/qtest/migration-helpers.c:205:wait_for_migration_status: assertion failed: (g_test_timer_elapsed() < MIGRATION_STATUS_WAIT_TIMEOUT)
../tests/qtest/libqtest.c:204: kill_qemu() detected QEMU death from signal 9 (Killed)
Aborted (core dumped)
Cédric Le Goater March 8, 2024, 1:55 p.m. UTC | #5
On 3/8/24 14:39, Cédric Le Goater wrote:
> On 3/8/24 14:14, Cédric Le Goater wrote:
>> On 3/8/24 13:56, Peter Xu wrote:
>>> On Wed, Mar 06, 2024 at 02:34:25PM +0100, Cédric Le Goater wrote:
>>>> This prepares ground for the changes coming next which add an Error**
>>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>>>> now handle the error and fail earlier setting the migration state from
>>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>>>
>>>> In qemu_savevm_state(), move the cleanup to preserve the error
>>>> reported by .save_setup() handlers.
>>>>
>>>> Since the previous behavior was to ignore errors at this step of
>>>> migration, this change should be examined closely to check that
>>>> cleanups are still correctly done.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>> ---
>>>>
>>>>   Changes in v4:
>>>>   - Merged cleanup change in qemu_savevm_state()
>>>>   Changes in v3:
>>>>   - Set migration state to MIGRATION_STATUS_FAILED
>>>>   - Fixed error handling to be done under lock in bg_migration_thread()
>>>>   - Made sure an error is always set in case of failure in
>>>>     qemu_savevm_state_setup()
>>>>   migration/savevm.h    |  2 +-
>>>>   migration/migration.c | 27 ++++++++++++++++++++++++---
>>>>   migration/savevm.c    | 26 +++++++++++++++-----------
>>>>   3 files changed, 40 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/migration/savevm.h b/migration/savevm.h
>>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>>>> --- a/migration/savevm.h
>>>> +++ b/migration/savevm.h
>>>> @@ -32,7 +32,7 @@
>>>>   bool qemu_savevm_state_blocked(Error **errp);
>>>>   void qemu_savevm_non_migratable_list(strList **reasons);
>>>>   int qemu_savevm_state_prepare(Error **errp);
>>>> -void qemu_savevm_state_setup(QEMUFile *f);
>>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>>>   bool qemu_savevm_state_guest_unplug_pending(void);
>>>>   int qemu_savevm_state_resume_prepare(MigrationState *s);
>>>>   void qemu_savevm_state_header(QEMUFile *f);
>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>>>> --- a/migration/migration.c
>>>> +++ b/migration/migration.c
>>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>>>       int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>>       MigThrError thr_error;
>>>>       bool urgent = false;
>>>> +    Error *local_err = NULL;
>>>> +    int ret;
>>>>       thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>>>       }
>>>>       bql_lock();
>>>> -    qemu_savevm_state_setup(s->to_dst_file);
>>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>>       bql_unlock();
>>>> +    if (ret) {
>>>> +        migrate_set_error(s, local_err);
>>>> +        error_free(local_err);
>>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>>> +                          MIGRATION_STATUS_FAILED);
>>>> +        goto out;
>>>> +     }
>>>
>>> There's a small indent issue, I can fix it.
>>
>> checkpatch did not report anything.
>>
>>>
>>> The bigger problem is I _think_ this will trigger a ci failure in the
>>> virtio-net-failover test:
>>>
>>> ▶ 121/464 ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling") ERROR
>>> 121/464 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover    ERROR            4.77s   killed by signal 6 SIGABRT
>>>>>> PYTHON=/builds/peterx/qemu/build/pyvenv/bin/python3.8 G_TEST_DBUS_DAEMON=/builds/peterx/qemu/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=161 QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon QTEST_QEMU_BINARY=./qemu-system-x86_64 /builds/peterx/qemu/build/tests/qtest/virtio-net-failover --tap -k
>>> ――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――
>>> stderr:
>>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>>> **
>>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>>> (test program exited with status code -6)
>>> ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
>>>
>>> I am not familiar enough with the failover code, and may not have time
>>> today to follow this up, copy Laurent.  Cedric, if you have time, please
>>> have a look. 
>>
>>
>> Sure. Weird because I usually run make check on x86_64, s390x, ppc64 and
>> aarch64. Let me check again.
> 
> I see one timeout error on s390x but not always. See below. It occurs with
> or without this patchset. The other x86_64, ppc64 arches run fine (apart
> from one io test failing from time to time)

Ah ! I got this once on aarch64 :

  161/486 ERROR:../tests/qtest/virtio-net-failover.c:1222:test_migrate_abort_wait_unplug: 'device' should not be NULL ERROR
161/486 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover                  ERROR            5.98s   killed by signal 6 SIGABRT
>>> G_TEST_DBUS_DAEMON=/home/legoater/work/qemu/qemu.git/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=119 QTEST_QEMU_BINARY=./qemu-system-x86_64 QTEST_QEMU_IMG=./qemu-img PYTHON=/home/legoater/work/qemu/qemu.git/build/pyvenv/bin/python3 QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon /home/legoater/work/qemu/qemu.git/build/tests/qtest/virtio-net-failover --tap -k
―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
stderr:
qemu-system-x86_64: ram_save_setup failed: Input/output error
**
ERROR:../tests/qtest/virtio-net-failover.c:1222:test_migrate_abort_wait_unplug: 'device' should not be NULL

(test program exited with status code -6)
―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――

I couldn't reproduce yet :/

Thanks,

C.
Fabiano Rosas March 8, 2024, 2:11 p.m. UTC | #6
Peter Xu <peterx@redhat.com> writes:

> On Wed, Mar 06, 2024 at 02:34:25PM +0100, Cédric Le Goater wrote:
>> This prepares ground for the changes coming next which add an Error**
>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>> now handle the error and fail earlier setting the migration state from
>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>> 
>> In qemu_savevm_state(), move the cleanup to preserve the error
>> reported by .save_setup() handlers.
>> 
>> Since the previous behavior was to ignore errors at this step of
>> migration, this change should be examined closely to check that
>> cleanups are still correctly done.
>> 
>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>> ---
>> 
>>  Changes in v4:
>>  
>>  - Merged cleanup change in qemu_savevm_state()
>>    
>>  Changes in v3:
>>  
>>  - Set migration state to MIGRATION_STATUS_FAILED 
>>  - Fixed error handling to be done under lock in bg_migration_thread()
>>  - Made sure an error is always set in case of failure in
>>    qemu_savevm_state_setup()
>>    
>>  migration/savevm.h    |  2 +-
>>  migration/migration.c | 27 ++++++++++++++++++++++++---
>>  migration/savevm.c    | 26 +++++++++++++++-----------
>>  3 files changed, 40 insertions(+), 15 deletions(-)
>> 
>> diff --git a/migration/savevm.h b/migration/savevm.h
>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>> --- a/migration/savevm.h
>> +++ b/migration/savevm.h
>> @@ -32,7 +32,7 @@
>>  bool qemu_savevm_state_blocked(Error **errp);
>>  void qemu_savevm_non_migratable_list(strList **reasons);
>>  int qemu_savevm_state_prepare(Error **errp);
>> -void qemu_savevm_state_setup(QEMUFile *f);
>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>  bool qemu_savevm_state_guest_unplug_pending(void);
>>  int qemu_savevm_state_resume_prepare(MigrationState *s);
>>  void qemu_savevm_state_header(QEMUFile *f);
>> diff --git a/migration/migration.c b/migration/migration.c
>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>      int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>      MigThrError thr_error;
>>      bool urgent = false;
>> +    Error *local_err = NULL;
>> +    int ret;
>>  
>>      thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>  
>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>      }
>>  
>>      bql_lock();
>> -    qemu_savevm_state_setup(s->to_dst_file);
>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>      bql_unlock();
>>  
>> +    if (ret) {
>> +        migrate_set_error(s, local_err);
>> +        error_free(local_err);
>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>> +                          MIGRATION_STATUS_FAILED);
>> +        goto out;
>> +     }
>
> There's a small indent issue, I can fix it.
>
> The bigger problem is I _think_ this will trigger a ci failure in the
> virtio-net-failover test:
>
> ▶ 121/464 ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling") ERROR         
> 121/464 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover    ERROR            4.77s   killed by signal 6 SIGABRT
>>>> PYTHON=/builds/peterx/qemu/build/pyvenv/bin/python3.8 G_TEST_DBUS_DAEMON=/builds/peterx/qemu/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=161 QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon QTEST_QEMU_BINARY=./qemu-system-x86_64 /builds/peterx/qemu/build/tests/qtest/virtio-net-failover --tap -k
> ――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――
> stderr:
> qemu-system-x86_64: ram_save_setup failed: Input/output error
> **
> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling")

I would say testing for the CANCELLING state is unreliable, there's even
code a few lines below in the test that does the proper thing of testing
for CANCELLED in a loop.

However, the comment: 

 /* while the card is not ejected, we must be in "cancelling" state */

seems to imply that after migrate_fd_cancel (state==CANCELLING), the
migrate_fd_cleanup (state==CANCELLED) would only be executed after
"unplugging the card". So there must be "some logic" that has the effect
of preventing cleanup.

> (test program exited with status code -6)
> ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
>
> I am not familiar enough with the failover code, and may not have time
> today to follow this up, copy Laurent.  Cedric, if you have time, please
> have a look.  I'll give it a shot on Monday to find a solution, otherwise
> we may need to postpone some of the patches to 9.1.
>
> Thanks,
>
>> +
>>      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>                                 MIGRATION_STATUS_ACTIVE);
>>  
>> @@ -3530,6 +3540,9 @@ static void *bg_migration_thread(void *opaque)
>>      MigThrError thr_error;
>>      QEMUFile *fb;
>>      bool early_fail = true;
>> +    bool setup_fail = true;
>> +    Error *local_err = NULL;
>> +    int ret;
>>  
>>      rcu_register_thread();
>>      object_ref(OBJECT(s));
>> @@ -3563,9 +3576,16 @@ static void *bg_migration_thread(void *opaque)
>>  
>>      bql_lock();
>>      qemu_savevm_state_header(s->to_dst_file);
>> -    qemu_savevm_state_setup(s->to_dst_file);
>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>> +    if (ret) {
>> +        migrate_set_error(s, local_err);
>> +        error_free(local_err);
>> +        goto fail;
>> +    }
>>      bql_unlock();
>>  
>> +    setup_fail = false;
>> +
>>      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>                                 MIGRATION_STATUS_ACTIVE);
>>  
>> @@ -3632,7 +3652,8 @@ static void *bg_migration_thread(void *opaque)
>>  
>>  fail:
>>      if (early_fail) {
>> -        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>> +        migrate_set_state(&s->state,
>> +                setup_fail ? MIGRATION_STATUS_SETUP : MIGRATION_STATUS_ACTIVE,
>>                  MIGRATION_STATUS_FAILED);
>>          bql_unlock();
>>      }
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index ee31ffb5e88cea723039c754c30ce2c8a0ef35f3..63fdbb5ad7d4dbfaef1d2094350bf302cc677602 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -1310,11 +1310,11 @@ int qemu_savevm_state_prepare(Error **errp)
>>      return 0;
>>  }
>>  
>> -void qemu_savevm_state_setup(QEMUFile *f)
>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
>>  {
>> +    ERRP_GUARD();
>>      MigrationState *ms = migrate_get_current();
>>      SaveStateEntry *se;
>> -    Error *local_err = NULL;
>>      int ret = 0;
>>  
>>      json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
>> @@ -1323,10 +1323,9 @@ void qemu_savevm_state_setup(QEMUFile *f)
>>      trace_savevm_state_setup();
>>      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>>          if (se->vmsd && se->vmsd->early_setup) {
>> -            ret = vmstate_save(f, se, ms->vmdesc, &local_err);
>> +            ret = vmstate_save(f, se, ms->vmdesc, errp);
>>              if (ret) {
>> -                migrate_set_error(ms, local_err);
>> -                error_report_err(local_err);
>> +                migrate_set_error(ms, *errp);
>>                  qemu_file_set_error(f, ret);
>>                  break;
>>              }
>> @@ -1346,18 +1345,19 @@ void qemu_savevm_state_setup(QEMUFile *f)
>>          ret = se->ops->save_setup(f, se->opaque);
>>          save_section_footer(f, se);
>>          if (ret < 0) {
>> +            error_setg(errp, "failed to setup SaveStateEntry with id(name): "
>> +                       "%d(%s): %d", se->section_id, se->idstr, ret);
>>              qemu_file_set_error(f, ret);
>>              break;
>>          }
>>      }
>>  
>>      if (ret) {
>> -        return;
>> +        return ret;
>>      }
>>  
>> -    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
>> -        error_report_err(local_err);
>> -    }
>> +    /* TODO: Should we check that errp is set in case of failure ? */
>> +    return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
>>  }
>>  
>>  int qemu_savevm_state_resume_prepare(MigrationState *s)
>> @@ -1728,7 +1728,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>>      ms->to_dst_file = f;
>>  
>>      qemu_savevm_state_header(f);
>> -    qemu_savevm_state_setup(f);
>> +    ret = qemu_savevm_state_setup(f, errp);
>> +    if (ret) {
>> +        goto cleanup;
>> +    }
>>  
>>      while (qemu_file_get_error(f) == 0) {
>>          if (qemu_savevm_state_iterate(f, false) > 0) {
>> @@ -1741,10 +1744,11 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>>          qemu_savevm_state_complete_precopy(f, false, false);
>>          ret = qemu_file_get_error(f);
>>      }
>> -    qemu_savevm_state_cleanup();
>>      if (ret != 0) {
>>          error_setg_errno(errp, -ret, "Error while writing VM state");
>>      }
>> +cleanup:
>> +    qemu_savevm_state_cleanup();
>>  
>>      if (ret != 0) {
>>          status = MIGRATION_STATUS_FAILED;
>> -- 
>> 2.44.0
>>
Peter Xu March 8, 2024, 2:17 p.m. UTC | #7
On Fri, Mar 08, 2024 at 02:55:30PM +0100, Cédric Le Goater wrote:
> On 3/8/24 14:39, Cédric Le Goater wrote:
> > On 3/8/24 14:14, Cédric Le Goater wrote:
> > > On 3/8/24 13:56, Peter Xu wrote:
> > > > On Wed, Mar 06, 2024 at 02:34:25PM +0100, Cédric Le Goater wrote:
> > > > > This prepares ground for the changes coming next which add an Error**
> > > > > argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
> > > > > now handle the error and fail earlier setting the migration state from
> > > > > MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
> > > > > 
> > > > > In qemu_savevm_state(), move the cleanup to preserve the error
> > > > > reported by .save_setup() handlers.
> > > > > 
> > > > > Since the previous behavior was to ignore errors at this step of
> > > > > migration, this change should be examined closely to check that
> > > > > cleanups are still correctly done.
> > > > > 
> > > > > Signed-off-by: Cédric Le Goater <clg@redhat.com>
> > > > > ---
> > > > > 
> > > > >   Changes in v4:
> > > > >   - Merged cleanup change in qemu_savevm_state()
> > > > >   Changes in v3:
> > > > >   - Set migration state to MIGRATION_STATUS_FAILED
> > > > >   - Fixed error handling to be done under lock in bg_migration_thread()
> > > > >   - Made sure an error is always set in case of failure in
> > > > >     qemu_savevm_state_setup()
> > > > >   migration/savevm.h    |  2 +-
> > > > >   migration/migration.c | 27 ++++++++++++++++++++++++---
> > > > >   migration/savevm.c    | 26 +++++++++++++++-----------
> > > > >   3 files changed, 40 insertions(+), 15 deletions(-)
> > > > > 
> > > > > diff --git a/migration/savevm.h b/migration/savevm.h
> > > > > index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
> > > > > --- a/migration/savevm.h
> > > > > +++ b/migration/savevm.h
> > > > > @@ -32,7 +32,7 @@
> > > > >   bool qemu_savevm_state_blocked(Error **errp);
> > > > >   void qemu_savevm_non_migratable_list(strList **reasons);
> > > > >   int qemu_savevm_state_prepare(Error **errp);
> > > > > -void qemu_savevm_state_setup(QEMUFile *f);
> > > > > +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
> > > > >   bool qemu_savevm_state_guest_unplug_pending(void);
> > > > >   int qemu_savevm_state_resume_prepare(MigrationState *s);
> > > > >   void qemu_savevm_state_header(QEMUFile *f);
> > > > > diff --git a/migration/migration.c b/migration/migration.c
> > > > > index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
> > > > > --- a/migration/migration.c
> > > > > +++ b/migration/migration.c
> > > > > @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
> > > > >       int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> > > > >       MigThrError thr_error;
> > > > >       bool urgent = false;
> > > > > +    Error *local_err = NULL;
> > > > > +    int ret;
> > > > >       thread = migration_threads_add("live_migration", qemu_get_thread_id());
> > > > > @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
> > > > >       }
> > > > >       bql_lock();
> > > > > -    qemu_savevm_state_setup(s->to_dst_file);
> > > > > +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
> > > > >       bql_unlock();
> > > > > +    if (ret) {
> > > > > +        migrate_set_error(s, local_err);
> > > > > +        error_free(local_err);
> > > > > +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> > > > > +                          MIGRATION_STATUS_FAILED);
> > > > > +        goto out;
> > > > > +     }
> > > > 
> > > > There's a small indent issue, I can fix it.
> > > 
> > > checkpatch did not report anything.
> > > 
> > > > 
> > > > The bigger problem is I _think_ this will trigger a ci failure in the
> > > > virtio-net-failover test:
> > > > 
> > > > ▶ 121/464 ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling") ERROR
> > > > 121/464 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover    ERROR            4.77s   killed by signal 6 SIGABRT
> > > > > > > PYTHON=/builds/peterx/qemu/build/pyvenv/bin/python3.8 G_TEST_DBUS_DAEMON=/builds/peterx/qemu/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=161 QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon QTEST_QEMU_BINARY=./qemu-system-x86_64 /builds/peterx/qemu/build/tests/qtest/virtio-net-failover --tap -k
> > > > ――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――
> > > > stderr:
> > > > qemu-system-x86_64: ram_save_setup failed: Input/output error
> > > > **
> > > > ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
> > > > (test program exited with status code -6)
> > > > ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
> > > > 
> > > > I am not familiar enough with the failover code, and may not have time
> > > > today to follow this up, copy Laurent.  Cedric, if you have time, please
> > > > have a look.
> > > 
> > > 
> > > Sure. Weird because I usually run make check on x86_64, s390x, ppc64 and
> > > aarch64. Let me check again.
> > 
> > I see one timeout error on s390x but not always. See below. It occurs with
> > or without this patchset. The other x86_64, ppc64 arches run fine (apart
> > from one io test failing from time to time)
> 
> Ah ! I got this once on aarch64 :
> 
>  161/486 ERROR:../tests/qtest/virtio-net-failover.c:1222:test_migrate_abort_wait_unplug: 'device' should not be NULL ERROR
> 161/486 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover                  ERROR            5.98s   killed by signal 6 SIGABRT
> > > > G_TEST_DBUS_DAEMON=/home/legoater/work/qemu/qemu.git/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=119 QTEST_QEMU_BINARY=./qemu-system-x86_64 QTEST_QEMU_IMG=./qemu-img PYTHON=/home/legoater/work/qemu/qemu.git/build/pyvenv/bin/python3 QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon /home/legoater/work/qemu/qemu.git/build/tests/qtest/virtio-net-failover --tap -k
> ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
> stderr:
> qemu-system-x86_64: ram_save_setup failed: Input/output error
> **
> ERROR:../tests/qtest/virtio-net-failover.c:1222:test_migrate_abort_wait_unplug: 'device' should not be NULL
> 
> (test program exited with status code -6)
> ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――

Hmm, this one seems different..

> 
> I couldn't reproduce yet :/

I never reproduced it locally on x86, and my failure is always at checking
"cancelling" v.s. "cancelled" rather than the NULL check.  It's much easier
to trigger on CI in check-system-centos (I don't know why centos..):

https://gitlab.com/peterx/qemu/-/jobs/6351020546

I think at least for the error I hit, the problem is the failover test will
cancel the migration, but if it cancels too fast and during setup now it
can already fail it (while it won't fail before when we ignore
qemu_savevm_state_setup() errors), and I think it'll skip:

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                               MIGRATION_STATUS_ACTIVE);

It seems the test wants the "cancelling" to hold until later:

    /* while the card is not ejected, we must be in "cancelling" state */
    ret = migrate_status(qts);

    status = qdict_get_str(ret, "status");
    g_assert_cmpstr(status, ==, "cancelling");
    qobject_unref(ret);

    /* OS unplugs the cards, QEMU can move from wait-unplug state */
    qtest_outl(qts, ACPI_PCIHP_ADDR_ICH9 + PCI_EJ_BASE, 1);

Again, since I'll need to read the failover code, not much I can tell.
Laurent might have a clue.

/me disappears..
Fabiano Rosas March 8, 2024, 2:36 p.m. UTC | #8
Cédric Le Goater <clg@redhat.com> writes:

> This prepares ground for the changes coming next which add an Error**
> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
> now handle the error and fail earlier setting the migration state from
> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>
> In qemu_savevm_state(), move the cleanup to preserve the error
> reported by .save_setup() handlers.
>
> Since the previous behavior was to ignore errors at this step of
> migration, this change should be examined closely to check that
> cleanups are still correctly done.
>
> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> ---
>
>  Changes in v4:
>  
>  - Merged cleanup change in qemu_savevm_state()
>    
>  Changes in v3:
>  
>  - Set migration state to MIGRATION_STATUS_FAILED 
>  - Fixed error handling to be done under lock in bg_migration_thread()
>  - Made sure an error is always set in case of failure in
>    qemu_savevm_state_setup()
>    
>  migration/savevm.h    |  2 +-
>  migration/migration.c | 27 ++++++++++++++++++++++++---
>  migration/savevm.c    | 26 +++++++++++++++-----------
>  3 files changed, 40 insertions(+), 15 deletions(-)
>
> diff --git a/migration/savevm.h b/migration/savevm.h
> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
> --- a/migration/savevm.h
> +++ b/migration/savevm.h
> @@ -32,7 +32,7 @@
>  bool qemu_savevm_state_blocked(Error **errp);
>  void qemu_savevm_non_migratable_list(strList **reasons);
>  int qemu_savevm_state_prepare(Error **errp);
> -void qemu_savevm_state_setup(QEMUFile *f);
> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>  bool qemu_savevm_state_guest_unplug_pending(void);
>  int qemu_savevm_state_resume_prepare(MigrationState *s);
>  void qemu_savevm_state_header(QEMUFile *f);
> diff --git a/migration/migration.c b/migration/migration.c
> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>      int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>      MigThrError thr_error;
>      bool urgent = false;
> +    Error *local_err = NULL;
> +    int ret;
>  
>      thread = migration_threads_add("live_migration", qemu_get_thread_id());
>  
> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>      }
>  
>      bql_lock();
> -    qemu_savevm_state_setup(s->to_dst_file);
> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>      bql_unlock();
>  
> +    if (ret) {
> +        migrate_set_error(s, local_err);
> +        error_free(local_err);
> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> +                          MIGRATION_STATUS_FAILED);
> +        goto out;
> +     }
> +
>      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>                                 MIGRATION_STATUS_ACTIVE);

This^ should be before the new block it seems:

GOOD:
migrate_set_state new state setup
migrate_set_state new state wait-unplug
migrate_fd_cancel 
migrate_set_state new state cancelling
migrate_fd_cleanup 
migrate_set_state new state cancelled
migrate_fd_cancel 
ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug

BAD:
migrate_set_state new state setup
migrate_fd_cancel 
migrate_set_state new state cancelling
migrate_fd_cleanup 
migrate_set_state new state cancelled
qemu-system-x86_64: ram_save_setup failed: Input/output error
**
ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
assertion failed (status == "cancelling"): ("cancelled" == "cancelling")

Otherwise migration_iteration_finish() will schedule the cleanup BH and
that will run concurrently with migrate_fd_cancel() issued by the test
and bad things happen.

=====
PS: I guess the next level in our Freestyle Concurrency video-game is to
make migrate_fd_cancel() stop setting state and poking files and only
set a flag that's tested in the other parts of the code.

>  
> @@ -3530,6 +3540,9 @@ static void *bg_migration_thread(void *opaque)
>      MigThrError thr_error;
>      QEMUFile *fb;
>      bool early_fail = true;
> +    bool setup_fail = true;
> +    Error *local_err = NULL;
> +    int ret;
>  
>      rcu_register_thread();
>      object_ref(OBJECT(s));
> @@ -3563,9 +3576,16 @@ static void *bg_migration_thread(void *opaque)
>  
>      bql_lock();
>      qemu_savevm_state_header(s->to_dst_file);
> -    qemu_savevm_state_setup(s->to_dst_file);
> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
> +    if (ret) {
> +        migrate_set_error(s, local_err);
> +        error_free(local_err);
> +        goto fail;
> +    }
>      bql_unlock();
>  
> +    setup_fail = false;
> +
>      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>                                 MIGRATION_STATUS_ACTIVE);
>  
> @@ -3632,7 +3652,8 @@ static void *bg_migration_thread(void *opaque)
>  
>  fail:
>      if (early_fail) {
> -        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> +        migrate_set_state(&s->state,
> +                setup_fail ? MIGRATION_STATUS_SETUP : MIGRATION_STATUS_ACTIVE,
>                  MIGRATION_STATUS_FAILED);
>          bql_unlock();
>      }
> diff --git a/migration/savevm.c b/migration/savevm.c
> index ee31ffb5e88cea723039c754c30ce2c8a0ef35f3..63fdbb5ad7d4dbfaef1d2094350bf302cc677602 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1310,11 +1310,11 @@ int qemu_savevm_state_prepare(Error **errp)
>      return 0;
>  }
>  
> -void qemu_savevm_state_setup(QEMUFile *f)
> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
>  {
> +    ERRP_GUARD();
>      MigrationState *ms = migrate_get_current();
>      SaveStateEntry *se;
> -    Error *local_err = NULL;
>      int ret = 0;
>  
>      json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
> @@ -1323,10 +1323,9 @@ void qemu_savevm_state_setup(QEMUFile *f)
>      trace_savevm_state_setup();
>      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>          if (se->vmsd && se->vmsd->early_setup) {
> -            ret = vmstate_save(f, se, ms->vmdesc, &local_err);
> +            ret = vmstate_save(f, se, ms->vmdesc, errp);
>              if (ret) {
> -                migrate_set_error(ms, local_err);
> -                error_report_err(local_err);
> +                migrate_set_error(ms, *errp);
>                  qemu_file_set_error(f, ret);
>                  break;
>              }
> @@ -1346,18 +1345,19 @@ void qemu_savevm_state_setup(QEMUFile *f)
>          ret = se->ops->save_setup(f, se->opaque);
>          save_section_footer(f, se);
>          if (ret < 0) {
> +            error_setg(errp, "failed to setup SaveStateEntry with id(name): "
> +                       "%d(%s): %d", se->section_id, se->idstr, ret);
>              qemu_file_set_error(f, ret);
>              break;
>          }
>      }
>  
>      if (ret) {
> -        return;
> +        return ret;
>      }
>  
> -    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
> -        error_report_err(local_err);
> -    }
> +    /* TODO: Should we check that errp is set in case of failure ? */
> +    return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
>  }
>  
>  int qemu_savevm_state_resume_prepare(MigrationState *s)
> @@ -1728,7 +1728,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>      ms->to_dst_file = f;
>  
>      qemu_savevm_state_header(f);
> -    qemu_savevm_state_setup(f);
> +    ret = qemu_savevm_state_setup(f, errp);
> +    if (ret) {
> +        goto cleanup;
> +    }
>  
>      while (qemu_file_get_error(f) == 0) {
>          if (qemu_savevm_state_iterate(f, false) > 0) {
> @@ -1741,10 +1744,11 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>          qemu_savevm_state_complete_precopy(f, false, false);
>          ret = qemu_file_get_error(f);
>      }
> -    qemu_savevm_state_cleanup();
>      if (ret != 0) {
>          error_setg_errno(errp, -ret, "Error while writing VM state");
>      }
> +cleanup:
> +    qemu_savevm_state_cleanup();
>  
>      if (ret != 0) {
>          status = MIGRATION_STATUS_FAILED;
Cédric Le Goater March 11, 2024, 6:12 p.m. UTC | #9
On 3/8/24 15:17, Peter Xu wrote:
> On Fri, Mar 08, 2024 at 02:55:30PM +0100, Cédric Le Goater wrote:
>> On 3/8/24 14:39, Cédric Le Goater wrote:
>>> On 3/8/24 14:14, Cédric Le Goater wrote:
>>>> On 3/8/24 13:56, Peter Xu wrote:
>>>>> On Wed, Mar 06, 2024 at 02:34:25PM +0100, Cédric Le Goater wrote:
>>>>>> This prepares ground for the changes coming next which add an Error**
>>>>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>>>>>> now handle the error and fail earlier setting the migration state from
>>>>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>>>>>
>>>>>> In qemu_savevm_state(), move the cleanup to preserve the error
>>>>>> reported by .save_setup() handlers.
>>>>>>
>>>>>> Since the previous behavior was to ignore errors at this step of
>>>>>> migration, this change should be examined closely to check that
>>>>>> cleanups are still correctly done.
>>>>>>
>>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>>> ---
>>>>>>
>>>>>>    Changes in v4:
>>>>>>    - Merged cleanup change in qemu_savevm_state()
>>>>>>    Changes in v3:
>>>>>>    - Set migration state to MIGRATION_STATUS_FAILED
>>>>>>    - Fixed error handling to be done under lock in bg_migration_thread()
>>>>>>    - Made sure an error is always set in case of failure in
>>>>>>      qemu_savevm_state_setup()
>>>>>>    migration/savevm.h    |  2 +-
>>>>>>    migration/migration.c | 27 ++++++++++++++++++++++++---
>>>>>>    migration/savevm.c    | 26 +++++++++++++++-----------
>>>>>>    3 files changed, 40 insertions(+), 15 deletions(-)
>>>>>>
>>>>>> diff --git a/migration/savevm.h b/migration/savevm.h
>>>>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>>>>>> --- a/migration/savevm.h
>>>>>> +++ b/migration/savevm.h
>>>>>> @@ -32,7 +32,7 @@
>>>>>>    bool qemu_savevm_state_blocked(Error **errp);
>>>>>>    void qemu_savevm_non_migratable_list(strList **reasons);
>>>>>>    int qemu_savevm_state_prepare(Error **errp);
>>>>>> -void qemu_savevm_state_setup(QEMUFile *f);
>>>>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>>>>>    bool qemu_savevm_state_guest_unplug_pending(void);
>>>>>>    int qemu_savevm_state_resume_prepare(MigrationState *s);
>>>>>>    void qemu_savevm_state_header(QEMUFile *f);
>>>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>>>>>> --- a/migration/migration.c
>>>>>> +++ b/migration/migration.c
>>>>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>>>>>        int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>>>>        MigThrError thr_error;
>>>>>>        bool urgent = false;
>>>>>> +    Error *local_err = NULL;
>>>>>> +    int ret;
>>>>>>        thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>>>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>>>>>        }
>>>>>>        bql_lock();
>>>>>> -    qemu_savevm_state_setup(s->to_dst_file);
>>>>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>>>>        bql_unlock();
>>>>>> +    if (ret) {
>>>>>> +        migrate_set_error(s, local_err);
>>>>>> +        error_free(local_err);
>>>>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>>>>> +                          MIGRATION_STATUS_FAILED);
>>>>>> +        goto out;
>>>>>> +     }
>>>>>
>>>>> There's a small indent issue, I can fix it.
>>>>
>>>> checkpatch did not report anything.
>>>>
>>>>>
>>>>> The bigger problem is I _think_ this will trigger a ci failure in the
>>>>> virtio-net-failover test:
>>>>>
>>>>> ▶ 121/464 ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling") ERROR
>>>>> 121/464 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover    ERROR            4.77s   killed by signal 6 SIGABRT
>>>>>>>> PYTHON=/builds/peterx/qemu/build/pyvenv/bin/python3.8 G_TEST_DBUS_DAEMON=/builds/peterx/qemu/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=161 QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon QTEST_QEMU_BINARY=./qemu-system-x86_64 /builds/peterx/qemu/build/tests/qtest/virtio-net-failover --tap -k
>>>>> ――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――
>>>>> stderr:
>>>>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>>>>> **
>>>>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>>>>> (test program exited with status code -6)
>>>>> ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
>>>>>
>>>>> I am not familiar enough with the failover code, and may not have time
>>>>> today to follow this up, copy Laurent.  Cedric, if you have time, please
>>>>> have a look.
>>>>
>>>>
>>>> Sure. Weird because I usually run make check on x86_64, s390x, ppc64 and
>>>> aarch64. Let me check again.
>>>
>>> I see one timeout error on s390x but not always. See below. It occurs with
>>> or without this patchset. The other x86_64, ppc64 arches run fine (apart
>>> from one io test failing from time to time)
>>
>> Ah ! I got this once on aarch64 :
>>
>>   161/486 ERROR:../tests/qtest/virtio-net-failover.c:1222:test_migrate_abort_wait_unplug: 'device' should not be NULL ERROR
>> 161/486 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover                  ERROR            5.98s   killed by signal 6 SIGABRT
>>>>> G_TEST_DBUS_DAEMON=/home/legoater/work/qemu/qemu.git/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=119 QTEST_QEMU_BINARY=./qemu-system-x86_64 QTEST_QEMU_IMG=./qemu-img PYTHON=/home/legoater/work/qemu/qemu.git/build/pyvenv/bin/python3 QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon /home/legoater/work/qemu/qemu.git/build/tests/qtest/virtio-net-failover --tap -k
>> ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
>> stderr:
>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>> **
>> ERROR:../tests/qtest/virtio-net-failover.c:1222:test_migrate_abort_wait_unplug: 'device' should not be NULL
>>
>> (test program exited with status code -6)
>> ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
> 
> Hmm, this one seems different..
> 
>>
>> I couldn't reproduce yet :/
> 
> I never reproduced it locally on x86, and my failure is always at checking
> "cancelling" v.s. "cancelled" rather than the NULL check.  It's much easier
> to trigger on CI in check-system-centos (I don't know why centos..):
> 
> https://gitlab.com/peterx/qemu/-/jobs/6351020546
> 
> I think at least for the error I hit, the problem is the failover test will
> cancel the migration, but if it cancels too fast and during setup now it
> can already fail it (while it won't fail before when we ignore
> qemu_savevm_state_setup() errors), and I think it'll skip:
> 
>      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>                                 MIGRATION_STATUS_ACTIVE);
> 
> It seems the test wants the "cancelling" to hold until later:
> 
>      /* while the card is not ejected, we must be in "cancelling" state */
>      ret = migrate_status(qts);
> 
>      status = qdict_get_str(ret, "status");
>      g_assert_cmpstr(status, ==, "cancelling");
>      qobject_unref(ret);
> 
>      /* OS unplugs the cards, QEMU can move from wait-unplug state */
>      qtest_outl(qts, ACPI_PCIHP_ADDR_ICH9 + PCI_EJ_BASE, 1);
> 
> Again, since I'll need to read the failover code, not much I can tell.
> Laurent might have a clue.

I guess we need to fix the test to handle failures and this looks
like a complex task.


C.
Cédric Le Goater March 11, 2024, 6:15 p.m. UTC | #10
On 3/8/24 15:36, Fabiano Rosas wrote:
> Cédric Le Goater <clg@redhat.com> writes:
> 
>> This prepares ground for the changes coming next which add an Error**
>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>> now handle the error and fail earlier setting the migration state from
>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>
>> In qemu_savevm_state(), move the cleanup to preserve the error
>> reported by .save_setup() handlers.
>>
>> Since the previous behavior was to ignore errors at this step of
>> migration, this change should be examined closely to check that
>> cleanups are still correctly done.
>>
>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>> ---
>>
>>   Changes in v4:
>>   
>>   - Merged cleanup change in qemu_savevm_state()
>>     
>>   Changes in v3:
>>   
>>   - Set migration state to MIGRATION_STATUS_FAILED
>>   - Fixed error handling to be done under lock in bg_migration_thread()
>>   - Made sure an error is always set in case of failure in
>>     qemu_savevm_state_setup()
>>     
>>   migration/savevm.h    |  2 +-
>>   migration/migration.c | 27 ++++++++++++++++++++++++---
>>   migration/savevm.c    | 26 +++++++++++++++-----------
>>   3 files changed, 40 insertions(+), 15 deletions(-)
>>
>> diff --git a/migration/savevm.h b/migration/savevm.h
>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>> --- a/migration/savevm.h
>> +++ b/migration/savevm.h
>> @@ -32,7 +32,7 @@
>>   bool qemu_savevm_state_blocked(Error **errp);
>>   void qemu_savevm_non_migratable_list(strList **reasons);
>>   int qemu_savevm_state_prepare(Error **errp);
>> -void qemu_savevm_state_setup(QEMUFile *f);
>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>   bool qemu_savevm_state_guest_unplug_pending(void);
>>   int qemu_savevm_state_resume_prepare(MigrationState *s);
>>   void qemu_savevm_state_header(QEMUFile *f);
>> diff --git a/migration/migration.c b/migration/migration.c
>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>       int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>       MigThrError thr_error;
>>       bool urgent = false;
>> +    Error *local_err = NULL;
>> +    int ret;
>>   
>>       thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>   
>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>       }
>>   
>>       bql_lock();
>> -    qemu_savevm_state_setup(s->to_dst_file);
>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>       bql_unlock();
>>   
>> +    if (ret) {
>> +        migrate_set_error(s, local_err);
>> +        error_free(local_err);
>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>> +                          MIGRATION_STATUS_FAILED);
>> +        goto out;
>> +     }
>> +
>>       qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>                                  MIGRATION_STATUS_ACTIVE);
> 
> This^ should be before the new block it seems:
> 
> GOOD:
> migrate_set_state new state setup
> migrate_set_state new state wait-unplug
> migrate_fd_cancel
> migrate_set_state new state cancelling
> migrate_fd_cleanup
> migrate_set_state new state cancelled
> migrate_fd_cancel
> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
> 
> BAD:
> migrate_set_state new state setup
> migrate_fd_cancel
> migrate_set_state new state cancelling
> migrate_fd_cleanup
> migrate_set_state new state cancelled
> qemu-system-x86_64: ram_save_setup failed: Input/output error
> **
> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
> 
> Otherwise migration_iteration_finish() will schedule the cleanup BH and
> that will run concurrently with migrate_fd_cancel() issued by the test
> and bad things happens.

This hack makes things work :

@@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
          qemu_savevm_send_colo_enable(s->to_dst_file);
      }
  
+    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
+                            MIGRATION_STATUS_SETUP);
+
      bql_lock();
      ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
      bql_unlock();

We should fix the test instead :) Unless waiting for failover devices
to unplug before the save_setup handlers and not after is ok.

commit c7e0acd5a3f8 ("migration: add new migration state wait-unplug")
is not clear about the justification:

     This patch adds a new migration state called wait-unplug.  It is entered
     after the SETUP state if failover devices are present. It will transition
    into ACTIVE once all devices were successfully unplugged from the guest.


> =====
> PS: I guess the next level in our Freestyle Concurrency video-game is to
> make migrate_fd_cancel() stop setting state and poking files and only
> set a flag that's tested in the other parts of the code.

Is that a new item on the TODO list?

Thanks,

C.


> 
>>   
>> @@ -3530,6 +3540,9 @@ static void *bg_migration_thread(void *opaque)
>>       MigThrError thr_error;
>>       QEMUFile *fb;
>>       bool early_fail = true;
>> +    bool setup_fail = true;
>> +    Error *local_err = NULL;
>> +    int ret;
>>   
>>       rcu_register_thread();
>>       object_ref(OBJECT(s));
>> @@ -3563,9 +3576,16 @@ static void *bg_migration_thread(void *opaque)
>>   
>>       bql_lock();
>>       qemu_savevm_state_header(s->to_dst_file);
>> -    qemu_savevm_state_setup(s->to_dst_file);
>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>> +    if (ret) {
>> +        migrate_set_error(s, local_err);
>> +        error_free(local_err);
>> +        goto fail;
>> +    }
>>       bql_unlock();
>>   
>> +    setup_fail = false;
>> +
>>       qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>                                  MIGRATION_STATUS_ACTIVE);
>>   
>> @@ -3632,7 +3652,8 @@ static void *bg_migration_thread(void *opaque)
>>   
>>   fail:
>>       if (early_fail) {
>> -        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>> +        migrate_set_state(&s->state,
>> +                setup_fail ? MIGRATION_STATUS_SETUP : MIGRATION_STATUS_ACTIVE,
>>                   MIGRATION_STATUS_FAILED);
>>           bql_unlock();
>>       }
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index ee31ffb5e88cea723039c754c30ce2c8a0ef35f3..63fdbb5ad7d4dbfaef1d2094350bf302cc677602 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -1310,11 +1310,11 @@ int qemu_savevm_state_prepare(Error **errp)
>>       return 0;
>>   }
>>   
>> -void qemu_savevm_state_setup(QEMUFile *f)
>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
>>   {
>> +    ERRP_GUARD();
>>       MigrationState *ms = migrate_get_current();
>>       SaveStateEntry *se;
>> -    Error *local_err = NULL;
>>       int ret = 0;
>>   
>>       json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
>> @@ -1323,10 +1323,9 @@ void qemu_savevm_state_setup(QEMUFile *f)
>>       trace_savevm_state_setup();
>>       QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>>           if (se->vmsd && se->vmsd->early_setup) {
>> -            ret = vmstate_save(f, se, ms->vmdesc, &local_err);
>> +            ret = vmstate_save(f, se, ms->vmdesc, errp);
>>               if (ret) {
>> -                migrate_set_error(ms, local_err);
>> -                error_report_err(local_err);
>> +                migrate_set_error(ms, *errp);
>>                   qemu_file_set_error(f, ret);
>>                   break;
>>               }
>> @@ -1346,18 +1345,19 @@ void qemu_savevm_state_setup(QEMUFile *f)
>>           ret = se->ops->save_setup(f, se->opaque);
>>           save_section_footer(f, se);
>>           if (ret < 0) {
>> +            error_setg(errp, "failed to setup SaveStateEntry with id(name): "
>> +                       "%d(%s): %d", se->section_id, se->idstr, ret);
>>               qemu_file_set_error(f, ret);
>>               break;
>>           }
>>       }
>>   
>>       if (ret) {
>> -        return;
>> +        return ret;
>>       }
>>   
>> -    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
>> -        error_report_err(local_err);
>> -    }
>> +    /* TODO: Should we check that errp is set in case of failure ? */
>> +    return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
>>   }
>>   
>>   int qemu_savevm_state_resume_prepare(MigrationState *s)
>> @@ -1728,7 +1728,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>>       ms->to_dst_file = f;
>>   
>>       qemu_savevm_state_header(f);
>> -    qemu_savevm_state_setup(f);
>> +    ret = qemu_savevm_state_setup(f, errp);
>> +    if (ret) {
>> +        goto cleanup;
>> +    }
>>   
>>       while (qemu_file_get_error(f) == 0) {
>>           if (qemu_savevm_state_iterate(f, false) > 0) {
>> @@ -1741,10 +1744,11 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>>           qemu_savevm_state_complete_precopy(f, false, false);
>>           ret = qemu_file_get_error(f);
>>       }
>> -    qemu_savevm_state_cleanup();
>>       if (ret != 0) {
>>           error_setg_errno(errp, -ret, "Error while writing VM state");
>>       }
>> +cleanup:
>> +    qemu_savevm_state_cleanup();
>>   
>>       if (ret != 0) {
>>           status = MIGRATION_STATUS_FAILED;
>
Fabiano Rosas March 11, 2024, 7:03 p.m. UTC | #11
Cédric Le Goater <clg@redhat.com> writes:

> On 3/8/24 15:36, Fabiano Rosas wrote:
>> Cédric Le Goater <clg@redhat.com> writes:
>> 
>>> This prepares ground for the changes coming next which add an Error**
>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>>> now handle the error and fail earlier setting the migration state from
>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>>
>>> In qemu_savevm_state(), move the cleanup to preserve the error
>>> reported by .save_setup() handlers.
>>>
>>> Since the previous behavior was to ignore errors at this step of
>>> migration, this change should be examined closely to check that
>>> cleanups are still correctly done.
>>>
>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>> ---
>>>
>>>   Changes in v4:
>>>   
>>>   - Merged cleanup change in qemu_savevm_state()
>>>     
>>>   Changes in v3:
>>>   
>>>   - Set migration state to MIGRATION_STATUS_FAILED
>>>   - Fixed error handling to be done under lock in bg_migration_thread()
>>>   - Made sure an error is always set in case of failure in
>>>     qemu_savevm_state_setup()
>>>     
>>>   migration/savevm.h    |  2 +-
>>>   migration/migration.c | 27 ++++++++++++++++++++++++---
>>>   migration/savevm.c    | 26 +++++++++++++++-----------
>>>   3 files changed, 40 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/migration/savevm.h b/migration/savevm.h
>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>>> --- a/migration/savevm.h
>>> +++ b/migration/savevm.h
>>> @@ -32,7 +32,7 @@
>>>   bool qemu_savevm_state_blocked(Error **errp);
>>>   void qemu_savevm_non_migratable_list(strList **reasons);
>>>   int qemu_savevm_state_prepare(Error **errp);
>>> -void qemu_savevm_state_setup(QEMUFile *f);
>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>>   bool qemu_savevm_state_guest_unplug_pending(void);
>>>   int qemu_savevm_state_resume_prepare(MigrationState *s);
>>>   void qemu_savevm_state_header(QEMUFile *f);
>>> diff --git a/migration/migration.c b/migration/migration.c
>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>>> --- a/migration/migration.c
>>> +++ b/migration/migration.c
>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>>       int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>       MigThrError thr_error;
>>>       bool urgent = false;
>>> +    Error *local_err = NULL;
>>> +    int ret;
>>>   
>>>       thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>>   
>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>>       }
>>>   
>>>       bql_lock();
>>> -    qemu_savevm_state_setup(s->to_dst_file);
>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>       bql_unlock();
>>>   
>>> +    if (ret) {
>>> +        migrate_set_error(s, local_err);
>>> +        error_free(local_err);
>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>> +                          MIGRATION_STATUS_FAILED);
>>> +        goto out;
>>> +     }
>>> +
>>>       qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>>                                  MIGRATION_STATUS_ACTIVE);
>> 
>> This^ should be before the new block it seems:
>> 
>> GOOD:
>> migrate_set_state new state setup
>> migrate_set_state new state wait-unplug
>> migrate_fd_cancel
>> migrate_set_state new state cancelling
>> migrate_fd_cleanup
>> migrate_set_state new state cancelled
>> migrate_fd_cancel
>> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
>> 
>> BAD:
>> migrate_set_state new state setup
>> migrate_fd_cancel
>> migrate_set_state new state cancelling
>> migrate_fd_cleanup
>> migrate_set_state new state cancelled
>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>> **
>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
>> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>> 
>> Otherwise migration_iteration_finish() will schedule the cleanup BH and
>> that will run concurrently with migrate_fd_cancel() issued by the test
>> and bad things happens.
>
> This hack makes things work :
>
> @@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
>           qemu_savevm_send_colo_enable(s->to_dst_file);
>       }
>   
> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
> +                            MIGRATION_STATUS_SETUP);
> +

Why move it all the way up here? Has moving the wait_unplug before the
'if (ret)' block not worked for you?

>       bql_lock();
>       ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>       bql_unlock();
>
> We should fix the test instead :) Unless waiting for failover devices
> to unplug before the save_setup handlers and not after is ok.
>
> commit c7e0acd5a3f8 ("migration: add new migration state wait-unplug")
> is not clear about the justification:
>
>      This patch adds a new migration state called wait-unplug.  It is entered
>      after the SETUP state if failover devices are present. It will transition
>      into ACTIVE once all devices were successfully unplugged from the guest.

This is not clear indeed, but to me it seems having the wait-unplug
after setup was important.

>
>
>> =====
>> PS: I guess the next level in our Freestyle Concurrency video-game is to
>> make migrate_fd_cancel() stop setting state and poking files and only
>> set a flag that's tested in the other parts of the code.
>
> Is that a new item on the TODO list?

Yep, I'll add it to the wiki.
Peter Xu March 11, 2024, 8:10 p.m. UTC | #12
On Mon, Mar 11, 2024 at 04:03:14PM -0300, Fabiano Rosas wrote:
> Cédric Le Goater <clg@redhat.com> writes:
> 
> > On 3/8/24 15:36, Fabiano Rosas wrote:
> >> Cédric Le Goater <clg@redhat.com> writes:
> >> 
> >>> This prepares ground for the changes coming next which add an Error**
> >>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
> >>> now handle the error and fail earlier setting the migration state from
> >>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
> >>>
> >>> In qemu_savevm_state(), move the cleanup to preserve the error
> >>> reported by .save_setup() handlers.
> >>>
> >>> Since the previous behavior was to ignore errors at this step of
> >>> migration, this change should be examined closely to check that
> >>> cleanups are still correctly done.
> >>>
> >>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> >>> ---
> >>>
> >>>   Changes in v4:
> >>>   
> >>>   - Merged cleanup change in qemu_savevm_state()
> >>>     
> >>>   Changes in v3:
> >>>   
> >>>   - Set migration state to MIGRATION_STATUS_FAILED
> >>>   - Fixed error handling to be done under lock in bg_migration_thread()
> >>>   - Made sure an error is always set in case of failure in
> >>>     qemu_savevm_state_setup()
> >>>     
> >>>   migration/savevm.h    |  2 +-
> >>>   migration/migration.c | 27 ++++++++++++++++++++++++---
> >>>   migration/savevm.c    | 26 +++++++++++++++-----------
> >>>   3 files changed, 40 insertions(+), 15 deletions(-)
> >>>
> >>> diff --git a/migration/savevm.h b/migration/savevm.h
> >>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
> >>> --- a/migration/savevm.h
> >>> +++ b/migration/savevm.h
> >>> @@ -32,7 +32,7 @@
> >>>   bool qemu_savevm_state_blocked(Error **errp);
> >>>   void qemu_savevm_non_migratable_list(strList **reasons);
> >>>   int qemu_savevm_state_prepare(Error **errp);
> >>> -void qemu_savevm_state_setup(QEMUFile *f);
> >>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
> >>>   bool qemu_savevm_state_guest_unplug_pending(void);
> >>>   int qemu_savevm_state_resume_prepare(MigrationState *s);
> >>>   void qemu_savevm_state_header(QEMUFile *f);
> >>> diff --git a/migration/migration.c b/migration/migration.c
> >>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
> >>> --- a/migration/migration.c
> >>> +++ b/migration/migration.c
> >>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
> >>>       int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> >>>       MigThrError thr_error;
> >>>       bool urgent = false;
> >>> +    Error *local_err = NULL;
> >>> +    int ret;
> >>>   
> >>>       thread = migration_threads_add("live_migration", qemu_get_thread_id());
> >>>   
> >>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
> >>>       }
> >>>   
> >>>       bql_lock();
> >>> -    qemu_savevm_state_setup(s->to_dst_file);
> >>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
> >>>       bql_unlock();
> >>>   
> >>> +    if (ret) {
> >>> +        migrate_set_error(s, local_err);
> >>> +        error_free(local_err);
> >>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> >>> +                          MIGRATION_STATUS_FAILED);
> >>> +        goto out;
> >>> +     }
> >>> +
> >>>       qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
> >>>                                  MIGRATION_STATUS_ACTIVE);
> >> 
> >> This^ should be before the new block it seems:
> >> 
> >> GOOD:
> >> migrate_set_state new state setup
> >> migrate_set_state new state wait-unplug
> >> migrate_fd_cancel
> >> migrate_set_state new state cancelling
> >> migrate_fd_cleanup
> >> migrate_set_state new state cancelled
> >> migrate_fd_cancel
> >> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
> >> 
> >> BAD:
> >> migrate_set_state new state setup
> >> migrate_fd_cancel
> >> migrate_set_state new state cancelling
> >> migrate_fd_cleanup
> >> migrate_set_state new state cancelled
> >> qemu-system-x86_64: ram_save_setup failed: Input/output error
> >> **
> >> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
> >> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
> >> 
> >> Otherwise migration_iteration_finish() will schedule the cleanup BH and
> >> that will run concurrently with migrate_fd_cancel() issued by the test
> >> and bad things happens.
> >
> > This hack makes things work :
> >
> > @@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
> >           qemu_savevm_send_colo_enable(s->to_dst_file);
> >       }
> >   
> > +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
> > +                            MIGRATION_STATUS_SETUP);
> > +
> 
> Why move it all the way up here? Has moving the wait_unplug before the
> 'if (ret)' block not worked for you?
> 
> >       bql_lock();
> >       ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
> >       bql_unlock();
> >
> > We should fix the test instead :) Unless waiting for failover devices
> > to unplug before the save_setup handlers and not after is ok.
> >
> > commit c7e0acd5a3f8 ("migration: add new migration state wait-unplug")
> > is not clear about the justification:
> >
> >      This patch adds a new migration state called wait-unplug.  It is entered
> >      after the SETUP state if failover devices are present. It will transition
> >      into ACTIVE once all devices were successfully unplugged from the guest.
> 
> This is not clear indeed, but to me it seems having the wait-unplug
> after setup was important.

Finally got some time to read this code..

So far I didn't see an issue if it's called before the setup hooks.
Actually it looks to me it should better do that before those hooks.

IIUC what that qemu_savevm_wait_unplug() does is waiting for all the
primary devices to be completely unplugged before moving on the migration.

Here setup() hook, or to be explicit, the primary devices' VMSDs (if ever
existed, and if that was the concern) should have zero impact on such wait,
because the "unplug" should also contain one step to unregister those
vmsds; see the virtio_net_handle_migration_primary() where it has:

        if (failover_unplug_primary(n, dev)) {
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            ...
        }

So qemu_savevm_wait_unplug() looks like a pure wait function to me until
all the unplug is processed by the guest OS.  And it makes some sense to me
to avoid calling setup() (which can start to hold resources, like in RAM we
create bitmaps etc to prepare for migration) before such possible long halts.

In all cases, I guess it's still too rushed to figure out a plan; meanwhile
anything proposed for either test/code changes would better get some
reviews from either Laurent or other virtio-net guys.  I think I'll go
ahead the pull without the 2nd batch of patches.

> 
> >
> >
> >> =====
> >> PS: I guess the next level in our Freestyle Concurrency video-game is to
> >> make migrate_fd_cancel() stop setting state and poking files and only
> >> set a flag that's tested in the other parts of the code.
> >
> > Is that a new item on the TODO list?
> 
> Yep, I'll add it to the wiki.

Sounds like a good thing, however let's be aware of the evils (that are
always in the details..), where there can be users/tests relying on that
"CANCELLING" state, so it can be part of the ABIs.. :-(
Peter Xu March 11, 2024, 8:15 p.m. UTC | #13
On Mon, Mar 11, 2024 at 07:12:11PM +0100, Cédric Le Goater wrote:
> On 3/8/24 15:17, Peter Xu wrote:
> > On Fri, Mar 08, 2024 at 02:55:30PM +0100, Cédric Le Goater wrote:
> > > On 3/8/24 14:39, Cédric Le Goater wrote:
> > > > On 3/8/24 14:14, Cédric Le Goater wrote:
> > > > > On 3/8/24 13:56, Peter Xu wrote:
> > > > > > On Wed, Mar 06, 2024 at 02:34:25PM +0100, Cédric Le Goater wrote:
> > > > > > > This prepares ground for the changes coming next which add an Error**
> > > > > > > argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
> > > > > > > now handle the error and fail earlier setting the migration state from
> > > > > > > MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
> > > > > > > 
> > > > > > > In qemu_savevm_state(), move the cleanup to preserve the error
> > > > > > > reported by .save_setup() handlers.
> > > > > > > 
> > > > > > > Since the previous behavior was to ignore errors at this step of
> > > > > > > migration, this change should be examined closely to check that
> > > > > > > cleanups are still correctly done.
> > > > > > > 
> > > > > > > Signed-off-by: Cédric Le Goater <clg@redhat.com>
> > > > > > > ---
> > > > > > > 
> > > > > > >    Changes in v4:
> > > > > > >    - Merged cleanup change in qemu_savevm_state()
> > > > > > >    Changes in v3:
> > > > > > >    - Set migration state to MIGRATION_STATUS_FAILED
> > > > > > >    - Fixed error handling to be done under lock in bg_migration_thread()
> > > > > > >    - Made sure an error is always set in case of failure in
> > > > > > >      qemu_savevm_state_setup()
> > > > > > >    migration/savevm.h    |  2 +-
> > > > > > >    migration/migration.c | 27 ++++++++++++++++++++++++---
> > > > > > >    migration/savevm.c    | 26 +++++++++++++++-----------
> > > > > > >    3 files changed, 40 insertions(+), 15 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/migration/savevm.h b/migration/savevm.h
> > > > > > > index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
> > > > > > > --- a/migration/savevm.h
> > > > > > > +++ b/migration/savevm.h
> > > > > > > @@ -32,7 +32,7 @@
> > > > > > >    bool qemu_savevm_state_blocked(Error **errp);
> > > > > > >    void qemu_savevm_non_migratable_list(strList **reasons);
> > > > > > >    int qemu_savevm_state_prepare(Error **errp);
> > > > > > > -void qemu_savevm_state_setup(QEMUFile *f);
> > > > > > > +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
> > > > > > >    bool qemu_savevm_state_guest_unplug_pending(void);
> > > > > > >    int qemu_savevm_state_resume_prepare(MigrationState *s);
> > > > > > >    void qemu_savevm_state_header(QEMUFile *f);
> > > > > > > diff --git a/migration/migration.c b/migration/migration.c
> > > > > > > index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
> > > > > > > --- a/migration/migration.c
> > > > > > > +++ b/migration/migration.c
> > > > > > > @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
> > > > > > >        int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> > > > > > >        MigThrError thr_error;
> > > > > > >        bool urgent = false;
> > > > > > > +    Error *local_err = NULL;
> > > > > > > +    int ret;
> > > > > > >        thread = migration_threads_add("live_migration", qemu_get_thread_id());
> > > > > > > @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
> > > > > > >        }
> > > > > > >        bql_lock();
> > > > > > > -    qemu_savevm_state_setup(s->to_dst_file);
> > > > > > > +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
> > > > > > >        bql_unlock();
> > > > > > > +    if (ret) {
> > > > > > > +        migrate_set_error(s, local_err);
> > > > > > > +        error_free(local_err);
> > > > > > > +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> > > > > > > +                          MIGRATION_STATUS_FAILED);
> > > > > > > +        goto out;
> > > > > > > +     }
> > > > > > 
> > > > > > There's a small indent issue, I can fix it.
> > > > > 
> > > > > checkpatch didn't report anything.
> > > > > 
> > > > > > 
> > > > > > The bigger problem is I _think_ this will trigger a ci failure in the
> > > > > > virtio-net-failover test:
> > > > > > 
> > > > > > ▶ 121/464 ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling") ERROR
> > > > > > 121/464 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover    ERROR            4.77s   killed by signal 6 SIGABRT
> > > > > > > > > PYTHON=/builds/peterx/qemu/build/pyvenv/bin/python3.8 G_TEST_DBUS_DAEMON=/builds/peterx/qemu/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=161 QTEST_QEMU_IMG=./qemu-img QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon QTEST_QEMU_BINARY=./qemu-system-x86_64 /builds/peterx/qemu/build/tests/qtest/virtio-net-failover --tap -k
> > > > > > ――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――
> > > > > > stderr:
> > > > > > qemu-system-x86_64: ram_save_setup failed: Input/output error
> > > > > > **
> > > > > > ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug: assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
> > > > > > (test program exited with status code -6)
> > > > > > ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
> > > > > > 
> > > > > > I am not familiar enough with the failover code, and may not have time
> > > > > > today to follow this up, copy Laurent.  Cedric, if you have time, please
> > > > > > have a look.
> > > > > 
> > > > > 
> > > > > Sure. Weird because I usually run make check on x86_64, s390x, ppc64 and
> > > > > aarch64. Let me check again.
> > > > 
> > > > I see one timeout error on s390x but not always. See below. It occurs with
> > > > or without this patchset. The other x86_64, ppc64 arches run fine (apart
> > > > from one io test failing from time to time)
> > > 
> > > Ah ! I got this once on aarch64 :
> > > 
> > >   161/486 ERROR:../tests/qtest/virtio-net-failover.c:1222:test_migrate_abort_wait_unplug: 'device' should not be NULL ERROR
> > > 161/486 qemu:qtest+qtest-x86_64 / qtest-x86_64/virtio-net-failover                  ERROR            5.98s   killed by signal 6 SIGABRT
> > > > > > G_TEST_DBUS_DAEMON=/home/legoater/work/qemu/qemu.git/tests/dbus-vmstate-daemon.sh MALLOC_PERTURB_=119 QTEST_QEMU_BINARY=./qemu-system-x86_64 QTEST_QEMU_IMG=./qemu-img PYTHON=/home/legoater/work/qemu/qemu.git/build/pyvenv/bin/python3 QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon /home/legoater/work/qemu/qemu.git/build/tests/qtest/virtio-net-failover --tap -k
> > > ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― ✀  ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
> > > stderr:
> > > qemu-system-x86_64: ram_save_setup failed: Input/output error
> > > **
> > > ERROR:../tests/qtest/virtio-net-failover.c:1222:test_migrate_abort_wait_unplug: 'device' should not be NULL
> > > 
> > > (test program exited with status code -6)
> > > ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
> > 
> > Hmm, this one seems different..
> > 
> > > 
> > > I couldn't reproduce yet :/
> > 
> > I never reproduced it locally on x86, and my failure is always at checking
> > "cancelling" v.s. "cancelled" rather than the NULL check.  It's much easier
> > to trigger on CI in check-system-centos (I don't know why centos..):
> > 
> > https://gitlab.com/peterx/qemu/-/jobs/6351020546
> > 
> > I think at least for the error I hit, the problem is the failover test will
> > cancel the migration, but if it cancels too fast and during setup now it
> > can already fail it (while it won't fail before when we ignore
> > qemu_savevm_state_setup() errors), and I think it'll skip:
> > 
> >      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
> >                                 MIGRATION_STATUS_ACTIVE);
> > 
> > It seems the test wants the "cancelling" to hold until later:
> > 
> >      /* while the card is not ejected, we must be in "cancelling" state */
> >      ret = migrate_status(qts);
> > 
> >      status = qdict_get_str(ret, "status");
> >      g_assert_cmpstr(status, ==, "cancelling");
> >      qobject_unref(ret);
> > 
> >      /* OS unplugs the cards, QEMU can move from wait-unplug state */
> >      qtest_outl(qts, ACPI_PCIHP_ADDR_ICH9 + PCI_EJ_BASE, 1);
> > 
> > Again, since I'll need to read the failover code, not much I can tell.
> > Laurent might have a clue.
> 
> I guess we need to fix the test to handle failures and this looks
> like a complex task.

It may not be as complicated, but that'll need some reviews outside
migration people, and someone will need to post a formal patch.  That'll
definitely take some time.

We can keep working on this issue during the 9.0-rc if you want.  Our
current plan for migration (at least what I have in mind in this
release... Fabiano will take over 9.1 release pulls, so he has the freedom
to change this) is we will allow patches to be queued even during RCs,
while I'll prepare two trees, one for -next and one for -stable, then -next
candidates will only be included in the first 9.1 pull, -stable for RC
pulls if necessary.

Thanks,
Cédric Le Goater March 12, 2024, 12:32 p.m. UTC | #14
On 3/11/24 20:03, Fabiano Rosas wrote:
> Cédric Le Goater <clg@redhat.com> writes:
> 
>> On 3/8/24 15:36, Fabiano Rosas wrote:
>>> Cédric Le Goater <clg@redhat.com> writes:
>>>
>>>> This prepares ground for the changes coming next which add an Error**
>>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>>>> now handle the error and fail earlier setting the migration state from
>>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>>>
>>>> In qemu_savevm_state(), move the cleanup to preserve the error
>>>> reported by .save_setup() handlers.
>>>>
>>>> Since the previous behavior was to ignore errors at this step of
>>>> migration, this change should be examined closely to check that
>>>> cleanups are still correctly done.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>> ---
>>>>
>>>>    Changes in v4:
>>>>    
>>>>    - Merged cleanup change in qemu_savevm_state()
>>>>      
>>>>    Changes in v3:
>>>>    
>>>>    - Set migration state to MIGRATION_STATUS_FAILED
>>>>    - Fixed error handling to be done under lock in bg_migration_thread()
>>>>    - Made sure an error is always set in case of failure in
>>>>      qemu_savevm_state_setup()
>>>>      
>>>>    migration/savevm.h    |  2 +-
>>>>    migration/migration.c | 27 ++++++++++++++++++++++++---
>>>>    migration/savevm.c    | 26 +++++++++++++++-----------
>>>>    3 files changed, 40 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/migration/savevm.h b/migration/savevm.h
>>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>>>> --- a/migration/savevm.h
>>>> +++ b/migration/savevm.h
>>>> @@ -32,7 +32,7 @@
>>>>    bool qemu_savevm_state_blocked(Error **errp);
>>>>    void qemu_savevm_non_migratable_list(strList **reasons);
>>>>    int qemu_savevm_state_prepare(Error **errp);
>>>> -void qemu_savevm_state_setup(QEMUFile *f);
>>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>>>    bool qemu_savevm_state_guest_unplug_pending(void);
>>>>    int qemu_savevm_state_resume_prepare(MigrationState *s);
>>>>    void qemu_savevm_state_header(QEMUFile *f);
>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>>>> --- a/migration/migration.c
>>>> +++ b/migration/migration.c
>>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>>>        int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>>        MigThrError thr_error;
>>>>        bool urgent = false;
>>>> +    Error *local_err = NULL;
>>>> +    int ret;
>>>>    
>>>>        thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>>>    
>>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>>>        }
>>>>    
>>>>        bql_lock();
>>>> -    qemu_savevm_state_setup(s->to_dst_file);
>>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>>        bql_unlock();
>>>>    
>>>> +    if (ret) {
>>>> +        migrate_set_error(s, local_err);
>>>> +        error_free(local_err);
>>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>>> +                          MIGRATION_STATUS_FAILED);
>>>> +        goto out;
>>>> +     }
>>>> +
>>>>        qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>>>                                   MIGRATION_STATUS_ACTIVE);
>>>
>>> This^ should be before the new block it seems:
>>>
>>> GOOD:
>>> migrate_set_state new state setup
>>> migrate_set_state new state wait-unplug
>>> migrate_fd_cancel
>>> migrate_set_state new state cancelling
>>> migrate_fd_cleanup
>>> migrate_set_state new state cancelled
>>> migrate_fd_cancel
>>> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
>>>
>>> BAD:
>>> migrate_set_state new state setup
>>> migrate_fd_cancel
>>> migrate_set_state new state cancelling
>>> migrate_fd_cleanup
>>> migrate_set_state new state cancelled
>>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>>> **
>>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
>>> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>>>
>>> Otherwise migration_iteration_finish() will schedule the cleanup BH and
>>> that will run concurrently with migrate_fd_cancel() issued by the test
>>> and bad things happens.
>>
>> This hack makes things work :
>>
>> @@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
>>            qemu_savevm_send_colo_enable(s->to_dst_file);
>>        }
>>    
>> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>> +                            MIGRATION_STATUS_SETUP);
>> +
> 
> Why move it all the way up here? Has moving the wait_unplug before the
> 'if (ret)' block not worked for you?

We could be sleeping while holding the BQL. It looked wrong.


Thanks,

C.


> 
>>        bql_lock();
>>        ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>        bql_unlock();
>>
>> We should fix the test instead :) Unless waiting for failover devices
>> to unplug before the save_setup handlers and not after is ok.
>>
>> commit c7e0acd5a3f8 ("migration: add new migration state wait-unplug")
>> is not clear about the justification.:
>>
>>       This patch adds a new migration state called wait-unplug.  It is entered
>>       after the SETUP state if failover devices are present. It will transition
>>       into ACTIVE once all devices were successfully unplugged from the guest.
> 
> This is not clear indeed, but to me it seems having the wait-unplug
> after setup was important.
> 
>>
>>
>>> =====
>>> PS: I guess the next level in our Freestyle Concurrency video-game is to
>>> make migrate_fd_cancel() stop setting state and poking files and only
>>> set a flag that's tested in the other parts of the code.
>>
>> Is that a new item on the TODO list?
> 
> Yep, I'll add it to the wiki.
>
Cédric Le Goater March 12, 2024, 1:01 p.m. UTC | #15
On 3/11/24 21:10, Peter Xu wrote:
> On Mon, Mar 11, 2024 at 04:03:14PM -0300, Fabiano Rosas wrote:
>> Cédric Le Goater <clg@redhat.com> writes:
>>
>>> On 3/8/24 15:36, Fabiano Rosas wrote:
>>>> Cédric Le Goater <clg@redhat.com> writes:
>>>>
>>>>> This prepares ground for the changes coming next which add an Error**
>>>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>>>>> now handle the error and fail earlier setting the migration state from
>>>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>>>>
>>>>> In qemu_savevm_state(), move the cleanup to preserve the error
>>>>> reported by .save_setup() handlers.
>>>>>
>>>>> Since the previous behavior was to ignore errors at this step of
>>>>> migration, this change should be examined closely to check that
>>>>> cleanups are still correctly done.
>>>>>
>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>> ---
>>>>>
>>>>>    Changes in v4:
>>>>>    
>>>>>    - Merged cleanup change in qemu_savevm_state()
>>>>>      
>>>>>    Changes in v3:
>>>>>    
>>>>>    - Set migration state to MIGRATION_STATUS_FAILED
>>>>>    - Fixed error handling to be done under lock in bg_migration_thread()
>>>>>    - Made sure an error is always set in case of failure in
>>>>>      qemu_savevm_state_setup()
>>>>>      
>>>>>    migration/savevm.h    |  2 +-
>>>>>    migration/migration.c | 27 ++++++++++++++++++++++++---
>>>>>    migration/savevm.c    | 26 +++++++++++++++-----------
>>>>>    3 files changed, 40 insertions(+), 15 deletions(-)
>>>>>
>>>>> diff --git a/migration/savevm.h b/migration/savevm.h
>>>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>>>>> --- a/migration/savevm.h
>>>>> +++ b/migration/savevm.h
>>>>> @@ -32,7 +32,7 @@
>>>>>    bool qemu_savevm_state_blocked(Error **errp);
>>>>>    void qemu_savevm_non_migratable_list(strList **reasons);
>>>>>    int qemu_savevm_state_prepare(Error **errp);
>>>>> -void qemu_savevm_state_setup(QEMUFile *f);
>>>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>>>>    bool qemu_savevm_state_guest_unplug_pending(void);
>>>>>    int qemu_savevm_state_resume_prepare(MigrationState *s);
>>>>>    void qemu_savevm_state_header(QEMUFile *f);
>>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>>>>> --- a/migration/migration.c
>>>>> +++ b/migration/migration.c
>>>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>>>>        int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>>>        MigThrError thr_error;
>>>>>        bool urgent = false;
>>>>> +    Error *local_err = NULL;
>>>>> +    int ret;
>>>>>    
>>>>>        thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>>>>    
>>>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>>>>        }
>>>>>    
>>>>>        bql_lock();
>>>>> -    qemu_savevm_state_setup(s->to_dst_file);
>>>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>>>        bql_unlock();
>>>>>    
>>>>> +    if (ret) {
>>>>> +        migrate_set_error(s, local_err);
>>>>> +        error_free(local_err);
>>>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>>>> +                          MIGRATION_STATUS_FAILED);
>>>>> +        goto out;
>>>>> +     }
>>>>> +
>>>>>        qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>>>>                                   MIGRATION_STATUS_ACTIVE);
>>>>
>>>> This^ should be before the new block it seems:
>>>>
>>>> GOOD:
>>>> migrate_set_state new state setup
>>>> migrate_set_state new state wait-unplug
>>>> migrate_fd_cancel
>>>> migrate_set_state new state cancelling
>>>> migrate_fd_cleanup
>>>> migrate_set_state new state cancelled
>>>> migrate_fd_cancel
>>>> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
>>>>
>>>> BAD:
>>>> migrate_set_state new state setup
>>>> migrate_fd_cancel
>>>> migrate_set_state new state cancelling
>>>> migrate_fd_cleanup
>>>> migrate_set_state new state cancelled
>>>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>>>> **
>>>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
>>>> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>>>>
>>>> Otherwise migration_iteration_finish() will schedule the cleanup BH and
>>>> that will run concurrently with migrate_fd_cancel() issued by the test
>>>> and bad things happens.
>>>
>>> This hack makes things work :
>>>
>>> @@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
>>>            qemu_savevm_send_colo_enable(s->to_dst_file);
>>>        }
>>>    
>>> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>> +                            MIGRATION_STATUS_SETUP);
>>> +
>>
>> Why move it all the way up here? Has moving the wait_unplug before the
>> 'if (ret)' block not worked for you?
>>
>>>        bql_lock();
>>>        ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>        bql_unlock();
>>>
>>> We should fix the test instead :) Unless waiting for failover devices
>>> to unplug before the save_setup handlers and not after is ok.
>>>
>>> commit c7e0acd5a3f8 ("migration: add new migration state wait-unplug")
>>> is not clear about the justification.:
>>>
>>>       This patch adds a new migration state called wait-unplug.  It is entered
>>>       after the SETUP state if failover devices are present. It will transition
>>>       into ACTIVE once all devices were successfully unplugged from the guest.
>>
>> This is not clear indeed, but to me it seems having the wait-unplug
>> after setup was important.
> 
> Finally got some time to read this code..
> 
> So far I didn't see an issue if it's called before the setup hooks.
> Actually it looks to me it should better do that before those hooks.
>
> IIUC what that qemu_savevm_wait_unplug() does is waiting for all the
> primary devices to be completely unplugged before moving on the migration.
> 
> Here setup() hook, or to be explicit, the primary devices' VMSDs (if ever
> existed, and if that was the concern) should have zero impact on such wait,
> because the "unplug" should also contain one step to unregister those
> vmsds; see the virtio_net_handle_migration_primary() where it has:
> 
>          if (failover_unplug_primary(n, dev)) {
>              vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
>              ...
>          }
> 
> So qemu_savevm_wait_unplug() looks like a pure wait function to me until
> all the unplug is processed by the guest OS.  And it makes some sense to me
> to avoid calling setup() (which can start to hold resources, like in RAM we
> create bitmaps etc to prepare for migration) before such possible long halts.

I think so too now. VFIO is already sending state.

> 
> In all cases, I guess it's still too rush to figure out a plan, meanwhile
> anything proposed for either test/code changes would better get some
> reviews from either Laurent or other virtio-net guys.  I think I'll go
> ahead the pull without the 2nd batch of patches.
> 
>>
>>>
>>>
>>>> =====
>>>> PS: I guess the next level in our Freestyle Concurrency video-game is to
>>>> make migrate_fd_cancel() stop setting state and poking files and only
>>>> set a flag that's tested in the other parts of the code.
>>>
>>> Is that a new item on the TODO list?
>>
>> Yep, I'll add it to the wiki.
> 
> Sounds like a good thing, however let's be aware of the evils (that are
> always in the details..), where there can be users/tests relying on that
> "CANCELLING" state, so it can be part of the ABIs.. :-(

That's a good reason to move qemu_savevm_wait_unplug() and avoid breaking
the ABI.

Thanks,

C.
Cédric Le Goater March 12, 2024, 1:34 p.m. UTC | #16
On 3/12/24 13:32, Cédric Le Goater wrote:
> On 3/11/24 20:03, Fabiano Rosas wrote:
>> Cédric Le Goater <clg@redhat.com> writes:
>>
>>> On 3/8/24 15:36, Fabiano Rosas wrote:
>>>> Cédric Le Goater <clg@redhat.com> writes:
>>>>
>>>>> This prepares ground for the changes coming next which add an Error**
>>>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>>>>> now handle the error and fail earlier setting the migration state from
>>>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>>>>
>>>>> In qemu_savevm_state(), move the cleanup to preserve the error
>>>>> reported by .save_setup() handlers.
>>>>>
>>>>> Since the previous behavior was to ignore errors at this step of
>>>>> migration, this change should be examined closely to check that
>>>>> cleanups are still correctly done.
>>>>>
>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>> ---
>>>>>
>>>>>    Changes in v4:
>>>>>    - Merged cleanup change in qemu_savevm_state()
>>>>>    Changes in v3:
>>>>>    - Set migration state to MIGRATION_STATUS_FAILED
>>>>>    - Fixed error handling to be done under lock in bg_migration_thread()
>>>>>    - Made sure an error is always set in case of failure in
>>>>>      qemu_savevm_state_setup()
>>>>>    migration/savevm.h    |  2 +-
>>>>>    migration/migration.c | 27 ++++++++++++++++++++++++---
>>>>>    migration/savevm.c    | 26 +++++++++++++++-----------
>>>>>    3 files changed, 40 insertions(+), 15 deletions(-)
>>>>>
>>>>> diff --git a/migration/savevm.h b/migration/savevm.h
>>>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>>>>> --- a/migration/savevm.h
>>>>> +++ b/migration/savevm.h
>>>>> @@ -32,7 +32,7 @@
>>>>>    bool qemu_savevm_state_blocked(Error **errp);
>>>>>    void qemu_savevm_non_migratable_list(strList **reasons);
>>>>>    int qemu_savevm_state_prepare(Error **errp);
>>>>> -void qemu_savevm_state_setup(QEMUFile *f);
>>>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>>>>    bool qemu_savevm_state_guest_unplug_pending(void);
>>>>>    int qemu_savevm_state_resume_prepare(MigrationState *s);
>>>>>    void qemu_savevm_state_header(QEMUFile *f);
>>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>>>>> --- a/migration/migration.c
>>>>> +++ b/migration/migration.c
>>>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>>>>        int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>>>        MigThrError thr_error;
>>>>>        bool urgent = false;
>>>>> +    Error *local_err = NULL;
>>>>> +    int ret;
>>>>>        thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>>>>        }
>>>>>        bql_lock();
>>>>> -    qemu_savevm_state_setup(s->to_dst_file);
>>>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>>>        bql_unlock();
>>>>> +    if (ret) {
>>>>> +        migrate_set_error(s, local_err);
>>>>> +        error_free(local_err);
>>>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>>>> +                          MIGRATION_STATUS_FAILED);
>>>>> +        goto out;
>>>>> +     }
>>>>> +
>>>>>        qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>>>>                                   MIGRATION_STATUS_ACTIVE);
>>>>
>>>> This^ should be before the new block it seems:
>>>>
>>>> GOOD:
>>>> migrate_set_state new state setup
>>>> migrate_set_state new state wait-unplug
>>>> migrate_fd_cancel
>>>> migrate_set_state new state cancelling
>>>> migrate_fd_cleanup
>>>> migrate_set_state new state cancelled
>>>> migrate_fd_cancel
>>>> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
>>>>
>>>> BAD:
>>>> migrate_set_state new state setup
>>>> migrate_fd_cancel
>>>> migrate_set_state new state cancelling
>>>> migrate_fd_cleanup
>>>> migrate_set_state new state cancelled
>>>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>>>> **
>>>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
>>>> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>>>>
>>>> Otherwise migration_iteration_finish() will schedule the cleanup BH and
>>>> that will run concurrently with migrate_fd_cancel() issued by the test
>>>> and bad things happens.
>>>
>>> This hack makes things work :
>>>
>>> @@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
>>>            qemu_savevm_send_colo_enable(s->to_dst_file);
>>>        }
>>> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>> +                            MIGRATION_STATUS_SETUP);
>>> +
>>
>> Why move it all the way up here? Has moving the wait_unplug before the
>> 'if (ret)' block not worked for you?
> 
> We could be sleeping while holding the BQL. It looked wrong.

Sorry wrong answer. Yes I can try moving it before the 'if (ret)' block.
I can reproduce easily with an x86 guest running on PPC64.

Thanks,

C.
Cédric Le Goater March 12, 2024, 2:01 p.m. UTC | #17
On 3/12/24 14:34, Cédric Le Goater wrote:
> On 3/12/24 13:32, Cédric Le Goater wrote:
>> On 3/11/24 20:03, Fabiano Rosas wrote:
>>> Cédric Le Goater <clg@redhat.com> writes:
>>>
>>>> On 3/8/24 15:36, Fabiano Rosas wrote:
>>>>> Cédric Le Goater <clg@redhat.com> writes:
>>>>>
>>>>>> This prepares ground for the changes coming next which add an Error**
>>>>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>>>>>> now handle the error and fail earlier setting the migration state from
>>>>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>>>>>
>>>>>> In qemu_savevm_state(), move the cleanup to preserve the error
>>>>>> reported by .save_setup() handlers.
>>>>>>
>>>>>> Since the previous behavior was to ignore errors at this step of
>>>>>> migration, this change should be examined closely to check that
>>>>>> cleanups are still correctly done.
>>>>>>
>>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>>> ---
>>>>>>
>>>>>>    Changes in v4:
>>>>>>    - Merged cleanup change in qemu_savevm_state()
>>>>>>    Changes in v3:
>>>>>>    - Set migration state to MIGRATION_STATUS_FAILED
>>>>>>    - Fixed error handling to be done under lock in bg_migration_thread()
>>>>>>    - Made sure an error is always set in case of failure in
>>>>>>      qemu_savevm_state_setup()
>>>>>>    migration/savevm.h    |  2 +-
>>>>>>    migration/migration.c | 27 ++++++++++++++++++++++++---
>>>>>>    migration/savevm.c    | 26 +++++++++++++++-----------
>>>>>>    3 files changed, 40 insertions(+), 15 deletions(-)
>>>>>>
>>>>>> diff --git a/migration/savevm.h b/migration/savevm.h
>>>>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>>>>>> --- a/migration/savevm.h
>>>>>> +++ b/migration/savevm.h
>>>>>> @@ -32,7 +32,7 @@
>>>>>>    bool qemu_savevm_state_blocked(Error **errp);
>>>>>>    void qemu_savevm_non_migratable_list(strList **reasons);
>>>>>>    int qemu_savevm_state_prepare(Error **errp);
>>>>>> -void qemu_savevm_state_setup(QEMUFile *f);
>>>>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>>>>>    bool qemu_savevm_state_guest_unplug_pending(void);
>>>>>>    int qemu_savevm_state_resume_prepare(MigrationState *s);
>>>>>>    void qemu_savevm_state_header(QEMUFile *f);
>>>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>>>>>> --- a/migration/migration.c
>>>>>> +++ b/migration/migration.c
>>>>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>>>>>        int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>>>>        MigThrError thr_error;
>>>>>>        bool urgent = false;
>>>>>> +    Error *local_err = NULL;
>>>>>> +    int ret;
>>>>>>        thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>>>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>>>>>        }
>>>>>>        bql_lock();
>>>>>> -    qemu_savevm_state_setup(s->to_dst_file);
>>>>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>>>>        bql_unlock();
>>>>>> +    if (ret) {
>>>>>> +        migrate_set_error(s, local_err);
>>>>>> +        error_free(local_err);
>>>>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>>>>> +                          MIGRATION_STATUS_FAILED);
>>>>>> +        goto out;
>>>>>> +     }
>>>>>> +
>>>>>>        qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>>>>>                                   MIGRATION_STATUS_ACTIVE);
>>>>>
>>>>> This^ should be before the new block it seems:
>>>>>
>>>>> GOOD:
>>>>> migrate_set_state new state setup
>>>>> migrate_set_state new state wait-unplug
>>>>> migrate_fd_cancel
>>>>> migrate_set_state new state cancelling
>>>>> migrate_fd_cleanup
>>>>> migrate_set_state new state cancelled
>>>>> migrate_fd_cancel
>>>>> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
>>>>>
>>>>> BAD:
>>>>> migrate_set_state new state setup
>>>>> migrate_fd_cancel
>>>>> migrate_set_state new state cancelling
>>>>> migrate_fd_cleanup
>>>>> migrate_set_state new state cancelled
>>>>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>>>>> **
>>>>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
>>>>> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>>>>>
>>>>> Otherwise migration_iteration_finish() will schedule the cleanup BH and
>>>>> that will run concurrently with migrate_fd_cancel() issued by the test
>>>>> and bad things happens.
>>>>
>>>> This hack makes things work :
>>>>
>>>> @@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
>>>>            qemu_savevm_send_colo_enable(s->to_dst_file);
>>>>        }
>>>> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>>> +                            MIGRATION_STATUS_SETUP);
>>>> +
>>>
>>> Why move it all the way up here? Has moving the wait_unplug before the
>>> 'if (ret)' block not worked for you?
>>
>> We could be sleeping while holding the BQL. It looked wrong.
> 
> Sorry wrong answer. Yes I can try moving it before the 'if (ret)' block.
> I can reproduce easily with an x86 guest running on PPC64.

That works just the same.

Peter, Fabiano,

What would you prefer?

1. move qemu_savevm_wait_unplug() before qemu_savevm_state_setup(),
    means one new patch.

2. leave qemu_savevm_wait_unplug() after qemu_savevm_state_setup()
    and handle state_setup() errors after waiting. means an update
    of this patch.


Thanks,

C.
Fabiano Rosas March 12, 2024, 2:24 p.m. UTC | #18
Cédric Le Goater <clg@redhat.com> writes:

> On 3/12/24 14:34, Cédric Le Goater wrote:
>> On 3/12/24 13:32, Cédric Le Goater wrote:
>>> On 3/11/24 20:03, Fabiano Rosas wrote:
>>>> Cédric Le Goater <clg@redhat.com> writes:
>>>>
>>>>> On 3/8/24 15:36, Fabiano Rosas wrote:
>>>>>> Cédric Le Goater <clg@redhat.com> writes:
>>>>>>
>>>>>>> This prepares ground for the changes coming next which add an Error**
>>>>>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>>>>>>> now handle the error and fail earlier setting the migration state from
>>>>>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>>>>>>
>>>>>>> In qemu_savevm_state(), move the cleanup to preserve the error
>>>>>>> reported by .save_setup() handlers.
>>>>>>>
>>>>>>> Since the previous behavior was to ignore errors at this step of
>>>>>>> migration, this change should be examined closely to check that
>>>>>>> cleanups are still correctly done.
>>>>>>>
>>>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>>>> ---
>>>>>>>
>>>>>>>    Changes in v4:
>>>>>>>    - Merged cleanup change in qemu_savevm_state()
>>>>>>>    Changes in v3:
>>>>>>>    - Set migration state to MIGRATION_STATUS_FAILED
>>>>>>>    - Fixed error handling to be done under lock in bg_migration_thread()
>>>>>>>    - Made sure an error is always set in case of failure in
>>>>>>>      qemu_savevm_state_setup()
>>>>>>>    migration/savevm.h    |  2 +-
>>>>>>>    migration/migration.c | 27 ++++++++++++++++++++++++---
>>>>>>>    migration/savevm.c    | 26 +++++++++++++++-----------
>>>>>>>    3 files changed, 40 insertions(+), 15 deletions(-)
>>>>>>>
>>>>>>> diff --git a/migration/savevm.h b/migration/savevm.h
>>>>>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>>>>>>> --- a/migration/savevm.h
>>>>>>> +++ b/migration/savevm.h
>>>>>>> @@ -32,7 +32,7 @@
>>>>>>>    bool qemu_savevm_state_blocked(Error **errp);
>>>>>>>    void qemu_savevm_non_migratable_list(strList **reasons);
>>>>>>>    int qemu_savevm_state_prepare(Error **errp);
>>>>>>> -void qemu_savevm_state_setup(QEMUFile *f);
>>>>>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>>>>>>    bool qemu_savevm_state_guest_unplug_pending(void);
>>>>>>>    int qemu_savevm_state_resume_prepare(MigrationState *s);
>>>>>>>    void qemu_savevm_state_header(QEMUFile *f);
>>>>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>>>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>>>>>>> --- a/migration/migration.c
>>>>>>> +++ b/migration/migration.c
>>>>>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>>>>>>        int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>>>>>        MigThrError thr_error;
>>>>>>>        bool urgent = false;
>>>>>>> +    Error *local_err = NULL;
>>>>>>> +    int ret;
>>>>>>>        thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>>>>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>>>>>>        }
>>>>>>>        bql_lock();
>>>>>>> -    qemu_savevm_state_setup(s->to_dst_file);
>>>>>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>>>>>        bql_unlock();
>>>>>>> +    if (ret) {
>>>>>>> +        migrate_set_error(s, local_err);
>>>>>>> +        error_free(local_err);
>>>>>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>>>>>> +                          MIGRATION_STATUS_FAILED);
>>>>>>> +        goto out;
>>>>>>> +     }
>>>>>>> +
>>>>>>>        qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>>>>>>                                   MIGRATION_STATUS_ACTIVE);
>>>>>>
>>>>>> This^ should be before the new block it seems:
>>>>>>
>>>>>> GOOD:
>>>>>> migrate_set_state new state setup
>>>>>> migrate_set_state new state wait-unplug
>>>>>> migrate_fd_cancel
>>>>>> migrate_set_state new state cancelling
>>>>>> migrate_fd_cleanup
>>>>>> migrate_set_state new state cancelled
>>>>>> migrate_fd_cancel
>>>>>> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
>>>>>>
>>>>>> BAD:
>>>>>> migrate_set_state new state setup
>>>>>> migrate_fd_cancel
>>>>>> migrate_set_state new state cancelling
>>>>>> migrate_fd_cleanup
>>>>>> migrate_set_state new state cancelled
>>>>>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>>>>>> **
>>>>>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
>>>>>> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>>>>>>
>>>>>> Otherwise migration_iteration_finish() will schedule the cleanup BH and
>>>>>> that will run concurrently with migrate_fd_cancel() issued by the test
>>>>>> and bad things happens.
>>>>>
>>>>> This hack makes things work :
>>>>>
>>>>> @@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
>>>>>            qemu_savevm_send_colo_enable(s->to_dst_file);
>>>>>        }
>>>>> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>>>> +                            MIGRATION_STATUS_SETUP);
>>>>> +
>>>>
>>>> Why move it all the way up here? Has moving the wait_unplug before the
>>>> 'if (ret)' block not worked for you?
>>>
>>> We could be sleeping while holding the BQL. It looked wrong.
>> 
>> Sorry wrong answer. Yes I can try moving it before the 'if (ret)' block.
>> I can reproduce easily with an x86 guest running on PPC64.
>
> That works just the same.
>
> Peter, Fabiano,
>
> What would you prefer  ?
>
> 1. move qemu_savevm_wait_unplug() before qemu_savevm_state_setup(),
>     means one new patch.

Is there a point to this except "because we can"? Honest question, I
might have missed the motivation.

Also a couple of points:

- The current version of this proposal seems it will lose the transition
from SETUP->ACTIVE no? As in, after qemu_savevm_state_setup, there's
nothing changing the state to ACTIVE anymore.

- You also need to change the bg migration path.

>
> 2. leave qemu_savevm_wait_unplug() after qemu_savevm_state_setup()
>     and handle state_setup() errors after waiting. means an update
>     of this patch.

I vote for this. This failover feature is a pretty complex one, let's
not risk changing the behavior for no good reason. Just look at the
amount of head-banging going on in these threads:

https://patchwork.ozlabs.org/project/qemu-devel/cover/20181025140631.634922-1-sameeh@daynix.com/
https://www.mail-archive.com/qemu-devel@nongnu.org/msg609296.html

>
>
> Thanks,
>
> C.
>
>
>
Peter Xu March 12, 2024, 3:18 p.m. UTC | #19
On Tue, Mar 12, 2024 at 11:24:39AM -0300, Fabiano Rosas wrote:
> Cédric Le Goater <clg@redhat.com> writes:
> 
> > On 3/12/24 14:34, Cédric Le Goater wrote:
> >> On 3/12/24 13:32, Cédric Le Goater wrote:
> >>> On 3/11/24 20:03, Fabiano Rosas wrote:
> >>>> Cédric Le Goater <clg@redhat.com> writes:
> >>>>
> >>>>> On 3/8/24 15:36, Fabiano Rosas wrote:
> >>>>>> Cédric Le Goater <clg@redhat.com> writes:
> >>>>>>
> >>>>>>> This prepares ground for the changes coming next which add an Error**
> >>>>>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
> >>>>>>> now handle the error and fail earlier setting the migration state from
> >>>>>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
> >>>>>>>
> >>>>>>> In qemu_savevm_state(), move the cleanup to preserve the error
> >>>>>>> reported by .save_setup() handlers.
> >>>>>>>
> >>>>>>> Since the previous behavior was to ignore errors at this step of
> >>>>>>> migration, this change should be examined closely to check that
> >>>>>>> cleanups are still correctly done.
> >>>>>>>
> >>>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> >>>>>>> ---
> >>>>>>>
> >>>>>>>    Changes in v4:
> >>>>>>>    - Merged cleanup change in qemu_savevm_state()
> >>>>>>>    Changes in v3:
> >>>>>>>    - Set migration state to MIGRATION_STATUS_FAILED
> >>>>>>>    - Fixed error handling to be done under lock in bg_migration_thread()
> >>>>>>>    - Made sure an error is always set in case of failure in
> >>>>>>>      qemu_savevm_state_setup()
> >>>>>>>    migration/savevm.h    |  2 +-
> >>>>>>>    migration/migration.c | 27 ++++++++++++++++++++++++---
> >>>>>>>    migration/savevm.c    | 26 +++++++++++++++-----------
> >>>>>>>    3 files changed, 40 insertions(+), 15 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/migration/savevm.h b/migration/savevm.h
> >>>>>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
> >>>>>>> --- a/migration/savevm.h
> >>>>>>> +++ b/migration/savevm.h
> >>>>>>> @@ -32,7 +32,7 @@
> >>>>>>>    bool qemu_savevm_state_blocked(Error **errp);
> >>>>>>>    void qemu_savevm_non_migratable_list(strList **reasons);
> >>>>>>>    int qemu_savevm_state_prepare(Error **errp);
> >>>>>>> -void qemu_savevm_state_setup(QEMUFile *f);
> >>>>>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
> >>>>>>>    bool qemu_savevm_state_guest_unplug_pending(void);
> >>>>>>>    int qemu_savevm_state_resume_prepare(MigrationState *s);
> >>>>>>>    void qemu_savevm_state_header(QEMUFile *f);
> >>>>>>> diff --git a/migration/migration.c b/migration/migration.c
> >>>>>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
> >>>>>>> --- a/migration/migration.c
> >>>>>>> +++ b/migration/migration.c
> >>>>>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
> >>>>>>>        int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> >>>>>>>        MigThrError thr_error;
> >>>>>>>        bool urgent = false;
> >>>>>>> +    Error *local_err = NULL;
> >>>>>>> +    int ret;
> >>>>>>>        thread = migration_threads_add("live_migration", qemu_get_thread_id());
> >>>>>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
> >>>>>>>        }
> >>>>>>>        bql_lock();
> >>>>>>> -    qemu_savevm_state_setup(s->to_dst_file);
> >>>>>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
> >>>>>>>        bql_unlock();
> >>>>>>> +    if (ret) {
> >>>>>>> +        migrate_set_error(s, local_err);
> >>>>>>> +        error_free(local_err);
> >>>>>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> >>>>>>> +                          MIGRATION_STATUS_FAILED);
> >>>>>>> +        goto out;
> >>>>>>> +     }
> >>>>>>> +
> >>>>>>>        qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
> >>>>>>>                                   MIGRATION_STATUS_ACTIVE);
> >>>>>>
> >>>>>> This^ should be before the new block it seems:
> >>>>>>
> >>>>>> GOOD:
> >>>>>> migrate_set_state new state setup
> >>>>>> migrate_set_state new state wait-unplug
> >>>>>> migrate_fd_cancel
> >>>>>> migrate_set_state new state cancelling
> >>>>>> migrate_fd_cleanup
> >>>>>> migrate_set_state new state cancelled
> >>>>>> migrate_fd_cancel
> >>>>>> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
> >>>>>>
> >>>>>> BAD:
> >>>>>> migrate_set_state new state setup
> >>>>>> migrate_fd_cancel
> >>>>>> migrate_set_state new state cancelling
> >>>>>> migrate_fd_cleanup
> >>>>>> migrate_set_state new state cancelled
> >>>>>> qemu-system-x86_64: ram_save_setup failed: Input/output error
> >>>>>> **
> >>>>>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
> >>>>>> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
> >>>>>>
> >>>>>> Otherwise migration_iteration_finish() will schedule the cleanup BH and
> >>>>>> that will run concurrently with migrate_fd_cancel() issued by the test
> >>>>>> and bad things happens.
> >>>>>
> >>>>> This hack makes things work :
> >>>>>
> >>>>> @@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
> >>>>>            qemu_savevm_send_colo_enable(s->to_dst_file);
> >>>>>        }
> >>>>> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
> >>>>> +                            MIGRATION_STATUS_SETUP);
> >>>>> +
> >>>>
> >>>> Why move it all the way up here? Has moving the wait_unplug before the
> >>>> 'if (ret)' block not worked for you?
> >>>
> >>> We could be sleeping while holding the BQL. It looked wrong.
> >> 
> >> Sorry wrong answer. Yes I can try moving it before the 'if (ret)' block.
> >> I can reproduce easily with an x86 guest running on PPC64.
> >
> > That works just the same.
> >
> > Peter, Fabiano,
> >
> > What would you prefer  ?
> >
> > 1. move qemu_savevm_wait_unplug() before qemu_savevm_state_setup(),
> >     means one new patch.
> 
> Is there a point to this except "because we can"? Honest question, I
> might have missed the motivation.

My previous point was, it avoids holding the resources (that will be
allocated in setup() routines) while we know we can wait for a long time.

But then I found that the ordering is indeed needed at least if we don't
change migrate_set_state() first - it is the only place we set the status
to START (which I overlooked, sorry)...

IMHO the function is not well designed; the state update of the next stage
should not reside in a function to wait for failover primary devices
conditionally. It's a bit of a mess.

> 
> Also a couple of points:
> 
> - The current version of this proposal seems it will lose the transition
> from SETUP->ACTIVE no? As in, after qemu_savevm_state_setup, there's
> nothing changing the state to ACTIVE anymore.
> 
> - You also need to change the bg migration path.
> 
> >
> > 2. leave qemu_savevm_wait_unplug() after qemu_savevm_state_setup()
> >     and handle state_setup() errors after waiting. means an update
> >     of this patch.
> 
> I vote for this. This failover feature is a pretty complex one, let's
> not risk changing the behavior for no good reason. Just look at the
> amount of head-banging going on in these threads:
> 
> https://patchwork.ozlabs.org/project/qemu-devel/cover/20181025140631.634922-1-sameeh@daynix.com/
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg609296.html

Do we know who is consuming this feature?

Now VFIO allows a migration to happen without this trick.  I'm wondering
whether all relevant NICs can also support VFIO migrations in the future,
then we can drop this tricky feature for good.
Cédric Le Goater March 12, 2024, 6:06 p.m. UTC | #20
[ ...]

> Now VFIO allows a migration to happen without this trick.  I'm wondering
> whether all relevant NICs can also support VFIO migrations in the future,
> then we can drop this tricky feature for good.

Currently, VFIO migration requires a VFIO (PCI) variant driver implementing
the specific ops for migration. Only a few NICs are supported. It's growing
though, and we should expect more in the future, especially enterprise-grade
NICs.

Thanks,

C.
Fabiano Rosas March 12, 2024, 6:28 p.m. UTC | #21
Peter Xu <peterx@redhat.com> writes:

> On Tue, Mar 12, 2024 at 11:24:39AM -0300, Fabiano Rosas wrote:
>> Cédric Le Goater <clg@redhat.com> writes:
>> 
>> > On 3/12/24 14:34, Cédric Le Goater wrote:
>> >> On 3/12/24 13:32, Cédric Le Goater wrote:
>> >>> On 3/11/24 20:03, Fabiano Rosas wrote:
>> >>>> Cédric Le Goater <clg@redhat.com> writes:
>> >>>>
>> >>>>> On 3/8/24 15:36, Fabiano Rosas wrote:
>> >>>>>> Cédric Le Goater <clg@redhat.com> writes:
>> >>>>>>
>> >>>>>>> This prepares ground for the changes coming next which add an Error**
>> >>>>>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>> >>>>>>> now handle the error and fail earlier setting the migration state from
>> >>>>>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>> >>>>>>>
>> >>>>>>> In qemu_savevm_state(), move the cleanup to preserve the error
>> >>>>>>> reported by .save_setup() handlers.
>> >>>>>>>
>> >>>>>>> Since the previous behavior was to ignore errors at this step of
>> >>>>>>> migration, this change should be examined closely to check that
>> >>>>>>> cleanups are still correctly done.
>> >>>>>>>
>> >>>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>> >>>>>>> ---
>> >>>>>>>
>> >>>>>>>    Changes in v4:
>> >>>>>>>    - Merged cleanup change in qemu_savevm_state()
>> >>>>>>>    Changes in v3:
>> >>>>>>>    - Set migration state to MIGRATION_STATUS_FAILED
>> >>>>>>>    - Fixed error handling to be done under lock in bg_migration_thread()
>> >>>>>>>    - Made sure an error is always set in case of failure in
>> >>>>>>>      qemu_savevm_state_setup()
>> >>>>>>>    migration/savevm.h    |  2 +-
>> >>>>>>>    migration/migration.c | 27 ++++++++++++++++++++++++---
>> >>>>>>>    migration/savevm.c    | 26 +++++++++++++++-----------
>> >>>>>>>    3 files changed, 40 insertions(+), 15 deletions(-)
>> >>>>>>>
>> >>>>>>> diff --git a/migration/savevm.h b/migration/savevm.h
>> >>>>>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>> >>>>>>> --- a/migration/savevm.h
>> >>>>>>> +++ b/migration/savevm.h
>> >>>>>>> @@ -32,7 +32,7 @@
>> >>>>>>>    bool qemu_savevm_state_blocked(Error **errp);
>> >>>>>>>    void qemu_savevm_non_migratable_list(strList **reasons);
>> >>>>>>>    int qemu_savevm_state_prepare(Error **errp);
>> >>>>>>> -void qemu_savevm_state_setup(QEMUFile *f);
>> >>>>>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>> >>>>>>>    bool qemu_savevm_state_guest_unplug_pending(void);
>> >>>>>>>    int qemu_savevm_state_resume_prepare(MigrationState *s);
>> >>>>>>>    void qemu_savevm_state_header(QEMUFile *f);
>> >>>>>>> diff --git a/migration/migration.c b/migration/migration.c
>> >>>>>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>> >>>>>>> --- a/migration/migration.c
>> >>>>>>> +++ b/migration/migration.c
>> >>>>>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>> >>>>>>>        int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>> >>>>>>>        MigThrError thr_error;
>> >>>>>>>        bool urgent = false;
>> >>>>>>> +    Error *local_err = NULL;
>> >>>>>>> +    int ret;
>> >>>>>>>        thread = migration_threads_add("live_migration", qemu_get_thread_id());
>> >>>>>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>> >>>>>>>        }
>> >>>>>>>        bql_lock();
>> >>>>>>> -    qemu_savevm_state_setup(s->to_dst_file);
>> >>>>>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>> >>>>>>>        bql_unlock();
>> >>>>>>> +    if (ret) {
>> >>>>>>> +        migrate_set_error(s, local_err);
>> >>>>>>> +        error_free(local_err);
>> >>>>>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>> >>>>>>> +                          MIGRATION_STATUS_FAILED);
>> >>>>>>> +        goto out;
>> >>>>>>> +     }
>> >>>>>>> +
>> >>>>>>>        qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>> >>>>>>>                                   MIGRATION_STATUS_ACTIVE);
>> >>>>>>
>> >>>>>> This^ should be before the new block it seems:
>> >>>>>>
>> >>>>>> GOOD:
>> >>>>>> migrate_set_state new state setup
>> >>>>>> migrate_set_state new state wait-unplug
>> >>>>>> migrate_fd_cancel
>> >>>>>> migrate_set_state new state cancelling
>> >>>>>> migrate_fd_cleanup
>> >>>>>> migrate_set_state new state cancelled
>> >>>>>> migrate_fd_cancel
>> >>>>>> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
>> >>>>>>
>> >>>>>> BAD:
>> >>>>>> migrate_set_state new state setup
>> >>>>>> migrate_fd_cancel
>> >>>>>> migrate_set_state new state cancelling
>> >>>>>> migrate_fd_cleanup
>> >>>>>> migrate_set_state new state cancelled
>> >>>>>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>> >>>>>> **
>> >>>>>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
>> >>>>>> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>> >>>>>>
>> >>>>>> Otherwise migration_iteration_finish() will schedule the cleanup BH and
>> >>>>>> that will run concurrently with migrate_fd_cancel() issued by the test
>> >>>>>> and bad things happens.
>> >>>>>
>> >>>>> This hack makes things work :
>> >>>>>
>> >>>>> @@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
>> >>>>>            qemu_savevm_send_colo_enable(s->to_dst_file);
>> >>>>>        }
>> >>>>> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>> >>>>> +                            MIGRATION_STATUS_SETUP);
>> >>>>> +
>> >>>>
>> >>>> Why move it all the way up here? Has moving the wait_unplug before the
>> >>>> 'if (ret)' block not worked for you?
>> >>>
>> >>> We could be sleeping while holding the BQL. It looked wrong.
>> >> 
>> >> Sorry wrong answer. Yes I can try moving it before the 'if (ret)' block.
>> >> I can reproduce easily with an x86 guest running on PPC64.
>> >
>> > That works just the same.
>> >
>> > Peter, Fabiano,
>> >
>> > What would you prefer  ?
>> >
>> > 1. move qemu_savevm_wait_unplug() before qemu_savevm_state_setup(),
>> >     means one new patch.
>> 
>> Is there a point to this except "because we can"? Honest question, I
>> might have missed the motivation.
>
> My previous point was, it avoids holding the resources (that will be
> allocated in setup() routines) while we know we can wait for a long time.
>
> But then I found that the ordering is indeed needed at least if we don't
> change migrate_set_state() first - it is the only place we set the status
> to START (which I overlooked, sorry)...
>
> IMHO the function is not well designed; the state update of the next stage
> should not reside in a function to wait for failover primary devices
> conditionally. It's a bit of a mess.
>

I agree. We can clean that up in 9.1.

migrate_set_state is also unintuitive because it ignores invalid state
transitions and we've been using that property to deal with special
states such as POSTCOPY_PAUSED and FAILED:

- After the migration goes into POSTCOPY_PAUSED, the resumed migration's
  migrate_init() will try to set the state NONE->SETUP, which is not
  valid.

- After save_setup fails, the migration goes into FAILED, but wait_unplug
  will try to transition SETUP->ACTIVE, which is also not valid.
Cédric Le Goater March 15, 2024, 10:17 a.m. UTC | #22
On 3/12/24 19:28, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
>> On Tue, Mar 12, 2024 at 11:24:39AM -0300, Fabiano Rosas wrote:
>>> Cédric Le Goater <clg@redhat.com> writes:
>>>
>>>> On 3/12/24 14:34, Cédric Le Goater wrote:
>>>>> On 3/12/24 13:32, Cédric Le Goater wrote:
>>>>>> On 3/11/24 20:03, Fabiano Rosas wrote:
>>>>>>> Cédric Le Goater <clg@redhat.com> writes:
>>>>>>>
>>>>>>>> On 3/8/24 15:36, Fabiano Rosas wrote:
>>>>>>>>> Cédric Le Goater <clg@redhat.com> writes:
>>>>>>>>>
>>>>>>>>>> This prepares ground for the changes coming next which add an Error**
>>>>>>>>>> argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
>>>>>>>>>> now handle the error and fail earlier setting the migration state from
>>>>>>>>>> MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.
>>>>>>>>>>
>>>>>>>>>> In qemu_savevm_state(), move the cleanup to preserve the error
>>>>>>>>>> reported by .save_setup() handlers.
>>>>>>>>>>
>>>>>>>>>> Since the previous behavior was to ignore errors at this step of
>>>>>>>>>> migration, this change should be examined closely to check that
>>>>>>>>>> cleanups are still correctly done.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>>>>>>>>> ---
>>>>>>>>>>
>>>>>>>>>>     Changes in v4:
>>>>>>>>>>     - Merged cleanup change in qemu_savevm_state()
>>>>>>>>>>     Changes in v3:
>>>>>>>>>>     - Set migration state to MIGRATION_STATUS_FAILED
>>>>>>>>>>     - Fixed error handling to be done under lock in bg_migration_thread()
>>>>>>>>>>     - Made sure an error is always set in case of failure in
>>>>>>>>>>       qemu_savevm_state_setup()
>>>>>>>>>>     migration/savevm.h    |  2 +-
>>>>>>>>>>     migration/migration.c | 27 ++++++++++++++++++++++++---
>>>>>>>>>>     migration/savevm.c    | 26 +++++++++++++++-----------
>>>>>>>>>>     3 files changed, 40 insertions(+), 15 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/migration/savevm.h b/migration/savevm.h
>>>>>>>>>> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
>>>>>>>>>> --- a/migration/savevm.h
>>>>>>>>>> +++ b/migration/savevm.h
>>>>>>>>>> @@ -32,7 +32,7 @@
>>>>>>>>>>     bool qemu_savevm_state_blocked(Error **errp);
>>>>>>>>>>     void qemu_savevm_non_migratable_list(strList **reasons);
>>>>>>>>>>     int qemu_savevm_state_prepare(Error **errp);
>>>>>>>>>> -void qemu_savevm_state_setup(QEMUFile *f);
>>>>>>>>>> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>>>>>>>>>>     bool qemu_savevm_state_guest_unplug_pending(void);
>>>>>>>>>>     int qemu_savevm_state_resume_prepare(MigrationState *s);
>>>>>>>>>>     void qemu_savevm_state_header(QEMUFile *f);
>>>>>>>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>>>>>>>> index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
>>>>>>>>>> --- a/migration/migration.c
>>>>>>>>>> +++ b/migration/migration.c
>>>>>>>>>> @@ -3408,6 +3408,8 @@ static void *migration_thread(void *opaque)
>>>>>>>>>>         int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>>>>>>>>         MigThrError thr_error;
>>>>>>>>>>         bool urgent = false;
>>>>>>>>>> +    Error *local_err = NULL;
>>>>>>>>>> +    int ret;
>>>>>>>>>>         thread = migration_threads_add("live_migration", qemu_get_thread_id());
>>>>>>>>>> @@ -3451,9 +3453,17 @@ static void *migration_thread(void *opaque)
>>>>>>>>>>         }
>>>>>>>>>>         bql_lock();
>>>>>>>>>> -    qemu_savevm_state_setup(s->to_dst_file);
>>>>>>>>>> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>>>>>>>>>>         bql_unlock();
>>>>>>>>>> +    if (ret) {
>>>>>>>>>> +        migrate_set_error(s, local_err);
>>>>>>>>>> +        error_free(local_err);
>>>>>>>>>> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>>>>>>>>> +                          MIGRATION_STATUS_FAILED);
>>>>>>>>>> +        goto out;
>>>>>>>>>> +     }
>>>>>>>>>> +
>>>>>>>>>>         qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>>>>>>>>>                                    MIGRATION_STATUS_ACTIVE);
>>>>>>>>>
>>>>>>>>> This^ should be before the new block it seems:
>>>>>>>>>
>>>>>>>>> GOOD:
>>>>>>>>> migrate_set_state new state setup
>>>>>>>>> migrate_set_state new state wait-unplug
>>>>>>>>> migrate_fd_cancel
>>>>>>>>> migrate_set_state new state cancelling
>>>>>>>>> migrate_fd_cleanup
>>>>>>>>> migrate_set_state new state cancelled
>>>>>>>>> migrate_fd_cancel
>>>>>>>>> ok 1 /x86_64/failover-virtio-net/migrate/abort/wait-unplug
>>>>>>>>>
>>>>>>>>> BAD:
>>>>>>>>> migrate_set_state new state setup
>>>>>>>>> migrate_fd_cancel
>>>>>>>>> migrate_set_state new state cancelling
>>>>>>>>> migrate_fd_cleanup
>>>>>>>>> migrate_set_state new state cancelled
>>>>>>>>> qemu-system-x86_64: ram_save_setup failed: Input/output error
>>>>>>>>> **
>>>>>>>>> ERROR:../tests/qtest/virtio-net-failover.c:1203:test_migrate_abort_wait_unplug:
>>>>>>>>> assertion failed (status == "cancelling"): ("cancelled" == "cancelling")
>>>>>>>>>
>>>>>>>>> Otherwise migration_iteration_finish() will schedule the cleanup BH and
>>>>>>>>> that will run concurrently with migrate_fd_cancel() issued by the test
>>>>>>>>> and bad things happens.
>>>>>>>>
>>>>>>>> This hack makes things work :
>>>>>>>>
>>>>>>>> @@ -3452,6 +3452,9 @@ static void *migration_thread(void *opaq
>>>>>>>>             qemu_savevm_send_colo_enable(s->to_dst_file);
>>>>>>>>         }
>>>>>>>> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>>>>>>>> +                            MIGRATION_STATUS_SETUP);
>>>>>>>> +
>>>>>>>
>>>>>>> Why move it all the way up here? Has moving the wait_unplug before the
>>>>>>> 'if (ret)' block not worked for you?
>>>>>>
>>>>>> We could be sleeping while holding the BQL. It looked wrong.
>>>>>
>>>>> Sorry wrong answer. Yes I can try moving it before the 'if (ret)' block.
>>>>> I can reproduce easily with an x86 guest running on PPC64.
>>>>
>>>> That works just the same.
>>>>
>>>> Peter, Fabiano,
>>>>
>>>> What would you prefer  ?
>>>>
>>>> 1. move qemu_savevm_wait_unplug() before qemu_savevm_state_setup(),
>>>>      means one new patch.
>>>
>>> Is there a point to this except "because we can"? Honest question, I
>>> might have missed the motivation.
>>
>> My previous point was, it avoids holding the resources (that will be
>> allocated in setup() routines) while we know we can wait for a long time.
>>
>> But then I found that the ordering is indeed needed at least if we don't
>> change migrate_set_state() first - it is the only place we set the status
>> to START (which I overlooked, sorry)...
>>
>> IMHO the function is not well designed; the state update of the next stage
>> should not reside in a function to wait for failover primary devices
>> conditionally. It's a bit of a mess.
>>
> 
> I agree. We can clean that up in 9.1.
> 
> migrate_set_state is also unintuitive because it ignores invalid state
> transitions and we've been using that property to deal with special
> states such as POSTCOPY_PAUSED and FAILED:
> 
> - After the migration goes into POSTCOPY_PAUSED, the resumed migration's
>    migrate_init() will try to set the state NONE->SETUP, which is not
>    valid.
> 
> - After save_setup fails, the migration goes into FAILED, but wait_unplug
>    will try to transition SETUP->ACTIVE, which is also not valid.
> 

I am not sure I understand what the plan is. Both solutions are problematic
regarding the state transitions.

Should we consider that waiting for failover devices to unplug is an internal
step of the SETUP phase, not transitioning to ACTIVE?

Thanks,

C.
Peter Xu March 15, 2024, 11:01 a.m. UTC | #23
On Fri, Mar 15, 2024 at 11:17:45AM +0100, Cédric Le Goater wrote:
> > migrate_set_state is also unintuitive because it ignores invalid state
> > transitions and we've been using that property to deal with special
> > states such as POSTCOPY_PAUSED and FAILED:
> > 
> > - After the migration goes into POSTCOPY_PAUSED, the resumed migration's
> >    migrate_init() will try to set the state NONE->SETUP, which is not
> >    valid.
> > 
> > - After save_setup fails, the migration goes into FAILED, but wait_unplug
> >    will try to transition SETUP->ACTIVE, which is also not valid.
> > 
> 
> I am not sure I understand what the plan is. Both solutions are problematic
> regarding the state transitions.
> 
> Should we consider that waiting for failover devices to unplug is an internal
> step of the SETUP phase not transitioning to ACTIVE ?

If to unblock this series, IIUC the simplest solution is to do what Fabiano
suggested, that we move qemu_savevm_wait_unplug() to be before the check of
setup() ret.  In that case, the state change in qemu_savevm_wait_unplug()
should be benign and we should see a super small window it became ACTIVE
but then it should be FAILED (and IIUC the patch itself will need to use
ACTIVE as "old_state", not SETUP anymore).

For the long term, maybe we can remove the WAIT_UNPLUG state?  The only
Libvirt support seems to be here:

commit 8a226ddb3602586a2ba2359afc4448c02f566a0e
Author: Laine Stump <laine@redhat.com>
Date:   Wed Jan 15 16:38:57 2020 -0500

    qemu: add wait-unplug to qemu migration status enum

Considering that qemu_savevm_wait_unplug() can be a noop if the device is
already unplugged, I think it means no upper layer app should rely on this
state to present.

Thanks,
Cédric Le Goater March 15, 2024, 12:20 p.m. UTC | #24
On 3/15/24 12:01, Peter Xu wrote:
> On Fri, Mar 15, 2024 at 11:17:45AM +0100, Cédric Le Goater wrote:
>>> migrate_set_state is also unintuitive because it ignores invalid state
>>> transitions and we've been using that property to deal with special
>>> states such as POSTCOPY_PAUSED and FAILED:
>>>
>>> - After the migration goes into POSTCOPY_PAUSED, the resumed migration's
>>>     migrate_init() will try to set the state NONE->SETUP, which is not
>>>     valid.
>>>
>>> - After save_setup fails, the migration goes into FAILED, but wait_unplug
>>>     will try to transition SETUP->ACTIVE, which is also not valid.
>>>
>>
>> I am not sure I understand what the plan is. Both solutions are problematic
>> regarding the state transitions.
>>
>> Should we consider that waiting for failover devices to unplug is an internal
>> step of the SETUP phase not transitioning to ACTIVE ?
> 
> If to unblock this series, IIUC the simplest solution is to do what Fabiano
> suggested, that we move qemu_savevm_wait_unplug() to be before the check of
> setup() ret. 

The simplest is IMHO moving qemu_savevm_wait_unplug() before
qemu_savevm_state_setup() and leaving patch 10 unchanged. See
the extra patch below. It looks much cleaner than what we have
today.

> In that case, the state change in qemu_savevm_wait_unplug()
> should be benign and we should see a super small window it became ACTIVE
> but then it should be FAILED (and IIUC the patch itself will need to use
> ACTIVE as "old_state", not SETUP anymore).

OK. I will give it a try to compare.

> For the long term, maybe we can remove the WAIT_UNPLUG state?  

I hope so, it's an internal SETUP state for me.

> The only Libvirt support seems to be here:
> 
> commit 8a226ddb3602586a2ba2359afc4448c02f566a0e
> Author: Laine Stump <laine@redhat.com>
> Date:   Wed Jan 15 16:38:57 2020 -0500
> 
>      qemu: add wait-unplug to qemu migration status enum
> 
> Considering that qemu_savevm_wait_unplug() can be a noop if the device is
> already unplugged, I think it means no upper layer app should rely on this
> state to present.

Thanks,

C.


> 
@@ -3383,11 +3383,10 @@ bool migration_rate_limit(void)
   * unplugged
   */
  
-static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
-                                    int new_state)
+static void qemu_savevm_wait_unplug(MigrationState *s, int state)
  {
      if (qemu_savevm_state_guest_unplug_pending()) {
-        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);
+        migrate_set_state(&s->state, state, MIGRATION_STATUS_WAIT_UNPLUG);
  
          while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
                 qemu_savevm_state_guest_unplug_pending()) {
@@ -3410,9 +3409,7 @@ static void qemu_savevm_wait_unplug(Migr
              }
          }
  
-        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
-    } else {
-        migrate_set_state(&s->state, old_state, new_state);
+        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, state);
      }
  }
  
@@ -3469,17 +3466,19 @@ static void *migration_thread(void *opaq
          qemu_savevm_send_colo_enable(s->to_dst_file);
      }
  
+    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP);
+
      bql_lock();
      qemu_savevm_state_setup(s->to_dst_file);
      bql_unlock();
  
-    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
-                               MIGRATION_STATUS_ACTIVE);
-
      s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
  
      trace_migration_thread_setup_complete();
  
+    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+                      MIGRATION_STATUS_ACTIVE);
+
      while (migration_is_active()) {
          if (urgent || !migration_rate_exceeded(s->to_dst_file)) {
              MigIterateState iter_state = migration_iteration_run(s);
@@ -3580,18 +3579,20 @@ static void *bg_migration_thread(void *o
      ram_write_tracking_prepare();
  #endif
  
+    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP);
+
      bql_lock();
      qemu_savevm_state_header(s->to_dst_file);
      qemu_savevm_state_setup(s->to_dst_file);
      bql_unlock();
  
-    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
-                               MIGRATION_STATUS_ACTIVE);
-
      s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
  
      trace_migration_thread_setup_complete();
  
+    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+                      MIGRATION_STATUS_ACTIVE);
+
      bql_lock();
  
      if (migration_stop_vm(s, RUN_STATE_PAUSED)) {
Peter Xu March 15, 2024, 1:09 p.m. UTC | #25
On Fri, Mar 15, 2024 at 01:20:49PM +0100, Cédric Le Goater wrote:
> On 3/15/24 12:01, Peter Xu wrote:
> > On Fri, Mar 15, 2024 at 11:17:45AM +0100, Cédric Le Goater wrote:
> > > > migrate_set_state is also unintuitive because it ignores invalid state
> > > > transitions and we've been using that property to deal with special
> > > > states such as POSTCOPY_PAUSED and FAILED:
> > > > 
> > > > - After the migration goes into POSTCOPY_PAUSED, the resumed migration's
> > > >     migrate_init() will try to set the state NONE->SETUP, which is not
> > > >     valid.
> > > > 
> > > > - After save_setup fails, the migration goes into FAILED, but wait_unplug
> > > >     will try to transition SETUP->ACTIVE, which is also not valid.
> > > > 
> > > 
> > > I am not sure I understand what the plan is. Both solutions are problematic
> > > regarding the state transitions.
> > > 
> > > Should we consider that waiting for failover devices to unplug is an internal
> > > step of the SETUP phase not transitioning to ACTIVE ?
> > 
> > If to unblock this series, IIUC the simplest solution is to do what Fabiano
> > suggested, that we move qemu_savevm_wait_unplug() to be before the check of
> > setup() ret.
> 
> The simplest is IMHO moving qemu_savevm_wait_unplug() before
> qemu_savevm_state_setup() and leave patch 10 is unchanged. See
> below the extra patch. It looks much cleaner than what we have
> today.

Yes it looks cleaner indeed, it's just that then we'll have one more
possible state conversions like SETUP->UNPLUG->SETUP.  I'd say it's fine,
but let's also copy Laurent and Laine if it's going to be posted formally.

Thanks,

> 
> > In that case, the state change in qemu_savevm_wait_unplug()
> > should be benign and we should see a super small window it became ACTIVE
> > but then it should be FAILED (and IIUC the patch itself will need to use
> > ACTIVE as "old_state", not SETUP anymore).
> 
> OK. I will give it a try to compare.
> 
> > For the long term, maybe we can remove the WAIT_UNPLUG state?
> 
> I hope so, it's an internal SETUP state for me.
> 
> > The only Libvirt support seems to be here:
> > 
> > commit 8a226ddb3602586a2ba2359afc4448c02f566a0e
> > Author: Laine Stump <laine@redhat.com>
> > Date:   Wed Jan 15 16:38:57 2020 -0500
> > 
> >      qemu: add wait-unplug to qemu migration status enum
> > 
> > Considering that qemu_savevm_wait_unplug() can be a noop if the device is
> > already unplugged, I think it means no upper layer app should rely on this
> > state to present.
> 
> Thanks,
> 
> C.
> 
> 
> > 
> @@ -3383,11 +3383,10 @@ bool migration_rate_limit(void)
>   * unplugged
>   */
> -static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
> -                                    int new_state)
> +static void qemu_savevm_wait_unplug(MigrationState *s, int state)
>  {
>      if (qemu_savevm_state_guest_unplug_pending()) {
> -        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);
> +        migrate_set_state(&s->state, state, MIGRATION_STATUS_WAIT_UNPLUG);
>          while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
>                 qemu_savevm_state_guest_unplug_pending()) {
> @@ -3410,9 +3409,7 @@ static void qemu_savevm_wait_unplug(Migr
>              }
>          }
> -        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
> -    } else {
> -        migrate_set_state(&s->state, old_state, new_state);
> +        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, state);
>      }
>  }
> @@ -3469,17 +3466,19 @@ static void *migration_thread(void *opaq
>          qemu_savevm_send_colo_enable(s->to_dst_file);
>      }
> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP);
> +
>      bql_lock();
>      qemu_savevm_state_setup(s->to_dst_file);
>      bql_unlock();
> -    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
> -                               MIGRATION_STATUS_ACTIVE);
> -
>      s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
>      trace_migration_thread_setup_complete();
> +    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> +                      MIGRATION_STATUS_ACTIVE);
> +
>      while (migration_is_active()) {
>          if (urgent || !migration_rate_exceeded(s->to_dst_file)) {
>              MigIterateState iter_state = migration_iteration_run(s);
> @@ -3580,18 +3579,20 @@ static void *bg_migration_thread(void *o
>      ram_write_tracking_prepare();
>  #endif
> +    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP);
> +
>      bql_lock();
>      qemu_savevm_state_header(s->to_dst_file);
>      qemu_savevm_state_setup(s->to_dst_file);
>      bql_unlock();
> -    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
> -                               MIGRATION_STATUS_ACTIVE);
> -
>      s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
>      trace_migration_thread_setup_complete();
> +    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> +                      MIGRATION_STATUS_ACTIVE);
> +
>      bql_lock();
>      if (migration_stop_vm(s, RUN_STATE_PAUSED)) {
>
Peter Xu March 15, 2024, 1:11 p.m. UTC | #26
On Fri, Mar 15, 2024 at 01:20:49PM +0100, Cédric Le Goater wrote:
> +static void qemu_savevm_wait_unplug(MigrationState *s, int state)

One more trivial comment: I'd even consider dropping "state" altogether, as
this should be the only state this function should be invoked.  So we can
perhaps assert it instead of passing it over?
Cédric Le Goater March 15, 2024, 2:21 p.m. UTC | #27
On 3/15/24 13:20, Cédric Le Goater wrote:
> On 3/15/24 12:01, Peter Xu wrote:
>> On Fri, Mar 15, 2024 at 11:17:45AM +0100, Cédric Le Goater wrote:
>>>> migrate_set_state is also unintuitive because it ignores invalid state
>>>> transitions and we've been using that property to deal with special
>>>> states such as POSTCOPY_PAUSED and FAILED:
>>>>
>>>> - After the migration goes into POSTCOPY_PAUSED, the resumed migration's
>>>>     migrate_init() will try to set the state NONE->SETUP, which is not
>>>>     valid.
>>>>
>>>> - After save_setup fails, the migration goes into FAILED, but wait_unplug
>>>>     will try to transition SETUP->ACTIVE, which is also not valid.
>>>>
>>>
>>> I am not sure I understand what the plan is. Both solutions are problematic
>>> regarding the state transitions.
>>>
>>> Should we consider that waiting for failover devices to unplug is an internal
>>> step of the SETUP phase not transitioning to ACTIVE ?
>>
>> If to unblock this series, IIUC the simplest solution is to do what Fabiano
>> suggested, that we move qemu_savevm_wait_unplug() to be before the check of
>> setup() ret. 
> 
> The simplest is IMHO moving qemu_savevm_wait_unplug() before
> qemu_savevm_state_setup() and leave patch 10 is unchanged. See
> below the extra patch. It looks much cleaner than what we have
> today.
> 
>> In that case, the state change in qemu_savevm_wait_unplug()
>> should be benign and we should see a super small window it became ACTIVE
>> but then it should be FAILED (and IIUC the patch itself will need to use
>> ACTIVE as "old_state", not SETUP anymore).
> 
> OK. I will give it a try to compare.

Here's the alternative solution. SETUP state failures are handled after
transitioning to ACTIVE state, which is unfortunate but probably harmless.
I guess it's OK.

Thanks,

C.



Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
  migration/savevm.h    |  2 +-
  migration/migration.c | 29 +++++++++++++++++++++++++++--
  migration/savevm.c    | 26 +++++++++++++++-----------
  3 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/migration/savevm.h b/migration/savevm.h
index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -32,7 +32,7 @@
  bool qemu_savevm_state_blocked(Error **errp);
  void qemu_savevm_non_migratable_list(strList **reasons);
  int qemu_savevm_state_prepare(Error **errp);
-void qemu_savevm_state_setup(QEMUFile *f);
+int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
  bool qemu_savevm_state_guest_unplug_pending(void);
  int qemu_savevm_state_resume_prepare(MigrationState *s);
  void qemu_savevm_state_header(QEMUFile *f);
diff --git a/migration/migration.c b/migration/migration.c
index 644e073b7dcc70cb2bdaa9c975ba478952465ff4..0704ad6226df61f2f15bd81a2897f9946d601ca7 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3427,6 +3427,8 @@ static void *migration_thread(void *opaque)
      int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
      MigThrError thr_error;
      bool urgent = false;
+    Error *local_err = NULL;
+    int ret;
  
      thread = migration_threads_add("live_migration", qemu_get_thread_id());
  
@@ -3470,12 +3472,24 @@ static void *migration_thread(void *opaque)
      }
  
      bql_lock();
-    qemu_savevm_state_setup(s->to_dst_file);
+    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
      bql_unlock();
  
      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                                 MIGRATION_STATUS_ACTIVE);
  
+    /*
+     * Handle SETUP failures after waiting for virtio-net-failover
+     * devices to unplug. This to preserve migration state transitions.
+     */
+    if (ret) {
+        migrate_set_error(s, local_err);
+        error_free(local_err);
+        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+                          MIGRATION_STATUS_FAILED);
+        goto out;
+    }
+
      s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
  
      trace_migration_thread_setup_complete();
@@ -3549,6 +3563,8 @@ static void *bg_migration_thread(void *opaque)
      MigThrError thr_error;
      QEMUFile *fb;
      bool early_fail = true;
+    Error *local_err = NULL;
+    int ret;
  
      rcu_register_thread();
      object_ref(OBJECT(s));
@@ -3582,12 +3598,20 @@ static void *bg_migration_thread(void *opaque)
  
      bql_lock();
      qemu_savevm_state_header(s->to_dst_file);
-    qemu_savevm_state_setup(s->to_dst_file);
+    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
      bql_unlock();
  
      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                                 MIGRATION_STATUS_ACTIVE);
  
+    if (ret) {
+        migrate_set_error(s, local_err);
+        error_free(local_err);
+        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+                          MIGRATION_STATUS_FAILED);
+        goto fail_setup;
+    }
+
      s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
  
      trace_migration_thread_setup_complete();
@@ -3656,6 +3680,7 @@ fail:
          bql_unlock();
      }
  
+fail_setup:
      bg_migration_iteration_finish(s);
  
      qemu_fclose(fb);
diff --git a/migration/savevm.c b/migration/savevm.c
index 1a7b5cb78a912c36ae16db703afc90ef2906b61f..0eb94e61f888adba2c0732c2cb701b110814c455 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1310,11 +1310,11 @@ int qemu_savevm_state_prepare(Error **errp)
      return 0;
  }
  
-void qemu_savevm_state_setup(QEMUFile *f)
+int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
  {
+    ERRP_GUARD();
      MigrationState *ms = migrate_get_current();
      SaveStateEntry *se;
-    Error *local_err = NULL;
      int ret = 0;
  
      json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
@@ -1323,10 +1323,9 @@ void qemu_savevm_state_setup(QEMUFile *f)
      trace_savevm_state_setup();
      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
          if (se->vmsd && se->vmsd->early_setup) {
-            ret = vmstate_save(f, se, ms->vmdesc, &local_err);
+            ret = vmstate_save(f, se, ms->vmdesc, errp);
              if (ret) {
-                migrate_set_error(ms, local_err);
-                error_report_err(local_err);
+                migrate_set_error(ms, *errp);
                  qemu_file_set_error(f, ret);
                  break;
              }
@@ -1346,18 +1345,19 @@ void qemu_savevm_state_setup(QEMUFile *f)
          ret = se->ops->save_setup(f, se->opaque);
          save_section_footer(f, se);
          if (ret < 0) {
+            error_setg(errp, "failed to setup SaveStateEntry with id(name): "
+                       "%d(%s): %d", se->section_id, se->idstr, ret);
              qemu_file_set_error(f, ret);
              break;
          }
      }
  
      if (ret) {
-        return;
+        return ret;
      }
  
-    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
-        error_report_err(local_err);
-    }
+    /* TODO: Should we check that errp is set in case of failure ? */
+    return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
  }
  
  int qemu_savevm_state_resume_prepare(MigrationState *s)
@@ -1725,7 +1725,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
      ms->to_dst_file = f;
  
      qemu_savevm_state_header(f);
-    qemu_savevm_state_setup(f);
+    ret = qemu_savevm_state_setup(f, errp);
+    if (ret) {
+        goto cleanup;
+    }
  
      while (qemu_file_get_error(f) == 0) {
          if (qemu_savevm_state_iterate(f, false) > 0) {
@@ -1738,10 +1741,11 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
          qemu_savevm_state_complete_precopy(f, false, false);
          ret = qemu_file_get_error(f);
      }
-    qemu_savevm_state_cleanup();
      if (ret != 0) {
          error_setg_errno(errp, -ret, "Error while writing VM state");
      }
+cleanup:
+    qemu_savevm_state_cleanup();
  
      if (ret != 0) {
          status = MIGRATION_STATUS_FAILED;
Cédric Le Goater March 15, 2024, 2:30 p.m. UTC | #28
On 3/15/24 14:09, Peter Xu wrote:
> On Fri, Mar 15, 2024 at 01:20:49PM +0100, Cédric Le Goater wrote:
>> On 3/15/24 12:01, Peter Xu wrote:
>>> On Fri, Mar 15, 2024 at 11:17:45AM +0100, Cédric Le Goater wrote:
>>>>> migrate_set_state is also unintuitive because it ignores invalid state
>>>>> transitions and we've been using that property to deal with special
>>>>> states such as POSTCOPY_PAUSED and FAILED:
>>>>>
>>>>> - After the migration goes into POSTCOPY_PAUSED, the resumed migration's
>>>>>      migrate_init() will try to set the state NONE->SETUP, which is not
>>>>>      valid.
>>>>>
>>>>> - After save_setup fails, the migration goes into FAILED, but wait_unplug
>>>>>      will try to transition SETUP->ACTIVE, which is also not valid.
>>>>>
>>>>
>>>> I am not sure I understand what the plan is. Both solutions are problematic
>>>> regarding the state transitions.
>>>>
>>>> Should we consider that waiting for failover devices to unplug is an internal
>>>> step of the SETUP phase not transitioning to ACTIVE ?
>>>
>>> If to unblock this series, IIUC the simplest solution is to do what Fabiano
>>> suggested, that we move qemu_savevm_wait_unplug() to be before the check of
>>> setup() ret.
>>
>> The simplest is IMHO moving qemu_savevm_wait_unplug() before
>> qemu_savevm_state_setup() and leave patch 10 is unchanged. See
>> below the extra patch. It looks much cleaner than what we have
>> today.
> 
> Yes it looks cleaner indeed, it's just that then we'll have one more
> possible state conversions like SETUP->UNPLUG->SETUP.  I'd say it's fine,
> but let's also copy Laruent and Laine if it's going to be posted formally.

OK. I just sent the alternative implementation. The code looks a little
ugly  :

     bql_lock();                                                           |
     qemu_savevm_state_header(s->to_dst_file);                             |
     ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);            | in SETUP state
     bql_unlock();                                                         |
                                                                           |
     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,                    | SETUP -> ACTIVE transition
                                MIGRATION_STATUS_ACTIVE);                  |
                                                                           |
     /*                                                                    |
      * Handle SETUP failures after waiting for virtio-net-failover        |
      * devices to unplug. This to preserve migration state transitions.   |
      */                                                                   |
     if (ret) {                                                            |
         migrate_set_error(s, local_err);                                  | handling SETUP errors in ACTIVE
         error_free(local_err);                                            |
         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,             |
                           MIGRATION_STATUS_FAILED);                       |
         goto fail_setup;                                                  |
     }                                                                     |
                                                                           |
     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;     | SETUP duration
                                                                           |
     trace_migration_thread_setup_complete();                              | SETUP trace event



Thanks,

C.
Cédric Le Goater March 15, 2024, 2:31 p.m. UTC | #29
On 3/15/24 14:11, Peter Xu wrote:
> On Fri, Mar 15, 2024 at 01:20:49PM +0100, Cédric Le Goater wrote:
>> +static void qemu_savevm_wait_unplug(MigrationState *s, int state)
> 
> One more trivial comment: I'd even consider dropping "state" altogether, as
> this should be the only state this function should be invoked.  So we can
> perhaps assert it instead of passing it over?

Yes. If you prefer this implementation I will change.


Thanks,

C.
Peter Xu March 15, 2024, 2:52 p.m. UTC | #30
On Fri, Mar 15, 2024 at 03:21:27PM +0100, Cédric Le Goater wrote:
> On 3/15/24 13:20, Cédric Le Goater wrote:
> > On 3/15/24 12:01, Peter Xu wrote:
> > > On Fri, Mar 15, 2024 at 11:17:45AM +0100, Cédric Le Goater wrote:
> > > > > migrate_set_state is also unintuitive because it ignores invalid state
> > > > > transitions and we've been using that property to deal with special
> > > > > states such as POSTCOPY_PAUSED and FAILED:
> > > > > 
> > > > > - After the migration goes into POSTCOPY_PAUSED, the resumed migration's
> > > > >     migrate_init() will try to set the state NONE->SETUP, which is not
> > > > >     valid.
> > > > > 
> > > > > - After save_setup fails, the migration goes into FAILED, but wait_unplug
> > > > >     will try to transition SETUP->ACTIVE, which is also not valid.
> > > > > 
> > > > 
> > > > I am not sure I understand what the plan is. Both solutions are problematic
> > > > regarding the state transitions.
> > > > 
> > > > Should we consider that waiting for failover devices to unplug is an internal
> > > > step of the SETUP phase not transitioning to ACTIVE ?
> > > 
> > > If to unblock this series, IIUC the simplest solution is to do what Fabiano
> > > suggested, that we move qemu_savevm_wait_unplug() to be before the check of
> > > setup() ret.
> > 
> > The simplest is IMHO moving qemu_savevm_wait_unplug() before
> > qemu_savevm_state_setup() and leave patch 10 is unchanged. See
> > below the extra patch. It looks much cleaner than what we have
> > today.
> > 
> > > In that case, the state change in qemu_savevm_wait_unplug()
> > > should be benign and we should see a super small window it became ACTIVE
> > > but then it should be FAILED (and IIUC the patch itself will need to use
> > > ACTIVE as "old_state", not SETUP anymore).
> > 
> > OK. I will give it a try to compare.
> 
> Here's the alternative solution. SETUP state failures are handled after
> transitioning to ACTIVE state, which is unfortunate but probably harmless.
> I guess it's OK.

This also looks good to me, thanks.

One trivial early comment is in this case we can introduce a helper to
cover both setup() calls and UNPLUG waits and dedup the two paths.

> 
> Thanks,
> 
> C.
> 
> 
> 
> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> ---
>  migration/savevm.h    |  2 +-
>  migration/migration.c | 29 +++++++++++++++++++++++++++--
>  migration/savevm.c    | 26 +++++++++++++++-----------
>  3 files changed, 43 insertions(+), 14 deletions(-)
> 
> diff --git a/migration/savevm.h b/migration/savevm.h
> index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
> --- a/migration/savevm.h
> +++ b/migration/savevm.h
> @@ -32,7 +32,7 @@
>  bool qemu_savevm_state_blocked(Error **errp);
>  void qemu_savevm_non_migratable_list(strList **reasons);
>  int qemu_savevm_state_prepare(Error **errp);
> -void qemu_savevm_state_setup(QEMUFile *f);
> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
>  bool qemu_savevm_state_guest_unplug_pending(void);
>  int qemu_savevm_state_resume_prepare(MigrationState *s);
>  void qemu_savevm_state_header(QEMUFile *f);
> diff --git a/migration/migration.c b/migration/migration.c
> index 644e073b7dcc70cb2bdaa9c975ba478952465ff4..0704ad6226df61f2f15bd81a2897f9946d601ca7 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -3427,6 +3427,8 @@ static void *migration_thread(void *opaque)
>      int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>      MigThrError thr_error;
>      bool urgent = false;
> +    Error *local_err = NULL;
> +    int ret;
>      thread = migration_threads_add("live_migration", qemu_get_thread_id());
> @@ -3470,12 +3472,24 @@ static void *migration_thread(void *opaque)
>      }
>      bql_lock();
> -    qemu_savevm_state_setup(s->to_dst_file);
> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>      bql_unlock();
>      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>                                 MIGRATION_STATUS_ACTIVE);
> +    /*
> +     * Handle SETUP failures after waiting for virtio-net-failover
> +     * devices to unplug. This to preserve migration state transitions.
> +     */
> +    if (ret) {
> +        migrate_set_error(s, local_err);
> +        error_free(local_err);
> +        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> +                          MIGRATION_STATUS_FAILED);
> +        goto out;
> +    }
> +
>      s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
>      trace_migration_thread_setup_complete();
> @@ -3549,6 +3563,8 @@ static void *bg_migration_thread(void *opaque)
>      MigThrError thr_error;
>      QEMUFile *fb;
>      bool early_fail = true;
> +    Error *local_err = NULL;
> +    int ret;
>      rcu_register_thread();
>      object_ref(OBJECT(s));
> @@ -3582,12 +3598,20 @@ static void *bg_migration_thread(void *opaque)
>      bql_lock();
>      qemu_savevm_state_header(s->to_dst_file);
> -    qemu_savevm_state_setup(s->to_dst_file);
> +    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
>      bql_unlock();
>      qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
>                                 MIGRATION_STATUS_ACTIVE);
> +    if (ret) {
> +        migrate_set_error(s, local_err);
> +        error_free(local_err);
> +        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> +                          MIGRATION_STATUS_FAILED);
> +        goto fail_setup;
> +    }
> +
>      s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
>      trace_migration_thread_setup_complete();
> @@ -3656,6 +3680,7 @@ fail:
>          bql_unlock();
>      }
> +fail_setup:
>      bg_migration_iteration_finish(s);
>      qemu_fclose(fb);
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 1a7b5cb78a912c36ae16db703afc90ef2906b61f..0eb94e61f888adba2c0732c2cb701b110814c455 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1310,11 +1310,11 @@ int qemu_savevm_state_prepare(Error **errp)
>      return 0;
>  }
> -void qemu_savevm_state_setup(QEMUFile *f)
> +int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
>  {
> +    ERRP_GUARD();
>      MigrationState *ms = migrate_get_current();
>      SaveStateEntry *se;
> -    Error *local_err = NULL;
>      int ret = 0;
>      json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
> @@ -1323,10 +1323,9 @@ void qemu_savevm_state_setup(QEMUFile *f)
>      trace_savevm_state_setup();
>      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>          if (se->vmsd && se->vmsd->early_setup) {
> -            ret = vmstate_save(f, se, ms->vmdesc, &local_err);
> +            ret = vmstate_save(f, se, ms->vmdesc, errp);
>              if (ret) {
> -                migrate_set_error(ms, local_err);
> -                error_report_err(local_err);
> +                migrate_set_error(ms, *errp);
>                  qemu_file_set_error(f, ret);
>                  break;
>              }
> @@ -1346,18 +1345,19 @@ void qemu_savevm_state_setup(QEMUFile *f)
>          ret = se->ops->save_setup(f, se->opaque);
>          save_section_footer(f, se);
>          if (ret < 0) {
> +            error_setg(errp, "failed to setup SaveStateEntry with id(name): "
> +                       "%d(%s): %d", se->section_id, se->idstr, ret);
>              qemu_file_set_error(f, ret);
>              break;
>          }
>      }
>      if (ret) {
> -        return;
> +        return ret;
>      }
> -    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
> -        error_report_err(local_err);
> -    }
> +    /* TODO: Should we check that errp is set in case of failure ? */
> +    return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
>  }
>  int qemu_savevm_state_resume_prepare(MigrationState *s)
> @@ -1725,7 +1725,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>      ms->to_dst_file = f;
>      qemu_savevm_state_header(f);
> -    qemu_savevm_state_setup(f);
> +    ret = qemu_savevm_state_setup(f, errp);
> +    if (ret) {
> +        goto cleanup;
> +    }
>      while (qemu_file_get_error(f) == 0) {
>          if (qemu_savevm_state_iterate(f, false) > 0) {
> @@ -1738,10 +1741,11 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>          qemu_savevm_state_complete_precopy(f, false, false);
>          ret = qemu_file_get_error(f);
>      }
> -    qemu_savevm_state_cleanup();
>      if (ret != 0) {
>          error_setg_errno(errp, -ret, "Error while writing VM state");
>      }
> +cleanup:
> +    qemu_savevm_state_cleanup();
>      if (ret != 0) {
>          status = MIGRATION_STATUS_FAILED;
> -- 
> 2.44.0
> 
>
Peter Xu March 15, 2024, 2:57 p.m. UTC | #31
On Fri, Mar 15, 2024 at 03:31:28PM +0100, Cédric Le Goater wrote:
> On 3/15/24 14:11, Peter Xu wrote:
> > On Fri, Mar 15, 2024 at 01:20:49PM +0100, Cédric Le Goater wrote:
> > > +static void qemu_savevm_wait_unplug(MigrationState *s, int state)
> > 
> > One more trivial comment: I'd even consider dropping "state" altogether, as
> > this should be the only state this function should be invoked.  So we can
> > perhaps assert it instead of passing it over?
> 
> Yes. If you prefer this implementation I will change.

I am fine with either approach, we can wait for 1-2 days to see whether
others want to say.  Otherwise the other approach actually looks better to
me in that it avoids SETUP->UNPLUG->SETUP jumps.

And then we wait to see whether UNPLUG can be dropped for either way to go,
perhaps starting from adding it into deprecation list if no objections from
the relevant folks.

Thanks,
Cédric Le Goater March 19, 2024, 10:46 a.m. UTC | #32
On 3/15/24 15:52, Peter Xu wrote:
> On Fri, Mar 15, 2024 at 03:21:27PM +0100, Cédric Le Goater wrote:
>> On 3/15/24 13:20, Cédric Le Goater wrote:
>>> On 3/15/24 12:01, Peter Xu wrote:
>>>> On Fri, Mar 15, 2024 at 11:17:45AM +0100, Cédric Le Goater wrote:
>>>>>> migrate_set_state is also unintuitive because it ignores invalid state
>>>>>> transitions and we've been using that property to deal with special
>>>>>> states such as POSTCOPY_PAUSED and FAILED:
>>>>>>
>>>>>> - After the migration goes into POSTCOPY_PAUSED, the resumed migration's
>>>>>>      migrate_init() will try to set the state NONE->SETUP, which is not
>>>>>>      valid.
>>>>>>
>>>>>> - After save_setup fails, the migration goes into FAILED, but wait_unplug
>>>>>>      will try to transition SETUP->ACTIVE, which is also not valid.
>>>>>>
>>>>>
>>>>> I am not sure I understand what the plan is. Both solutions are problematic
>>>>> regarding the state transitions.
>>>>>
>>>>> Should we consider that waiting for failover devices to unplug is an internal
>>>>> step of the SETUP phase not transitioning to ACTIVE ?
>>>>
>>>> If to unblock this series, IIUC the simplest solution is to do what Fabiano
>>>> suggested, that we move qemu_savevm_wait_unplug() to be before the check of
>>>> setup() ret.
>>>
>>> The simplest is IMHO moving qemu_savevm_wait_unplug() before
>>> qemu_savevm_state_setup() and leave patch 10 is unchanged. See
>>> below the extra patch. It looks much cleaner than what we have
>>> today.
>>>
>>>> In that case, the state change in qemu_savevm_wait_unplug()
>>>> should be benign and we should see a super small window it became ACTIVE
>>>> but then it should be FAILED (and IIUC the patch itself will need to use
>>>> ACTIVE as "old_state", not SETUP anymore).
>>>
>>> OK. I will give it a try to compare.
>>
>> Here's the alternative solution. SETUP state failures are handled after
>> transitioning to ACTIVE state, which is unfortunate but probably harmless.
>> I guess it's OK.
> 
> This also looks good to me, thanks.
> 
> One trivial early comment is in this case we can introduce a helper to
> cover both setup() calls and UNPLUG waits and dedup the two paths.

There is one little difference: qemu_savevm_state_header() is called
earlier in the migration thread, before return-path, postcopy and colo
are advertised on the target. I don't think it can it be moved.

Thanks,

C.
diff mbox series

Patch

diff --git a/migration/savevm.h b/migration/savevm.h
index 74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -32,7 +32,7 @@ 
 bool qemu_savevm_state_blocked(Error **errp);
 void qemu_savevm_non_migratable_list(strList **reasons);
 int qemu_savevm_state_prepare(Error **errp);
-void qemu_savevm_state_setup(QEMUFile *f);
+int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
 bool qemu_savevm_state_guest_unplug_pending(void);
 int qemu_savevm_state_resume_prepare(MigrationState *s);
 void qemu_savevm_state_header(QEMUFile *f);
diff --git a/migration/migration.c b/migration/migration.c
index a49fcd53ee19df1ce0182bc99d7e064968f0317b..6d1544224e96f5edfe56939a9c8395d88ef29581 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3408,6 +3408,8 @@  static void *migration_thread(void *opaque)
     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     MigThrError thr_error;
     bool urgent = false;
+    Error *local_err = NULL;
+    int ret;
 
     thread = migration_threads_add("live_migration", qemu_get_thread_id());
 
@@ -3451,9 +3453,17 @@  static void *migration_thread(void *opaque)
     }
 
     bql_lock();
-    qemu_savevm_state_setup(s->to_dst_file);
+    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
     bql_unlock();
 
+    if (ret) {
+        migrate_set_error(s, local_err);
+        error_free(local_err);
+        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+                          MIGRATION_STATUS_FAILED);
+        goto out;
+    }
+
     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                                MIGRATION_STATUS_ACTIVE);
 
@@ -3530,6 +3540,9 @@  static void *bg_migration_thread(void *opaque)
     MigThrError thr_error;
     QEMUFile *fb;
     bool early_fail = true;
+    bool setup_fail = true;
+    Error *local_err = NULL;
+    int ret;
 
     rcu_register_thread();
     object_ref(OBJECT(s));
@@ -3563,9 +3576,16 @@  static void *bg_migration_thread(void *opaque)
 
     bql_lock();
     qemu_savevm_state_header(s->to_dst_file);
-    qemu_savevm_state_setup(s->to_dst_file);
+    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
+    if (ret) {
+        migrate_set_error(s, local_err);
+        error_free(local_err);
+        goto fail;
+    }
     bql_unlock();
 
+    setup_fail = false;
+
     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                                MIGRATION_STATUS_ACTIVE);
 
@@ -3632,7 +3652,8 @@  static void *bg_migration_thread(void *opaque)
 
 fail:
     if (early_fail) {
-        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+        migrate_set_state(&s->state,
+                setup_fail ? MIGRATION_STATUS_SETUP : MIGRATION_STATUS_ACTIVE,
                 MIGRATION_STATUS_FAILED);
         bql_unlock();
     }
diff --git a/migration/savevm.c b/migration/savevm.c
index ee31ffb5e88cea723039c754c30ce2c8a0ef35f3..63fdbb5ad7d4dbfaef1d2094350bf302cc677602 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1310,11 +1310,11 @@  int qemu_savevm_state_prepare(Error **errp)
     return 0;
 }
 
-void qemu_savevm_state_setup(QEMUFile *f)
+int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
 {
+    ERRP_GUARD();
     MigrationState *ms = migrate_get_current();
     SaveStateEntry *se;
-    Error *local_err = NULL;
     int ret = 0;
 
     json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
@@ -1323,10 +1323,9 @@  void qemu_savevm_state_setup(QEMUFile *f)
     trace_savevm_state_setup();
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->vmsd && se->vmsd->early_setup) {
-            ret = vmstate_save(f, se, ms->vmdesc, &local_err);
+            ret = vmstate_save(f, se, ms->vmdesc, errp);
             if (ret) {
-                migrate_set_error(ms, local_err);
-                error_report_err(local_err);
+                migrate_set_error(ms, *errp);
                 qemu_file_set_error(f, ret);
                 break;
             }
@@ -1346,18 +1345,19 @@  void qemu_savevm_state_setup(QEMUFile *f)
         ret = se->ops->save_setup(f, se->opaque);
         save_section_footer(f, se);
         if (ret < 0) {
+            error_setg(errp, "failed to setup SaveStateEntry with id(name): "
+                       "%d(%s): %d", se->section_id, se->idstr, ret);
             qemu_file_set_error(f, ret);
             break;
         }
     }
 
     if (ret) {
-        return;
+        return ret;
     }
 
-    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
-        error_report_err(local_err);
-    }
+    /* TODO: Should we check that errp is set in case of failure? */
+    return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
 }
 
 int qemu_savevm_state_resume_prepare(MigrationState *s)
@@ -1728,7 +1728,10 @@  static int qemu_savevm_state(QEMUFile *f, Error **errp)
     ms->to_dst_file = f;
 
     qemu_savevm_state_header(f);
-    qemu_savevm_state_setup(f);
+    ret = qemu_savevm_state_setup(f, errp);
+    if (ret) {
+        goto cleanup;
+    }
 
     while (qemu_file_get_error(f) == 0) {
         if (qemu_savevm_state_iterate(f, false) > 0) {
@@ -1741,10 +1744,11 @@  static int qemu_savevm_state(QEMUFile *f, Error **errp)
         qemu_savevm_state_complete_precopy(f, false, false);
         ret = qemu_file_get_error(f);
     }
-    qemu_savevm_state_cleanup();
     if (ret != 0) {
         error_setg_errno(errp, -ret, "Error while writing VM state");
     }
+cleanup:
+    qemu_savevm_state_cleanup();
 
     if (ret != 0) {
         status = MIGRATION_STATUS_FAILED;