Message ID | 1702491093-383782-3-git-send-email-steven.sistare@oracle.com |
---|---|
State | New |
Headers | show |
Series | allow cpr-reboot for vfio | expand |
On Wed, Dec 13, 2023 at 10:11:32AM -0800, Steve Sistare wrote: > After calling notifiers, check if an error has been reported via > migrate_set_error, and halt the migration. > > None of the notifiers call migrate_set_error at this time, so no > functional change. > > Signed-off-by: Steve Sistare <steven.sistare@oracle.com> > --- > include/migration/misc.h | 2 +- > migration/migration.c | 26 ++++++++++++++++++++++---- > 2 files changed, 23 insertions(+), 5 deletions(-) > > diff --git a/include/migration/misc.h b/include/migration/misc.h > index 901d117..231d7e4 100644 > --- a/include/migration/misc.h > +++ b/include/migration/misc.h > @@ -65,7 +65,7 @@ MigMode migrate_mode_of(MigrationState *); > void migration_add_notifier(Notifier *notify, > void (*func)(Notifier *notifier, void *data)); > void migration_remove_notifier(Notifier *notify); > -void migration_call_notifiers(MigrationState *s); > +int migration_call_notifiers(MigrationState *s); > bool migration_in_setup(MigrationState *); > bool migration_has_finished(MigrationState *); > bool migration_has_failed(MigrationState *); > diff --git a/migration/migration.c b/migration/migration.c > index d5bfe70..29a9a92 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -1280,6 +1280,8 @@ void migrate_set_state(int *state, int old_state, int new_state) > > static void migrate_fd_cleanup(MigrationState *s) > { > + bool already_failed; > + > qemu_bh_delete(s->cleanup_bh); > s->cleanup_bh = NULL; > > @@ -1327,11 +1329,20 @@ static void migrate_fd_cleanup(MigrationState *s) > MIGRATION_STATUS_CANCELLED); > } > > + already_failed = migration_has_failed(s); > + if (migration_call_notifiers(s)) { > + if (!already_failed) { > + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); > + /* Notify again to recover from this late failure. */ > + migration_call_notifiers(s); > + } > + } > + > if (s->error) { > /* It is used on info migrate. We can't free it */ > error_report_err(error_copy(s->error)); > } > - migration_call_notifiers(s); > + > block_cleanup_parameters(); > yank_unregister_instance(MIGRATION_YANK_INSTANCE); > } > @@ -1450,9 +1461,10 @@ void migration_remove_notifier(Notifier *notify) > } > } > > -void migration_call_notifiers(MigrationState *s) > +int migration_call_notifiers(MigrationState *s) > { > notifier_list_notify(&migration_state_notifiers, s); > + return (s->error != NULL); Exporting more migration_*() functions is pretty ugly to me.. Would it be better to pass in "Error** errp" into each notifiers? That may need an open coded notifier_list_notify(), breaking the loop if "*errp". And the notifier API currently only support one arg.. maybe we should implement the notifiers ourselves, ideally passing in "(int state, Error **errp)" instead of "(MigrationState *s)". Ideally with that MigrationState* shouldn't be visible outside migration/. Thanks, > } > > bool migration_in_setup(MigrationState *s) > @@ -2520,7 +2532,9 @@ static int postcopy_start(MigrationState *ms, Error **errp) > * spice needs to trigger a transition now > */ > ms->postcopy_after_devices = true; > - migration_call_notifiers(ms); > + if (migration_call_notifiers(ms)) { > + goto fail; > + } > > migration_downtime_end(ms); > > @@ -3589,7 +3603,11 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) > rate_limit = migrate_max_bandwidth(); > > /* Notify before starting migration thread */ > - migration_call_notifiers(s); > + if (migration_call_notifiers(s)) { > + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); > + migrate_fd_cleanup(s); > + return; > + } > } > > migration_rate_set(rate_limit); > -- > 1.8.3.1 >
On 1/10/2024 2:18 AM, Peter Xu wrote: > On Wed, Dec 13, 2023 at 10:11:32AM -0800, Steve Sistare wrote: >> After calling notifiers, check if an error has been reported via >> migrate_set_error, and halt the migration. >> >> None of the notifiers call migrate_set_error at this time, so no >> functional change. >> >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> >> --- >> include/migration/misc.h | 2 +- >> migration/migration.c | 26 ++++++++++++++++++++++---- >> 2 files changed, 23 insertions(+), 5 deletions(-) >> >> diff --git a/include/migration/misc.h b/include/migration/misc.h >> index 901d117..231d7e4 100644 >> --- a/include/migration/misc.h >> +++ b/include/migration/misc.h >> @@ -65,7 +65,7 @@ MigMode migrate_mode_of(MigrationState *); >> void migration_add_notifier(Notifier *notify, >> void (*func)(Notifier *notifier, void *data)); >> void migration_remove_notifier(Notifier *notify); >> -void migration_call_notifiers(MigrationState *s); >> +int migration_call_notifiers(MigrationState *s); >> bool migration_in_setup(MigrationState *); >> bool migration_has_finished(MigrationState *); >> bool migration_has_failed(MigrationState *); >> diff --git a/migration/migration.c b/migration/migration.c >> index d5bfe70..29a9a92 100644 >> --- a/migration/migration.c >> +++ b/migration/migration.c >> @@ -1280,6 +1280,8 @@ void migrate_set_state(int *state, int old_state, int new_state) >> >> static void migrate_fd_cleanup(MigrationState *s) >> { >> + bool already_failed; >> + >> qemu_bh_delete(s->cleanup_bh); >> s->cleanup_bh = NULL; >> >> @@ -1327,11 +1329,20 @@ static void migrate_fd_cleanup(MigrationState *s) >> MIGRATION_STATUS_CANCELLED); >> } >> >> + already_failed = migration_has_failed(s); >> + if (migration_call_notifiers(s)) { >> + if (!already_failed) { >> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); >> + /* Notify again to recover from this late failure. */ >> + migration_call_notifiers(s); >> + } >> + } >> + >> if (s->error) { >> /* It is used on info migrate. We can't free it */ >> error_report_err(error_copy(s->error)); >> } >> - migration_call_notifiers(s); >> + >> block_cleanup_parameters(); >> yank_unregister_instance(MIGRATION_YANK_INSTANCE); >> } >> @@ -1450,9 +1461,10 @@ void migration_remove_notifier(Notifier *notify) >> } >> } >> >> -void migration_call_notifiers(MigrationState *s) >> +int migration_call_notifiers(MigrationState *s) >> { >> notifier_list_notify(&migration_state_notifiers, s); >> + return (s->error != NULL); > > Exporting more migration_*() functions is pretty ugly to me.. I assume you mean migrate_set_error(), which is currently only called from migration/*.c code. Instead, we could define a new function migrate_set_notifier_error(), defined in the new file migration/notifier.h, so we clearly limit the migration functions which can be called from notifiers. (Its implementation just calls migrate_set_error) > Would it be better to pass in "Error** errp" into each notifiers? That may > need an open coded notifier_list_notify(), breaking the loop if "*errp". > > And the notifier API currently only support one arg.. maybe we should > implement the notifiers ourselves, ideally passing in "(int state, Error > **errp)" instead of "(MigrationState *s)". > > Ideally with that MigrationState* shouldn't be visible outside migration/. I will regret saying this because of the amount of (mechanical) code change involved, but the cleanest solution is: * Pass errp to: notifier_with_return_list_notify(NotifierWithReturnList *list, void *data, Error *errp) * Pass errp to the NotifierWithReturn notifier: int (*notify)(NotifierWithReturn *notifier, void *data, Error **errp); * Delete the errp member from struct PostcopyNotifyData and pass errp to the notifier function Ditto for PrecopyNotifyData. * Convert all migration notifiers to NotifierWithReturn - Steve >> } >> >> bool migration_in_setup(MigrationState *s) >> @@ -2520,7 +2532,9 @@ static int postcopy_start(MigrationState *ms, Error **errp) >> * spice needs to trigger a transition now >> */ >> ms->postcopy_after_devices = true; >> - migration_call_notifiers(ms); >> + if (migration_call_notifiers(ms)) { >> + goto fail; >> + } >> >> migration_downtime_end(ms); >> >> @@ -3589,7 +3603,11 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) >> rate_limit = migrate_max_bandwidth(); >> >> /* Notify before starting migration thread */ >> - migration_call_notifiers(s); >> + if (migration_call_notifiers(s)) { >> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); >> + migrate_fd_cleanup(s); >> + return; >> + } >> } >> >> migration_rate_set(rate_limit); >> -- >> 1.8.3.1 >> >
On Wed, Jan 10, 2024 at 01:08:41PM -0500, Steven Sistare wrote: > On 1/10/2024 2:18 AM, Peter Xu wrote: > > On Wed, Dec 13, 2023 at 10:11:32AM -0800, Steve Sistare wrote: > >> After calling notifiers, check if an error has been reported via > >> migrate_set_error, and halt the migration. > >> > >> None of the notifiers call migrate_set_error at this time, so no > >> functional change. > >> > >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> > >> --- > >> include/migration/misc.h | 2 +- > >> migration/migration.c | 26 ++++++++++++++++++++++---- > >> 2 files changed, 23 insertions(+), 5 deletions(-) > >> > >> diff --git a/include/migration/misc.h b/include/migration/misc.h > >> index 901d117..231d7e4 100644 > >> --- a/include/migration/misc.h > >> +++ b/include/migration/misc.h > >> @@ -65,7 +65,7 @@ MigMode migrate_mode_of(MigrationState *); > >> void migration_add_notifier(Notifier *notify, > >> void (*func)(Notifier *notifier, void *data)); > >> void migration_remove_notifier(Notifier *notify); > >> -void migration_call_notifiers(MigrationState *s); > >> +int migration_call_notifiers(MigrationState *s); > >> bool migration_in_setup(MigrationState *); > >> bool migration_has_finished(MigrationState *); > >> bool migration_has_failed(MigrationState *); > >> diff --git a/migration/migration.c b/migration/migration.c > >> index d5bfe70..29a9a92 100644 > >> --- a/migration/migration.c > >> +++ b/migration/migration.c > >> @@ -1280,6 +1280,8 @@ void migrate_set_state(int *state, int old_state, int new_state) > >> > >> static void migrate_fd_cleanup(MigrationState *s) > >> { > >> + bool already_failed; > >> + > >> qemu_bh_delete(s->cleanup_bh); > >> s->cleanup_bh = NULL; > >> > >> @@ -1327,11 +1329,20 @@ static void migrate_fd_cleanup(MigrationState *s) > >> MIGRATION_STATUS_CANCELLED); > >> } > >> > >> + already_failed = migration_has_failed(s); > >> + if (migration_call_notifiers(s)) { > >> + if (!already_failed) { > >> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); > >> + /* Notify again to recover from this late failure. */ > >> + migration_call_notifiers(s); > >> + } > >> + } > >> + > >> if (s->error) { > >> /* It is used on info migrate. We can't free it */ > >> error_report_err(error_copy(s->error)); > >> } > >> - migration_call_notifiers(s); > >> + > >> block_cleanup_parameters(); > >> yank_unregister_instance(MIGRATION_YANK_INSTANCE); > >> } > >> @@ -1450,9 +1461,10 @@ void migration_remove_notifier(Notifier *notify) > >> } > >> } > >> > >> -void migration_call_notifiers(MigrationState *s) > >> +int migration_call_notifiers(MigrationState *s) > >> { > >> notifier_list_notify(&migration_state_notifiers, s); > >> + return (s->error != NULL); > > > > Exporting more migration_*() functions is pretty ugly to me.. > > I assume you mean migrate_set_error(), which is currently only called from > migration/*.c code. > > Instead, we could define a new function migrate_set_notifier_error(), defined > in the new file migration/notifier.h, so we clearly limit the migration > functions which can be called from notifiers. (Its implementation just calls > migrate_set_error) Fundementally this allows another .c to change one more field of MigrationState (which is ->error) and I still want to avoid it. I just replied in the other thread, but now with all these in mind I think I still prefer not passing in MigrationState* at all. It's already kind of abused due to migrate_get_current(), and IMHO it's healthier to limit its usage to minimum to cover the core of migration states for migration/ use only. Shrinking or even stop exporting migrate_get_current() is another more challenging task, but now what we can do is stop enlarging the direct use of MigrationState*. > > > Would it be better to pass in "Error** errp" into each notifiers? That may > > need an open coded notifier_list_notify(), breaking the loop if "*errp". > > > > And the notifier API currently only support one arg.. maybe we should > > implement the notifiers ourselves, ideally passing in "(int state, Error > > **errp)" instead of "(MigrationState *s)". > > > > Ideally with that MigrationState* shouldn't be visible outside migration/. > > I will regret saying this because of the amount of (mechanical) code change involved, > but the cleanest solution is: :) > > * Pass errp to: > notifier_with_return_list_notify(NotifierWithReturnList *list, void *data, Error *errp) > * Pass errp to the NotifierWithReturn notifier: > int (*notify)(NotifierWithReturn *notifier, void *data, Error **errp); > * Delete the errp member from struct PostcopyNotifyData and pass errp to the notifier function > Ditto for PrecopyNotifyData. > * Convert all migration notifiers to NotifierWithReturn Would you mind changing MigrationState* into an event just like postcopy? We don't need to use migration_has_failed() etc., afaict three events should be enough for the existing four users, exactly like what postcopy does: - MIG_EVENT_PRECOPY_SETUP - MIG_EVENT_PRECOPY_DONE - MIG_EVENT_PRECOPY_FAILED Merging postcopy will be indeed the cleanest. I'm okay if you want to leave that for later, but if you'd do that together I'd appreciate that. Thanks,
On 1/10/2024 9:16 PM, Peter Xu wrote: > On Wed, Jan 10, 2024 at 01:08:41PM -0500, Steven Sistare wrote: >> On 1/10/2024 2:18 AM, Peter Xu wrote: >>> On Wed, Dec 13, 2023 at 10:11:32AM -0800, Steve Sistare wrote: >>>> After calling notifiers, check if an error has been reported via >>>> migrate_set_error, and halt the migration. >>>> >>>> None of the notifiers call migrate_set_error at this time, so no >>>> functional change. >>>> >>>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> >>>> --- >>>> include/migration/misc.h | 2 +- >>>> migration/migration.c | 26 ++++++++++++++++++++++---- >>>> 2 files changed, 23 insertions(+), 5 deletions(-) >>>> >>>> diff --git a/include/migration/misc.h b/include/migration/misc.h >>>> index 901d117..231d7e4 100644 >>>> --- a/include/migration/misc.h >>>> +++ b/include/migration/misc.h >>>> @@ -65,7 +65,7 @@ MigMode migrate_mode_of(MigrationState *); >>>> void migration_add_notifier(Notifier *notify, >>>> void (*func)(Notifier *notifier, void *data)); >>>> void migration_remove_notifier(Notifier *notify); >>>> -void migration_call_notifiers(MigrationState *s); >>>> +int migration_call_notifiers(MigrationState *s); >>>> bool migration_in_setup(MigrationState *); >>>> bool migration_has_finished(MigrationState *); >>>> bool migration_has_failed(MigrationState *); >>>> diff --git a/migration/migration.c b/migration/migration.c >>>> index d5bfe70..29a9a92 100644 >>>> --- a/migration/migration.c >>>> +++ b/migration/migration.c >>>> @@ -1280,6 +1280,8 @@ void migrate_set_state(int *state, int old_state, int new_state) >>>> >>>> static void migrate_fd_cleanup(MigrationState *s) >>>> { >>>> + bool already_failed; >>>> + >>>> qemu_bh_delete(s->cleanup_bh); >>>> s->cleanup_bh = NULL; >>>> >>>> @@ -1327,11 +1329,20 @@ static void migrate_fd_cleanup(MigrationState *s) >>>> MIGRATION_STATUS_CANCELLED); >>>> } >>>> >>>> + already_failed = migration_has_failed(s); >>>> + if (migration_call_notifiers(s)) { >>>> + if (!already_failed) { >>>> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); >>>> + /* Notify again to recover from this late failure. */ >>>> + migration_call_notifiers(s); >>>> + } >>>> + } >>>> + >>>> if (s->error) { >>>> /* It is used on info migrate. We can't free it */ >>>> error_report_err(error_copy(s->error)); >>>> } >>>> - migration_call_notifiers(s); >>>> + >>>> block_cleanup_parameters(); >>>> yank_unregister_instance(MIGRATION_YANK_INSTANCE); >>>> } >>>> @@ -1450,9 +1461,10 @@ void migration_remove_notifier(Notifier *notify) >>>> } >>>> } >>>> >>>> -void migration_call_notifiers(MigrationState *s) >>>> +int migration_call_notifiers(MigrationState *s) >>>> { >>>> notifier_list_notify(&migration_state_notifiers, s); >>>> + return (s->error != NULL); >>> >>> Exporting more migration_*() functions is pretty ugly to me.. >> >> I assume you mean migrate_set_error(), which is currently only called from >> migration/*.c code. >> >> Instead, we could define a new function migrate_set_notifier_error(), defined >> in the new file migration/notifier.h, so we clearly limit the migration >> functions which can be called from notifiers. (Its implementation just calls >> migrate_set_error) > > Fundementally this allows another .c to change one more field of > MigrationState (which is ->error) and I still want to avoid it. > > I just replied in the other thread, but now with all these in mind I think > I still prefer not passing in MigrationState* at all. It's already kind of > abused due to migrate_get_current(), and IMHO it's healthier to limit its > usage to minimum to cover the core of migration states for migration/ use > only. > > Shrinking or even stop exporting migrate_get_current() is another more > challenging task, but now what we can do is stop enlarging the direct use > of MigrationState*. > >> >>> Would it be better to pass in "Error** errp" into each notifiers? That may >>> need an open coded notifier_list_notify(), breaking the loop if "*errp". >>> >>> And the notifier API currently only support one arg.. maybe we should >>> implement the notifiers ourselves, ideally passing in "(int state, Error >>> **errp)" instead of "(MigrationState *s)". >>> >>> Ideally with that MigrationState* shouldn't be visible outside migration/. >> >> I will regret saying this because of the amount of (mechanical) code change involved, >> but the cleanest solution is: > > :) > >> >> * Pass errp to: >> notifier_with_return_list_notify(NotifierWithReturnList *list, void *data, Error *errp) >> * Pass errp to the NotifierWithReturn notifier: >> int (*notify)(NotifierWithReturn *notifier, void *data, Error **errp); >> * Delete the errp member from struct PostcopyNotifyData and pass errp to the notifier function >> Ditto for PrecopyNotifyData. >> * Convert all migration notifiers to NotifierWithReturn > > Would you mind changing MigrationState* into an event just like postcopy? > We don't need to use migration_has_failed() etc., afaict three events > should be enough for the existing four users, exactly like what postcopy > does: > > - MIG_EVENT_PRECOPY_SETUP > - MIG_EVENT_PRECOPY_DONE > - MIG_EVENT_PRECOPY_FAILED Will do. > Merging postcopy will be indeed the cleanest. I'm okay if you want to > leave that for later, but if you'd do that together I'd appreciate that. Will do. - Steve
diff --git a/include/migration/misc.h b/include/migration/misc.h index 901d117..231d7e4 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -65,7 +65,7 @@ MigMode migrate_mode_of(MigrationState *); void migration_add_notifier(Notifier *notify, void (*func)(Notifier *notifier, void *data)); void migration_remove_notifier(Notifier *notify); -void migration_call_notifiers(MigrationState *s); +int migration_call_notifiers(MigrationState *s); bool migration_in_setup(MigrationState *); bool migration_has_finished(MigrationState *); bool migration_has_failed(MigrationState *); diff --git a/migration/migration.c b/migration/migration.c index d5bfe70..29a9a92 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1280,6 +1280,8 @@ void migrate_set_state(int *state, int old_state, int new_state) static void migrate_fd_cleanup(MigrationState *s) { + bool already_failed; + qemu_bh_delete(s->cleanup_bh); s->cleanup_bh = NULL; @@ -1327,11 +1329,20 @@ static void migrate_fd_cleanup(MigrationState *s) MIGRATION_STATUS_CANCELLED); } + already_failed = migration_has_failed(s); + if (migration_call_notifiers(s)) { + if (!already_failed) { + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + /* Notify again to recover from this late failure. */ + migration_call_notifiers(s); + } + } + if (s->error) { /* It is used on info migrate. We can't free it */ error_report_err(error_copy(s->error)); } - migration_call_notifiers(s); + block_cleanup_parameters(); yank_unregister_instance(MIGRATION_YANK_INSTANCE); } @@ -1450,9 +1461,10 @@ void migration_remove_notifier(Notifier *notify) } } -void migration_call_notifiers(MigrationState *s) +int migration_call_notifiers(MigrationState *s) { notifier_list_notify(&migration_state_notifiers, s); + return (s->error != NULL); } bool migration_in_setup(MigrationState *s) @@ -2520,7 +2532,9 @@ static int postcopy_start(MigrationState *ms, Error **errp) * spice needs to trigger a transition now */ ms->postcopy_after_devices = true; - migration_call_notifiers(ms); + if (migration_call_notifiers(ms)) { + goto fail; + } migration_downtime_end(ms); @@ -3589,7 +3603,11 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) rate_limit = migrate_max_bandwidth(); /* Notify before starting migration thread */ - migration_call_notifiers(s); + if (migration_call_notifiers(s)) { + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + migrate_fd_cleanup(s); + return; + } } migration_rate_set(rate_limit);
After calling notifiers, check if an error has been reported via migrate_set_error, and halt the migration. None of the notifiers call migrate_set_error at this time, so no functional change. Signed-off-by: Steve Sistare <steven.sistare@oracle.com> --- include/migration/misc.h | 2 +- migration/migration.c | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-)