Message ID | 20230606144551.24367-3-farosas@suse.de |
---|---|
State | New |
Headers | show |
Series | migration: Fix multifd cancel test | expand |
On Tue, Jun 06, 2023 at 11:45:50AM -0300, Fabiano Rosas wrote: > This doubly linked list is common for all the multifd and migration > threads so we need to avoid concurrent access. > > Add a mutex to protect the data from concurrent access. This fixes a > crash when removing two MigrationThread objects from the list at the > same time during cleanup of multifd threads. > > To avoid destroying the mutex before the last element has been > removed, move calls to qmp_migration_thread_remove so they run before > multifd_save_cleanup joins the threads. > > Fixes: 671326201d ("migration: Introduce interface query-migrationthreads") > Signed-off-by: Fabiano Rosas <farosas@suse.de> > --- > migration/migration.c | 5 ++++- > migration/multifd.c | 3 ++- > migration/threadinfo.c | 19 ++++++++++++++++++- > migration/threadinfo.h | 5 +++-- > 4 files changed, 27 insertions(+), 5 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index e731fc98a1..b3b8345eb2 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState *s) > qemu_mutex_lock_iothread(); > > multifd_save_cleanup(); > + qmp_migration_threads_cleanup(); > qemu_mutex_lock(&s->qemu_file_lock); > tmp = s->to_dst_file; > s->to_dst_file = NULL; > @@ -1405,6 +1406,8 @@ void migrate_init(MigrationState *s) > s->vm_old_state = -1; > s->iteration_initial_bytes = 0; > s->threshold_size = 0; > + > + qmp_migration_threads_init(); > } > > int migrate_add_blocker_internal(Error *reason, Error **errp) > @@ -2997,10 +3000,10 @@ static void *migration_thread(void *opaque) > } > > trace_migration_thread_after_loop(); > + qmp_migration_threads_remove(thread); > migration_iteration_finish(s); > object_unref(OBJECT(s)); > rcu_unregister_thread(); > - qmp_migration_threads_remove(thread); > return NULL; > } > > diff --git a/migration/multifd.c b/migration/multifd.c > index 5ec1ac5c64..ee7944560a 100644 > --- a/migration/multifd.c > +++ b/migration/multifd.c > @@ -762,12 +762,13 @@ out: > qemu_sem_post(&multifd_send_state->channels_ready); > } > > + qmp_migration_threads_remove(thread); > + > qemu_mutex_lock(&p->mutex); > p->running = false; > qemu_mutex_unlock(&p->mutex); > > rcu_unregister_thread(); > - qmp_migration_threads_remove(thread); > trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); > > return NULL; > diff --git a/migration/threadinfo.c b/migration/threadinfo.c > index c3e85c33e8..1fe64a02dd 100644 > --- a/migration/threadinfo.c > +++ b/migration/threadinfo.c > @@ -10,23 +10,40 @@ > * See the COPYING file in the top-level directory. > */ > > +#include "qemu/osdep.h" > +#include "qemu/queue.h" > +#include "qemu/lockable.h" > #include "threadinfo.h" > > +QemuMutex migration_threads_lock; > static QLIST_HEAD(, MigrationThread) migration_threads; > > +void qmp_migration_threads_init(void) > +{ > + qemu_mutex_init(&migration_threads_lock); > +} > + > +void qmp_migration_threads_cleanup(void) > +{ > + qemu_mutex_destroy(&migration_threads_lock); > +} > + > MigrationThread *qmp_migration_threads_add(const char *name, int thread_id) > { > MigrationThread *thread = g_new0(MigrationThread, 1); > thread->name = name; > thread->thread_id = thread_id; > > - QLIST_INSERT_HEAD(&migration_threads, thread, node); > + WITH_QEMU_LOCK_GUARD(&migration_threads_lock) { > + QLIST_INSERT_HEAD(&migration_threads, thread, node); > + } > > return thread; > } > > void qmp_migration_threads_remove(MigrationThread *thread) > { > + QEMU_LOCK_GUARD(&migration_threads_lock); > if (thread) { > QLIST_REMOVE(thread, node); > g_free(thread); qmp_query_migrationthreads() better also have the lock? Other than that looks good, thanks! > diff --git a/migration/threadinfo.h b/migration/threadinfo.h > index 61b990f5e3..eb7f8e5bb2 100644 > --- a/migration/threadinfo.h > +++ b/migration/threadinfo.h > @@ -10,8 +10,6 @@ > * See the COPYING file in the top-level directory. > */ > > -#include "qemu/queue.h" > -#include "qemu/osdep.h" > #include "qapi/error.h" > #include "qapi/qapi-commands-migration.h" > > @@ -23,5 +21,8 @@ struct MigrationThread { > QLIST_ENTRY(MigrationThread) node; > }; > > +void qmp_migration_threads_init(void); > +void qmp_migration_threads_cleanup(void); > + > MigrationThread *qmp_migration_threads_add(const char *name, int thread_id); > void qmp_migration_threads_remove(MigrationThread *info); > -- > 2.35.3 >
Fabiano Rosas <farosas@suse.de> wrote: > This doubly linked list is common for all the multifd and migration > threads so we need to avoid concurrent access. > > Add a mutex to protect the data from concurrent access. This fixes a > crash when removing two MigrationThread objects from the list at the > same time during cleanup of multifd threads. > > To avoid destroying the mutex before the last element has been > removed, move calls to qmp_migration_thread_remove so they run before > multifd_save_cleanup joins the threads. > > Fixes: 671326201d ("migration: Introduce interface query-migrationthreads") > Signed-off-by: Fabiano Rosas <farosas@suse.de> I agree with Peter here. Why don't you have to protect the walking? > --- > migration/migration.c | 5 ++++- > migration/multifd.c | 3 ++- > migration/threadinfo.c | 19 ++++++++++++++++++- > migration/threadinfo.h | 5 +++-- > 4 files changed, 27 insertions(+), 5 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index e731fc98a1..b3b8345eb2 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState *s) > qemu_mutex_lock_iothread(); > > multifd_save_cleanup(); > + qmp_migration_threads_cleanup(); I think I will spare this one as the mutex is static, so we are not winning any memory back. > } > > trace_migration_thread_after_loop(); > + qmp_migration_threads_remove(thread); > migration_iteration_finish(s); I can understand moving it here, but why before migration_iteration_finish? > object_unref(OBJECT(s)); > rcu_unregister_thread(); > - qmp_migration_threads_remove(thread); > return NULL; > } > + qmp_migration_threads_remove(thread); > + > qemu_mutex_lock(&p->mutex); > p->running = false; > qemu_mutex_unlock(&p->mutex); > > rcu_unregister_thread(); > - qmp_migration_threads_remove(thread); > trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); > > return NULL; Here it looks like the right place. > +#include "qemu/osdep.h" > +#include "qemu/queue.h" > +#include "qemu/lockable.h" > #include "threadinfo.h" Ouch, it missed Markus cleanup. Thanks. For the rest it looks good. Later, Juan.
Juan Quintela <quintela@redhat.com> writes: > Fabiano Rosas <farosas@suse.de> wrote: >> This doubly linked list is common for all the multifd and migration >> threads so we need to avoid concurrent access. >> >> Add a mutex to protect the data from concurrent access. This fixes a >> crash when removing two MigrationThread objects from the list at the >> same time during cleanup of multifd threads. >> >> To avoid destroying the mutex before the last element has been >> removed, move calls to qmp_migration_thread_remove so they run before >> multifd_save_cleanup joins the threads. >> >> Fixes: 671326201d ("migration: Introduce interface query-migrationthreads") >> Signed-off-by: Fabiano Rosas <farosas@suse.de> > > I agree with Peter here. Why don't you have to protect the walking? > Oversight on my part. >> --- >> migration/migration.c | 5 ++++- >> migration/multifd.c | 3 ++- >> migration/threadinfo.c | 19 ++++++++++++++++++- >> migration/threadinfo.h | 5 +++-- >> 4 files changed, 27 insertions(+), 5 deletions(-) >> >> diff --git a/migration/migration.c b/migration/migration.c >> index e731fc98a1..b3b8345eb2 100644 >> --- a/migration/migration.c >> +++ b/migration/migration.c >> @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState *s) >> qemu_mutex_lock_iothread(); >> >> multifd_save_cleanup(); >> + qmp_migration_threads_cleanup(); > > I think I will spare this one as the mutex is static, so we are not > winning any memory back. > Ok >> } >> >> trace_migration_thread_after_loop(); >> + qmp_migration_threads_remove(thread); >> migration_iteration_finish(s); > > I can understand moving it here, but why before migration_iteration_finish? > Because migration_iteration_finish schedules migrate_fd_cleanup and it calls qmp_migration_threads_cleanup. So I wanted to be sure that the removal happens before destroying the mutex. >> object_unref(OBJECT(s)); >> rcu_unregister_thread(); >> - qmp_migration_threads_remove(thread); >> return NULL; >> } >> + qmp_migration_threads_remove(thread); >> + >> qemu_mutex_lock(&p->mutex); >> p->running = false; >> qemu_mutex_unlock(&p->mutex); >> >> rcu_unregister_thread(); >> - qmp_migration_threads_remove(thread); >> trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); >> >> return NULL; > > Here it looks like the right place. > Yep, we shouldn't really put any new code after that p->running = false. Because multifd_save_cleanup will happily start cleaning up everything while this thread is still running if it sees p->running == false. > >> +#include "qemu/osdep.h" >> +#include "qemu/queue.h" >> +#include "qemu/lockable.h" >> #include "threadinfo.h" > > Ouch, it missed Markus cleanup. Thanks. > > For the rest it looks good. > > Later, Juan.
On Wed, Jun 07, 2023 at 09:00:14AM -0300, Fabiano Rosas wrote: > >> diff --git a/migration/migration.c b/migration/migration.c > >> index e731fc98a1..b3b8345eb2 100644 > >> --- a/migration/migration.c > >> +++ b/migration/migration.c > >> @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState *s) > >> qemu_mutex_lock_iothread(); > >> > >> multifd_save_cleanup(); > >> + qmp_migration_threads_cleanup(); > > > > I think I will spare this one as the mutex is static, so we are not > > winning any memory back. > > > > Ok We could consider __attribute__((__constructor__)) in this case.
Sounds good. On Wed, Jun 7, 2023, 18:28 Peter Xu <peterx@redhat.com> wrote: > On Wed, Jun 07, 2023 at 09:00:14AM -0300, Fabiano Rosas wrote: > > >> diff --git a/migration/migration.c b/migration/migration.c > > >> index e731fc98a1..b3b8345eb2 100644 > > >> --- a/migration/migration.c > > >> +++ b/migration/migration.c > > >> @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState > *s) > > >> qemu_mutex_lock_iothread(); > > >> > > >> multifd_save_cleanup(); > > >> + qmp_migration_threads_cleanup(); > > > > > > I think I will spare this one as the mutex is static, so we are not > > > winning any memory back. > > > > > > > Ok > > We could consider __attribute__((__constructor__)) in this case. > > -- > Peter Xu > > >
diff --git a/migration/migration.c b/migration/migration.c index e731fc98a1..b3b8345eb2 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState *s) qemu_mutex_lock_iothread(); multifd_save_cleanup(); + qmp_migration_threads_cleanup(); qemu_mutex_lock(&s->qemu_file_lock); tmp = s->to_dst_file; s->to_dst_file = NULL; @@ -1405,6 +1406,8 @@ void migrate_init(MigrationState *s) s->vm_old_state = -1; s->iteration_initial_bytes = 0; s->threshold_size = 0; + + qmp_migration_threads_init(); } int migrate_add_blocker_internal(Error *reason, Error **errp) @@ -2997,10 +3000,10 @@ static void *migration_thread(void *opaque) } trace_migration_thread_after_loop(); + qmp_migration_threads_remove(thread); migration_iteration_finish(s); object_unref(OBJECT(s)); rcu_unregister_thread(); - qmp_migration_threads_remove(thread); return NULL; } diff --git a/migration/multifd.c b/migration/multifd.c index 5ec1ac5c64..ee7944560a 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -762,12 +762,13 @@ out: qemu_sem_post(&multifd_send_state->channels_ready); } + qmp_migration_threads_remove(thread); + qemu_mutex_lock(&p->mutex); p->running = false; qemu_mutex_unlock(&p->mutex); rcu_unregister_thread(); - qmp_migration_threads_remove(thread); trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); return NULL; diff --git a/migration/threadinfo.c b/migration/threadinfo.c index c3e85c33e8..1fe64a02dd 100644 --- a/migration/threadinfo.c +++ b/migration/threadinfo.c @@ -10,23 +10,40 @@ * See the COPYING file in the top-level directory. */ +#include "qemu/osdep.h" +#include "qemu/queue.h" +#include "qemu/lockable.h" #include "threadinfo.h" +QemuMutex migration_threads_lock; static QLIST_HEAD(, MigrationThread) migration_threads; +void qmp_migration_threads_init(void) +{ + qemu_mutex_init(&migration_threads_lock); +} + +void qmp_migration_threads_cleanup(void) +{ + qemu_mutex_destroy(&migration_threads_lock); +} + MigrationThread *qmp_migration_threads_add(const char *name, int thread_id) { MigrationThread *thread = g_new0(MigrationThread, 1); thread->name = name; thread->thread_id = thread_id; - QLIST_INSERT_HEAD(&migration_threads, thread, node); + WITH_QEMU_LOCK_GUARD(&migration_threads_lock) { + QLIST_INSERT_HEAD(&migration_threads, thread, node); + } return thread; } void qmp_migration_threads_remove(MigrationThread *thread) { + QEMU_LOCK_GUARD(&migration_threads_lock); if (thread) { QLIST_REMOVE(thread, node); g_free(thread); diff --git a/migration/threadinfo.h b/migration/threadinfo.h index 61b990f5e3..eb7f8e5bb2 100644 --- a/migration/threadinfo.h +++ b/migration/threadinfo.h @@ -10,8 +10,6 @@ * See the COPYING file in the top-level directory. */ -#include "qemu/queue.h" -#include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/qapi-commands-migration.h" @@ -23,5 +21,8 @@ struct MigrationThread { QLIST_ENTRY(MigrationThread) node; }; +void qmp_migration_threads_init(void); +void qmp_migration_threads_cleanup(void); + MigrationThread *qmp_migration_threads_add(const char *name, int thread_id); void qmp_migration_threads_remove(MigrationThread *info);
This doubly linked list is common for all the multifd and migration threads so we need to avoid concurrent access. Add a mutex to protect the data from concurrent access. This fixes a crash when removing two MigrationThread objects from the list at the same time during cleanup of multifd threads. To avoid destroying the mutex before the last element has been removed, move calls to qmp_migration_thread_remove so they run before multifd_save_cleanup joins the threads. Fixes: 671326201d ("migration: Introduce interface query-migrationthreads") Signed-off-by: Fabiano Rosas <farosas@suse.de> --- migration/migration.c | 5 ++++- migration/multifd.c | 3 ++- migration/threadinfo.c | 19 ++++++++++++++++++- migration/threadinfo.h | 5 +++-- 4 files changed, 27 insertions(+), 5 deletions(-)