diff mbox

[1/1] Stop reinit of XBZRLE.lock

Message ID 1395158215-20488-1-git-send-email-dgilbert@redhat.com
State New
Headers show

Commit Message

Dr. David Alan Gilbert March 18, 2014, 3:56 p.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

Markus Armbruster spotted that the XBZRLE.lock might get initalised
multiple times in the case of a second attempted migration, and
that's undefined behaviour for pthread_mutex_init.

This patch adds a flag to stop re-initialisation - finding somewhere
to cleanly destroy it where we can guarantee isn't happening
at the same place we might want to take the lock is tricky.

It also adds comments to explain the lock use more.

(I considered rewriting this lockless but can't justify it given
the simplicity of the fix).

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 arch_init.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

Comments

陈梁 March 18, 2014, 4:47 p.m. UTC | #1
nice catch

> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> 
> Markus Armbruster spotted that the XBZRLE.lock might get initalised
> multiple times in the case of a second attempted migration, and
> that's undefined behaviour for pthread_mutex_init.
> 
> This patch adds a flag to stop re-initialisation - finding somewhere
> to cleanly destroy it where we can guarantee isn't happening
> at the same place we might want to take the lock is tricky.
> 
> It also adds comments to explain the lock use more.
> 
> (I considered rewriting this lockless but can't justify it given
> the simplicity of the fix).
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
> arch_init.c | 30 ++++++++++++++++++++++++++----
> 1 file changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/arch_init.c b/arch_init.c
> index 60c975d..16474b5 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -167,10 +167,13 @@ static struct {
>     /* Cache for XBZRLE, Protected by lock. */
>     PageCache *cache;
>     QemuMutex lock;

QemuMutex *lock;

> +    bool lock_init; /* True once we've init'd lock */
> } XBZRLE = {
>     .encoded_buf = NULL,
>     .current_buf = NULL,
>     .cache = NULL,

.lock = NULL,

> +    /* .lock is initialised in ram_save_setup */
> +    .lock_init = false
> };

maybe it is better that we malloc the lock in ram_save_setup dynamic,
and free it in migration_end.

> /* buffer used for XBZRLE decoding */
> static uint8_t *xbzrle_decoded_buf;
> @@ -187,6 +190,11 @@ static void XBZRLE_cache_unlock(void)
>         qemu_mutex_unlock(&XBZRLE.lock);
> }
> 
> +/* called from qmp_migrate_set_cache_size in main thread, possibly while
> + * a migration is in progress.
> + * A running migration maybe using the cache and might finish during this
> + * call, hence changes to the cache are protected by XBZRLE.lock().
> + */
> int64_t xbzrle_cache_resize(int64_t new_size)
> {
>     PageCache *new_cache, *cache_to_free;
> @@ -195,9 +203,12 @@ int64_t xbzrle_cache_resize(int64_t new_size)
>         return -1;
>     }
> 
> -    /* no need to lock, the current thread holds qemu big lock */
> +    /* The current thread holds qemu big lock, and we hold it while creating
> +     * the cache in ram_save_setup, thus it's safe to test if the
> +     * cache exists yet without it's own lock (which might not have been
> +     * init'd yet)
> +     */
>     if (XBZRLE.cache != NULL) {
> -        /* check XBZRLE.cache again later */
>         if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
>             return pow2floor(new_size);
>         }
> @@ -209,7 +220,10 @@ int64_t xbzrle_cache_resize(int64_t new_size)
>         }
> 
>         XBZRLE_cache_lock();
> -        /* the XBZRLE.cache may have be destroyed, check it again */
> +        /* the migration might have finished between the check above and us
> +         * taking the lock,  causing XBZRLE.cache to be destroyed
> +         *   check it again
> +         */
>         if (XBZRLE.cache != NULL) {
>             cache_to_free = XBZRLE.cache;
>             XBZRLE.cache = new_cache;
> @@ -744,7 +758,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>             DPRINTF("Error creating cache\n");
>             return -1;
>         }
> -        qemu_mutex_init(&XBZRLE.lock);

we malloc the lock and init it. Then free it in migration_end. It is safe because
migration_end holds qemu big lock.

> +        /* mutex's can't be reinit'd without destroying them
> +         * and we've not got a good place to destroy it that
> +         * we can guarantee isn't being called when we might want
> +         * to hold the lock.
> +         */
> +        if (!XBZRLE.lock_init) {
> +            XBZRLE.lock_init = true;
> +            qemu_mutex_init(&XBZRLE.lock);
> +        }
>         qemu_mutex_unlock_iothread();
> 
>         /* We prefer not to abort if there is no memory */
> -- 
> 1.8.5.3
> 
>
Markus Armbruster March 18, 2014, 5:24 p.m. UTC | #2
"Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> writes:

> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>
> Markus Armbruster spotted that the XBZRLE.lock might get initalised
> multiple times in the case of a second attempted migration, and
> that's undefined behaviour for pthread_mutex_init.
>
> This patch adds a flag to stop re-initialisation - finding somewhere
> to cleanly destroy it where we can guarantee isn't happening
> at the same place we might want to take the lock is tricky.
>
> It also adds comments to explain the lock use more.
>
> (I considered rewriting this lockless but can't justify it given
> the simplicity of the fix).
>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  arch_init.c | 30 ++++++++++++++++++++++++++----
>  1 file changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 60c975d..16474b5 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -167,10 +167,13 @@ static struct {
>      /* Cache for XBZRLE, Protected by lock. */
>      PageCache *cache;
>      QemuMutex lock;
> +    bool lock_init; /* True once we've init'd lock */
>  } XBZRLE = {
>      .encoded_buf = NULL,
>      .current_buf = NULL,
>      .cache = NULL,
> +    /* .lock is initialised in ram_save_setup */
> +    .lock_init = false
>  };

Redundant initializers.

>  /* buffer used for XBZRLE decoding */
>  static uint8_t *xbzrle_decoded_buf;
> @@ -187,6 +190,11 @@ static void XBZRLE_cache_unlock(void)
>          qemu_mutex_unlock(&XBZRLE.lock);
>  }
>  
> +/* called from qmp_migrate_set_cache_size in main thread, possibly while
> + * a migration is in progress.
> + * A running migration maybe using the cache and might finish during this

may be

> + * call, hence changes to the cache are protected by XBZRLE.lock().
> + */

Style nit, since I'm nitpicking spelling already: our winged comments
usually look like

/*
 * Text
 */

>  int64_t xbzrle_cache_resize(int64_t new_size)
>  {
>      PageCache *new_cache, *cache_to_free;
> @@ -195,9 +203,12 @@ int64_t xbzrle_cache_resize(int64_t new_size)
>          return -1;
>      }
>  
> -    /* no need to lock, the current thread holds qemu big lock */
> +    /* The current thread holds qemu big lock, and we hold it while creating
> +     * the cache in ram_save_setup, thus it's safe to test if the
> +     * cache exists yet without it's own lock (which might not have been
> +     * init'd yet)
> +     */
>      if (XBZRLE.cache != NULL) {
> -        /* check XBZRLE.cache again later */
>          if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
>              return pow2floor(new_size);
>          }
> @@ -209,7 +220,10 @@ int64_t xbzrle_cache_resize(int64_t new_size)
>          }
>  
>          XBZRLE_cache_lock();
> -        /* the XBZRLE.cache may have be destroyed, check it again */
> +        /* the migration might have finished between the check above and us
> +         * taking the lock,  causing XBZRLE.cache to be destroyed
> +         *   check it again
> +         */
>          if (XBZRLE.cache != NULL) {
>              cache_to_free = XBZRLE.cache;
>              XBZRLE.cache = new_cache;
> @@ -744,7 +758,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>              DPRINTF("Error creating cache\n");
>              return -1;
>          }
> -        qemu_mutex_init(&XBZRLE.lock);
> +        /* mutex's can't be reinit'd without destroying them
> +         * and we've not got a good place to destroy it that
> +         * we can guarantee isn't being called when we might want
> +         * to hold the lock.
> +         */
> +        if (!XBZRLE.lock_init) {
> +            XBZRLE.lock_init = true;
> +            qemu_mutex_init(&XBZRLE.lock);
> +        }
>          qemu_mutex_unlock_iothread();
>  
>          /* We prefer not to abort if there is no memory */

I detest how the locking works in xbzrle_cache_resize().

The first XBZRLE.cache != NULL is not under XBZRLE.lock.  As you explain
in the comment, this is required, because we foolishly delay lock
initialization until a migration starts, and it's safe, because cache
creation is also under the BQL.

The second XBZRLE.cache != NULL *is* under XBZRLE.lock.  Required,
because cache destruction is *not* inder the BQL, only under
XBZRLE.lock.

I'd very, very much prefer this to be made obviously safe: initialize
XBZRLE.lock sufficiently early, then access XBZRLE.cache only under
XBZRLE.lock.

Confusing and way too subtle for no good reason.  But your patch doesn't
add subtlety, it explains it, and fixes a bug.  Therefore:

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Dr. David Alan Gilbert March 18, 2014, 8:20 p.m. UTC | #3
* ???? (chenliang0016@icloud.com) wrote:
> nice catch
> 
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > 
> > Markus Armbruster spotted that the XBZRLE.lock might get initalised
> > multiple times in the case of a second attempted migration, and
> > that's undefined behaviour for pthread_mutex_init.
> > 
> > This patch adds a flag to stop re-initialisation - finding somewhere
> > to cleanly destroy it where we can guarantee isn't happening
> > at the same place we might want to take the lock is tricky.
> > 
> > It also adds comments to explain the lock use more.
> > 
> > (I considered rewriting this lockless but can't justify it given
> > the simplicity of the fix).
> > 
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > ---
> > arch_init.c | 30 ++++++++++++++++++++++++++----
> > 1 file changed, 26 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch_init.c b/arch_init.c
> > index 60c975d..16474b5 100644
> > --- a/arch_init.c
> > +++ b/arch_init.c
> > @@ -167,10 +167,13 @@ static struct {
> >     /* Cache for XBZRLE, Protected by lock. */
> >     PageCache *cache;
> >     QemuMutex lock;
> 
> QemuMutex *lock;
> 
> > +    bool lock_init; /* True once we've init'd lock */
> > } XBZRLE = {
> >     .encoded_buf = NULL,
> >     .current_buf = NULL,
> >     .cache = NULL,
> 
> .lock = NULL,
> > +    /* .lock is initialised in ram_save_setup */
> > +    .lock_init = false
> > };
> 
> maybe it is better that we malloc the lock in ram_save_setup dynamic,
> and free it in migration_end.

Yes, that would be another way of doing the same thing, but we
do need to be able to free it...

> 
> > /* buffer used for XBZRLE decoding */
> > static uint8_t *xbzrle_decoded_buf;
> > @@ -187,6 +190,11 @@ static void XBZRLE_cache_unlock(void)
> >         qemu_mutex_unlock(&XBZRLE.lock);
> > }
> > 
> > +/* called from qmp_migrate_set_cache_size in main thread, possibly while
> > + * a migration is in progress.
> > + * A running migration maybe using the cache and might finish during this
> > + * call, hence changes to the cache are protected by XBZRLE.lock().
> > + */
> > int64_t xbzrle_cache_resize(int64_t new_size)
> > {
> >     PageCache *new_cache, *cache_to_free;
> > @@ -195,9 +203,12 @@ int64_t xbzrle_cache_resize(int64_t new_size)
> >         return -1;
> >     }
> > 
> > -    /* no need to lock, the current thread holds qemu big lock */
> > +    /* The current thread holds qemu big lock, and we hold it while creating
> > +     * the cache in ram_save_setup, thus it's safe to test if the
> > +     * cache exists yet without it's own lock (which might not have been
> > +     * init'd yet)
> > +     */
> >     if (XBZRLE.cache != NULL) {
> > -        /* check XBZRLE.cache again later */
> >         if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
> >             return pow2floor(new_size);
> >         }
> > @@ -209,7 +220,10 @@ int64_t xbzrle_cache_resize(int64_t new_size)
> >         }
> > 
> >         XBZRLE_cache_lock();
> > -        /* the XBZRLE.cache may have be destroyed, check it again */
> > +        /* the migration might have finished between the check above and us
> > +         * taking the lock,  causing XBZRLE.cache to be destroyed
> > +         *   check it again
> > +         */
> >         if (XBZRLE.cache != NULL) {
> >             cache_to_free = XBZRLE.cache;
> >             XBZRLE.cache = new_cache;
> > @@ -744,7 +758,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
> >             DPRINTF("Error creating cache\n");
> >             return -1;
> >         }
> > -        qemu_mutex_init(&XBZRLE.lock);
> 
> we malloc the lock and init it. Then free it in migration_end. It is safe because
> migration_end holds qemu big lock.

What makes you say migration_end has the big lock - I don't see it.
Indeed I think that's why you currently check the cache != NULL for the
2nd time, in case migration_end happens at that point.

> > +        /* mutex's can't be reinit'd without destroying them
> > +         * and we've not got a good place to destroy it that
> > +         * we can guarantee isn't being called when we might want
> > +         * to hold the lock.
> > +         */
> > +        if (!XBZRLE.lock_init) {
> > +            XBZRLE.lock_init = true;
> > +            qemu_mutex_init(&XBZRLE.lock);
> > +        }
> >         qemu_mutex_unlock_iothread();
> > 
> >         /* We prefer not to abort if there is no memory */
> > -- 
> > 1.8.5.3
> > 
> > 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert March 18, 2014, 8:47 p.m. UTC | #4
* Markus Armbruster (armbru@redhat.com) wrote:
> "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> writes:

<snip>

> > diff --git a/arch_init.c b/arch_init.c
> > index 60c975d..16474b5 100644
> > --- a/arch_init.c
> > +++ b/arch_init.c
> > @@ -167,10 +167,13 @@ static struct {
> >      /* Cache for XBZRLE, Protected by lock. */
> >      PageCache *cache;
> >      QemuMutex lock;
> > +    bool lock_init; /* True once we've init'd lock */
> >  } XBZRLE = {
> >      .encoded_buf = NULL,
> >      .current_buf = NULL,
> >      .cache = NULL,
> > +    /* .lock is initialised in ram_save_setup */
> > +    .lock_init = false
> >  };
> 
> Redundant initializers.

Given how subtle lock stuff is, I'll take making it obvious as more important.

> >  /* buffer used for XBZRLE decoding */
> >  static uint8_t *xbzrle_decoded_buf;
> > @@ -187,6 +190,11 @@ static void XBZRLE_cache_unlock(void)
> >          qemu_mutex_unlock(&XBZRLE.lock);
> >  }
> >  
> > +/* called from qmp_migrate_set_cache_size in main thread, possibly while
> > + * a migration is in progress.
> > + * A running migration maybe using the cache and might finish during this
> 
> may be
> 
> > + * call, hence changes to the cache are protected by XBZRLE.lock().
> > + */
> 
> Style nit, since I'm nitpicking spelling already: our winged comments
> usually look like
> 
> /*
>  * Text
>  */

Oops, yes; if I need to respin I'll fix that (hmm I wonder if the check
script could be tweaked to find those).

> >  int64_t xbzrle_cache_resize(int64_t new_size)
> >  {
> >      PageCache *new_cache, *cache_to_free;
> > @@ -195,9 +203,12 @@ int64_t xbzrle_cache_resize(int64_t new_size)
> >          return -1;
> >      }
> >  
> > -    /* no need to lock, the current thread holds qemu big lock */
> > +    /* The current thread holds qemu big lock, and we hold it while creating
> > +     * the cache in ram_save_setup, thus it's safe to test if the
> > +     * cache exists yet without it's own lock (which might not have been
> > +     * init'd yet)
> > +     */
> >      if (XBZRLE.cache != NULL) {
> > -        /* check XBZRLE.cache again later */
> >          if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
> >              return pow2floor(new_size);
> >          }
> > @@ -209,7 +220,10 @@ int64_t xbzrle_cache_resize(int64_t new_size)
> >          }
> >  
> >          XBZRLE_cache_lock();
> > -        /* the XBZRLE.cache may have be destroyed, check it again */
> > +        /* the migration might have finished between the check above and us
> > +         * taking the lock,  causing XBZRLE.cache to be destroyed
> > +         *   check it again
> > +         */
> >          if (XBZRLE.cache != NULL) {
> >              cache_to_free = XBZRLE.cache;
> >              XBZRLE.cache = new_cache;
> > @@ -744,7 +758,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
> >              DPRINTF("Error creating cache\n");
> >              return -1;
> >          }
> > -        qemu_mutex_init(&XBZRLE.lock);
> > +        /* mutex's can't be reinit'd without destroying them
> > +         * and we've not got a good place to destroy it that
> > +         * we can guarantee isn't being called when we might want
> > +         * to hold the lock.
> > +         */
> > +        if (!XBZRLE.lock_init) {
> > +            XBZRLE.lock_init = true;
> > +            qemu_mutex_init(&XBZRLE.lock);
> > +        }
> >          qemu_mutex_unlock_iothread();
> >  
> >          /* We prefer not to abort if there is no memory */
> 
> I detest how the locking works in xbzrle_cache_resize().

Yeh, it's tricky - I think the way to think about it is that the 
lock protects the cache and it's contents, not the pointer.
Except that's not really true when it swaps the new one in - hmm.

> The first XBZRLE.cache != NULL is not under XBZRLE.lock.  As you explain
> in the comment, this is required, because we foolishly delay lock
> initialization until a migration starts, and it's safe, because cache
> creation is also under the BQL.

Some of this is down to the interfaces between migration generally,
the devices being migrated and the specials for RAM/iterative migration.

1) A lot of the ram migration data, including XBZRLE, is global data in
arch_init - this is bad.

2) The management of these is generally glued onto the migration
setup/iterate/complete set of methods used during migration, however
they don't have any calls that correspond to the actual start/end of
migration, or the like that could be sanely used to do any init
that tied up with the actual start end of migration - this is bad.

3) Migration is in a separate thread and could finish at any time - this
is expected but complicates life, especially when (2) means that
the data structures for RAMs migration etc are cleared up when migration
finishes.

I don't know the history but (1) seems to arrise from the
semi-arbitrary split between arch_init.c, memory.c, savevm.c, and probably
3 or 4 other files I've failed to mention.

> The second XBZRLE.cache != NULL *is* under XBZRLE.lock.  Required,
> because cache destruction is *not* inder the BQL, only under
> XBZRLE.lock.
> 
> I'd very, very much prefer this to be made obviously safe: initialize
> XBZRLE.lock sufficiently early, then access XBZRLE.cache only under
> XBZRLE.lock.
> 
> Confusing and way too subtle for no good reason.  But your patch doesn't
> add subtlety, it explains it, and fixes a bug.  Therefore:
> 
> Reviewed-by: Markus Armbruster <armbru@redhat.com>

Thanks.

Dave
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox

Patch

diff --git a/arch_init.c b/arch_init.c
index 60c975d..16474b5 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -167,10 +167,13 @@  static struct {
     /* Cache for XBZRLE, Protected by lock. */
     PageCache *cache;
     QemuMutex lock;
+    bool lock_init; /* True once we've init'd lock */
 } XBZRLE = {
     .encoded_buf = NULL,
     .current_buf = NULL,
     .cache = NULL,
+    /* .lock is initialised in ram_save_setup */
+    .lock_init = false
 };
 /* buffer used for XBZRLE decoding */
 static uint8_t *xbzrle_decoded_buf;
@@ -187,6 +190,11 @@  static void XBZRLE_cache_unlock(void)
         qemu_mutex_unlock(&XBZRLE.lock);
 }
 
+/* called from qmp_migrate_set_cache_size in main thread, possibly while
+ * a migration is in progress.
+ * A running migration maybe using the cache and might finish during this
+ * call, hence changes to the cache are protected by XBZRLE.lock().
+ */
 int64_t xbzrle_cache_resize(int64_t new_size)
 {
     PageCache *new_cache, *cache_to_free;
@@ -195,9 +203,12 @@  int64_t xbzrle_cache_resize(int64_t new_size)
         return -1;
     }
 
-    /* no need to lock, the current thread holds qemu big lock */
+    /* The current thread holds qemu big lock, and we hold it while creating
+     * the cache in ram_save_setup, thus it's safe to test if the
+     * cache exists yet without it's own lock (which might not have been
+     * init'd yet)
+     */
     if (XBZRLE.cache != NULL) {
-        /* check XBZRLE.cache again later */
         if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
             return pow2floor(new_size);
         }
@@ -209,7 +220,10 @@  int64_t xbzrle_cache_resize(int64_t new_size)
         }
 
         XBZRLE_cache_lock();
-        /* the XBZRLE.cache may have be destroyed, check it again */
+        /* the migration might have finished between the check above and us
+         * taking the lock,  causing XBZRLE.cache to be destroyed
+         *   check it again
+         */
         if (XBZRLE.cache != NULL) {
             cache_to_free = XBZRLE.cache;
             XBZRLE.cache = new_cache;
@@ -744,7 +758,15 @@  static int ram_save_setup(QEMUFile *f, void *opaque)
             DPRINTF("Error creating cache\n");
             return -1;
         }
-        qemu_mutex_init(&XBZRLE.lock);
+        /* mutex's can't be reinit'd without destroying them
+         * and we've not got a good place to destroy it that
+         * we can guarantee isn't being called when we might want
+         * to hold the lock.
+         */
+        if (!XBZRLE.lock_init) {
+            XBZRLE.lock_init = true;
+            qemu_mutex_init(&XBZRLE.lock);
+        }
         qemu_mutex_unlock_iothread();
 
         /* We prefer not to abort if there is no memory */