diff mbox series

[v5,3/5] migration: process_incoming_migration_co(): fix reporting s->error

Message ID 20240429191426.2327225-4-vsementsov@yandex-team.ru
State New
Headers show
Series migration: do not exit on incoming failure | expand

Commit Message

Vladimir Sementsov-Ogievskiy April 29, 2024, 7:14 p.m. UTC
It's bad idea to leave critical section with error object freed, but
s->error still set, this theoretically may lead to use-after-free
crash. Let's avoid it.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
 migration/migration.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

Comments

Peter Xu April 29, 2024, 7:32 p.m. UTC | #1
On Mon, Apr 29, 2024 at 10:14:24PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> It's bad idea to leave critical section with error object freed, but
> s->error still set, this theoretically may lead to use-after-free
> crash. Let's avoid it.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
>  migration/migration.c | 24 ++++++++++++------------
>  1 file changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 0d26db47f7..58fd5819bc 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -732,9 +732,19 @@ static void process_incoming_migration_bh(void *opaque)
>      migration_incoming_state_destroy();
>  }
>  
> +static void migrate_error_free(MigrationState *s)
> +{
> +    QEMU_LOCK_GUARD(&s->error_mutex);
> +    if (s->error) {
> +        error_free(s->error);
> +        s->error = NULL;
> +    }
> +}
> +
>  static void coroutine_fn
>  process_incoming_migration_co(void *opaque)
>  {
> +    MigrationState *s = migrate_get_current();
>      MigrationIncomingState *mis = migration_incoming_get_current();
>      PostcopyState ps;
>      int ret;
> @@ -779,11 +789,9 @@ process_incoming_migration_co(void *opaque)
>      }
>  
>      if (ret < 0) {
> -        MigrationState *s = migrate_get_current();
> -
>          if (migrate_has_error(s)) {
>              WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
> -                error_report_err(s->error);
> +                error_report_err(error_copy(s->error));

This looks like a bugfix, agreed.

>              }
>          }
>          error_report("load of migration failed: %s", strerror(-ret));
> @@ -801,6 +809,7 @@ fail:
>                        MIGRATION_STATUS_FAILED);
>      migration_incoming_state_destroy();
>  
> +    migrate_error_free(s);

Would migration_incoming_state_destroy() be a better place?

One thing weird is we actually reuses MigrationState*'s error for incoming
too, but so far it looks ok as long as QEMU can't be both src & dst.  Then
calling migrate_error_free even in incoming_state_destroy() looks ok.

This patch still looks like containing two changes.  Better split them (or
just fix the bug only)?

Thanks,

>      exit(EXIT_FAILURE);
>  }
>  
> @@ -1433,15 +1442,6 @@ bool migrate_has_error(MigrationState *s)
>      return qatomic_read(&s->error);
>  }
>  
> -static void migrate_error_free(MigrationState *s)
> -{
> -    QEMU_LOCK_GUARD(&s->error_mutex);
> -    if (s->error) {
> -        error_free(s->error);
> -        s->error = NULL;
> -    }
> -}
> -
>  static void migrate_fd_error(MigrationState *s, const Error *error)
>  {
>      assert(s->to_dst_file == NULL);
> -- 
> 2.34.1
>
Vladimir Sementsov-Ogievskiy April 30, 2024, 8:06 a.m. UTC | #2
On 29.04.24 22:32, Peter Xu wrote:
> On Mon, Apr 29, 2024 at 10:14:24PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> It's bad idea to leave critical section with error object freed, but
>> s->error still set, this theoretically may lead to use-after-free
>> crash. Let's avoid it.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>   migration/migration.c | 24 ++++++++++++------------
>>   1 file changed, 12 insertions(+), 12 deletions(-)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 0d26db47f7..58fd5819bc 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -732,9 +732,19 @@ static void process_incoming_migration_bh(void *opaque)
>>       migration_incoming_state_destroy();
>>   }
>>   
>> +static void migrate_error_free(MigrationState *s)
>> +{
>> +    QEMU_LOCK_GUARD(&s->error_mutex);
>> +    if (s->error) {
>> +        error_free(s->error);
>> +        s->error = NULL;
>> +    }
>> +}
>> +
>>   static void coroutine_fn
>>   process_incoming_migration_co(void *opaque)
>>   {
>> +    MigrationState *s = migrate_get_current();
>>       MigrationIncomingState *mis = migration_incoming_get_current();
>>       PostcopyState ps;
>>       int ret;
>> @@ -779,11 +789,9 @@ process_incoming_migration_co(void *opaque)
>>       }
>>   
>>       if (ret < 0) {
>> -        MigrationState *s = migrate_get_current();
>> -
>>           if (migrate_has_error(s)) {
>>               WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
>> -                error_report_err(s->error);
>> +                error_report_err(error_copy(s->error));
> 
> This looks like a bugfix, agreed.
> 
>>               }
>>           }
>>           error_report("load of migration failed: %s", strerror(-ret));
>> @@ -801,6 +809,7 @@ fail:
>>                         MIGRATION_STATUS_FAILED);
>>       migration_incoming_state_destroy();
>>   
>> +    migrate_error_free(s);
> 
> Would migration_incoming_state_destroy() be a better place?

Hmm. But we want to call migration_incoming_state_destroy() in case when exit-on-error=false too. And in this case we want to keep the error for further query-migrate commands.

> 
> One thing weird is we actually reuses MigrationState*'s error for incoming
> too, but so far it looks ok as long as QEMU can't be both src & dst.  Then
> calling migrate_error_free even in incoming_state_destroy() looks ok.
> 
> This patch still looks like containing two changes.  Better split them (or
> just fix the bug only)?
> 
> Thanks,
> 
>>       exit(EXIT_FAILURE);
>>   }
>>   
>> @@ -1433,15 +1442,6 @@ bool migrate_has_error(MigrationState *s)
>>       return qatomic_read(&s->error);
>>   }
>>   
>> -static void migrate_error_free(MigrationState *s)
>> -{
>> -    QEMU_LOCK_GUARD(&s->error_mutex);
>> -    if (s->error) {
>> -        error_free(s->error);
>> -        s->error = NULL;
>> -    }
>> -}
>> -
>>   static void migrate_fd_error(MigrationState *s, const Error *error)
>>   {
>>       assert(s->to_dst_file == NULL);
>> -- 
>> 2.34.1
>>
>
Vladimir Sementsov-Ogievskiy April 30, 2024, 8:09 a.m. UTC | #3
On 30.04.24 11:06, Vladimir Sementsov-Ogievskiy wrote:
> On 29.04.24 22:32, Peter Xu wrote:
>> On Mon, Apr 29, 2024 at 10:14:24PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>>> It's bad idea to leave critical section with error object freed, but
>>> s->error still set, this theoretically may lead to use-after-free
>>> crash. Let's avoid it.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>>> ---
>>>   migration/migration.c | 24 ++++++++++++------------
>>>   1 file changed, 12 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/migration/migration.c b/migration/migration.c
>>> index 0d26db47f7..58fd5819bc 100644
>>> --- a/migration/migration.c
>>> +++ b/migration/migration.c
>>> @@ -732,9 +732,19 @@ static void process_incoming_migration_bh(void *opaque)
>>>       migration_incoming_state_destroy();
>>>   }
>>> +static void migrate_error_free(MigrationState *s)
>>> +{
>>> +    QEMU_LOCK_GUARD(&s->error_mutex);
>>> +    if (s->error) {
>>> +        error_free(s->error);
>>> +        s->error = NULL;
>>> +    }
>>> +}
>>> +
>>>   static void coroutine_fn
>>>   process_incoming_migration_co(void *opaque)
>>>   {
>>> +    MigrationState *s = migrate_get_current();
>>>       MigrationIncomingState *mis = migration_incoming_get_current();
>>>       PostcopyState ps;
>>>       int ret;
>>> @@ -779,11 +789,9 @@ process_incoming_migration_co(void *opaque)
>>>       }
>>>       if (ret < 0) {
>>> -        MigrationState *s = migrate_get_current();
>>> -
>>>           if (migrate_has_error(s)) {
>>>               WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
>>> -                error_report_err(s->error);
>>> +                error_report_err(error_copy(s->error));
>>
>> This looks like a bugfix, agreed.
>>
>>>               }
>>>           }
>>>           error_report("load of migration failed: %s", strerror(-ret));
>>> @@ -801,6 +809,7 @@ fail:
>>>                         MIGRATION_STATUS_FAILED);
>>>       migration_incoming_state_destroy();
>>> +    migrate_error_free(s);
>>
>> Would migration_incoming_state_destroy() be a better place?
> 
> Hmm. But we want to call migration_incoming_state_destroy() in case when exit-on-error=false too. And in this case we want to keep the error for further query-migrate commands.

Actually, I think we shouldn't care about freeing the error for exit() case. I think, we skip a lot of other cleanups which we normally do in main() in case of this exit().

> 
>>
>> One thing weird is we actually reuses MigrationState*'s error for incoming
>> too, but so far it looks ok as long as QEMU can't be both src & dst.  Then
>> calling migrate_error_free even in incoming_state_destroy() looks ok.
>>
>> This patch still looks like containing two changes.  Better split them (or
>> just fix the bug only)?
>>
>> Thanks,
>>
>>>       exit(EXIT_FAILURE);
>>>   }
>>> @@ -1433,15 +1442,6 @@ bool migrate_has_error(MigrationState *s)
>>>       return qatomic_read(&s->error);
>>>   }
>>> -static void migrate_error_free(MigrationState *s)
>>> -{
>>> -    QEMU_LOCK_GUARD(&s->error_mutex);
>>> -    if (s->error) {
>>> -        error_free(s->error);
>>> -        s->error = NULL;
>>> -    }
>>> -}
>>> -
>>>   static void migrate_fd_error(MigrationState *s, const Error *error)
>>>   {
>>>       assert(s->to_dst_file == NULL);
>>> -- 
>>> 2.34.1
>>>
>>
>
Vladimir Sementsov-Ogievskiy April 30, 2024, 8:47 a.m. UTC | #4
On 30.04.24 11:09, Vladimir Sementsov-Ogievskiy wrote:
> On 30.04.24 11:06, Vladimir Sementsov-Ogievskiy wrote:
>> On 29.04.24 22:32, Peter Xu wrote:
>>> On Mon, Apr 29, 2024 at 10:14:24PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>>>> It's bad idea to leave critical section with error object freed, but
>>>> s->error still set, this theoretically may lead to use-after-free
>>>> crash. Let's avoid it.
>>>>
>>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>>>> ---
>>>>   migration/migration.c | 24 ++++++++++++------------
>>>>   1 file changed, 12 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>> index 0d26db47f7..58fd5819bc 100644
>>>> --- a/migration/migration.c
>>>> +++ b/migration/migration.c
>>>> @@ -732,9 +732,19 @@ static void process_incoming_migration_bh(void *opaque)
>>>>       migration_incoming_state_destroy();
>>>>   }
>>>> +static void migrate_error_free(MigrationState *s)
>>>> +{
>>>> +    QEMU_LOCK_GUARD(&s->error_mutex);
>>>> +    if (s->error) {
>>>> +        error_free(s->error);
>>>> +        s->error = NULL;
>>>> +    }
>>>> +}
>>>> +
>>>>   static void coroutine_fn
>>>>   process_incoming_migration_co(void *opaque)
>>>>   {
>>>> +    MigrationState *s = migrate_get_current();
>>>>       MigrationIncomingState *mis = migration_incoming_get_current();
>>>>       PostcopyState ps;
>>>>       int ret;
>>>> @@ -779,11 +789,9 @@ process_incoming_migration_co(void *opaque)
>>>>       }
>>>>       if (ret < 0) {
>>>> -        MigrationState *s = migrate_get_current();
>>>> -
>>>>           if (migrate_has_error(s)) {
>>>>               WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
>>>> -                error_report_err(s->error);
>>>> +                error_report_err(error_copy(s->error));
>>>
>>> This looks like a bugfix, agreed.
>>>
>>>>               }
>>>>           }
>>>>           error_report("load of migration failed: %s", strerror(-ret));
>>>> @@ -801,6 +809,7 @@ fail:
>>>>                         MIGRATION_STATUS_FAILED);
>>>>       migration_incoming_state_destroy();
>>>> +    migrate_error_free(s);
>>>
>>> Would migration_incoming_state_destroy() be a better place?
>>
>> Hmm. But we want to call migration_incoming_state_destroy() in case when exit-on-error=false too. And in this case we want to keep the error for further query-migrate commands.
> 
> Actually, I think we shouldn't care about freeing the error for exit() case. I think, we skip a lot of other cleanups which we normally do in main() in case of this exit().
> 

Or, just do the simplest fix of potential use-after-free, and don't care:

WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
    error_report_err(s->error);
    s->error = NULL;
}

- I'll send v6 with it.

>>
>>>
>>> One thing weird is we actually reuses MigrationState*'s error for incoming
>>> too, but so far it looks ok as long as QEMU can't be both src & dst.  Then
>>> calling migrate_error_free even in incoming_state_destroy() looks ok.
>>>
>>> This patch still looks like containing two changes.  Better split them (or
>>> just fix the bug only)?
>>>
>>> Thanks,
>>>
>>>>       exit(EXIT_FAILURE);
>>>>   }
>>>> @@ -1433,15 +1442,6 @@ bool migrate_has_error(MigrationState *s)
>>>>       return qatomic_read(&s->error);
>>>>   }
>>>> -static void migrate_error_free(MigrationState *s)
>>>> -{
>>>> -    QEMU_LOCK_GUARD(&s->error_mutex);
>>>> -    if (s->error) {
>>>> -        error_free(s->error);
>>>> -        s->error = NULL;
>>>> -    }
>>>> -}
>>>> -
>>>>   static void migrate_fd_error(MigrationState *s, const Error *error)
>>>>   {
>>>>       assert(s->to_dst_file == NULL);
>>>> -- 
>>>> 2.34.1
>>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/migration/migration.c b/migration/migration.c
index 0d26db47f7..58fd5819bc 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -732,9 +732,19 @@  static void process_incoming_migration_bh(void *opaque)
     migration_incoming_state_destroy();
 }
 
+static void migrate_error_free(MigrationState *s)
+{
+    QEMU_LOCK_GUARD(&s->error_mutex);
+    if (s->error) {
+        error_free(s->error);
+        s->error = NULL;
+    }
+}
+
 static void coroutine_fn
 process_incoming_migration_co(void *opaque)
 {
+    MigrationState *s = migrate_get_current();
     MigrationIncomingState *mis = migration_incoming_get_current();
     PostcopyState ps;
     int ret;
@@ -779,11 +789,9 @@  process_incoming_migration_co(void *opaque)
     }
 
     if (ret < 0) {
-        MigrationState *s = migrate_get_current();
-
         if (migrate_has_error(s)) {
             WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
-                error_report_err(s->error);
+                error_report_err(error_copy(s->error));
             }
         }
         error_report("load of migration failed: %s", strerror(-ret));
@@ -801,6 +809,7 @@  fail:
                       MIGRATION_STATUS_FAILED);
     migration_incoming_state_destroy();
 
+    migrate_error_free(s);
     exit(EXIT_FAILURE);
 }
 
@@ -1433,15 +1442,6 @@  bool migrate_has_error(MigrationState *s)
     return qatomic_read(&s->error);
 }
 
-static void migrate_error_free(MigrationState *s)
-{
-    QEMU_LOCK_GUARD(&s->error_mutex);
-    if (s->error) {
-        error_free(s->error);
-        s->error = NULL;
-    }
-}
-
 static void migrate_fd_error(MigrationState *s, const Error *error)
 {
     assert(s->to_dst_file == NULL);