diff mbox

[RFC,v2,06/12] mc: introduce state machine changes for MC

Message ID 1392713429-18201-7-git-send-email-mrhines@linux.vnet.ibm.com
State New
Headers show

Commit Message

mrhines@linux.vnet.ibm.com Feb. 18, 2014, 8:50 a.m. UTC
From: "Michael R. Hines" <mrhines@us.ibm.com>

This patch sets up the initial changes to the migration state
machine and prototypes to be used by the checkpointing code
to interact with the state machine so that we can later handle
failure and recovery scenarios.

Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
---
 arch_init.c                   | 29 ++++++++++++++++++++++++-----
 include/migration/migration.h |  2 ++
 migration.c                   | 37 +++++++++++++++++++++----------------
 3 files changed, 47 insertions(+), 21 deletions(-)

Comments

liguang Feb. 19, 2014, 1 a.m. UTC | #1
Hi,

mrhines@linux.vnet.ibm.com wrote:
> From: "Michael R. Hines"<mrhines@us.ibm.com>
>
> This patch sets up the initial changes to the migration state
> machine and prototypes to be used by the checkpointing code
> to interact with the state machine so that we can later handle
> failure and recovery scenarios.
>
> Signed-off-by: Michael R. Hines<mrhines@us.ibm.com>
> ---
>   arch_init.c                   | 29 ++++++++++++++++++++++++-----
>   include/migration/migration.h |  2 ++
>   migration.c                   | 37 +++++++++++++++++++++----------------
>   3 files changed, 47 insertions(+), 21 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index db75120..e9d4d9e 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -658,13 +658,13 @@ static void ram_migration_cancel(void *opaque)
>       migration_end();
>   }
>
> -static void reset_ram_globals(void)
> +static void reset_ram_globals(bool reset_bulk_stage)
>   {
>       last_seen_block = NULL;
>       last_sent_block = NULL;
>       last_offset = 0;
>       last_version = ram_list.version;
> -    ram_bulk_stage = true;
> +    ram_bulk_stage = reset_bulk_stage;
>   }
>
>    

here is a chance that ram_save_block will never break while loop
if loat_seen_block be reset for mc when there are no dirty pages
to be migrated.

Thanks!

>   #define MAX_WAIT 50 /* ms, half buffered_file limit */
> @@ -674,6 +674,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>       RAMBlock *block;
>       int64_t ram_pages = last_ram_offset()>>  TARGET_PAGE_BITS;
>
> +    /*
> +     * RAM stays open during micro-checkpointing for the next transaction.
> +     */
> +    if (migration_is_mc(migrate_get_current())) {
> +        qemu_mutex_lock_ramlist();
> +        reset_ram_globals(false);
> +        goto skip_setup;
> +    }
> +
>       migration_bitmap = bitmap_new(ram_pages);
>       bitmap_set(migration_bitmap, 0, ram_pages);
>       migration_dirty_pages = ram_pages;
> @@ -710,12 +719,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>       qemu_mutex_lock_iothread();
>       qemu_mutex_lock_ramlist();
>       bytes_transferred = 0;
> -    reset_ram_globals();
> +    reset_ram_globals(true);
>
>       memory_global_dirty_log_start();
>       migration_bitmap_sync();
>       qemu_mutex_unlock_iothread();
>
> +skip_setup:
> +
>       qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>
>       QTAILQ_FOREACH(block,&ram_list.blocks, next) {
> @@ -744,7 +755,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>       qemu_mutex_lock_ramlist();
>
>       if (ram_list.version != last_version) {
> -        reset_ram_globals();
> +        reset_ram_globals(true);
>       }
>
>       ram_control_before_iterate(f, RAM_CONTROL_ROUND);
> @@ -825,7 +836,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>       }
>
>       ram_control_after_iterate(f, RAM_CONTROL_FINISH);
> -    migration_end();
> +
> +    /*
> +     * Only cleanup at the end of normal migrations
> +     * or if the MC destination failed and we got an error.
> +     * Otherwise, we are (or will soon be) in MIG_STATE_CHECKPOINTING.
> +     */
> +    if(!migrate_use_mc() || migration_has_failed(migrate_get_current())) {
> +        migration_end();
> +    }
>
>       qemu_mutex_unlock_ramlist();
>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index a7c54fe..e876a2c 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -101,7 +101,9 @@ int migrate_fd_close(MigrationState *s);
>
>   void add_migration_state_change_notifier(Notifier *notify);
>   void remove_migration_state_change_notifier(Notifier *notify);
> +bool migration_is_active(MigrationState *);
>   bool migration_in_setup(MigrationState *);
> +bool migration_is_mc(MigrationState *s);
>   bool migration_has_finished(MigrationState *);
>   bool migration_has_failed(MigrationState *);
>   MigrationState *migrate_get_current(void);
> diff --git a/migration.c b/migration.c
> index 25add6f..f42dae4 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -36,16 +36,6 @@
>       do { } while (0)
>   #endif
>
> -enum {
> -    MIG_STATE_ERROR = -1,
> -    MIG_STATE_NONE,
> -    MIG_STATE_SETUP,
> -    MIG_STATE_CANCELLING,
> -    MIG_STATE_CANCELLED,
> -    MIG_STATE_ACTIVE,
> -    MIG_STATE_COMPLETED,
> -};
> -
>   #define MAX_THROTTLE  (32<<  20)      /* Migration speed throttling */
>
>   /* Amount of time to allocate to each "chunk" of bandwidth-throttled
> @@ -273,7 +263,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>       MigrationState *s = migrate_get_current();
>       MigrationCapabilityStatusList *cap;
>
> -    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
> +    if (migration_is_active(s)) {
>           error_set(errp, QERR_MIGRATION_ACTIVE);
>           return;
>       }
> @@ -285,7 +275,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>
>   /* shared migration helpers */
>
> -static void migrate_set_state(MigrationState *s, int old_state, int new_state)
> +bool migration_is_active(MigrationState *s)
> +{
> +    return (s->state == MIG_STATE_ACTIVE) || s->state == MIG_STATE_SETUP
> +            || s->state == MIG_STATE_CHECKPOINTING;
> +}
> +
> +void migrate_set_state(MigrationState *s, int old_state, int new_state)
>   {
>       if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
>           trace_migrate_set_state(new_state);
> @@ -309,7 +305,7 @@ static void migrate_fd_cleanup(void *opaque)
>           s->file = NULL;
>       }
>
> -    assert(s->state != MIG_STATE_ACTIVE);
> +    assert(!migration_is_active(s));
>
>       if (s->state != MIG_STATE_COMPLETED) {
>           qemu_savevm_state_cancel();
> @@ -356,7 +352,12 @@ void remove_migration_state_change_notifier(Notifier *notify)
>
>   bool migration_in_setup(MigrationState *s)
>   {
> -    return s->state == MIG_STATE_SETUP;
> +        return s->state == MIG_STATE_SETUP;
> +}
> +
> +bool migration_is_mc(MigrationState *s)
> +{
> +        return s->state == MIG_STATE_CHECKPOINTING;
>   }
>
>   bool migration_has_finished(MigrationState *s)
> @@ -419,7 +420,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
>       params.shared = has_inc&&  inc;
>
>       if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
> -        s->state == MIG_STATE_CANCELLING) {
> +        s->state == MIG_STATE_CANCELLING
> +         || s->state == MIG_STATE_CHECKPOINTING) {
>           error_set(errp, QERR_MIGRATION_ACTIVE);
>           return;
>       }
> @@ -624,7 +626,10 @@ static void *migration_thread(void *opaque)
>                   }
>
>                   if (!qemu_file_get_error(s->file)) {
> -                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
> +                    if (!migrate_use_mc()) {
> +                        migrate_set_state(s,
> +                            MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
> +                    }
>                       break;
>                   }
>               }
>
mrhines@linux.vnet.ibm.com Feb. 19, 2014, 2:14 a.m. UTC | #2
On 02/19/2014 09:00 AM, Li Guang wrote:
> Hi,
>
> mrhines@linux.vnet.ibm.com wrote:
>> From: "Michael R. Hines"<mrhines@us.ibm.com>
>>
>> This patch sets up the initial changes to the migration state
>> machine and prototypes to be used by the checkpointing code
>> to interact with the state machine so that we can later handle
>> failure and recovery scenarios.
>>
>> Signed-off-by: Michael R. Hines<mrhines@us.ibm.com>
>> ---
>>   arch_init.c                   | 29 ++++++++++++++++++++++++-----
>>   include/migration/migration.h |  2 ++
>>   migration.c                   | 37 
>> +++++++++++++++++++++----------------
>>   3 files changed, 47 insertions(+), 21 deletions(-)
>>
>> diff --git a/arch_init.c b/arch_init.c
>> index db75120..e9d4d9e 100644
>> --- a/arch_init.c
>> +++ b/arch_init.c
>> @@ -658,13 +658,13 @@ static void ram_migration_cancel(void *opaque)
>>       migration_end();
>>   }
>>
>> -static void reset_ram_globals(void)
>> +static void reset_ram_globals(bool reset_bulk_stage)
>>   {
>>       last_seen_block = NULL;
>>       last_sent_block = NULL;
>>       last_offset = 0;
>>       last_version = ram_list.version;
>> -    ram_bulk_stage = true;
>> +    ram_bulk_stage = reset_bulk_stage;
>>   }
>>
>
> here is a chance that ram_save_block will never break while loop
> if loat_seen_block be reset for mc when there are no dirty pages
> to be migrated.
>
> Thanks!

I see. Question:

While running the code, I have never seen a case where there are no
dirty pages. Have you seen this before? And even if there are no dirty
pages during a single 100 millisecond checkpoint interval (the default
which can be changed) - the probability that there will continue to be
no dirty pages during the next checkpoint interval will be very low,
right?

If there are no dirty pages during a checkpoint, should not
ram_save_block() keep waiting? The virtual machine is still running
during MC - the MC code does not stop the virtual machine, so if there
are no dirty pages, isn't it safe to just let ram_save_block() keep looping
until there are dirty pages available?

- Michael
mrhines@linux.vnet.ibm.com Feb. 20, 2014, 5:03 a.m. UTC | #3
On 02/19/2014 09:00 AM, Li Guang wrote:
> Hi,
>
> mrhines@linux.vnet.ibm.com wrote:
>> From: "Michael R. Hines"<mrhines@us.ibm.com>
>>
>> This patch sets up the initial changes to the migration state
>> machine and prototypes to be used by the checkpointing code
>> to interact with the state machine so that we can later handle
>> failure and recovery scenarios.
>>
>> Signed-off-by: Michael R. Hines<mrhines@us.ibm.com>
>> ---
>>   arch_init.c                   | 29 ++++++++++++++++++++++++-----
>>   include/migration/migration.h |  2 ++
>>   migration.c                   | 37 
>> +++++++++++++++++++++----------------
>>   3 files changed, 47 insertions(+), 21 deletions(-)
>>
>> diff --git a/arch_init.c b/arch_init.c
>> index db75120..e9d4d9e 100644
>> --- a/arch_init.c
>> +++ b/arch_init.c
>> @@ -658,13 +658,13 @@ static void ram_migration_cancel(void *opaque)
>>       migration_end();
>>   }
>>
>> -static void reset_ram_globals(void)
>> +static void reset_ram_globals(bool reset_bulk_stage)
>>   {
>>       last_seen_block = NULL;
>>       last_sent_block = NULL;
>>       last_offset = 0;
>>       last_version = ram_list.version;
>> -    ram_bulk_stage = true;
>> +    ram_bulk_stage = reset_bulk_stage;
>>   }
>>
>
> here is a chance that ram_save_block will never break while loop
> if loat_seen_block be reset for mc when there are no dirty pages
> to be migrated.
>
> Thanks! 

OK, now I have finally experienced this bug: When I deleted the netdev
network card, I get an infinite loop in ram_save_block.......

Need to figure out how to fix it =)

- Michael
mrhines@linux.vnet.ibm.com Feb. 21, 2014, 8:13 a.m. UTC | #4
On 02/19/2014 09:00 AM, Li Guang wrote:
> Hi,
>
> mrhines@linux.vnet.ibm.com wrote:
>> From: "Michael R. Hines"<mrhines@us.ibm.com>
>>
>> This patch sets up the initial changes to the migration state
>> machine and prototypes to be used by the checkpointing code
>> to interact with the state machine so that we can later handle
>> failure and recovery scenarios.
>>
>> Signed-off-by: Michael R. Hines<mrhines@us.ibm.com>
>> ---
>>   arch_init.c                   | 29 ++++++++++++++++++++++++-----
>>   include/migration/migration.h |  2 ++
>>   migration.c                   | 37 
>> +++++++++++++++++++++----------------
>>   3 files changed, 47 insertions(+), 21 deletions(-)
>>
>> diff --git a/arch_init.c b/arch_init.c
>> index db75120..e9d4d9e 100644
>> --- a/arch_init.c
>> +++ b/arch_init.c
>> @@ -658,13 +658,13 @@ static void ram_migration_cancel(void *opaque)
>>       migration_end();
>>   }
>>
>> -static void reset_ram_globals(void)
>> +static void reset_ram_globals(bool reset_bulk_stage)
>>   {
>>       last_seen_block = NULL;
>>       last_sent_block = NULL;
>>       last_offset = 0;
>>       last_version = ram_list.version;
>> -    ram_bulk_stage = true;
>> +    ram_bulk_stage = reset_bulk_stage;
>>   }
>>
>
> here is a chance that ram_save_block will never break while loop
> if loat_seen_block be reset for mc when there are no dirty pages
> to be migrated.
>
> Thanks!
>

This bug is fixed now - you can re-pull from github.com.

     Believe it or not, when there is no network devices attached to the
     guest whatsoever, the initial bootup process can be extremely slow,
     where there are almost no processes dirtying memory at all or
     only occasionally except for maybe a DHCP client. This results in
     some 100ms periods of time where there are actually *no* dirty
     pages - hard to believe, but it does happen.

     ram_save_block() really doesn't understand this possibility,
     surprisingly. It results in an infinite loop because it was expecting
     last_seen_block to always be non-NULL, when in fact, we have reset
     the value to start from the beginning of the guest can scan the
     entire VM for dirty memory.


>>   #define MAX_WAIT 50 /* ms, half buffered_file limit */
>> @@ -674,6 +674,15 @@ static int ram_save_setup(QEMUFile *f, void 
>> *opaque)
>>       RAMBlock *block;
>>       int64_t ram_pages = last_ram_offset()>> TARGET_PAGE_BITS;
>>
>> +    /*
>> +     * RAM stays open during micro-checkpointing for the next 
>> transaction.
>> +     */
>> +    if (migration_is_mc(migrate_get_current())) {
>> +        qemu_mutex_lock_ramlist();
>> +        reset_ram_globals(false);
>> +        goto skip_setup;
>> +    }
>> +
>>       migration_bitmap = bitmap_new(ram_pages);
>>       bitmap_set(migration_bitmap, 0, ram_pages);
>>       migration_dirty_pages = ram_pages;
>> @@ -710,12 +719,14 @@ static int ram_save_setup(QEMUFile *f, void 
>> *opaque)
>>       qemu_mutex_lock_iothread();
>>       qemu_mutex_lock_ramlist();
>>       bytes_transferred = 0;
>> -    reset_ram_globals();
>> +    reset_ram_globals(true);
>>
>>       memory_global_dirty_log_start();
>>       migration_bitmap_sync();
>>       qemu_mutex_unlock_iothread();
>>
>> +skip_setup:
>> +
>>       qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>>
>>       QTAILQ_FOREACH(block,&ram_list.blocks, next) {
>> @@ -744,7 +755,7 @@ static int ram_save_iterate(QEMUFile *f, void 
>> *opaque)
>>       qemu_mutex_lock_ramlist();
>>
>>       if (ram_list.version != last_version) {
>> -        reset_ram_globals();
>> +        reset_ram_globals(true);
>>       }
>>
>>       ram_control_before_iterate(f, RAM_CONTROL_ROUND);
>> @@ -825,7 +836,15 @@ static int ram_save_complete(QEMUFile *f, void 
>> *opaque)
>>       }
>>
>>       ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>> -    migration_end();
>> +
>> +    /*
>> +     * Only cleanup at the end of normal migrations
>> +     * or if the MC destination failed and we got an error.
>> +     * Otherwise, we are (or will soon be) in MIG_STATE_CHECKPOINTING.
>> +     */
>> +    if(!migrate_use_mc() || 
>> migration_has_failed(migrate_get_current())) {
>> +        migration_end();
>> +    }
>>
>>       qemu_mutex_unlock_ramlist();
>>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>> diff --git a/include/migration/migration.h 
>> b/include/migration/migration.h
>> index a7c54fe..e876a2c 100644
>> --- a/include/migration/migration.h
>> +++ b/include/migration/migration.h
>> @@ -101,7 +101,9 @@ int migrate_fd_close(MigrationState *s);
>>
>>   void add_migration_state_change_notifier(Notifier *notify);
>>   void remove_migration_state_change_notifier(Notifier *notify);
>> +bool migration_is_active(MigrationState *);
>>   bool migration_in_setup(MigrationState *);
>> +bool migration_is_mc(MigrationState *s);
>>   bool migration_has_finished(MigrationState *);
>>   bool migration_has_failed(MigrationState *);
>>   MigrationState *migrate_get_current(void);
>> diff --git a/migration.c b/migration.c
>> index 25add6f..f42dae4 100644
>> --- a/migration.c
>> +++ b/migration.c
>> @@ -36,16 +36,6 @@
>>       do { } while (0)
>>   #endif
>>
>> -enum {
>> -    MIG_STATE_ERROR = -1,
>> -    MIG_STATE_NONE,
>> -    MIG_STATE_SETUP,
>> -    MIG_STATE_CANCELLING,
>> -    MIG_STATE_CANCELLED,
>> -    MIG_STATE_ACTIVE,
>> -    MIG_STATE_COMPLETED,
>> -};
>> -
>>   #define MAX_THROTTLE  (32<<  20)      /* Migration speed throttling */
>>
>>   /* Amount of time to allocate to each "chunk" of bandwidth-throttled
>> @@ -273,7 +263,7 @@ void 
>> qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>>       MigrationState *s = migrate_get_current();
>>       MigrationCapabilityStatusList *cap;
>>
>> -    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
>> +    if (migration_is_active(s)) {
>>           error_set(errp, QERR_MIGRATION_ACTIVE);
>>           return;
>>       }
>> @@ -285,7 +275,13 @@ void 
>> qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>>
>>   /* shared migration helpers */
>>
>> -static void migrate_set_state(MigrationState *s, int old_state, int 
>> new_state)
>> +bool migration_is_active(MigrationState *s)
>> +{
>> +    return (s->state == MIG_STATE_ACTIVE) || s->state == 
>> MIG_STATE_SETUP
>> +            || s->state == MIG_STATE_CHECKPOINTING;
>> +}
>> +
>> +void migrate_set_state(MigrationState *s, int old_state, int new_state)
>>   {
>>       if (atomic_cmpxchg(&s->state, old_state, new_state) == 
>> new_state) {
>>           trace_migrate_set_state(new_state);
>> @@ -309,7 +305,7 @@ static void migrate_fd_cleanup(void *opaque)
>>           s->file = NULL;
>>       }
>>
>> -    assert(s->state != MIG_STATE_ACTIVE);
>> +    assert(!migration_is_active(s));
>>
>>       if (s->state != MIG_STATE_COMPLETED) {
>>           qemu_savevm_state_cancel();
>> @@ -356,7 +352,12 @@ void 
>> remove_migration_state_change_notifier(Notifier *notify)
>>
>>   bool migration_in_setup(MigrationState *s)
>>   {
>> -    return s->state == MIG_STATE_SETUP;
>> +        return s->state == MIG_STATE_SETUP;
>> +}
>> +
>> +bool migration_is_mc(MigrationState *s)
>> +{
>> +        return s->state == MIG_STATE_CHECKPOINTING;
>>   }
>>
>>   bool migration_has_finished(MigrationState *s)
>> @@ -419,7 +420,8 @@ void qmp_migrate(const char *uri, bool has_blk, 
>> bool blk,
>>       params.shared = has_inc&&  inc;
>>
>>       if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
>> -        s->state == MIG_STATE_CANCELLING) {
>> +        s->state == MIG_STATE_CANCELLING
>> +         || s->state == MIG_STATE_CHECKPOINTING) {
>>           error_set(errp, QERR_MIGRATION_ACTIVE);
>>           return;
>>       }
>> @@ -624,7 +626,10 @@ static void *migration_thread(void *opaque)
>>                   }
>>
>>                   if (!qemu_file_get_error(s->file)) {
>> -                    migrate_set_state(s, MIG_STATE_ACTIVE, 
>> MIG_STATE_COMPLETED);
>> +                    if (!migrate_use_mc()) {
>> +                        migrate_set_state(s,
>> +                            MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
>> +                    }
>>                       break;
>>                   }
>>               }
>
liguang Feb. 24, 2014, 6:48 a.m. UTC | #5
Michael R. Hines wrote:
> On 02/19/2014 09:00 AM, Li Guang wrote:
>> Hi,
>>
>> mrhines@linux.vnet.ibm.com wrote:
>>> From: "Michael R. Hines"<mrhines@us.ibm.com>
>>>
>>> This patch sets up the initial changes to the migration state
>>> machine and prototypes to be used by the checkpointing code
>>> to interact with the state machine so that we can later handle
>>> failure and recovery scenarios.
>>>
>>> Signed-off-by: Michael R. Hines<mrhines@us.ibm.com>
>>> ---
>>>   arch_init.c                   | 29 ++++++++++++++++++++++++-----
>>>   include/migration/migration.h |  2 ++
>>>   migration.c                   | 37 
>>> +++++++++++++++++++++----------------
>>>   3 files changed, 47 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/arch_init.c b/arch_init.c
>>> index db75120..e9d4d9e 100644
>>> --- a/arch_init.c
>>> +++ b/arch_init.c
>>> @@ -658,13 +658,13 @@ static void ram_migration_cancel(void *opaque)
>>>       migration_end();
>>>   }
>>>
>>> -static void reset_ram_globals(void)
>>> +static void reset_ram_globals(bool reset_bulk_stage)
>>>   {
>>>       last_seen_block = NULL;
>>>       last_sent_block = NULL;
>>>       last_offset = 0;
>>>       last_version = ram_list.version;
>>> -    ram_bulk_stage = true;
>>> +    ram_bulk_stage = reset_bulk_stage;
>>>   }
>>>
>>
>> here is a chance that ram_save_block will never break while loop
>> if loat_seen_block be reset for mc when there are no dirty pages
>> to be migrated.
>>
>> Thanks!
>>
>
> This bug is fixed now - you can re-pull from github.com.
>
>     Believe it or not, when there is no network devices attached to the
>     guest whatsoever, the initial bootup process can be extremely slow,
>     where there are almost no processes dirtying memory at all or
>     only occasionally except for maybe a DHCP client. This results in
>     some 100ms periods of time where there are actually *no* dirty
>     pages - hard to believe, but it does happen.

sorry, seems all my pull requests for github was blocked,
let me check it later.

Thanks!

>
>     ram_save_block() really doesn't understand this possibility,
>     surprisingly. It results in an infinite loop because it was expecting
>     last_seen_block to always be non-NULL, when in fact, we have reset
>     the value to start from the beginning of the guest can scan the
>     entire VM for dirty memory.
>
>
>>>   #define MAX_WAIT 50 /* ms, half buffered_file limit */
>>> @@ -674,6 +674,15 @@ static int ram_save_setup(QEMUFile *f, void 
>>> *opaque)
>>>       RAMBlock *block;
>>>       int64_t ram_pages = last_ram_offset()>> TARGET_PAGE_BITS;
>>>
>>> +    /*
>>> +     * RAM stays open during micro-checkpointing for the next 
>>> transaction.
>>> +     */
>>> +    if (migration_is_mc(migrate_get_current())) {
>>> +        qemu_mutex_lock_ramlist();
>>> +        reset_ram_globals(false);
>>> +        goto skip_setup;
>>> +    }
>>> +
>>>       migration_bitmap = bitmap_new(ram_pages);
>>>       bitmap_set(migration_bitmap, 0, ram_pages);
>>>       migration_dirty_pages = ram_pages;
>>> @@ -710,12 +719,14 @@ static int ram_save_setup(QEMUFile *f, void 
>>> *opaque)
>>>       qemu_mutex_lock_iothread();
>>>       qemu_mutex_lock_ramlist();
>>>       bytes_transferred = 0;
>>> -    reset_ram_globals();
>>> +    reset_ram_globals(true);
>>>
>>>       memory_global_dirty_log_start();
>>>       migration_bitmap_sync();
>>>       qemu_mutex_unlock_iothread();
>>>
>>> +skip_setup:
>>> +
>>>       qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>>>
>>>       QTAILQ_FOREACH(block,&ram_list.blocks, next) {
>>> @@ -744,7 +755,7 @@ static int ram_save_iterate(QEMUFile *f, void 
>>> *opaque)
>>>       qemu_mutex_lock_ramlist();
>>>
>>>       if (ram_list.version != last_version) {
>>> -        reset_ram_globals();
>>> +        reset_ram_globals(true);
>>>       }
>>>
>>>       ram_control_before_iterate(f, RAM_CONTROL_ROUND);
>>> @@ -825,7 +836,15 @@ static int ram_save_complete(QEMUFile *f, void 
>>> *opaque)
>>>       }
>>>
>>>       ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>>> -    migration_end();
>>> +
>>> +    /*
>>> +     * Only cleanup at the end of normal migrations
>>> +     * or if the MC destination failed and we got an error.
>>> +     * Otherwise, we are (or will soon be) in MIG_STATE_CHECKPOINTING.
>>> +     */
>>> +    if(!migrate_use_mc() || 
>>> migration_has_failed(migrate_get_current())) {
>>> +        migration_end();
>>> +    }
>>>
>>>       qemu_mutex_unlock_ramlist();
>>>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>> diff --git a/include/migration/migration.h 
>>> b/include/migration/migration.h
>>> index a7c54fe..e876a2c 100644
>>> --- a/include/migration/migration.h
>>> +++ b/include/migration/migration.h
>>> @@ -101,7 +101,9 @@ int migrate_fd_close(MigrationState *s);
>>>
>>>   void add_migration_state_change_notifier(Notifier *notify);
>>>   void remove_migration_state_change_notifier(Notifier *notify);
>>> +bool migration_is_active(MigrationState *);
>>>   bool migration_in_setup(MigrationState *);
>>> +bool migration_is_mc(MigrationState *s);
>>>   bool migration_has_finished(MigrationState *);
>>>   bool migration_has_failed(MigrationState *);
>>>   MigrationState *migrate_get_current(void);
>>> diff --git a/migration.c b/migration.c
>>> index 25add6f..f42dae4 100644
>>> --- a/migration.c
>>> +++ b/migration.c
>>> @@ -36,16 +36,6 @@
>>>       do { } while (0)
>>>   #endif
>>>
>>> -enum {
>>> -    MIG_STATE_ERROR = -1,
>>> -    MIG_STATE_NONE,
>>> -    MIG_STATE_SETUP,
>>> -    MIG_STATE_CANCELLING,
>>> -    MIG_STATE_CANCELLED,
>>> -    MIG_STATE_ACTIVE,
>>> -    MIG_STATE_COMPLETED,
>>> -};
>>> -
>>>   #define MAX_THROTTLE  (32<<  20)      /* Migration speed 
>>> throttling */
>>>
>>>   /* Amount of time to allocate to each "chunk" of bandwidth-throttled
>>> @@ -273,7 +263,7 @@ void 
>>> qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>>>       MigrationState *s = migrate_get_current();
>>>       MigrationCapabilityStatusList *cap;
>>>
>>> -    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
>>> +    if (migration_is_active(s)) {
>>>           error_set(errp, QERR_MIGRATION_ACTIVE);
>>>           return;
>>>       }
>>> @@ -285,7 +275,13 @@ void 
>>> qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>>>
>>>   /* shared migration helpers */
>>>
>>> -static void migrate_set_state(MigrationState *s, int old_state, int 
>>> new_state)
>>> +bool migration_is_active(MigrationState *s)
>>> +{
>>> +    return (s->state == MIG_STATE_ACTIVE) || s->state == 
>>> MIG_STATE_SETUP
>>> +            || s->state == MIG_STATE_CHECKPOINTING;
>>> +}
>>> +
>>> +void migrate_set_state(MigrationState *s, int old_state, int 
>>> new_state)
>>>   {
>>>       if (atomic_cmpxchg(&s->state, old_state, new_state) == 
>>> new_state) {
>>>           trace_migrate_set_state(new_state);
>>> @@ -309,7 +305,7 @@ static void migrate_fd_cleanup(void *opaque)
>>>           s->file = NULL;
>>>       }
>>>
>>> -    assert(s->state != MIG_STATE_ACTIVE);
>>> +    assert(!migration_is_active(s));
>>>
>>>       if (s->state != MIG_STATE_COMPLETED) {
>>>           qemu_savevm_state_cancel();
>>> @@ -356,7 +352,12 @@ void 
>>> remove_migration_state_change_notifier(Notifier *notify)
>>>
>>>   bool migration_in_setup(MigrationState *s)
>>>   {
>>> -    return s->state == MIG_STATE_SETUP;
>>> +        return s->state == MIG_STATE_SETUP;
>>> +}
>>> +
>>> +bool migration_is_mc(MigrationState *s)
>>> +{
>>> +        return s->state == MIG_STATE_CHECKPOINTING;
>>>   }
>>>
>>>   bool migration_has_finished(MigrationState *s)
>>> @@ -419,7 +420,8 @@ void qmp_migrate(const char *uri, bool has_blk, 
>>> bool blk,
>>>       params.shared = has_inc&&  inc;
>>>
>>>       if (s->state == MIG_STATE_ACTIVE || s->state == 
>>> MIG_STATE_SETUP ||
>>> -        s->state == MIG_STATE_CANCELLING) {
>>> +        s->state == MIG_STATE_CANCELLING
>>> +         || s->state == MIG_STATE_CHECKPOINTING) {
>>>           error_set(errp, QERR_MIGRATION_ACTIVE);
>>>           return;
>>>       }
>>> @@ -624,7 +626,10 @@ static void *migration_thread(void *opaque)
>>>                   }
>>>
>>>                   if (!qemu_file_get_error(s->file)) {
>>> -                    migrate_set_state(s, MIG_STATE_ACTIVE, 
>>> MIG_STATE_COMPLETED);
>>> +                    if (!migrate_use_mc()) {
>>> +                        migrate_set_state(s,
>>> +                            MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
>>> +                    }
>>>                       break;
>>>                   }
>>>               }
>>
>
>
liguang Feb. 26, 2014, 2:52 a.m. UTC | #6
Li Guang wrote:
> Michael R. Hines wrote:
>> On 02/19/2014 09:00 AM, Li Guang wrote:
>>> Hi,
>>>
>>> mrhines@linux.vnet.ibm.com wrote:
>>>> From: "Michael R. Hines"<mrhines@us.ibm.com>
>>>>
>>>> This patch sets up the initial changes to the migration state
>>>> machine and prototypes to be used by the checkpointing code
>>>> to interact with the state machine so that we can later handle
>>>> failure and recovery scenarios.
>>>>
>>>> Signed-off-by: Michael R. Hines<mrhines@us.ibm.com>
>>>> ---
>>>>   arch_init.c                   | 29 ++++++++++++++++++++++++-----
>>>>   include/migration/migration.h |  2 ++
>>>>   migration.c                   | 37 
>>>> +++++++++++++++++++++----------------
>>>>   3 files changed, 47 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/arch_init.c b/arch_init.c
>>>> index db75120..e9d4d9e 100644
>>>> --- a/arch_init.c
>>>> +++ b/arch_init.c
>>>> @@ -658,13 +658,13 @@ static void ram_migration_cancel(void *opaque)
>>>>       migration_end();
>>>>   }
>>>>
>>>> -static void reset_ram_globals(void)
>>>> +static void reset_ram_globals(bool reset_bulk_stage)
>>>>   {
>>>>       last_seen_block = NULL;
>>>>       last_sent_block = NULL;
>>>>       last_offset = 0;
>>>>       last_version = ram_list.version;
>>>> -    ram_bulk_stage = true;
>>>> +    ram_bulk_stage = reset_bulk_stage;
>>>>   }
>>>>
>>>
>>> here is a chance that ram_save_block will never break while loop
>>> if loat_seen_block be reset for mc when there are no dirty pages
>>> to be migrated.
>>>
>>> Thanks!
>>>
>>
>> This bug is fixed now - you can re-pull from github.com.
>>
>>     Believe it or not, when there is no network devices attached to the
>>     guest whatsoever, the initial bootup process can be extremely slow,
>>     where there are almost no processes dirtying memory at all or
>>     only occasionally except for maybe a DHCP client. This results in
>>     some 100ms periods of time where there are actually *no* dirty
>>     pages - hard to believe, but it does happen.
>
> sorry, seems all my pull requests for github was blocked,
> let me check it later.
>
> Thanks!
>

tested, works well for me.

Thanks!

>>
>>     ram_save_block() really doesn't understand this possibility,
>>     surprisingly. It results in an infinite loop because it was 
>> expecting
>>     last_seen_block to always be non-NULL, when in fact, we have reset
>>     the value to start from the beginning of the guest can scan the
>>     entire VM for dirty memory.
>>
>>
>>>>   #define MAX_WAIT 50 /* ms, half buffered_file limit */
>>>> @@ -674,6 +674,15 @@ static int ram_save_setup(QEMUFile *f, void 
>>>> *opaque)
>>>>       RAMBlock *block;
>>>>       int64_t ram_pages = last_ram_offset()>> TARGET_PAGE_BITS;
>>>>
>>>> +    /*
>>>> +     * RAM stays open during micro-checkpointing for the next 
>>>> transaction.
>>>> +     */
>>>> +    if (migration_is_mc(migrate_get_current())) {
>>>> +        qemu_mutex_lock_ramlist();
>>>> +        reset_ram_globals(false);
>>>> +        goto skip_setup;
>>>> +    }
>>>> +
>>>>       migration_bitmap = bitmap_new(ram_pages);
>>>>       bitmap_set(migration_bitmap, 0, ram_pages);
>>>>       migration_dirty_pages = ram_pages;
>>>> @@ -710,12 +719,14 @@ static int ram_save_setup(QEMUFile *f, void 
>>>> *opaque)
>>>>       qemu_mutex_lock_iothread();
>>>>       qemu_mutex_lock_ramlist();
>>>>       bytes_transferred = 0;
>>>> -    reset_ram_globals();
>>>> +    reset_ram_globals(true);
>>>>
>>>>       memory_global_dirty_log_start();
>>>>       migration_bitmap_sync();
>>>>       qemu_mutex_unlock_iothread();
>>>>
>>>> +skip_setup:
>>>> +
>>>>       qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>>>>
>>>>       QTAILQ_FOREACH(block,&ram_list.blocks, next) {
>>>> @@ -744,7 +755,7 @@ static int ram_save_iterate(QEMUFile *f, void 
>>>> *opaque)
>>>>       qemu_mutex_lock_ramlist();
>>>>
>>>>       if (ram_list.version != last_version) {
>>>> -        reset_ram_globals();
>>>> +        reset_ram_globals(true);
>>>>       }
>>>>
>>>>       ram_control_before_iterate(f, RAM_CONTROL_ROUND);
>>>> @@ -825,7 +836,15 @@ static int ram_save_complete(QEMUFile *f, void 
>>>> *opaque)
>>>>       }
>>>>
>>>>       ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>>>> -    migration_end();
>>>> +
>>>> +    /*
>>>> +     * Only cleanup at the end of normal migrations
>>>> +     * or if the MC destination failed and we got an error.
>>>> +     * Otherwise, we are (or will soon be) in 
>>>> MIG_STATE_CHECKPOINTING.
>>>> +     */
>>>> +    if(!migrate_use_mc() || 
>>>> migration_has_failed(migrate_get_current())) {
>>>> +        migration_end();
>>>> +    }
>>>>
>>>>       qemu_mutex_unlock_ramlist();
>>>>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>>> diff --git a/include/migration/migration.h 
>>>> b/include/migration/migration.h
>>>> index a7c54fe..e876a2c 100644
>>>> --- a/include/migration/migration.h
>>>> +++ b/include/migration/migration.h
>>>> @@ -101,7 +101,9 @@ int migrate_fd_close(MigrationState *s);
>>>>
>>>>   void add_migration_state_change_notifier(Notifier *notify);
>>>>   void remove_migration_state_change_notifier(Notifier *notify);
>>>> +bool migration_is_active(MigrationState *);
>>>>   bool migration_in_setup(MigrationState *);
>>>> +bool migration_is_mc(MigrationState *s);
>>>>   bool migration_has_finished(MigrationState *);
>>>>   bool migration_has_failed(MigrationState *);
>>>>   MigrationState *migrate_get_current(void);
>>>> diff --git a/migration.c b/migration.c
>>>> index 25add6f..f42dae4 100644
>>>> --- a/migration.c
>>>> +++ b/migration.c
>>>> @@ -36,16 +36,6 @@
>>>>       do { } while (0)
>>>>   #endif
>>>>
>>>> -enum {
>>>> -    MIG_STATE_ERROR = -1,
>>>> -    MIG_STATE_NONE,
>>>> -    MIG_STATE_SETUP,
>>>> -    MIG_STATE_CANCELLING,
>>>> -    MIG_STATE_CANCELLED,
>>>> -    MIG_STATE_ACTIVE,
>>>> -    MIG_STATE_COMPLETED,
>>>> -};
>>>> -
>>>>   #define MAX_THROTTLE  (32<<  20)      /* Migration speed 
>>>> throttling */
>>>>
>>>>   /* Amount of time to allocate to each "chunk" of bandwidth-throttled
>>>> @@ -273,7 +263,7 @@ void 
>>>> qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>>>>       MigrationState *s = migrate_get_current();
>>>>       MigrationCapabilityStatusList *cap;
>>>>
>>>> -    if (s->state == MIG_STATE_ACTIVE || s->state == 
>>>> MIG_STATE_SETUP) {
>>>> +    if (migration_is_active(s)) {
>>>>           error_set(errp, QERR_MIGRATION_ACTIVE);
>>>>           return;
>>>>       }
>>>> @@ -285,7 +275,13 @@ void 
>>>> qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>>>>
>>>>   /* shared migration helpers */
>>>>
>>>> -static void migrate_set_state(MigrationState *s, int old_state, 
>>>> int new_state)
>>>> +bool migration_is_active(MigrationState *s)
>>>> +{
>>>> +    return (s->state == MIG_STATE_ACTIVE) || s->state == 
>>>> MIG_STATE_SETUP
>>>> +            || s->state == MIG_STATE_CHECKPOINTING;
>>>> +}
>>>> +
>>>> +void migrate_set_state(MigrationState *s, int old_state, int 
>>>> new_state)
>>>>   {
>>>>       if (atomic_cmpxchg(&s->state, old_state, new_state) == 
>>>> new_state) {
>>>>           trace_migrate_set_state(new_state);
>>>> @@ -309,7 +305,7 @@ static void migrate_fd_cleanup(void *opaque)
>>>>           s->file = NULL;
>>>>       }
>>>>
>>>> -    assert(s->state != MIG_STATE_ACTIVE);
>>>> +    assert(!migration_is_active(s));
>>>>
>>>>       if (s->state != MIG_STATE_COMPLETED) {
>>>>           qemu_savevm_state_cancel();
>>>> @@ -356,7 +352,12 @@ void 
>>>> remove_migration_state_change_notifier(Notifier *notify)
>>>>
>>>>   bool migration_in_setup(MigrationState *s)
>>>>   {
>>>> -    return s->state == MIG_STATE_SETUP;
>>>> +        return s->state == MIG_STATE_SETUP;
>>>> +}
>>>> +
>>>> +bool migration_is_mc(MigrationState *s)
>>>> +{
>>>> +        return s->state == MIG_STATE_CHECKPOINTING;
>>>>   }
>>>>
>>>>   bool migration_has_finished(MigrationState *s)
>>>> @@ -419,7 +420,8 @@ void qmp_migrate(const char *uri, bool has_blk, 
>>>> bool blk,
>>>>       params.shared = has_inc&&  inc;
>>>>
>>>>       if (s->state == MIG_STATE_ACTIVE || s->state == 
>>>> MIG_STATE_SETUP ||
>>>> -        s->state == MIG_STATE_CANCELLING) {
>>>> +        s->state == MIG_STATE_CANCELLING
>>>> +         || s->state == MIG_STATE_CHECKPOINTING) {
>>>>           error_set(errp, QERR_MIGRATION_ACTIVE);
>>>>           return;
>>>>       }
>>>> @@ -624,7 +626,10 @@ static void *migration_thread(void *opaque)
>>>>                   }
>>>>
>>>>                   if (!qemu_file_get_error(s->file)) {
>>>> -                    migrate_set_state(s, MIG_STATE_ACTIVE, 
>>>> MIG_STATE_COMPLETED);
>>>> +                    if (!migrate_use_mc()) {
>>>> +                        migrate_set_state(s,
>>>> +                            MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
>>>> +                    }
>>>>                       break;
>>>>                   }
>>>>               }
>>>
>>
>>
>
Juan Quintela March 11, 2014, 9:57 p.m. UTC | #7
mrhines@linux.vnet.ibm.com wrote:
> From: "Michael R. Hines" <mrhines@us.ibm.com>
>
> This patch sets up the initial changes to the migration state
> machine and prototypes to be used by the checkpointing code
> to interact with the state machine so that we can later handle
> failure and recovery scenarios.
>
> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
> ---
>  arch_init.c                   | 29 ++++++++++++++++++++++++-----
>  include/migration/migration.h |  2 ++
>  migration.c                   | 37 +++++++++++++++++++++----------------
>  3 files changed, 47 insertions(+), 21 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index db75120..e9d4d9e 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -658,13 +658,13 @@ static void ram_migration_cancel(void *opaque)
>      migration_end();
>  }
>  
> -static void reset_ram_globals(void)
> +static void reset_ram_globals(bool reset_bulk_stage)
>  {
>      last_seen_block = NULL;
>      last_sent_block = NULL;
>      last_offset = 0;
>      last_version = ram_list.version;
> -    ram_bulk_stage = true;
> +    ram_bulk_stage = reset_bulk_stage;
>  }
>  
>  #define MAX_WAIT 50 /* ms, half buffered_file limit */
> @@ -674,6 +674,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>      RAMBlock *block;
>      int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
>  
> +    /*
> +     * RAM stays open during micro-checkpointing for the next transaction.
> +     */
> +    if (migration_is_mc(migrate_get_current())) {
> +        qemu_mutex_lock_ramlist();
> +        reset_ram_globals(false);
> +        goto skip_setup;
> +    }
> +
>      migration_bitmap = bitmap_new(ram_pages);
>      bitmap_set(migration_bitmap, 0, ram_pages);
>      migration_dirty_pages = ram_pages;
> @@ -710,12 +719,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>      qemu_mutex_lock_iothread();
>      qemu_mutex_lock_ramlist();
>      bytes_transferred = 0;
> -    reset_ram_globals();
> +    reset_ram_globals(true);
>  
>      memory_global_dirty_log_start();
>      migration_bitmap_sync();
>      qemu_mutex_unlock_iothread();
>  
> +skip_setup:
> +
>      qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>  
>      QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> @@ -744,7 +755,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>      qemu_mutex_lock_ramlist();
>  
>      if (ram_list.version != last_version) {
> -        reset_ram_globals();
> +        reset_ram_globals(true);
>      }
>  
>      ram_control_before_iterate(f, RAM_CONTROL_ROUND);
> @@ -825,7 +836,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>      }
>  
>      ram_control_after_iterate(f, RAM_CONTROL_FINISH);
> -    migration_end();
> +
> +    /*
> +     * Only cleanup at the end of normal migrations
> +     * or if the MC destination failed and we got an error.
> +     * Otherwise, we are (or will soon be) in MIG_STATE_CHECKPOINTING.
> +     */
> +    if(!migrate_use_mc() || migration_has_failed(migrate_get_current())) {
> +        migration_end();
> +    }
>  
>      qemu_mutex_unlock_ramlist();
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);



I haven't looked at the code in detail, but what we have here is
esentially:


ram_save_complete()
{
   code not needed for mc
   common codo for migration and mc
   code not needed for mc
}

Similar code on ram_save_setup.  Yes, I know that there are some locking
issues here.


SHould we be able do do something like

__ram_save_complete()
{
    common code
}

mc_ram_save_complete()
{
    # Possible something else here
    __ram_save_complete()
}

rest_ram_save_complete()
{
    code not needed for mc
    __ram_save_complete()
    code not needed for mc
}

My problem here is that current code is already quite complex and
convoluted.  At some point we are going to need to change it to
something that is easier to understand?


> -enum {
> -    MIG_STATE_ERROR = -1,
> -    MIG_STATE_NONE,
> -    MIG_STATE_SETUP,
> -    MIG_STATE_CANCELLING,
> -    MIG_STATE_CANCELLED,
> -    MIG_STATE_ACTIVE,
> -    MIG_STATE_COMPLETED,
> -};
> -

Here comes the code seen on the previous patch O:-)

>  
> -static void migrate_set_state(MigrationState *s, int old_state, int new_state)
> +bool migration_is_active(MigrationState *s)
> +{
> +    return (s->state == MIG_STATE_ACTIVE) || s->state == MIG_STATE_SETUP
> +            || s->state == MIG_STATE_CHECKPOINTING;
> +}

The whole idea of moving MIG_STATE_* to this file was to "force" all
other users to use accessor functions.  This way we know what the others
expect frum us.

> -    assert(s->state != MIG_STATE_ACTIVE);
> +    assert(!migration_is_active(s));

I can understand that we want here MIG_STATE_CHECKPOINTING, but _SETUP?
Or it is a bug on upstream?

Thanks, Juan.
mrhines@linux.vnet.ibm.com April 4, 2014, 3:50 a.m. UTC | #8
On 03/12/2014 05:57 AM, Juan Quintela wrote:
> mrhines@linux.vnet.ibm.com wrote:
>> From: "Michael R. Hines" <mrhines@us.ibm.com>
>>
>> This patch sets up the initial changes to the migration state
>> machine and prototypes to be used by the checkpointing code
>> to interact with the state machine so that we can later handle
>> failure and recovery scenarios.
>>
>> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
>> ---
>>   arch_init.c                   | 29 ++++++++++++++++++++++++-----
>>   include/migration/migration.h |  2 ++
>>   migration.c                   | 37 +++++++++++++++++++++----------------
>>   3 files changed, 47 insertions(+), 21 deletions(-)
>>
>> diff --git a/arch_init.c b/arch_init.c
>> index db75120..e9d4d9e 100644
>> --- a/arch_init.c
>> +++ b/arch_init.c
>> @@ -658,13 +658,13 @@ static void ram_migration_cancel(void *opaque)
>>       migration_end();
>>   }
>>   
>> -static void reset_ram_globals(void)
>> +static void reset_ram_globals(bool reset_bulk_stage)
>>   {
>>       last_seen_block = NULL;
>>       last_sent_block = NULL;
>>       last_offset = 0;
>>       last_version = ram_list.version;
>> -    ram_bulk_stage = true;
>> +    ram_bulk_stage = reset_bulk_stage;
>>   }
>>   
>>   #define MAX_WAIT 50 /* ms, half buffered_file limit */
>> @@ -674,6 +674,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>>       RAMBlock *block;
>>       int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
>>   
>> +    /*
>> +     * RAM stays open during micro-checkpointing for the next transaction.
>> +     */
>> +    if (migration_is_mc(migrate_get_current())) {
>> +        qemu_mutex_lock_ramlist();
>> +        reset_ram_globals(false);
>> +        goto skip_setup;
>> +    }
>> +
>>       migration_bitmap = bitmap_new(ram_pages);
>>       bitmap_set(migration_bitmap, 0, ram_pages);
>>       migration_dirty_pages = ram_pages;
>> @@ -710,12 +719,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>>       qemu_mutex_lock_iothread();
>>       qemu_mutex_lock_ramlist();
>>       bytes_transferred = 0;
>> -    reset_ram_globals();
>> +    reset_ram_globals(true);
>>   
>>       memory_global_dirty_log_start();
>>       migration_bitmap_sync();
>>       qemu_mutex_unlock_iothread();
>>   
>> +skip_setup:
>> +
>>       qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>>   
>>       QTAILQ_FOREACH(block, &ram_list.blocks, next) {
>> @@ -744,7 +755,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>>       qemu_mutex_lock_ramlist();
>>   
>>       if (ram_list.version != last_version) {
>> -        reset_ram_globals();
>> +        reset_ram_globals(true);
>>       }
>>   
>>       ram_control_before_iterate(f, RAM_CONTROL_ROUND);
>> @@ -825,7 +836,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>>       }
>>   
>>       ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>> -    migration_end();
>> +
>> +    /*
>> +     * Only cleanup at the end of normal migrations
>> +     * or if the MC destination failed and we got an error.
>> +     * Otherwise, we are (or will soon be) in MIG_STATE_CHECKPOINTING.
>> +     */
>> +    if(!migrate_use_mc() || migration_has_failed(migrate_get_current())) {
>> +        migration_end();
>> +    }
>>   
>>       qemu_mutex_unlock_ramlist();
>>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>
>
> I haven't looked at the code in detail, but what we have here is
> esentially:
>
>
> ram_save_complete()
> {
>     code not needed for mc
>     common codo for migration and mc
>     code not needed for mc
> }
>
> Similar code on ram_save_setup.  Yes, I know that there are some locking
> issues here.
>
>
> SHould we be able do do something like
>
> __ram_save_complete()
> {
>      common code
> }
>
> mc_ram_save_complete()
> {
>      # Possible something else here
>      __ram_save_complete()
> }
>
> rest_ram_save_complete()
> {
>      code not needed for mc
>      __ram_save_complete()
>      code not needed for mc
> }
>
> My problem here is that current code is already quite complex and
> convoluted.  At some point we are going to need to change it to
> something that is easier to understand?

Understood: So it looks like we needs some "accessor" function
pointers or something here, similar to the way Paolo suggested
that we breakout "before" and "after" iteration methods for
localhost migration and RDMA migration.

I'll cook something up and re-submit.

>
>> -enum {
>> -    MIG_STATE_ERROR = -1,
>> -    MIG_STATE_NONE,
>> -    MIG_STATE_SETUP,
>> -    MIG_STATE_CANCELLING,
>> -    MIG_STATE_CANCELLED,
>> -    MIG_STATE_ACTIVE,
>> -    MIG_STATE_COMPLETED,
>> -};
>> -
> Here comes the code seen on the previous patch O:-)
>
>>   
>> -static void migrate_set_state(MigrationState *s, int old_state, int new_state)
>> +bool migration_is_active(MigrationState *s)
>> +{
>> +    return (s->state == MIG_STATE_ACTIVE) || s->state == MIG_STATE_SETUP
>> +            || s->state == MIG_STATE_CHECKPOINTING;
>> +}
> The whole idea of moving MIG_STATE_* to this file was to "force" all
> other users to use accessor functions.  This way we know what the others
> expect frum us.

Acknowleged - I'll work on creating (or enhancing) the accessor
functions to avoid moving the flags again.

>> -    assert(s->state != MIG_STATE_ACTIVE);
>> +    assert(!migration_is_active(s));
> I can understand that we want here MIG_STATE_CHECKPOINTING, but _SETUP?
> Or it is a bug on upstream?

My fault, I think there is some merge breakage here when I started
walking through the diff. Ignore this one for now..........

- Michael
diff mbox

Patch

diff --git a/arch_init.c b/arch_init.c
index db75120..e9d4d9e 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -658,13 +658,13 @@  static void ram_migration_cancel(void *opaque)
     migration_end();
 }
 
-static void reset_ram_globals(void)
+static void reset_ram_globals(bool reset_bulk_stage)
 {
     last_seen_block = NULL;
     last_sent_block = NULL;
     last_offset = 0;
     last_version = ram_list.version;
-    ram_bulk_stage = true;
+    ram_bulk_stage = reset_bulk_stage;
 }
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -674,6 +674,15 @@  static int ram_save_setup(QEMUFile *f, void *opaque)
     RAMBlock *block;
     int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
 
+    /*
+     * RAM stays open during micro-checkpointing for the next transaction.
+     */
+    if (migration_is_mc(migrate_get_current())) {
+        qemu_mutex_lock_ramlist();
+        reset_ram_globals(false);
+        goto skip_setup;
+    }
+
     migration_bitmap = bitmap_new(ram_pages);
     bitmap_set(migration_bitmap, 0, ram_pages);
     migration_dirty_pages = ram_pages;
@@ -710,12 +719,14 @@  static int ram_save_setup(QEMUFile *f, void *opaque)
     qemu_mutex_lock_iothread();
     qemu_mutex_lock_ramlist();
     bytes_transferred = 0;
-    reset_ram_globals();
+    reset_ram_globals(true);
 
     memory_global_dirty_log_start();
     migration_bitmap_sync();
     qemu_mutex_unlock_iothread();
 
+skip_setup:
+
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
@@ -744,7 +755,7 @@  static int ram_save_iterate(QEMUFile *f, void *opaque)
     qemu_mutex_lock_ramlist();
 
     if (ram_list.version != last_version) {
-        reset_ram_globals();
+        reset_ram_globals(true);
     }
 
     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
@@ -825,7 +836,15 @@  static int ram_save_complete(QEMUFile *f, void *opaque)
     }
 
     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
-    migration_end();
+
+    /*
+     * Only cleanup at the end of normal migrations
+     * or if the MC destination failed and we got an error.
+     * Otherwise, we are (or will soon be) in MIG_STATE_CHECKPOINTING.
+     */
+    if(!migrate_use_mc() || migration_has_failed(migrate_get_current())) {
+        migration_end();
+    }
 
     qemu_mutex_unlock_ramlist();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index a7c54fe..e876a2c 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -101,7 +101,9 @@  int migrate_fd_close(MigrationState *s);
 
 void add_migration_state_change_notifier(Notifier *notify);
 void remove_migration_state_change_notifier(Notifier *notify);
+bool migration_is_active(MigrationState *);
 bool migration_in_setup(MigrationState *);
+bool migration_is_mc(MigrationState *s);
 bool migration_has_finished(MigrationState *);
 bool migration_has_failed(MigrationState *);
 MigrationState *migrate_get_current(void);
diff --git a/migration.c b/migration.c
index 25add6f..f42dae4 100644
--- a/migration.c
+++ b/migration.c
@@ -36,16 +36,6 @@ 
     do { } while (0)
 #endif
 
-enum {
-    MIG_STATE_ERROR = -1,
-    MIG_STATE_NONE,
-    MIG_STATE_SETUP,
-    MIG_STATE_CANCELLING,
-    MIG_STATE_CANCELLED,
-    MIG_STATE_ACTIVE,
-    MIG_STATE_COMPLETED,
-};
-
 #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
 
 /* Amount of time to allocate to each "chunk" of bandwidth-throttled
@@ -273,7 +263,7 @@  void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
     MigrationState *s = migrate_get_current();
     MigrationCapabilityStatusList *cap;
 
-    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
+    if (migration_is_active(s)) {
         error_set(errp, QERR_MIGRATION_ACTIVE);
         return;
     }
@@ -285,7 +275,13 @@  void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 
 /* shared migration helpers */
 
-static void migrate_set_state(MigrationState *s, int old_state, int new_state)
+bool migration_is_active(MigrationState *s)
+{
+    return (s->state == MIG_STATE_ACTIVE) || s->state == MIG_STATE_SETUP
+            || s->state == MIG_STATE_CHECKPOINTING;
+}
+
+void migrate_set_state(MigrationState *s, int old_state, int new_state)
 {
     if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
         trace_migrate_set_state(new_state);
@@ -309,7 +305,7 @@  static void migrate_fd_cleanup(void *opaque)
         s->file = NULL;
     }
 
-    assert(s->state != MIG_STATE_ACTIVE);
+    assert(!migration_is_active(s));
 
     if (s->state != MIG_STATE_COMPLETED) {
         qemu_savevm_state_cancel();
@@ -356,7 +352,12 @@  void remove_migration_state_change_notifier(Notifier *notify)
 
 bool migration_in_setup(MigrationState *s)
 {
-    return s->state == MIG_STATE_SETUP;
+        return s->state == MIG_STATE_SETUP;
+}
+
+bool migration_is_mc(MigrationState *s)
+{
+        return s->state == MIG_STATE_CHECKPOINTING;
 }
 
 bool migration_has_finished(MigrationState *s)
@@ -419,7 +420,8 @@  void qmp_migrate(const char *uri, bool has_blk, bool blk,
     params.shared = has_inc && inc;
 
     if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
-        s->state == MIG_STATE_CANCELLING) {
+        s->state == MIG_STATE_CANCELLING 
+         || s->state == MIG_STATE_CHECKPOINTING) {
         error_set(errp, QERR_MIGRATION_ACTIVE);
         return;
     }
@@ -624,7 +626,10 @@  static void *migration_thread(void *opaque)
                 }
 
                 if (!qemu_file_get_error(s->file)) {
-                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
+                    if (!migrate_use_mc()) {
+                        migrate_set_state(s,
+                            MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
+                    }
                     break;
                 }
             }