
[v2,25/29] migration/multifd: Support outgoing fixed-ram stream format

Message ID 20231023203608.26370-26-farosas@suse.de
State New
Series migration: File based migration with multifd and fixed-ram

Commit Message

Fabiano Rosas Oct. 23, 2023, 8:36 p.m. UTC
The new fixed-ram stream format uses a file transport and puts ram
pages in the migration file at their respective offsets. Writing can
be done in parallel by using the pwritev system call, which takes
iovecs and an offset.

Add support for enabling the new format along with multifd, to make
use of the threading and page handling already in place.

This requires multifd to stop sending headers, leaving the stream
format to the fixed-ram code. When it comes time to write the data, we
need to call a version of qio_channel_write that can take an offset.
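
For illustration, a minimal sketch of the pwritev pattern this builds
on (write_pages_at is a hypothetical helper, not code from this
series):

#include <sys/uio.h>

/*
 * Write two pages at an explicit file offset in a single syscall.
 * pwritev() does not move the file position, so multiple threads
 * can write disjoint regions of the same fd in parallel without
 * coordinating on a shared offset.
 */
static ssize_t write_pages_at(int fd, void *page0, void *page1,
                              size_t pagesize, off_t offset)
{
    struct iovec iov[2] = {
        { .iov_base = page0, .iov_len = pagesize },
        { .iov_base = page1, .iov_len = pagesize },
    };

    return pwritev(fd, iov, 2, offset);
}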

Usage on HMP is:

(qemu) stop
(qemu) migrate_set_capability multifd on
(qemu) migrate_set_capability fixed-ram on
(qemu) migrate_set_parameter max-bandwidth 0
(qemu) migrate_set_parameter multifd-channels 8
(qemu) migrate file:migfile

Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
 include/qemu/bitops.h | 13 ++++++++++
 migration/multifd.c   | 55 +++++++++++++++++++++++++++++++++++++++++--
 migration/options.c   |  6 -----
 migration/ram.c       |  2 +-
 4 files changed, 67 insertions(+), 9 deletions(-)

Comments

Daniel P. Berrangé Oct. 25, 2023, 9:23 a.m. UTC | #1
On Mon, Oct 23, 2023 at 05:36:04PM -0300, Fabiano Rosas wrote:
> [...]
> @@ -738,8 +786,9 @@ static void *multifd_send_thread(void *opaque)
>                  p->iov[0].iov_base = p->packet;
>              }
>  
> -            ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
> -                                              0, p->write_flags, &local_err);
> +            ret = qio_channel_write_full_all(p->c, p->iov, p->iovs_num,
> +                                             write_base, NULL, 0,
> +                                             p->write_flags, &local_err);
>              if (ret != 0) {
>                  break;
>              }
> @@ -969,6 +1018,8 @@ int multifd_save_setup(Error **errp)
>  
>          if (migrate_zero_copy_send()) {
>              p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
> +        } else if (!use_packets) {
> +            p->write_flags |= QIO_CHANNEL_WRITE_FLAG_WITH_OFFSET;
>          } else {
>              p->write_flags = 0;
>          }

Ah, so this is why you had the weird overloaded design for
qio_channel_write_full_all in patch 22 that I queried. I'd
still prefer the simpler design at the QIO level, and just
calling the appropriate function above. 

With regards,
Daniel
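
A rough sketch of the call-site shape being suggested, assuming a
separate offset-taking variant exists at the QIO level
(qio_channel_pwritev_full_all is a hypothetical name; patch 22's
actual API is not shown here):

/* Pick the plain or offset-based writer at the call site. */
if (use_packets) {
    ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num,
                                      NULL, 0, p->write_flags,
                                      &local_err);
} else {
    /* hypothetical offset-taking variant */
    ret = qio_channel_pwritev_full_all(p->c, p->iov, p->iovs_num,
                                       write_base, &local_err);
}
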
Fabiano Rosas Oct. 25, 2023, 2:21 p.m. UTC | #2
Daniel P. Berrangé <berrange@redhat.com> writes:

> On Mon, Oct 23, 2023 at 05:36:04PM -0300, Fabiano Rosas wrote:
>> [...]
>
> Ah, so this is why you had the weird overloaded design for
> qio_channel_write_full_all in patch 22 that I queried. I'd
> still prefer the simpler design at the QIO level, and just
> calling the appropriate function above. 

Yes, I didn't want to have multifd choosing between different IO
functions during (multifd) runtime. Here we set the flag at setup time
and forget about it.

I now understand multifd a bit more, so I'm more confident changing
the code; let's see how your suggestion looks.



Patch

diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index cb3526d1f4..2c0a2fe751 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -67,6 +67,19 @@ static inline void clear_bit(long nr, unsigned long *addr)
     *p &= ~mask;
 }
 
+/**
+ * clear_bit_atomic - Clears a bit in memory atomically
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ */
+static inline void clear_bit_atomic(long nr, unsigned long *addr)
+{
+    unsigned long mask = BIT_MASK(nr);
+    unsigned long *p = addr + BIT_WORD(nr);
+
+    return qatomic_and(p, ~mask);
+}
+
 /**
  * change_bit - Toggle a bit in memory
  * @nr: Bit to change
diff --git a/migration/multifd.c b/migration/multifd.c
index 20e8635740..3f95a41ee9 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -260,6 +260,19 @@ static void multifd_pages_clear(MultiFDPages_t *pages)
     g_free(pages);
 }
 
+static void multifd_set_file_bitmap(MultiFDSendParams *p)
+{
+    MultiFDPages_t *pages = p->pages;
+
+    if (!pages->block) {
+        return;
+    }
+
+    for (int i = 0; i < p->normal_num; i++) {
+        ramblock_set_shadow_bmap_atomic(pages->block, pages->offset[i]);
+    }
+}
+
 static void multifd_send_fill_packet(MultiFDSendParams *p)
 {
     MultiFDPacket_t *packet = p->packet;
@@ -606,6 +619,29 @@ int multifd_send_sync_main(QEMUFile *f)
         }
     }
 
+    if (!migrate_multifd_packets()) {
+        /*
+         * There's no sync packet to send. Just make sure the sending
+         * above has finished.
+         */
+        for (i = 0; i < migrate_multifd_channels(); i++) {
+            qemu_sem_wait(&multifd_send_state->channels_ready);
+        }
+
+        /* sanity check and release the channels */
+        for (i = 0; i < migrate_multifd_channels(); i++) {
+            MultiFDSendParams *p = &multifd_send_state->params[i];
+
+            qemu_mutex_lock(&p->mutex);
+            assert(!p->pending_job || p->quit);
+            qemu_mutex_unlock(&p->mutex);
+
+            qemu_sem_post(&p->sem);
+        }
+
+        return 0;
+    }
+
     /*
      * When using zero-copy, it's necessary to flush the pages before any of
      * the pages can be sent again, so we'll make sure the new version of the
@@ -689,6 +725,8 @@ static void *multifd_send_thread(void *opaque)
 
         if (p->pending_job) {
             uint32_t flags;
+            uint64_t write_base;
+
             p->normal_num = 0;
 
             if (!use_packets || use_zero_copy_send) {
@@ -713,6 +751,16 @@ static void *multifd_send_thread(void *opaque)
             if (use_packets) {
                 multifd_send_fill_packet(p);
                 p->num_packets++;
+                write_base = 0;
+            } else {
+                multifd_set_file_bitmap(p);
+
+                /*
+                 * If we subtract the host page now, we don't need to
+                 * pass it into qio_channel_write_full_all() below.
+                 */
+                write_base = p->pages->block->pages_offset -
+                    (uint64_t)p->pages->block->host;
             }
 
             flags = p->flags;
@@ -738,8 +786,9 @@ static void *multifd_send_thread(void *opaque)
                 p->iov[0].iov_base = p->packet;
             }
 
-            ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
-                                              0, p->write_flags, &local_err);
+            ret = qio_channel_write_full_all(p->c, p->iov, p->iovs_num,
+                                             write_base, NULL, 0,
+                                             p->write_flags, &local_err);
             if (ret != 0) {
                 break;
             }
@@ -969,6 +1018,8 @@ int multifd_save_setup(Error **errp)
 
         if (migrate_zero_copy_send()) {
             p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
+        } else if (!use_packets) {
+            p->write_flags |= QIO_CHANNEL_WRITE_FLAG_WITH_OFFSET;
         } else {
             p->write_flags = 0;
         }
diff --git a/migration/options.c b/migration/options.c
index 469d5d4c50..2193d69e71 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -648,12 +648,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
     }
 
     if (new_caps[MIGRATION_CAPABILITY_FIXED_RAM]) {
-        if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) {
-            error_setg(errp,
-                       "Fixed-ram migration is incompatible with multifd");
-            return false;
-        }
-
         if (new_caps[MIGRATION_CAPABILITY_XBZRLE]) {
             error_setg(errp,
                        "Fixed-ram migration is incompatible with xbzrle");
diff --git a/migration/ram.c b/migration/ram.c
index 3497ed186a..5c67e30e55 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1161,7 +1161,7 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
 
     if (migrate_fixed_ram()) {
         /* zero pages are not transferred with fixed-ram */
-        clear_bit(offset >> TARGET_PAGE_BITS, pss->block->shadow_bmap);
+        clear_bit_atomic(offset >> TARGET_PAGE_BITS, pss->block->shadow_bmap);
         return 1;
     }
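
For reference, the write_base arithmetic in multifd_send_thread works
because each iovec base is a host pointer into the RAM block: the file
offset for a page is pages_offset + (iov_base - block->host), and
subtracting block->host up front lets the channel compute it per iovec
as below (page_file_offset is a hypothetical helper, not part of the
series):

#include <stdint.h>
#include <sys/uio.h>

static inline uint64_t page_file_offset(uint64_t write_base,
                                        const struct iovec *iov)
{
    /*
     * write_base was set to pages_offset - (uint64_t)block->host,
     * so adding the host pointer back yields the absolute file
     * offset where this page must land.
     */
    return write_base + (uint64_t)(uintptr_t)iov->iov_base;
}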