Message ID | 20230712111703.28031-4-hreitz@redhat.com |
---|---|
State | New |
Headers | show |
Series | vhost-user: Back-end state migration | expand |
On Wed, Jul 12, 2023 at 01:17:01PM +0200, Hanna Czenczek wrote: > vhost_save_backend_state() and vhost_load_backend_state() can be used by > vhost front-ends to easily save and load the back-end's state to/from > the migration stream. > > Because we do not know the full state size ahead of time, > vhost_save_backend_state() simply reads the data in 1 MB chunks, and > writes each chunk consecutively into the migration stream, prefixed by > its length. EOF is indicated by a 0-length chunk. > > Signed-off-by: Hanna Czenczek <hreitz@redhat.com> > --- > include/hw/virtio/vhost.h | 35 +++++++ > hw/virtio/vhost.c | 204 ++++++++++++++++++++++++++++++++++++++ > 2 files changed, 239 insertions(+) > > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h > index d8877496e5..0c282abd4e 100644 > --- a/include/hw/virtio/vhost.h > +++ b/include/hw/virtio/vhost.h > @@ -425,4 +425,39 @@ int vhost_set_device_state_fd(struct vhost_dev *dev, > */ > int vhost_check_device_state(struct vhost_dev *dev, Error **errp); > > +/** > + * vhost_save_backend_state(): High-level function to receive a vhost > + * back-end's state, and save it in `f`. Uses I think the GtkDoc syntax is @f instead of `f`. > + * `vhost_set_device_state_fd()` to get the data from the back-end, and > + * stores it in consecutive chunks that are each prefixed by their > + * respective length (be32). The end is marked by a 0-length chunk. > + * > + * Must only be called while the device and all its vrings are stopped > + * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`). > + * > + * @dev: The vhost device from which to save the state > + * @f: Migration stream in which to save the state > + * @errp: Potential error message > + * > + * Returns 0 on success, and -errno otherwise. > + */ > +int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); > + > +/** > + * vhost_load_backend_state(): High-level function to load a vhost > + * back-end's state from `f`, and send it over to the back-end. 
Reads > + * the data from `f` in the format used by `vhost_save_state()`, and > + * uses `vhost_set_device_state_fd()` to transfer it to the back-end. > + * > + * Must only be called while the device and all its vrings are stopped > + * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`). > + * > + * @dev: The vhost device to which to send the sate > + * @f: Migration stream from which to load the state > + * @errp: Potential error message > + * > + * Returns 0 on success, and -errno otherwise. > + */ > +int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); > + > #endif > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > index 756b6d55a8..332d49a310 100644 > --- a/hw/virtio/vhost.c > +++ b/hw/virtio/vhost.c > @@ -2128,3 +2128,207 @@ int vhost_check_device_state(struct vhost_dev *dev, Error **errp) > "vhost transport does not support migration state transfer"); > return -ENOSYS; > } > + > +int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) > +{ > + /* Maximum chunk size in which to transfer the state */ > + const size_t chunk_size = 1 * 1024 * 1024; > + void *transfer_buf = NULL; > + g_autoptr(GError) g_err = NULL; > + int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1; > + int ret; > + > + /* [0] for reading (our end), [1] for writing (back-end's end) */ > + if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) { > + error_setg(errp, "Failed to set up state transfer pipe: %s", > + g_err->message); > + ret = -EINVAL; > + goto fail; > + } > + > + read_fd = pipe_fds[0]; > + write_fd = pipe_fds[1]; > + > + /* > + * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped. > + * We cannot check dev->suspended, because the back-end may not support > + * suspending. 
> + */ > + assert(!dev->started); > + > + /* Transfer ownership of write_fd to the back-end */ > + ret = vhost_set_device_state_fd(dev, > + VHOST_TRANSFER_STATE_DIRECTION_SAVE, > + VHOST_TRANSFER_STATE_PHASE_STOPPED, > + write_fd, > + &reply_fd, > + errp); > + if (ret < 0) { > + error_prepend(errp, "Failed to initiate state transfer: "); > + goto fail; > + } > + > + /* If the back-end wishes to use a different pipe, switch over */ > + if (reply_fd >= 0) { > + close(read_fd); > + read_fd = reply_fd; > + } > + > + transfer_buf = g_malloc(chunk_size); > + > + while (true) { > + ssize_t read_ret; > + > + read_ret = read(read_fd, transfer_buf, chunk_size); > + if (read_ret < 0) { Is it necessary to handle -EINTR? > + ret = -errno; > + error_setg_errno(errp, -ret, "Failed to receive state"); > + goto fail; > + } > + > + assert(read_ret <= chunk_size); > + qemu_put_be32(f, read_ret); > + > + if (read_ret == 0) { > + /* EOF */ > + break; > + } > + > + qemu_put_buffer(f, transfer_buf, read_ret); > + } > + > + /* > + * Back-end will not really care, but be clean and close our end of the pipe > + * before inquiring the back-end about whether transfer was successful > + */ > + close(read_fd); > + read_fd = -1; > + > + /* Also, verify that the device is still stopped */ > + assert(!dev->started); > + > + ret = vhost_check_device_state(dev, errp); > + if (ret < 0) { > + goto fail; > + } > + > + ret = 0; > +fail: > + g_free(transfer_buf); > + if (read_fd >= 0) { > + close(read_fd); > + } > + > + return ret; > +} > + > +int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) > +{ > + size_t transfer_buf_size = 0; > + void *transfer_buf = NULL; > + g_autoptr(GError) g_err = NULL; > + int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1; > + int ret; > + > + /* [0] for reading (back-end's end), [1] for writing (our end) */ > + if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) { > + error_setg(errp, "Failed to set up state transfer pipe: %s", > + 
g_err->message); > + ret = -EINVAL; > + goto fail; > + } > + > + read_fd = pipe_fds[0]; > + write_fd = pipe_fds[1]; > + > + /* > + * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped. > + * We cannot check dev->suspended, because the back-end may not support > + * suspending. > + */ > + assert(!dev->started); > + > + /* Transfer ownership of read_fd to the back-end */ > + ret = vhost_set_device_state_fd(dev, > + VHOST_TRANSFER_STATE_DIRECTION_LOAD, > + VHOST_TRANSFER_STATE_PHASE_STOPPED, > + read_fd, > + &reply_fd, > + errp); > + if (ret < 0) { > + error_prepend(errp, "Failed to initiate state transfer: "); > + goto fail; > + } > + > + /* If the back-end wishes to use a different pipe, switch over */ > + if (reply_fd >= 0) { > + close(write_fd); > + write_fd = reply_fd; > + } > + > + while (true) { > + size_t this_chunk_size = qemu_get_be32(f); > + ssize_t write_ret; > + const uint8_t *transfer_pointer; > + > + if (this_chunk_size == 0) { > + /* End of state */ > + break; > + } > + > + if (transfer_buf_size < this_chunk_size) { > + transfer_buf = g_realloc(transfer_buf, this_chunk_size); > + transfer_buf_size = this_chunk_size; > + } > + > + if (qemu_get_buffer(f, transfer_buf, this_chunk_size) < > + this_chunk_size) > + { > + error_setg(errp, "Failed to read state"); > + ret = -EINVAL; > + goto fail; > + } > + > + transfer_pointer = transfer_buf; > + while (this_chunk_size > 0) { > + write_ret = write(write_fd, transfer_pointer, this_chunk_size); > + if (write_ret < 0) { Is it necessary to handle -EINTR? 
> + ret = -errno; > + error_setg_errno(errp, -ret, "Failed to send state"); > + goto fail; > + } else if (write_ret == 0) { > + error_setg(errp, "Failed to send state: Connection is closed"); > + ret = -ECONNRESET; > + goto fail; > + } > + > + assert(write_ret <= this_chunk_size); > + this_chunk_size -= write_ret; > + transfer_pointer += write_ret; > + } > + } > + > + /* > + * Close our end, thus ending transfer, before inquiring the back-end about > + * whether transfer was successful > + */ > + close(write_fd); > + write_fd = -1; > + > + /* Also, verify that the device is still stopped */ > + assert(!dev->started); > + > + ret = vhost_check_device_state(dev, errp); > + if (ret < 0) { > + goto fail; > + } > + > + ret = 0; > +fail: > + g_free(transfer_buf); > + if (write_fd >= 0) { > + close(write_fd); > + } > + > + return ret; > +} > -- > 2.41.0 >
On 18.07.23 20:42, Stefan Hajnoczi wrote: > On Wed, Jul 12, 2023 at 01:17:01PM +0200, Hanna Czenczek wrote: >> vhost_save_backend_state() and vhost_load_backend_state() can be used by >> vhost front-ends to easily save and load the back-end's state to/from >> the migration stream. >> >> Because we do not know the full state size ahead of time, >> vhost_save_backend_state() simply reads the data in 1 MB chunks, and >> writes each chunk consecutively into the migration stream, prefixed by >> its length. EOF is indicated by a 0-length chunk. >> >> Signed-off-by: Hanna Czenczek <hreitz@redhat.com> >> --- >> include/hw/virtio/vhost.h | 35 +++++++ >> hw/virtio/vhost.c | 204 ++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 239 insertions(+) >> >> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h >> index d8877496e5..0c282abd4e 100644 >> --- a/include/hw/virtio/vhost.h >> +++ b/include/hw/virtio/vhost.h >> @@ -425,4 +425,39 @@ int vhost_set_device_state_fd(struct vhost_dev *dev, >> */ >> int vhost_check_device_state(struct vhost_dev *dev, Error **errp); >> >> +/** >> + * vhost_save_backend_state(): High-level function to receive a vhost >> + * back-end's state, and save it in `f`. Uses > I think the GtkDoc syntax is @f instead of `f`. OK, I’ll fix the parameter name references! >> + * `vhost_set_device_state_fd()` to get the data from the back-end, and >> + * stores it in consecutive chunks that are each prefixed by their >> + * respective length (be32). The end is marked by a 0-length chunk. >> + * >> + * Must only be called while the device and all its vrings are stopped >> + * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`). >> + * >> + * @dev: The vhost device from which to save the state >> + * @f: Migration stream in which to save the state >> + * @errp: Potential error message >> + * >> + * Returns 0 on success, and -errno otherwise. 
>> + */ >> +int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); >> + >> +/** >> + * vhost_load_backend_state(): High-level function to load a vhost >> + * back-end's state from `f`, and send it over to the back-end. Reads >> + * the data from `f` in the format used by `vhost_save_state()`, and >> + * uses `vhost_set_device_state_fd()` to transfer it to the back-end. >> + * >> + * Must only be called while the device and all its vrings are stopped >> + * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`). >> + * >> + * @dev: The vhost device to which to send the sate >> + * @f: Migration stream from which to load the state >> + * @errp: Potential error message >> + * >> + * Returns 0 on success, and -errno otherwise. >> + */ >> +int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); >> + >> #endif >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c >> index 756b6d55a8..332d49a310 100644 >> --- a/hw/virtio/vhost.c >> +++ b/hw/virtio/vhost.c >> @@ -2128,3 +2128,207 @@ int vhost_check_device_state(struct vhost_dev *dev, Error **errp) >> "vhost transport does not support migration state transfer"); >> return -ENOSYS; >> } >> + >> +int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) >> +{ >> + /* Maximum chunk size in which to transfer the state */ >> + const size_t chunk_size = 1 * 1024 * 1024; >> + void *transfer_buf = NULL; >> + g_autoptr(GError) g_err = NULL; >> + int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1; >> + int ret; >> + >> + /* [0] for reading (our end), [1] for writing (back-end's end) */ >> + if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) { >> + error_setg(errp, "Failed to set up state transfer pipe: %s", >> + g_err->message); >> + ret = -EINVAL; >> + goto fail; >> + } >> + >> + read_fd = pipe_fds[0]; >> + write_fd = pipe_fds[1]; >> + >> + /* >> + * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped. 
>> + * We cannot check dev->suspended, because the back-end may not support >> + * suspending. >> + */ >> + assert(!dev->started); >> + >> + /* Transfer ownership of write_fd to the back-end */ >> + ret = vhost_set_device_state_fd(dev, >> + VHOST_TRANSFER_STATE_DIRECTION_SAVE, >> + VHOST_TRANSFER_STATE_PHASE_STOPPED, >> + write_fd, >> + &reply_fd, >> + errp); >> + if (ret < 0) { >> + error_prepend(errp, "Failed to initiate state transfer: "); >> + goto fail; >> + } >> + >> + /* If the back-end wishes to use a different pipe, switch over */ >> + if (reply_fd >= 0) { >> + close(read_fd); >> + read_fd = reply_fd; >> + } >> + >> + transfer_buf = g_malloc(chunk_size); >> + >> + while (true) { >> + ssize_t read_ret; >> + >> + read_ret = read(read_fd, transfer_buf, chunk_size); >> + if (read_ret < 0) { > Is it necessary to handle -EINTR? Yes, indeed, I do believe so. I’ll wrap this and the write() in RETRY_ON_EINTR(). Hanna
On Wed, Jul 12, 2023 at 1:17 PM Hanna Czenczek <hreitz@redhat.com> wrote: > > vhost_save_backend_state() and vhost_load_backend_state() can be used by > vhost front-ends to easily save and load the back-end's state to/from > the migration stream. > > Because we do not know the full state size ahead of time, > vhost_save_backend_state() simply reads the data in 1 MB chunks, and > writes each chunk consecutively into the migration stream, prefixed by > its length. EOF is indicated by a 0-length chunk. > > Signed-off-by: Hanna Czenczek <hreitz@redhat.com> > --- > include/hw/virtio/vhost.h | 35 +++++++ > hw/virtio/vhost.c | 204 ++++++++++++++++++++++++++++++++++++++ > 2 files changed, 239 insertions(+) > > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h > index d8877496e5..0c282abd4e 100644 > --- a/include/hw/virtio/vhost.h > +++ b/include/hw/virtio/vhost.h > @@ -425,4 +425,39 @@ int vhost_set_device_state_fd(struct vhost_dev *dev, > */ > int vhost_check_device_state(struct vhost_dev *dev, Error **errp); > > +/** > + * vhost_save_backend_state(): High-level function to receive a vhost > + * back-end's state, and save it in `f`. Uses > + * `vhost_set_device_state_fd()` to get the data from the back-end, and > + * stores it in consecutive chunks that are each prefixed by their > + * respective length (be32). The end is marked by a 0-length chunk. > + * > + * Must only be called while the device and all its vrings are stopped > + * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`). > + * > + * @dev: The vhost device from which to save the state > + * @f: Migration stream in which to save the state > + * @errp: Potential error message > + * > + * Returns 0 on success, and -errno otherwise. > + */ > +int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); > + > +/** > + * vhost_load_backend_state(): High-level function to load a vhost > + * back-end's state from `f`, and send it over to the back-end. 
Reads > + * the data from `f` in the format used by `vhost_save_state()`, and > + * uses `vhost_set_device_state_fd()` to transfer it to the back-end. > + * > + * Must only be called while the device and all its vrings are stopped > + * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`). > + * > + * @dev: The vhost device to which to send the sate > + * @f: Migration stream from which to load the state > + * @errp: Potential error message > + * > + * Returns 0 on success, and -errno otherwise. > + */ > +int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); > + > #endif > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > index 756b6d55a8..332d49a310 100644 > --- a/hw/virtio/vhost.c > +++ b/hw/virtio/vhost.c > @@ -2128,3 +2128,207 @@ int vhost_check_device_state(struct vhost_dev *dev, Error **errp) > "vhost transport does not support migration state transfer"); > return -ENOSYS; > } > + > +int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) > +{ > + /* Maximum chunk size in which to transfer the state */ > + const size_t chunk_size = 1 * 1024 * 1024; > + void *transfer_buf = NULL; > + g_autoptr(GError) g_err = NULL; > + int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1; > + int ret; > + > + /* [0] for reading (our end), [1] for writing (back-end's end) */ > + if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) { > + error_setg(errp, "Failed to set up state transfer pipe: %s", > + g_err->message); > + ret = -EINVAL; > + goto fail; > + } > + > + read_fd = pipe_fds[0]; > + write_fd = pipe_fds[1]; > + > + /* > + * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped. > + * We cannot check dev->suspended, because the back-end may not support > + * suspending. 
> + */ > + assert(!dev->started); > + > + /* Transfer ownership of write_fd to the back-end */ > + ret = vhost_set_device_state_fd(dev, > + VHOST_TRANSFER_STATE_DIRECTION_SAVE, > + VHOST_TRANSFER_STATE_PHASE_STOPPED, > + write_fd, > + &reply_fd, > + errp); > + if (ret < 0) { > + error_prepend(errp, "Failed to initiate state transfer: "); > + goto fail; > + } > + > + /* If the back-end wishes to use a different pipe, switch over */ > + if (reply_fd >= 0) { > + close(read_fd); > + read_fd = reply_fd; > + } > + > + transfer_buf = g_malloc(chunk_size); > + > + while (true) { > + ssize_t read_ret; > + > + read_ret = read(read_fd, transfer_buf, chunk_size); > + if (read_ret < 0) { > + ret = -errno; > + error_setg_errno(errp, -ret, "Failed to receive state"); > + goto fail; > + } > + > + assert(read_ret <= chunk_size); > + qemu_put_be32(f, read_ret); > + > + if (read_ret == 0) { > + /* EOF */ > + break; > + } > + > + qemu_put_buffer(f, transfer_buf, read_ret); > + } > + > + /* > + * Back-end will not really care, but be clean and close our end of the pipe > + * before inquiring the back-end about whether transfer was successful > + */ > + close(read_fd); > + read_fd = -1; > + > + /* Also, verify that the device is still stopped */ > + assert(!dev->started); > + > + ret = vhost_check_device_state(dev, errp); > + if (ret < 0) { > + goto fail; > + } > + > + ret = 0; > +fail: > + g_free(transfer_buf); > + if (read_fd >= 0) { > + close(read_fd); > + } > + > + return ret; > +} > + > +int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) > +{ > + size_t transfer_buf_size = 0; > + void *transfer_buf = NULL; > + g_autoptr(GError) g_err = NULL; > + int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1; > + int ret; > + > + /* [0] for reading (back-end's end), [1] for writing (our end) */ > + if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) { > + error_setg(errp, "Failed to set up state transfer pipe: %s", > + g_err->message); > + ret = -EINVAL; > 
+ goto fail; > + } > + > + read_fd = pipe_fds[0]; > + write_fd = pipe_fds[1]; > + > + /* > + * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped. > + * We cannot check dev->suspended, because the back-end may not support > + * suspending. > + */ > + assert(!dev->started); > + > + /* Transfer ownership of read_fd to the back-end */ > + ret = vhost_set_device_state_fd(dev, > + VHOST_TRANSFER_STATE_DIRECTION_LOAD, > + VHOST_TRANSFER_STATE_PHASE_STOPPED, > + read_fd, > + &reply_fd, > + errp); > + if (ret < 0) { > + error_prepend(errp, "Failed to initiate state transfer: "); > + goto fail; > + } > + > + /* If the back-end wishes to use a different pipe, switch over */ > + if (reply_fd >= 0) { > + close(write_fd); > + write_fd = reply_fd; > + } > + > + while (true) { > + size_t this_chunk_size = qemu_get_be32(f); > + ssize_t write_ret; > + const uint8_t *transfer_pointer; > + > + if (this_chunk_size == 0) { > + /* End of state */ > + break; > + } > + > + if (transfer_buf_size < this_chunk_size) { > + transfer_buf = g_realloc(transfer_buf, this_chunk_size); > + transfer_buf_size = this_chunk_size; > + } > + > + if (qemu_get_buffer(f, transfer_buf, this_chunk_size) < > + this_chunk_size) > + { > + error_setg(errp, "Failed to read state"); > + ret = -EINVAL; > + goto fail; > + } > + > + transfer_pointer = transfer_buf; > + while (this_chunk_size > 0) { > + write_ret = write(write_fd, transfer_pointer, this_chunk_size); > + if (write_ret < 0) { > + ret = -errno; > + error_setg_errno(errp, -ret, "Failed to send state"); > + goto fail; > + } else if (write_ret == 0) { > + error_setg(errp, "Failed to send state: Connection is closed"); > + ret = -ECONNRESET; > + goto fail; > + } > + > + assert(write_ret <= this_chunk_size); > + this_chunk_size -= write_ret; > + transfer_pointer += write_ret; > + } > + } > + > + /* > + * Close our end, thus ending transfer, before inquiring the back-end about > + * whether transfer was successful > + */ > + close(write_fd); > + 
write_fd = -1; > + > + /* Also, verify that the device is still stopped */ > + assert(!dev->started); > + > + ret = vhost_check_device_state(dev, errp); > + if (ret < 0) { > + goto fail; > + } > + > + ret = 0; > +fail: > + g_free(transfer_buf); Nitpick, but transfer_buf could have the g_autofree parameter. Thanks! > + if (write_fd >= 0) { > + close(write_fd); > + } > + > + return ret; > +} > -- > 2.41.0 >
On 21.07.23 17:18, Eugenio Perez Martin wrote: > On Wed, Jul 12, 2023 at 1:17 PM Hanna Czenczek <hreitz@redhat.com> wrote: >> vhost_save_backend_state() and vhost_load_backend_state() can be used by >> vhost front-ends to easily save and load the back-end's state to/from >> the migration stream. >> >> Because we do not know the full state size ahead of time, >> vhost_save_backend_state() simply reads the data in 1 MB chunks, and >> writes each chunk consecutively into the migration stream, prefixed by >> its length. EOF is indicated by a 0-length chunk. >> >> Signed-off-by: Hanna Czenczek <hreitz@redhat.com> >> --- >> include/hw/virtio/vhost.h | 35 +++++++ >> hw/virtio/vhost.c | 204 ++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 239 insertions(+) [...] >> +int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) >> +{ >> + size_t transfer_buf_size = 0; >> + void *transfer_buf = NULL; >> + g_autoptr(GError) g_err = NULL; >> + int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1; >> + int ret; [...] >> + ret = 0; >> +fail: >> + g_free(transfer_buf); > Nitpick, but transfer_buf could have the g_autofree parameter. Ah, sure, thanks! Hanna
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index d8877496e5..0c282abd4e 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -425,4 +425,39 @@ int vhost_set_device_state_fd(struct vhost_dev *dev, */ int vhost_check_device_state(struct vhost_dev *dev, Error **errp); +/** + * vhost_save_backend_state(): High-level function to receive a vhost + * back-end's state, and save it in `f`. Uses + * `vhost_set_device_state_fd()` to get the data from the back-end, and + * stores it in consecutive chunks that are each prefixed by their + * respective length (be32). The end is marked by a 0-length chunk. + * + * Must only be called while the device and all its vrings are stopped + * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`). + * + * @dev: The vhost device from which to save the state + * @f: Migration stream in which to save the state + * @errp: Potential error message + * + * Returns 0 on success, and -errno otherwise. + */ +int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); + +/** + * vhost_load_backend_state(): High-level function to load a vhost + * back-end's state from `f`, and send it over to the back-end. Reads + * the data from `f` in the format used by `vhost_save_backend_state()`, and + * uses `vhost_set_device_state_fd()` to transfer it to the back-end. + * + * Must only be called while the device and all its vrings are stopped + * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`). + * + * @dev: The vhost device to which to send the state + * @f: Migration stream from which to load the state + * @errp: Potential error message + * + * Returns 0 on success, and -errno otherwise. 
+ */ +int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); + #endif diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 756b6d55a8..332d49a310 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -2128,3 +2128,207 @@ int vhost_check_device_state(struct vhost_dev *dev, Error **errp) "vhost transport does not support migration state transfer"); return -ENOSYS; } + +int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) +{ + /* Maximum chunk size in which to transfer the state */ + const size_t chunk_size = 1 * 1024 * 1024; + void *transfer_buf = NULL; + g_autoptr(GError) g_err = NULL; + int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1; + int ret; + + /* [0] for reading (our end), [1] for writing (back-end's end) */ + if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) { + error_setg(errp, "Failed to set up state transfer pipe: %s", + g_err->message); + ret = -EINVAL; + goto fail; + } + + read_fd = pipe_fds[0]; + write_fd = pipe_fds[1]; + + /* + * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped. + * We cannot check dev->suspended, because the back-end may not support + * suspending. 
+ */ + assert(!dev->started); + + /* Transfer ownership of write_fd to the back-end */ + ret = vhost_set_device_state_fd(dev, + VHOST_TRANSFER_STATE_DIRECTION_SAVE, + VHOST_TRANSFER_STATE_PHASE_STOPPED, + write_fd, + &reply_fd, + errp); + if (ret < 0) { + error_prepend(errp, "Failed to initiate state transfer: "); + goto fail; + } + + /* If the back-end wishes to use a different pipe, switch over */ + if (reply_fd >= 0) { + close(read_fd); + read_fd = reply_fd; + } + + transfer_buf = g_malloc(chunk_size); + + while (true) { + ssize_t read_ret; + + read_ret = read(read_fd, transfer_buf, chunk_size); + if (read_ret < 0) { + ret = -errno; + error_setg_errno(errp, -ret, "Failed to receive state"); + goto fail; + } + + assert(read_ret <= chunk_size); + qemu_put_be32(f, read_ret); + + if (read_ret == 0) { + /* EOF */ + break; + } + + qemu_put_buffer(f, transfer_buf, read_ret); + } + + /* + * Back-end will not really care, but be clean and close our end of the pipe + * before inquiring the back-end about whether transfer was successful + */ + close(read_fd); + read_fd = -1; + + /* Also, verify that the device is still stopped */ + assert(!dev->started); + + ret = vhost_check_device_state(dev, errp); + if (ret < 0) { + goto fail; + } + + ret = 0; +fail: + g_free(transfer_buf); + if (read_fd >= 0) { + close(read_fd); + } + + return ret; +} + +int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) +{ + size_t transfer_buf_size = 0; + void *transfer_buf = NULL; + g_autoptr(GError) g_err = NULL; + int pipe_fds[2], read_fd = -1, write_fd = -1, reply_fd = -1; + int ret; + + /* [0] for reading (back-end's end), [1] for writing (our end) */ + if (!g_unix_open_pipe(pipe_fds, FD_CLOEXEC, &g_err)) { + error_setg(errp, "Failed to set up state transfer pipe: %s", + g_err->message); + ret = -EINVAL; + goto fail; + } + + read_fd = pipe_fds[0]; + write_fd = pipe_fds[1]; + + /* + * VHOST_TRANSFER_STATE_PHASE_STOPPED means the device must be stopped. 
+ * We cannot check dev->suspended, because the back-end may not support + * suspending. + */ + assert(!dev->started); + + /* Transfer ownership of read_fd to the back-end */ + ret = vhost_set_device_state_fd(dev, + VHOST_TRANSFER_STATE_DIRECTION_LOAD, + VHOST_TRANSFER_STATE_PHASE_STOPPED, + read_fd, + &reply_fd, + errp); + if (ret < 0) { + error_prepend(errp, "Failed to initiate state transfer: "); + goto fail; + } + + /* If the back-end wishes to use a different pipe, switch over */ + if (reply_fd >= 0) { + close(write_fd); + write_fd = reply_fd; + } + + while (true) { + size_t this_chunk_size = qemu_get_be32(f); + ssize_t write_ret; + const uint8_t *transfer_pointer; + + if (this_chunk_size == 0) { + /* End of state */ + break; + } + + if (transfer_buf_size < this_chunk_size) { + transfer_buf = g_realloc(transfer_buf, this_chunk_size); + transfer_buf_size = this_chunk_size; + } + + if (qemu_get_buffer(f, transfer_buf, this_chunk_size) < + this_chunk_size) + { + error_setg(errp, "Failed to read state"); + ret = -EINVAL; + goto fail; + } + + transfer_pointer = transfer_buf; + while (this_chunk_size > 0) { + write_ret = write(write_fd, transfer_pointer, this_chunk_size); + if (write_ret < 0) { + ret = -errno; + error_setg_errno(errp, -ret, "Failed to send state"); + goto fail; + } else if (write_ret == 0) { + error_setg(errp, "Failed to send state: Connection is closed"); + ret = -ECONNRESET; + goto fail; + } + + assert(write_ret <= this_chunk_size); + this_chunk_size -= write_ret; + transfer_pointer += write_ret; + } + } + + /* + * Close our end, thus ending transfer, before inquiring the back-end about + * whether transfer was successful + */ + close(write_fd); + write_fd = -1; + + /* Also, verify that the device is still stopped */ + assert(!dev->started); + + ret = vhost_check_device_state(dev, errp); + if (ret < 0) { + goto fail; + } + + ret = 0; +fail: + g_free(transfer_buf); + if (write_fd >= 0) { + close(write_fd); + } + + return ret; +}
vhost_save_backend_state() and vhost_load_backend_state() can be used by vhost front-ends to easily save and load the back-end's state to/from the migration stream. Because we do not know the full state size ahead of time, vhost_save_backend_state() simply reads the data in 1 MB chunks, and writes each chunk consecutively into the migration stream, prefixed by its length. EOF is indicated by a 0-length chunk. Signed-off-by: Hanna Czenczek <hreitz@redhat.com> --- include/hw/virtio/vhost.h | 35 +++++++ hw/virtio/vhost.c | 204 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+)