Message ID | 1384777531-14635-22-git-send-email-marcandre.lureau@gmail.com |
---|---|
State | New |
Headers | show |
/note to self: - add failing case where nbd channel isn't connected before and after migration (do not wait for nbd init or disconnection) On Mon, Nov 18, 2013 at 1:25 PM, Marc-André Lureau <marcandre.lureau@gmail.com> wrote: > From: Marc-André Lureau <marcandre.lureau@redhat.com> > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> > --- > block/Makefile.objs | 1 + > block/spicebd.c | 536 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 537 insertions(+) > create mode 100644 block/spicebd.c > > diff --git a/block/Makefile.objs b/block/Makefile.objs > index 4e8c91e..f49b7c3 100644 > --- a/block/Makefile.objs > +++ b/block/Makefile.objs > @@ -16,6 +16,7 @@ block-obj-$(CONFIG_CURL) += curl.o > block-obj-$(CONFIG_RBD) += rbd.o > block-obj-$(CONFIG_GLUSTERFS) += gluster.o > block-obj-$(CONFIG_LIBSSH2) += ssh.o > +common-obj-$(CONFIG_SPICE) += spicebd.o > endif > > common-obj-y += stream.o > diff --git a/block/spicebd.c b/block/spicebd.c > new file mode 100644 > index 0000000..6b23b61 > --- /dev/null > +++ b/block/spicebd.c > @@ -0,0 +1,536 @@ > +/* > + * Spice block backend for QEMU. > + * > + * Copyright (C) 2013 Red Hat, Inc. > + * Author: Marc-André Lureau <marcandre.lureau@redhat.com> > + * > + * Permission is hereby granted, free of charge, to any person obtaining a copy > + * of this software and associated documentation files (the "Software"), to deal > + * in the Software without restriction, including without limitation the rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <stdarg.h> > +#include <spice/protocol.h> > + > +#include "nbd-client.h" > +#include "ui/qemu-spice.h" > +#include "block/block_int.h" > +#include "qemu/sockets.h" > +#include "qemu/uri.h" > +#include "qapi/qmp/qint.h" > +#include "sysemu/sysemu.h" > +#include "sysemu/char.h" > +#include "qmp-commands.h" > +#include "sysemu/blockdev.h" > +#include "migration/migration.h" > + > +#ifndef DEBUG_SPICE > +#define DEBUG_SPICE 0 > +#endif > + > +#define SOCKET_CHR 0 > +#define SOCKET_NBD 1 > + > +#define DPRINTF(fmt, ...) \ > + do { \ > + if (DEBUG_SPICE) { \ > + fprintf(stderr, "spicebd: %-15s " fmt "\n", \ > + __func__, ##__VA_ARGS__); \ > + } \ > + } while (0) > + > +typedef struct Buffer { > + uint8_t data[4096]; > + uint8_t *p; > + char left; > +} Buffer; > + > +typedef struct BDRVSpiceState { > + BlockDriverState *bs; > + QEMUBH *bh; > + NbdClientSession client; > + > + /* our spicechr-fd pipe */ > + int sv[2]; > + Buffer readb; > + Buffer writeb; > + > + int aio_count; > + CharDriverState *chr; > + guint chr_watch; > + > + Coroutine *coroutine; > + bool need_read; > + bool need_write; > + bool opened; > + bool inmigrate; > +} BDRVSpiceState; > + > +static void nbd_read_handler(void *opaque); > +static void update_chr_handlers(BDRVSpiceState *s); > + > +static int parse_uri(const char *filename, QDict *options, Error **errp) > +{ > + URI *uri = NULL; > + > + uri = uri_parse(filename); > + if (!uri) { > + return -EINVAL; > + } > + > + if (strcmp(uri->scheme, "spicebd") != 0) { > + error_setg(errp, "URI scheme must be 'spicebd'"); > + goto err; > + } > + > + uri_free(uri); > + return 0; > + > + err: > + if (uri) { > + uri_free(uri); > + } > + return -EINVAL; > +} > + > +static void spice_parse_filename(const char *filename, QDict *options, > + Error **errp) > +{ > + parse_uri(filename, options, errp); > +} > + > +static void co_restart(void *opaque) > +{ > + BDRVSpiceState *s = opaque; > + > + qemu_coroutine_enter(s->coroutine, NULL); > +} > + > +static void close_socketpair(BDRVSpiceState *s) > +{ > + if (!s->opened) { > + return; > + } > + > + DPRINTF(""); > + nbd_client_session_close(&s->client); > + > + if (s->sv[SOCKET_NBD] >= 0) { > + qemu_aio_set_fd_handler(s->sv[SOCKET_NBD], NULL, NULL, NULL); > + closesocket(s->sv[SOCKET_NBD]); > + s->sv[SOCKET_NBD] = -1; > + } > + > + if (s->sv[SOCKET_CHR] >= 0) { > + qemu_aio_set_fd_handler(s->sv[SOCKET_CHR], NULL, NULL, NULL); > + closesocket(s->sv[SOCKET_CHR]); > + s->sv[SOCKET_CHR] = -1; > + } > + > + if (s->inmigrate) { > + vm_start_release(); > + s->inmigrate = false; > + } > + > + s->opened = FALSE; > + if (s->coroutine && s->coroutine != qemu_coroutine_self()) { > + co_restart(s); > + } > +} > + > +static int chardev_can_read(void *opaque) > +{ > + BDRVSpiceState *s = opaque; > + int retval = 0; > + > + GPollFD pfd = { > + .fd = s->sv[SOCKET_CHR], > + .events = G_IO_OUT > + }; > + g_poll(&pfd, 1, 0); > + if (pfd.revents & G_IO_OUT) { > + retval = s->writeb.left == 0 ? sizeof(s->writeb.data) : 0; > + } > + > + return retval; > +} > + > +static void chardev_read(void *opaque, const uint8_t *buf, int size) > +{ > + BDRVSpiceState *s = opaque; > + int written; > + > + DPRINTF("reply from client %d", size); > + written = write(s->sv[SOCKET_CHR], buf, size); > + if (written == -1) { > + if (errno != EAGAIN) { > + close_socketpair(s); > + return; > + } else { > + written = 0; > + } > + } > + > + if (s->writeb.left == 0) { > + size -= written; > + assert(size <= sizeof(s->writeb.data)); > + memcpy(s->writeb.data, buf, size); > + s->writeb.p = s->writeb.data; > + s->writeb.left = size; > + } else { > + s->writeb.left -= written; > + s->writeb.p += written; > + } > + > + s->need_write = s->writeb.left > 0; > + update_chr_handlers(s); > +} > + > +static void change_blockdev_cb(void *opaque) > +{ > + BDRVSpiceState *s = opaque; > + BlockDriverState *bs = s->bs; > + Error *err = NULL; > + int bdrv_flags; > + QDict *options = NULL; > + char device_name[32]; > + > + qemu_bh_delete(s->bh); > + s->bh = NULL; > + > + memcpy(device_name, bs->child_device_name, sizeof(device_name)); > + DPRINTF("set bs %p to NULL", bs); > + bs->opaque = NULL; > + > + bdrv_flags = bdrv_is_read_only(bs) ? 0 : BDRV_O_RDWR; > + if (bdrv_is_snapshot(bs)) { > + bdrv_flags |= BDRV_O_SNAPSHOT; > + options = qdict_new(); > + qdict_put(options, "snapshot.size", qint_from_int(s->client.size)); > + } > + > + qmp_eject(device_name, true, true, &err); > + if (error_is_set(&err)) { > + fprintf(stderr, "spicebd: %s\n", error_get_pretty(err)); > + error_free(err); > + QDECREF(options); > + free(s); > + } > + > + qmp_change_blockdev_int(device_name, "spicebd:", NULL, > + options, s, &err); > + if (error_is_set(&err)) { > + fprintf(stderr, "spicebd: %s\n", error_get_pretty(err)); > + error_free(err); > + } > + > + if (s->inmigrate) { > + vm_start_release(); > + s->inmigrate = false; > + } > +} > + > +static void coroutine_fn co_init(void *opaque) > +{ > + BDRVSpiceState *s = opaque; > + BlockDriverState *bs = s->bs; > + > + DPRINTF("temporary coroutine for session initialization %p, " > + "device: %s", s, bs->child_device_name); > + > + qemu_set_nonblock(s->sv[SOCKET_NBD]); > + /* After session_init, the fd_handler is managed by nbd-client */ > + qemu_aio_set_fd_handler(s->sv[SOCKET_NBD], co_restart, NULL, s); > + if (nbd_client_session_init(&s->client, bs, > + s->sv[SOCKET_NBD], NULL) < 0) { > + close_socketpair(s); > + } else { > + assert(s->bh == NULL); > + /* NOTE: need to defer out of current AIO call, to avoid non-blocking > + drain_all when ejecting? */ > + s->bh = qemu_bh_new(change_blockdev_cb, s); > + qemu_bh_schedule(s->bh); > + } > + > + s->coroutine = NULL; > + DPRINTF("end of temporary coroutine"); > +} > + > +static void spice_init(BDRVSpiceState *s) > +{ > + if (s->opened) { > + return; > + } > + > + s->opened = TRUE; > + > + /* TODO: teach nbd to use a iostream api instead of a socket */ > + if (socketpair(PF_UNIX, SOCK_STREAM, 0, s->sv) == -1) { > + fprintf(stderr, "failed to create socketpair\n"); > + return; > + } > + > + s->need_read = TRUE; > + update_chr_handlers(s); > + > + if (runstate_check(RUN_STATE_INMIGRATE)) { > + vm_start_hold(); > + s->inmigrate = true; > + } > + > + /* tell server we are ready */ > + qemu_chr_fe_event(s->chr, SPICE_PORT_EVENT_OPENED); > + s->coroutine = qemu_coroutine_create(co_init); > + qemu_coroutine_enter(s->coroutine, s); > +} > + > +static void chardev_event(void *opaque, int event) > +{ > + BDRVSpiceState *s = opaque; > + > + switch (event) { > + case CHR_EVENT_CLOSED: > + DPRINTF("chardev close"); > + close_socketpair(s); > + break; > + case CHR_EVENT_BREAK: > + DPRINTF("chardev break"); > + if (s->coroutine) { > + DPRINTF("already waiting for incoming session"); > + qemu_chr_fe_event(s->chr, SPICE_PORT_EVENT_OPENED); > + return; > + } > + close_socketpair(s); > + /* fall-through */ > + case CHR_EVENT_OPENED: > + DPRINTF("chardev opened"); > + spice_init(s); > + break; > + default: > + DPRINTF("unhandled chardev event %d", event); > + } > +} > + > +static gboolean write_to_chr(GIOChannel *chan, GIOCondition cond, > + void *opaque) > +{ > + BDRVSpiceState *s = opaque; > + int r; > + > + r = qemu_chr_fe_write(s->chr, s->readb.p, s->readb.left); > + DPRINTF("write_to_chr %d/%d", r, s->readb.left); > + if (r <= 0) { > + close_socketpair(s); > + return FALSE; > + } > + > + s->readb.p += r; > + s->readb.left -= r; > + > + if (s->readb.left > 0) { > + if (!s->chr_watch) { > + s->chr_watch = qemu_chr_fe_add_watch(s->chr, G_IO_OUT, > + write_to_chr, s); > + } > + return TRUE; > + } else { > + s->need_read = TRUE; > + update_chr_handlers(s); > + } > + > + return FALSE; > +} > + > +static void nbd_read_handler(void *opaque) > +{ > + BDRVSpiceState *s = opaque; > + > + DPRINTF("read from nbd"); > + > + if (s->readb.left > 0) { > + abort(); > + } > + > + do { > + s->readb.left = recv(s->sv[SOCKET_CHR], s->readb.data, > + sizeof(s->readb.data), MSG_DONTWAIT); > + } while (s->readb.left == -1 && errno == EAGAIN); > + > + if (s->readb.left <= 0) { > + close_socketpair(s); > + return; > + } > + > + s->need_read = FALSE; > + update_chr_handlers(s); > + > + s->readb.p = s->readb.data; > + write_to_chr(NULL, 0, s); > +} > + > +static void nbd_write_handler(void *opaque) > +{ > + BDRVSpiceState *s = opaque; > + > + DPRINTF("resuming chardev_read left: %d", s->writeb.left); > + > + chardev_read(s, s->writeb.data, s->writeb.left); > +} > + > +static void update_chr_handlers(BDRVSpiceState *s) > +{ > + qemu_aio_set_fd_handler(s->sv[SOCKET_CHR], > + s->need_read ? nbd_read_handler : NULL, > + s->need_write ? nbd_write_handler : NULL, > + s); > +} > + > +static int spice_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags, > + Error **errp) > +{ > + BDRVSpiceState *s = bs->opaque; > + int ret = -1; > + > + s->bs = bs; > + > + if (s->opened) { > + DPRINTF("re-open spicebd"); > + s->client.bs = bs; > + return 0; > + } > + > + DPRINTF("open %p device=%s flags=0x%x", s, > + bs->child_device_name, bdrv_flags); > + if (strlen(bs->child_device_name) == 0) { > + fprintf(stderr, "spicebd: missing associated child device\n"); > + return -1; > + } > + > + if (bdrv_flags & BDRV_O_RDWR) { > + fprintf(stderr, "spicebd: only read-only supported\n"); > + return -1; > + } > + > + s->chr = qemu_chr_open_spice_vmc("nbd", true); > + if (!s->chr) { > + goto err; > + } > + > + qemu_chr_add_handlers(s->chr, chardev_can_read, > + chardev_read, chardev_event, s); > + spice_init(s); > + > + return 0; > + > + err: > + return ret; > +} > + > +static void spice_close(BlockDriverState *bs) > +{ > + BDRVSpiceState *s = bs->opaque; > + > + DPRINTF("spice close %p\n", s); > + > + if (s == NULL) { > + /* changing bd */ > + return; > + } > + > + s->bs = NULL; > + > + close_socketpair(s); > + assert(!s->coroutine); /* after close_socketpair */ > + > + if (s->chr) { > + s->chr->chr_close(s->chr); > + g_free(s->chr); > + s->chr = NULL; > + } > +} > + > +static coroutine_fn int spice_co_readv(BlockDriverState *bs, > + int64_t sector_num, > + int nb_sectors, QEMUIOVector *qiov) > +{ > + BDRVSpiceState *s = bs->opaque; > + > + return nbd_client_session_co_readv(&s->client, sector_num, > + nb_sectors, qiov); > +} > + > +static coroutine_fn int spice_co_writev(BlockDriverState *bs, > + int64_t sector_num, > + int nb_sectors, QEMUIOVector *qiov) > +{ > + BDRVSpiceState *s = bs->opaque; > + > + return nbd_client_session_co_writev(&s->client, sector_num, > + nb_sectors, qiov); > +} > + > +static coroutine_fn int spice_co_flush(BlockDriverState *bs) > +{ > + BDRVSpiceState *s = bs->opaque; > + > + if (s == NULL) { > + /* changing bd */ > + return -1; > + } > + > + return nbd_client_session_co_flush(&s->client); > +} > + > +static coroutine_fn int spice_co_discard(BlockDriverState *bs, > + int64_t sector_num, int nb_sectors) > +{ > + BDRVSpiceState *s = bs->opaque; > + > + return nbd_client_session_co_discard(&s->client, sector_num, nb_sectors); > +} > + > +static coroutine_fn int64_t spice_getlength(BlockDriverState *bs) > +{ > + BDRVSpiceState *s = bs->opaque; > + > + DPRINTF("length=%" PRIi64, s->client.size); > + > + return s->client.size; > +} > + > +static BlockDriver bdrv_spice = { > + .format_name = "spicebd", > + .protocol_name = "spicebd", > + .instance_size = sizeof(BDRVSpiceState), > + .bdrv_parse_filename = spice_parse_filename, > + .bdrv_file_open = spice_file_open, > + .bdrv_close = spice_close, > + .bdrv_co_readv = spice_co_readv, > + .bdrv_co_writev = spice_co_writev, > + .bdrv_getlength = spice_getlength, > + .bdrv_co_flush_to_os = spice_co_flush, > + .bdrv_co_discard = spice_co_discard, > +}; > + > +static void bdrv_spice_init(void) > +{ > + bdrv_register(&bdrv_spice); > +} > + > +block_init(bdrv_spice_init); > -- > 1.8.3.1 >
diff --git a/block/Makefile.objs b/block/Makefile.objs index 4e8c91e..f49b7c3 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -16,6 +16,7 @@ block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o block-obj-$(CONFIG_LIBSSH2) += ssh.o +common-obj-$(CONFIG_SPICE) += spicebd.o endif common-obj-y += stream.o diff --git a/block/spicebd.c b/block/spicebd.c new file mode 100644 index 0000000..6b23b61 --- /dev/null +++ b/block/spicebd.c @@ -0,0 +1,536 @@ +/* + * Spice block backend for QEMU. + * + * Copyright (C) 2013 Red Hat, Inc. + * Author: Marc-André Lureau <marcandre.lureau@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <spice/protocol.h> + +#include "nbd-client.h" +#include "ui/qemu-spice.h" +#include "block/block_int.h" +#include "qemu/sockets.h" +#include "qemu/uri.h" +#include "qapi/qmp/qint.h" +#include "sysemu/sysemu.h" +#include "sysemu/char.h" +#include "qmp-commands.h" +#include "sysemu/blockdev.h" +#include "migration/migration.h" + +#ifndef DEBUG_SPICE +#define DEBUG_SPICE 0 +#endif + +#define SOCKET_CHR 0 +#define SOCKET_NBD 1 + +#define DPRINTF(fmt, ...) \ + do { \ + if (DEBUG_SPICE) { \ + fprintf(stderr, "spicebd: %-15s " fmt "\n", \ + __func__, ##__VA_ARGS__); \ + } \ + } while (0) + +typedef struct Buffer { + uint8_t data[4096]; + uint8_t *p; + char left; +} Buffer; + +typedef struct BDRVSpiceState { + BlockDriverState *bs; + QEMUBH *bh; + NbdClientSession client; + + /* our spicechr-fd pipe */ + int sv[2]; + Buffer readb; + Buffer writeb; + + int aio_count; + CharDriverState *chr; + guint chr_watch; + + Coroutine *coroutine; + bool need_read; + bool need_write; + bool opened; + bool inmigrate; +} BDRVSpiceState; + +static void nbd_read_handler(void *opaque); +static void update_chr_handlers(BDRVSpiceState *s); + +static int parse_uri(const char *filename, QDict *options, Error **errp) +{ + URI *uri = NULL; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + if (strcmp(uri->scheme, "spicebd") != 0) { + error_setg(errp, "URI scheme must be 'spicebd'"); + goto err; + } + + uri_free(uri); + return 0; + + err: + if (uri) { + uri_free(uri); + } + return -EINVAL; +} + +static void spice_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + parse_uri(filename, options, errp); +} + +static void co_restart(void *opaque) +{ + BDRVSpiceState *s = opaque; + + qemu_coroutine_enter(s->coroutine, NULL); +} + +static void close_socketpair(BDRVSpiceState *s) +{ + if (!s->opened) { + return; + } + + DPRINTF(""); + nbd_client_session_close(&s->client); + + if (s->sv[SOCKET_NBD] >= 0) { + qemu_aio_set_fd_handler(s->sv[SOCKET_NBD], NULL, NULL, NULL); + closesocket(s->sv[SOCKET_NBD]); + s->sv[SOCKET_NBD] = -1; + } + + if (s->sv[SOCKET_CHR] >= 0) { + qemu_aio_set_fd_handler(s->sv[SOCKET_CHR], NULL, NULL, NULL); + closesocket(s->sv[SOCKET_CHR]); + s->sv[SOCKET_CHR] = -1; + } + + if (s->inmigrate) { + vm_start_release(); + s->inmigrate = false; + } + + s->opened = FALSE; + if (s->coroutine && s->coroutine != qemu_coroutine_self()) { + co_restart(s); + } +} + +static int chardev_can_read(void *opaque) +{ + BDRVSpiceState *s = opaque; + int retval = 0; + + GPollFD pfd = { + .fd = s->sv[SOCKET_CHR], + .events = G_IO_OUT + }; + g_poll(&pfd, 1, 0); + if (pfd.revents & G_IO_OUT) { + retval = s->writeb.left == 0 ? sizeof(s->writeb.data) : 0; + } + + return retval; +} + +static void chardev_read(void *opaque, const uint8_t *buf, int size) +{ + BDRVSpiceState *s = opaque; + int written; + + DPRINTF("reply from client %d", size); + written = write(s->sv[SOCKET_CHR], buf, size); + if (written == -1) { + if (errno != EAGAIN) { + close_socketpair(s); + return; + } else { + written = 0; + } + } + + if (s->writeb.left == 0) { + size -= written; + assert(size <= sizeof(s->writeb.data)); + memcpy(s->writeb.data, buf, size); + s->writeb.p = s->writeb.data; + s->writeb.left = size; + } else { + s->writeb.left -= written; + s->writeb.p += written; + } + + s->need_write = s->writeb.left > 0; + update_chr_handlers(s); +} + +static void change_blockdev_cb(void *opaque) +{ + BDRVSpiceState *s = opaque; + BlockDriverState *bs = s->bs; + Error *err = NULL; + int bdrv_flags; + QDict *options = NULL; + char device_name[32]; + + qemu_bh_delete(s->bh); + s->bh = NULL; + + memcpy(device_name, bs->child_device_name, sizeof(device_name)); + DPRINTF("set bs %p to NULL", bs); + bs->opaque = NULL; + + bdrv_flags = bdrv_is_read_only(bs) ? 0 : BDRV_O_RDWR; + if (bdrv_is_snapshot(bs)) { + bdrv_flags |= BDRV_O_SNAPSHOT; + options = qdict_new(); + qdict_put(options, "snapshot.size", qint_from_int(s->client.size)); + } + + qmp_eject(device_name, true, true, &err); + if (error_is_set(&err)) { + fprintf(stderr, "spicebd: %s\n", error_get_pretty(err)); + error_free(err); + QDECREF(options); + free(s); + } + + qmp_change_blockdev_int(device_name, "spicebd:", NULL, + options, s, &err); + if (error_is_set(&err)) { + fprintf(stderr, "spicebd: %s\n", error_get_pretty(err)); + error_free(err); + } + + if (s->inmigrate) { + vm_start_release(); + s->inmigrate = false; + } +} + +static void coroutine_fn co_init(void *opaque) +{ + BDRVSpiceState *s = opaque; + BlockDriverState *bs = s->bs; + + DPRINTF("temporary coroutine for session initialization %p, " + "device: %s", s, bs->child_device_name); + + qemu_set_nonblock(s->sv[SOCKET_NBD]); + /* After session_init, the fd_handler is managed by nbd-client */ + qemu_aio_set_fd_handler(s->sv[SOCKET_NBD], co_restart, NULL, s); + if (nbd_client_session_init(&s->client, bs, + s->sv[SOCKET_NBD], NULL) < 0) { + close_socketpair(s); + } else { + assert(s->bh == NULL); + /* NOTE: need to defer out of current AIO call, to avoid non-blocking + drain_all when ejecting? */ + s->bh = qemu_bh_new(change_blockdev_cb, s); + qemu_bh_schedule(s->bh); + } + + s->coroutine = NULL; + DPRINTF("end of temporary coroutine"); +} + +static void spice_init(BDRVSpiceState *s) +{ + if (s->opened) { + return; + } + + s->opened = TRUE; + + /* TODO: teach nbd to use a iostream api instead of a socket */ + if (socketpair(PF_UNIX, SOCK_STREAM, 0, s->sv) == -1) { + fprintf(stderr, "failed to create socketpair\n"); + return; + } + + s->need_read = TRUE; + update_chr_handlers(s); + + if (runstate_check(RUN_STATE_INMIGRATE)) { + vm_start_hold(); + s->inmigrate = true; + } + + /* tell server we are ready */ + qemu_chr_fe_event(s->chr, SPICE_PORT_EVENT_OPENED); + s->coroutine = qemu_coroutine_create(co_init); + qemu_coroutine_enter(s->coroutine, s); +} + +static void chardev_event(void *opaque, int event) +{ + BDRVSpiceState *s = opaque; + + switch (event) { + case CHR_EVENT_CLOSED: + DPRINTF("chardev close"); + close_socketpair(s); + break; + case CHR_EVENT_BREAK: + DPRINTF("chardev break"); + if (s->coroutine) { + DPRINTF("already waiting for incoming session"); + qemu_chr_fe_event(s->chr, SPICE_PORT_EVENT_OPENED); + return; + } + close_socketpair(s); + /* fall-through */ + case CHR_EVENT_OPENED: + DPRINTF("chardev opened"); + spice_init(s); + break; + default: + DPRINTF("unhandled chardev event %d", event); + } +} + +static gboolean write_to_chr(GIOChannel *chan, GIOCondition cond, + void *opaque) +{ + BDRVSpiceState *s = opaque; + int r; + + r = qemu_chr_fe_write(s->chr, s->readb.p, s->readb.left); + DPRINTF("write_to_chr %d/%d", r, s->readb.left); + if (r <= 0) { + close_socketpair(s); + return FALSE; + } + + s->readb.p += r; + s->readb.left -= r; + + if (s->readb.left > 0) { + if (!s->chr_watch) { + s->chr_watch = qemu_chr_fe_add_watch(s->chr, G_IO_OUT, + write_to_chr, s); + } + return TRUE; + } else { + s->need_read = TRUE; + update_chr_handlers(s); + } + + return FALSE; +} + +static void nbd_read_handler(void *opaque) +{ + BDRVSpiceState *s = opaque; + + DPRINTF("read from nbd"); + + if (s->readb.left > 0) { + abort(); + } + + do { + s->readb.left = recv(s->sv[SOCKET_CHR], s->readb.data, + sizeof(s->readb.data), MSG_DONTWAIT); + } while (s->readb.left == -1 && errno == EAGAIN); + + if (s->readb.left <= 0) { + close_socketpair(s); + return; + } + + s->need_read = FALSE; + update_chr_handlers(s); + + s->readb.p = s->readb.data; + write_to_chr(NULL, 0, s); +} + +static void nbd_write_handler(void *opaque) +{ + BDRVSpiceState *s = opaque; + + DPRINTF("resuming chardev_read left: %d", s->writeb.left); + + chardev_read(s, s->writeb.data, s->writeb.left); +} + +static void update_chr_handlers(BDRVSpiceState *s) +{ + qemu_aio_set_fd_handler(s->sv[SOCKET_CHR], + s->need_read ? nbd_read_handler : NULL, + s->need_write ? nbd_write_handler : NULL, + s); +} + +static int spice_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags, + Error **errp) +{ + BDRVSpiceState *s = bs->opaque; + int ret = -1; + + s->bs = bs; + + if (s->opened) { + DPRINTF("re-open spicebd"); + s->client.bs = bs; + return 0; + } + + DPRINTF("open %p device=%s flags=0x%x", s, + bs->child_device_name, bdrv_flags); + if (strlen(bs->child_device_name) == 0) { + fprintf(stderr, "spicebd: missing associated child device\n"); + return -1; + } + + if (bdrv_flags & BDRV_O_RDWR) { + fprintf(stderr, "spicebd: only read-only supported\n"); + return -1; + } + + s->chr = qemu_chr_open_spice_vmc("nbd", true); + if (!s->chr) { + goto err; + } + + qemu_chr_add_handlers(s->chr, chardev_can_read, + chardev_read, chardev_event, s); + spice_init(s); + + return 0; + + err: + return ret; +} + +static void spice_close(BlockDriverState *bs) +{ + BDRVSpiceState *s = bs->opaque; + + DPRINTF("spice close %p\n", s); + + if (s == NULL) { + /* changing bd */ + return; + } + + s->bs = NULL; + + close_socketpair(s); + assert(!s->coroutine); /* after close_socketpair */ + + if (s->chr) { + s->chr->chr_close(s->chr); + g_free(s->chr); + s->chr = NULL; + } +} + +static coroutine_fn int spice_co_readv(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + BDRVSpiceState *s = bs->opaque; + + return nbd_client_session_co_readv(&s->client, sector_num, + nb_sectors, qiov); +} + +static coroutine_fn int spice_co_writev(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + BDRVSpiceState *s = bs->opaque; + + return nbd_client_session_co_writev(&s->client, sector_num, + nb_sectors, qiov); +} + +static coroutine_fn int spice_co_flush(BlockDriverState *bs) +{ + BDRVSpiceState *s = bs->opaque; + + if (s == NULL) { + /* changing bd */ + return -1; + } + + return nbd_client_session_co_flush(&s->client); +} + +static coroutine_fn int spice_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + BDRVSpiceState *s = bs->opaque; + + return nbd_client_session_co_discard(&s->client, sector_num, nb_sectors); +} + +static coroutine_fn int64_t spice_getlength(BlockDriverState *bs) +{ + BDRVSpiceState *s = bs->opaque; + + DPRINTF("length=%" PRIi64, s->client.size); + + return s->client.size; +} + +static BlockDriver bdrv_spice = { + .format_name = "spicebd", + .protocol_name = "spicebd", + .instance_size = sizeof(BDRVSpiceState), + .bdrv_parse_filename = spice_parse_filename, + .bdrv_file_open = spice_file_open, + .bdrv_close = spice_close, + .bdrv_co_readv = spice_co_readv, + .bdrv_co_writev = spice_co_writev, + .bdrv_getlength = spice_getlength, + .bdrv_co_flush_to_os = spice_co_flush, + .bdrv_co_discard = spice_co_discard, +}; + +static void bdrv_spice_init(void) +{ + bdrv_register(&bdrv_spice); +} + +block_init(bdrv_spice_init);